├── .gitattributes ├── .gitignore ├── analysis.py ├── autograder.py ├── crawler.py ├── environment.py ├── featureExtractors.py ├── game.py ├── ghostAgents.py ├── grading.py ├── graphicsCrawlerDisplay.py ├── graphicsDisplay.py ├── graphicsGridworldDisplay.py ├── graphicsUtils.py ├── gridworld.py ├── keyboardAgents.py ├── layout.py ├── layouts ├── capsuleClassic.lay ├── contestClassic.lay ├── mediumClassic.lay ├── mediumGrid.lay ├── minimaxClassic.lay ├── openClassic.lay ├── originalClassic.lay ├── smallClassic.lay ├── smallGrid.lay ├── testClassic.lay ├── trappedClassic.lay └── trickyClassic.lay ├── learningAgents.py ├── mdp.py ├── pacman.py ├── pacmanAgents.py ├── projectParams.py ├── qlearningAgents.py ├── reinforcementTestClasses.py ├── testClasses.py ├── testParser.py ├── test_cases ├── CONFIG ├── q1 │ ├── 1-tinygrid.solution │ ├── 1-tinygrid.test │ ├── 2-tinygrid-noisy.solution │ ├── 2-tinygrid-noisy.test │ ├── 3-bridge.solution │ ├── 3-bridge.test │ ├── 4-discountgrid.solution │ ├── 4-discountgrid.test │ └── CONFIG ├── q2 │ ├── 1-bridge-grid.solution │ ├── 1-bridge-grid.test │ └── CONFIG ├── q3 │ ├── 1-question-3.1.solution │ ├── 1-question-3.1.test │ ├── 2-question-3.2.solution │ ├── 2-question-3.2.test │ ├── 3-question-3.3.solution │ ├── 3-question-3.3.test │ ├── 4-question-3.4.solution │ ├── 4-question-3.4.test │ ├── 5-question-3.5.solution │ ├── 5-question-3.5.test │ └── CONFIG ├── q4 │ ├── 1-tinygrid.solution │ ├── 1-tinygrid.test │ ├── 2-tinygrid-noisy.solution │ ├── 2-tinygrid-noisy.test │ ├── 3-bridge.solution │ ├── 3-bridge.test │ ├── 4-discountgrid.solution │ ├── 4-discountgrid.test │ └── CONFIG ├── q5 │ ├── 1-tinygrid.solution │ ├── 1-tinygrid.test │ ├── 2-tinygrid-noisy.solution │ ├── 2-tinygrid-noisy.test │ ├── 3-bridge.solution │ ├── 3-bridge.test │ ├── 4-discountgrid.solution │ ├── 4-discountgrid.test │ └── CONFIG ├── q6 │ ├── CONFIG │ ├── grade-agent.solution │ └── grade-agent.test ├── q7 │ ├── CONFIG │ ├── grade-agent.solution │ └── 
grade-agent.test └── q8 │ ├── 1-tinygrid.solution │ ├── 1-tinygrid.test │ ├── 2-tinygrid-noisy.solution │ ├── 2-tinygrid-noisy.test │ ├── 3-bridge.solution │ ├── 3-bridge.test │ ├── 4-discountgrid.solution │ ├── 4-discountgrid.test │ └── CONFIG ├── textDisplay.py ├── textGridworldDisplay.py ├── util.py └── valueIterationAgents.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | *.sln merge=union 7 | *.csproj merge=union 8 | *.vbproj merge=union 9 | *.fsproj merge=union 10 | *.dbproj merge=union 11 | 12 | # Standard to msysgit 13 | *.doc diff=astextplain 14 | *.DOC diff=astextplain 15 | *.docx diff=astextplain 16 | *.DOCX diff=astextplain 17 | *.dot diff=astextplain 18 | *.DOT diff=astextplain 19 | *.pdf diff=astextplain 20 | *.PDF diff=astextplain 21 | *.rtf diff=astextplain 22 | *.RTF diff=astextplain 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ################# 2 | ## Eclipse 3 | ################# 4 | 5 | *.pydevproject 6 | .project 7 | .metadata 8 | bin/ 9 | tmp/ 10 | *.tmp 11 | *.bak 12 | *.swp 13 | *~.nib 14 | local.properties 15 | .classpath 16 | .settings/ 17 | .loadpath 18 | 19 | # External tool builders 20 | .externalToolBuilders/ 21 | 22 | # Locally stored "Eclipse launch configurations" 23 | *.launch 24 | 25 | # CDT-specific 26 | .cproject 27 | 28 | # PDT-specific 29 | .buildpath 30 | 31 | 32 | ################# 33 | ## Visual Studio 34 | ################# 35 | 36 | ## Ignore Visual Studio temporary files, build results, and 37 | ## files generated by popular Visual Studio add-ons. 
38 | 39 | # User-specific files 40 | *.suo 41 | *.user 42 | *.sln.docstates 43 | 44 | # Build results 45 | [Dd]ebug/ 46 | [Rr]elease/ 47 | *_i.c 48 | *_p.c 49 | *.ilk 50 | *.meta 51 | *.obj 52 | *.pch 53 | *.pdb 54 | *.pgc 55 | *.pgd 56 | *.rsp 57 | *.sbr 58 | *.tlb 59 | *.tli 60 | *.tlh 61 | *.tmp 62 | *.vspscc 63 | .builds 64 | *.dotCover 65 | 66 | ## TODO: If you have NuGet Package Restore enabled, uncomment this 67 | #packages/ 68 | 69 | # Visual C++ cache files 70 | ipch/ 71 | *.aps 72 | *.ncb 73 | *.opensdf 74 | *.sdf 75 | 76 | # Visual Studio profiler 77 | *.psess 78 | *.vsp 79 | 80 | # ReSharper is a .NET coding add-in 81 | _ReSharper* 82 | 83 | # Installshield output folder 84 | [Ee]xpress 85 | 86 | # DocProject is a documentation generator add-in 87 | DocProject/buildhelp/ 88 | DocProject/Help/*.HxT 89 | DocProject/Help/*.HxC 90 | DocProject/Help/*.hhc 91 | DocProject/Help/*.hhk 92 | DocProject/Help/*.hhp 93 | DocProject/Help/Html2 94 | DocProject/Help/html 95 | 96 | # Click-Once directory 97 | publish 98 | 99 | # Others 100 | [Bb]in 101 | [Oo]bj 102 | sql 103 | TestResults 104 | *.Cache 105 | ClientBin 106 | stylecop.* 107 | ~$* 108 | *.dbmdl 109 | Generated_Code #added for RIA/Silverlight projects 110 | 111 | # Backup & report files from converting an old project file to a newer 112 | # Visual Studio version. 
Backup files are not needed, because we have git ;-) 113 | _UpgradeReport_Files/ 114 | Backup*/ 115 | UpgradeLog*.XML 116 | 117 | 118 | 119 | ############ 120 | ## Windows 121 | ############ 122 | 123 | # Windows image file caches 124 | Thumbs.db 125 | 126 | # Folder config file 127 | Desktop.ini 128 | 129 | 130 | ############# 131 | ## Python 132 | ############# 133 | 134 | *.py[co] 135 | 136 | # Packages 137 | *.egg 138 | *.egg-info 139 | dist 140 | build 141 | eggs 142 | parts 143 | bin 144 | var 145 | sdist 146 | develop-eggs 147 | .installed.cfg 148 | 149 | # Installer logs 150 | pip-log.txt 151 | 152 | # Unit test / coverage reports 153 | .coverage 154 | .tox 155 | 156 | #Translations 157 | *.mo 158 | 159 | #Mr Developer 160 | .mr.developer.cfg 161 | 162 | # Mac crap 163 | .DS_Store 164 | -------------------------------------------------------------------------------- /analysis.py: -------------------------------------------------------------------------------- 1 | # analysis.py 2 | # ----------- 3 | # Licensing Information: Please do not distribute or publish solutions to this 4 | # project. You are free to use and extend these projects for educational 5 | # purposes. The Pacman AI projects were developed at UC Berkeley, primarily by 6 | # John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). 7 | # Student side autograding was added by Brad Miller, Nick Hay, and Pieter 8 | # Abbeel in Spring 2013. 9 | # For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html 10 | 11 | ###################### 12 | # ANALYSIS QUESTIONS # 13 | ###################### 14 | 15 | # Set the given parameters to obtain the specified policies through 16 | # value iteration. 
# Answers to the MDP / RL analysis questions.
# Each question* function returns the parameter settings (discount, noise,
# living reward, ...) that produce the policy the question asks for, or the
# string 'NOT POSSIBLE' when no setting can produce it.

def question2():
    """Parameters that make crossing the bridge optimal (deterministic moves)."""
    answerDiscount = 0.9
    # Zero noise makes the environment deterministic, so the bridge is safe.
    answerNoise = 0
    return answerDiscount, answerNoise

def question3a():
    """Prefer the close exit (+1), risking the cliff."""
    answerDiscount = .1
    answerNoise = 0
    answerLivingReward = -1
    return answerDiscount, answerNoise, answerLivingReward
    # If not possible, return 'NOT POSSIBLE'

def question3b():
    """Prefer the close exit (+1), but avoiding the cliff."""
    answerDiscount = .3
    answerNoise = .2
    answerLivingReward = -1
    return answerDiscount, answerNoise, answerLivingReward
    # If not possible, return 'NOT POSSIBLE'

def question3c():
    """Prefer the distant exit (+10), risking the cliff."""
    answerDiscount = 1
    answerNoise = 0
    answerLivingReward = -1
    return answerDiscount, answerNoise, answerLivingReward
    # If not possible, return 'NOT POSSIBLE'

def question3d():
    """Prefer the distant exit (+10), avoiding the cliff."""
    answerDiscount = .9
    answerNoise = .4
    answerLivingReward = -1
    return answerDiscount, answerNoise, answerLivingReward
    # If not possible, return 'NOT POSSIBLE'

def question3e():
    """Avoid both exits and the cliff (episode never terminates usefully)."""
    answerDiscount = 0
    answerNoise = 0
    answerLivingReward = 0
    return answerDiscount, answerNoise, answerLivingReward
    # If not possible, return 'NOT POSSIBLE'

def question6():
    """Epsilon/learning-rate for the bridge crossing: no setting works."""
    # Kept to document the expected answer format when a setting does exist.
    answerEpsilon = None
    answerLearningRate = None
    return 'NOT POSSIBLE'
    # If not possible, return 'NOT POSSIBLE'

if __name__ == '__main__':
    # print(...) with a single argument is valid in both Python 2 and 3.
    print('Answers to analysis questions:')
    import analysis
    for q in [q for q in dir(analysis) if q.startswith('question')]:
        response = getattr(analysis, q)()
        print('  Question %s:\t%s' % (q, str(response)))

# ---------------------------------------------------------------------
# /autograder.py:
# ---------------------------------------------------------------------
# autograder.py
# -------------
# Licensing Information: Please do not distribute or publish solutions to this
# project. You are free to use and extend these projects for educational
# purposes.
# The Pacman AI projects were developed at UC Berkeley, primarily by
# John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and Pieter
# Abbeel in Spring 2013.
# For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html

# imports from python standard library
import grading
import imp
import optparse
import os
import re
import sys
import projectParams
import random
from util import FixedRandom
# Seed the global RNG deterministically so autograder runs are reproducible.
random.setstate(FixedRandom().random.getstate())

# register arguments and set default values
def readCommand(argv):
    """Parse autograder command-line arguments; return the options object."""
    parser = optparse.OptionParser(description = 'Run public tests on student code')
    parser.set_defaults(generateSolutions=False, edxOutput=False, muteOutput=False, printTestCase=False)
    parser.add_option('--test-directory',
                      dest = 'testRoot',
                      default = 'test_cases',
                      help = 'Root test directory which contains subdirectories corresponding to each question')
    parser.add_option('--student-code',
                      dest = 'studentCode',
                      default = projectParams.STUDENT_CODE_DEFAULT,
                      help = 'comma separated list of student code files')
    parser.add_option('--code-directory',
                      dest = 'codeRoot',
                      default = "",
                      help = 'Root directory containing the student and testClass code')
    parser.add_option('--test-case-code',
                      dest = 'testCaseCode',
                      default = projectParams.PROJECT_TEST_CLASSES,
                      help = 'class containing testClass classes for this project')
    parser.add_option('--generate-solutions',
                      dest = 'generateSolutions',
                      action = 'store_true',
                      help = 'Write solutions generated to .solution file')
    parser.add_option('--edx-output',
                      dest = 'edxOutput',
                      action = 'store_true',
                      help = 'Generate edX output files')
    parser.add_option('--mute',
                      dest = 'muteOutput',
                      action = 'store_true',
                      help = 'Mute output from executing tests')
    parser.add_option('--print-tests', '-p',
                      dest = 'printTestCase',
                      action = 'store_true',
                      help = 'Print each test case before running them.')
    parser.add_option('--test', '-t',
                      dest = 'runTest',
                      default = None,
                      help = 'Run one particular test.  Relative to test root.')
    parser.add_option('--question', '-q',
                      dest = 'gradeQuestion',
                      default = None,
                      help = 'Grade one particular question.')
    (options, args) = parser.parse_args(argv)
    return options


# confirm we should author solution files
def confirmGenerate():
    """Ask for interactive confirmation before overwriting solution files.

    Exits the process when the user answers 'no'; loops until a clear
    'yes'/'no' answer is given.
    """
    # print(...) with a single argument is valid in both Python 2 and 3.
    print('WARNING: this action will overwrite any solution files.')
    print('Are you sure you want to proceed? (yes/no)')
    while True:
        ans = sys.stdin.readline().strip()
        if ans == 'yes':
            break
        elif ans == 'no':
            sys.exit(0)
        else:
            print('please answer either "yes" or "no"')


# TODO: Fix this so that tracebacks work correctly
# Looking at source of the traceback module, presuming it works
# the same as the interpreter's, it uses co_filename.  This is,
# however, a readonly attribute.
def setModuleName(module, filename):
    """Tag every function and class in `module` with __file__ = filename.

    Intended to make error output attributable to the student's file.
    """
    functionType = type(confirmGenerate)
    classType = type(optparse.Option)

    for i in dir(module):
        o = getattr(module, i)
        # Skip objects that already carry a __file__ attribute.
        if hasattr(o, '__file__'): continue

        if type(o) == functionType:
            setattr(o, '__file__', filename)
        elif type(o) == classType:
            setattr(o, '__file__', filename)
            # TODO: assign member __file__'s?
102 | #print i, type(o) 103 | 104 | 105 | #from cStringIO import StringIO 106 | 107 | def loadModuleString(moduleSource): 108 | # Below broken, imp doesn't believe its being passed a file: 109 | # ValueError: load_module arg#2 should be a file or None 110 | # 111 | #f = StringIO(moduleCodeDict[k]) 112 | #tmp = imp.load_module(k, f, k, (".py", "r", imp.PY_SOURCE)) 113 | tmp = imp.new_module(k) 114 | exec moduleCodeDict[k] in tmp.__dict__ 115 | setModuleName(tmp, k) 116 | return tmp 117 | 118 | import py_compile 119 | 120 | def loadModuleFile(moduleName, filePath): 121 | with open(filePath, 'r') as f: 122 | return imp.load_module(moduleName, f, "%s.py" % moduleName, (".py", "r", imp.PY_SOURCE)) 123 | 124 | 125 | def readFile(path, root=""): 126 | "Read file from disk at specified path and return as string" 127 | with open(os.path.join(root, path), 'r') as handle: 128 | return handle.read() 129 | 130 | 131 | ####################################################################### 132 | # Error Hint Map 133 | ####################################################################### 134 | 135 | # TODO: use these 136 | ERROR_HINT_MAP = { 137 | 'q1': { 138 | "": """ 139 | We noticed that your project threw an IndexError on q1. 140 | While many things may cause this, it may have been from 141 | assuming a certain number of successors from a state space 142 | or assuming a certain number of actions available from a given 143 | state. Try making your code more general (no hardcoded indices) 144 | and submit again! 145 | """ 146 | }, 147 | 'q3': { 148 | "": """ 149 | We noticed that your project threw an AttributeError on q3. 150 | While many things may cause this, it may have been from assuming 151 | a certain size or structure to the state space. For example, if you have 152 | a line of code assuming that the state is (x, y) and we run your code 153 | on a state space with (x, y, z), this error could be thrown. Try 154 | making your code more general and submit again! 
155 | 156 | """ 157 | } 158 | } 159 | 160 | import pprint 161 | 162 | def splitStrings(d): 163 | d2 = dict(d) 164 | for k in d: 165 | if k[0:2] == "__": 166 | del d2[k] 167 | continue 168 | if d2[k].find("\n") >= 0: 169 | d2[k] = d2[k].split("\n") 170 | return d2 171 | 172 | 173 | def printTest(testDict, solutionDict): 174 | pp = pprint.PrettyPrinter(indent=4) 175 | print "Test case:" 176 | for line in testDict["__raw_lines__"]: 177 | print " |", line 178 | print "Solution:" 179 | for line in solutionDict["__raw_lines__"]: 180 | print " |", line 181 | 182 | 183 | def runTest(testName, moduleDict, printTestCase=False): 184 | import testParser 185 | import testClasses 186 | for module in moduleDict: 187 | setattr(sys.modules[__name__], module, moduleDict[module]) 188 | 189 | # This is a hack, will break if tests check question without testing for None 190 | question = None 191 | 192 | testDict = testParser.TestParser(testName + ".test").parse() 193 | solutionDict = testParser.TestParser(testName + ".solution").parse() 194 | test_out_file = os.path.join('%s.test_output' % testName) 195 | testDict['test_out_file'] = test_out_file 196 | testClass = getattr(projectTestClasses, testDict['class']) 197 | testCase = testClass(question, testDict) 198 | 199 | if printTestCase: 200 | printTest(testDict, solutionDict) 201 | 202 | # This is a fragile hack to create a stub grades object 203 | grades = grading.Grades(projectParams.PROJECT_NAME, [(None,0)]) 204 | testCase.execute(grades, moduleDict, solutionDict) 205 | 206 | # evaluate student code 207 | def evaluate(generateSolutions, testRoot, moduleDict, exceptionMap=ERROR_HINT_MAP, edxOutput=False, muteOutput=False, 208 | printTestCase=False, questionToGrade=None): 209 | # imports of testbench code. 
note that the testClasses import must follow 210 | # the import of student code due to dependencies 211 | import testParser 212 | import testClasses 213 | for module in moduleDict: 214 | setattr(sys.modules[__name__], module, moduleDict[module]) 215 | 216 | problemDict = testParser.TestParser(os.path.join(testRoot, 'CONFIG')).parse() 217 | 218 | # iterate through and run tests 219 | if 'order' in problemDict: 220 | test_subdirs = problemDict['order'].split() 221 | else: 222 | test_subdirs = sorted(os.listdir(testRoot)) 223 | questions = [] 224 | questionDicts = {} 225 | for q in test_subdirs: 226 | subdir_path = os.path.join(testRoot, q) 227 | if not os.path.isdir(subdir_path) or q[0] == '.': 228 | continue 229 | 230 | if questionToGrade != None and q != questionToGrade: 231 | continue 232 | 233 | # create a question object 234 | questionDict = testParser.TestParser(os.path.join(subdir_path, 'CONFIG')).parse() 235 | questionClass = getattr(testClasses, questionDict['class']) 236 | question = questionClass(questionDict) 237 | questionDicts[q] = questionDict 238 | 239 | # load test cases into question 240 | tests = filter(lambda t: re.match('[^#~.].*\.test\Z', t), os.listdir(subdir_path)) 241 | tests = map(lambda t: re.match('(.*)\.test\Z', t).group(1), tests) 242 | for t in sorted(tests): 243 | test_file = os.path.join(subdir_path, '%s.test' % t) 244 | solution_file = os.path.join(subdir_path, '%s.solution' % t) 245 | test_out_file = os.path.join(subdir_path, '%s.test_output' % t) 246 | testDict = testParser.TestParser(test_file).parse() 247 | if testDict.get("disabled", "false").lower() == "true": 248 | continue 249 | testDict['test_out_file'] = test_out_file 250 | testClass = getattr(projectTestClasses, testDict['class']) 251 | testCase = testClass(question, testDict) 252 | def makefun(testCase, solution_file): 253 | if generateSolutions: 254 | # write solution file to disk 255 | return lambda grades: testCase.writeSolution(moduleDict, solution_file) 256 | else: 
257 | # read in solution dictionary and pass as an argument 258 | testDict = testParser.TestParser(test_file).parse() 259 | solutionDict = testParser.TestParser(solution_file).parse() 260 | if printTestCase: 261 | return lambda grades: printTest(testDict, solutionDict) or testCase.execute(grades, moduleDict, solutionDict) 262 | else: 263 | return lambda grades: testCase.execute(grades, moduleDict, solutionDict) 264 | question.addTestCase(testCase, makefun(testCase, solution_file)) 265 | 266 | # Note extra function is necessary for scoping reasons 267 | def makefun(question): 268 | return lambda grades: question.execute(grades) 269 | setattr(sys.modules[__name__], q, makefun(question)) 270 | questions.append((q, question.getMaxPoints())) 271 | 272 | grades = grading.Grades(projectParams.PROJECT_NAME, questions, edxOutput=edxOutput, muteOutput=muteOutput) 273 | if questionToGrade == None: 274 | for q in questionDicts: 275 | for prereq in questionDicts[q].get('depends', '').split(): 276 | grades.addPrereq(q, prereq) 277 | 278 | grades.grade(sys.modules[__name__]) 279 | return grades.points 280 | 281 | 282 | if __name__ == '__main__': 283 | options = readCommand(sys.argv) 284 | if options.generateSolutions: 285 | confirmGenerate() 286 | codePaths = options.studentCode.split(',') 287 | # moduleCodeDict = {} 288 | # for cp in codePaths: 289 | # moduleName = re.match('.*?([^/]*)\.py', cp).group(1) 290 | # moduleCodeDict[moduleName] = readFile(cp, root=options.codeRoot) 291 | # moduleCodeDict['projectTestClasses'] = readFile(options.testCaseCode, root=options.codeRoot) 292 | # moduleDict = loadModuleDict(moduleCodeDict) 293 | 294 | moduleDict = {} 295 | for cp in codePaths: 296 | moduleName = re.match('.*?([^/]*)\.py', cp).group(1) 297 | moduleDict[moduleName] = loadModuleFile(moduleName, os.path.join(options.codeRoot, cp)) 298 | moduleName = re.match('.*?([^/]*)\.py', options.testCaseCode).group(1) 299 | moduleDict['projectTestClasses'] = loadModuleFile(moduleName, 
os.path.join(options.codeRoot, options.testCaseCode)) 300 | 301 | 302 | if options.runTest != None: 303 | runTest(options.runTest, moduleDict, printTestCase=options.printTestCase) 304 | else: 305 | evaluate(options.generateSolutions, options.testRoot, moduleDict, 306 | edxOutput=options.edxOutput, muteOutput=options.muteOutput, printTestCase=options.printTestCase, 307 | questionToGrade=options.gradeQuestion) 308 | -------------------------------------------------------------------------------- /environment.py: -------------------------------------------------------------------------------- 1 | # environment.py 2 | # -------------- 3 | # Licensing Information: Please do not distribute or publish solutions to this 4 | # project. You are free to use and extend these projects for educational 5 | # purposes. The Pacman AI projects were developed at UC Berkeley, primarily by 6 | # John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). 7 | # Student side autograding was added by Brad Miller, Nick Hay, and Pieter 8 | # Abbeel in Spring 2013. 9 | # For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html 10 | 11 | #!/usr/bin/python 12 | 13 | class Environment: 14 | 15 | def getCurrentState(self): 16 | """ 17 | Returns the current state of enviornment 18 | """ 19 | abstract 20 | 21 | def getPossibleActions(self, state): 22 | """ 23 | Returns possible actions the agent 24 | can take in the given state. Can 25 | return the empty list if we are in 26 | a terminal state. 27 | """ 28 | abstract 29 | 30 | def doAction(self, action): 31 | """ 32 | Performs the given action in the current 33 | environment state and updates the enviornment. 34 | 35 | Returns a (reward, nextState) pair 36 | """ 37 | abstract 38 | 39 | def reset(self): 40 | """ 41 | Resets the current state to the start state 42 | """ 43 | abstract 44 | 45 | def isTerminal(self): 46 | """ 47 | Has the enviornment entered a terminal 48 | state? 
This means there are no successors 49 | """ 50 | state = self.getCurrentState() 51 | actions = self.getPossibleActions(state) 52 | return len(actions) == 0 53 | -------------------------------------------------------------------------------- /featureExtractors.py: -------------------------------------------------------------------------------- 1 | # featureExtractors.py 2 | # -------------------- 3 | # Licensing Information: Please do not distribute or publish solutions to this 4 | # project. You are free to use and extend these projects for educational 5 | # purposes. The Pacman AI projects were developed at UC Berkeley, primarily by 6 | # John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). 7 | # Student side autograding was added by Brad Miller, Nick Hay, and Pieter 8 | # Abbeel in Spring 2013. 9 | # For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html 10 | 11 | "Feature extractors for Pacman game states" 12 | 13 | from game import Directions, Actions 14 | import util 15 | 16 | class FeatureExtractor: 17 | def getFeatures(self, state, action): 18 | """ 19 | Returns a dict from features to counts 20 | Usually, the count will just be 1.0 for 21 | indicator functions. 
22 | """ 23 | util.raiseNotDefined() 24 | 25 | class IdentityExtractor(FeatureExtractor): 26 | def getFeatures(self, state, action): 27 | feats = util.Counter() 28 | feats[(state,action)] = 1.0 29 | return feats 30 | 31 | def closestFood(pos, food, walls): 32 | """ 33 | closestFood -- this is similar to the function that we have 34 | worked on in the search project; here its all in one place 35 | """ 36 | fringe = [(pos[0], pos[1], 0)] 37 | expanded = set() 38 | while fringe: 39 | pos_x, pos_y, dist = fringe.pop(0) 40 | if (pos_x, pos_y) in expanded: 41 | continue 42 | expanded.add((pos_x, pos_y)) 43 | # if we find a food at this location then exit 44 | if food[pos_x][pos_y]: 45 | return dist 46 | # otherwise spread out from the location to its neighbours 47 | nbrs = Actions.getLegalNeighbors((pos_x, pos_y), walls) 48 | for nbr_x, nbr_y in nbrs: 49 | fringe.append((nbr_x, nbr_y, dist+1)) 50 | # no food found 51 | return None 52 | 53 | class SimpleExtractor(FeatureExtractor): 54 | """ 55 | Returns simple features for a basic reflex Pacman: 56 | - whether food will be eaten 57 | - how far away the next food is 58 | - whether a ghost collision is imminent 59 | - whether a ghost is one step away 60 | """ 61 | 62 | def getFeatures(self, state, action): 63 | # extract the grid of food and wall locations and get the ghost locations 64 | food = state.getFood() 65 | walls = state.getWalls() 66 | ghosts = state.getGhostPositions() 67 | 68 | features = util.Counter() 69 | 70 | features["bias"] = 1.0 71 | 72 | # compute the location of pacman after he takes the action 73 | x, y = state.getPacmanPosition() 74 | dx, dy = Actions.directionToVector(action) 75 | next_x, next_y = int(x + dx), int(y + dy) 76 | 77 | # count the number of ghosts 1-step away 78 | features["#-of-ghosts-1-step-away"] = sum((next_x, next_y) in Actions.getLegalNeighbors(g, walls) for g in ghosts) 79 | 80 | # if there is no danger of ghosts then add the food feature 81 | if not 
features["#-of-ghosts-1-step-away"] and food[next_x][next_y]: 82 | features["eats-food"] = 1.0 83 | 84 | dist = closestFood((next_x, next_y), food, walls) 85 | if dist is not None: 86 | # make the distance a number less than one otherwise the update 87 | # will diverge wildly 88 | features["closest-food"] = float(dist) / (walls.width * walls.height) 89 | features.divideAll(10.0) 90 | return features 91 | -------------------------------------------------------------------------------- /ghostAgents.py: -------------------------------------------------------------------------------- 1 | # ghostAgents.py 2 | # -------------- 3 | # Licensing Information: Please do not distribute or publish solutions to this 4 | # project. You are free to use and extend these projects for educational 5 | # purposes. The Pacman AI projects were developed at UC Berkeley, primarily by 6 | # John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). 7 | # Student side autograding was added by Brad Miller, Nick Hay, and Pieter 8 | # Abbeel in Spring 2013. 9 | # For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html 10 | 11 | from game import Agent 12 | from game import Actions 13 | from game import Directions 14 | import random 15 | from util import manhattanDistance 16 | import util 17 | 18 | class GhostAgent( Agent ): 19 | def __init__( self, index ): 20 | self.index = index 21 | 22 | def getAction( self, state ): 23 | dist = self.getDistribution(state) 24 | if len(dist) == 0: 25 | return Directions.STOP 26 | else: 27 | return util.chooseFromDistribution( dist ) 28 | 29 | def getDistribution(self, state): 30 | "Returns a Counter encoding a distribution over actions from the provided state." 31 | util.raiseNotDefined() 32 | 33 | class RandomGhost( GhostAgent ): 34 | "A ghost that chooses a legal action uniformly at random." 
35 | def getDistribution( self, state ): 36 | dist = util.Counter() 37 | for a in state.getLegalActions( self.index ): dist[a] = 1.0 38 | dist.normalize() 39 | return dist 40 | 41 | class DirectionalGhost( GhostAgent ): 42 | "A ghost that prefers to rush Pacman, or flee when scared." 43 | def __init__( self, index, prob_attack=0.8, prob_scaredFlee=0.8 ): 44 | self.index = index 45 | self.prob_attack = prob_attack 46 | self.prob_scaredFlee = prob_scaredFlee 47 | 48 | def getDistribution( self, state ): 49 | # Read variables from state 50 | ghostState = state.getGhostState( self.index ) 51 | legalActions = state.getLegalActions( self.index ) 52 | pos = state.getGhostPosition( self.index ) 53 | isScared = ghostState.scaredTimer > 0 54 | 55 | speed = 1 56 | if isScared: speed = 0.5 57 | 58 | actionVectors = [Actions.directionToVector( a, speed ) for a in legalActions] 59 | newPositions = [( pos[0]+a[0], pos[1]+a[1] ) for a in actionVectors] 60 | pacmanPosition = state.getPacmanPosition() 61 | 62 | # Select best actions given the state 63 | distancesToPacman = [manhattanDistance( pos, pacmanPosition ) for pos in newPositions] 64 | if isScared: 65 | bestScore = max( distancesToPacman ) 66 | bestProb = self.prob_scaredFlee 67 | else: 68 | bestScore = min( distancesToPacman ) 69 | bestProb = self.prob_attack 70 | bestActions = [action for action, distance in zip( legalActions, distancesToPacman ) if distance == bestScore] 71 | 72 | # Construct distribution 73 | dist = util.Counter() 74 | for a in bestActions: dist[a] = bestProb / len(bestActions) 75 | for a in legalActions: dist[a] += ( 1-bestProb ) / len(legalActions) 76 | dist.normalize() 77 | return dist 78 | -------------------------------------------------------------------------------- /grading.py: -------------------------------------------------------------------------------- 1 | # grading.py 2 | # ---------- 3 | # Licensing Information: Please do not distribute or publish solutions to this 4 | # project. 
You are free to use and extend these projects for educational 5 | # purposes. The Pacman AI projects were developed at UC Berkeley, primarily by 6 | # John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). 7 | # Student side autograding was added by Brad Miller, Nick Hay, and Pieter 8 | # Abbeel in Spring 2013. 9 | # For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html 10 | 11 | "Common code for autograders" 12 | 13 | import cgi 14 | import time 15 | import sys 16 | import traceback 17 | import pdb 18 | from collections import defaultdict 19 | import util 20 | 21 | class Grades: 22 | "A data structure for project grades, along with formatting code to display them" 23 | def __init__(self, projectName, questionsAndMaxesList, edxOutput=False, muteOutput=False): 24 | """ 25 | Defines the grading scheme for a project 26 | projectName: project name 27 | questionsAndMaxesDict: a list of (question name, max points per question) 28 | """ 29 | self.questions = [el[0] for el in questionsAndMaxesList] 30 | self.maxes = dict(questionsAndMaxesList) 31 | self.points = Counter() 32 | self.messages = dict([(q, []) for q in self.questions]) 33 | self.project = projectName 34 | self.start = time.localtime()[1:6] 35 | self.sane = True # Sanity checks 36 | self.currentQuestion = None # Which question we're grading 37 | self.edxOutput = edxOutput 38 | self.mute = muteOutput 39 | self.prereqs = defaultdict(set) 40 | 41 | #print 'Autograder transcript for %s' % self.project 42 | print 'Starting on %d-%d at %d:%02d:%02d' % self.start 43 | 44 | def addPrereq(self, question, prereq): 45 | self.prereqs[question].add(prereq) 46 | 47 | def grade(self, gradingModule, exceptionMap = {}): 48 | """ 49 | Grades each question 50 | gradingModule: the module with all the grading functions (pass in with sys.modules[__name__]) 51 | """ 52 | 53 | completedQuestions = set([]) 54 | for q in self.questions: 55 | print '\nQuestion %s' % q 56 | print '=' * (9 + 
len(q)) 57 | print 58 | self.currentQuestion = q 59 | 60 | incompleted = self.prereqs[q].difference(completedQuestions) 61 | if len(incompleted) > 0: 62 | prereq = incompleted.pop() 63 | print \ 64 | """*** NOTE: Make sure to complete Question %s before working on Question %s, 65 | *** because Question %s builds upon your answer for Question %s. 66 | """ % (prereq, q, q, prereq) 67 | continue 68 | 69 | if self.mute: util.mutePrint() 70 | try: 71 | util.TimeoutFunction(getattr(gradingModule, q),300)(self) # Call the question's function 72 | #TimeoutFunction(getattr(gradingModule, q),1200)(self) # Call the question's function 73 | except Exception, inst: 74 | self.addExceptionMessage(q, inst, traceback) 75 | self.addErrorHints(exceptionMap, inst, q[1]) 76 | except: 77 | self.fail('FAIL: Terminated with a string exception.') 78 | finally: 79 | if self.mute: util.unmutePrint() 80 | 81 | if self.points[q] >= self.maxes[q]: 82 | completedQuestions.add(q) 83 | 84 | print '\n### Question %s: %d/%d ###\n' % (q, self.points[q], self.maxes[q]) 85 | 86 | 87 | print '\nFinished at %d:%02d:%02d' % time.localtime()[3:6] 88 | print "\nProvisional grades\n==================" 89 | 90 | for q in self.questions: 91 | print 'Question %s: %d/%d' % (q, self.points[q], self.maxes[q]) 92 | print '------------------' 93 | print 'Total: %d/%d' % (self.points.totalCount(), sum(self.maxes.values())) 94 | print """ 95 | Your grades are NOT yet registered. To register your grades you must 96 | submit your files to the edX website. The grades obtained through the 97 | edX website are your final grades unless your submission was not in 98 | the spirit of the course, such as if your submission simply hardcoded 99 | the answers to the tests. We will screen for this after the deadline. 
100 | 101 | *If you worked with a partner, you must both submit separately.* 102 | """ 103 | 104 | if self.edxOutput: 105 | self.produceOutput() 106 | 107 | def addExceptionMessage(self, q, inst, traceback): 108 | """ 109 | Method to format the exception message, this is more complicated because 110 | we need to cgi.escape the traceback but wrap the exception in a
 tag
111 |     """
112 |     self.fail('FAIL: Exception raised: %s' % inst)
113 |     self.addMessage('')
114 |     for line in traceback.format_exc().split('\n'):
115 |         self.addMessage(line)
116 | 
117 |   def addErrorHints(self, exceptionMap, errorInstance, questionNum):
118 |     typeOf = str(type(errorInstance))
119 |     questionName = 'q' + questionNum
120 |     errorHint = ''
121 | 
122 |     # question specific error hints
123 |     if exceptionMap.get(questionName):
124 |       questionMap = exceptionMap.get(questionName)
125 |       if (questionMap.get(typeOf)):
126 |         errorHint = questionMap.get(typeOf)
127 |     # fall back to general error messages if a question specific
128 |     # one does not exist
129 |     if (exceptionMap.get(typeOf)):
130 |       errorHint = exceptionMap.get(typeOf)
131 | 
132 |     # dont include the HTML if we have no error hint
133 |     if not errorHint:
134 |       return ''
135 | 
136 |     for line in errorHint.split('\n'):
137 |       self.addMessage(line)
138 | 
139 |   def produceOutput(self):
140 |     edxOutput = open('edx_response.html', 'w')
141 |     edxOutput.write("
") 142 | 143 | # first sum 144 | total_possible = sum(self.maxes.values()) 145 | total_score = sum(self.points.values()) 146 | checkOrX = '' 147 | if (total_score >= total_possible): 148 | checkOrX = '' 149 | header = """ 150 |

151 | Total score ({total_score} / {total_possible}) 152 |

153 | """.format(total_score = total_score, 154 | total_possible = total_possible, 155 | checkOrX = checkOrX 156 | ) 157 | edxOutput.write(header) 158 | 159 | for q in self.questions: 160 | if len(q) == 2: 161 | name = q[1] 162 | else: 163 | name = q 164 | checkOrX = '' 165 | if (self.points[q] == self.maxes[q]): 166 | checkOrX = '' 167 | #messages = '\n
\n'.join(self.messages[q]) 168 | messages = "
%s
" % '\n'.join(self.messages[q]) 169 | output = """ 170 |
171 |
172 |
173 | Question {q} ({points}/{max}) {checkOrX} 174 |
175 |
176 | {messages} 177 |
178 |
179 |
180 | """.format(q = name, 181 | max = self.maxes[q], 182 | messages = messages, 183 | checkOrX = checkOrX, 184 | points = self.points[q] 185 | ) 186 | # print "*** output for Question %s " % q[1] 187 | # print output 188 | edxOutput.write(output) 189 | edxOutput.write("
") 190 | edxOutput.close() 191 | edxOutput = open('edx_grade', 'w') 192 | edxOutput.write(str(self.points.totalCount())) 193 | edxOutput.close() 194 | 195 | def fail(self, message, raw=False): 196 | "Sets sanity check bit to false and outputs a message" 197 | self.sane = False 198 | self.assignZeroCredit() 199 | self.addMessage(message, raw) 200 | 201 | def assignZeroCredit(self): 202 | self.points[self.currentQuestion] = 0 203 | 204 | def addPoints(self, amt): 205 | self.points[self.currentQuestion] += amt 206 | 207 | def deductPoints(self, amt): 208 | self.points[self.currentQuestion] -= amt 209 | 210 | def assignFullCredit(self, message="", raw=False): 211 | self.points[self.currentQuestion] = self.maxes[self.currentQuestion] 212 | if message != "": 213 | self.addMessage(message, raw) 214 | 215 | def addMessage(self, message, raw=False): 216 | if not raw: 217 | # We assume raw messages, formatted for HTML, are printed separately 218 | if self.mute: util.unmutePrint() 219 | print '*** ' + message 220 | if self.mute: util.mutePrint() 221 | message = cgi.escape(message) 222 | self.messages[self.currentQuestion].append(message) 223 | 224 | def addMessageToEmail(self, message): 225 | print "WARNING**** addMessageToEmail is deprecated %s" % message 226 | for line in message.split('\n'): 227 | pass 228 | #print '%%% ' + line + ' %%%' 229 | #self.messages[self.currentQuestion].append(line) 230 | 231 | 232 | 233 | 234 | 235 | class Counter(dict): 236 | """ 237 | Dict with default 0 238 | """ 239 | def __getitem__(self, idx): 240 | try: 241 | return dict.__getitem__(self, idx) 242 | except KeyError: 243 | return 0 244 | 245 | def totalCount(self): 246 | """ 247 | Returns the sum of counts for all keys. 
248 | """ 249 | return sum(self.values()) 250 | 251 | -------------------------------------------------------------------------------- /graphicsCrawlerDisplay.py: -------------------------------------------------------------------------------- 1 | # graphicsCrawlerDisplay.py 2 | # ------------------------- 3 | # Licensing Information: Please do not distribute or publish solutions to this 4 | # project. You are free to use and extend these projects for educational 5 | # purposes. The Pacman AI projects were developed at UC Berkeley, primarily by 6 | # John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). 7 | # Student side autograding was added by Brad Miller, Nick Hay, and Pieter 8 | # Abbeel in Spring 2013. 9 | # For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html 10 | 11 | # graphicsCrawlerDisplay.py 12 | # ------------------------- 13 | # Licensing Information: Please do not distribute or publish solutions to this 14 | # project. You are free to use and extend these projects for educational 15 | # purposes. The Pacman AI projects were developed at UC Berkeley, primarily by 16 | # John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). 17 | # Student side autograding was added by Brad Miller, Nick Hay, and Pieter 18 | # Abbeel in Spring 2013. 
19 | # For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html 20 | 21 | import Tkinter 22 | import qlearningAgents 23 | import time 24 | import threading 25 | import sys 26 | import crawler 27 | #import pendulum 28 | import math 29 | from math import pi as PI 30 | 31 | robotType = 'crawler' 32 | 33 | class Application: 34 | 35 | def sigmoid(self, x): 36 | return 1.0 / (1.0 + 2.0 ** (-x)) 37 | 38 | def incrementSpeed(self, inc): 39 | self.tickTime *= inc 40 | # self.epsilon = min(1.0, self.epsilon) 41 | # self.epsilon = max(0.0,self.epsilon) 42 | # self.learner.setSpeed(self.epsilon) 43 | self.speed_label['text'] = 'Step Delay: %.5f' % (self.tickTime) 44 | 45 | def incrementEpsilon(self, inc): 46 | self.ep += inc 47 | self.epsilon = self.sigmoid(self.ep) 48 | self.learner.setEpsilon(self.epsilon) 49 | self.epsilon_label['text'] = 'Epsilon: %.3f' % (self.epsilon) 50 | 51 | def incrementGamma(self, inc): 52 | self.ga += inc 53 | self.gamma = self.sigmoid(self.ga) 54 | self.learner.setDiscount(self.gamma) 55 | self.gamma_label['text'] = 'Discount: %.3f' % (self.gamma) 56 | 57 | def incrementAlpha(self, inc): 58 | self.al += inc 59 | self.alpha = self.sigmoid(self.al) 60 | self.learner.setLearningRate(self.alpha) 61 | self.alpha_label['text'] = 'Learning Rate: %.3f' % (self.alpha) 62 | 63 | def __initGUI(self, win): 64 | ## Window ## 65 | self.win = win 66 | 67 | ## Initialize Frame ## 68 | win.grid() 69 | self.dec = -.5 70 | self.inc = .5 71 | self.tickTime = 0.1 72 | 73 | ## Epsilon Button + Label ## 74 | self.setupSpeedButtonAndLabel(win) 75 | 76 | self.setupEpsilonButtonAndLabel(win) 77 | 78 | ## Gamma Button + Label ## 79 | self.setUpGammaButtonAndLabel(win) 80 | 81 | ## Alpha Button + Label ## 82 | self.setupAlphaButtonAndLabel(win) 83 | 84 | ## Exit Button ## 85 | #self.exit_button = Tkinter.Button(win,text='Quit', command=self.exit) 86 | #self.exit_button.grid(row=0, column=9) 87 | 88 | ## Simulation Buttons ## 89 | # 
self.setupSimulationButtons(win) 90 | 91 | ## Canvas ## 92 | self.canvas = Tkinter.Canvas(root, height=200, width=1000) 93 | self.canvas.grid(row=2,columnspan=10) 94 | 95 | def setupAlphaButtonAndLabel(self, win): 96 | self.alpha_minus = Tkinter.Button(win, 97 | text="-",command=(lambda: self.incrementAlpha(self.dec))) 98 | self.alpha_minus.grid(row=1, column=3, padx=10) 99 | 100 | self.alpha = self.sigmoid(self.al) 101 | self.alpha_label = Tkinter.Label(win, text='Learning Rate: %.3f' % (self.alpha)) 102 | self.alpha_label.grid(row=1, column=4) 103 | 104 | self.alpha_plus = Tkinter.Button(win, 105 | text="+",command=(lambda: self.incrementAlpha(self.inc))) 106 | self.alpha_plus.grid(row=1, column=5, padx=10) 107 | 108 | def setUpGammaButtonAndLabel(self, win): 109 | self.gamma_minus = Tkinter.Button(win, 110 | text="-",command=(lambda: self.incrementGamma(self.dec))) 111 | self.gamma_minus.grid(row=1, column=0, padx=10) 112 | 113 | self.gamma = self.sigmoid(self.ga) 114 | self.gamma_label = Tkinter.Label(win, text='Discount: %.3f' % (self.gamma)) 115 | self.gamma_label.grid(row=1, column=1) 116 | 117 | self.gamma_plus = Tkinter.Button(win, 118 | text="+",command=(lambda: self.incrementGamma(self.inc))) 119 | self.gamma_plus.grid(row=1, column=2, padx=10) 120 | 121 | def setupEpsilonButtonAndLabel(self, win): 122 | self.epsilon_minus = Tkinter.Button(win, 123 | text="-",command=(lambda: self.incrementEpsilon(self.dec))) 124 | self.epsilon_minus.grid(row=0, column=3) 125 | 126 | self.epsilon = self.sigmoid(self.ep) 127 | self.epsilon_label = Tkinter.Label(win, text='Epsilon: %.3f' % (self.epsilon)) 128 | self.epsilon_label.grid(row=0, column=4) 129 | 130 | self.epsilon_plus = Tkinter.Button(win, 131 | text="+",command=(lambda: self.incrementEpsilon(self.inc))) 132 | self.epsilon_plus.grid(row=0, column=5) 133 | 134 | def setupSpeedButtonAndLabel(self, win): 135 | self.speed_minus = Tkinter.Button(win, 136 | text="-",command=(lambda: self.incrementSpeed(.5))) 137 | 
self.speed_minus.grid(row=0, column=0) 138 | 139 | self.speed_label = Tkinter.Label(win, text='Step Delay: %.5f' % (self.tickTime)) 140 | self.speed_label.grid(row=0, column=1) 141 | 142 | self.speed_plus = Tkinter.Button(win, 143 | text="+",command=(lambda: self.incrementSpeed(2))) 144 | self.speed_plus.grid(row=0, column=2) 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | def skip5kSteps(self): 153 | self.stepsToSkip = 5000 154 | 155 | def __init__(self, win): 156 | 157 | self.ep = 0 158 | self.ga = 2 159 | self.al = 2 160 | self.stepCount = 0 161 | ## Init Gui 162 | 163 | self.__initGUI(win) 164 | 165 | # Init environment 166 | if robotType == 'crawler': 167 | self.robot = crawler.CrawlingRobot(self.canvas) 168 | self.robotEnvironment = crawler.CrawlingRobotEnvironment(self.robot) 169 | elif robotType == 'pendulum': 170 | self.robot = pendulum.PendulumRobot(self.canvas) 171 | self.robotEnvironment = \ 172 | pendulum.PendulumRobotEnvironment(self.robot) 173 | else: 174 | raise "Unknown RobotType" 175 | 176 | # Init Agent 177 | simulationFn = lambda agent: \ 178 | simulation.SimulationEnvironment(self.robotEnvironment,agent) 179 | actionFn = lambda state: \ 180 | self.robotEnvironment.getPossibleActions(state) 181 | self.learner = qlearningAgents.QLearningAgent(actionFn=actionFn) 182 | 183 | self.learner.setEpsilon(self.epsilon) 184 | self.learner.setLearningRate(self.alpha) 185 | self.learner.setDiscount(self.gamma) 186 | 187 | # Start GUI 188 | self.running = True 189 | self.stopped = False 190 | self.stepsToSkip = 0 191 | self.thread = threading.Thread(target=self.run) 192 | self.thread.start() 193 | 194 | 195 | def exit(self): 196 | self.running = False 197 | for i in range(5): 198 | if not self.stopped: 199 | time.sleep(0.1) 200 | try: 201 | self.win.destroy() 202 | except: 203 | pass 204 | sys.exit(0) 205 | 206 | def step(self): 207 | 208 | self.stepCount += 1 209 | 210 | state = self.robotEnvironment.getCurrentState() 211 | actions = 
self.robotEnvironment.getPossibleActions(state) 212 | if len(actions) == 0.0: 213 | self.robotEnvironment.reset() 214 | state = self.robotEnvironment.getCurrentState() 215 | actions = self.robotEnvironment.getPossibleActions(state) 216 | print 'Reset!' 217 | action = self.learner.getAction(state) 218 | if action == None: 219 | raise 'None action returned: Code Not Complete' 220 | nextState, reward = self.robotEnvironment.doAction(action) 221 | self.learner.observeTransition(state, action, nextState, reward) 222 | 223 | def animatePolicy(self): 224 | if robotType != 'pendulum': 225 | raise 'Only pendulum can animatePolicy' 226 | 227 | 228 | totWidth = self.canvas.winfo_reqwidth() 229 | totHeight = self.canvas.winfo_reqheight() 230 | 231 | length = 0.48 * min(totWidth, totHeight) 232 | x,y = totWidth-length-30, length+10 233 | 234 | 235 | 236 | angleMin, angleMax = self.robot.getMinAndMaxAngle() 237 | velMin, velMax = self.robot.getMinAndMaxAngleVelocity() 238 | 239 | if not 'animatePolicyBox' in dir(self): 240 | self.canvas.create_line(x,y,x+length,y) 241 | self.canvas.create_line(x+length,y,x+length,y-length) 242 | self.canvas.create_line(x+length,y-length,x,y-length) 243 | self.canvas.create_line(x,y-length,x,y) 244 | self.animatePolicyBox = 1 245 | self.canvas.create_text(x+length/2,y+10,text='angle') 246 | self.canvas.create_text(x-30,y-length/2,text='velocity') 247 | self.canvas.create_text(x-60,y-length/4,text='Blue = kickLeft') 248 | self.canvas.create_text(x-60,y-length/4+20,text='Red = kickRight') 249 | self.canvas.create_text(x-60,y-length/4+40,text='White = doNothing') 250 | 251 | 252 | 253 | angleDelta = (angleMax-angleMin) / 100 254 | velDelta = (velMax-velMin) / 100 255 | for i in range(100): 256 | angle = angleMin + i * angleDelta 257 | 258 | for j in range(100): 259 | vel = velMin + j * velDelta 260 | state = self.robotEnvironment.getState(angle,vel) 261 | max, argMax = None, None 262 | if not self.learner.seenState(state): 263 | argMax = 'unseen' 
264 | else: 265 | for action in ('kickLeft','kickRight','doNothing'): 266 | qVal = self.learner.getQValue(state, action) 267 | if max == None or qVal > max: 268 | max, argMax = qVal, action 269 | if argMax != 'unseen': 270 | if argMax == 'kickLeft': 271 | color = 'blue' 272 | elif argMax == 'kickRight': 273 | color = 'red' 274 | elif argMax == 'doNothing': 275 | color = 'white' 276 | dx = length / 100.0 277 | dy = length / 100.0 278 | x0, y0 = x+i*dx, y-j*dy 279 | self.canvas.create_rectangle(x0,y0,x0+dx,y0+dy,fill=color) 280 | 281 | 282 | 283 | 284 | def run(self): 285 | self.stepCount = 0 286 | self.learner.startEpisode() 287 | while True: 288 | minSleep = .01 289 | tm = max(minSleep, self.tickTime) 290 | time.sleep(tm) 291 | self.stepsToSkip = int(tm / self.tickTime) - 1 292 | 293 | if not self.running: 294 | self.stopped = True 295 | return 296 | for i in range(self.stepsToSkip): 297 | self.step() 298 | self.stepsToSkip = 0 299 | self.step() 300 | # self.robot.draw() 301 | self.learner.stopEpisode() 302 | 303 | def start(self): 304 | self.win.mainloop() 305 | 306 | 307 | 308 | 309 | 310 | def run(): 311 | global root 312 | root = Tkinter.Tk() 313 | root.title( 'Crawler GUI' ) 314 | root.resizable( 0, 0 ) 315 | 316 | # root.mainloop() 317 | 318 | 319 | app = Application(root) 320 | def update_gui(): 321 | app.robot.draw(app.stepCount, app.tickTime) 322 | root.after(10, update_gui) 323 | update_gui() 324 | 325 | root.protocol( 'WM_DELETE_WINDOW', app.exit) 326 | try: 327 | app.start() 328 | except: 329 | app.exit() 330 | -------------------------------------------------------------------------------- /graphicsGridworldDisplay.py: -------------------------------------------------------------------------------- 1 | # graphicsGridworldDisplay.py 2 | # --------------------------- 3 | # Licensing Information: Please do not distribute or publish solutions to this 4 | # project. You are free to use and extend these projects for educational 5 | # purposes. 
The Pacman AI projects were developed at UC Berkeley, primarily by 6 | # John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). 7 | # Student side autograding was added by Brad Miller, Nick Hay, and Pieter 8 | # Abbeel in Spring 2013. 9 | # For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html 10 | 11 | import util 12 | from graphicsUtils import * 13 | 14 | class GraphicsGridworldDisplay: 15 | 16 | def __init__(self, gridworld, size=120, speed=1.0): 17 | self.gridworld = gridworld 18 | self.size = size 19 | self.speed = speed 20 | 21 | def start(self): 22 | setup(self.gridworld, size=self.size) 23 | 24 | def pause(self): 25 | wait_for_keys() 26 | 27 | def displayValues(self, agent, currentState = None, message = 'Agent Values'): 28 | values = util.Counter() 29 | policy = {} 30 | states = self.gridworld.getStates() 31 | for state in states: 32 | values[state] = agent.getValue(state) 33 | policy[state] = agent.getPolicy(state) 34 | drawValues(self.gridworld, values, policy, currentState, message) 35 | sleep(0.05 / self.speed) 36 | 37 | def displayNullValues(self, currentState = None, message = ''): 38 | values = util.Counter() 39 | #policy = {} 40 | states = self.gridworld.getStates() 41 | for state in states: 42 | values[state] = 0.0 43 | #policy[state] = agent.getPolicy(state) 44 | drawNullValues(self.gridworld, currentState,'') 45 | # drawValues(self.gridworld, values, policy, currentState, message) 46 | sleep(0.05 / self.speed) 47 | 48 | def displayQValues(self, agent, currentState = None, message = 'Agent Q-Values'): 49 | qValues = util.Counter() 50 | states = self.gridworld.getStates() 51 | for state in states: 52 | for action in self.gridworld.getPossibleActions(state): 53 | qValues[(state, action)] = agent.getQValue(state, action) 54 | drawQValues(self.gridworld, qValues, currentState, message) 55 | sleep(0.05 / self.speed) 56 | 57 | BACKGROUND_COLOR = formatColor(0,0,0) 58 | EDGE_COLOR = formatColor(1,1,1) 59 | 
OBSTACLE_COLOR = formatColor(0.5,0.5,0.5) 60 | TEXT_COLOR = formatColor(1,1,1) 61 | MUTED_TEXT_COLOR = formatColor(0.7,0.7,0.7) 62 | LOCATION_COLOR = formatColor(0,0,1) 63 | 64 | WINDOW_SIZE = -1 65 | GRID_SIZE = -1 66 | GRID_HEIGHT = -1 67 | MARGIN = -1 68 | 69 | def setup(gridworld, title = "Gridworld Display", size = 120): 70 | global GRID_SIZE, MARGIN, SCREEN_WIDTH, SCREEN_HEIGHT, GRID_HEIGHT 71 | grid = gridworld.grid 72 | WINDOW_SIZE = size 73 | GRID_SIZE = size 74 | GRID_HEIGHT = grid.height 75 | MARGIN = GRID_SIZE * 0.75 76 | screen_width = (grid.width - 1) * GRID_SIZE + MARGIN * 2 77 | screen_height = (grid.height - 0.5) * GRID_SIZE + MARGIN * 2 78 | 79 | begin_graphics(screen_width, 80 | screen_height, 81 | BACKGROUND_COLOR, title=title) 82 | 83 | def drawNullValues(gridworld, currentState = None, message = ''): 84 | grid = gridworld.grid 85 | blank() 86 | for x in range(grid.width): 87 | for y in range(grid.height): 88 | state = (x, y) 89 | gridType = grid[x][y] 90 | isExit = (str(gridType) != gridType) 91 | isCurrent = (currentState == state) 92 | if gridType == '#': 93 | drawSquare(x, y, 0, 0, 0, None, None, True, False, isCurrent) 94 | else: 95 | drawNullSquare(gridworld.grid, x, y, False, isExit, isCurrent) 96 | pos = to_screen(((grid.width - 1.0) / 2.0, - 0.8)) 97 | text( pos, TEXT_COLOR, message, "Courier", -32, "bold", "c") 98 | 99 | 100 | def drawValues(gridworld, values, policy, currentState = None, message = 'State Values'): 101 | grid = gridworld.grid 102 | blank() 103 | valueList = [values[state] for state in gridworld.getStates()] + [0.0] 104 | minValue = min(valueList) 105 | maxValue = max(valueList) 106 | for x in range(grid.width): 107 | for y in range(grid.height): 108 | state = (x, y) 109 | gridType = grid[x][y] 110 | isExit = (str(gridType) != gridType) 111 | isCurrent = (currentState == state) 112 | if gridType == '#': 113 | drawSquare(x, y, 0, 0, 0, None, None, True, False, isCurrent) 114 | else: 115 | value = values[state] 116 | 
action = None 117 | if policy != None and state in policy: 118 | action = policy[state] 119 | actions = gridworld.getPossibleActions(state) 120 | if action not in actions and 'exit' in actions: 121 | action = 'exit' 122 | valString = '%.2f' % value 123 | drawSquare(x, y, value, minValue, maxValue, valString, action, False, isExit, isCurrent) 124 | pos = to_screen(((grid.width - 1.0) / 2.0, - 0.8)) 125 | text( pos, TEXT_COLOR, message, "Courier", -32, "bold", "c") 126 | 127 | def drawQValues(gridworld, qValues, currentState = None, message = 'State-Action Q-Values'): 128 | grid = gridworld.grid 129 | blank() 130 | stateCrossActions = [[(state, action) for action in gridworld.getPossibleActions(state)] for state in gridworld.getStates()] 131 | qStates = reduce(lambda x,y: x+y, stateCrossActions, []) 132 | qValueList = [qValues[(state, action)] for state, action in qStates] + [0.0] 133 | minValue = min(qValueList) 134 | maxValue = max(qValueList) 135 | for x in range(grid.width): 136 | for y in range(grid.height): 137 | state = (x, y) 138 | gridType = grid[x][y] 139 | isExit = (str(gridType) != gridType) 140 | isCurrent = (currentState == state) 141 | actions = gridworld.getPossibleActions(state) 142 | if actions == None or len(actions) == 0: 143 | actions = [None] 144 | bestQ = max([qValues[(state, action)] for action in actions]) 145 | bestActions = [action for action in actions if qValues[(state, action)] == bestQ] 146 | 147 | q = util.Counter() 148 | valStrings = {} 149 | for action in actions: 150 | v = qValues[(state, action)] 151 | q[action] += v 152 | valStrings[action] = '%.2f' % v 153 | if gridType == '#': 154 | drawSquare(x, y, 0, 0, 0, None, None, True, False, isCurrent) 155 | elif isExit: 156 | action = 'exit' 157 | value = q[action] 158 | valString = '%.2f' % value 159 | drawSquare(x, y, value, minValue, maxValue, valString, action, False, isExit, isCurrent) 160 | else: 161 | drawSquareQ(x, y, q, minValue, maxValue, valStrings, actions, isCurrent) 162 | 
pos = to_screen(((grid.width - 1.0) / 2.0, - 0.8)) 163 | text( pos, TEXT_COLOR, message, "Courier", -32, "bold", "c") 164 | 165 | 166 | def blank(): 167 | clear_screen() 168 | 169 | def drawNullSquare(grid,x, y, isObstacle, isTerminal, isCurrent): 170 | 171 | square_color = getColor(0, -1, 1) 172 | 173 | if isObstacle: 174 | square_color = OBSTACLE_COLOR 175 | 176 | (screen_x, screen_y) = to_screen((x, y)) 177 | square( (screen_x, screen_y), 178 | 0.5* GRID_SIZE, 179 | color = square_color, 180 | filled = 1, 181 | width = 1) 182 | 183 | square( (screen_x, screen_y), 184 | 0.5* GRID_SIZE, 185 | color = EDGE_COLOR, 186 | filled = 0, 187 | width = 3) 188 | 189 | if isTerminal and not isObstacle: 190 | square( (screen_x, screen_y), 191 | 0.4* GRID_SIZE, 192 | color = EDGE_COLOR, 193 | filled = 0, 194 | width = 2) 195 | text( (screen_x, screen_y), 196 | TEXT_COLOR, 197 | str(grid[x][y]), 198 | "Courier", -24, "bold", "c") 199 | 200 | 201 | text_color = TEXT_COLOR 202 | 203 | if not isObstacle and isCurrent: 204 | circle( (screen_x, screen_y), 0.1*GRID_SIZE, LOCATION_COLOR, fillColor=LOCATION_COLOR ) 205 | 206 | # if not isObstacle: 207 | # text( (screen_x, screen_y), text_color, valStr, "Courier", 24, "bold", "c") 208 | 209 | def drawSquare(x, y, val, min, max, valStr, action, isObstacle, isTerminal, isCurrent): 210 | 211 | square_color = getColor(val, min, max) 212 | 213 | if isObstacle: 214 | square_color = OBSTACLE_COLOR 215 | 216 | (screen_x, screen_y) = to_screen((x, y)) 217 | square( (screen_x, screen_y), 218 | 0.5* GRID_SIZE, 219 | color = square_color, 220 | filled = 1, 221 | width = 1) 222 | square( (screen_x, screen_y), 223 | 0.5* GRID_SIZE, 224 | color = EDGE_COLOR, 225 | filled = 0, 226 | width = 3) 227 | if isTerminal and not isObstacle: 228 | square( (screen_x, screen_y), 229 | 0.4* GRID_SIZE, 230 | color = EDGE_COLOR, 231 | filled = 0, 232 | width = 2) 233 | 234 | 235 | if action == 'north': 236 | polygon( [(screen_x, screen_y - 0.45*GRID_SIZE), 
(screen_x+0.05*GRID_SIZE, screen_y-0.40*GRID_SIZE), (screen_x-0.05*GRID_SIZE, screen_y-0.40*GRID_SIZE)], EDGE_COLOR, filled = 1, smoothed = False) 237 | if action == 'south': 238 | polygon( [(screen_x, screen_y + 0.45*GRID_SIZE), (screen_x+0.05*GRID_SIZE, screen_y+0.40*GRID_SIZE), (screen_x-0.05*GRID_SIZE, screen_y+0.40*GRID_SIZE)], EDGE_COLOR, filled = 1, smoothed = False) 239 | if action == 'west': 240 | polygon( [(screen_x-0.45*GRID_SIZE, screen_y), (screen_x-0.4*GRID_SIZE, screen_y+0.05*GRID_SIZE), (screen_x-0.4*GRID_SIZE, screen_y-0.05*GRID_SIZE)], EDGE_COLOR, filled = 1, smoothed = False) 241 | if action == 'east': 242 | polygon( [(screen_x+0.45*GRID_SIZE, screen_y), (screen_x+0.4*GRID_SIZE, screen_y+0.05*GRID_SIZE), (screen_x+0.4*GRID_SIZE, screen_y-0.05*GRID_SIZE)], EDGE_COLOR, filled = 1, smoothed = False) 243 | 244 | 245 | text_color = TEXT_COLOR 246 | 247 | if not isObstacle and isCurrent: 248 | circle( (screen_x, screen_y), 0.1*GRID_SIZE, outlineColor=LOCATION_COLOR, fillColor=LOCATION_COLOR ) 249 | 250 | if not isObstacle: 251 | text( (screen_x, screen_y), text_color, valStr, "Courier", -30, "bold", "c") 252 | 253 | 254 | def drawSquareQ(x, y, qVals, minVal, maxVal, valStrs, bestActions, isCurrent): 255 | 256 | (screen_x, screen_y) = to_screen((x, y)) 257 | 258 | center = (screen_x, screen_y) 259 | nw = (screen_x-0.5*GRID_SIZE, screen_y-0.5*GRID_SIZE) 260 | ne = (screen_x+0.5*GRID_SIZE, screen_y-0.5*GRID_SIZE) 261 | se = (screen_x+0.5*GRID_SIZE, screen_y+0.5*GRID_SIZE) 262 | sw = (screen_x-0.5*GRID_SIZE, screen_y+0.5*GRID_SIZE) 263 | n = (screen_x, screen_y-0.5*GRID_SIZE+5) 264 | s = (screen_x, screen_y+0.5*GRID_SIZE-5) 265 | w = (screen_x-0.5*GRID_SIZE+5, screen_y) 266 | e = (screen_x+0.5*GRID_SIZE-5, screen_y) 267 | 268 | actions = qVals.keys() 269 | for action in actions: 270 | 271 | wedge_color = getColor(qVals[action], minVal, maxVal) 272 | 273 | if action == 'north': 274 | polygon( (center, nw, ne), wedge_color, filled = 1, smoothed = False) 275 
| #text(n, text_color, valStr, "Courier", 8, "bold", "n") 276 | if action == 'south': 277 | polygon( (center, sw, se), wedge_color, filled = 1, smoothed = False) 278 | #text(s, text_color, valStr, "Courier", 8, "bold", "s") 279 | if action == 'east': 280 | polygon( (center, ne, se), wedge_color, filled = 1, smoothed = False) 281 | #text(e, text_color, valStr, "Courier", 8, "bold", "e") 282 | if action == 'west': 283 | polygon( (center, nw, sw), wedge_color, filled = 1, smoothed = False) 284 | #text(w, text_color, valStr, "Courier", 8, "bold", "w") 285 | 286 | square( (screen_x, screen_y), 287 | 0.5* GRID_SIZE, 288 | color = EDGE_COLOR, 289 | filled = 0, 290 | width = 3) 291 | line(ne, sw, color = EDGE_COLOR) 292 | line(nw, se, color = EDGE_COLOR) 293 | 294 | if isCurrent: 295 | circle( (screen_x, screen_y), 0.1*GRID_SIZE, LOCATION_COLOR, fillColor=LOCATION_COLOR ) 296 | 297 | for action in actions: 298 | text_color = TEXT_COLOR 299 | if qVals[action] < max(qVals.values()): text_color = MUTED_TEXT_COLOR 300 | valStr = "" 301 | if action in valStrs: 302 | valStr = valStrs[action] 303 | h = -20 304 | if action == 'north': 305 | #polygon( (center, nw, ne), wedge_color, filled = 1, smooth = 0) 306 | text(n, text_color, valStr, "Courier", h, "bold", "n") 307 | if action == 'south': 308 | #polygon( (center, sw, se), wedge_color, filled = 1, smooth = 0) 309 | text(s, text_color, valStr, "Courier", h, "bold", "s") 310 | if action == 'east': 311 | #polygon( (center, ne, se), wedge_color, filled = 1, smooth = 0) 312 | text(e, text_color, valStr, "Courier", h, "bold", "e") 313 | if action == 'west': 314 | #polygon( (center, nw, sw), wedge_color, filled = 1, smooth = 0) 315 | text(w, text_color, valStr, "Courier", h, "bold", "w") 316 | 317 | 318 | def getColor(val, minVal, max): 319 | r, g = 0.0, 0.0 320 | if val < 0 and minVal < 0: 321 | r = val * 0.65 / minVal 322 | if val > 0 and max > 0: 323 | g = val * 0.65 / max 324 | return formatColor(r,g,0.0) 325 | 326 | 327 | def 
square(pos, size, color, filled, width): 328 | x, y = pos 329 | dx, dy = size, size 330 | return polygon([(x - dx, y - dy), (x - dx, y + dy), (x + dx, y + dy), (x + dx, y - dy)], outlineColor=color, fillColor=color, filled=filled, width=width, smoothed=False) 331 | 332 | 333 | def to_screen(point): 334 | ( gamex, gamey ) = point 335 | x = gamex*GRID_SIZE + MARGIN 336 | y = (GRID_HEIGHT - gamey - 1)*GRID_SIZE + MARGIN 337 | return ( x, y ) 338 | 339 | def to_grid(point): 340 | (x, y) = point 341 | x = int ((y - MARGIN + GRID_SIZE * 0.5) / GRID_SIZE) 342 | y = int ((x - MARGIN + GRID_SIZE * 0.5) / GRID_SIZE) 343 | print point, "-->", (x, y) 344 | return (x, y) 345 | -------------------------------------------------------------------------------- /graphicsUtils.py: -------------------------------------------------------------------------------- 1 | # graphicsUtils.py 2 | # ---------------- 3 | # Licensing Information: Please do not distribute or publish solutions to this 4 | # project. You are free to use and extend these projects for educational 5 | # purposes. The Pacman AI projects were developed at UC Berkeley, primarily by 6 | # John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). 7 | # Student side autograding was added by Brad Miller, Nick Hay, and Pieter 8 | # Abbeel in Spring 2013. 
9 | # For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html 10 | 11 | import sys 12 | import math 13 | import random 14 | import string 15 | import time 16 | import types 17 | import Tkinter 18 | 19 | _Windows = sys.platform == 'win32' # True if on Win95/98/NT 20 | 21 | _root_window = None # The root window for graphics output 22 | _canvas = None # The canvas which holds graphics 23 | _canvas_xs = None # Size of canvas object 24 | _canvas_ys = None 25 | _canvas_x = None # Current position on canvas 26 | _canvas_y = None 27 | _canvas_col = None # Current colour (set to black below) 28 | _canvas_tsize = 12 29 | _canvas_tserifs = 0 30 | 31 | def formatColor(r, g, b): 32 | return '#%02x%02x%02x' % (int(r * 255), int(g * 255), int(b * 255)) 33 | 34 | def colorToVector(color): 35 | return map(lambda x: int(x, 16) / 256.0, [color[1:3], color[3:5], color[5:7]]) 36 | 37 | if _Windows: 38 | _canvas_tfonts = ['times new roman', 'lucida console'] 39 | else: 40 | _canvas_tfonts = ['times', 'lucidasans-24'] 41 | pass # XXX need defaults here 42 | 43 | def sleep(secs): 44 | global _root_window 45 | if _root_window == None: 46 | time.sleep(secs) 47 | else: 48 | _root_window.update_idletasks() 49 | _root_window.after(int(1000 * secs), _root_window.quit) 50 | _root_window.mainloop() 51 | 52 | def begin_graphics(width=640, height=480, color=formatColor(0, 0, 0), title=None): 53 | 54 | global _root_window, _canvas, _canvas_x, _canvas_y, _canvas_xs, _canvas_ys, _bg_color 55 | 56 | # Check for duplicate call 57 | if _root_window is not None: 58 | # Lose the window. 
59 | _root_window.destroy() 60 | 61 | # Save the canvas size parameters 62 | _canvas_xs, _canvas_ys = width - 1, height - 1 63 | _canvas_x, _canvas_y = 0, _canvas_ys 64 | _bg_color = color 65 | 66 | # Create the root window 67 | _root_window = Tkinter.Tk() 68 | _root_window.protocol('WM_DELETE_WINDOW', _destroy_window) 69 | _root_window.title(title or 'Graphics Window') 70 | _root_window.resizable(0, 0) 71 | 72 | # Create the canvas object 73 | try: 74 | _canvas = Tkinter.Canvas(_root_window, width=width, height=height) 75 | _canvas.pack() 76 | draw_background() 77 | _canvas.update() 78 | except: 79 | _root_window = None 80 | raise 81 | 82 | # Bind to key-down and key-up events 83 | _root_window.bind( "", _keypress ) 84 | _root_window.bind( "", _keyrelease ) 85 | _root_window.bind( "", _clear_keys ) 86 | _root_window.bind( "", _clear_keys ) 87 | _root_window.bind( "", _leftclick ) 88 | _root_window.bind( "", _rightclick ) 89 | _root_window.bind( "", _rightclick ) 90 | _root_window.bind( "", _ctrl_leftclick) 91 | _clear_keys() 92 | 93 | _leftclick_loc = None 94 | _rightclick_loc = None 95 | _ctrl_leftclick_loc = None 96 | 97 | def _leftclick(event): 98 | global _leftclick_loc 99 | _leftclick_loc = (event.x, event.y) 100 | 101 | def _rightclick(event): 102 | global _rightclick_loc 103 | _rightclick_loc = (event.x, event.y) 104 | 105 | def _ctrl_leftclick(event): 106 | global _ctrl_leftclick_loc 107 | _ctrl_leftclick_loc = (event.x, event.y) 108 | 109 | def wait_for_click(): 110 | while True: 111 | global _leftclick_loc 112 | global _rightclick_loc 113 | global _ctrl_leftclick_loc 114 | if _leftclick_loc != None: 115 | val = _leftclick_loc 116 | _leftclick_loc = None 117 | return val, 'left' 118 | if _rightclick_loc != None: 119 | val = _rightclick_loc 120 | _rightclick_loc = None 121 | return val, 'right' 122 | if _ctrl_leftclick_loc != None: 123 | val = _ctrl_leftclick_loc 124 | _ctrl_leftclick_loc = None 125 | return val, 'ctrl_left' 126 | sleep(0.05) 127 | 128 | 
def draw_background(): 129 | corners = [(0,0), (0, _canvas_ys), (_canvas_xs, _canvas_ys), (_canvas_xs, 0)] 130 | polygon(corners, _bg_color, fillColor=_bg_color, filled=True, smoothed=False) 131 | 132 | def _destroy_window(event=None): 133 | sys.exit(0) 134 | # global _root_window 135 | # _root_window.destroy() 136 | # _root_window = None 137 | #print "DESTROY" 138 | 139 | def end_graphics(): 140 | global _root_window, _canvas, _mouse_enabled 141 | try: 142 | try: 143 | sleep(1) 144 | if _root_window != None: 145 | _root_window.destroy() 146 | except SystemExit, e: 147 | print 'Ending graphics raised an exception:', e 148 | finally: 149 | _root_window = None 150 | _canvas = None 151 | _mouse_enabled = 0 152 | _clear_keys() 153 | 154 | def clear_screen(background=None): 155 | global _canvas_x, _canvas_y 156 | _canvas.delete('all') 157 | draw_background() 158 | _canvas_x, _canvas_y = 0, _canvas_ys 159 | 160 | def polygon(coords, outlineColor, fillColor=None, filled=1, smoothed=1, behind=0, width=1): 161 | c = [] 162 | for coord in coords: 163 | c.append(coord[0]) 164 | c.append(coord[1]) 165 | if fillColor == None: fillColor = outlineColor 166 | if filled == 0: fillColor = "" 167 | poly = _canvas.create_polygon(c, outline=outlineColor, fill=fillColor, smooth=smoothed, width=width) 168 | if behind > 0: 169 | _canvas.tag_lower(poly, behind) # Higher should be more visible 170 | return poly 171 | 172 | def square(pos, r, color, filled=1, behind=0): 173 | x, y = pos 174 | coords = [(x - r, y - r), (x + r, y - r), (x + r, y + r), (x - r, y + r)] 175 | return polygon(coords, color, color, filled, 0, behind=behind) 176 | 177 | def circle(pos, r, outlineColor, fillColor, endpoints=None, style='pieslice', width=2): 178 | x, y = pos 179 | x0, x1 = x - r - 1, x + r 180 | y0, y1 = y - r - 1, y + r 181 | if endpoints == None: 182 | e = [0, 359] 183 | else: 184 | e = list(endpoints) 185 | while e[0] > e[1]: e[1] = e[1] + 360 186 | 187 | return _canvas.create_arc(x0, y0, x1, y1, 
outline=outlineColor, fill=fillColor, 188 | extent=e[1] - e[0], start=e[0], style=style, width=width) 189 | 190 | def image(pos, file="../../blueghost.gif"): 191 | x, y = pos 192 | # img = PhotoImage(file=file) 193 | return _canvas.create_image(x, y, image = Tkinter.PhotoImage(file=file), anchor = Tkinter.NW) 194 | 195 | 196 | def refresh(): 197 | _canvas.update_idletasks() 198 | 199 | def moveCircle(id, pos, r, endpoints=None): 200 | global _canvas_x, _canvas_y 201 | 202 | x, y = pos 203 | # x0, x1 = x - r, x + r + 1 204 | # y0, y1 = y - r, y + r + 1 205 | x0, x1 = x - r - 1, x + r 206 | y0, y1 = y - r - 1, y + r 207 | if endpoints == None: 208 | e = [0, 359] 209 | else: 210 | e = list(endpoints) 211 | while e[0] > e[1]: e[1] = e[1] + 360 212 | 213 | edit(id, ('start', e[0]), ('extent', e[1] - e[0])) 214 | move_to(id, x0, y0) 215 | 216 | def edit(id, *args): 217 | _canvas.itemconfigure(id, **dict(args)) 218 | 219 | def text(pos, color, contents, font='Helvetica', size=12, style='normal', anchor="nw"): 220 | global _canvas_x, _canvas_y 221 | x, y = pos 222 | font = (font, str(size), style) 223 | return _canvas.create_text(x, y, fill=color, text=contents, font=font, anchor=anchor) 224 | 225 | def changeText(id, newText, font=None, size=12, style='normal'): 226 | _canvas.itemconfigure(id, text=newText) 227 | if font != None: 228 | _canvas.itemconfigure(id, font=(font, '-%d' % size, style)) 229 | 230 | def changeColor(id, newColor): 231 | _canvas.itemconfigure(id, fill=newColor) 232 | 233 | def line(here, there, color=formatColor(0, 0, 0), width=2): 234 | x0, y0 = here[0], here[1] 235 | x1, y1 = there[0], there[1] 236 | return _canvas.create_line(x0, y0, x1, y1, fill=color, width=width) 237 | 238 | ############################################################################## 239 | ### Keypress handling ######################################################## 240 | ############################################################################## 241 | 242 | # We bind to 
# key-down and key-up events.

_keysdown = {}
_keyswaiting = {}
# This holds an unprocessed key release.  We delay key releases by up to
# one call to keys_pressed() to get round a problem with auto repeat.
_got_release = None

def _keypress(event):
    global _got_release
    _keysdown[event.keysym] = 1
    _keyswaiting[event.keysym] = 1
    _got_release = None

def _keyrelease(event):
    global _got_release
    try:
        del _keysdown[event.keysym]
    except:
        pass
    _got_release = 1

def remap_arrows(event):
    # TURN ARROW PRESSES INTO LETTERS (SHOULD BE IN KEYBOARD AGENT)
    if event.char in ['a', 's', 'd', 'w']:
        return
    # Two keycodes per arrow: Windows vs. X11.
    arrows = {37: 'a', 101: 'a',   # LEFT
              38: 'w', 99:  'w',   # UP
              39: 'd', 102: 'd',   # RIGHT
              40: 's', 104: 's'}   # DOWN
    if event.keycode in arrows:
        event.char = arrows[event.keycode]

def _clear_keys(event=None):
    global _keysdown, _got_release, _keyswaiting
    _keysdown = {}
    _keyswaiting = {}
    _got_release = None

def keys_pressed(d_o_e=Tkinter.tkinter.dooneevent,
                 d_w=Tkinter.tkinter.DONT_WAIT):
    """Pump pending Tk events and return the keys currently held down."""
    d_o_e(d_w)
    if _got_release:
        # Process the delayed release before reporting.
        d_o_e(d_w)
    return _keysdown.keys()

def keys_waiting():
    """Return (and consume) the keys pressed since the previous call."""
    global _keyswaiting
    keys = _keyswaiting.keys()
    _keyswaiting = {}
    return keys

# Block for a list of keys...
300 | 301 | def wait_for_keys(): 302 | keys = [] 303 | while keys == []: 304 | keys = keys_pressed() 305 | sleep(0.05) 306 | return keys 307 | 308 | def remove_from_screen(x, 309 | d_o_e=Tkinter.tkinter.dooneevent, 310 | d_w=Tkinter.tkinter.DONT_WAIT): 311 | _canvas.delete(x) 312 | d_o_e(d_w) 313 | 314 | def _adjust_coords(coord_list, x, y): 315 | for i in range(0, len(coord_list), 2): 316 | coord_list[i] = coord_list[i] + x 317 | coord_list[i + 1] = coord_list[i + 1] + y 318 | return coord_list 319 | 320 | def move_to(object, x, y=None, 321 | d_o_e=Tkinter.tkinter.dooneevent, 322 | d_w=Tkinter.tkinter.DONT_WAIT): 323 | if y is None: 324 | try: x, y = x 325 | except: raise 'incomprehensible coordinates' 326 | 327 | horiz = True 328 | newCoords = [] 329 | current_x, current_y = _canvas.coords(object)[0:2] # first point 330 | for coord in _canvas.coords(object): 331 | if horiz: 332 | inc = x - current_x 333 | else: 334 | inc = y - current_y 335 | horiz = not horiz 336 | 337 | newCoords.append(coord + inc) 338 | 339 | _canvas.coords(object, *newCoords) 340 | d_o_e(d_w) 341 | 342 | def move_by(object, x, y=None, 343 | d_o_e=Tkinter.tkinter.dooneevent, 344 | d_w=Tkinter.tkinter.DONT_WAIT, lift=False): 345 | if y is None: 346 | try: x, y = x 347 | except: raise Exception, 'incomprehensible coordinates' 348 | 349 | horiz = True 350 | newCoords = [] 351 | for coord in _canvas.coords(object): 352 | if horiz: 353 | inc = x 354 | else: 355 | inc = y 356 | horiz = not horiz 357 | 358 | newCoords.append(coord + inc) 359 | 360 | _canvas.coords(object, *newCoords) 361 | d_o_e(d_w) 362 | if lift: 363 | _canvas.tag_raise(object) 364 | 365 | def writePostscript(filename): 366 | "Writes the current canvas to a postscript file." 
367 | psfile = file(filename, 'w') 368 | psfile.write(_canvas.postscript(pageanchor='sw', 369 | y='0.c', 370 | x='0.c')) 371 | psfile.close() 372 | 373 | ghost_shape = [ 374 | (0, - 0.5), 375 | (0.25, - 0.75), 376 | (0.5, - 0.5), 377 | (0.75, - 0.75), 378 | (0.75, 0.5), 379 | (0.5, 0.75), 380 | (- 0.5, 0.75), 381 | (- 0.75, 0.5), 382 | (- 0.75, - 0.75), 383 | (- 0.5, - 0.5), 384 | (- 0.25, - 0.75) 385 | ] 386 | 387 | if __name__ == '__main__': 388 | begin_graphics() 389 | clear_screen() 390 | ghost_shape = [(x * 10 + 20, y * 10 + 20) for x, y in ghost_shape] 391 | g = polygon(ghost_shape, formatColor(1, 1, 1)) 392 | move_to(g, (50, 50)) 393 | circle((150, 150), 20, formatColor(0.7, 0.3, 0.0), endpoints=[15, - 15]) 394 | sleep(2) 395 | -------------------------------------------------------------------------------- /keyboardAgents.py: -------------------------------------------------------------------------------- 1 | # keyboardAgents.py 2 | # ----------------- 3 | # Licensing Information: Please do not distribute or publish solutions to this 4 | # project. You are free to use and extend these projects for educational 5 | # purposes. The Pacman AI projects were developed at UC Berkeley, primarily by 6 | # John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). 7 | # Student side autograding was added by Brad Miller, Nick Hay, and Pieter 8 | # Abbeel in Spring 2013. 9 | # For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html 10 | 11 | from game import Agent 12 | from game import Directions 13 | import random 14 | 15 | class KeyboardAgent(Agent): 16 | """ 17 | An agent controlled by the keyboard. 18 | """ 19 | # NOTE: Arrow keys also work. 
20 | WEST_KEY = 'a' 21 | EAST_KEY = 'd' 22 | NORTH_KEY = 'w' 23 | SOUTH_KEY = 's' 24 | STOP_KEY = 'q' 25 | 26 | def __init__( self, index = 0 ): 27 | 28 | self.lastMove = Directions.STOP 29 | self.index = index 30 | self.keys = [] 31 | 32 | def getAction( self, state): 33 | from graphicsUtils import keys_waiting 34 | from graphicsUtils import keys_pressed 35 | keys = keys_waiting() + keys_pressed() 36 | if keys != []: 37 | self.keys = keys 38 | 39 | legal = state.getLegalActions(self.index) 40 | move = self.getMove(legal) 41 | 42 | if move == Directions.STOP: 43 | # Try to move in the same direction as before 44 | if self.lastMove in legal: 45 | move = self.lastMove 46 | 47 | if (self.STOP_KEY in self.keys) and Directions.STOP in legal: move = Directions.STOP 48 | 49 | if move not in legal: 50 | move = random.choice(legal) 51 | 52 | self.lastMove = move 53 | return move 54 | 55 | def getMove(self, legal): 56 | move = Directions.STOP 57 | if (self.WEST_KEY in self.keys or 'Left' in self.keys) and Directions.WEST in legal: move = Directions.WEST 58 | if (self.EAST_KEY in self.keys or 'Right' in self.keys) and Directions.EAST in legal: move = Directions.EAST 59 | if (self.NORTH_KEY in self.keys or 'Up' in self.keys) and Directions.NORTH in legal: move = Directions.NORTH 60 | if (self.SOUTH_KEY in self.keys or 'Down' in self.keys) and Directions.SOUTH in legal: move = Directions.SOUTH 61 | return move 62 | 63 | class KeyboardAgent2(KeyboardAgent): 64 | """ 65 | A second agent controlled by the keyboard. 66 | """ 67 | # NOTE: Arrow keys also work. 
68 | WEST_KEY = 'j' 69 | EAST_KEY = "l" 70 | NORTH_KEY = 'i' 71 | SOUTH_KEY = 'k' 72 | STOP_KEY = 'u' 73 | 74 | def getMove(self, legal): 75 | move = Directions.STOP 76 | if (self.WEST_KEY in self.keys) and Directions.WEST in legal: move = Directions.WEST 77 | if (self.EAST_KEY in self.keys) and Directions.EAST in legal: move = Directions.EAST 78 | if (self.NORTH_KEY in self.keys) and Directions.NORTH in legal: move = Directions.NORTH 79 | if (self.SOUTH_KEY in self.keys) and Directions.SOUTH in legal: move = Directions.SOUTH 80 | return move 81 | -------------------------------------------------------------------------------- /layout.py: -------------------------------------------------------------------------------- 1 | # layout.py 2 | # --------- 3 | # Licensing Information: Please do not distribute or publish solutions to this 4 | # project. You are free to use and extend these projects for educational 5 | # purposes. The Pacman AI projects were developed at UC Berkeley, primarily by 6 | # John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). 7 | # Student side autograding was added by Brad Miller, Nick Hay, and Pieter 8 | # Abbeel in Spring 2013. 9 | # For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html 10 | 11 | from util import manhattanDistance 12 | from game import Grid 13 | import os 14 | import random 15 | 16 | VISIBILITY_MATRIX_CACHE = {} 17 | 18 | class Layout: 19 | """ 20 | A Layout manages the static information about the game board. 
21 | """ 22 | 23 | def __init__(self, layoutText): 24 | self.width = len(layoutText[0]) 25 | self.height= len(layoutText) 26 | self.walls = Grid(self.width, self.height, False) 27 | self.food = Grid(self.width, self.height, False) 28 | self.capsules = [] 29 | self.agentPositions = [] 30 | self.numGhosts = 0 31 | self.processLayoutText(layoutText) 32 | self.layoutText = layoutText 33 | # self.initializeVisibilityMatrix() 34 | 35 | def getNumGhosts(self): 36 | return self.numGhosts 37 | 38 | def initializeVisibilityMatrix(self): 39 | global VISIBILITY_MATRIX_CACHE 40 | if reduce(str.__add__, self.layoutText) not in VISIBILITY_MATRIX_CACHE: 41 | from game import Directions 42 | vecs = [(-0.5,0), (0.5,0),(0,-0.5),(0,0.5)] 43 | dirs = [Directions.NORTH, Directions.SOUTH, Directions.WEST, Directions.EAST] 44 | vis = Grid(self.width, self.height, {Directions.NORTH:set(), Directions.SOUTH:set(), Directions.EAST:set(), Directions.WEST:set(), Directions.STOP:set()}) 45 | for x in range(self.width): 46 | for y in range(self.height): 47 | if self.walls[x][y] == False: 48 | for vec, direction in zip(vecs, dirs): 49 | dx, dy = vec 50 | nextx, nexty = x + dx, y + dy 51 | while (nextx + nexty) != int(nextx) + int(nexty) or not self.walls[int(nextx)][int(nexty)] : 52 | vis[x][y][direction].add((nextx, nexty)) 53 | nextx, nexty = x + dx, y + dy 54 | self.visibility = vis 55 | VISIBILITY_MATRIX_CACHE[reduce(str.__add__, self.layoutText)] = vis 56 | else: 57 | self.visibility = VISIBILITY_MATRIX_CACHE[reduce(str.__add__, self.layoutText)] 58 | 59 | def isWall(self, pos): 60 | x, col = pos 61 | return self.walls[x][col] 62 | 63 | def getRandomLegalPosition(self): 64 | x = random.choice(range(self.width)) 65 | y = random.choice(range(self.height)) 66 | while self.isWall( (x, y) ): 67 | x = random.choice(range(self.width)) 68 | y = random.choice(range(self.height)) 69 | return (x,y) 70 | 71 | def getRandomCorner(self): 72 | poses = [(1,1), (1, self.height - 2), (self.width - 2, 1), 
(self.width - 2, self.height - 2)] 73 | return random.choice(poses) 74 | 75 | def getFurthestCorner(self, pacPos): 76 | poses = [(1,1), (1, self.height - 2), (self.width - 2, 1), (self.width - 2, self.height - 2)] 77 | dist, pos = max([(manhattanDistance(p, pacPos), p) for p in poses]) 78 | return pos 79 | 80 | def isVisibleFrom(self, ghostPos, pacPos, pacDirection): 81 | row, col = [int(x) for x in pacPos] 82 | return ghostPos in self.visibility[row][col][pacDirection] 83 | 84 | def __str__(self): 85 | return "\n".join(self.layoutText) 86 | 87 | def deepCopy(self): 88 | return Layout(self.layoutText[:]) 89 | 90 | def processLayoutText(self, layoutText): 91 | """ 92 | Coordinates are flipped from the input format to the (x,y) convention here 93 | 94 | The shape of the maze. Each character 95 | represents a different type of object. 96 | % - Wall 97 | . - Food 98 | o - Capsule 99 | G - Ghost 100 | P - Pacman 101 | Other characters are ignored. 102 | """ 103 | maxY = self.height - 1 104 | for y in range(self.height): 105 | for x in range(self.width): 106 | layoutChar = layoutText[maxY - y][x] 107 | self.processLayoutChar(x, y, layoutChar) 108 | self.agentPositions.sort() 109 | self.agentPositions = [ ( i == 0, pos) for i, pos in self.agentPositions] 110 | 111 | def processLayoutChar(self, x, y, layoutChar): 112 | if layoutChar == '%': 113 | self.walls[x][y] = True 114 | elif layoutChar == '.': 115 | self.food[x][y] = True 116 | elif layoutChar == 'o': 117 | self.capsules.append((x, y)) 118 | elif layoutChar == 'P': 119 | self.agentPositions.append( (0, (x, y) ) ) 120 | elif layoutChar in ['G']: 121 | self.agentPositions.append( (1, (x, y) ) ) 122 | self.numGhosts += 1 123 | elif layoutChar in ['1', '2', '3', '4']: 124 | self.agentPositions.append( (int(layoutChar), (x,y))) 125 | self.numGhosts += 1 126 | def getLayout(name, back = 2): 127 | if name.endswith('.lay'): 128 | layout = tryToLoad('layouts/' + name) 129 | if layout == None: layout = tryToLoad(name) 130 | 
else: 131 | layout = tryToLoad('layouts/' + name + '.lay') 132 | if layout == None: layout = tryToLoad(name + '.lay') 133 | if layout == None and back >= 0: 134 | curdir = os.path.abspath('.') 135 | os.chdir('..') 136 | layout = getLayout(name, back -1) 137 | os.chdir(curdir) 138 | return layout 139 | 140 | def tryToLoad(fullname): 141 | if(not os.path.exists(fullname)): return None 142 | f = open(fullname) 143 | try: return Layout([line.strip() for line in f]) 144 | finally: f.close() 145 | -------------------------------------------------------------------------------- /layouts/capsuleClassic.lay: -------------------------------------------------------------------------------- 1 | %%%%%%%%%%%%%%%%%%% 2 | %G. G ....% 3 | %.% % %%%%%% %.%%.% 4 | %.%o% % o% %.o%.% 5 | %.%%%.% %%% %..%.% 6 | %..... P %..%G% 7 | %%%%%%%%%%%%%%%%%%%% 8 | -------------------------------------------------------------------------------- /layouts/contestClassic.lay: -------------------------------------------------------------------------------- 1 | %%%%%%%%%%%%%%%%%%%% 2 | %o...%........%...o% 3 | %.%%.%.%%..%%.%.%%.% 4 | %...... G GG%......% 5 | %.%.%%.%% %%%.%%.%.% 6 | %.%....% ooo%.%..%.% 7 | %.%.%%.% %% %.%.%%.% 8 | %o%......P....%....% 9 | %%%%%%%%%%%%%%%%%%%% 10 | -------------------------------------------------------------------------------- /layouts/mediumClassic.lay: -------------------------------------------------------------------------------- 1 | %%%%%%%%%%%%%%%%%%%% 2 | %o...%........%....% 3 | %.%%.%.%%%%%%.%.%%.% 4 | %.%..............%.% 5 | %.%.%%.%% %%.%%.%.% 6 | %......%G G%......% 7 | %.%.%%.%%%%%%.%%.%.% 8 | %.%..............%.% 9 | %.%%.%.%%%%%%.%.%%.% 10 | %....%...P....%...o% 11 | %%%%%%%%%%%%%%%%%%%% 12 | -------------------------------------------------------------------------------- /layouts/mediumGrid.lay: -------------------------------------------------------------------------------- 1 | %%%%%%%% 2 | %P % 3 | % .% . % 4 | % % % 5 | % .% . 
% 6 | % G% 7 | %%%%%%%% 8 | -------------------------------------------------------------------------------- /layouts/minimaxClassic.lay: -------------------------------------------------------------------------------- 1 | %%%%%%%%% 2 | %.P G% 3 | % %.%G%%% 4 | %G %%% 5 | %%%%%%%%% 6 | -------------------------------------------------------------------------------- /layouts/openClassic.lay: -------------------------------------------------------------------------------- 1 | %%%%%%%%%%%%%%%%%%%%%%%%% 2 | %.. P .... .... % 3 | %.. ... ... ... ... % 4 | %.. ... ... ... ... % 5 | %.. .... .... G % 6 | %.. ... ... ... ... % 7 | %.. ... ... ... ... % 8 | %.. .... .... o% 9 | %%%%%%%%%%%%%%%%%%%%%%%%% 10 | -------------------------------------------------------------------------------- /layouts/originalClassic.lay: -------------------------------------------------------------------------------- 1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | %............%%............% 3 | %.%%%%.%%%%%.%%.%%%%%.%%%%.% 4 | %o%%%%.%%%%%.%%.%%%%%.%%%%o% 5 | %.%%%%.%%%%%.%%.%%%%%.%%%%.% 6 | %..........................% 7 | %.%%%%.%%.%%%%%%%%.%%.%%%%.% 8 | %.%%%%.%%.%%%%%%%%.%%.%%%%.% 9 | %......%%....%%....%%......% 10 | %%%%%%.%%%%% %% %%%%%.%%%%%% 11 | %%%%%%.%%%%% %% %%%%%.%%%%%% 12 | %%%%%%.% %.%%%%%% 13 | %%%%%%.% %%%% %%%% %.%%%%%% 14 | % . %G GG G% . % 15 | %%%%%%.% %%%%%%%%%% %.%%%%%% 16 | %%%%%%.% %.%%%%%% 17 | %%%%%%.% %%%%%%%%%% %.%%%%%% 18 | %............%%............% 19 | %.%%%%.%%%%%.%%.%%%%%.%%%%.% 20 | %.%%%%.%%%%%.%%.%%%%%.%%%%.% 21 | %o..%%....... 
.......%%..o% 22 | %%%.%%.%%.%%%%%%%%.%%.%%.%%% 23 | %%%.%%.%%.%%%%%%%%.%%.%%.%%% 24 | %......%%....%%....%%......% 25 | %.%%%%%%%%%%.%%.%%%%%%%%%%.% 26 | %.............P............% 27 | %%%%%%%%%%%%%%%%%%%%%%%%%%%% 28 | -------------------------------------------------------------------------------- /layouts/smallClassic.lay: -------------------------------------------------------------------------------- 1 | %%%%%%%%%%%%%%%%%%%% 2 | %......%G G%......% 3 | %.%%...%% %%...%%.% 4 | %.%o.%........%.o%.% 5 | %.%%.%.%%%%%%.%.%%.% 6 | %........P.........% 7 | %%%%%%%%%%%%%%%%%%%% 8 | -------------------------------------------------------------------------------- /layouts/smallGrid.lay: -------------------------------------------------------------------------------- 1 | %%%%%%% 2 | % P % 3 | % %%% % 4 | % %. % 5 | % %%% % 6 | %. G % 7 | %%%%%%% 8 | -------------------------------------------------------------------------------- /layouts/testClassic.lay: -------------------------------------------------------------------------------- 1 | %%%%% 2 | % . % 3 | %.G.% 4 | % . % 5 | %. .% 6 | % % 7 | % .% 8 | % % 9 | %P .% 10 | %%%%% 11 | -------------------------------------------------------------------------------- /layouts/trappedClassic.lay: -------------------------------------------------------------------------------- 1 | %%%%%%%% 2 | % P G% 3 | %G%%%%%% 4 | %.... % 5 | %%%%%%%% 6 | -------------------------------------------------------------------------------- /layouts/trickyClassic.lay: -------------------------------------------------------------------------------- 1 | %%%%%%%%%%%%%%%%%%%% 2 | %o...%........%...o% 3 | %.%%.%.%%..%%.%.%%.% 4 | %.%.....%..%.....%.% 5 | %.%.%%.%% %%.%%.%.% 6 | %...... 
GGGG%.%....% 7 | %.%....%%%%%%.%..%.% 8 | %.%....% oo%.%..%.% 9 | %.%....% %%%%.%..%.% 10 | %.%...........%..%.% 11 | %.%%.%.%%%%%%.%.%%.% 12 | %o...%...P....%...o% 13 | %%%%%%%%%%%%%%%%%%%% 14 | -------------------------------------------------------------------------------- /learningAgents.py: -------------------------------------------------------------------------------- 1 | # learningAgents.py 2 | # ----------------- 3 | # Licensing Information: Please do not distribute or publish solutions to this 4 | # project. You are free to use and extend these projects for educational 5 | # purposes. The Pacman AI projects were developed at UC Berkeley, primarily by 6 | # John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). 7 | # Student side autograding was added by Brad Miller, Nick Hay, and Pieter 8 | # Abbeel in Spring 2013. 9 | # For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html 10 | 11 | from game import Directions, Agent, Actions 12 | 13 | import random,util,time 14 | 15 | class ValueEstimationAgent(Agent): 16 | """ 17 | Abstract agent which assigns values to (state,action) 18 | Q-Values for an environment. As well as a value to a 19 | state and a policy given respectively by, 20 | 21 | V(s) = max_{a in actions} Q(s,a) 22 | policy(s) = arg_max_{a in actions} Q(s,a) 23 | 24 | Both ValueIterationAgent and QLearningAgent inherit 25 | from this agent. While a ValueIterationAgent has 26 | a model of the environment via a MarkovDecisionProcess 27 | (see mdp.py) that is used to estimate Q-Values before 28 | ever actually acting, the QLearningAgent estimates 29 | Q-Values while acting in the environment. 30 | """ 31 | 32 | def __init__(self, alpha=1.0, epsilon=0.05, gamma=0.8, numTraining = 10): 33 | """ 34 | Sets options, which can be passed in via the Pacman command line using -a alpha=0.5,... 
35 | alpha - learning rate 36 | epsilon - exploration rate 37 | gamma - discount factor 38 | numTraining - number of training episodes, i.e. no learning after these many episodes 39 | """ 40 | self.alpha = float(alpha) 41 | self.epsilon = float(epsilon) 42 | self.discount = float(gamma) 43 | self.numTraining = int(numTraining) 44 | 45 | #################################### 46 | # Override These Functions # 47 | #################################### 48 | def getQValue(self, state, action): 49 | """ 50 | Should return Q(state,action) 51 | """ 52 | util.raiseNotDefined() 53 | 54 | def getValue(self, state): 55 | """ 56 | What is the value of this state under the best action? 57 | Concretely, this is given by 58 | 59 | V(s) = max_{a in actions} Q(s,a) 60 | """ 61 | util.raiseNotDefined() 62 | 63 | def getPolicy(self, state): 64 | """ 65 | What is the best action to take in the state. Note that because 66 | we might want to explore, this might not coincide with getAction 67 | Concretely, this is given by 68 | 69 | policy(s) = arg_max_{a in actions} Q(s,a) 70 | 71 | If many actions achieve the maximal Q-value, 72 | it doesn't matter which is selected. 73 | """ 74 | util.raiseNotDefined() 75 | 76 | def getAction(self, state): 77 | """ 78 | state: can call state.getLegalActions() 79 | Choose an action and return it. 80 | """ 81 | util.raiseNotDefined() 82 | 83 | class ReinforcementAgent(ValueEstimationAgent): 84 | """ 85 | Abstract Reinforcemnt Agent: A ValueEstimationAgent 86 | which estimates Q-Values (as well as policies) from experience 87 | rather than a model 88 | 89 | What you need to know: 90 | - The environment will call 91 | observeTransition(state,action,nextState,deltaReward), 92 | which will call update(state, action, nextState, deltaReward) 93 | which you should override. 
94 | - Use self.getLegalActions(state) to know which actions 95 | are available in a state 96 | """ 97 | #################################### 98 | # Override These Functions # 99 | #################################### 100 | 101 | def update(self, state, action, nextState, reward): 102 | """ 103 | This class will call this function, which you write, after 104 | observing a transition and reward 105 | """ 106 | util.raiseNotDefined() 107 | 108 | #################################### 109 | # Read These Functions # 110 | #################################### 111 | 112 | def getLegalActions(self,state): 113 | """ 114 | Get the actions available for a given 115 | state. This is what you should use to 116 | obtain legal actions for a state 117 | """ 118 | return self.actionFn(state) 119 | 120 | def observeTransition(self, state,action,nextState,deltaReward): 121 | """ 122 | Called by environment to inform agent that a transition has 123 | been observed. This will result in a call to self.update 124 | on the same arguments 125 | 126 | NOTE: Do *not* override or call this function 127 | """ 128 | self.episodeRewards += deltaReward 129 | self.update(state,action,nextState,deltaReward) 130 | 131 | def startEpisode(self): 132 | """ 133 | Called by environment when new episode is starting 134 | """ 135 | self.lastState = None 136 | self.lastAction = None 137 | self.episodeRewards = 0.0 138 | 139 | def stopEpisode(self): 140 | """ 141 | Called by environment when episode is done 142 | """ 143 | if self.episodesSoFar < self.numTraining: 144 | self.accumTrainRewards += self.episodeRewards 145 | else: 146 | self.accumTestRewards += self.episodeRewards 147 | self.episodesSoFar += 1 148 | if self.episodesSoFar >= self.numTraining: 149 | # Take off the training wheels 150 | self.epsilon = 0.0 # no exploration 151 | self.alpha = 0.0 # no learning 152 | 153 | def isInTraining(self): 154 | return self.episodesSoFar < self.numTraining 155 | 156 | def isInTesting(self): 157 | return not 
self.isInTraining() 158 | 159 | def __init__(self, actionFn = None, numTraining=100, epsilon=0.5, alpha=0.5, gamma=1): 160 | """ 161 | actionFn: Function which takes a state and returns the list of legal actions 162 | 163 | alpha - learning rate 164 | epsilon - exploration rate 165 | gamma - discount factor 166 | numTraining - number of training episodes, i.e. no learning after these many episodes 167 | """ 168 | if actionFn == None: 169 | actionFn = lambda state: state.getLegalActions() 170 | self.actionFn = actionFn 171 | self.episodesSoFar = 0 172 | self.accumTrainRewards = 0.0 173 | self.accumTestRewards = 0.0 174 | self.numTraining = int(numTraining) 175 | self.epsilon = float(epsilon) 176 | self.alpha = float(alpha) 177 | self.discount = float(gamma) 178 | 179 | ################################ 180 | # Controls needed for Crawler # 181 | ################################ 182 | def setEpsilon(self, epsilon): 183 | self.epsilon = epsilon 184 | 185 | def setLearningRate(self, alpha): 186 | self.alpha = alpha 187 | 188 | def setDiscount(self, discount): 189 | self.discount = discount 190 | 191 | def doAction(self,state,action): 192 | """ 193 | Called by inherited class when 194 | an action is taken in a state 195 | """ 196 | self.lastState = state 197 | self.lastAction = action 198 | 199 | ################### 200 | # Pacman Specific # 201 | ################### 202 | def observationFunction(self, state): 203 | """ 204 | This is where we ended up after our last action. 
205 | The simulation should somehow ensure this is called 206 | """ 207 | if not self.lastState is None: 208 | reward = state.getScore() - self.lastState.getScore() 209 | self.observeTransition(self.lastState, self.lastAction, state, reward) 210 | return state 211 | 212 | def registerInitialState(self, state): 213 | self.startEpisode() 214 | if self.episodesSoFar == 0: 215 | print 'Beginning %d episodes of Training' % (self.numTraining) 216 | 217 | def final(self, state): 218 | """ 219 | Called by Pacman game at the terminal state 220 | """ 221 | deltaReward = state.getScore() - self.lastState.getScore() 222 | self.observeTransition(self.lastState, self.lastAction, state, deltaReward) 223 | self.stopEpisode() 224 | 225 | # Make sure we have this var 226 | if not 'episodeStartTime' in self.__dict__: 227 | self.episodeStartTime = time.time() 228 | if not 'lastWindowAccumRewards' in self.__dict__: 229 | self.lastWindowAccumRewards = 0.0 230 | self.lastWindowAccumRewards += state.getScore() 231 | 232 | NUM_EPS_UPDATE = 100 233 | if self.episodesSoFar % NUM_EPS_UPDATE == 0: 234 | print 'Reinforcement Learning Status:' 235 | windowAvg = self.lastWindowAccumRewards / float(NUM_EPS_UPDATE) 236 | if self.episodesSoFar <= self.numTraining: 237 | trainAvg = self.accumTrainRewards / float(self.episodesSoFar) 238 | print '\tCompleted %d out of %d training episodes' % ( 239 | self.episodesSoFar,self.numTraining) 240 | print '\tAverage Rewards over all training: %.2f' % ( 241 | trainAvg) 242 | else: 243 | testAvg = float(self.accumTestRewards) / (self.episodesSoFar - self.numTraining) 244 | print '\tCompleted %d test episodes' % (self.episodesSoFar - self.numTraining) 245 | print '\tAverage Rewards over testing: %.2f' % testAvg 246 | print '\tAverage Rewards for last %d episodes: %.2f' % ( 247 | NUM_EPS_UPDATE,windowAvg) 248 | print '\tEpisode took %.2f seconds' % (time.time() - self.episodeStartTime) 249 | self.lastWindowAccumRewards = 0.0 250 | self.episodeStartTime = 
# mdp.py
# ------
# Licensing Information: Please do not distribute or publish solutions to this
# project. You are free to use and extend these projects for educational
# purposes. The Pacman AI projects were developed at UC Berkeley, primarily by
# John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and Pieter
# Abbeel in Spring 2013.
# For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html

import random

class MarkovDecisionProcess:
    """
    Abstract interface for a Markov decision process (MDP).

    Concrete subclasses supply the state space, actions, transition model
    and rewards.  Each method body here is the bare name ``abstract``, a
    deliberate trick: calling an un-overridden method raises NameError.
    """

    def getStates(self):
        """
        Return a list of every state in the MDP.

        Enumerating the full state space is generally only feasible for
        small MDPs.
        """
        abstract

    def getStartState(self):
        """
        Return the MDP's start state.
        """
        abstract

    def getPossibleActions(self, state):
        """
        Return the list of actions available from 'state'.
        """
        abstract

    def getTransitionStatesAndProbs(self, state, action):
        """
        Return a list of (nextState, prob) pairs: the states reachable
        from 'state' by taking 'action', each paired with its transition
        probability.

        Note that in Q-learning, and reinforcement learning in general,
        we neither know these probabilities nor model them directly.
        """
        abstract

    def getReward(self, state, action, nextState):
        """
        Return the reward for the (state, action, nextState) transition.

        Not available in the reinforcement-learning setting.
        """
        abstract

    def isTerminal(self, state):
        """
        Return True if 'state' is terminal.  By convention a terminal
        state has zero future reward; it may have no actions, or
        equivalently a single zero-reward 'pass' self-loop — the two
        formulations are equivalent.
        """
        abstract
# For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html

from pacman import Directions
from game import Agent
import random
import game
import util

class LeftTurnAgent(game.Agent):
    "An agent that turns left at every opportunity"

    def getAction(self, state):
        """
        Prefer turning left; otherwise go straight, then right, then
        reverse; STOP only when nothing else is legal.
        """
        legal = state.getLegalPacmanActions()
        current = state.getPacmanState().configuration.direction
        if current == Directions.STOP: current = Directions.NORTH
        left = Directions.LEFT[current]
        if left in legal: return left
        if current in legal: return current
        if Directions.RIGHT[current] in legal: return Directions.RIGHT[current]
        # LEFT of LEFT is the reverse direction
        if Directions.LEFT[left] in legal: return Directions.LEFT[left]
        return Directions.STOP

class GreedyAgent(Agent):
    "Picks the action whose immediate successor state evaluates highest."

    def __init__(self, evalFn="scoreEvaluation"):
        # evalFn names a module-level evaluation function (state -> number).
        self.evaluationFunction = util.lookup(evalFn, globals())
        # Fixed anti-idiom: compare to None with 'is not', not '!='.
        assert self.evaluationFunction is not None

    def getAction(self, state):
        # Generate candidate actions; never voluntarily stop.
        legal = state.getLegalPacmanActions()
        if Directions.STOP in legal: legal.remove(Directions.STOP)

        # Evaluate each *successor* state.  The loop variable is named
        # 'nextState' instead of the original 'state', which shadowed the
        # method parameter (same behavior, far less error-prone).
        successors = [(state.generateSuccessor(0, action), action) for action in legal]
        scored = [(self.evaluationFunction(nextState), action) for nextState, action in successors]
        bestScore = max(scored)[0]
        # Break score ties uniformly at random.
        bestActions = [action for score, action in scored if score == bestScore]
        return random.choice(bestActions)

def scoreEvaluation(state):
    "Evaluation function: the state's current game score."
    return state.getScore()
The Pacman AI projects were developed at UC Berkeley, primarily by 6 | # John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). 7 | # Student side autograding was added by Brad Miller, Nick Hay, and Pieter 8 | # Abbeel in Spring 2013. 9 | # For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html 10 | 11 | STUDENT_CODE_DEFAULT = 'analysis.py,qlearningAgents.py,valueIterationAgents.py' 12 | PROJECT_TEST_CLASSES = 'reinforcementTestClasses.py' 13 | PROJECT_NAME = 'Project 3: Reinforcement learning' 14 | -------------------------------------------------------------------------------- /qlearningAgents.py: -------------------------------------------------------------------------------- 1 | # qlearningAgents.py 2 | # ------------------ 3 | # Licensing Information: Please do not distribute or publish solutions to this 4 | # project. You are free to use and extend these projects for educational 5 | # purposes. The Pacman AI projects were developed at UC Berkeley, primarily by 6 | # John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). 7 | # Student side autograding was added by Brad Miller, Nick Hay, and Pieter 8 | # Abbeel in Spring 2013. 
# For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html

from game import *
from learningAgents import ReinforcementAgent
from featureExtractors import *
import random,util,math

class QLearningAgent(ReinforcementAgent):
    """
    Q-Learning Agent

    Functions you should fill in:
      - computeValueFromQValues
      - computeActionFromQValues
      - getQValue
      - getAction
      - update

    Instance variables you have access to
      - self.epsilon (exploration prob)
      - self.alpha (learning rate)
      - self.discount (discount rate)

    Functions you should use
      - self.getLegalActions(state)
        which returns legal actions for a state
    """
    def __init__(self, **args):
        # Q-values live in a Counter so unseen (state, action) pairs
        # default to 0.0 without explicit initialization.
        ReinforcementAgent.__init__(self, **args)
        self.q_values = util.Counter()

    def getQValue(self, state, action):
        """
        Returns Q(state,action): 0.0 if we have never seen the pair
        (Counter default), the learned Q-node value otherwise.
        """
        return self.q_values[(state, action)]

    def computeValueFromQValues(self, state):
        """
        Returns max_action Q(state,action) over legal actions, or 0.0
        when there are no legal actions (the terminal state).
        """
        legal = self.getLegalActions(state)
        if not legal:
            return 0.0
        return max(self.getQValue(state, action) for action in legal)

    def computeActionFromQValues(self, state):
        """
        Returns the best legal action in 'state', or None when there
        are no legal actions (the terminal state).
        """
        best_action = None
        best_value = None
        for action in self.getLegalActions(state):
            value = self.getQValue(state, action)
            # Strict '>' keeps the first action on ties, matching the
            # original tie-breaking behavior.
            if best_action is None or value > best_value:
                best_action, best_value = action, value
        return best_action

    def getAction(self, state):
        """
        Epsilon-greedy action selection: with probability self.epsilon
        take a random legal action, otherwise the greedy policy action.
        Returns None when there are no legal actions (terminal state).

        HINT: You might want to use util.flipCoin(prob)
        HINT: To pick randomly from a list, use random.choice(list)
        """
        legalActions = self.getLegalActions(state)
        # BUG FIX: the contract requires None at terminal states;
        # previously random.choice([]) raised IndexError whenever the
        # epsilon coin flip came up True at a terminal state.
        if not legalActions:
            return None
        if util.flipCoin(self.epsilon):
            return random.choice(legalActions)
        return self.computeActionFromQValues(state)

    def update(self, state, action, nextState, reward):
        """
        Observe a state => action => nextState, reward transition and
        apply the Q-learning update:

            Q(s,a) <- (1 - alpha) * Q(s,a) + alpha * (r + gamma * V(s'))

        V(s') is computeValueFromQValues(nextState), which already
        returns 0.0 at terminal states, so the old special case for
        "no legal next actions" is subsumed.

        NOTE: the framework calls this on your behalf;
        never call it yourself.
        """
        sample = reward + self.discount * self.computeValueFromQValues(nextState)
        old_q = self.getQValue(state, action)
        self.q_values[(state, action)] = (1 - self.alpha) * old_q + self.alpha * sample

    def getPolicy(self, state):
        return self.computeActionFromQValues(state)

    def getValue(self, state):
        return self.computeValueFromQValues(state)
class ApproximateQAgent(PacmanQAgent):
    """
    ApproximateQLearningAgent

    Represents Q-values as a linear function of features:
        Q(s,a) = w * featureVector(s,a)   (* is the dot product)

    You should only have to overwrite getQValue and update.
    All other QLearningAgent functions should work as is.
    """
    def __init__(self, extractor='IdentityExtractor', **args):
        # Feature extractor is looked up by name so it can be chosen
        # from the command line.
        self.featExtractor = util.lookup(extractor, globals())()
        PacmanQAgent.__init__(self, **args)
        # Weights default to 0.0 for unseen features.
        # (Removed the unused 'self.weight = 0' leftover.)
        self.weights = util.Counter()

    def getWeights(self):
        return self.weights

    def getQValue(self, state, action):
        """
        Returns Q(state,action) = w * featureVector
        where * is the dotProduct operator.
        """
        features = self.featExtractor.getFeatures(state, action)
        return sum(self.weights[feature] * features[feature] for feature in features)

    def update(self, state, action, nextState, reward):
        """
        Gradient-style weight update for every active feature:

            w_i <- w_i + alpha * difference * f_i(s,a)
            difference = (r + gamma * max_a' Q(s',a')) - Q(s,a)

        BUG FIX: 'difference' was previously recomputed inside the
        per-feature loop *after* earlier weights had been modified, so
        later features saw a partially updated Q(s,a).  It must be
        computed exactly once from the pre-update weights.
        """
        features = self.featExtractor.getFeatures(state, action)
        nextActions = self.getLegalActions(nextState)
        if not nextActions:
            # Terminal next state: no future value.
            nextValue = 0.0
        else:
            nextValue = max(self.getQValue(nextState, nextAction) for nextAction in nextActions)
        difference = reward + self.discount * nextValue - self.getQValue(state, action)
        for feature in features:
            self.weights[feature] = self.weights[feature] + self.alpha * difference * features[feature]

    def final(self, state):
        "Called at the end of each game."
        # call the super-class final method
        PacmanQAgent.final(self, state)

        # did we finish training?
        if self.episodesSoFar == self.numTraining:
            # you might want to print your weights here for debugging
            pass
# Class which models a question in a project.  Note that questions have a
# maximum number of points they are worth, and are composed of a series of
# test cases.
class Question(object):

    def raiseNotDefined(self):
        # Abort the run when a subclass forgets to override an abstract method.
        print 'Method not implemented: %s' % inspect.stack()[1][3]
        sys.exit(1)

    def __init__(self, questionDict):
        # questionDict comes from the CONFIG/test parser; values are strings.
        self.maxPoints = int(questionDict['max_points'])
        self.testCases = []

    def getMaxPoints(self):
        return self.maxPoints

    # Note that 'thunk' must be a function which accepts a single argument,
    # namely a 'grading' object.
    def addTestCase(self, testCase, thunk):
        self.testCases.append((testCase, thunk))

    def execute(self, grades):
        # Subclasses define how test results translate into points.
        self.raiseNotDefined()

# Question in which all test cases must be passed in order to receive credit.
class PassAllTestsQuestion(Question):

    def execute(self, grades):
        # TODO: is this the right way to use grades? The autograder doesn't seem to use it.
        testsFailed = False
        grades.assignZeroCredit()
        # Run *every* test (no short-circuit) so all messages get reported.
        for _, f in self.testCases:
            if not f(grades):
                testsFailed = True
        if testsFailed:
            grades.fail("Tests failed.")
        else:
            grades.assignFullCredit()


# Question in which partial credit is given for test cases with a ``points''
# property.  All other tests are mandatory and must be passed.
class HackedPartialCreditQuestion(Question):

    def execute(self, grades):
        # TODO: is this the right way to use grades? The autograder doesn't seem to use it.
        grades.assignZeroCredit()

        points = 0
        passed = True
        for testCase, f in self.testCases:
            testResult = f(grades)
            if "points" in testCase.testDict:
                # Optional test: worth its declared points when it passes.
                if testResult: points += float(testCase.testDict["points"])
            else:
                # Mandatory test: must pass for any credit at all.
                passed = passed and testResult

        ## FIXME: Below terrible hack to match q3's logic
        # Full optional points but a failed mandatory test => zero credit.
        if int(points) == self.maxPoints and not passed:
            grades.assignZeroCredit()
        else:
            grades.addPoints(int(points))


class Q6PartialCreditQuestion(Question):
    """Fails any test which returns False, otherwise doesn't affect the grades object.
    Partial credit tests will add the required points."""

    def execute(self, grades):
        grades.assignZeroCredit()

        # Run every test; a single failure zeroes the question.
        results = []
        for _, f in self.testCases:
            results.append(f(grades))
        if False in results:
            grades.assignZeroCredit()

class PartialCreditQuestion(Question):
    """Fails any test which returns False, otherwise doesn't affect the grades object.
    Partial credit tests will add the required points."""

    def execute(self, grades):
        grades.assignZeroCredit()

        # Unlike Q6PartialCreditQuestion, stop at the *first* failing test.
        for _, f in self.testCases:
            if not f(grades):
                grades.assignZeroCredit()
                grades.fail("Tests failed.")
                return False



class NumberPassedQuestion(Question):
    """Grade is the number of test cases passed."""

    def execute(self, grades):
        grades.addPoints([f(grades) for _, f in self.testCases].count(True))





# Template modeling a generic test case.
class TestCase(object):

    def raiseNotDefined(self):
        # Abort the run when a subclass forgets to override an abstract method.
        print 'Method not implemented: %s' % inspect.stack()[1][3]
        sys.exit(1)

    def getPath(self):
        return self.path

    def __init__(self, question, testDict):
        self.question = question
        # testDict is the parsed .test file (see testParser.TestParser).
        self.testDict = testDict
        self.path = testDict['path']
        # Accumulated output lines, flushed by testPass/testFail/testPartial.
        self.messages = []

    def __str__(self):
        self.raiseNotDefined()

    def execute(self, grades, moduleDict, solutionDict):
        # Run the test against student code; return True on pass.
        self.raiseNotDefined()

    def writeSolution(self, moduleDict, filePath):
        # Regenerate the .solution file from reference code.
        self.raiseNotDefined()
        return True

    # Tests should call the following messages for grading
    # to ensure a uniform format for test output.
    #
    # TODO: this is hairy, but we need to fix grading.py's interface
    # to get a nice hierarchical project - question - test structure,
    # then these should be moved into Question proper.
    def testPass(self, grades):
        grades.addMessage('PASS: %s' % (self.path,))
        for line in self.messages:
            grades.addMessage(' %s' % (line,))
        return True

    def testFail(self, grades):
        grades.addMessage('FAIL: %s' % (self.path,))
        for line in self.messages:
            grades.addMessage(' %s' % (line,))
        return False

    # This should really be question level?
    #
    def testPartial(self, grades, points, maxPoints):
        grades.addPoints(points)
        # Points beyond maxPoints are reported separately as extra credit.
        extraCredit = max(0, points - maxPoints)
        regularCredit = points - extraCredit

        grades.addMessage('%s: %s (%s of %s points)' % ("PASS" if points >= maxPoints else "FAIL", self.path, regularCredit, maxPoints))
        if extraCredit > 0:
            grades.addMessage('EXTRA CREDIT: %s points' % (extraCredit,))

        for line in self.messages:
            grades.addMessage(' %s' % (line,))

        return True

    def addMessage(self, message):
        # Accept multi-line messages; store one list entry per line.
        self.messages.extend(message.split('\n'))
# For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html

import re
import sys

class TestParser(object):
    """
    Parses a .test file into a dict of properties.

    Recognized forms:
        key: "value"            -> one-line property
        key: \"\"\" ... \"\"\"  -> multi-line property
    Blank/comment-only lines are preserved via the '__emit__' entry so the
    file can be regenerated faithfully.
    """

    def __init__(self, path):
        # save the path to the test file
        self.path = path

    def removeComments(self, rawlines):
        # Drop any portion of each line following a '#' symbol, then rejoin.
        stripped = []
        for line in rawlines:
            comment_start = line.find('#')
            if comment_start == -1:
                stripped.append(line)
            else:
                stripped.append(line[:comment_start])
        return '\n'.join(stripped)

    def parse(self):
        # Read the test case and strip comments; keep the raw lines around
        # both for '__raw_lines__' and for verbatim multi-line bodies.
        test = {}
        with open(self.path) as handle:
            raw_lines = handle.read().split('\n')

        comment_free = self.removeComments(raw_lines)
        test['__raw_lines__'] = raw_lines
        test['path'] = self.path
        test['__emit__'] = []
        lines = comment_free.split('\n')
        idx = 0
        # Consume one property (or blank line) per iteration.
        while idx < len(lines):
            line = lines[idx]
            # Blank (or comment-only) line: emit verbatim.
            if re.match(r'\A\s*\Z', line):
                test['__emit__'].append(("raw", raw_lines[idx]))
                idx += 1
                continue
            # One-line property: key: "value"
            one_line = re.match(r'\A([^"]*?):\s*"([^"]*)"\s*\Z', line)
            if one_line:
                test[one_line.group(1)] = one_line.group(2)
                test['__emit__'].append(("oneline", one_line.group(1)))
                idx += 1
                continue
            # Multi-line property: key: """ ... """
            multi_start = re.match(r'\A([^"]*?):\s*"""\s*\Z', line)
            if multi_start:
                body = []
                idx += 1
                while not re.match(r'\A\s*"""\s*\Z', lines[idx]):
                    body.append(raw_lines[idx])
                    idx += 1
                test[multi_start.group(1)] = '\n'.join(body)
                test['__emit__'].append(("multiline", multi_start.group(1)))
                idx += 1
                continue
            print('error parsing test file: %s' % self.path)
            sys.exit(1)
        return test
"""\n%s\n"""\n' % (data, testDict[data])) 80 | else: 81 | raise Exception("Bad __emit__") -------------------------------------------------------------------------------- /test_cases/CONFIG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightninglu10/pacman-reinforcementlearning/2b4b89cbec2bdcfb274cc92dbcf53801e0450b77/test_cases/CONFIG -------------------------------------------------------------------------------- /test_cases/q1/1-tinygrid.solution: -------------------------------------------------------------------------------- 1 | values_k_0: """ 2 | 0.0000 3 | 0.0000 4 | 0.0000 5 | """ 6 | 7 | q_values_k_0_action_north: """ 8 | illegal 9 | 0.0000 10 | illegal 11 | """ 12 | 13 | q_values_k_0_action_east: """ 14 | illegal 15 | 0.0000 16 | illegal 17 | """ 18 | 19 | q_values_k_0_action_exit: """ 20 | -10.0000 21 | illegal 22 | 10.0000 23 | """ 24 | 25 | q_values_k_0_action_south: """ 26 | illegal 27 | 0.0000 28 | illegal 29 | """ 30 | 31 | q_values_k_0_action_west: """ 32 | illegal 33 | 0.0000 34 | illegal 35 | """ 36 | 37 | values_k_1: """ 38 | -10.0000 39 | 0.0000 40 | 10.0000 41 | """ 42 | 43 | q_values_k_1_action_north: """ 44 | illegal 45 | -5.0000 46 | illegal 47 | """ 48 | 49 | q_values_k_1_action_east: """ 50 | illegal 51 | 0.0000 52 | illegal 53 | """ 54 | 55 | q_values_k_1_action_exit: """ 56 | -10.0000 57 | illegal 58 | 10.0000 59 | """ 60 | 61 | q_values_k_1_action_south: """ 62 | illegal 63 | 5.0000 64 | illegal 65 | """ 66 | 67 | q_values_k_1_action_west: """ 68 | illegal 69 | 0.0000 70 | illegal 71 | """ 72 | 73 | values_k_2: """ 74 | -10.0000 75 | 5.0000 76 | 10.0000 77 | """ 78 | 79 | q_values_k_2_action_north: """ 80 | illegal 81 | -5.0000 82 | illegal 83 | """ 84 | 85 | q_values_k_2_action_east: """ 86 | illegal 87 | 2.5000 88 | illegal 89 | """ 90 | 91 | q_values_k_2_action_exit: """ 92 | -10.0000 93 | illegal 94 | 10.0000 95 | """ 96 | 97 | q_values_k_2_action_south: """ 98 
| illegal 99 | 5.0000 100 | illegal 101 | """ 102 | 103 | q_values_k_2_action_west: """ 104 | illegal 105 | 2.5000 106 | illegal 107 | """ 108 | 109 | values_k_3: """ 110 | -10.0000 111 | 5.0000 112 | 10.0000 113 | """ 114 | 115 | q_values_k_3_action_north: """ 116 | illegal 117 | -5.0000 118 | illegal 119 | """ 120 | 121 | q_values_k_3_action_east: """ 122 | illegal 123 | 2.5000 124 | illegal 125 | """ 126 | 127 | q_values_k_3_action_exit: """ 128 | -10.0000 129 | illegal 130 | 10.0000 131 | """ 132 | 133 | q_values_k_3_action_south: """ 134 | illegal 135 | 5.0000 136 | illegal 137 | """ 138 | 139 | q_values_k_3_action_west: """ 140 | illegal 141 | 2.5000 142 | illegal 143 | """ 144 | 145 | values_k_4: """ 146 | -10.0000 147 | 5.0000 148 | 10.0000 149 | """ 150 | 151 | q_values_k_4_action_north: """ 152 | illegal 153 | -5.0000 154 | illegal 155 | """ 156 | 157 | q_values_k_4_action_east: """ 158 | illegal 159 | 2.5000 160 | illegal 161 | """ 162 | 163 | q_values_k_4_action_exit: """ 164 | -10.0000 165 | illegal 166 | 10.0000 167 | """ 168 | 169 | q_values_k_4_action_south: """ 170 | illegal 171 | 5.0000 172 | illegal 173 | """ 174 | 175 | q_values_k_4_action_west: """ 176 | illegal 177 | 2.5000 178 | illegal 179 | """ 180 | 181 | values_k_5: """ 182 | -10.0000 183 | 5.0000 184 | 10.0000 185 | """ 186 | 187 | q_values_k_5_action_north: """ 188 | illegal 189 | -5.0000 190 | illegal 191 | """ 192 | 193 | q_values_k_5_action_east: """ 194 | illegal 195 | 2.5000 196 | illegal 197 | """ 198 | 199 | q_values_k_5_action_exit: """ 200 | -10.0000 201 | illegal 202 | 10.0000 203 | """ 204 | 205 | q_values_k_5_action_south: """ 206 | illegal 207 | 5.0000 208 | illegal 209 | """ 210 | 211 | q_values_k_5_action_west: """ 212 | illegal 213 | 2.5000 214 | illegal 215 | """ 216 | 217 | values_k_6: """ 218 | -10.0000 219 | 5.0000 220 | 10.0000 221 | """ 222 | 223 | q_values_k_6_action_north: """ 224 | illegal 225 | -5.0000 226 | illegal 227 | """ 228 | 229 | 
q_values_k_6_action_east: """ 230 | illegal 231 | 2.5000 232 | illegal 233 | """ 234 | 235 | q_values_k_6_action_exit: """ 236 | -10.0000 237 | illegal 238 | 10.0000 239 | """ 240 | 241 | q_values_k_6_action_south: """ 242 | illegal 243 | 5.0000 244 | illegal 245 | """ 246 | 247 | q_values_k_6_action_west: """ 248 | illegal 249 | 2.5000 250 | illegal 251 | """ 252 | 253 | values_k_7: """ 254 | -10.0000 255 | 5.0000 256 | 10.0000 257 | """ 258 | 259 | q_values_k_7_action_north: """ 260 | illegal 261 | -5.0000 262 | illegal 263 | """ 264 | 265 | q_values_k_7_action_east: """ 266 | illegal 267 | 2.5000 268 | illegal 269 | """ 270 | 271 | q_values_k_7_action_exit: """ 272 | -10.0000 273 | illegal 274 | 10.0000 275 | """ 276 | 277 | q_values_k_7_action_south: """ 278 | illegal 279 | 5.0000 280 | illegal 281 | """ 282 | 283 | q_values_k_7_action_west: """ 284 | illegal 285 | 2.5000 286 | illegal 287 | """ 288 | 289 | values_k_8: """ 290 | -10.0000 291 | 5.0000 292 | 10.0000 293 | """ 294 | 295 | q_values_k_8_action_north: """ 296 | illegal 297 | -5.0000 298 | illegal 299 | """ 300 | 301 | q_values_k_8_action_east: """ 302 | illegal 303 | 2.5000 304 | illegal 305 | """ 306 | 307 | q_values_k_8_action_exit: """ 308 | -10.0000 309 | illegal 310 | 10.0000 311 | """ 312 | 313 | q_values_k_8_action_south: """ 314 | illegal 315 | 5.0000 316 | illegal 317 | """ 318 | 319 | q_values_k_8_action_west: """ 320 | illegal 321 | 2.5000 322 | illegal 323 | """ 324 | 325 | values_k_9: """ 326 | -10.0000 327 | 5.0000 328 | 10.0000 329 | """ 330 | 331 | q_values_k_9_action_north: """ 332 | illegal 333 | -5.0000 334 | illegal 335 | """ 336 | 337 | q_values_k_9_action_east: """ 338 | illegal 339 | 2.5000 340 | illegal 341 | """ 342 | 343 | q_values_k_9_action_exit: """ 344 | -10.0000 345 | illegal 346 | 10.0000 347 | """ 348 | 349 | q_values_k_9_action_south: """ 350 | illegal 351 | 5.0000 352 | illegal 353 | """ 354 | 355 | q_values_k_9_action_west: """ 356 | illegal 357 | 2.5000 358 | 
illegal 359 | """ 360 | 361 | values_k_100: """ 362 | -10.0000 363 | 5.0000 364 | 10.0000 365 | """ 366 | 367 | q_values_k_100_action_north: """ 368 | illegal 369 | -5.0000 370 | illegal 371 | """ 372 | 373 | q_values_k_100_action_east: """ 374 | illegal 375 | 2.5000 376 | illegal 377 | """ 378 | 379 | q_values_k_100_action_exit: """ 380 | -10.0000 381 | illegal 382 | 10.0000 383 | """ 384 | 385 | q_values_k_100_action_south: """ 386 | illegal 387 | 5.0000 388 | illegal 389 | """ 390 | 391 | q_values_k_100_action_west: """ 392 | illegal 393 | 2.5000 394 | illegal 395 | """ 396 | 397 | policy: """ 398 | exit 399 | south 400 | exit 401 | """ 402 | 403 | actions: """ 404 | north 405 | east 406 | exit 407 | south 408 | west 409 | """ 410 | 411 | -------------------------------------------------------------------------------- /test_cases/q1/1-tinygrid.test: -------------------------------------------------------------------------------- 1 | class: "ValueIterationTest" 2 | 3 | # GridWorld specification 4 | # _ is empty space 5 | # numbers are terminal states with that value 6 | # # is a wall 7 | # S is a start state 8 | # 9 | grid: """ 10 | -10 11 | S 12 | 10 13 | """ 14 | discount: "0.5" 15 | noise: "0.0" 16 | livingReward: "0.0" 17 | epsilon: "0.5" 18 | learningRate: "0.1" 19 | numExperiences: "100" 20 | valueIterations: "100" 21 | iterations: "10000" 22 | 23 | -------------------------------------------------------------------------------- /test_cases/q1/2-tinygrid-noisy.solution: -------------------------------------------------------------------------------- 1 | values_k_0: """ 2 | 0.0000 3 | 0.0000 4 | 0.0000 5 | """ 6 | 7 | q_values_k_0_action_north: """ 8 | illegal 9 | 0.0000 10 | illegal 11 | """ 12 | 13 | q_values_k_0_action_east: """ 14 | illegal 15 | 0.0000 16 | illegal 17 | """ 18 | 19 | q_values_k_0_action_exit: """ 20 | -10.0000 21 | illegal 22 | 10.0000 23 | """ 24 | 25 | q_values_k_0_action_south: """ 26 | illegal 27 | 0.0000 28 | illegal 29 | """ 30 | 
31 | q_values_k_0_action_west: """ 32 | illegal 33 | 0.0000 34 | illegal 35 | """ 36 | 37 | values_k_1: """ 38 | -10.0000 39 | 0.0000 40 | 10.0000 41 | """ 42 | 43 | q_values_k_1_action_north: """ 44 | illegal 45 | -5.6250 46 | illegal 47 | """ 48 | 49 | q_values_k_1_action_east: """ 50 | illegal 51 | 0.0000 52 | illegal 53 | """ 54 | 55 | q_values_k_1_action_exit: """ 56 | -10.0000 57 | illegal 58 | 10.0000 59 | """ 60 | 61 | q_values_k_1_action_south: """ 62 | illegal 63 | 5.6250 64 | illegal 65 | """ 66 | 67 | q_values_k_1_action_west: """ 68 | illegal 69 | 0.0000 70 | illegal 71 | """ 72 | 73 | values_k_2: """ 74 | -10.0000 75 | 5.6250 76 | 10.0000 77 | """ 78 | 79 | q_values_k_2_action_north: """ 80 | illegal 81 | -4.5703 82 | illegal 83 | """ 84 | 85 | q_values_k_2_action_east: """ 86 | illegal 87 | 3.1641 88 | illegal 89 | """ 90 | 91 | q_values_k_2_action_exit: """ 92 | -10.0000 93 | illegal 94 | 10.0000 95 | """ 96 | 97 | q_values_k_2_action_south: """ 98 | illegal 99 | 6.6797 100 | illegal 101 | """ 102 | 103 | q_values_k_2_action_west: """ 104 | illegal 105 | 3.1641 106 | illegal 107 | """ 108 | 109 | values_k_3: """ 110 | -10.0000 111 | 6.6797 112 | 10.0000 113 | """ 114 | 115 | q_values_k_3_action_north: """ 116 | illegal 117 | -4.3726 118 | illegal 119 | """ 120 | 121 | q_values_k_3_action_east: """ 122 | illegal 123 | 3.7573 124 | illegal 125 | """ 126 | 127 | q_values_k_3_action_exit: """ 128 | -10.0000 129 | illegal 130 | 10.0000 131 | """ 132 | 133 | q_values_k_3_action_south: """ 134 | illegal 135 | 6.8774 136 | illegal 137 | """ 138 | 139 | q_values_k_3_action_west: """ 140 | illegal 141 | 3.7573 142 | illegal 143 | """ 144 | 145 | values_k_4: """ 146 | -10.0000 147 | 6.8774 148 | 10.0000 149 | """ 150 | 151 | q_values_k_4_action_north: """ 152 | illegal 153 | -4.3355 154 | illegal 155 | """ 156 | 157 | q_values_k_4_action_east: """ 158 | illegal 159 | 3.8686 160 | illegal 161 | """ 162 | 163 | q_values_k_4_action_exit: """ 164 | -10.0000 165 | 
illegal 166 | 10.0000 167 | """ 168 | 169 | q_values_k_4_action_south: """ 170 | illegal 171 | 6.9145 172 | illegal 173 | """ 174 | 175 | q_values_k_4_action_west: """ 176 | illegal 177 | 3.8686 178 | illegal 179 | """ 180 | 181 | values_k_5: """ 182 | -10.0000 183 | 6.9145 184 | 10.0000 185 | """ 186 | 187 | q_values_k_5_action_north: """ 188 | illegal 189 | -4.3285 190 | illegal 191 | """ 192 | 193 | q_values_k_5_action_east: """ 194 | illegal 195 | 3.8894 196 | illegal 197 | """ 198 | 199 | q_values_k_5_action_exit: """ 200 | -10.0000 201 | illegal 202 | 10.0000 203 | """ 204 | 205 | q_values_k_5_action_south: """ 206 | illegal 207 | 6.9215 208 | illegal 209 | """ 210 | 211 | q_values_k_5_action_west: """ 212 | illegal 213 | 3.8894 214 | illegal 215 | """ 216 | 217 | values_k_6: """ 218 | -10.0000 219 | 6.9215 220 | 10.0000 221 | """ 222 | 223 | q_values_k_6_action_north: """ 224 | illegal 225 | -4.3272 226 | illegal 227 | """ 228 | 229 | q_values_k_6_action_east: """ 230 | illegal 231 | 3.8933 232 | illegal 233 | """ 234 | 235 | q_values_k_6_action_exit: """ 236 | -10.0000 237 | illegal 238 | 10.0000 239 | """ 240 | 241 | q_values_k_6_action_south: """ 242 | illegal 243 | 6.9228 244 | illegal 245 | """ 246 | 247 | q_values_k_6_action_west: """ 248 | illegal 249 | 3.8933 250 | illegal 251 | """ 252 | 253 | values_k_7: """ 254 | -10.0000 255 | 6.9228 256 | 10.0000 257 | """ 258 | 259 | q_values_k_7_action_north: """ 260 | illegal 261 | -4.3270 262 | illegal 263 | """ 264 | 265 | q_values_k_7_action_east: """ 266 | illegal 267 | 3.8941 268 | illegal 269 | """ 270 | 271 | q_values_k_7_action_exit: """ 272 | -10.0000 273 | illegal 274 | 10.0000 275 | """ 276 | 277 | q_values_k_7_action_south: """ 278 | illegal 279 | 6.9230 280 | illegal 281 | """ 282 | 283 | q_values_k_7_action_west: """ 284 | illegal 285 | 3.8941 286 | illegal 287 | """ 288 | 289 | values_k_8: """ 290 | -10.0000 291 | 6.9230 292 | 10.0000 293 | """ 294 | 295 | q_values_k_8_action_north: """ 296 | 
illegal 297 | -4.3269 298 | illegal 299 | """ 300 | 301 | q_values_k_8_action_east: """ 302 | illegal 303 | 3.8942 304 | illegal 305 | """ 306 | 307 | q_values_k_8_action_exit: """ 308 | -10.0000 309 | illegal 310 | 10.0000 311 | """ 312 | 313 | q_values_k_8_action_south: """ 314 | illegal 315 | 6.9231 316 | illegal 317 | """ 318 | 319 | q_values_k_8_action_west: """ 320 | illegal 321 | 3.8942 322 | illegal 323 | """ 324 | 325 | values_k_9: """ 326 | -10.0000 327 | 6.9231 328 | 10.0000 329 | """ 330 | 331 | q_values_k_9_action_north: """ 332 | illegal 333 | -4.3269 334 | illegal 335 | """ 336 | 337 | q_values_k_9_action_east: """ 338 | illegal 339 | 3.8942 340 | illegal 341 | """ 342 | 343 | q_values_k_9_action_exit: """ 344 | -10.0000 345 | illegal 346 | 10.0000 347 | """ 348 | 349 | q_values_k_9_action_south: """ 350 | illegal 351 | 6.9231 352 | illegal 353 | """ 354 | 355 | q_values_k_9_action_west: """ 356 | illegal 357 | 3.8942 358 | illegal 359 | """ 360 | 361 | values_k_100: """ 362 | -10.0000 363 | 6.9231 364 | 10.0000 365 | """ 366 | 367 | q_values_k_100_action_north: """ 368 | illegal 369 | -4.3269 370 | illegal 371 | """ 372 | 373 | q_values_k_100_action_east: """ 374 | illegal 375 | 3.8942 376 | illegal 377 | """ 378 | 379 | q_values_k_100_action_exit: """ 380 | -10.0000 381 | illegal 382 | 10.0000 383 | """ 384 | 385 | q_values_k_100_action_south: """ 386 | illegal 387 | 6.9231 388 | illegal 389 | """ 390 | 391 | q_values_k_100_action_west: """ 392 | illegal 393 | 3.8942 394 | illegal 395 | """ 396 | 397 | policy: """ 398 | exit 399 | south 400 | exit 401 | """ 402 | 403 | actions: """ 404 | north 405 | east 406 | exit 407 | south 408 | west 409 | """ 410 | 411 | -------------------------------------------------------------------------------- /test_cases/q1/2-tinygrid-noisy.test: -------------------------------------------------------------------------------- 1 | class: "ValueIterationTest" 2 | 3 | # GridWorld specification 4 | # _ is empty space 5 | # 
numbers are terminal states with that value 6 | # # is a wall 7 | # S is a start state 8 | # 9 | grid: """ 10 | -10 11 | S 12 | 10 13 | """ 14 | discount: "0.75" 15 | noise: "0.25" 16 | livingReward: "0.0" 17 | epsilon: "0.5" 18 | learningRate: "0.1" 19 | numExperiences: "100" 20 | valueIterations: "100" 21 | iterations: "10000" 22 | 23 | -------------------------------------------------------------------------------- /test_cases/q1/3-bridge.test: -------------------------------------------------------------------------------- 1 | class: "ValueIterationTest" 2 | 3 | # GridWorld specification 4 | # _ is empty space 5 | # numbers are terminal states with that value 6 | # # is a wall 7 | # S is a start state 8 | # 9 | grid: """ 10 | # 10 # 11 | -100 _ -100 12 | -100 _ -100 13 | -100 _ -100 14 | -100 _ -100 15 | -100 S -100 16 | # 1 # 17 | """ 18 | gridName: "bridgeGrid" 19 | discount: "0.85" 20 | noise: "0.1" 21 | livingReward: "0.0" 22 | epsilon: "0.5" 23 | learningRate: "0.1" 24 | numExperiences: "500" 25 | valueIterations: "100" 26 | iterations: "10000" 27 | 28 | -------------------------------------------------------------------------------- /test_cases/q1/4-discountgrid.test: -------------------------------------------------------------------------------- 1 | class: "ValueIterationTest" 2 | 3 | # GridWorld specification 4 | # _ is empty space 5 | # numbers are terminal states with that value 6 | # # is a wall 7 | # S is a start state 8 | # 9 | grid: """ 10 | -10 _ 10 _ _ 11 | -10 _ # _ _ 12 | -10 _ 1 _ _ 13 | -10 _ # # _ 14 | -10 S _ _ _ 15 | """ 16 | discount: "0.9" 17 | noise: "0.2" 18 | livingReward: "0.0" 19 | epsilon: "0.2" 20 | learningRate: "0.1" 21 | numExperiences: "3000" 22 | valueIterations: "100" 23 | iterations: "10000" 24 | 25 | -------------------------------------------------------------------------------- /test_cases/q1/CONFIG: -------------------------------------------------------------------------------- 1 | max_points: "6" 2 | class: 
"PassAllTestsQuestion" -------------------------------------------------------------------------------- /test_cases/q2/1-bridge-grid.solution: -------------------------------------------------------------------------------- 1 | # This is the solution file for test_cases/q2/1-bridge-grid.test. 2 | # File intentionally blank. 3 | -------------------------------------------------------------------------------- /test_cases/q2/1-bridge-grid.test: -------------------------------------------------------------------------------- 1 | class: "GridPolicyTest" 2 | 3 | # Function in module in analysis that returns (discount, noise) 4 | parameterFn: "question2" 5 | question2: "true" 6 | 7 | # GridWorld specification 8 | # _ is empty space 9 | # numbers are terminal states with that value 10 | # # is a wall 11 | # S is a start state 12 | # 13 | grid: """ 14 | # -100 -100 -100 -100 -100 # 15 | 1 S _ _ _ _ 10 16 | # -100 -100 -100 -100 -100 # 17 | """ 18 | gridName: "bridgeGrid" 19 | 20 | # Policy specification 21 | # _ policy choice not checked 22 | # N, E, S, W policy action must be north, east, south, west 23 | # 24 | policy: """ 25 | _ _ _ _ _ _ _ 26 | _ E _ _ _ _ _ 27 | _ _ _ _ _ _ _ 28 | """ 29 | 30 | -------------------------------------------------------------------------------- /test_cases/q2/CONFIG: -------------------------------------------------------------------------------- 1 | max_points: "1" 2 | class: "PassAllTestsQuestion" 3 | -------------------------------------------------------------------------------- /test_cases/q3/1-question-3.1.solution: -------------------------------------------------------------------------------- 1 | # This is the solution file for test_cases/q3/1-question-3.1.test. 2 | # File intentionally blank. 
3 | -------------------------------------------------------------------------------- /test_cases/q3/1-question-3.1.test: -------------------------------------------------------------------------------- 1 | class: "GridPolicyTest" 2 | 3 | # Function in module in analysis that returns (discount, noise) 4 | parameterFn: "question3a" 5 | 6 | # GridWorld specification 7 | # _ is empty space 8 | # numbers are terminal states with that value 9 | # # is a wall 10 | # S is a start state 11 | # 12 | grid: """ 13 | _ _ _ _ _ 14 | _ # _ _ _ 15 | _ # 1 # 10 16 | S _ _ _ _ 17 | -10 -10 -10 -10 -10 18 | """ 19 | gridName: "discountGrid" 20 | 21 | # Policy specification 22 | # _ policy choice not checked 23 | # N, E, S, W policy action must be north, east, south, west 24 | # 25 | policy: """ 26 | _ _ _ _ _ 27 | _ _ _ _ _ 28 | _ _ _ _ _ 29 | E E N _ _ 30 | _ _ _ _ _ 31 | """ 32 | -------------------------------------------------------------------------------- /test_cases/q3/2-question-3.2.solution: -------------------------------------------------------------------------------- 1 | # This is the solution file for test_cases/q3/2-question-3.2.test. 2 | # File intentionally blank. 
3 | -------------------------------------------------------------------------------- /test_cases/q3/2-question-3.2.test: -------------------------------------------------------------------------------- 1 | class: "GridPolicyTest" 2 | 3 | # Function in module in analysis that returns (discount, noise) 4 | parameterFn: "question3b" 5 | 6 | # GridWorld specification 7 | # _ is empty space 8 | # numbers are terminal states with that value 9 | # # is a wall 10 | # S is a start state 11 | # 12 | grid: """ 13 | _ _ _ _ _ 14 | _ # _ _ _ 15 | _ # 1 # 10 16 | S _ _ _ _ 17 | -10 -10 -10 -10 -10 18 | """ 19 | gridName: "discountGrid" 20 | 21 | # Policy specification 22 | # _ policy choice not checked 23 | # N, E, S, W policy action must be north, east, south, west 24 | # 25 | policy: """ 26 | E E S _ _ 27 | N _ S _ _ 28 | N _ _ _ _ 29 | N _ _ _ _ 30 | _ _ _ _ _ 31 | """ 32 | -------------------------------------------------------------------------------- /test_cases/q3/3-question-3.3.solution: -------------------------------------------------------------------------------- 1 | # This is the solution file for test_cases/q3/3-question-3.3.test. 2 | # File intentionally blank. 
3 | -------------------------------------------------------------------------------- /test_cases/q3/3-question-3.3.test: -------------------------------------------------------------------------------- 1 | class: "GridPolicyTest" 2 | 3 | # Function in module in analysis that returns (discount, noise) 4 | parameterFn: "question3c" 5 | 6 | # GridWorld specification 7 | # _ is empty space 8 | # numbers are terminal states with that value 9 | # # is a wall 10 | # S is a start state 11 | # 12 | grid: """ 13 | _ _ _ _ _ 14 | _ # _ _ _ 15 | _ # 1 # 10 16 | S _ _ _ _ 17 | -10 -10 -10 -10 -10 18 | """ 19 | gridName: "discountGrid" 20 | 21 | # Policy specification 22 | # _ policy choice not checked 23 | # N, E, S, W policy action must be north, east, south, west 24 | # 25 | policy: """ 26 | _ _ _ _ _ 27 | _ _ _ _ _ 28 | _ _ _ _ _ 29 | E E E E N 30 | _ _ _ _ _ 31 | """ 32 | -------------------------------------------------------------------------------- /test_cases/q3/4-question-3.4.solution: -------------------------------------------------------------------------------- 1 | # This is the solution file for test_cases/q3/4-question-3.4.test. 2 | # File intentionally blank. 
3 | -------------------------------------------------------------------------------- /test_cases/q3/4-question-3.4.test: -------------------------------------------------------------------------------- 1 | class: "GridPolicyTest" 2 | 3 | # Function in module in analysis that returns (discount, noise) 4 | parameterFn: "question3d" 5 | 6 | # GridWorld specification 7 | # _ is empty space 8 | # numbers are terminal states with that value 9 | # # is a wall 10 | # S is a start state 11 | # 12 | grid: """ 13 | _ _ _ _ _ 14 | _ # _ _ _ 15 | _ # 1 # 10 16 | S _ _ _ _ 17 | -10 -10 -10 -10 -10 18 | """ 19 | gridName: "discountGrid" 20 | 21 | # Policy specification 22 | # _ policy choice not checked 23 | # N, E, S, W policy action must be north, east, south, west 24 | # 25 | policy: """ 26 | _ _ _ _ _ 27 | _ _ _ _ _ 28 | _ _ _ _ _ 29 | N _ _ _ _ 30 | _ _ _ _ _ 31 | """ 32 | 33 | # State the most probable path must visit 34 | # (x,y) for a particular location; (0,0) is bottom left 35 | # TERMINAL_STATE for the terminal state 36 | pathVisits: "(4,2)" 37 | -------------------------------------------------------------------------------- /test_cases/q3/5-question-3.5.solution: -------------------------------------------------------------------------------- 1 | # This is the solution file for test_cases/q3/5-question-3.5.test. 2 | # File intentionally blank. 
3 | -------------------------------------------------------------------------------- /test_cases/q3/5-question-3.5.test: -------------------------------------------------------------------------------- 1 | class: "GridPolicyTest" 2 | 3 | # Function in module in analysis that returns (discount, noise) 4 | parameterFn: "question3e" 5 | 6 | # GridWorld specification 7 | # _ is empty space 8 | # numbers are terminal states with that value 9 | # # is a wall 10 | # S is a start state 11 | # 12 | grid: """ 13 | _ _ _ _ _ 14 | _ # _ _ _ 15 | _ # 1 # 10 16 | S _ _ _ _ 17 | -10 -10 -10 -10 -10 18 | """ 19 | gridName: "discountGrid" 20 | 21 | # Policy specification 22 | # _ policy choice not checked 23 | # N, E, S, W policy action must be north, east, south, west 24 | # 25 | policy: """ 26 | _ _ _ _ _ 27 | _ _ _ _ _ 28 | _ _ _ _ _ 29 | _ _ _ _ _ 30 | _ _ _ _ _ 31 | """ 32 | 33 | # State the most probable path must not visit 34 | # (x,y) for a particular location; (0,0) is bottom left 35 | # TERMINAL_STATE for the terminal state 36 | pathNotVisits: "TERMINAL_STATE" 37 | -------------------------------------------------------------------------------- /test_cases/q3/CONFIG: -------------------------------------------------------------------------------- 1 | max_points: "5" 2 | class: "NumberPassedQuestion" 3 | -------------------------------------------------------------------------------- /test_cases/q4/1-tinygrid.solution: -------------------------------------------------------------------------------- 1 | q_values_k_0_action_north: """ 2 | illegal 3 | 0.0000 4 | illegal 5 | """ 6 | 7 | q_values_k_0_action_east: """ 8 | illegal 9 | 0.0000 10 | illegal 11 | """ 12 | 13 | q_values_k_0_action_exit: """ 14 | 0.0000 15 | illegal 16 | 0.0000 17 | """ 18 | 19 | q_values_k_0_action_south: """ 20 | illegal 21 | 0.0000 22 | illegal 23 | """ 24 | 25 | q_values_k_0_action_west: """ 26 | illegal 27 | 0.0000 28 | illegal 29 | """ 30 | 31 | q_values_k_1_action_north: """ 32 | illegal 33 | 
0.0000 34 | illegal 35 | """ 36 | 37 | q_values_k_1_action_east: """ 38 | illegal 39 | 0.0000 40 | illegal 41 | """ 42 | 43 | q_values_k_1_action_exit: """ 44 | 0.0000 45 | illegal 46 | 1.0000 47 | """ 48 | 49 | q_values_k_1_action_south: """ 50 | illegal 51 | 0.0000 52 | illegal 53 | """ 54 | 55 | q_values_k_1_action_west: """ 56 | illegal 57 | 0.0000 58 | illegal 59 | """ 60 | 61 | q_values_k_2_action_north: """ 62 | illegal 63 | 0.0000 64 | illegal 65 | """ 66 | 67 | q_values_k_2_action_east: """ 68 | illegal 69 | 0.0000 70 | illegal 71 | """ 72 | 73 | q_values_k_2_action_exit: """ 74 | 0.0000 75 | illegal 76 | 1.0000 77 | """ 78 | 79 | q_values_k_2_action_south: """ 80 | illegal 81 | 0.0000 82 | illegal 83 | """ 84 | 85 | q_values_k_2_action_west: """ 86 | illegal 87 | 0.0000 88 | illegal 89 | """ 90 | 91 | q_values_k_3_action_north: """ 92 | illegal 93 | 0.0000 94 | illegal 95 | """ 96 | 97 | q_values_k_3_action_east: """ 98 | illegal 99 | 0.0000 100 | illegal 101 | """ 102 | 103 | q_values_k_3_action_exit: """ 104 | 0.0000 105 | illegal 106 | 1.9000 107 | """ 108 | 109 | q_values_k_3_action_south: """ 110 | illegal 111 | 0.0000 112 | illegal 113 | """ 114 | 115 | q_values_k_3_action_west: """ 116 | illegal 117 | 0.0000 118 | illegal 119 | """ 120 | 121 | q_values_k_4_action_north: """ 122 | illegal 123 | 0.0000 124 | illegal 125 | """ 126 | 127 | q_values_k_4_action_east: """ 128 | illegal 129 | 0.0000 130 | illegal 131 | """ 132 | 133 | q_values_k_4_action_exit: """ 134 | 0.0000 135 | illegal 136 | 2.7100 137 | """ 138 | 139 | q_values_k_4_action_south: """ 140 | illegal 141 | 0.0000 142 | illegal 143 | """ 144 | 145 | q_values_k_4_action_west: """ 146 | illegal 147 | 0.0000 148 | illegal 149 | """ 150 | 151 | q_values_k_5_action_north: """ 152 | illegal 153 | 0.0000 154 | illegal 155 | """ 156 | 157 | q_values_k_5_action_east: """ 158 | illegal 159 | 0.0000 160 | illegal 161 | """ 162 | 163 | q_values_k_5_action_exit: """ 164 | -1.0000 165 | illegal 166 | 
2.7100 167 | """ 168 | 169 | q_values_k_5_action_south: """ 170 | illegal 171 | 0.0000 172 | illegal 173 | """ 174 | 175 | q_values_k_5_action_west: """ 176 | illegal 177 | 0.0000 178 | illegal 179 | """ 180 | 181 | q_values_k_6_action_north: """ 182 | illegal 183 | 0.0000 184 | illegal 185 | """ 186 | 187 | q_values_k_6_action_east: """ 188 | illegal 189 | 0.0000 190 | illegal 191 | """ 192 | 193 | q_values_k_6_action_exit: """ 194 | -1.0000 195 | illegal 196 | 3.4390 197 | """ 198 | 199 | q_values_k_6_action_south: """ 200 | illegal 201 | 0.0000 202 | illegal 203 | """ 204 | 205 | q_values_k_6_action_west: """ 206 | illegal 207 | 0.0000 208 | illegal 209 | """ 210 | 211 | q_values_k_7_action_north: """ 212 | illegal 213 | 0.0000 214 | illegal 215 | """ 216 | 217 | q_values_k_7_action_east: """ 218 | illegal 219 | 0.0000 220 | illegal 221 | """ 222 | 223 | q_values_k_7_action_exit: """ 224 | -1.0000 225 | illegal 226 | 3.4390 227 | """ 228 | 229 | q_values_k_7_action_south: """ 230 | illegal 231 | 0.1720 232 | illegal 233 | """ 234 | 235 | q_values_k_7_action_west: """ 236 | illegal 237 | 0.0000 238 | illegal 239 | """ 240 | 241 | q_values_k_8_action_north: """ 242 | illegal 243 | 0.0000 244 | illegal 245 | """ 246 | 247 | q_values_k_8_action_east: """ 248 | illegal 249 | 0.0000 250 | illegal 251 | """ 252 | 253 | q_values_k_8_action_exit: """ 254 | -1.0000 255 | illegal 256 | 4.0951 257 | """ 258 | 259 | q_values_k_8_action_south: """ 260 | illegal 261 | 0.1720 262 | illegal 263 | """ 264 | 265 | q_values_k_8_action_west: """ 266 | illegal 267 | 0.0000 268 | illegal 269 | """ 270 | 271 | q_values_k_9_action_north: """ 272 | illegal 273 | 0.0000 274 | illegal 275 | """ 276 | 277 | q_values_k_9_action_east: """ 278 | illegal 279 | 0.0000 280 | illegal 281 | """ 282 | 283 | q_values_k_9_action_exit: """ 284 | -1.0000 285 | illegal 286 | 4.6856 287 | """ 288 | 289 | q_values_k_9_action_south: """ 290 | illegal 291 | 0.1720 292 | illegal 293 | """ 294 | 295 | 
q_values_k_9_action_west: """ 296 | illegal 297 | 0.0000 298 | illegal 299 | """ 300 | 301 | q_values_k_100_action_north: """ 302 | illegal 303 | -0.4534 304 | illegal 305 | """ 306 | 307 | q_values_k_100_action_east: """ 308 | illegal 309 | 0.4063 310 | illegal 311 | """ 312 | 313 | q_values_k_100_action_exit: """ 314 | -9.4767 315 | illegal 316 | 9.8175 317 | """ 318 | 319 | q_values_k_100_action_south: """ 320 | illegal 321 | 2.1267 322 | illegal 323 | """ 324 | 325 | q_values_k_100_action_west: """ 326 | illegal 327 | 0.3919 328 | illegal 329 | """ 330 | 331 | values: """ 332 | -9.4767 333 | 2.1267 334 | 9.8175 335 | """ 336 | 337 | policy: """ 338 | exit 339 | south 340 | exit 341 | """ 342 | 343 | -------------------------------------------------------------------------------- /test_cases/q4/1-tinygrid.test: -------------------------------------------------------------------------------- 1 | class: "QLearningTest" 2 | 3 | # GridWorld specification 4 | # _ is empty space 5 | # numbers are terminal states with that value 6 | # # is a wall 7 | # S is a start state 8 | # 9 | grid: """ 10 | -10 11 | S 12 | 10 13 | """ 14 | discount: "0.5" 15 | noise: "0.0" 16 | livingReward: "0.0" 17 | epsilon: "0.5" 18 | learningRate: "0.1" 19 | numExperiences: "100" 20 | valueIterations: "100" 21 | iterations: "10000" 22 | 23 | -------------------------------------------------------------------------------- /test_cases/q4/2-tinygrid-noisy.solution: -------------------------------------------------------------------------------- 1 | q_values_k_0_action_north: """ 2 | illegal 3 | 0.0000 4 | illegal 5 | """ 6 | 7 | q_values_k_0_action_east: """ 8 | illegal 9 | 0.0000 10 | illegal 11 | """ 12 | 13 | q_values_k_0_action_exit: """ 14 | 0.0000 15 | illegal 16 | 0.0000 17 | """ 18 | 19 | q_values_k_0_action_south: """ 20 | illegal 21 | 0.0000 22 | illegal 23 | """ 24 | 25 | q_values_k_0_action_west: """ 26 | illegal 27 | 0.0000 28 | illegal 29 | """ 30 | 31 | q_values_k_1_action_north: 
""" 32 | illegal 33 | 0.0000 34 | illegal 35 | """ 36 | 37 | q_values_k_1_action_east: """ 38 | illegal 39 | 0.0000 40 | illegal 41 | """ 42 | 43 | q_values_k_1_action_exit: """ 44 | 0.0000 45 | illegal 46 | 1.0000 47 | """ 48 | 49 | q_values_k_1_action_south: """ 50 | illegal 51 | 0.0000 52 | illegal 53 | """ 54 | 55 | q_values_k_1_action_west: """ 56 | illegal 57 | 0.0000 58 | illegal 59 | """ 60 | 61 | q_values_k_2_action_north: """ 62 | illegal 63 | 0.0000 64 | illegal 65 | """ 66 | 67 | q_values_k_2_action_east: """ 68 | illegal 69 | 0.0000 70 | illegal 71 | """ 72 | 73 | q_values_k_2_action_exit: """ 74 | 0.0000 75 | illegal 76 | 1.0000 77 | """ 78 | 79 | q_values_k_2_action_south: """ 80 | illegal 81 | 0.0000 82 | illegal 83 | """ 84 | 85 | q_values_k_2_action_west: """ 86 | illegal 87 | 0.0000 88 | illegal 89 | """ 90 | 91 | q_values_k_3_action_north: """ 92 | illegal 93 | 0.0000 94 | illegal 95 | """ 96 | 97 | q_values_k_3_action_east: """ 98 | illegal 99 | 0.0000 100 | illegal 101 | """ 102 | 103 | q_values_k_3_action_exit: """ 104 | 0.0000 105 | illegal 106 | 1.9000 107 | """ 108 | 109 | q_values_k_3_action_south: """ 110 | illegal 111 | 0.0000 112 | illegal 113 | """ 114 | 115 | q_values_k_3_action_west: """ 116 | illegal 117 | 0.0000 118 | illegal 119 | """ 120 | 121 | q_values_k_4_action_north: """ 122 | illegal 123 | 0.0000 124 | illegal 125 | """ 126 | 127 | q_values_k_4_action_east: """ 128 | illegal 129 | 0.0000 130 | illegal 131 | """ 132 | 133 | q_values_k_4_action_exit: """ 134 | 0.0000 135 | illegal 136 | 2.7100 137 | """ 138 | 139 | q_values_k_4_action_south: """ 140 | illegal 141 | 0.0000 142 | illegal 143 | """ 144 | 145 | q_values_k_4_action_west: """ 146 | illegal 147 | 0.0000 148 | illegal 149 | """ 150 | 151 | q_values_k_5_action_north: """ 152 | illegal 153 | 0.0000 154 | illegal 155 | """ 156 | 157 | q_values_k_5_action_east: """ 158 | illegal 159 | 0.0000 160 | illegal 161 | """ 162 | 163 | q_values_k_5_action_exit: """ 164 | -1.0000 
165 | illegal 166 | 2.7100 167 | """ 168 | 169 | q_values_k_5_action_south: """ 170 | illegal 171 | 0.0000 172 | illegal 173 | """ 174 | 175 | q_values_k_5_action_west: """ 176 | illegal 177 | 0.0000 178 | illegal 179 | """ 180 | 181 | q_values_k_6_action_north: """ 182 | illegal 183 | 0.0000 184 | illegal 185 | """ 186 | 187 | q_values_k_6_action_east: """ 188 | illegal 189 | 0.0000 190 | illegal 191 | """ 192 | 193 | q_values_k_6_action_exit: """ 194 | -1.0000 195 | illegal 196 | 3.4390 197 | """ 198 | 199 | q_values_k_6_action_south: """ 200 | illegal 201 | 0.0000 202 | illegal 203 | """ 204 | 205 | q_values_k_6_action_west: """ 206 | illegal 207 | 0.0000 208 | illegal 209 | """ 210 | 211 | q_values_k_7_action_north: """ 212 | illegal 213 | 0.0000 214 | illegal 215 | """ 216 | 217 | q_values_k_7_action_east: """ 218 | illegal 219 | 0.0000 220 | illegal 221 | """ 222 | 223 | q_values_k_7_action_exit: """ 224 | -1.0000 225 | illegal 226 | 3.4390 227 | """ 228 | 229 | q_values_k_7_action_south: """ 230 | illegal 231 | 0.2579 232 | illegal 233 | """ 234 | 235 | q_values_k_7_action_west: """ 236 | illegal 237 | 0.0000 238 | illegal 239 | """ 240 | 241 | q_values_k_8_action_north: """ 242 | illegal 243 | 0.0000 244 | illegal 245 | """ 246 | 247 | q_values_k_8_action_east: """ 248 | illegal 249 | 0.0000 250 | illegal 251 | """ 252 | 253 | q_values_k_8_action_exit: """ 254 | -1.0000 255 | illegal 256 | 4.0951 257 | """ 258 | 259 | q_values_k_8_action_south: """ 260 | illegal 261 | 0.2579 262 | illegal 263 | """ 264 | 265 | q_values_k_8_action_west: """ 266 | illegal 267 | 0.0000 268 | illegal 269 | """ 270 | 271 | q_values_k_9_action_north: """ 272 | illegal 273 | 0.0000 274 | illegal 275 | """ 276 | 277 | q_values_k_9_action_east: """ 278 | illegal 279 | 0.0000 280 | illegal 281 | """ 282 | 283 | q_values_k_9_action_exit: """ 284 | -1.0000 285 | illegal 286 | 4.6856 287 | """ 288 | 289 | q_values_k_9_action_south: """ 290 | illegal 291 | 0.2579 292 | illegal 293 | """ 
294 | 295 | q_values_k_9_action_west: """ 296 | illegal 297 | 0.0000 298 | illegal 299 | """ 300 | 301 | q_values_k_100_action_north: """ 302 | illegal 303 | -0.6670 304 | illegal 305 | """ 306 | 307 | q_values_k_100_action_east: """ 308 | illegal 309 | 0.9499 310 | illegal 311 | """ 312 | 313 | q_values_k_100_action_exit: """ 314 | -9.4767 315 | illegal 316 | 9.8175 317 | """ 318 | 319 | q_values_k_100_action_south: """ 320 | illegal 321 | 3.2562 322 | illegal 323 | """ 324 | 325 | q_values_k_100_action_west: """ 326 | illegal 327 | 0.8236 328 | illegal 329 | """ 330 | 331 | values: """ 332 | -9.4767 333 | 3.2562 334 | 9.8175 335 | """ 336 | 337 | policy: """ 338 | exit 339 | south 340 | exit 341 | """ 342 | 343 | -------------------------------------------------------------------------------- /test_cases/q4/2-tinygrid-noisy.test: -------------------------------------------------------------------------------- 1 | class: "QLearningTest" 2 | 3 | # GridWorld specification 4 | # _ is empty space 5 | # numbers are terminal states with that value 6 | # # is a wall 7 | # S is a start state 8 | # 9 | grid: """ 10 | -10 11 | S 12 | 10 13 | """ 14 | discount: "0.75" 15 | noise: "0.25" 16 | livingReward: "0.0" 17 | epsilon: "0.5" 18 | learningRate: "0.1" 19 | numExperiences: "100" 20 | valueIterations: "100" 21 | iterations: "10000" 22 | 23 | -------------------------------------------------------------------------------- /test_cases/q4/3-bridge.test: -------------------------------------------------------------------------------- 1 | class: "QLearningTest" 2 | 3 | # GridWorld specification 4 | # _ is empty space 5 | # numbers are terminal states with that value 6 | # # is a wall 7 | # S is a start state 8 | # 9 | grid: """ 10 | # 10 # 11 | -100 _ -100 12 | -100 _ -100 13 | -100 _ -100 14 | -100 _ -100 15 | -100 S -100 16 | # 1 # 17 | """ 18 | gridName: "bridgeGrid" 19 | discount: "0.85" 20 | noise: "0.1" 21 | livingReward: "0.0" 22 | epsilon: "0.5" 23 | learningRate: "0.1" 
24 | numExperiences: "500" 25 | valueIterations: "100" 26 | iterations: "10000" 27 | 28 | -------------------------------------------------------------------------------- /test_cases/q4/4-discountgrid.test: -------------------------------------------------------------------------------- 1 | class: "QLearningTest" 2 | 3 | # GridWorld specification 4 | # _ is empty space 5 | # numbers are terminal states with that value 6 | # # is a wall 7 | # S is a start state 8 | # 9 | grid: """ 10 | -10 _ 10 _ _ 11 | -10 _ # _ _ 12 | -10 _ 1 _ _ 13 | -10 _ # # _ 14 | -10 S _ _ _ 15 | """ 16 | discount: "0.9" 17 | noise: "0.2" 18 | livingReward: "0.0" 19 | epsilon: "0.2" 20 | learningRate: "0.1" 21 | numExperiences: "3000" 22 | valueIterations: "100" 23 | iterations: "10000" 24 | 25 | -------------------------------------------------------------------------------- /test_cases/q4/CONFIG: -------------------------------------------------------------------------------- 1 | max_points: "5" 2 | class: "PassAllTestsQuestion" 3 | -------------------------------------------------------------------------------- /test_cases/q5/1-tinygrid.solution: -------------------------------------------------------------------------------- 1 | # This is the solution file for test_cases/q5/1-tinygrid.test. 2 | # File intentionally blank. 
3 | -------------------------------------------------------------------------------- /test_cases/q5/1-tinygrid.test: -------------------------------------------------------------------------------- 1 | class: "EpsilonGreedyTest" 2 | 3 | # GridWorld specification 4 | # _ is empty space 5 | # numbers are terminal states with that value 6 | # # is a wall 7 | # S is a start state 8 | # 9 | grid: """ 10 | -10 11 | S 12 | 10 13 | """ 14 | discount: "0.5" 15 | noise: "0.0" 16 | livingReward: "0.0" 17 | epsilon: "0.5" 18 | learningRate: "0.1" 19 | numExperiences: "100" 20 | valueIterations: "100" 21 | iterations: "10000" 22 | 23 | -------------------------------------------------------------------------------- /test_cases/q5/2-tinygrid-noisy.solution: -------------------------------------------------------------------------------- 1 | # This is the solution file for test_cases/q5/2-tinygrid-noisy.test. 2 | # File intentionally blank. 3 | -------------------------------------------------------------------------------- /test_cases/q5/2-tinygrid-noisy.test: -------------------------------------------------------------------------------- 1 | class: "EpsilonGreedyTest" 2 | 3 | # GridWorld specification 4 | # _ is empty space 5 | # numbers are terminal states with that value 6 | # # is a wall 7 | # S is a start state 8 | # 9 | grid: """ 10 | -10 11 | S 12 | 10 13 | """ 14 | discount: "0.75" 15 | noise: "0.25" 16 | livingReward: "0.0" 17 | epsilon: "0.5" 18 | learningRate: "0.1" 19 | numExperiences: "100" 20 | valueIterations: "100" 21 | iterations: "10000" 22 | 23 | -------------------------------------------------------------------------------- /test_cases/q5/3-bridge.solution: -------------------------------------------------------------------------------- 1 | # This is the solution file for test_cases/q5/3-bridge.test. 2 | # File intentionally blank. 
3 | -------------------------------------------------------------------------------- /test_cases/q5/3-bridge.test: -------------------------------------------------------------------------------- 1 | class: "EpsilonGreedyTest" 2 | 3 | # GridWorld specification 4 | # _ is empty space 5 | # numbers are terminal states with that value 6 | # # is a wall 7 | # S is a start state 8 | # 9 | grid: """ 10 | # 10 # 11 | -100 _ -100 12 | -100 _ -100 13 | -100 _ -100 14 | -100 _ -100 15 | -100 S -100 16 | # 1 # 17 | """ 18 | gridName: "bridgeGrid" 19 | discount: "0.85" 20 | noise: "0.1" 21 | livingReward: "0.0" 22 | epsilon: "0.5" 23 | learningRate: "0.1" 24 | numExperiences: "500" 25 | valueIterations: "100" 26 | iterations: "10000" 27 | 28 | -------------------------------------------------------------------------------- /test_cases/q5/4-discountgrid.solution: -------------------------------------------------------------------------------- 1 | # This is the solution file for test_cases/q5/4-discountgrid.test. 2 | # File intentionally blank. 
3 | -------------------------------------------------------------------------------- /test_cases/q5/4-discountgrid.test: -------------------------------------------------------------------------------- 1 | class: "EpsilonGreedyTest" 2 | 3 | # GridWorld specification 4 | # _ is empty space 5 | # numbers are terminal states with that value 6 | # # is a wall 7 | # S is a start state 8 | # 9 | grid: """ 10 | -10 _ 10 _ _ 11 | -10 _ # _ _ 12 | -10 _ 1 _ _ 13 | -10 _ # # _ 14 | -10 S _ _ _ 15 | """ 16 | discount: "0.9" 17 | noise: "0.2" 18 | livingReward: "0.0" 19 | epsilon: "0.2" 20 | learningRate: "0.1" 21 | numExperiences: "3000" 22 | valueIterations: "100" 23 | iterations: "10000" 24 | 25 | -------------------------------------------------------------------------------- /test_cases/q5/CONFIG: -------------------------------------------------------------------------------- 1 | max_points: "3" 2 | class: "PassAllTestsQuestion" 3 | -------------------------------------------------------------------------------- /test_cases/q6/CONFIG: -------------------------------------------------------------------------------- 1 | max_points: "1" 2 | class: "PassAllTestsQuestion" 3 | -------------------------------------------------------------------------------- /test_cases/q6/grade-agent.solution: -------------------------------------------------------------------------------- 1 | # This is the solution file for test_cases/q6/grade-agent.test. 2 | # File intentionally blank. 
3 | -------------------------------------------------------------------------------- /test_cases/q6/grade-agent.test: -------------------------------------------------------------------------------- 1 | class: "Question6Test" 2 | 3 | -------------------------------------------------------------------------------- /test_cases/q7/CONFIG: -------------------------------------------------------------------------------- 1 | max_points: "1" 2 | class: "PassAllTestsQuestion" 3 | -------------------------------------------------------------------------------- /test_cases/q7/grade-agent.solution: -------------------------------------------------------------------------------- 1 | # This is the solution file for test_cases/q7/grade-agent.test. 2 | # File intentionally blank. 3 | -------------------------------------------------------------------------------- /test_cases/q7/grade-agent.test: -------------------------------------------------------------------------------- 1 | class: "EvalAgentTest" 2 | 3 | # 100 test games after 2000 training games 4 | pacmanParams: "-p PacmanQAgent -x 2000 -n 2100 -l smallGrid -q -f --fixRandomSeed" 5 | 6 | winsThresholds: "70" 7 | -------------------------------------------------------------------------------- /test_cases/q8/1-tinygrid.solution: -------------------------------------------------------------------------------- 1 | weights_k_0: """ 2 | {((0, 0), 'exit'): 0, 3 | ((0, 1), 'east'): 0, 4 | ((0, 1), 'north'): 0, 5 | ((0, 1), 'south'): 0, 6 | ((0, 1), 'west'): 0, 7 | ((0, 2), 'exit'): 0} 8 | """ 9 | 10 | q_values_k_0_action_north: """ 11 | illegal 12 | 0.0000 13 | illegal 14 | """ 15 | 16 | q_values_k_0_action_east: """ 17 | illegal 18 | 0.0000 19 | illegal 20 | """ 21 | 22 | q_values_k_0_action_exit: """ 23 | 0.0000 24 | illegal 25 | 0.0000 26 | """ 27 | 28 | q_values_k_0_action_south: """ 29 | illegal 30 | 0.0000 31 | illegal 32 | """ 33 | 34 | q_values_k_0_action_west: """ 35 | illegal 36 | 0.0000 37 | illegal 38 | """ 39 | 40 | 
weights_k_1: """ 41 | {((0, 0), 'exit'): 1.0, 42 | ((0, 1), 'east'): 0, 43 | ((0, 1), 'north'): 0, 44 | ((0, 1), 'south'): 0, 45 | ((0, 1), 'west'): 0, 46 | ((0, 2), 'exit'): 0} 47 | """ 48 | 49 | q_values_k_1_action_north: """ 50 | illegal 51 | 0.0000 52 | illegal 53 | """ 54 | 55 | q_values_k_1_action_east: """ 56 | illegal 57 | 0.0000 58 | illegal 59 | """ 60 | 61 | q_values_k_1_action_exit: """ 62 | 0.0000 63 | illegal 64 | 1.0000 65 | """ 66 | 67 | q_values_k_1_action_south: """ 68 | illegal 69 | 0.0000 70 | illegal 71 | """ 72 | 73 | q_values_k_1_action_west: """ 74 | illegal 75 | 0.0000 76 | illegal 77 | """ 78 | 79 | weights_k_2: """ 80 | {((0, 0), 'exit'): 1.0, 81 | ((0, 1), 'east'): 0, 82 | ((0, 1), 'north'): 0, 83 | ((0, 1), 'south'): 0.0, 84 | ((0, 1), 'west'): 0, 85 | ((0, 2), 'exit'): 0} 86 | """ 87 | 88 | q_values_k_2_action_north: """ 89 | illegal 90 | 0.0000 91 | illegal 92 | """ 93 | 94 | q_values_k_2_action_east: """ 95 | illegal 96 | 0.0000 97 | illegal 98 | """ 99 | 100 | q_values_k_2_action_exit: """ 101 | 0.0000 102 | illegal 103 | 1.0000 104 | """ 105 | 106 | q_values_k_2_action_south: """ 107 | illegal 108 | 0.0000 109 | illegal 110 | """ 111 | 112 | q_values_k_2_action_west: """ 113 | illegal 114 | 0.0000 115 | illegal 116 | """ 117 | 118 | weights_k_3: """ 119 | {((0, 0), 'exit'): 1.8999999999999999, 120 | ((0, 1), 'east'): 0, 121 | ((0, 1), 'north'): 0, 122 | ((0, 1), 'south'): 0.0, 123 | ((0, 1), 'west'): 0, 124 | ((0, 2), 'exit'): 0} 125 | """ 126 | 127 | q_values_k_3_action_north: """ 128 | illegal 129 | 0.0000 130 | illegal 131 | """ 132 | 133 | q_values_k_3_action_east: """ 134 | illegal 135 | 0.0000 136 | illegal 137 | """ 138 | 139 | q_values_k_3_action_exit: """ 140 | 0.0000 141 | illegal 142 | 1.9000 143 | """ 144 | 145 | q_values_k_3_action_south: """ 146 | illegal 147 | 0.0000 148 | illegal 149 | """ 150 | 151 | q_values_k_3_action_west: """ 152 | illegal 153 | 0.0000 154 | illegal 155 | """ 156 | 157 | weights_k_4: """ 158 | 
{((0, 0), 'exit'): 2.71, 159 | ((0, 1), 'east'): 0, 160 | ((0, 1), 'north'): 0, 161 | ((0, 1), 'south'): 0.0, 162 | ((0, 1), 'west'): 0, 163 | ((0, 2), 'exit'): 0} 164 | """ 165 | 166 | q_values_k_4_action_north: """ 167 | illegal 168 | 0.0000 169 | illegal 170 | """ 171 | 172 | q_values_k_4_action_east: """ 173 | illegal 174 | 0.0000 175 | illegal 176 | """ 177 | 178 | q_values_k_4_action_exit: """ 179 | 0.0000 180 | illegal 181 | 2.7100 182 | """ 183 | 184 | q_values_k_4_action_south: """ 185 | illegal 186 | 0.0000 187 | illegal 188 | """ 189 | 190 | q_values_k_4_action_west: """ 191 | illegal 192 | 0.0000 193 | illegal 194 | """ 195 | 196 | weights_k_5: """ 197 | {((0, 0), 'exit'): 2.71, 198 | ((0, 1), 'east'): 0, 199 | ((0, 1), 'north'): 0, 200 | ((0, 1), 'south'): 0.0, 201 | ((0, 1), 'west'): 0, 202 | ((0, 2), 'exit'): -1.0} 203 | """ 204 | 205 | q_values_k_5_action_north: """ 206 | illegal 207 | 0.0000 208 | illegal 209 | """ 210 | 211 | q_values_k_5_action_east: """ 212 | illegal 213 | 0.0000 214 | illegal 215 | """ 216 | 217 | q_values_k_5_action_exit: """ 218 | -1.0000 219 | illegal 220 | 2.7100 221 | """ 222 | 223 | q_values_k_5_action_south: """ 224 | illegal 225 | 0.0000 226 | illegal 227 | """ 228 | 229 | q_values_k_5_action_west: """ 230 | illegal 231 | 0.0000 232 | illegal 233 | """ 234 | 235 | weights_k_6: """ 236 | {((0, 0), 'exit'): 3.4390000000000001, 237 | ((0, 1), 'east'): 0, 238 | ((0, 1), 'north'): 0, 239 | ((0, 1), 'south'): 0.0, 240 | ((0, 1), 'west'): 0, 241 | ((0, 2), 'exit'): -1.0} 242 | """ 243 | 244 | q_values_k_6_action_north: """ 245 | illegal 246 | 0.0000 247 | illegal 248 | """ 249 | 250 | q_values_k_6_action_east: """ 251 | illegal 252 | 0.0000 253 | illegal 254 | """ 255 | 256 | q_values_k_6_action_exit: """ 257 | -1.0000 258 | illegal 259 | 3.4390 260 | """ 261 | 262 | q_values_k_6_action_south: """ 263 | illegal 264 | 0.0000 265 | illegal 266 | """ 267 | 268 | q_values_k_6_action_west: """ 269 | illegal 270 | 0.0000 271 | 
illegal 272 | """ 273 | 274 | weights_k_7: """ 275 | {((0, 0), 'exit'): 3.4390000000000001, 276 | ((0, 1), 'east'): 0, 277 | ((0, 1), 'north'): 0, 278 | ((0, 1), 'south'): 0.17195000000000002, 279 | ((0, 1), 'west'): 0, 280 | ((0, 2), 'exit'): -1.0} 281 | """ 282 | 283 | q_values_k_7_action_north: """ 284 | illegal 285 | 0.0000 286 | illegal 287 | """ 288 | 289 | q_values_k_7_action_east: """ 290 | illegal 291 | 0.0000 292 | illegal 293 | """ 294 | 295 | q_values_k_7_action_exit: """ 296 | -1.0000 297 | illegal 298 | 3.4390 299 | """ 300 | 301 | q_values_k_7_action_south: """ 302 | illegal 303 | 0.1720 304 | illegal 305 | """ 306 | 307 | q_values_k_7_action_west: """ 308 | illegal 309 | 0.0000 310 | illegal 311 | """ 312 | 313 | weights_k_8: """ 314 | {((0, 0), 'exit'): 4.0951000000000004, 315 | ((0, 1), 'east'): 0, 316 | ((0, 1), 'north'): 0, 317 | ((0, 1), 'south'): 0.17195000000000002, 318 | ((0, 1), 'west'): 0, 319 | ((0, 2), 'exit'): -1.0} 320 | """ 321 | 322 | q_values_k_8_action_north: """ 323 | illegal 324 | 0.0000 325 | illegal 326 | """ 327 | 328 | q_values_k_8_action_east: """ 329 | illegal 330 | 0.0000 331 | illegal 332 | """ 333 | 334 | q_values_k_8_action_exit: """ 335 | -1.0000 336 | illegal 337 | 4.0951 338 | """ 339 | 340 | q_values_k_8_action_south: """ 341 | illegal 342 | 0.1720 343 | illegal 344 | """ 345 | 346 | q_values_k_8_action_west: """ 347 | illegal 348 | 0.0000 349 | illegal 350 | """ 351 | 352 | weights_k_9: """ 353 | {((0, 0), 'exit'): 4.6855900000000004, 354 | ((0, 1), 'east'): 0, 355 | ((0, 1), 'north'): 0, 356 | ((0, 1), 'south'): 0.17195000000000002, 357 | ((0, 1), 'west'): 0, 358 | ((0, 2), 'exit'): -1.0} 359 | """ 360 | 361 | q_values_k_9_action_north: """ 362 | illegal 363 | 0.0000 364 | illegal 365 | """ 366 | 367 | q_values_k_9_action_east: """ 368 | illegal 369 | 0.0000 370 | illegal 371 | """ 372 | 373 | q_values_k_9_action_exit: """ 374 | -1.0000 375 | illegal 376 | 4.6856 377 | """ 378 | 379 | q_values_k_9_action_south: 
""" 380 | illegal 381 | 0.1720 382 | illegal 383 | """ 384 | 385 | q_values_k_9_action_west: """ 386 | illegal 387 | 0.0000 388 | illegal 389 | """ 390 | 391 | weights_k_100: """ 392 | {((0, 0), 'exit'): 9.8175199636859922, 393 | ((0, 1), 'east'): 0.40629236674335106, 394 | ((0, 1), 'north'): -0.45341857899847993, 395 | ((0, 1), 'south'): 2.126721095524319, 396 | ((0, 1), 'west'): 0.39193283364906867, 397 | ((0, 2), 'exit'): -9.4766523669726386} 398 | """ 399 | 400 | q_values_k_100_action_north: """ 401 | illegal 402 | -0.4534 403 | illegal 404 | """ 405 | 406 | q_values_k_100_action_east: """ 407 | illegal 408 | 0.4063 409 | illegal 410 | """ 411 | 412 | q_values_k_100_action_exit: """ 413 | -9.4767 414 | illegal 415 | 9.8175 416 | """ 417 | 418 | q_values_k_100_action_south: """ 419 | illegal 420 | 2.1267 421 | illegal 422 | """ 423 | 424 | q_values_k_100_action_west: """ 425 | illegal 426 | 0.3919 427 | illegal 428 | """ 429 | 430 | -------------------------------------------------------------------------------- /test_cases/q8/1-tinygrid.test: -------------------------------------------------------------------------------- 1 | class: "ApproximateQLearningTest" 2 | 3 | # GridWorld specification 4 | # _ is empty space 5 | # numbers are terminal states with that value 6 | # # is a wall 7 | # S is a start state 8 | # 9 | grid: """ 10 | -10 11 | S 12 | 10 13 | """ 14 | discount: "0.5" 15 | noise: "0.0" 16 | livingReward: "0.0" 17 | epsilon: "0.5" 18 | learningRate: "0.1" 19 | numExperiences: "100" 20 | valueIterations: "100" 21 | iterations: "10000" 22 | 23 | -------------------------------------------------------------------------------- /test_cases/q8/2-tinygrid-noisy.solution: -------------------------------------------------------------------------------- 1 | weights_k_0: """ 2 | {((0, 0), 'exit'): 0, 3 | ((0, 1), 'east'): 0, 4 | ((0, 1), 'north'): 0, 5 | ((0, 1), 'south'): 0, 6 | ((0, 1), 'west'): 0, 7 | ((0, 2), 'exit'): 0} 8 | """ 9 | 10 | 
q_values_k_0_action_north: """ 11 | illegal 12 | 0.0000 13 | illegal 14 | """ 15 | 16 | q_values_k_0_action_east: """ 17 | illegal 18 | 0.0000 19 | illegal 20 | """ 21 | 22 | q_values_k_0_action_exit: """ 23 | 0.0000 24 | illegal 25 | 0.0000 26 | """ 27 | 28 | q_values_k_0_action_south: """ 29 | illegal 30 | 0.0000 31 | illegal 32 | """ 33 | 34 | q_values_k_0_action_west: """ 35 | illegal 36 | 0.0000 37 | illegal 38 | """ 39 | 40 | weights_k_1: """ 41 | {((0, 0), 'exit'): 1.0, 42 | ((0, 1), 'east'): 0, 43 | ((0, 1), 'north'): 0, 44 | ((0, 1), 'south'): 0, 45 | ((0, 1), 'west'): 0, 46 | ((0, 2), 'exit'): 0} 47 | """ 48 | 49 | q_values_k_1_action_north: """ 50 | illegal 51 | 0.0000 52 | illegal 53 | """ 54 | 55 | q_values_k_1_action_east: """ 56 | illegal 57 | 0.0000 58 | illegal 59 | """ 60 | 61 | q_values_k_1_action_exit: """ 62 | 0.0000 63 | illegal 64 | 1.0000 65 | """ 66 | 67 | q_values_k_1_action_south: """ 68 | illegal 69 | 0.0000 70 | illegal 71 | """ 72 | 73 | q_values_k_1_action_west: """ 74 | illegal 75 | 0.0000 76 | illegal 77 | """ 78 | 79 | weights_k_2: """ 80 | {((0, 0), 'exit'): 1.0, 81 | ((0, 1), 'east'): 0, 82 | ((0, 1), 'north'): 0, 83 | ((0, 1), 'south'): 0.0, 84 | ((0, 1), 'west'): 0, 85 | ((0, 2), 'exit'): 0} 86 | """ 87 | 88 | q_values_k_2_action_north: """ 89 | illegal 90 | 0.0000 91 | illegal 92 | """ 93 | 94 | q_values_k_2_action_east: """ 95 | illegal 96 | 0.0000 97 | illegal 98 | """ 99 | 100 | q_values_k_2_action_exit: """ 101 | 0.0000 102 | illegal 103 | 1.0000 104 | """ 105 | 106 | q_values_k_2_action_south: """ 107 | illegal 108 | 0.0000 109 | illegal 110 | """ 111 | 112 | q_values_k_2_action_west: """ 113 | illegal 114 | 0.0000 115 | illegal 116 | """ 117 | 118 | weights_k_3: """ 119 | {((0, 0), 'exit'): 1.8999999999999999, 120 | ((0, 1), 'east'): 0, 121 | ((0, 1), 'north'): 0, 122 | ((0, 1), 'south'): 0.0, 123 | ((0, 1), 'west'): 0, 124 | ((0, 2), 'exit'): 0} 125 | """ 126 | 127 | q_values_k_3_action_north: """ 128 | illegal 129 | 
0.0000 130 | illegal 131 | """ 132 | 133 | q_values_k_3_action_east: """ 134 | illegal 135 | 0.0000 136 | illegal 137 | """ 138 | 139 | q_values_k_3_action_exit: """ 140 | 0.0000 141 | illegal 142 | 1.9000 143 | """ 144 | 145 | q_values_k_3_action_south: """ 146 | illegal 147 | 0.0000 148 | illegal 149 | """ 150 | 151 | q_values_k_3_action_west: """ 152 | illegal 153 | 0.0000 154 | illegal 155 | """ 156 | 157 | weights_k_4: """ 158 | {((0, 0), 'exit'): 2.71, 159 | ((0, 1), 'east'): 0, 160 | ((0, 1), 'north'): 0, 161 | ((0, 1), 'south'): 0.0, 162 | ((0, 1), 'west'): 0, 163 | ((0, 2), 'exit'): 0} 164 | """ 165 | 166 | q_values_k_4_action_north: """ 167 | illegal 168 | 0.0000 169 | illegal 170 | """ 171 | 172 | q_values_k_4_action_east: """ 173 | illegal 174 | 0.0000 175 | illegal 176 | """ 177 | 178 | q_values_k_4_action_exit: """ 179 | 0.0000 180 | illegal 181 | 2.7100 182 | """ 183 | 184 | q_values_k_4_action_south: """ 185 | illegal 186 | 0.0000 187 | illegal 188 | """ 189 | 190 | q_values_k_4_action_west: """ 191 | illegal 192 | 0.0000 193 | illegal 194 | """ 195 | 196 | weights_k_5: """ 197 | {((0, 0), 'exit'): 2.71, 198 | ((0, 1), 'east'): 0, 199 | ((0, 1), 'north'): 0, 200 | ((0, 1), 'south'): 0.0, 201 | ((0, 1), 'west'): 0, 202 | ((0, 2), 'exit'): -1.0} 203 | """ 204 | 205 | q_values_k_5_action_north: """ 206 | illegal 207 | 0.0000 208 | illegal 209 | """ 210 | 211 | q_values_k_5_action_east: """ 212 | illegal 213 | 0.0000 214 | illegal 215 | """ 216 | 217 | q_values_k_5_action_exit: """ 218 | -1.0000 219 | illegal 220 | 2.7100 221 | """ 222 | 223 | q_values_k_5_action_south: """ 224 | illegal 225 | 0.0000 226 | illegal 227 | """ 228 | 229 | q_values_k_5_action_west: """ 230 | illegal 231 | 0.0000 232 | illegal 233 | """ 234 | 235 | weights_k_6: """ 236 | {((0, 0), 'exit'): 3.4390000000000001, 237 | ((0, 1), 'east'): 0, 238 | ((0, 1), 'north'): 0, 239 | ((0, 1), 'south'): 0.0, 240 | ((0, 1), 'west'): 0, 241 | ((0, 2), 'exit'): -1.0} 242 | """ 243 | 244 | 
q_values_k_6_action_north: """ 245 | illegal 246 | 0.0000 247 | illegal 248 | """ 249 | 250 | q_values_k_6_action_east: """ 251 | illegal 252 | 0.0000 253 | illegal 254 | """ 255 | 256 | q_values_k_6_action_exit: """ 257 | -1.0000 258 | illegal 259 | 3.4390 260 | """ 261 | 262 | q_values_k_6_action_south: """ 263 | illegal 264 | 0.0000 265 | illegal 266 | """ 267 | 268 | q_values_k_6_action_west: """ 269 | illegal 270 | 0.0000 271 | illegal 272 | """ 273 | 274 | weights_k_7: """ 275 | {((0, 0), 'exit'): 3.4390000000000001, 276 | ((0, 1), 'east'): 0, 277 | ((0, 1), 'north'): 0, 278 | ((0, 1), 'south'): 0.25792500000000002, 279 | ((0, 1), 'west'): 0, 280 | ((0, 2), 'exit'): -1.0} 281 | """ 282 | 283 | q_values_k_7_action_north: """ 284 | illegal 285 | 0.0000 286 | illegal 287 | """ 288 | 289 | q_values_k_7_action_east: """ 290 | illegal 291 | 0.0000 292 | illegal 293 | """ 294 | 295 | q_values_k_7_action_exit: """ 296 | -1.0000 297 | illegal 298 | 3.4390 299 | """ 300 | 301 | q_values_k_7_action_south: """ 302 | illegal 303 | 0.2579 304 | illegal 305 | """ 306 | 307 | q_values_k_7_action_west: """ 308 | illegal 309 | 0.0000 310 | illegal 311 | """ 312 | 313 | weights_k_8: """ 314 | {((0, 0), 'exit'): 4.0951000000000004, 315 | ((0, 1), 'east'): 0, 316 | ((0, 1), 'north'): 0, 317 | ((0, 1), 'south'): 0.25792500000000002, 318 | ((0, 1), 'west'): 0, 319 | ((0, 2), 'exit'): -1.0} 320 | """ 321 | 322 | q_values_k_8_action_north: """ 323 | illegal 324 | 0.0000 325 | illegal 326 | """ 327 | 328 | q_values_k_8_action_east: """ 329 | illegal 330 | 0.0000 331 | illegal 332 | """ 333 | 334 | q_values_k_8_action_exit: """ 335 | -1.0000 336 | illegal 337 | 4.0951 338 | """ 339 | 340 | q_values_k_8_action_south: """ 341 | illegal 342 | 0.2579 343 | illegal 344 | """ 345 | 346 | q_values_k_8_action_west: """ 347 | illegal 348 | 0.0000 349 | illegal 350 | """ 351 | 352 | weights_k_9: """ 353 | {((0, 0), 'exit'): 4.6855900000000004, 354 | ((0, 1), 'east'): 0, 355 | ((0, 1), 'north'): 
0, 356 | ((0, 1), 'south'): 0.25792500000000002, 357 | ((0, 1), 'west'): 0, 358 | ((0, 2), 'exit'): -1.0} 359 | """ 360 | 361 | q_values_k_9_action_north: """ 362 | illegal 363 | 0.0000 364 | illegal 365 | """ 366 | 367 | q_values_k_9_action_east: """ 368 | illegal 369 | 0.0000 370 | illegal 371 | """ 372 | 373 | q_values_k_9_action_exit: """ 374 | -1.0000 375 | illegal 376 | 4.6856 377 | """ 378 | 379 | q_values_k_9_action_south: """ 380 | illegal 381 | 0.2579 382 | illegal 383 | """ 384 | 385 | q_values_k_9_action_west: """ 386 | illegal 387 | 0.0000 388 | illegal 389 | """ 390 | 391 | weights_k_100: """ 392 | {((0, 0), 'exit'): 9.8175199636859922, 393 | ((0, 1), 'east'): 0.94989681048235752, 394 | ((0, 1), 'north'): -0.66699795412272, 395 | ((0, 1), 'south'): 3.256207905310105, 396 | ((0, 1), 'west'): 0.82362807350146272, 397 | ((0, 2), 'exit'): -9.4766523669726386} 398 | """ 399 | 400 | q_values_k_100_action_north: """ 401 | illegal 402 | -0.6670 403 | illegal 404 | """ 405 | 406 | q_values_k_100_action_east: """ 407 | illegal 408 | 0.9499 409 | illegal 410 | """ 411 | 412 | q_values_k_100_action_exit: """ 413 | -9.4767 414 | illegal 415 | 9.8175 416 | """ 417 | 418 | q_values_k_100_action_south: """ 419 | illegal 420 | 3.2562 421 | illegal 422 | """ 423 | 424 | q_values_k_100_action_west: """ 425 | illegal 426 | 0.8236 427 | illegal 428 | """ 429 | 430 | -------------------------------------------------------------------------------- /test_cases/q8/2-tinygrid-noisy.test: -------------------------------------------------------------------------------- 1 | class: "ApproximateQLearningTest" 2 | 3 | # GridWorld specification 4 | # _ is empty space 5 | # numbers are terminal states with that value 6 | # # is a wall 7 | # S is a start state 8 | # 9 | grid: """ 10 | -10 11 | S 12 | 10 13 | """ 14 | discount: "0.75" 15 | noise: "0.25" 16 | livingReward: "0.0" 17 | epsilon: "0.5" 18 | learningRate: "0.1" 19 | numExperiences: "100" 20 | valueIterations: "100" 21 | 
iterations: "10000" 22 | 23 | -------------------------------------------------------------------------------- /test_cases/q8/3-bridge.test: -------------------------------------------------------------------------------- 1 | class: "ApproximateQLearningTest" 2 | 3 | # GridWorld specification 4 | # _ is empty space 5 | # numbers are terminal states with that value 6 | # # is a wall 7 | # S is a start state 8 | # 9 | grid: """ 10 | # 10 # 11 | -100 _ -100 12 | -100 _ -100 13 | -100 _ -100 14 | -100 _ -100 15 | -100 S -100 16 | # 1 # 17 | """ 18 | gridName: "bridgeGrid" 19 | discount: "0.85" 20 | noise: "0.1" 21 | livingReward: "0.0" 22 | epsilon: "0.5" 23 | learningRate: "0.1" 24 | numExperiences: "500" 25 | valueIterations: "100" 26 | iterations: "10000" 27 | 28 | -------------------------------------------------------------------------------- /test_cases/q8/4-discountgrid.test: -------------------------------------------------------------------------------- 1 | class: "ApproximateQLearningTest" 2 | 3 | # GridWorld specification 4 | # _ is empty space 5 | # numbers are terminal states with that value 6 | # # is a wall 7 | # S is a start state 8 | # 9 | grid: """ 10 | -10 _ 10 _ _ 11 | -10 _ # _ _ 12 | -10 _ 1 _ _ 13 | -10 _ # # _ 14 | -10 S _ _ _ 15 | """ 16 | discount: "0.9" 17 | noise: "0.2" 18 | livingReward: "0.0" 19 | epsilon: "0.2" 20 | learningRate: "0.1" 21 | numExperiences: "3000" 22 | valueIterations: "100" 23 | iterations: "10000" 24 | 25 | -------------------------------------------------------------------------------- /test_cases/q8/CONFIG: -------------------------------------------------------------------------------- 1 | max_points: "3" 2 | class: "PassAllTestsQuestion" 3 | -------------------------------------------------------------------------------- /textDisplay.py: -------------------------------------------------------------------------------- 1 | # textDisplay.py 2 | # -------------- 3 | # Licensing Information: Please do not distribute or 
publish solutions to this 4 | # project. You are free to use and extend these projects for educational 5 | # purposes. The Pacman AI projects were developed at UC Berkeley, primarily by 6 | # John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). 7 | # Student side autograding was added by Brad Miller, Nick Hay, and Pieter 8 | # Abbeel in Spring 2013. 9 | # For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html 10 | 11 | import pacman, time 12 | 13 | DRAW_EVERY = 1 14 | SLEEP_TIME = 0 # This can be overwritten by __init__ 15 | DISPLAY_MOVES = False 16 | QUIET = False # Supresses output 17 | 18 | class NullGraphics: 19 | def initialize(self, state, isBlue = False): 20 | pass 21 | 22 | def update(self, state): 23 | pass 24 | 25 | def pause(self): 26 | time.sleep(SLEEP_TIME) 27 | 28 | def draw(self, state): 29 | print state 30 | 31 | def finish(self): 32 | pass 33 | 34 | class PacmanGraphics: 35 | def __init__(self, speed=None): 36 | if speed != None: 37 | global SLEEP_TIME 38 | SLEEP_TIME = speed 39 | 40 | def initialize(self, state, isBlue = False): 41 | self.draw(state) 42 | self.pause() 43 | self.turn = 0 44 | self.agentCounter = 0 45 | 46 | def update(self, state): 47 | numAgents = len(state.agentStates) 48 | self.agentCounter = (self.agentCounter + 1) % numAgents 49 | if self.agentCounter == 0: 50 | self.turn += 1 51 | if DISPLAY_MOVES: 52 | ghosts = [pacman.nearestPoint(state.getGhostPosition(i)) for i in range(1, numAgents)] 53 | print "%4d) P: %-8s" % (self.turn, str(pacman.nearestPoint(state.getPacmanPosition()))),'| Score: %-5d' % state.score,'| Ghosts:', ghosts 54 | if self.turn % DRAW_EVERY == 0: 55 | self.draw(state) 56 | self.pause() 57 | if state._win or state._lose: 58 | self.draw(state) 59 | 60 | def pause(self): 61 | time.sleep(SLEEP_TIME) 62 | 63 | def draw(self, state): 64 | print state 65 | 66 | def finish(self): 67 | pass 68 | 
-------------------------------------------------------------------------------- /textGridworldDisplay.py: -------------------------------------------------------------------------------- 1 | # textGridworldDisplay.py 2 | # ----------------------- 3 | # Licensing Information: Please do not distribute or publish solutions to this 4 | # project. You are free to use and extend these projects for educational 5 | # purposes. The Pacman AI projects were developed at UC Berkeley, primarily by 6 | # John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). 7 | # Student side autograding was added by Brad Miller, Nick Hay, and Pieter 8 | # Abbeel in Spring 2013. 9 | # For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html 10 | 11 | import util 12 | 13 | class TextGridworldDisplay: 14 | 15 | def __init__(self, gridworld): 16 | self.gridworld = gridworld 17 | 18 | def start(self): 19 | pass 20 | 21 | def pause(self): 22 | pass 23 | 24 | def displayValues(self, agent, currentState = None, message = None): 25 | if message != None: 26 | print message 27 | values = util.Counter() 28 | policy = {} 29 | states = self.gridworld.getStates() 30 | for state in states: 31 | values[state] = agent.getValue(state) 32 | policy[state] = agent.getPolicy(state) 33 | prettyPrintValues(self.gridworld, values, policy, currentState) 34 | 35 | def displayNullValues(self, agent, currentState = None, message = None): 36 | if message != None: print message 37 | prettyPrintNullValues(self.gridworld, currentState) 38 | 39 | def displayQValues(self, agent, currentState = None, message = None): 40 | if message != None: print message 41 | qValues = util.Counter() 42 | states = self.gridworld.getStates() 43 | for state in states: 44 | for action in self.gridworld.getPossibleActions(state): 45 | qValues[(state, action)] = agent.getQValue(state, action) 46 | prettyPrintQValues(self.gridworld, qValues, currentState) 47 | 48 | 49 | def prettyPrintValues(gridWorld, values, 
policy=None, currentState = None): 50 | grid = gridWorld.grid 51 | maxLen = 11 52 | newRows = [] 53 | for y in range(grid.height): 54 | newRow = [] 55 | for x in range(grid.width): 56 | state = (x, y) 57 | value = values[state] 58 | action = None 59 | if policy != None and state in policy: 60 | action = policy[state] 61 | actions = gridWorld.getPossibleActions(state) 62 | if action not in actions and 'exit' in actions: 63 | action = 'exit' 64 | valString = None 65 | if action == 'exit': 66 | valString = border('%.2f' % value) 67 | else: 68 | valString = '\n\n%.2f\n\n' % value 69 | valString += ' '*maxLen 70 | if grid[x][y] == 'S': 71 | valString = '\n\nS: %.2f\n\n' % value 72 | valString += ' '*maxLen 73 | if grid[x][y] == '#': 74 | valString = '\n#####\n#####\n#####\n' 75 | valString += ' '*maxLen 76 | pieces = [valString] 77 | text = ("\n".join(pieces)).split('\n') 78 | if currentState == state: 79 | l = len(text[1]) 80 | if l == 0: 81 | text[1] = '*' 82 | else: 83 | text[1] = "|" + ' ' * int((l-1)/2-1) + '*' + ' ' * int((l)/2-1) + "|" 84 | if action == 'east': 85 | text[2] = ' ' + text[2] + ' >' 86 | elif action == 'west': 87 | text[2] = '< ' + text[2] + ' ' 88 | elif action == 'north': 89 | text[0] = ' ' * int(maxLen/2) + '^' +' ' * int(maxLen/2) 90 | elif action == 'south': 91 | text[4] = ' ' * int(maxLen/2) + 'v' +' ' * int(maxLen/2) 92 | newCell = "\n".join(text) 93 | newRow.append(newCell) 94 | newRows.append(newRow) 95 | numCols = grid.width 96 | for rowNum, row in enumerate(newRows): 97 | row.insert(0,"\n\n"+str(rowNum)) 98 | newRows.reverse() 99 | colLabels = [str(colNum) for colNum in range(numCols)] 100 | colLabels.insert(0,' ') 101 | finalRows = [colLabels] + newRows 102 | print indent(finalRows,separateRows=True,delim='|', prefix='|',postfix='|', justify='center',hasHeader=True) 103 | 104 | 105 | def prettyPrintNullValues(gridWorld, currentState = None): 106 | grid = gridWorld.grid 107 | maxLen = 11 108 | newRows = [] 109 | for y in 
range(grid.height): 110 | newRow = [] 111 | for x in range(grid.width): 112 | state = (x, y) 113 | 114 | # value = values[state] 115 | 116 | action = None 117 | # if policy != None and state in policy: 118 | # action = policy[state] 119 | # 120 | actions = gridWorld.getPossibleActions(state) 121 | 122 | if action not in actions and 'exit' in actions: 123 | action = 'exit' 124 | 125 | valString = None 126 | # if action == 'exit': 127 | # valString = border('%.2f' % value) 128 | # else: 129 | # valString = '\n\n%.2f\n\n' % value 130 | # valString += ' '*maxLen 131 | 132 | if grid[x][y] == 'S': 133 | valString = '\n\nS\n\n' 134 | valString += ' '*maxLen 135 | elif grid[x][y] == '#': 136 | valString = '\n#####\n#####\n#####\n' 137 | valString += ' '*maxLen 138 | elif type(grid[x][y]) == float or type(grid[x][y]) == int: 139 | valString = border('%.2f' % float(grid[x][y])) 140 | else: valString = border(' ') 141 | pieces = [valString] 142 | 143 | text = ("\n".join(pieces)).split('\n') 144 | 145 | if currentState == state: 146 | l = len(text[1]) 147 | if l == 0: 148 | text[1] = '*' 149 | else: 150 | text[1] = "|" + ' ' * int((l-1)/2-1) + '*' + ' ' * int((l)/2-1) + "|" 151 | 152 | if action == 'east': 153 | text[2] = ' ' + text[2] + ' >' 154 | elif action == 'west': 155 | text[2] = '< ' + text[2] + ' ' 156 | elif action == 'north': 157 | text[0] = ' ' * int(maxLen/2) + '^' +' ' * int(maxLen/2) 158 | elif action == 'south': 159 | text[4] = ' ' * int(maxLen/2) + 'v' +' ' * int(maxLen/2) 160 | newCell = "\n".join(text) 161 | newRow.append(newCell) 162 | newRows.append(newRow) 163 | numCols = grid.width 164 | for rowNum, row in enumerate(newRows): 165 | row.insert(0,"\n\n"+str(rowNum)) 166 | newRows.reverse() 167 | colLabels = [str(colNum) for colNum in range(numCols)] 168 | colLabels.insert(0,' ') 169 | finalRows = [colLabels] + newRows 170 | print indent(finalRows,separateRows=True,delim='|', prefix='|',postfix='|', justify='center',hasHeader=True) 171 | 172 | def 
prettyPrintQValues(gridWorld, qValues, currentState=None): 173 | grid = gridWorld.grid 174 | maxLen = 11 175 | newRows = [] 176 | for y in range(grid.height): 177 | newRow = [] 178 | for x in range(grid.width): 179 | state = (x, y) 180 | actions = gridWorld.getPossibleActions(state) 181 | if actions == None or len(actions) == 0: 182 | actions = [None] 183 | bestQ = max([qValues[(state, action)] for action in actions]) 184 | bestActions = [action for action in actions if qValues[(state, action)] == bestQ] 185 | 186 | # display cell 187 | qStrings = dict([(action, "%.2f" % qValues[(state, action)]) for action in actions]) 188 | northString = ('north' in qStrings and qStrings['north']) or ' ' 189 | southString = ('south' in qStrings and qStrings['south']) or ' ' 190 | eastString = ('east' in qStrings and qStrings['east']) or ' ' 191 | westString = ('west' in qStrings and qStrings['west']) or ' ' 192 | exitString = ('exit' in qStrings and qStrings['exit']) or ' ' 193 | 194 | eastLen = len(eastString) 195 | westLen = len(westString) 196 | if eastLen < westLen: 197 | eastString = ' '*(westLen-eastLen)+eastString 198 | if westLen < eastLen: 199 | westString = westString+' '*(eastLen-westLen) 200 | 201 | if 'north' in bestActions: 202 | northString = '/'+northString+'\\' 203 | if 'south' in bestActions: 204 | southString = '\\'+southString+'/' 205 | if 'east' in bestActions: 206 | eastString = ''+eastString+'>' 207 | else: 208 | eastString = ''+eastString+' ' 209 | if 'west' in bestActions: 210 | westString = '<'+westString+'' 211 | else: 212 | westString = ' '+westString+'' 213 | if 'exit' in bestActions: 214 | exitString = '[ '+exitString+' ]' 215 | 216 | 217 | ewString = westString + " " + eastString 218 | if state == currentState: 219 | ewString = westString + " * " + eastString 220 | if state == gridWorld.getStartState(): 221 | ewString = westString + " S " + eastString 222 | if state == currentState and state == gridWorld.getStartState(): 223 | ewString = westString 
+ " S:* " + eastString 224 | 225 | text = [northString, "\n"+exitString, ewString, ' '*maxLen+"\n", southString] 226 | 227 | if grid[x][y] == '#': 228 | text = ['', '\n#####\n#####\n#####', ''] 229 | 230 | newCell = "\n".join(text) 231 | newRow.append(newCell) 232 | newRows.append(newRow) 233 | numCols = grid.width 234 | for rowNum, row in enumerate(newRows): 235 | row.insert(0,"\n\n\n"+str(rowNum)) 236 | newRows.reverse() 237 | colLabels = [str(colNum) for colNum in range(numCols)] 238 | colLabels.insert(0,' ') 239 | finalRows = [colLabels] + newRows 240 | 241 | print indent(finalRows,separateRows=True,delim='|',prefix='|',postfix='|', justify='center',hasHeader=True) 242 | 243 | def border(text): 244 | length = len(text) 245 | pieces = ['-' * (length+2), '|'+' ' * (length+2)+'|', ' | '+text+' | ', '|'+' ' * (length+2)+'|','-' * (length+2)] 246 | return '\n'.join(pieces) 247 | 248 | # INDENTING CODE 249 | 250 | # Indenting code based on a post from George Sakkis 251 | # (http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/267662) 252 | 253 | import cStringIO,operator 254 | 255 | def indent(rows, hasHeader=False, headerChar='-', delim=' | ', justify='left', 256 | separateRows=False, prefix='', postfix='', wrapfunc=lambda x:x): 257 | """Indents a table by column. 258 | - rows: A sequence of sequences of items, one sequence per row. 259 | - hasHeader: True if the first row consists of the columns' names. 260 | - headerChar: Character to be used for the row separator line 261 | (if hasHeader==True or separateRows==True). 262 | - delim: The column delimiter. 263 | - justify: Determines how are data justified in their column. 264 | Valid values are 'left','right' and 'center'. 265 | - separateRows: True if rows are to be separated by a line 266 | of 'headerChar's. 267 | - prefix: A string prepended to each printed row. 268 | - postfix: A string appended to each printed row. 
269 | - wrapfunc: A function f(text) for wrapping text; each element in 270 | the table is first wrapped by this function.""" 271 | # closure for breaking logical rows to physical, using wrapfunc 272 | def rowWrapper(row): 273 | newRows = [wrapfunc(item).split('\n') for item in row] 274 | return [[substr or '' for substr in item] for item in map(None,*newRows)] 275 | # break each logical row into one or more physical ones 276 | logicalRows = [rowWrapper(row) for row in rows] 277 | # columns of physical rows 278 | columns = map(None,*reduce(operator.add,logicalRows)) 279 | # get the maximum of each column by the string length of its items 280 | maxWidths = [max([len(str(item)) for item in column]) for column in columns] 281 | rowSeparator = headerChar * (len(prefix) + len(postfix) + sum(maxWidths) + \ 282 | len(delim)*(len(maxWidths)-1)) 283 | # select the appropriate justify method 284 | justify = {'center':str.center, 'right':str.rjust, 'left':str.ljust}[justify.lower()] 285 | output=cStringIO.StringIO() 286 | if separateRows: print >> output, rowSeparator 287 | for physicalRows in logicalRows: 288 | for row in physicalRows: 289 | print >> output, \ 290 | prefix \ 291 | + delim.join([justify(str(item),width) for (item,width) in zip(row,maxWidths)]) \ 292 | + postfix 293 | if separateRows or hasHeader: print >> output, rowSeparator; hasHeader=False 294 | return output.getvalue() 295 | 296 | import math 297 | def wrap_always(text, width): 298 | """A simple word-wrap function that wraps text on exactly width characters. 
299 | It doesn't split the text in words.""" 300 | return '\n'.join([ text[width*i:width*(i+1)] \ 301 | for i in xrange(int(math.ceil(1.*len(text)/width))) ]) 302 | 303 | 304 | # TEST OF DISPLAY CODE 305 | 306 | if __name__ == '__main__': 307 | import gridworld, util 308 | 309 | grid = gridworld.getCliffGrid3() 310 | print grid.getStates() 311 | 312 | policy = dict([(state,'east') for state in grid.getStates()]) 313 | values = util.Counter(dict([(state,1000.23) for state in grid.getStates()])) 314 | prettyPrintValues(grid, values, policy, currentState = (0,0)) 315 | 316 | stateCrossActions = [[(state, action) for action in grid.getPossibleActions(state)] for state in grid.getStates()] 317 | qStates = reduce(lambda x,y: x+y, stateCrossActions, []) 318 | qValues = util.Counter(dict([((state, action), 10.5) for state, action in qStates])) 319 | qValues = util.Counter(dict([((state, action), 10.5) for state, action in reduce(lambda x,y: x+y, stateCrossActions, [])])) 320 | prettyPrintQValues(grid, qValues, currentState = (0,0)) 321 | -------------------------------------------------------------------------------- /valueIterationAgents.py: -------------------------------------------------------------------------------- 1 | # valueIterationAgents.py 2 | # ----------------------- 3 | # Licensing Information: Please do not distribute or publish solutions to this 4 | # project. You are free to use and extend these projects for educational 5 | # purposes. The Pacman AI projects were developed at UC Berkeley, primarily by 6 | # John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). 7 | # Student side autograding was added by Brad Miller, Nick Hay, and Pieter 8 | # Abbeel in Spring 2013. 
9 | # For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html 10 | 11 | import mdp, util 12 | 13 | from learningAgents import ValueEstimationAgent 14 | 15 | class ValueIterationAgent(ValueEstimationAgent): 16 | """ 17 | * Please read learningAgents.py before reading this.* 18 | 19 | A ValueIterationAgent takes a Markov decision process 20 | (see mdp.py) on initialization and runs value iteration 21 | for a given number of iterations using the supplied 22 | discount factor. 23 | """ 24 | def __init__(self, mdp, discount = 0.9, iterations = 100): 25 | """ 26 | Your value iteration agent should take an mdp on 27 | construction, run the indicated number of iterations 28 | and then act according to the resulting policy. 29 | 30 | Some useful mdp methods you will use: 31 | mdp.getStates() 32 | mdp.getPossibleActions(state) 33 | mdp.getTransitionStatesAndProbs(state, action) 34 | mdp.getReward(state, action, nextState) 35 | mdp.isTerminal(state) 36 | """ 37 | self.mdp = mdp 38 | self.discount = discount 39 | self.iterations = iterations 40 | self.values = util.Counter() # A Counter is a dict with default 0 41 | # Write value iteration code here 42 | vcurr = util.Counter() 43 | for i in xrange(self.iterations): 44 | vcurr = self.values.copy() 45 | for state in self.mdp.getStates(): 46 | all_actions = self.mdp.getPossibleActions(state) 47 | transitions = [] 48 | value_list = [] 49 | if self.mdp.isTerminal(state): 50 | self.values[state] = 0 51 | else: 52 | for action in all_actions: 53 | transitions = self.mdp.getTransitionStatesAndProbs(state, action) 54 | value = 0 55 | for transition in transitions: 56 | value += transition[1]*(self.mdp.getReward(state, action, transition[0]) + self.discount * vcurr[transition[0]]) 57 | value_list.append(value) 58 | self.values[state] = max(value_list) 59 | 60 | 61 | def getValue(self, state): 62 | """ 63 | Return the value of the state (computed in __init__). 
64 | """ 65 | return self.values[state] 66 | 67 | 68 | def computeQValueFromValues(self, state, action): 69 | """ 70 | Compute the Q-value of action in state from the 71 | value function stored in self.values. 72 | """ 73 | value = 0 74 | transitions = self.mdp.getTransitionStatesAndProbs(state, action) 75 | for transition in transitions: 76 | value += transition[1]*(self.mdp.getReward(state, action, transition[0]) + self.discount * self.values[transition[0]]) 77 | return value 78 | 79 | 80 | 81 | def computeActionFromValues(self, state): 82 | """ 83 | The policy is the best action in the given state 84 | according to the values currently stored in self.values. 85 | 86 | You may break ties any way you see fit. Note that if 87 | there are no legal actions, which is the case at the 88 | terminal state, you should return None. 89 | """ 90 | if self.mdp.isTerminal(state): 91 | return None 92 | else: 93 | bestval = -99999999999 94 | bestaction = 0 95 | all_actions = self.mdp.getPossibleActions(state) 96 | for action in all_actions: 97 | transitions = self.mdp.getTransitionStatesAndProbs(state, action) 98 | value = 0 99 | for transition in transitions: 100 | value += transition[1]*(self.mdp.getReward(state, action, transition[0]) + self.discount * self.values[transition[0]]) 101 | if value > bestval: 102 | bestaction = action 103 | bestval = value 104 | return bestaction 105 | 106 | 107 | 108 | def getPolicy(self, state): 109 | return self.computeActionFromValues(state) 110 | 111 | def getAction(self, state): 112 | "Returns the policy at the state (no exploration)." 113 | return self.computeActionFromValues(state) 114 | 115 | def getQValue(self, state, action): 116 | return self.computeQValueFromValues(state, action) 117 | --------------------------------------------------------------------------------