From c3b9ff70509560342a5b51f053822b5e3b6d6ab6 Mon Sep 17 00:00:00 2001
From: François Pelletier
Date: Wed, 10 Apr 2019 23:58:15 -0400
Subject: [PATCH] add the tp 2 files, with q1 answered
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 reinforcement/VERSION | 1 +
 reinforcement/analysis.py | 73 +
 reinforcement/autograder.py | 351 +++++
 reinforcement/crawler.py | 384 ++++++
 reinforcement/environment.py | 56 +
 reinforcement/featureExtractors.py | 103 ++
 reinforcement/game.py | 729 ++++++++++
 reinforcement/ghostAgents.py | 81 ++
 reinforcement/grading.py | 282 ++++
 reinforcement/graphicsCrawlerDisplay.py | 333 +++++
 reinforcement/graphicsDisplay.py | 679 +++++++++
 reinforcement/graphicsGridworldDisplay.py | 348 +++++
 reinforcement/graphicsUtils.py | 398 ++++++
 reinforcement/gridworld.py | 585 ++++++++
 reinforcement/keyboardAgents.py | 84 ++
 reinforcement/layout.py | 149 ++
 reinforcement/layouts/capsuleClassic.lay | 7 +
 reinforcement/layouts/contestClassic.lay | 9 +
 reinforcement/layouts/mediumClassic.lay | 11 +
 reinforcement/layouts/mediumGrid.lay | 7 +
 reinforcement/layouts/minimaxClassic.lay | 5 +
 reinforcement/layouts/openClassic.lay | 9 +
 reinforcement/layouts/originalClassic.lay | 27 +
 reinforcement/layouts/smallClassic.lay | 7 +
 reinforcement/layouts/smallGrid.lay | 7 +
 reinforcement/layouts/testClassic.lay | 10 +
 reinforcement/layouts/trappedClassic.lay | 5 +
 reinforcement/layouts/trickyClassic.lay | 13 +
 reinforcement/learningAgents.py | 258 ++++
 reinforcement/mdp.py | 67 +
 reinforcement/pacman.py | 684 ++++++++++
 reinforcement/pacmanAgents.py | 52 +
 reinforcement/projectParams.py | 18 +
 reinforcement/qlearningAgents.py | 186 +++
 reinforcement/reinforcementTestClasses.py | 924 +++++++++++++
 reinforcement/testClasses.py | 189 +++
 reinforcement/testParser.py | 85 ++
 reinforcement/test_cases/CONFIG | 0
 .../test_cases/q1/1-tinygrid.solution | 410 ++++++
 reinforcement/test_cases/q1/1-tinygrid.test | 22 +
 .../test_cases/q1/2-tinygrid-noisy.solution | 410 ++++++
 .../test_cases/q1/2-tinygrid-noisy.test | 22 +
 reinforcement/test_cases/q1/3-bridge.solution | 678 +++++++++
 reinforcement/test_cases/q1/3-bridge.test | 27 +
 .../test_cases/q1/4-discountgrid.solution | 544 ++++++++
 .../test_cases/q1/4-discountgrid.test | 24 +
 reinforcement/test_cases/q1/CONFIG | 2 +
 .../test_cases/q2/1-bridge-grid.solution | 2 +
 .../test_cases/q2/1-bridge-grid.test | 29 +
 reinforcement/test_cases/q2/CONFIG | 2 +
 .../test_cases/q3/1-question-3.1.solution | 2 +
 .../test_cases/q3/1-question-3.1.test | 31 +
 .../test_cases/q3/2-question-3.2.solution | 2 +
 .../test_cases/q3/2-question-3.2.test | 31 +
 .../test_cases/q3/3-question-3.3.solution | 2 +
 .../test_cases/q3/3-question-3.3.test | 31 +
 .../test_cases/q3/4-question-3.4.solution | 2 +
 .../test_cases/q3/4-question-3.4.test | 36 +
 .../test_cases/q3/5-question-3.5.solution | 2 +
 .../test_cases/q3/5-question-3.5.test | 36 +
 reinforcement/test_cases/q3/CONFIG | 2 +
 .../test_cases/q4/1-tinygrid.solution | 342 +++++
 reinforcement/test_cases/q4/1-tinygrid.test | 22 +
 .../test_cases/q4/2-tinygrid-noisy.solution | 342 +++++
 .../test_cases/q4/2-tinygrid-noisy.test | 22 +
 reinforcement/test_cases/q4/3-bridge.solution | 570 ++++++++
 reinforcement/test_cases/q4/3-bridge.test | 27 +
 .../test_cases/q4/4-discountgrid.solution | 456 +++++++
 .../test_cases/q4/4-discountgrid.test | 24 +
 reinforcement/test_cases/q4/CONFIG | 2 +
 .../test_cases/q5/1-tinygrid.solution | 2 +
reinforcement/test_cases/q5/1-tinygrid.test | 22 + .../test_cases/q5/2-tinygrid-noisy.solution | 2 + .../test_cases/q5/2-tinygrid-noisy.test | 22 + reinforcement/test_cases/q5/3-bridge.solution | 2 + reinforcement/test_cases/q5/3-bridge.test | 27 + .../test_cases/q5/4-discountgrid.solution | 2 + .../test_cases/q5/4-discountgrid.test | 24 + reinforcement/test_cases/q5/CONFIG | 2 + reinforcement/test_cases/q6/CONFIG | 2 + .../test_cases/q6/grade-agent.solution | 2 + reinforcement/test_cases/q6/grade-agent.test | 2 + reinforcement/test_cases/q7/CONFIG | 2 + .../test_cases/q7/grade-agent.solution | 2 + reinforcement/test_cases/q7/grade-agent.test | 6 + .../test_cases/q8/1-tinygrid.solution | 429 ++++++ reinforcement/test_cases/q8/1-tinygrid.test | 22 + .../test_cases/q8/2-tinygrid-noisy.solution | 429 ++++++ .../test_cases/q8/2-tinygrid-noisy.test | 22 + reinforcement/test_cases/q8/3-bridge.solution | 935 +++++++++++++ reinforcement/test_cases/q8/3-bridge.test | 27 + .../test_cases/q8/4-discountgrid.solution | 1210 +++++++++++++++++ .../test_cases/q8/4-discountgrid.test | 24 + .../test_cases/q8/5-coord-extractor.solution | 880 ++++++++++++ .../test_cases/q8/5-coord-extractor.test | 25 + reinforcement/test_cases/q8/CONFIG | 2 + reinforcement/textDisplay.py | 81 ++ reinforcement/textGridworldDisplay.py | 324 +++++ reinforcement/util.py | 653 +++++++++ reinforcement/valueIterationAgents.py | 123 ++ search/search.py | 2 +- 101 files changed, 16680 insertions(+), 1 deletion(-) create mode 100644 reinforcement/VERSION create mode 100644 reinforcement/analysis.py create mode 100644 reinforcement/autograder.py create mode 100644 reinforcement/crawler.py create mode 100644 reinforcement/environment.py create mode 100644 reinforcement/featureExtractors.py create mode 100644 reinforcement/game.py create mode 100644 reinforcement/ghostAgents.py create mode 100644 reinforcement/grading.py create mode 100644 reinforcement/graphicsCrawlerDisplay.py create mode 100644 reinforcement/graphicsDisplay.py create mode 100644 reinforcement/graphicsGridworldDisplay.py create mode 100644 reinforcement/graphicsUtils.py create mode 100644 reinforcement/gridworld.py create mode 100644 reinforcement/keyboardAgents.py create mode 100644 reinforcement/layout.py create mode 100644 reinforcement/layouts/capsuleClassic.lay create mode 100644 reinforcement/layouts/contestClassic.lay create mode 100644 reinforcement/layouts/mediumClassic.lay create mode 100644 reinforcement/layouts/mediumGrid.lay create mode 100644 reinforcement/layouts/minimaxClassic.lay create mode 100644 reinforcement/layouts/openClassic.lay create mode 100644 reinforcement/layouts/originalClassic.lay create mode 100644 reinforcement/layouts/smallClassic.lay create mode 100644 reinforcement/layouts/smallGrid.lay create mode 100644 reinforcement/layouts/testClassic.lay create mode 100644 reinforcement/layouts/trappedClassic.lay create mode 100644 reinforcement/layouts/trickyClassic.lay create mode 100644 reinforcement/learningAgents.py create mode 100644 reinforcement/mdp.py create mode 100644 reinforcement/pacman.py create mode 100644 reinforcement/pacmanAgents.py create mode 100644 reinforcement/projectParams.py create mode 100644 reinforcement/qlearningAgents.py create mode 100644 reinforcement/reinforcementTestClasses.py create mode 100644 reinforcement/testClasses.py create mode 100644 reinforcement/testParser.py create mode 100644 reinforcement/test_cases/CONFIG create mode 100644 reinforcement/test_cases/q1/1-tinygrid.solution create mode 100644 
reinforcement/test_cases/q1/1-tinygrid.test create mode 100644 reinforcement/test_cases/q1/2-tinygrid-noisy.solution create mode 100644 reinforcement/test_cases/q1/2-tinygrid-noisy.test create mode 100644 reinforcement/test_cases/q1/3-bridge.solution create mode 100644 reinforcement/test_cases/q1/3-bridge.test create mode 100644 reinforcement/test_cases/q1/4-discountgrid.solution create mode 100644 reinforcement/test_cases/q1/4-discountgrid.test create mode 100644 reinforcement/test_cases/q1/CONFIG create mode 100644 reinforcement/test_cases/q2/1-bridge-grid.solution create mode 100644 reinforcement/test_cases/q2/1-bridge-grid.test create mode 100644 reinforcement/test_cases/q2/CONFIG create mode 100644 reinforcement/test_cases/q3/1-question-3.1.solution create mode 100644 reinforcement/test_cases/q3/1-question-3.1.test create mode 100644 reinforcement/test_cases/q3/2-question-3.2.solution create mode 100644 reinforcement/test_cases/q3/2-question-3.2.test create mode 100644 reinforcement/test_cases/q3/3-question-3.3.solution create mode 100644 reinforcement/test_cases/q3/3-question-3.3.test create mode 100644 reinforcement/test_cases/q3/4-question-3.4.solution create mode 100644 reinforcement/test_cases/q3/4-question-3.4.test create mode 100644 reinforcement/test_cases/q3/5-question-3.5.solution create mode 100644 reinforcement/test_cases/q3/5-question-3.5.test create mode 100644 reinforcement/test_cases/q3/CONFIG create mode 100644 reinforcement/test_cases/q4/1-tinygrid.solution create mode 100644 reinforcement/test_cases/q4/1-tinygrid.test create mode 100644 reinforcement/test_cases/q4/2-tinygrid-noisy.solution create mode 100644 reinforcement/test_cases/q4/2-tinygrid-noisy.test create mode 100644 reinforcement/test_cases/q4/3-bridge.solution create mode 100644 reinforcement/test_cases/q4/3-bridge.test create mode 100644 reinforcement/test_cases/q4/4-discountgrid.solution create mode 100644 reinforcement/test_cases/q4/4-discountgrid.test create mode 100644 reinforcement/test_cases/q4/CONFIG create mode 100644 reinforcement/test_cases/q5/1-tinygrid.solution create mode 100644 reinforcement/test_cases/q5/1-tinygrid.test create mode 100644 reinforcement/test_cases/q5/2-tinygrid-noisy.solution create mode 100644 reinforcement/test_cases/q5/2-tinygrid-noisy.test create mode 100644 reinforcement/test_cases/q5/3-bridge.solution create mode 100644 reinforcement/test_cases/q5/3-bridge.test create mode 100644 reinforcement/test_cases/q5/4-discountgrid.solution create mode 100644 reinforcement/test_cases/q5/4-discountgrid.test create mode 100644 reinforcement/test_cases/q5/CONFIG create mode 100644 reinforcement/test_cases/q6/CONFIG create mode 100644 reinforcement/test_cases/q6/grade-agent.solution create mode 100644 reinforcement/test_cases/q6/grade-agent.test create mode 100644 reinforcement/test_cases/q7/CONFIG create mode 100644 reinforcement/test_cases/q7/grade-agent.solution create mode 100644 reinforcement/test_cases/q7/grade-agent.test create mode 100644 reinforcement/test_cases/q8/1-tinygrid.solution create mode 100644 reinforcement/test_cases/q8/1-tinygrid.test create mode 100644 reinforcement/test_cases/q8/2-tinygrid-noisy.solution create mode 100644 reinforcement/test_cases/q8/2-tinygrid-noisy.test create mode 100644 reinforcement/test_cases/q8/3-bridge.solution create mode 100644 reinforcement/test_cases/q8/3-bridge.test create mode 100644 reinforcement/test_cases/q8/4-discountgrid.solution create mode 100644 reinforcement/test_cases/q8/4-discountgrid.test create mode 100644 
reinforcement/test_cases/q8/5-coord-extractor.solution create mode 100644 reinforcement/test_cases/q8/5-coord-extractor.test create mode 100644 reinforcement/test_cases/q8/CONFIG create mode 100644 reinforcement/textDisplay.py create mode 100644 reinforcement/textGridworldDisplay.py create mode 100644 reinforcement/util.py create mode 100644 reinforcement/valueIterationAgents.py diff --git a/reinforcement/VERSION b/reinforcement/VERSION new file mode 100644 index 0000000..52f6cb3 --- /dev/null +++ b/reinforcement/VERSION @@ -0,0 +1 @@ +v1.001 diff --git a/reinforcement/analysis.py b/reinforcement/analysis.py new file mode 100644 index 0000000..b040fa2 --- /dev/null +++ b/reinforcement/analysis.py @@ -0,0 +1,73 @@ +# analysis.py +# ----------- +# Licensing Information: You are free to use or extend these projects for +# educational purposes provided that (1) you do not distribute or publish +# solutions, (2) you retain this notice, and (3) you provide clear +# attribution to UC Berkeley, including a link to http://ai.berkeley.edu. +# +# Attribution Information: The Pacman AI projects were developed at UC Berkeley. +# The core projects and autograders were primarily created by John DeNero +# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). +# Student side autograding was added by Brad Miller, Nick Hay, and +# Pieter Abbeel (pabbeel@cs.berkeley.edu). + + +###################### +# ANALYSIS QUESTIONS # +###################### + +# Set the given parameters to obtain the specified policies through +# value iteration. + +def question2(): + answerDiscount = 0.9 + answerNoise = 0.2 + return answerDiscount, answerNoise + +def question3a(): + answerDiscount = None + answerNoise = None + answerLivingReward = None + return answerDiscount, answerNoise, answerLivingReward + # If not possible, return 'NOT POSSIBLE' + +def question3b(): + answerDiscount = None + answerNoise = None + answerLivingReward = None + return answerDiscount, answerNoise, answerLivingReward + # If not possible, return 'NOT POSSIBLE' + +def question3c(): + answerDiscount = None + answerNoise = None + answerLivingReward = None + return answerDiscount, answerNoise, answerLivingReward + # If not possible, return 'NOT POSSIBLE' + +def question3d(): + answerDiscount = None + answerNoise = None + answerLivingReward = None + return answerDiscount, answerNoise, answerLivingReward + # If not possible, return 'NOT POSSIBLE' + +def question3e(): + answerDiscount = None + answerNoise = None + answerLivingReward = None + return answerDiscount, answerNoise, answerLivingReward + # If not possible, return 'NOT POSSIBLE' + +def question6(): + answerEpsilon = None + answerLearningRate = None + return answerEpsilon, answerLearningRate + # If not possible, return 'NOT POSSIBLE' + +if __name__ == '__main__': + print 'Answers to analysis questions:' + import analysis + for q in [q for q in dir(analysis) if q.startswith('question')]: + response = getattr(analysis, q)() + print ' Question %s:\t%s' % (q, str(response)) diff --git a/reinforcement/autograder.py b/reinforcement/autograder.py new file mode 100644 index 0000000..9c11c4c --- /dev/null +++ b/reinforcement/autograder.py @@ -0,0 +1,351 @@ +# autograder.py +# ------------- +# Licensing Information: You are free to use or extend these projects for +# educational purposes provided that (1) you do not distribute or publish +# solutions, (2) you retain this notice, and (3) you provide clear +# attribution to UC Berkeley, including a link to http://ai.berkeley.edu. 
+# +# Attribution Information: The Pacman AI projects were developed at UC Berkeley. +# The core projects and autograders were primarily created by John DeNero +# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). +# Student side autograding was added by Brad Miller, Nick Hay, and +# Pieter Abbeel (pabbeel@cs.berkeley.edu). + + +# imports from python standard library +import grading +import imp +import optparse +import os +import re +import sys +import projectParams +import random +random.seed(0) +try: + from pacman import GameState +except: + pass + +# register arguments and set default values +def readCommand(argv): + parser = optparse.OptionParser(description = 'Run public tests on student code') + parser.set_defaults(generateSolutions=False, edxOutput=False, muteOutput=False, printTestCase=False, noGraphics=False) + parser.add_option('--test-directory', + dest = 'testRoot', + default = 'test_cases', + help = 'Root test directory which contains subdirectories corresponding to each question') + parser.add_option('--student-code', + dest = 'studentCode', + default = projectParams.STUDENT_CODE_DEFAULT, + help = 'comma separated list of student code files') + parser.add_option('--code-directory', + dest = 'codeRoot', + default = "", + help = 'Root directory containing the student and testClass code') + parser.add_option('--test-case-code', + dest = 'testCaseCode', + default = projectParams.PROJECT_TEST_CLASSES, + help = 'class containing testClass classes for this project') + parser.add_option('--generate-solutions', + dest = 'generateSolutions', + action = 'store_true', + help = 'Write solutions generated to .solution file') + parser.add_option('--edx-output', + dest = 'edxOutput', + action = 'store_true', + help = 'Generate edX output files') + parser.add_option('--mute', + dest = 'muteOutput', + action = 'store_true', + help = 'Mute output from executing tests') + parser.add_option('--print-tests', '-p', + dest = 'printTestCase', + action = 'store_true', + help = 'Print each test case before running them.') + parser.add_option('--test', '-t', + dest = 'runTest', + default = None, + help = 'Run one particular test. Relative to test root.') + parser.add_option('--question', '-q', + dest = 'gradeQuestion', + default = None, + help = 'Grade one particular question.') + parser.add_option('--no-graphics', + dest = 'noGraphics', + action = 'store_true', + help = 'No graphics display for pacman games.') + (options, args) = parser.parse_args(argv) + return options + + +# confirm we should author solution files +def confirmGenerate(): + print 'WARNING: this action will overwrite any solution files.' + print 'Are you sure you want to proceed? (yes/no)' + while True: + ans = sys.stdin.readline().strip() + if ans == 'yes': + break + elif ans == 'no': + sys.exit(0) + else: + print 'please answer either "yes" or "no"' + + +# TODO: Fix this so that it tracebacks work correctly +# Looking at source of the traceback module, presuming it works +# the same as the intepreters, it uses co_filename. This is, +# however, a readonly attribute. +def setModuleName(module, filename): + functionType = type(confirmGenerate) + classType = type(optparse.Option) + + for i in dir(module): + o = getattr(module, i) + if hasattr(o, '__file__'): continue + + if type(o) == functionType: + setattr(o, '__file__', filename) + elif type(o) == classType: + setattr(o, '__file__', filename) + # TODO: assign member __file__'s? 
+ #print i, type(o) + + +#from cStringIO import StringIO + +def loadModuleString(moduleSource): + # Below broken, imp doesn't believe its being passed a file: + # ValueError: load_module arg#2 should be a file or None + # + #f = StringIO(moduleCodeDict[k]) + #tmp = imp.load_module(k, f, k, (".py", "r", imp.PY_SOURCE)) + tmp = imp.new_module(k) + exec moduleCodeDict[k] in tmp.__dict__ + setModuleName(tmp, k) + return tmp + +import py_compile + +def loadModuleFile(moduleName, filePath): + with open(filePath, 'r') as f: + return imp.load_module(moduleName, f, "%s.py" % moduleName, (".py", "r", imp.PY_SOURCE)) + + +def readFile(path, root=""): + "Read file from disk at specified path and return as string" + with open(os.path.join(root, path), 'r') as handle: + return handle.read() + + +####################################################################### +# Error Hint Map +####################################################################### + +# TODO: use these +ERROR_HINT_MAP = { + 'q1': { + "": """ + We noticed that your project threw an IndexError on q1. + While many things may cause this, it may have been from + assuming a certain number of successors from a state space + or assuming a certain number of actions available from a given + state. Try making your code more general (no hardcoded indices) + and submit again! + """ + }, + 'q3': { + "": """ + We noticed that your project threw an AttributeError on q3. + While many things may cause this, it may have been from assuming + a certain size or structure to the state space. For example, if you have + a line of code assuming that the state is (x, y) and we run your code + on a state space with (x, y, z), this error could be thrown. Try + making your code more general and submit again! + + """ + } +} + +import pprint + +def splitStrings(d): + d2 = dict(d) + for k in d: + if k[0:2] == "__": + del d2[k] + continue + if d2[k].find("\n") >= 0: + d2[k] = d2[k].split("\n") + return d2 + + +def printTest(testDict, solutionDict): + pp = pprint.PrettyPrinter(indent=4) + print "Test case:" + for line in testDict["__raw_lines__"]: + print " |", line + print "Solution:" + for line in solutionDict["__raw_lines__"]: + print " |", line + + +def runTest(testName, moduleDict, printTestCase=False, display=None): + import testParser + import testClasses + for module in moduleDict: + setattr(sys.modules[__name__], module, moduleDict[module]) + + testDict = testParser.TestParser(testName + ".test").parse() + solutionDict = testParser.TestParser(testName + ".solution").parse() + test_out_file = os.path.join('%s.test_output' % testName) + testDict['test_out_file'] = test_out_file + testClass = getattr(projectTestClasses, testDict['class']) + + questionClass = getattr(testClasses, 'Question') + question = questionClass({'max_points': 0}, display) + testCase = testClass(question, testDict) + + if printTestCase: + printTest(testDict, solutionDict) + + # This is a fragile hack to create a stub grades object + grades = grading.Grades(projectParams.PROJECT_NAME, [(None,0)]) + testCase.execute(grades, moduleDict, solutionDict) + + +# returns all the tests you need to run in order to run question +def getDepends(testParser, testRoot, question): + allDeps = [question] + questionDict = testParser.TestParser(os.path.join(testRoot, question, 'CONFIG')).parse() + if 'depends' in questionDict: + depends = questionDict['depends'].split() + for d in depends: + # run dependencies first + allDeps = getDepends(testParser, testRoot, d) + allDeps + return allDeps + +# get list of 
questions to grade +def getTestSubdirs(testParser, testRoot, questionToGrade): + problemDict = testParser.TestParser(os.path.join(testRoot, 'CONFIG')).parse() + if questionToGrade != None: + questions = getDepends(testParser, testRoot, questionToGrade) + if len(questions) > 1: + print 'Note: due to dependencies, the following tests will be run: %s' % ' '.join(questions) + return questions + if 'order' in problemDict: + return problemDict['order'].split() + return sorted(os.listdir(testRoot)) + + +# evaluate student code +def evaluate(generateSolutions, testRoot, moduleDict, exceptionMap=ERROR_HINT_MAP, edxOutput=False, muteOutput=False, + printTestCase=False, questionToGrade=None, display=None): + # imports of testbench code. note that the testClasses import must follow + # the import of student code due to dependencies + import testParser + import testClasses + for module in moduleDict: + setattr(sys.modules[__name__], module, moduleDict[module]) + + questions = [] + questionDicts = {} + test_subdirs = getTestSubdirs(testParser, testRoot, questionToGrade) + for q in test_subdirs: + subdir_path = os.path.join(testRoot, q) + if not os.path.isdir(subdir_path) or q[0] == '.': + continue + + # create a question object + questionDict = testParser.TestParser(os.path.join(subdir_path, 'CONFIG')).parse() + questionClass = getattr(testClasses, questionDict['class']) + question = questionClass(questionDict, display) + questionDicts[q] = questionDict + + # load test cases into question + tests = filter(lambda t: re.match('[^#~.].*\.test\Z', t), os.listdir(subdir_path)) + tests = map(lambda t: re.match('(.*)\.test\Z', t).group(1), tests) + for t in sorted(tests): + test_file = os.path.join(subdir_path, '%s.test' % t) + solution_file = os.path.join(subdir_path, '%s.solution' % t) + test_out_file = os.path.join(subdir_path, '%s.test_output' % t) + testDict = testParser.TestParser(test_file).parse() + if testDict.get("disabled", "false").lower() == "true": + continue + testDict['test_out_file'] = test_out_file + testClass = getattr(projectTestClasses, testDict['class']) + testCase = testClass(question, testDict) + def makefun(testCase, solution_file): + if generateSolutions: + # write solution file to disk + return lambda grades: testCase.writeSolution(moduleDict, solution_file) + else: + # read in solution dictionary and pass as an argument + testDict = testParser.TestParser(test_file).parse() + solutionDict = testParser.TestParser(solution_file).parse() + if printTestCase: + return lambda grades: printTest(testDict, solutionDict) or testCase.execute(grades, moduleDict, solutionDict) + else: + return lambda grades: testCase.execute(grades, moduleDict, solutionDict) + question.addTestCase(testCase, makefun(testCase, solution_file)) + + # Note extra function is necessary for scoping reasons + def makefun(question): + return lambda grades: question.execute(grades) + setattr(sys.modules[__name__], q, makefun(question)) + questions.append((q, question.getMaxPoints())) + + grades = grading.Grades(projectParams.PROJECT_NAME, questions, edxOutput=edxOutput, muteOutput=muteOutput) + if questionToGrade == None: + for q in questionDicts: + for prereq in questionDicts[q].get('depends', '').split(): + grades.addPrereq(q, prereq) + + grades.grade(sys.modules[__name__], bonusPic = projectParams.BONUS_PIC) + return grades.points + + + +def getDisplay(graphicsByDefault, options=None): + graphics = graphicsByDefault + if options is not None and options.noGraphics: + graphics = False + if graphics: + try: + import 
graphicsDisplay + return graphicsDisplay.PacmanGraphics(1, frameTime=.05) + except ImportError: + pass + import textDisplay + return textDisplay.NullGraphics() + + + + +if __name__ == '__main__': + options = readCommand(sys.argv) + if options.generateSolutions: + confirmGenerate() + codePaths = options.studentCode.split(',') + # moduleCodeDict = {} + # for cp in codePaths: + # moduleName = re.match('.*?([^/]*)\.py', cp).group(1) + # moduleCodeDict[moduleName] = readFile(cp, root=options.codeRoot) + # moduleCodeDict['projectTestClasses'] = readFile(options.testCaseCode, root=options.codeRoot) + # moduleDict = loadModuleDict(moduleCodeDict) + + moduleDict = {} + for cp in codePaths: + moduleName = re.match('.*?([^/]*)\.py', cp).group(1) + moduleDict[moduleName] = loadModuleFile(moduleName, os.path.join(options.codeRoot, cp)) + moduleName = re.match('.*?([^/]*)\.py', options.testCaseCode).group(1) + moduleDict['projectTestClasses'] = loadModuleFile(moduleName, os.path.join(options.codeRoot, options.testCaseCode)) + + + if options.runTest != None: + runTest(options.runTest, moduleDict, printTestCase=options.printTestCase, display=getDisplay(True, options)) + else: + evaluate(options.generateSolutions, options.testRoot, moduleDict, + edxOutput=options.edxOutput, muteOutput=options.muteOutput, printTestCase=options.printTestCase, + questionToGrade=options.gradeQuestion, display=getDisplay(options.gradeQuestion!=None, options)) diff --git a/reinforcement/crawler.py b/reinforcement/crawler.py new file mode 100644 index 0000000..d42255c --- /dev/null +++ b/reinforcement/crawler.py @@ -0,0 +1,384 @@ +# crawler.py +# ---------- +# Licensing Information: You are free to use or extend these projects for +# educational purposes provided that (1) you do not distribute or publish +# solutions, (2) you retain this notice, and (3) you provide clear +# attribution to UC Berkeley, including a link to http://ai.berkeley.edu. +# +# Attribution Information: The Pacman AI projects were developed at UC Berkeley. +# The core projects and autograders were primarily created by John DeNero +# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). +# Student side autograding was added by Brad Miller, Nick Hay, and +# Pieter Abbeel (pabbeel@cs.berkeley.edu). 
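# For orientation: the autograder's __main__ block above is what backs the usual
# command-line workflow. The invocations below are illustrative examples inferred
# from the optparse options registered in readCommand; they are not part of the patch.
#
#   python autograder.py                        # grade every question found under test_cases/
#   python autograder.py -q q1                  # grade one question (plus any CONFIG 'depends' prerequisites)
#   python autograder.py -t test_cases/q1/1-tinygrid    # run a single test, path relative to the test root
#   python autograder.py -q q7 --no-graphics    # use textDisplay.NullGraphics instead of the Pacman GUI
#   python autograder.py --generate-solutions   # rewrite .solution files (asks for yes/no confirmation first)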
+ + +#!/usr/bin/python +import math +from math import pi as PI +import time +import environment +import random + +class CrawlingRobotEnvironment(environment.Environment): + + def __init__(self, crawlingRobot): + + self.crawlingRobot = crawlingRobot + + # The state is of the form (armAngle, handAngle) + # where the angles are bucket numbers, not actual + # degree measurements + self.state = None + + self.nArmStates = 9 + self.nHandStates = 13 + + # create a list of arm buckets and hand buckets to + # discretize the state space + minArmAngle,maxArmAngle = self.crawlingRobot.getMinAndMaxArmAngles() + minHandAngle,maxHandAngle = self.crawlingRobot.getMinAndMaxHandAngles() + armIncrement = (maxArmAngle - minArmAngle) / (self.nArmStates-1) + handIncrement = (maxHandAngle - minHandAngle) / (self.nHandStates-1) + self.armBuckets = [minArmAngle+(armIncrement*i) \ + for i in range(self.nArmStates)] + self.handBuckets = [minHandAngle+(handIncrement*i) \ + for i in range(self.nHandStates)] + + # Reset + self.reset() + + def getCurrentState(self): + """ + Return the current state + of the crawling robot + """ + return self.state + + def getPossibleActions(self, state): + """ + Returns possible actions + for the states in the + current state + """ + + actions = list() + + currArmBucket,currHandBucket = state + if currArmBucket > 0: actions.append('arm-down') + if currArmBucket < self.nArmStates-1: actions.append('arm-up') + if currHandBucket > 0: actions.append('hand-down') + if currHandBucket < self.nHandStates-1: actions.append('hand-up') + + return actions + + def doAction(self, action): + """ + Perform the action and update + the current state of the Environment + and return the reward for the + current state, the next state + and the taken action. + + Returns: + nextState, reward + """ + nextState, reward = None, None + + oldX,oldY = self.crawlingRobot.getRobotPosition() + + armBucket,handBucket = self.state + armAngle,handAngle = self.crawlingRobot.getAngles() + if action == 'arm-up': + newArmAngle = self.armBuckets[armBucket+1] + self.crawlingRobot.moveArm(newArmAngle) + nextState = (armBucket+1,handBucket) + if action == 'arm-down': + newArmAngle = self.armBuckets[armBucket-1] + self.crawlingRobot.moveArm(newArmAngle) + nextState = (armBucket-1,handBucket) + if action == 'hand-up': + newHandAngle = self.handBuckets[handBucket+1] + self.crawlingRobot.moveHand(newHandAngle) + nextState = (armBucket,handBucket+1) + if action == 'hand-down': + newHandAngle = self.handBuckets[handBucket-1] + self.crawlingRobot.moveHand(newHandAngle) + nextState = (armBucket,handBucket-1) + + newX,newY = self.crawlingRobot.getRobotPosition() + + # a simple reward function + reward = newX - oldX + + self.state = nextState + return nextState, reward + + + def reset(self): + """ + Resets the Environment to the initial state + """ + ## Initialize the state to be the middle + ## value for each parameter e.g. 
if there are 13 and 19 + ## buckets for the arm and hand parameters, then the intial + ## state should be (6,9) + ## + ## Also call self.crawlingRobot.setAngles() + ## to the initial arm and hand angle + + armState = self.nArmStates/2 + handState = self.nHandStates/2 + self.state = armState,handState + self.crawlingRobot.setAngles(self.armBuckets[armState],self.handBuckets[handState]) + self.crawlingRobot.positions = [20,self.crawlingRobot.getRobotPosition()[0]] + + +class CrawlingRobot: + + def setAngles(self, armAngle, handAngle): + """ + set the robot's arm and hand angles + to the passed in values + """ + self.armAngle = armAngle + self.handAngle = handAngle + + def getAngles(self): + """ + returns the pair of (armAngle, handAngle) + """ + return self.armAngle, self.handAngle + + def getRobotPosition(self): + """ + returns the (x,y) coordinates + of the lower-left point of the + robot + """ + return self.robotPos + + def moveArm(self, newArmAngle): + """ + move the robot arm to 'newArmAngle' + """ + oldArmAngle = self.armAngle + if newArmAngle > self.maxArmAngle: + raise 'Crawling Robot: Arm Raised too high. Careful!' + if newArmAngle < self.minArmAngle: + raise 'Crawling Robot: Arm Raised too low. Careful!' + disp = self.displacement(self.armAngle, self.handAngle, + newArmAngle, self.handAngle) + curXPos = self.robotPos[0] + self.robotPos = (curXPos+disp, self.robotPos[1]) + self.armAngle = newArmAngle + + # Position and Velocity Sign Post + self.positions.append(self.getRobotPosition()[0]) +# self.angleSums.append(abs(math.degrees(oldArmAngle)-math.degrees(newArmAngle))) + if len(self.positions) > 100: + self.positions.pop(0) + # self.angleSums.pop(0) + + def moveHand(self, newHandAngle): + """ + move the robot hand to 'newArmAngle' + """ + oldHandAngle = self.handAngle + + if newHandAngle > self.maxHandAngle: + raise 'Crawling Robot: Hand Raised too high. Careful!' + if newHandAngle < self.minHandAngle: + raise 'Crawling Robot: Hand Raised too low. Careful!' 
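# Illustrative note (not part of the original patch): the angles passed into
# moveArm/moveHand come from the bucket lists built in CrawlingRobotEnvironment.__init__
# above, which split each continuous angle range into evenly spaced buckets; reset()
# starts in the middle bucket of each range. With the defaults set in
# CrawlingRobot.__init__ below (arm in [-pi/6, pi/6] over 9 buckets, hand in
# [-(5/6)pi, 0] over 13 buckets):
#     armIncrement  = (pi/6 - (-pi/6)) / (9 - 1)   ~ 0.131 rad
#     handIncrement = (0 - (-(5/6)pi)) / (13 - 1)  ~ 0.218 rad
#     reset() state = (9/2, 13/2) = (4, 6)         # Python 2 integer division
#     armBuckets[4] = 0.0,  handBuckets[6] = -(5/12)pi ~ -1.309 rad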
+ disp = self.displacement(self.armAngle, self.handAngle, self.armAngle, newHandAngle) + curXPos = self.robotPos[0] + self.robotPos = (curXPos+disp, self.robotPos[1]) + self.handAngle = newHandAngle + + # Position and Velocity Sign Post + self.positions.append(self.getRobotPosition()[0]) + # self.angleSums.append(abs(math.degrees(oldHandAngle)-math.degrees(newHandAngle))) + if len(self.positions) > 100: + self.positions.pop(0) + # self.angleSums.pop(0) + + def getMinAndMaxArmAngles(self): + """ + get the lower- and upper- bound + for the arm angles returns (min,max) pair + """ + return self.minArmAngle, self.maxArmAngle + + def getMinAndMaxHandAngles(self): + """ + get the lower- and upper- bound + for the hand angles returns (min,max) pair + """ + return self.minHandAngle, self.maxHandAngle + + def getRotationAngle(self): + """ + get the current angle the + robot body is rotated off the ground + """ + armCos, armSin = self.__getCosAndSin(self.armAngle) + handCos, handSin = self.__getCosAndSin(self.handAngle) + x = self.armLength * armCos + self.handLength * handCos + self.robotWidth + y = self.armLength * armSin + self.handLength * handSin + self.robotHeight + if y < 0: + return math.atan(-y/x) + return 0.0 + + + ## You shouldn't need methods below here + + + def __getCosAndSin(self, angle): + return math.cos(angle), math.sin(angle) + + def displacement(self, oldArmDegree, oldHandDegree, armDegree, handDegree): + + oldArmCos, oldArmSin = self.__getCosAndSin(oldArmDegree) + armCos, armSin = self.__getCosAndSin(armDegree) + oldHandCos, oldHandSin = self.__getCosAndSin(oldHandDegree) + handCos, handSin = self.__getCosAndSin(handDegree) + + xOld = self.armLength * oldArmCos + self.handLength * oldHandCos + self.robotWidth + yOld = self.armLength * oldArmSin + self.handLength * oldHandSin + self.robotHeight + + x = self.armLength * armCos + self.handLength * handCos + self.robotWidth + y = self.armLength * armSin + self.handLength * handSin + self.robotHeight + + if y < 0: + if yOld <= 0: + return math.sqrt(xOld*xOld + yOld*yOld) - math.sqrt(x*x + y*y) + return (xOld - yOld*(x-xOld) / (y - yOld)) - math.sqrt(x*x + y*y) + else: + if yOld >= 0: + return 0.0 + return -(x - y * (xOld-x)/(yOld-y)) + math.sqrt(xOld*xOld + yOld*yOld) + + raise 'Never Should See This!' + + def draw(self, stepCount, stepDelay): + x1, y1 = self.getRobotPosition() + x1 = x1 % self.totWidth + + ## Check Lower Still on the ground + if y1 != self.groundY: + raise 'Flying Robot!!' 
+ + rotationAngle = self.getRotationAngle() + cosRot, sinRot = self.__getCosAndSin(rotationAngle) + + x2 = x1 + self.robotWidth * cosRot + y2 = y1 - self.robotWidth * sinRot + + x3 = x1 - self.robotHeight * sinRot + y3 = y1 - self.robotHeight * cosRot + + x4 = x3 + cosRot*self.robotWidth + y4 = y3 - sinRot*self.robotWidth + + self.canvas.coords(self.robotBody,x1,y1,x2,y2,x4,y4,x3,y3) + + armCos, armSin = self.__getCosAndSin(rotationAngle+self.armAngle) + xArm = x4 + self.armLength * armCos + yArm = y4 - self.armLength * armSin + + self.canvas.coords(self.robotArm,x4,y4,xArm,yArm) + + handCos, handSin = self.__getCosAndSin(self.handAngle+rotationAngle) + xHand = xArm + self.handLength * handCos + yHand = yArm - self.handLength * handSin + + self.canvas.coords(self.robotHand,xArm,yArm,xHand,yHand) + + + # Position and Velocity Sign Post +# time = len(self.positions) + 0.5 * sum(self.angleSums) +# velocity = (self.positions[-1]-self.positions[0]) / time +# if len(self.positions) == 1: return + steps = (stepCount - self.lastStep) + if steps==0:return + # pos = self.positions[-1] +# velocity = (pos - self.lastPos) / steps + # g = .9 ** (10 * stepDelay) +# g = .99 ** steps +# self.velAvg = g * self.velAvg + (1 - g) * velocity + # g = .999 ** steps + # self.velAvg2 = g * self.velAvg2 + (1 - g) * velocity + pos = self.positions[-1] + velocity = pos - self.positions[-2] + vel2 = (pos - self.positions[0]) / len(self.positions) + self.velAvg = .9 * self.velAvg + .1 * vel2 + velMsg = '100-step Avg Velocity: %.2f' % self.velAvg +# velMsg2 = '1000-step Avg Velocity: %.2f' % self.velAvg2 + velocityMsg = 'Velocity: %.2f' % velocity + positionMsg = 'Position: %2.f' % pos + stepMsg = 'Step: %d' % stepCount + if 'vel_msg' in dir(self): + self.canvas.delete(self.vel_msg) + self.canvas.delete(self.pos_msg) + self.canvas.delete(self.step_msg) + self.canvas.delete(self.velavg_msg) + # self.canvas.delete(self.velavg2_msg) + # self.velavg2_msg = self.canvas.create_text(850,190,text=velMsg2) + self.velavg_msg = self.canvas.create_text(650,190,text=velMsg) + self.vel_msg = self.canvas.create_text(450,190,text=velocityMsg) + self.pos_msg = self.canvas.create_text(250,190,text=positionMsg) + self.step_msg = self.canvas.create_text(50,190,text=stepMsg) +# self.lastPos = pos + self.lastStep = stepCount +# self.lastVel = velocity + + def __init__(self, canvas): + + ## Canvas ## + self.canvas = canvas + self.velAvg = 0 +# self.velAvg2 = 0 +# self.lastPos = 0 + self.lastStep = 0 +# self.lastVel = 0 + + ## Arm and Hand Degrees ## + self.armAngle = self.oldArmDegree = 0.0 + self.handAngle = self.oldHandDegree = -PI/6 + + self.maxArmAngle = PI/6 + self.minArmAngle = -PI/6 + + self.maxHandAngle = 0 + self.minHandAngle = -(5.0/6.0) * PI + + ## Draw Ground ## + self.totWidth = canvas.winfo_reqwidth() + self.totHeight = canvas.winfo_reqheight() + self.groundHeight = 40 + self.groundY = self.totHeight - self.groundHeight + + self.ground = canvas.create_rectangle(0, + self.groundY,self.totWidth,self.totHeight, fill='blue') + + ## Robot Body ## + self.robotWidth = 80 + self.robotHeight = 40 + self.robotPos = (20, self.groundY) + self.robotBody = canvas.create_polygon(0,0,0,0,0,0,0,0, fill='green') + + ## Robot Arm ## + self.armLength = 60 + self.robotArm = canvas.create_line(0,0,0,0,fill='orange',width=5) + + ## Robot Hand ## + self.handLength = 40 + self.robotHand = canvas.create_line(0,0,0,0,fill='red',width=3) + + self.positions = [0,0] + # self.angleSums = [0,0] + + + +if __name__ == '__main__': + from graphicsCrawlerDisplay 
import * + run() diff --git a/reinforcement/environment.py b/reinforcement/environment.py new file mode 100644 index 0000000..94f9cbf --- /dev/null +++ b/reinforcement/environment.py @@ -0,0 +1,56 @@ +# environment.py +# -------------- +# Licensing Information: You are free to use or extend these projects for +# educational purposes provided that (1) you do not distribute or publish +# solutions, (2) you retain this notice, and (3) you provide clear +# attribution to UC Berkeley, including a link to http://ai.berkeley.edu. +# +# Attribution Information: The Pacman AI projects were developed at UC Berkeley. +# The core projects and autograders were primarily created by John DeNero +# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). +# Student side autograding was added by Brad Miller, Nick Hay, and +# Pieter Abbeel (pabbeel@cs.berkeley.edu). + + +#!/usr/bin/python + +class Environment: + + def getCurrentState(self): + """ + Returns the current state of enviornment + """ + abstract + + def getPossibleActions(self, state): + """ + Returns possible actions the agent + can take in the given state. Can + return the empty list if we are in + a terminal state. + """ + abstract + + def doAction(self, action): + """ + Performs the given action in the current + environment state and updates the enviornment. + + Returns a (reward, nextState) pair + """ + abstract + + def reset(self): + """ + Resets the current state to the start state + """ + abstract + + def isTerminal(self): + """ + Has the enviornment entered a terminal + state? This means there are no successors + """ + state = self.getCurrentState() + actions = self.getPossibleActions(state) + return len(actions) == 0 diff --git a/reinforcement/featureExtractors.py b/reinforcement/featureExtractors.py new file mode 100644 index 0000000..a327d58 --- /dev/null +++ b/reinforcement/featureExtractors.py @@ -0,0 +1,103 @@ +# featureExtractors.py +# -------------------- +# Licensing Information: You are free to use or extend these projects for +# educational purposes provided that (1) you do not distribute or publish +# solutions, (2) you retain this notice, and (3) you provide clear +# attribution to UC Berkeley, including a link to http://ai.berkeley.edu. +# +# Attribution Information: The Pacman AI projects were developed at UC Berkeley. +# The core projects and autograders were primarily created by John DeNero +# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). +# Student side autograding was added by Brad Miller, Nick Hay, and +# Pieter Abbeel (pabbeel@cs.berkeley.edu). + + +"Feature extractors for Pacman game states" + +from game import Directions, Actions +import util + +class FeatureExtractor: + def getFeatures(self, state, action): + """ + Returns a dict from features to counts + Usually, the count will just be 1.0 for + indicator functions. 
+ """ + util.raiseNotDefined() + +class IdentityExtractor(FeatureExtractor): + def getFeatures(self, state, action): + feats = util.Counter() + feats[(state,action)] = 1.0 + return feats + +class CoordinateExtractor(FeatureExtractor): + def getFeatures(self, state, action): + feats = util.Counter() + feats[state] = 1.0 + feats['x=%d' % state[0]] = 1.0 + feats['y=%d' % state[0]] = 1.0 + feats['action=%s' % action] = 1.0 + return feats + +def closestFood(pos, food, walls): + """ + closestFood -- this is similar to the function that we have + worked on in the search project; here its all in one place + """ + fringe = [(pos[0], pos[1], 0)] + expanded = set() + while fringe: + pos_x, pos_y, dist = fringe.pop(0) + if (pos_x, pos_y) in expanded: + continue + expanded.add((pos_x, pos_y)) + # if we find a food at this location then exit + if food[pos_x][pos_y]: + return dist + # otherwise spread out from the location to its neighbours + nbrs = Actions.getLegalNeighbors((pos_x, pos_y), walls) + for nbr_x, nbr_y in nbrs: + fringe.append((nbr_x, nbr_y, dist+1)) + # no food found + return None + +class SimpleExtractor(FeatureExtractor): + """ + Returns simple features for a basic reflex Pacman: + - whether food will be eaten + - how far away the next food is + - whether a ghost collision is imminent + - whether a ghost is one step away + """ + + def getFeatures(self, state, action): + # extract the grid of food and wall locations and get the ghost locations + food = state.getFood() + walls = state.getWalls() + ghosts = state.getGhostPositions() + + features = util.Counter() + + features["bias"] = 1.0 + + # compute the location of pacman after he takes the action + x, y = state.getPacmanPosition() + dx, dy = Actions.directionToVector(action) + next_x, next_y = int(x + dx), int(y + dy) + + # count the number of ghosts 1-step away + features["#-of-ghosts-1-step-away"] = sum((next_x, next_y) in Actions.getLegalNeighbors(g, walls) for g in ghosts) + + # if there is no danger of ghosts then add the food feature + if not features["#-of-ghosts-1-step-away"] and food[next_x][next_y]: + features["eats-food"] = 1.0 + + dist = closestFood((next_x, next_y), food, walls) + if dist is not None: + # make the distance a number less than one otherwise the update + # will diverge wildly + features["closest-food"] = float(dist) / (walls.width * walls.height) + features.divideAll(10.0) + return features diff --git a/reinforcement/game.py b/reinforcement/game.py new file mode 100644 index 0000000..e34d6cf --- /dev/null +++ b/reinforcement/game.py @@ -0,0 +1,729 @@ +# game.py +# ------- +# Licensing Information: You are free to use or extend these projects for +# educational purposes provided that (1) you do not distribute or publish +# solutions, (2) you retain this notice, and (3) you provide clear +# attribution to UC Berkeley, including a link to http://ai.berkeley.edu. +# +# Attribution Information: The Pacman AI projects were developed at UC Berkeley. +# The core projects and autograders were primarily created by John DeNero +# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). +# Student side autograding was added by Brad Miller, Nick Hay, and +# Pieter Abbeel (pabbeel@cs.berkeley.edu). + + +# game.py +# ------- +# Licensing Information: Please do not distribute or publish solutions to this +# project. You are free to use and extend these projects for educational +# purposes. 
The Pacman AI projects were developed at UC Berkeley, primarily by +# John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). +# For more info, see http://inst.eecs.berkeley.edu/~cs188/sp09/pacman.html + +from util import * +import time, os +import traceback +import sys + +####################### +# Parts worth reading # +####################### + +class Agent: + """ + An agent must define a getAction method, but may also define the + following methods which will be called if they exist: + + def registerInitialState(self, state): # inspects the starting state + """ + def __init__(self, index=0): + self.index = index + + def getAction(self, state): + """ + The Agent will receive a GameState (from either {pacman, capture, sonar}.py) and + must return an action from Directions.{North, South, East, West, Stop} + """ + raiseNotDefined() + +class Directions: + NORTH = 'North' + SOUTH = 'South' + EAST = 'East' + WEST = 'West' + STOP = 'Stop' + + LEFT = {NORTH: WEST, + SOUTH: EAST, + EAST: NORTH, + WEST: SOUTH, + STOP: STOP} + + RIGHT = dict([(y,x) for x, y in LEFT.items()]) + + REVERSE = {NORTH: SOUTH, + SOUTH: NORTH, + EAST: WEST, + WEST: EAST, + STOP: STOP} + +class Configuration: + """ + A Configuration holds the (x,y) coordinate of a character, along with its + traveling direction. + + The convention for positions, like a graph, is that (0,0) is the lower left corner, x increases + horizontally and y increases vertically. Therefore, north is the direction of increasing y, or (0,1). + """ + + def __init__(self, pos, direction): + self.pos = pos + self.direction = direction + + def getPosition(self): + return (self.pos) + + def getDirection(self): + return self.direction + + def isInteger(self): + x,y = self.pos + return x == int(x) and y == int(y) + + def __eq__(self, other): + if other == None: return False + return (self.pos == other.pos and self.direction == other.direction) + + def __hash__(self): + x = hash(self.pos) + y = hash(self.direction) + return hash(x + 13 * y) + + def __str__(self): + return "(x,y)="+str(self.pos)+", "+str(self.direction) + + def generateSuccessor(self, vector): + """ + Generates a new configuration reached by translating the current + configuration by the action vector. This is a low-level call and does + not attempt to respect the legality of the movement. + + Actions are movement vectors. + """ + x, y= self.pos + dx, dy = vector + direction = Actions.vectorToDirection(vector) + if direction == Directions.STOP: + direction = self.direction # There is no stop direction + return Configuration((x + dx, y+dy), direction) + +class AgentState: + """ + AgentStates hold the state of an agent (configuration, speed, scared, etc). 
+ """ + + def __init__( self, startConfiguration, isPacman ): + self.start = startConfiguration + self.configuration = startConfiguration + self.isPacman = isPacman + self.scaredTimer = 0 + self.numCarrying = 0 + self.numReturned = 0 + + def __str__( self ): + if self.isPacman: + return "Pacman: " + str( self.configuration ) + else: + return "Ghost: " + str( self.configuration ) + + def __eq__( self, other ): + if other == None: + return False + return self.configuration == other.configuration and self.scaredTimer == other.scaredTimer + + def __hash__(self): + return hash(hash(self.configuration) + 13 * hash(self.scaredTimer)) + + def copy( self ): + state = AgentState( self.start, self.isPacman ) + state.configuration = self.configuration + state.scaredTimer = self.scaredTimer + state.numCarrying = self.numCarrying + state.numReturned = self.numReturned + return state + + def getPosition(self): + if self.configuration == None: return None + return self.configuration.getPosition() + + def getDirection(self): + return self.configuration.getDirection() + +class Grid: + """ + A 2-dimensional array of objects backed by a list of lists. Data is accessed + via grid[x][y] where (x,y) are positions on a Pacman map with x horizontal, + y vertical and the origin (0,0) in the bottom left corner. + + The __str__ method constructs an output that is oriented like a pacman board. + """ + def __init__(self, width, height, initialValue=False, bitRepresentation=None): + if initialValue not in [False, True]: raise Exception('Grids can only contain booleans') + self.CELLS_PER_INT = 30 + + self.width = width + self.height = height + self.data = [[initialValue for y in range(height)] for x in range(width)] + if bitRepresentation: + self._unpackBits(bitRepresentation) + + def __getitem__(self, i): + return self.data[i] + + def __setitem__(self, key, item): + self.data[key] = item + + def __str__(self): + out = [[str(self.data[x][y])[0] for x in range(self.width)] for y in range(self.height)] + out.reverse() + return '\n'.join([''.join(x) for x in out]) + + def __eq__(self, other): + if other == None: return False + return self.data == other.data + + def __hash__(self): + # return hash(str(self)) + base = 1 + h = 0 + for l in self.data: + for i in l: + if i: + h += base + base *= 2 + return hash(h) + + def copy(self): + g = Grid(self.width, self.height) + g.data = [x[:] for x in self.data] + return g + + def deepCopy(self): + return self.copy() + + def shallowCopy(self): + g = Grid(self.width, self.height) + g.data = self.data + return g + + def count(self, item =True ): + return sum([x.count(item) for x in self.data]) + + def asList(self, key = True): + list = [] + for x in range(self.width): + for y in range(self.height): + if self[x][y] == key: list.append( (x,y) ) + return list + + def packBits(self): + """ + Returns an efficient int list representation + + (width, height, bitPackedInts...) 
+ """ + bits = [self.width, self.height] + currentInt = 0 + for i in range(self.height * self.width): + bit = self.CELLS_PER_INT - (i % self.CELLS_PER_INT) - 1 + x, y = self._cellIndexToPosition(i) + if self[x][y]: + currentInt += 2 ** bit + if (i + 1) % self.CELLS_PER_INT == 0: + bits.append(currentInt) + currentInt = 0 + bits.append(currentInt) + return tuple(bits) + + def _cellIndexToPosition(self, index): + x = index / self.height + y = index % self.height + return x, y + + def _unpackBits(self, bits): + """ + Fills in data from a bit-level representation + """ + cell = 0 + for packed in bits: + for bit in self._unpackInt(packed, self.CELLS_PER_INT): + if cell == self.width * self.height: break + x, y = self._cellIndexToPosition(cell) + self[x][y] = bit + cell += 1 + + def _unpackInt(self, packed, size): + bools = [] + if packed < 0: raise ValueError, "must be a positive integer" + for i in range(size): + n = 2 ** (self.CELLS_PER_INT - i - 1) + if packed >= n: + bools.append(True) + packed -= n + else: + bools.append(False) + return bools + +def reconstituteGrid(bitRep): + if type(bitRep) is not type((1,2)): + return bitRep + width, height = bitRep[:2] + return Grid(width, height, bitRepresentation= bitRep[2:]) + +#################################### +# Parts you shouldn't have to read # +#################################### + +class Actions: + """ + A collection of static methods for manipulating move actions. + """ + # Directions + _directions = {Directions.NORTH: (0, 1), + Directions.SOUTH: (0, -1), + Directions.EAST: (1, 0), + Directions.WEST: (-1, 0), + Directions.STOP: (0, 0)} + + _directionsAsList = _directions.items() + + TOLERANCE = .001 + + def reverseDirection(action): + if action == Directions.NORTH: + return Directions.SOUTH + if action == Directions.SOUTH: + return Directions.NORTH + if action == Directions.EAST: + return Directions.WEST + if action == Directions.WEST: + return Directions.EAST + return action + reverseDirection = staticmethod(reverseDirection) + + def vectorToDirection(vector): + dx, dy = vector + if dy > 0: + return Directions.NORTH + if dy < 0: + return Directions.SOUTH + if dx < 0: + return Directions.WEST + if dx > 0: + return Directions.EAST + return Directions.STOP + vectorToDirection = staticmethod(vectorToDirection) + + def directionToVector(direction, speed = 1.0): + dx, dy = Actions._directions[direction] + return (dx * speed, dy * speed) + directionToVector = staticmethod(directionToVector) + + def getPossibleActions(config, walls): + possible = [] + x, y = config.pos + x_int, y_int = int(x + 0.5), int(y + 0.5) + + # In between grid points, all agents must continue straight + if (abs(x - x_int) + abs(y - y_int) > Actions.TOLERANCE): + return [config.getDirection()] + + for dir, vec in Actions._directionsAsList: + dx, dy = vec + next_y = y_int + dy + next_x = x_int + dx + if not walls[next_x][next_y]: possible.append(dir) + + return possible + + getPossibleActions = staticmethod(getPossibleActions) + + def getLegalNeighbors(position, walls): + x,y = position + x_int, y_int = int(x + 0.5), int(y + 0.5) + neighbors = [] + for dir, vec in Actions._directionsAsList: + dx, dy = vec + next_x = x_int + dx + if next_x < 0 or next_x == walls.width: continue + next_y = y_int + dy + if next_y < 0 or next_y == walls.height: continue + if not walls[next_x][next_y]: neighbors.append((next_x, next_y)) + return neighbors + getLegalNeighbors = staticmethod(getLegalNeighbors) + + def getSuccessor(position, action): + dx, dy = Actions.directionToVector(action) + 
x, y = position + return (x + dx, y + dy) + getSuccessor = staticmethod(getSuccessor) + +class GameStateData: + """ + + """ + def __init__( self, prevState = None ): + """ + Generates a new data packet by copying information from its predecessor. + """ + if prevState != None: + self.food = prevState.food.shallowCopy() + self.capsules = prevState.capsules[:] + self.agentStates = self.copyAgentStates( prevState.agentStates ) + self.layout = prevState.layout + self._eaten = prevState._eaten + self.score = prevState.score + + self._foodEaten = None + self._foodAdded = None + self._capsuleEaten = None + self._agentMoved = None + self._lose = False + self._win = False + self.scoreChange = 0 + + def deepCopy( self ): + state = GameStateData( self ) + state.food = self.food.deepCopy() + state.layout = self.layout.deepCopy() + state._agentMoved = self._agentMoved + state._foodEaten = self._foodEaten + state._foodAdded = self._foodAdded + state._capsuleEaten = self._capsuleEaten + return state + + def copyAgentStates( self, agentStates ): + copiedStates = [] + for agentState in agentStates: + copiedStates.append( agentState.copy() ) + return copiedStates + + def __eq__( self, other ): + """ + Allows two states to be compared. + """ + if other == None: return False + # TODO Check for type of other + if not self.agentStates == other.agentStates: return False + if not self.food == other.food: return False + if not self.capsules == other.capsules: return False + if not self.score == other.score: return False + return True + + def __hash__( self ): + """ + Allows states to be keys of dictionaries. + """ + for i, state in enumerate( self.agentStates ): + try: + int(hash(state)) + except TypeError, e: + print e + #hash(state) + return int((hash(tuple(self.agentStates)) + 13*hash(self.food) + 113* hash(tuple(self.capsules)) + 7 * hash(self.score)) % 1048575 ) + + def __str__( self ): + width, height = self.layout.width, self.layout.height + map = Grid(width, height) + if type(self.food) == type((1,2)): + self.food = reconstituteGrid(self.food) + for x in range(width): + for y in range(height): + food, walls = self.food, self.layout.walls + map[x][y] = self._foodWallStr(food[x][y], walls[x][y]) + + for agentState in self.agentStates: + if agentState == None: continue + if agentState.configuration == None: continue + x,y = [int( i ) for i in nearestPoint( agentState.configuration.pos )] + agent_dir = agentState.configuration.direction + if agentState.isPacman: + map[x][y] = self._pacStr( agent_dir ) + else: + map[x][y] = self._ghostStr( agent_dir ) + + for x, y in self.capsules: + map[x][y] = 'o' + + return str(map) + ("\nScore: %d\n" % self.score) + + def _foodWallStr( self, hasFood, hasWall ): + if hasFood: + return '.' + elif hasWall: + return '%' + else: + return ' ' + + def _pacStr( self, dir ): + if dir == Directions.NORTH: + return 'v' + if dir == Directions.SOUTH: + return '^' + if dir == Directions.WEST: + return '>' + return '<' + + def _ghostStr( self, dir ): + return 'G' + if dir == Directions.NORTH: + return 'M' + if dir == Directions.SOUTH: + return 'W' + if dir == Directions.WEST: + return '3' + return 'E' + + def initialize( self, layout, numGhostAgents ): + """ + Creates an initial game state from a layout array (see layout.py). 
+ """ + self.food = layout.food.copy() + #self.capsules = [] + self.capsules = layout.capsules[:] + self.layout = layout + self.score = 0 + self.scoreChange = 0 + + self.agentStates = [] + numGhosts = 0 + for isPacman, pos in layout.agentPositions: + if not isPacman: + if numGhosts == numGhostAgents: continue # Max ghosts reached already + else: numGhosts += 1 + self.agentStates.append( AgentState( Configuration( pos, Directions.STOP), isPacman) ) + self._eaten = [False for a in self.agentStates] + +try: + import boinc + _BOINC_ENABLED = True +except: + _BOINC_ENABLED = False + +class Game: + """ + The Game manages the control flow, soliciting actions from agents. + """ + + def __init__( self, agents, display, rules, startingIndex=0, muteAgents=False, catchExceptions=False ): + self.agentCrashed = False + self.agents = agents + self.display = display + self.rules = rules + self.startingIndex = startingIndex + self.gameOver = False + self.muteAgents = muteAgents + self.catchExceptions = catchExceptions + self.moveHistory = [] + self.totalAgentTimes = [0 for agent in agents] + self.totalAgentTimeWarnings = [0 for agent in agents] + self.agentTimeout = False + import cStringIO + self.agentOutput = [cStringIO.StringIO() for agent in agents] + + def getProgress(self): + if self.gameOver: + return 1.0 + else: + return self.rules.getProgress(self) + + def _agentCrash( self, agentIndex, quiet=False): + "Helper method for handling agent crashes" + if not quiet: traceback.print_exc() + self.gameOver = True + self.agentCrashed = True + self.rules.agentCrash(self, agentIndex) + + OLD_STDOUT = None + OLD_STDERR = None + + def mute(self, agentIndex): + if not self.muteAgents: return + global OLD_STDOUT, OLD_STDERR + import cStringIO + OLD_STDOUT = sys.stdout + OLD_STDERR = sys.stderr + sys.stdout = self.agentOutput[agentIndex] + sys.stderr = self.agentOutput[agentIndex] + + def unmute(self): + if not self.muteAgents: return + global OLD_STDOUT, OLD_STDERR + # Revert stdout/stderr to originals + sys.stdout = OLD_STDOUT + sys.stderr = OLD_STDERR + + + def run( self ): + """ + Main control loop for game play. + """ + self.display.initialize(self.state.data) + self.numMoves = 0 + + ###self.display.initialize(self.state.makeObservation(1).data) + # inform learning agents of the game start + for i in range(len(self.agents)): + agent = self.agents[i] + if not agent: + self.mute(i) + # this is a null agent, meaning it failed to load + # the other team wins + print >>sys.stderr, "Agent %d failed to load" % i + self.unmute() + self._agentCrash(i, quiet=True) + return + if ("registerInitialState" in dir(agent)): + self.mute(i) + if self.catchExceptions: + try: + timed_func = TimeoutFunction(agent.registerInitialState, int(self.rules.getMaxStartupTime(i))) + try: + start_time = time.time() + timed_func(self.state.deepCopy()) + time_taken = time.time() - start_time + self.totalAgentTimes[i] += time_taken + except TimeoutFunctionException: + print >>sys.stderr, "Agent %d ran out of time on startup!" 
% i + self.unmute() + self.agentTimeout = True + self._agentCrash(i, quiet=True) + return + except Exception,data: + self._agentCrash(i, quiet=False) + self.unmute() + return + else: + agent.registerInitialState(self.state.deepCopy()) + ## TODO: could this exceed the total time + self.unmute() + + agentIndex = self.startingIndex + numAgents = len( self.agents ) + + while not self.gameOver: + # Fetch the next agent + agent = self.agents[agentIndex] + move_time = 0 + skip_action = False + # Generate an observation of the state + if 'observationFunction' in dir( agent ): + self.mute(agentIndex) + if self.catchExceptions: + try: + timed_func = TimeoutFunction(agent.observationFunction, int(self.rules.getMoveTimeout(agentIndex))) + try: + start_time = time.time() + observation = timed_func(self.state.deepCopy()) + except TimeoutFunctionException: + skip_action = True + move_time += time.time() - start_time + self.unmute() + except Exception,data: + self._agentCrash(agentIndex, quiet=False) + self.unmute() + return + else: + observation = agent.observationFunction(self.state.deepCopy()) + self.unmute() + else: + observation = self.state.deepCopy() + + # Solicit an action + action = None + self.mute(agentIndex) + if self.catchExceptions: + try: + timed_func = TimeoutFunction(agent.getAction, int(self.rules.getMoveTimeout(agentIndex)) - int(move_time)) + try: + start_time = time.time() + if skip_action: + raise TimeoutFunctionException() + action = timed_func( observation ) + except TimeoutFunctionException: + print >>sys.stderr, "Agent %d timed out on a single move!" % agentIndex + self.agentTimeout = True + self._agentCrash(agentIndex, quiet=True) + self.unmute() + return + + move_time += time.time() - start_time + + if move_time > self.rules.getMoveWarningTime(agentIndex): + self.totalAgentTimeWarnings[agentIndex] += 1 + print >>sys.stderr, "Agent %d took too long to make a move! This is warning %d" % (agentIndex, self.totalAgentTimeWarnings[agentIndex]) + if self.totalAgentTimeWarnings[agentIndex] > self.rules.getMaxTimeWarnings(agentIndex): + print >>sys.stderr, "Agent %d exceeded the maximum number of warnings: %d" % (agentIndex, self.totalAgentTimeWarnings[agentIndex]) + self.agentTimeout = True + self._agentCrash(agentIndex, quiet=True) + self.unmute() + return + + self.totalAgentTimes[agentIndex] += move_time + #print "Agent: %d, time: %f, total: %f" % (agentIndex, move_time, self.totalAgentTimes[agentIndex]) + if self.totalAgentTimes[agentIndex] > self.rules.getMaxTotalTime(agentIndex): + print >>sys.stderr, "Agent %d ran out of time! (time: %1.2f)" % (agentIndex, self.totalAgentTimes[agentIndex]) + self.agentTimeout = True + self._agentCrash(agentIndex, quiet=True) + self.unmute() + return + self.unmute() + except Exception,data: + self._agentCrash(agentIndex) + self.unmute() + return + else: + action = agent.getAction(observation) + self.unmute() + + # Execute the action + self.moveHistory.append( (agentIndex, action) ) + if self.catchExceptions: + try: + self.state = self.state.generateSuccessor( agentIndex, action ) + except Exception,data: + self.mute(agentIndex) + self._agentCrash(agentIndex) + self.unmute() + return + else: + self.state = self.state.generateSuccessor( agentIndex, action ) + + # Change the display + self.display.update( self.state.data ) + ###idx = agentIndex - agentIndex % 2 + 1 + ###self.display.update( self.state.makeObservation(idx).data ) + + # Allow for game specific conditions (winning, losing, etc.) 
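Roughly, the rules object consulted on the next line only needs a process hook that inspects the state and flags the game as over; a minimal stand-in is sketched below (illustrative only — the project's ClassicGameRules in pacman.py is the real implementation, and the isWin/isLose calls assume the GameState API from that file):

    class SimpleRules:
        "Minimal stand-in for the rules object used by Game (illustrative)."
        def process(self, state, game):
            # end the game as soon as the state reports a win or a loss
            if state.isWin() or state.isLose():
                game.gameOver = True
        def getProgress(self, game):
            # crude progress estimate against an assumed budget of 100 moves
            return min(1.0, game.numMoves / 100.0)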
+ self.rules.process(self.state, self) + # Track progress + if agentIndex == numAgents + 1: self.numMoves += 1 + # Next agent + agentIndex = ( agentIndex + 1 ) % numAgents + + if _BOINC_ENABLED: + boinc.set_fraction_done(self.getProgress()) + + # inform a learning agent of the game result + for agentIndex, agent in enumerate(self.agents): + if "final" in dir( agent ) : + try: + self.mute(agentIndex) + agent.final( self.state ) + self.unmute() + except Exception,data: + if not self.catchExceptions: raise + self._agentCrash(agentIndex) + self.unmute() + return + self.display.finish() diff --git a/reinforcement/ghostAgents.py b/reinforcement/ghostAgents.py new file mode 100644 index 0000000..c3afe1f --- /dev/null +++ b/reinforcement/ghostAgents.py @@ -0,0 +1,81 @@ +# ghostAgents.py +# -------------- +# Licensing Information: You are free to use or extend these projects for +# educational purposes provided that (1) you do not distribute or publish +# solutions, (2) you retain this notice, and (3) you provide clear +# attribution to UC Berkeley, including a link to http://ai.berkeley.edu. +# +# Attribution Information: The Pacman AI projects were developed at UC Berkeley. +# The core projects and autograders were primarily created by John DeNero +# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). +# Student side autograding was added by Brad Miller, Nick Hay, and +# Pieter Abbeel (pabbeel@cs.berkeley.edu). + + +from game import Agent +from game import Actions +from game import Directions +import random +from util import manhattanDistance +import util + +class GhostAgent( Agent ): + def __init__( self, index ): + self.index = index + + def getAction( self, state ): + dist = self.getDistribution(state) + if len(dist) == 0: + return Directions.STOP + else: + return util.chooseFromDistribution( dist ) + + def getDistribution(self, state): + "Returns a Counter encoding a distribution over actions from the provided state." + util.raiseNotDefined() + +class RandomGhost( GhostAgent ): + "A ghost that chooses a legal action uniformly at random." + def getDistribution( self, state ): + dist = util.Counter() + for a in state.getLegalActions( self.index ): dist[a] = 1.0 + dist.normalize() + return dist + +class DirectionalGhost( GhostAgent ): + "A ghost that prefers to rush Pacman, or flee when scared." 
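The getDistribution method below mixes a greedy choice (rush toward Pacman when brave, flee when scared) with uniform noise over all legal actions. A stand-alone sketch of that mixture, using plain dicts instead of util.Counter (action names and probabilities are illustrative):

    def rush_or_flee(legal_actions, distances, best_prob, flee=False):
        # keep the actions that minimize (rush) or maximize (flee) distance to Pacman
        best = max(distances.values()) if flee else min(distances.values())
        best_actions = [a for a in legal_actions if distances[a] == best]
        dist = dict((a, 0.0) for a in legal_actions)
        for a in best_actions:
            dist[a] += best_prob / len(best_actions)
        for a in legal_actions:
            dist[a] += (1.0 - best_prob) / len(legal_actions)
        return dist   # already sums to 1, so normalization is a no-op here

    # e.g. rush_or_flee(['North', 'East', 'West'], {'North': 2, 'East': 2, 'West': 5}, 0.8)
    # gives North = East = 0.8/2 + 0.2/3 ~ 0.467 and West = 0.2/3 ~ 0.067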
+ def __init__( self, index, prob_attack=0.8, prob_scaredFlee=0.8 ): + self.index = index + self.prob_attack = prob_attack + self.prob_scaredFlee = prob_scaredFlee + + def getDistribution( self, state ): + # Read variables from state + ghostState = state.getGhostState( self.index ) + legalActions = state.getLegalActions( self.index ) + pos = state.getGhostPosition( self.index ) + isScared = ghostState.scaredTimer > 0 + + speed = 1 + if isScared: speed = 0.5 + + actionVectors = [Actions.directionToVector( a, speed ) for a in legalActions] + newPositions = [( pos[0]+a[0], pos[1]+a[1] ) for a in actionVectors] + pacmanPosition = state.getPacmanPosition() + + # Select best actions given the state + distancesToPacman = [manhattanDistance( pos, pacmanPosition ) for pos in newPositions] + if isScared: + bestScore = max( distancesToPacman ) + bestProb = self.prob_scaredFlee + else: + bestScore = min( distancesToPacman ) + bestProb = self.prob_attack + bestActions = [action for action, distance in zip( legalActions, distancesToPacman ) if distance == bestScore] + + # Construct distribution + dist = util.Counter() + for a in bestActions: dist[a] = bestProb / len(bestActions) + for a in legalActions: dist[a] += ( 1-bestProb ) / len(legalActions) + dist.normalize() + return dist diff --git a/reinforcement/grading.py b/reinforcement/grading.py new file mode 100644 index 0000000..0ef07a9 --- /dev/null +++ b/reinforcement/grading.py @@ -0,0 +1,282 @@ +# grading.py +# ---------- +# Licensing Information: You are free to use or extend these projects for +# educational purposes provided that (1) you do not distribute or publish +# solutions, (2) you retain this notice, and (3) you provide clear +# attribution to UC Berkeley, including a link to http://ai.berkeley.edu. +# +# Attribution Information: The Pacman AI projects were developed at UC Berkeley. +# The core projects and autograders were primarily created by John DeNero +# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). +# Student side autograding was added by Brad Miller, Nick Hay, and +# Pieter Abbeel (pabbeel@cs.berkeley.edu). 
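As a usage sketch for the Grades class defined below (the project's autograder.py is the real driver; the project name, point value, and question body here are made up, and the script assumes it is run with the project's Python 2 interpreter from the reinforcement/ directory):

    import sys
    import grading   # this module, once the patch is applied

    def q1(grades):
        # a question function receives the Grades instance and awards points
        grades.addMessage('value iteration sanity check passed')
        grades.assignFullCredit()

    if __name__ == '__main__':
        grades = grading.Grades('demo project', [('q1', 3)])
        grades.grade(sys.modules[__name__])   # looks up q1 here, times it, prints a summary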
+ + +"Common code for autograders" + +import cgi +import time +import sys +import traceback +import pdb +from collections import defaultdict +import util + +class Grades: + "A data structure for project grades, along with formatting code to display them" + def __init__(self, projectName, questionsAndMaxesList, edxOutput=False, muteOutput=False): + """ + Defines the grading scheme for a project + projectName: project name + questionsAndMaxesDict: a list of (question name, max points per question) + """ + self.questions = [el[0] for el in questionsAndMaxesList] + self.maxes = dict(questionsAndMaxesList) + self.points = Counter() + self.messages = dict([(q, []) for q in self.questions]) + self.project = projectName + self.start = time.localtime()[1:6] + self.sane = True # Sanity checks + self.currentQuestion = None # Which question we're grading + self.edxOutput = edxOutput + self.mute = muteOutput + self.prereqs = defaultdict(set) + + #print 'Autograder transcript for %s' % self.project + print 'Starting on %d-%d at %d:%02d:%02d' % self.start + + def addPrereq(self, question, prereq): + self.prereqs[question].add(prereq) + + def grade(self, gradingModule, exceptionMap = {}, bonusPic = False): + """ + Grades each question + gradingModule: the module with all the grading functions (pass in with sys.modules[__name__]) + """ + + completedQuestions = set([]) + for q in self.questions: + print '\nQuestion %s' % q + print '=' * (9 + len(q)) + print + self.currentQuestion = q + + incompleted = self.prereqs[q].difference(completedQuestions) + if len(incompleted) > 0: + prereq = incompleted.pop() + print \ +"""*** NOTE: Make sure to complete Question %s before working on Question %s, +*** because Question %s builds upon your answer for Question %s. +""" % (prereq, q, q, prereq) + continue + + if self.mute: util.mutePrint() + try: + util.TimeoutFunction(getattr(gradingModule, q),300)(self) # Call the question's function + #TimeoutFunction(getattr(gradingModule, q),1200)(self) # Call the question's function + except Exception, inst: + self.addExceptionMessage(q, inst, traceback) + self.addErrorHints(exceptionMap, inst, q[1]) + except: + self.fail('FAIL: Terminated with a string exception.') + finally: + if self.mute: util.unmutePrint() + + if self.points[q] >= self.maxes[q]: + completedQuestions.add(q) + + print '\n### Question %s: %d/%d ###\n' % (q, self.points[q], self.maxes[q]) + + + print '\nFinished at %d:%02d:%02d' % time.localtime()[3:6] + print "\nProvisional grades\n==================" + + for q in self.questions: + print 'Question %s: %d/%d' % (q, self.points[q], self.maxes[q]) + print '------------------' + print 'Total: %d/%d' % (self.points.totalCount(), sum(self.maxes.values())) + if bonusPic and self.points.totalCount() == 25: + print """ + + ALL HAIL GRANDPAC. + LONG LIVE THE GHOSTBUSTING KING. 
+ + --- ---- --- + | \ / + \ / | + | + \--/ \--/ + | + | + + | + | + + + | + @@@@@@@@@@@@@@@@@@@@@@@@@@ + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + \ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + \ / @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + V \ @@@@@@@@@@@@@@@@@@@@@@@@@@@@ + \ / @@@@@@@@@@@@@@@@@@@@@@@@@@ + V @@@@@@@@@@@@@@@@@@@@@@@@ + @@@@@@@@@@@@@@@@@@@@@@ + /\ @@@@@@@@@@@@@@@@@@@@@@ + / \ @@@@@@@@@@@@@@@@@@@@@@@@@ + /\ / @@@@@@@@@@@@@@@@@@@@@@@@@@@ + / \ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + / @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @@@@@@@@@@@@@@@@@@@@@@@@@@ + @@@@@@@@@@@@@@@@@@ + +""" + print """ +Your grades are NOT yet registered. To register your grades, make sure +to follow your instructor's guidelines to receive credit on your project. +""" + + if self.edxOutput: + self.produceOutput() + + def addExceptionMessage(self, q, inst, traceback): + """ + Method to format the exception message, this is more complicated because + we need to cgi.escape the traceback but wrap the exception in a
<pre> tag
+    """
+    self.fail('FAIL: Exception raised: %s' % inst)
+    self.addMessage('')
+    for line in traceback.format_exc().split('\n'):
+        self.addMessage(line)
+
+  def addErrorHints(self, exceptionMap, errorInstance, questionNum):
+    typeOf = str(type(errorInstance))
+    questionName = 'q' + questionNum
+    errorHint = ''
+
+    # question-specific error hints
+    if exceptionMap.get(questionName):
+      questionMap = exceptionMap.get(questionName)
+      if (questionMap.get(typeOf)):
+        errorHint = questionMap.get(typeOf)
+    # fall back to general error messages if a question-specific
+    # one does not exist
+    if (exceptionMap.get(typeOf)):
+      errorHint = exceptionMap.get(typeOf)
+
+    # don't include the HTML if we have no error hint
+    if not errorHint:
+      return ''
+
+    for line in errorHint.split('\n'):
+      self.addMessage(line)
+
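The exceptionMap that addErrorHints consults is a plain nested dict: hints can be keyed per question and then per stringified exception type, with bare type strings as a general fallback (the hint text below is illustrative):

    exception_map = {
        'q1': {
            "<type 'exceptions.IndexError'>": 'Check how your value iteration indexes states.',
        },
        "<type 'exceptions.AttributeError'>": 'A required method appears to be missing.',
    }
    # grade() calls addErrorHints(exception_map, inst, q[1]); for question 'q1' it first
    # looks under 'q1', then falls back to the top-level entry for str(type(inst)).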
+  def produceOutput(self):
+    edxOutput = open('edx_response.html', 'w')
+    edxOutput.write("
") + + # first sum + total_possible = sum(self.maxes.values()) + total_score = sum(self.points.values()) + checkOrX = '' + if (total_score >= total_possible): + checkOrX = '' + header = """ +

+ Total score ({total_score} / {total_possible}) +

+ """.format(total_score = total_score, + total_possible = total_possible, + checkOrX = checkOrX + ) + edxOutput.write(header) + + for q in self.questions: + if len(q) == 2: + name = q[1] + else: + name = q + checkOrX = '' + if (self.points[q] == self.maxes[q]): + checkOrX = '' + #messages = '\n
\n'.join(self.messages[q]) + messages = "
%s
" % '\n'.join(self.messages[q]) + output = """ +
+
+
+ Question {q} ({points}/{max}) {checkOrX} +
+
+ {messages} +
+
+
+ """.format(q = name, + max = self.maxes[q], + messages = messages, + checkOrX = checkOrX, + points = self.points[q] + ) + # print "*** output for Question %s " % q[1] + # print output + edxOutput.write(output) + edxOutput.write("
") + edxOutput.close() + edxOutput = open('edx_grade', 'w') + edxOutput.write(str(self.points.totalCount())) + edxOutput.close() + + def fail(self, message, raw=False): + "Sets sanity check bit to false and outputs a message" + self.sane = False + self.assignZeroCredit() + self.addMessage(message, raw) + + def assignZeroCredit(self): + self.points[self.currentQuestion] = 0 + + def addPoints(self, amt): + self.points[self.currentQuestion] += amt + + def deductPoints(self, amt): + self.points[self.currentQuestion] -= amt + + def assignFullCredit(self, message="", raw=False): + self.points[self.currentQuestion] = self.maxes[self.currentQuestion] + if message != "": + self.addMessage(message, raw) + + def addMessage(self, message, raw=False): + if not raw: + # We assume raw messages, formatted for HTML, are printed separately + if self.mute: util.unmutePrint() + print '*** ' + message + if self.mute: util.mutePrint() + message = cgi.escape(message) + self.messages[self.currentQuestion].append(message) + + def addMessageToEmail(self, message): + print "WARNING**** addMessageToEmail is deprecated %s" % message + for line in message.split('\n'): + pass + #print '%%% ' + line + ' %%%' + #self.messages[self.currentQuestion].append(line) + + + + + +class Counter(dict): + """ + Dict with default 0 + """ + def __getitem__(self, idx): + try: + return dict.__getitem__(self, idx) + except KeyError: + return 0 + + def totalCount(self): + """ + Returns the sum of counts for all keys. + """ + return sum(self.values()) + diff --git a/reinforcement/graphicsCrawlerDisplay.py b/reinforcement/graphicsCrawlerDisplay.py new file mode 100644 index 0000000..4c9cf41 --- /dev/null +++ b/reinforcement/graphicsCrawlerDisplay.py @@ -0,0 +1,333 @@ +# graphicsCrawlerDisplay.py +# ------------------------- +# Licensing Information: You are free to use or extend these projects for +# educational purposes provided that (1) you do not distribute or publish +# solutions, (2) you retain this notice, and (3) you provide clear +# attribution to UC Berkeley, including a link to http://ai.berkeley.edu. +# +# Attribution Information: The Pacman AI projects were developed at UC Berkeley. +# The core projects and autograders were primarily created by John DeNero +# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). +# Student side autograding was added by Brad Miller, Nick Hay, and +# Pieter Abbeel (pabbeel@cs.berkeley.edu). + + +# graphicsCrawlerDisplay.py +# ------------------------- +# Licensing Information: Please do not distribute or publish solutions to this +# project. You are free to use and extend these projects for educational +# purposes. The Pacman AI projects were developed at UC Berkeley, primarily by +# John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). +# Student side autograding was added by Brad Miller, Nick Hay, and Pieter +# Abbeel in Spring 2013. 
+# For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html + +import Tkinter +import qlearningAgents +import time +import threading +import sys +import crawler +#import pendulum +import math +from math import pi as PI + +robotType = 'crawler' + +class Application: + + def sigmoid(self, x): + return 1.0 / (1.0 + 2.0 ** (-x)) + + def incrementSpeed(self, inc): + self.tickTime *= inc +# self.epsilon = min(1.0, self.epsilon) +# self.epsilon = max(0.0,self.epsilon) +# self.learner.setSpeed(self.epsilon) + self.speed_label['text'] = 'Step Delay: %.5f' % (self.tickTime) + + def incrementEpsilon(self, inc): + self.ep += inc + self.epsilon = self.sigmoid(self.ep) + self.learner.setEpsilon(self.epsilon) + self.epsilon_label['text'] = 'Epsilon: %.3f' % (self.epsilon) + + def incrementGamma(self, inc): + self.ga += inc + self.gamma = self.sigmoid(self.ga) + self.learner.setDiscount(self.gamma) + self.gamma_label['text'] = 'Discount: %.3f' % (self.gamma) + + def incrementAlpha(self, inc): + self.al += inc + self.alpha = self.sigmoid(self.al) + self.learner.setLearningRate(self.alpha) + self.alpha_label['text'] = 'Learning Rate: %.3f' % (self.alpha) + + def __initGUI(self, win): + ## Window ## + self.win = win + + ## Initialize Frame ## + win.grid() + self.dec = -.5 + self.inc = .5 + self.tickTime = 0.1 + + ## Epsilon Button + Label ## + self.setupSpeedButtonAndLabel(win) + + self.setupEpsilonButtonAndLabel(win) + + ## Gamma Button + Label ## + self.setUpGammaButtonAndLabel(win) + + ## Alpha Button + Label ## + self.setupAlphaButtonAndLabel(win) + + ## Exit Button ## + #self.exit_button = Tkinter.Button(win,text='Quit', command=self.exit) + #self.exit_button.grid(row=0, column=9) + + ## Simulation Buttons ## +# self.setupSimulationButtons(win) + + ## Canvas ## + self.canvas = Tkinter.Canvas(root, height=200, width=1000) + self.canvas.grid(row=2,columnspan=10) + + def setupAlphaButtonAndLabel(self, win): + self.alpha_minus = Tkinter.Button(win, + text="-",command=(lambda: self.incrementAlpha(self.dec))) + self.alpha_minus.grid(row=1, column=3, padx=10) + + self.alpha = self.sigmoid(self.al) + self.alpha_label = Tkinter.Label(win, text='Learning Rate: %.3f' % (self.alpha)) + self.alpha_label.grid(row=1, column=4) + + self.alpha_plus = Tkinter.Button(win, + text="+",command=(lambda: self.incrementAlpha(self.inc))) + self.alpha_plus.grid(row=1, column=5, padx=10) + + def setUpGammaButtonAndLabel(self, win): + self.gamma_minus = Tkinter.Button(win, + text="-",command=(lambda: self.incrementGamma(self.dec))) + self.gamma_minus.grid(row=1, column=0, padx=10) + + self.gamma = self.sigmoid(self.ga) + self.gamma_label = Tkinter.Label(win, text='Discount: %.3f' % (self.gamma)) + self.gamma_label.grid(row=1, column=1) + + self.gamma_plus = Tkinter.Button(win, + text="+",command=(lambda: self.incrementGamma(self.inc))) + self.gamma_plus.grid(row=1, column=2, padx=10) + + def setupEpsilonButtonAndLabel(self, win): + self.epsilon_minus = Tkinter.Button(win, + text="-",command=(lambda: self.incrementEpsilon(self.dec))) + self.epsilon_minus.grid(row=0, column=3) + + self.epsilon = self.sigmoid(self.ep) + self.epsilon_label = Tkinter.Label(win, text='Epsilon: %.3f' % (self.epsilon)) + self.epsilon_label.grid(row=0, column=4) + + self.epsilon_plus = Tkinter.Button(win, + text="+",command=(lambda: self.incrementEpsilon(self.inc))) + self.epsilon_plus.grid(row=0, column=5) + + def setupSpeedButtonAndLabel(self, win): + self.speed_minus = Tkinter.Button(win, + text="-",command=(lambda: 
self.incrementSpeed(.5))) + self.speed_minus.grid(row=0, column=0) + + self.speed_label = Tkinter.Label(win, text='Step Delay: %.5f' % (self.tickTime)) + self.speed_label.grid(row=0, column=1) + + self.speed_plus = Tkinter.Button(win, + text="+",command=(lambda: self.incrementSpeed(2))) + self.speed_plus.grid(row=0, column=2) + + + + + + + + def skip5kSteps(self): + self.stepsToSkip = 5000 + + def __init__(self, win): + + self.ep = 0 + self.ga = 2 + self.al = 2 + self.stepCount = 0 + ## Init Gui + + self.__initGUI(win) + + # Init environment + if robotType == 'crawler': + self.robot = crawler.CrawlingRobot(self.canvas) + self.robotEnvironment = crawler.CrawlingRobotEnvironment(self.robot) + elif robotType == 'pendulum': + self.robot = pendulum.PendulumRobot(self.canvas) + self.robotEnvironment = \ + pendulum.PendulumRobotEnvironment(self.robot) + else: + raise "Unknown RobotType" + + # Init Agent + simulationFn = lambda agent: \ + simulation.SimulationEnvironment(self.robotEnvironment,agent) + actionFn = lambda state: \ + self.robotEnvironment.getPossibleActions(state) + self.learner = qlearningAgents.QLearningAgent(actionFn=actionFn) + + self.learner.setEpsilon(self.epsilon) + self.learner.setLearningRate(self.alpha) + self.learner.setDiscount(self.gamma) + + # Start GUI + self.running = True + self.stopped = False + self.stepsToSkip = 0 + self.thread = threading.Thread(target=self.run) + self.thread.start() + + + def exit(self): + self.running = False + for i in range(5): + if not self.stopped: + time.sleep(0.1) + try: + self.win.destroy() + except: + pass + sys.exit(0) + + def step(self): + + self.stepCount += 1 + + state = self.robotEnvironment.getCurrentState() + actions = self.robotEnvironment.getPossibleActions(state) + if len(actions) == 0.0: + self.robotEnvironment.reset() + state = self.robotEnvironment.getCurrentState() + actions = self.robotEnvironment.getPossibleActions(state) + print 'Reset!' 
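For reference, the +/- buttons above shift an unbounded counter and squash it through sigmoid(x) = 1 / (1 + 2**-x), which keeps epsilon, the discount, and the learning rate in (0, 1). Checking the defaults set in __init__ (self.ep = 0, self.ga = self.al = 2):

    def sigmoid(x):
        return 1.0 / (1.0 + 2.0 ** (-x))

    print(sigmoid(0))     # 0.5   -> initial epsilon
    print(sigmoid(2))     # 0.8   -> initial discount and learning rate
    print(sigmoid(2.5))   # ~0.85 -> after one '+' click (inc = 0.5)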
+ action = self.learner.getAction(state) + if action == None: + raise 'None action returned: Code Not Complete' + nextState, reward = self.robotEnvironment.doAction(action) + self.learner.observeTransition(state, action, nextState, reward) + + def animatePolicy(self): + if robotType != 'pendulum': + raise 'Only pendulum can animatePolicy' + + + totWidth = self.canvas.winfo_reqwidth() + totHeight = self.canvas.winfo_reqheight() + + length = 0.48 * min(totWidth, totHeight) + x,y = totWidth-length-30, length+10 + + + + angleMin, angleMax = self.robot.getMinAndMaxAngle() + velMin, velMax = self.robot.getMinAndMaxAngleVelocity() + + if not 'animatePolicyBox' in dir(self): + self.canvas.create_line(x,y,x+length,y) + self.canvas.create_line(x+length,y,x+length,y-length) + self.canvas.create_line(x+length,y-length,x,y-length) + self.canvas.create_line(x,y-length,x,y) + self.animatePolicyBox = 1 + self.canvas.create_text(x+length/2,y+10,text='angle') + self.canvas.create_text(x-30,y-length/2,text='velocity') + self.canvas.create_text(x-60,y-length/4,text='Blue = kickLeft') + self.canvas.create_text(x-60,y-length/4+20,text='Red = kickRight') + self.canvas.create_text(x-60,y-length/4+40,text='White = doNothing') + + + + angleDelta = (angleMax-angleMin) / 100 + velDelta = (velMax-velMin) / 100 + for i in range(100): + angle = angleMin + i * angleDelta + + for j in range(100): + vel = velMin + j * velDelta + state = self.robotEnvironment.getState(angle,vel) + max, argMax = None, None + if not self.learner.seenState(state): + argMax = 'unseen' + else: + for action in ('kickLeft','kickRight','doNothing'): + qVal = self.learner.getQValue(state, action) + if max == None or qVal > max: + max, argMax = qVal, action + if argMax != 'unseen': + if argMax == 'kickLeft': + color = 'blue' + elif argMax == 'kickRight': + color = 'red' + elif argMax == 'doNothing': + color = 'white' + dx = length / 100.0 + dy = length / 100.0 + x0, y0 = x+i*dx, y-j*dy + self.canvas.create_rectangle(x0,y0,x0+dx,y0+dy,fill=color) + + + + + def run(self): + self.stepCount = 0 + self.learner.startEpisode() + while True: + minSleep = .01 + tm = max(minSleep, self.tickTime) + time.sleep(tm) + self.stepsToSkip = int(tm / self.tickTime) - 1 + + if not self.running: + self.stopped = True + return + for i in range(self.stepsToSkip): + self.step() + self.stepsToSkip = 0 + self.step() +# self.robot.draw() + self.learner.stopEpisode() + + def start(self): + self.win.mainloop() + + + + + +def run(): + global root + root = Tkinter.Tk() + root.title( 'Crawler GUI' ) + root.resizable( 0, 0 ) + +# root.mainloop() + + + app = Application(root) + def update_gui(): + app.robot.draw(app.stepCount, app.tickTime) + root.after(10, update_gui) + update_gui() + + root.protocol( 'WM_DELETE_WINDOW', app.exit) + try: + app.start() + except: + app.exit() diff --git a/reinforcement/graphicsDisplay.py b/reinforcement/graphicsDisplay.py new file mode 100644 index 0000000..1bfe1b3 --- /dev/null +++ b/reinforcement/graphicsDisplay.py @@ -0,0 +1,679 @@ +# graphicsDisplay.py +# ------------------ +# Licensing Information: You are free to use or extend these projects for +# educational purposes provided that (1) you do not distribute or publish +# solutions, (2) you retain this notice, and (3) you provide clear +# attribution to UC Berkeley, including a link to http://ai.berkeley.edu. +# +# Attribution Information: The Pacman AI projects were developed at UC Berkeley. 
+# The core projects and autograders were primarily created by John DeNero +# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). +# Student side autograding was added by Brad Miller, Nick Hay, and +# Pieter Abbeel (pabbeel@cs.berkeley.edu). + + +from graphicsUtils import * +import math, time +from game import Directions + +########################### +# GRAPHICS DISPLAY CODE # +########################### + +# Most code by Dan Klein and John Denero written or rewritten for cs188, UC Berkeley. +# Some code from a Pacman implementation by LiveWires, and used / modified with permission. + +DEFAULT_GRID_SIZE = 30.0 +INFO_PANE_HEIGHT = 35 +BACKGROUND_COLOR = formatColor(0,0,0) +WALL_COLOR = formatColor(0.0/255.0, 51.0/255.0, 255.0/255.0) +INFO_PANE_COLOR = formatColor(.4,.4,0) +SCORE_COLOR = formatColor(.9, .9, .9) +PACMAN_OUTLINE_WIDTH = 2 +PACMAN_CAPTURE_OUTLINE_WIDTH = 4 + +GHOST_COLORS = [] +GHOST_COLORS.append(formatColor(.9,0,0)) # Red +GHOST_COLORS.append(formatColor(0,.3,.9)) # Blue +GHOST_COLORS.append(formatColor(.98,.41,.07)) # Orange +GHOST_COLORS.append(formatColor(.1,.75,.7)) # Green +GHOST_COLORS.append(formatColor(1.0,0.6,0.0)) # Yellow +GHOST_COLORS.append(formatColor(.4,0.13,0.91)) # Purple + +TEAM_COLORS = GHOST_COLORS[:2] + +GHOST_SHAPE = [ + ( 0, 0.3 ), + ( 0.25, 0.75 ), + ( 0.5, 0.3 ), + ( 0.75, 0.75 ), + ( 0.75, -0.5 ), + ( 0.5, -0.75 ), + (-0.5, -0.75 ), + (-0.75, -0.5 ), + (-0.75, 0.75 ), + (-0.5, 0.3 ), + (-0.25, 0.75 ) + ] +GHOST_SIZE = 0.65 +SCARED_COLOR = formatColor(1,1,1) + +GHOST_VEC_COLORS = map(colorToVector, GHOST_COLORS) + +PACMAN_COLOR = formatColor(255.0/255.0,255.0/255.0,61.0/255) +PACMAN_SCALE = 0.5 +#pacman_speed = 0.25 + +# Food +FOOD_COLOR = formatColor(1,1,1) +FOOD_SIZE = 0.1 + +# Laser +LASER_COLOR = formatColor(1,0,0) +LASER_SIZE = 0.02 + +# Capsule graphics +CAPSULE_COLOR = formatColor(1,1,1) +CAPSULE_SIZE = 0.25 + +# Drawing walls +WALL_RADIUS = 0.15 + +class InfoPane: + def __init__(self, layout, gridSize): + self.gridSize = gridSize + self.width = (layout.width) * gridSize + self.base = (layout.height + 1) * gridSize + self.height = INFO_PANE_HEIGHT + self.fontSize = 24 + self.textColor = PACMAN_COLOR + self.drawPane() + + def toScreen(self, pos, y = None): + """ + Translates a point relative from the bottom left of the info pane. 
+ """ + if y == None: + x,y = pos + else: + x = pos + + x = self.gridSize + x # Margin + y = self.base + y + return x,y + + def drawPane(self): + self.scoreText = text( self.toScreen(0, 0 ), self.textColor, "SCORE: 0", "Times", self.fontSize, "bold") + + def initializeGhostDistances(self, distances): + self.ghostDistanceText = [] + + size = 20 + if self.width < 240: + size = 12 + if self.width < 160: + size = 10 + + for i, d in enumerate(distances): + t = text( self.toScreen(self.width/2 + self.width/8 * i, 0), GHOST_COLORS[i+1], d, "Times", size, "bold") + self.ghostDistanceText.append(t) + + def updateScore(self, score): + changeText(self.scoreText, "SCORE: % 4d" % score) + + def setTeam(self, isBlue): + text = "RED TEAM" + if isBlue: text = "BLUE TEAM" + self.teamText = text( self.toScreen(300, 0 ), self.textColor, text, "Times", self.fontSize, "bold") + + def updateGhostDistances(self, distances): + if len(distances) == 0: return + if 'ghostDistanceText' not in dir(self): self.initializeGhostDistances(distances) + else: + for i, d in enumerate(distances): + changeText(self.ghostDistanceText[i], d) + + def drawGhost(self): + pass + + def drawPacman(self): + pass + + def drawWarning(self): + pass + + def clearIcon(self): + pass + + def updateMessage(self, message): + pass + + def clearMessage(self): + pass + + +class PacmanGraphics: + def __init__(self, zoom=1.0, frameTime=0.0, capture=False): + self.have_window = 0 + self.currentGhostImages = {} + self.pacmanImage = None + self.zoom = zoom + self.gridSize = DEFAULT_GRID_SIZE * zoom + self.capture = capture + self.frameTime = frameTime + + def checkNullDisplay(self): + return False + + def initialize(self, state, isBlue = False): + self.isBlue = isBlue + self.startGraphics(state) + + # self.drawDistributions(state) + self.distributionImages = None # Initialized lazily + self.drawStaticObjects(state) + self.drawAgentObjects(state) + + # Information + self.previousState = state + + def startGraphics(self, state): + self.layout = state.layout + layout = self.layout + self.width = layout.width + self.height = layout.height + self.make_window(self.width, self.height) + self.infoPane = InfoPane(layout, self.gridSize) + self.currentState = layout + + def drawDistributions(self, state): + walls = state.layout.walls + dist = [] + for x in range(walls.width): + distx = [] + dist.append(distx) + for y in range(walls.height): + ( screen_x, screen_y ) = self.to_screen( (x, y) ) + block = square( (screen_x, screen_y), + 0.5 * self.gridSize, + color = BACKGROUND_COLOR, + filled = 1, behind=2) + distx.append(block) + self.distributionImages = dist + + def drawStaticObjects(self, state): + layout = self.layout + self.drawWalls(layout.walls) + self.food = self.drawFood(layout.food) + self.capsules = self.drawCapsules(layout.capsules) + refresh() + + def drawAgentObjects(self, state): + self.agentImages = [] # (agentState, image) + for index, agent in enumerate(state.agentStates): + if agent.isPacman: + image = self.drawPacman(agent, index) + self.agentImages.append( (agent, image) ) + else: + image = self.drawGhost(agent, index) + self.agentImages.append( (agent, image) ) + refresh() + + def swapImages(self, agentIndex, newState): + """ + Changes an image from a ghost to a pacman or vis versa (for capture) + """ + prevState, prevImage = self.agentImages[agentIndex] + for item in prevImage: remove_from_screen(item) + if newState.isPacman: + image = self.drawPacman(newState, agentIndex) + self.agentImages[agentIndex] = (newState, image ) + else: + image = 
self.drawGhost(newState, agentIndex) + self.agentImages[agentIndex] = (newState, image ) + refresh() + + def update(self, newState): + agentIndex = newState._agentMoved + agentState = newState.agentStates[agentIndex] + + if self.agentImages[agentIndex][0].isPacman != agentState.isPacman: self.swapImages(agentIndex, agentState) + prevState, prevImage = self.agentImages[agentIndex] + if agentState.isPacman: + self.animatePacman(agentState, prevState, prevImage) + else: + self.moveGhost(agentState, agentIndex, prevState, prevImage) + self.agentImages[agentIndex] = (agentState, prevImage) + + if newState._foodEaten != None: + self.removeFood(newState._foodEaten, self.food) + if newState._capsuleEaten != None: + self.removeCapsule(newState._capsuleEaten, self.capsules) + self.infoPane.updateScore(newState.score) + if 'ghostDistances' in dir(newState): + self.infoPane.updateGhostDistances(newState.ghostDistances) + + def make_window(self, width, height): + grid_width = (width-1) * self.gridSize + grid_height = (height-1) * self.gridSize + screen_width = 2*self.gridSize + grid_width + screen_height = 2*self.gridSize + grid_height + INFO_PANE_HEIGHT + + begin_graphics(screen_width, + screen_height, + BACKGROUND_COLOR, + "CS188 Pacman") + + def drawPacman(self, pacman, index): + position = self.getPosition(pacman) + screen_point = self.to_screen(position) + endpoints = self.getEndpoints(self.getDirection(pacman)) + + width = PACMAN_OUTLINE_WIDTH + outlineColor = PACMAN_COLOR + fillColor = PACMAN_COLOR + + if self.capture: + outlineColor = TEAM_COLORS[index % 2] + fillColor = GHOST_COLORS[index] + width = PACMAN_CAPTURE_OUTLINE_WIDTH + + return [circle(screen_point, PACMAN_SCALE * self.gridSize, + fillColor = fillColor, outlineColor = outlineColor, + endpoints = endpoints, + width = width)] + + def getEndpoints(self, direction, position=(0,0)): + x, y = position + pos = x - int(x) + y - int(y) + width = 30 + 80 * math.sin(math.pi* pos) + + delta = width / 2 + if (direction == 'West'): + endpoints = (180+delta, 180-delta) + elif (direction == 'North'): + endpoints = (90+delta, 90-delta) + elif (direction == 'South'): + endpoints = (270+delta, 270-delta) + else: + endpoints = (0+delta, 0-delta) + return endpoints + + def movePacman(self, position, direction, image): + screenPosition = self.to_screen(position) + endpoints = self.getEndpoints( direction, position ) + r = PACMAN_SCALE * self.gridSize + moveCircle(image[0], screenPosition, r, endpoints) + refresh() + + def animatePacman(self, pacman, prevPacman, image): + if self.frameTime < 0: + print 'Press any key to step forward, "q" to play' + keys = wait_for_keys() + if 'q' in keys: + self.frameTime = 0.1 + if self.frameTime > 0.01 or self.frameTime < 0: + start = time.time() + fx, fy = self.getPosition(prevPacman) + px, py = self.getPosition(pacman) + frames = 4.0 + for i in range(1,int(frames) + 1): + pos = px*i/frames + fx*(frames-i)/frames, py*i/frames + fy*(frames-i)/frames + self.movePacman(pos, self.getDirection(pacman), image) + refresh() + sleep(abs(self.frameTime) / frames) + else: + self.movePacman(self.getPosition(pacman), self.getDirection(pacman), image) + refresh() + + def getGhostColor(self, ghost, ghostIndex): + if ghost.scaredTimer > 0: + return SCARED_COLOR + else: + return GHOST_COLORS[ghostIndex] + + def drawGhost(self, ghost, agentIndex): + pos = self.getPosition(ghost) + dir = self.getDirection(ghost) + (screen_x, screen_y) = (self.to_screen(pos) ) + coords = [] + for (x, y) in GHOST_SHAPE: + 
coords.append((x*self.gridSize*GHOST_SIZE + screen_x, y*self.gridSize*GHOST_SIZE + screen_y)) + + colour = self.getGhostColor(ghost, agentIndex) + body = polygon(coords, colour, filled = 1) + WHITE = formatColor(1.0, 1.0, 1.0) + BLACK = formatColor(0.0, 0.0, 0.0) + + dx = 0 + dy = 0 + if dir == 'North': + dy = -0.2 + if dir == 'South': + dy = 0.2 + if dir == 'East': + dx = 0.2 + if dir == 'West': + dx = -0.2 + leftEye = circle((screen_x+self.gridSize*GHOST_SIZE*(-0.3+dx/1.5), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy/1.5)), self.gridSize*GHOST_SIZE*0.2, WHITE, WHITE) + rightEye = circle((screen_x+self.gridSize*GHOST_SIZE*(0.3+dx/1.5), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy/1.5)), self.gridSize*GHOST_SIZE*0.2, WHITE, WHITE) + leftPupil = circle((screen_x+self.gridSize*GHOST_SIZE*(-0.3+dx), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy)), self.gridSize*GHOST_SIZE*0.08, BLACK, BLACK) + rightPupil = circle((screen_x+self.gridSize*GHOST_SIZE*(0.3+dx), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy)), self.gridSize*GHOST_SIZE*0.08, BLACK, BLACK) + ghostImageParts = [] + ghostImageParts.append(body) + ghostImageParts.append(leftEye) + ghostImageParts.append(rightEye) + ghostImageParts.append(leftPupil) + ghostImageParts.append(rightPupil) + + return ghostImageParts + + def moveEyes(self, pos, dir, eyes): + (screen_x, screen_y) = (self.to_screen(pos) ) + dx = 0 + dy = 0 + if dir == 'North': + dy = -0.2 + if dir == 'South': + dy = 0.2 + if dir == 'East': + dx = 0.2 + if dir == 'West': + dx = -0.2 + moveCircle(eyes[0],(screen_x+self.gridSize*GHOST_SIZE*(-0.3+dx/1.5), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy/1.5)), self.gridSize*GHOST_SIZE*0.2) + moveCircle(eyes[1],(screen_x+self.gridSize*GHOST_SIZE*(0.3+dx/1.5), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy/1.5)), self.gridSize*GHOST_SIZE*0.2) + moveCircle(eyes[2],(screen_x+self.gridSize*GHOST_SIZE*(-0.3+dx), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy)), self.gridSize*GHOST_SIZE*0.08) + moveCircle(eyes[3],(screen_x+self.gridSize*GHOST_SIZE*(0.3+dx), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy)), self.gridSize*GHOST_SIZE*0.08) + + def moveGhost(self, ghost, ghostIndex, prevGhost, ghostImageParts): + old_x, old_y = self.to_screen(self.getPosition(prevGhost)) + new_x, new_y = self.to_screen(self.getPosition(ghost)) + delta = new_x - old_x, new_y - old_y + + for ghostImagePart in ghostImageParts: + move_by(ghostImagePart, delta) + refresh() + + if ghost.scaredTimer > 0: + color = SCARED_COLOR + else: + color = GHOST_COLORS[ghostIndex] + edit(ghostImageParts[0], ('fill', color), ('outline', color)) + self.moveEyes(self.getPosition(ghost), self.getDirection(ghost), ghostImageParts[-4:]) + refresh() + + def getPosition(self, agentState): + if agentState.configuration == None: return (-1000, -1000) + return agentState.getPosition() + + def getDirection(self, agentState): + if agentState.configuration == None: return Directions.STOP + return agentState.configuration.getDirection() + + def finish(self): + end_graphics() + + def to_screen(self, point): + ( x, y ) = point + #y = self.height - y + x = (x + 1)*self.gridSize + y = (self.height - y)*self.gridSize + return ( x, y ) + + # Fixes some TK issue with off-center circles + def to_screen2(self, point): + ( x, y ) = point + #y = self.height - y + x = (x + 1)*self.gridSize + y = (self.height - y)*self.gridSize + return ( x, y ) + + def drawWalls(self, wallMatrix): + wallColor = WALL_COLOR + for xNum, x in enumerate(wallMatrix): + if self.capture and (xNum * 2) < wallMatrix.width: wallColor = TEAM_COLORS[0] + if 
self.capture and (xNum * 2) >= wallMatrix.width: wallColor = TEAM_COLORS[1] + + for yNum, cell in enumerate(x): + if cell: # There's a wall here + pos = (xNum, yNum) + screen = self.to_screen(pos) + screen2 = self.to_screen2(pos) + + # draw each quadrant of the square based on adjacent walls + wIsWall = self.isWall(xNum-1, yNum, wallMatrix) + eIsWall = self.isWall(xNum+1, yNum, wallMatrix) + nIsWall = self.isWall(xNum, yNum+1, wallMatrix) + sIsWall = self.isWall(xNum, yNum-1, wallMatrix) + nwIsWall = self.isWall(xNum-1, yNum+1, wallMatrix) + swIsWall = self.isWall(xNum-1, yNum-1, wallMatrix) + neIsWall = self.isWall(xNum+1, yNum+1, wallMatrix) + seIsWall = self.isWall(xNum+1, yNum-1, wallMatrix) + + # NE quadrant + if (not nIsWall) and (not eIsWall): + # inner circle + circle(screen2, WALL_RADIUS * self.gridSize, wallColor, wallColor, (0,91), 'arc') + if (nIsWall) and (not eIsWall): + # vertical line + line(add(screen, (self.gridSize*WALL_RADIUS, 0)), add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(-0.5)-1)), wallColor) + if (not nIsWall) and (eIsWall): + # horizontal line + line(add(screen, (0, self.gridSize*(-1)*WALL_RADIUS)), add(screen, (self.gridSize*0.5+1, self.gridSize*(-1)*WALL_RADIUS)), wallColor) + if (nIsWall) and (eIsWall) and (not neIsWall): + # outer circle + circle(add(screen2, (self.gridSize*2*WALL_RADIUS, self.gridSize*(-2)*WALL_RADIUS)), WALL_RADIUS * self.gridSize-1, wallColor, wallColor, (180,271), 'arc') + line(add(screen, (self.gridSize*2*WALL_RADIUS-1, self.gridSize*(-1)*WALL_RADIUS)), add(screen, (self.gridSize*0.5+1, self.gridSize*(-1)*WALL_RADIUS)), wallColor) + line(add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(-2)*WALL_RADIUS+1)), add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(-0.5))), wallColor) + + # NW quadrant + if (not nIsWall) and (not wIsWall): + # inner circle + circle(screen2, WALL_RADIUS * self.gridSize, wallColor, wallColor, (90,181), 'arc') + if (nIsWall) and (not wIsWall): + # vertical line + line(add(screen, (self.gridSize*(-1)*WALL_RADIUS, 0)), add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(-0.5)-1)), wallColor) + if (not nIsWall) and (wIsWall): + # horizontal line + line(add(screen, (0, self.gridSize*(-1)*WALL_RADIUS)), add(screen, (self.gridSize*(-0.5)-1, self.gridSize*(-1)*WALL_RADIUS)), wallColor) + if (nIsWall) and (wIsWall) and (not nwIsWall): + # outer circle + circle(add(screen2, (self.gridSize*(-2)*WALL_RADIUS, self.gridSize*(-2)*WALL_RADIUS)), WALL_RADIUS * self.gridSize-1, wallColor, wallColor, (270,361), 'arc') + line(add(screen, (self.gridSize*(-2)*WALL_RADIUS+1, self.gridSize*(-1)*WALL_RADIUS)), add(screen, (self.gridSize*(-0.5), self.gridSize*(-1)*WALL_RADIUS)), wallColor) + line(add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(-2)*WALL_RADIUS+1)), add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(-0.5))), wallColor) + + # SE quadrant + if (not sIsWall) and (not eIsWall): + # inner circle + circle(screen2, WALL_RADIUS * self.gridSize, wallColor, wallColor, (270,361), 'arc') + if (sIsWall) and (not eIsWall): + # vertical line + line(add(screen, (self.gridSize*WALL_RADIUS, 0)), add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(0.5)+1)), wallColor) + if (not sIsWall) and (eIsWall): + # horizontal line + line(add(screen, (0, self.gridSize*(1)*WALL_RADIUS)), add(screen, (self.gridSize*0.5+1, self.gridSize*(1)*WALL_RADIUS)), wallColor) + if (sIsWall) and (eIsWall) and (not seIsWall): + # outer circle + circle(add(screen2, (self.gridSize*2*WALL_RADIUS, 
self.gridSize*(2)*WALL_RADIUS)), WALL_RADIUS * self.gridSize-1, wallColor, wallColor, (90,181), 'arc') + line(add(screen, (self.gridSize*2*WALL_RADIUS-1, self.gridSize*(1)*WALL_RADIUS)), add(screen, (self.gridSize*0.5, self.gridSize*(1)*WALL_RADIUS)), wallColor) + line(add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(2)*WALL_RADIUS-1)), add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(0.5))), wallColor) + + # SW quadrant + if (not sIsWall) and (not wIsWall): + # inner circle + circle(screen2, WALL_RADIUS * self.gridSize, wallColor, wallColor, (180,271), 'arc') + if (sIsWall) and (not wIsWall): + # vertical line + line(add(screen, (self.gridSize*(-1)*WALL_RADIUS, 0)), add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(0.5)+1)), wallColor) + if (not sIsWall) and (wIsWall): + # horizontal line + line(add(screen, (0, self.gridSize*(1)*WALL_RADIUS)), add(screen, (self.gridSize*(-0.5)-1, self.gridSize*(1)*WALL_RADIUS)), wallColor) + if (sIsWall) and (wIsWall) and (not swIsWall): + # outer circle + circle(add(screen2, (self.gridSize*(-2)*WALL_RADIUS, self.gridSize*(2)*WALL_RADIUS)), WALL_RADIUS * self.gridSize-1, wallColor, wallColor, (0,91), 'arc') + line(add(screen, (self.gridSize*(-2)*WALL_RADIUS+1, self.gridSize*(1)*WALL_RADIUS)), add(screen, (self.gridSize*(-0.5), self.gridSize*(1)*WALL_RADIUS)), wallColor) + line(add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(2)*WALL_RADIUS-1)), add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(0.5))), wallColor) + + def isWall(self, x, y, walls): + if x < 0 or y < 0: + return False + if x >= walls.width or y >= walls.height: + return False + return walls[x][y] + + def drawFood(self, foodMatrix ): + foodImages = [] + color = FOOD_COLOR + for xNum, x in enumerate(foodMatrix): + if self.capture and (xNum * 2) <= foodMatrix.width: color = TEAM_COLORS[0] + if self.capture and (xNum * 2) > foodMatrix.width: color = TEAM_COLORS[1] + imageRow = [] + foodImages.append(imageRow) + for yNum, cell in enumerate(x): + if cell: # There's food here + screen = self.to_screen((xNum, yNum )) + dot = circle( screen, + FOOD_SIZE * self.gridSize, + outlineColor = color, fillColor = color, + width = 1) + imageRow.append(dot) + else: + imageRow.append(None) + return foodImages + + def drawCapsules(self, capsules ): + capsuleImages = {} + for capsule in capsules: + ( screen_x, screen_y ) = self.to_screen(capsule) + dot = circle( (screen_x, screen_y), + CAPSULE_SIZE * self.gridSize, + outlineColor = CAPSULE_COLOR, + fillColor = CAPSULE_COLOR, + width = 1) + capsuleImages[capsule] = dot + return capsuleImages + + def removeFood(self, cell, foodImages ): + x, y = cell + remove_from_screen(foodImages[x][y]) + + def removeCapsule(self, cell, capsuleImages ): + x, y = cell + remove_from_screen(capsuleImages[(x, y)]) + + def drawExpandedCells(self, cells): + """ + Draws an overlay of expanded grid positions for search agents + """ + n = float(len(cells)) + baseColor = [1.0, 0.0, 0.0] + self.clearExpandedCells() + self.expandedCells = [] + for k, cell in enumerate(cells): + screenPos = self.to_screen( cell) + cellColor = formatColor(*[(n-k) * c * .5 / n + .25 for c in baseColor]) + block = square(screenPos, + 0.5 * self.gridSize, + color = cellColor, + filled = 1, behind=2) + self.expandedCells.append(block) + if self.frameTime < 0: + refresh() + + def clearExpandedCells(self): + if 'expandedCells' in dir(self) and len(self.expandedCells) > 0: + for cell in self.expandedCells: + remove_from_screen(cell) + + + def updateDistributions(self, 
distributions): + "Draws an agent's belief distributions" + # copy all distributions so we don't change their state + distributions = map(lambda x: x.copy(), distributions) + if self.distributionImages == None: + self.drawDistributions(self.previousState) + for x in range(len(self.distributionImages)): + for y in range(len(self.distributionImages[0])): + image = self.distributionImages[x][y] + weights = [dist[ (x,y) ] for dist in distributions] + + if sum(weights) != 0: + pass + # Fog of war + color = [0.0,0.0,0.0] + colors = GHOST_VEC_COLORS[1:] # With Pacman + if self.capture: colors = GHOST_VEC_COLORS + for weight, gcolor in zip(weights, colors): + color = [min(1.0, c + 0.95 * g * weight ** .3) for c,g in zip(color, gcolor)] + changeColor(image, formatColor(*color)) + refresh() + +class FirstPersonPacmanGraphics(PacmanGraphics): + def __init__(self, zoom = 1.0, showGhosts = True, capture = False, frameTime=0): + PacmanGraphics.__init__(self, zoom, frameTime=frameTime) + self.showGhosts = showGhosts + self.capture = capture + + def initialize(self, state, isBlue = False): + + self.isBlue = isBlue + PacmanGraphics.startGraphics(self, state) + # Initialize distribution images + walls = state.layout.walls + dist = [] + self.layout = state.layout + + # Draw the rest + self.distributionImages = None # initialize lazily + self.drawStaticObjects(state) + self.drawAgentObjects(state) + + # Information + self.previousState = state + + def lookAhead(self, config, state): + if config.getDirection() == 'Stop': + return + else: + pass + # Draw relevant ghosts + allGhosts = state.getGhostStates() + visibleGhosts = state.getVisibleGhosts() + for i, ghost in enumerate(allGhosts): + if ghost in visibleGhosts: + self.drawGhost(ghost, i) + else: + self.currentGhostImages[i] = None + + def getGhostColor(self, ghost, ghostIndex): + return GHOST_COLORS[ghostIndex] + + def getPosition(self, ghostState): + if not self.showGhosts and not ghostState.isPacman and ghostState.getPosition()[1] > 1: + return (-1000, -1000) + else: + return PacmanGraphics.getPosition(self, ghostState) + +def add(x, y): + return (x[0] + y[0], x[1] + y[1]) + + +# Saving graphical output +# ----------------------- +# Note: to make an animated gif from this postscript output, try the command: +# convert -delay 7 -loop 1 -compress lzw -layers optimize frame* out.gif +# convert is part of imagemagick (freeware) + +SAVE_POSTSCRIPT = False +POSTSCRIPT_OUTPUT_DIR = 'frames' +FRAME_NUMBER = 0 +import os + +def saveFrame(): + "Saves the current graphical output as a postscript file" + global SAVE_POSTSCRIPT, FRAME_NUMBER, POSTSCRIPT_OUTPUT_DIR + if not SAVE_POSTSCRIPT: return + if not os.path.exists(POSTSCRIPT_OUTPUT_DIR): os.mkdir(POSTSCRIPT_OUTPUT_DIR) + name = os.path.join(POSTSCRIPT_OUTPUT_DIR, 'frame_%08d.ps' % FRAME_NUMBER) + FRAME_NUMBER += 1 + writePostscript(name) # writes the current canvas diff --git a/reinforcement/graphicsGridworldDisplay.py b/reinforcement/graphicsGridworldDisplay.py new file mode 100644 index 0000000..5c784bb --- /dev/null +++ b/reinforcement/graphicsGridworldDisplay.py @@ -0,0 +1,348 @@ +# graphicsGridworldDisplay.py +# --------------------------- +# Licensing Information: You are free to use or extend these projects for +# educational purposes provided that (1) you do not distribute or publish +# solutions, (2) you retain this notice, and (3) you provide clear +# attribution to UC Berkeley, including a link to http://ai.berkeley.edu. 
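Throughout graphicsDisplay.py above, grid coordinates are turned into canvas pixels by to_screen: one cell of left margin is added to x, and y is flipped because Tk puts the origin at the top-left. A quick worked example with the default 30-pixel cells and an assumed 7-row layout:

    GRID_SIZE = 30.0   # DEFAULT_GRID_SIZE
    HEIGHT = 7         # assumed layout height

    def to_screen(point):
        x, y = point
        return ((x + 1) * GRID_SIZE, (HEIGHT - y) * GRID_SIZE)

    print(to_screen((0, 0)))   # (30.0, 210.0): the bottom-left cell lands near the bottom of the window
    print(to_screen((5, 6)))   # (180.0, 30.0): the top row lands near the top of the canvas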
+# +# Attribution Information: The Pacman AI projects were developed at UC Berkeley. +# The core projects and autograders were primarily created by John DeNero +# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). +# Student side autograding was added by Brad Miller, Nick Hay, and +# Pieter Abbeel (pabbeel@cs.berkeley.edu). + + +import util +from graphicsUtils import * + +class GraphicsGridworldDisplay: + + def __init__(self, gridworld, size=120, speed=1.0): + self.gridworld = gridworld + self.size = size + self.speed = speed + + def start(self): + setup(self.gridworld, size=self.size) + + def pause(self): + wait_for_keys() + + def displayValues(self, agent, currentState = None, message = 'Agent Values'): + values = util.Counter() + policy = {} + states = self.gridworld.getStates() + for state in states: + values[state] = agent.getValue(state) + policy[state] = agent.getPolicy(state) + drawValues(self.gridworld, values, policy, currentState, message) + sleep(0.05 / self.speed) + + def displayNullValues(self, currentState = None, message = ''): + values = util.Counter() + #policy = {} + states = self.gridworld.getStates() + for state in states: + values[state] = 0.0 + #policy[state] = agent.getPolicy(state) + drawNullValues(self.gridworld, currentState,'') + # drawValues(self.gridworld, values, policy, currentState, message) + sleep(0.05 / self.speed) + + def displayQValues(self, agent, currentState = None, message = 'Agent Q-Values'): + qValues = util.Counter() + states = self.gridworld.getStates() + for state in states: + for action in self.gridworld.getPossibleActions(state): + qValues[(state, action)] = agent.getQValue(state, action) + drawQValues(self.gridworld, qValues, currentState, message) + sleep(0.05 / self.speed) + +BACKGROUND_COLOR = formatColor(0,0,0) +EDGE_COLOR = formatColor(1,1,1) +OBSTACLE_COLOR = formatColor(0.5,0.5,0.5) +TEXT_COLOR = formatColor(1,1,1) +MUTED_TEXT_COLOR = formatColor(0.7,0.7,0.7) +LOCATION_COLOR = formatColor(0,0,1) + +WINDOW_SIZE = -1 +GRID_SIZE = -1 +GRID_HEIGHT = -1 +MARGIN = -1 + +def setup(gridworld, title = "Gridworld Display", size = 120): + global GRID_SIZE, MARGIN, SCREEN_WIDTH, SCREEN_HEIGHT, GRID_HEIGHT + grid = gridworld.grid + WINDOW_SIZE = size + GRID_SIZE = size + GRID_HEIGHT = grid.height + MARGIN = GRID_SIZE * 0.75 + screen_width = (grid.width - 1) * GRID_SIZE + MARGIN * 2 + screen_height = (grid.height - 0.5) * GRID_SIZE + MARGIN * 2 + + begin_graphics(screen_width, + screen_height, + BACKGROUND_COLOR, title=title) + +def drawNullValues(gridworld, currentState = None, message = ''): + grid = gridworld.grid + blank() + for x in range(grid.width): + for y in range(grid.height): + state = (x, y) + gridType = grid[x][y] + isExit = (str(gridType) != gridType) + isCurrent = (currentState == state) + if gridType == '#': + drawSquare(x, y, 0, 0, 0, None, None, True, False, isCurrent) + else: + drawNullSquare(gridworld.grid, x, y, False, isExit, isCurrent) + pos = to_screen(((grid.width - 1.0) / 2.0, - 0.8)) + text( pos, TEXT_COLOR, message, "Courier", -32, "bold", "c") + + +def drawValues(gridworld, values, policy, currentState = None, message = 'State Values'): + grid = gridworld.grid + blank() + valueList = [values[state] for state in gridworld.getStates()] + [0.0] + minValue = min(valueList) + maxValue = max(valueList) + for x in range(grid.width): + for y in range(grid.height): + state = (x, y) + gridType = grid[x][y] + isExit = (str(gridType) != gridType) + isCurrent = (currentState == state) + if gridType == '#': + 
drawSquare(x, y, 0, 0, 0, None, None, True, False, isCurrent) + else: + value = values[state] + action = None + if policy != None and state in policy: + action = policy[state] + actions = gridworld.getPossibleActions(state) + if action not in actions and 'exit' in actions: + action = 'exit' + valString = '%.2f' % value + drawSquare(x, y, value, minValue, maxValue, valString, action, False, isExit, isCurrent) + pos = to_screen(((grid.width - 1.0) / 2.0, - 0.8)) + text( pos, TEXT_COLOR, message, "Courier", -32, "bold", "c") + +def drawQValues(gridworld, qValues, currentState = None, message = 'State-Action Q-Values'): + grid = gridworld.grid + blank() + stateCrossActions = [[(state, action) for action in gridworld.getPossibleActions(state)] for state in gridworld.getStates()] + qStates = reduce(lambda x,y: x+y, stateCrossActions, []) + qValueList = [qValues[(state, action)] for state, action in qStates] + [0.0] + minValue = min(qValueList) + maxValue = max(qValueList) + for x in range(grid.width): + for y in range(grid.height): + state = (x, y) + gridType = grid[x][y] + isExit = (str(gridType) != gridType) + isCurrent = (currentState == state) + actions = gridworld.getPossibleActions(state) + if actions == None or len(actions) == 0: + actions = [None] + bestQ = max([qValues[(state, action)] for action in actions]) + bestActions = [action for action in actions if qValues[(state, action)] == bestQ] + + q = util.Counter() + valStrings = {} + for action in actions: + v = qValues[(state, action)] + q[action] += v + valStrings[action] = '%.2f' % v + if gridType == '#': + drawSquare(x, y, 0, 0, 0, None, None, True, False, isCurrent) + elif isExit: + action = 'exit' + value = q[action] + valString = '%.2f' % value + drawSquare(x, y, value, minValue, maxValue, valString, action, False, isExit, isCurrent) + else: + drawSquareQ(x, y, q, minValue, maxValue, valStrings, actions, isCurrent) + pos = to_screen(((grid.width - 1.0) / 2.0, - 0.8)) + text( pos, TEXT_COLOR, message, "Courier", -32, "bold", "c") + + +def blank(): + clear_screen() + +def drawNullSquare(grid,x, y, isObstacle, isTerminal, isCurrent): + + square_color = getColor(0, -1, 1) + + if isObstacle: + square_color = OBSTACLE_COLOR + + (screen_x, screen_y) = to_screen((x, y)) + square( (screen_x, screen_y), + 0.5* GRID_SIZE, + color = square_color, + filled = 1, + width = 1) + + square( (screen_x, screen_y), + 0.5* GRID_SIZE, + color = EDGE_COLOR, + filled = 0, + width = 3) + + if isTerminal and not isObstacle: + square( (screen_x, screen_y), + 0.4* GRID_SIZE, + color = EDGE_COLOR, + filled = 0, + width = 2) + text( (screen_x, screen_y), + TEXT_COLOR, + str(grid[x][y]), + "Courier", -24, "bold", "c") + + + text_color = TEXT_COLOR + + if not isObstacle and isCurrent: + circle( (screen_x, screen_y), 0.1*GRID_SIZE, LOCATION_COLOR, fillColor=LOCATION_COLOR ) + + # if not isObstacle: + # text( (screen_x, screen_y), text_color, valStr, "Courier", 24, "bold", "c") + +def drawSquare(x, y, val, min, max, valStr, action, isObstacle, isTerminal, isCurrent): + + square_color = getColor(val, min, max) + + if isObstacle: + square_color = OBSTACLE_COLOR + + (screen_x, screen_y) = to_screen((x, y)) + square( (screen_x, screen_y), + 0.5* GRID_SIZE, + color = square_color, + filled = 1, + width = 1) + square( (screen_x, screen_y), + 0.5* GRID_SIZE, + color = EDGE_COLOR, + filled = 0, + width = 3) + if isTerminal and not isObstacle: + square( (screen_x, screen_y), + 0.4* GRID_SIZE, + color = EDGE_COLOR, + filled = 0, + width = 2) + + + if action == 'north': + 
polygon( [(screen_x, screen_y - 0.45*GRID_SIZE), (screen_x+0.05*GRID_SIZE, screen_y-0.40*GRID_SIZE), (screen_x-0.05*GRID_SIZE, screen_y-0.40*GRID_SIZE)], EDGE_COLOR, filled = 1, smoothed = False) + if action == 'south': + polygon( [(screen_x, screen_y + 0.45*GRID_SIZE), (screen_x+0.05*GRID_SIZE, screen_y+0.40*GRID_SIZE), (screen_x-0.05*GRID_SIZE, screen_y+0.40*GRID_SIZE)], EDGE_COLOR, filled = 1, smoothed = False) + if action == 'west': + polygon( [(screen_x-0.45*GRID_SIZE, screen_y), (screen_x-0.4*GRID_SIZE, screen_y+0.05*GRID_SIZE), (screen_x-0.4*GRID_SIZE, screen_y-0.05*GRID_SIZE)], EDGE_COLOR, filled = 1, smoothed = False) + if action == 'east': + polygon( [(screen_x+0.45*GRID_SIZE, screen_y), (screen_x+0.4*GRID_SIZE, screen_y+0.05*GRID_SIZE), (screen_x+0.4*GRID_SIZE, screen_y-0.05*GRID_SIZE)], EDGE_COLOR, filled = 1, smoothed = False) + + + text_color = TEXT_COLOR + + if not isObstacle and isCurrent: + circle( (screen_x, screen_y), 0.1*GRID_SIZE, outlineColor=LOCATION_COLOR, fillColor=LOCATION_COLOR ) + + if not isObstacle: + text( (screen_x, screen_y), text_color, valStr, "Courier", -30, "bold", "c") + + +def drawSquareQ(x, y, qVals, minVal, maxVal, valStrs, bestActions, isCurrent): + + (screen_x, screen_y) = to_screen((x, y)) + + center = (screen_x, screen_y) + nw = (screen_x-0.5*GRID_SIZE, screen_y-0.5*GRID_SIZE) + ne = (screen_x+0.5*GRID_SIZE, screen_y-0.5*GRID_SIZE) + se = (screen_x+0.5*GRID_SIZE, screen_y+0.5*GRID_SIZE) + sw = (screen_x-0.5*GRID_SIZE, screen_y+0.5*GRID_SIZE) + n = (screen_x, screen_y-0.5*GRID_SIZE+5) + s = (screen_x, screen_y+0.5*GRID_SIZE-5) + w = (screen_x-0.5*GRID_SIZE+5, screen_y) + e = (screen_x+0.5*GRID_SIZE-5, screen_y) + + actions = qVals.keys() + for action in actions: + + wedge_color = getColor(qVals[action], minVal, maxVal) + + if action == 'north': + polygon( (center, nw, ne), wedge_color, filled = 1, smoothed = False) + #text(n, text_color, valStr, "Courier", 8, "bold", "n") + if action == 'south': + polygon( (center, sw, se), wedge_color, filled = 1, smoothed = False) + #text(s, text_color, valStr, "Courier", 8, "bold", "s") + if action == 'east': + polygon( (center, ne, se), wedge_color, filled = 1, smoothed = False) + #text(e, text_color, valStr, "Courier", 8, "bold", "e") + if action == 'west': + polygon( (center, nw, sw), wedge_color, filled = 1, smoothed = False) + #text(w, text_color, valStr, "Courier", 8, "bold", "w") + + square( (screen_x, screen_y), + 0.5* GRID_SIZE, + color = EDGE_COLOR, + filled = 0, + width = 3) + line(ne, sw, color = EDGE_COLOR) + line(nw, se, color = EDGE_COLOR) + + if isCurrent: + circle( (screen_x, screen_y), 0.1*GRID_SIZE, LOCATION_COLOR, fillColor=LOCATION_COLOR ) + + for action in actions: + text_color = TEXT_COLOR + if qVals[action] < max(qVals.values()): text_color = MUTED_TEXT_COLOR + valStr = "" + if action in valStrs: + valStr = valStrs[action] + h = -20 + if action == 'north': + #polygon( (center, nw, ne), wedge_color, filled = 1, smooth = 0) + text(n, text_color, valStr, "Courier", h, "bold", "n") + if action == 'south': + #polygon( (center, sw, se), wedge_color, filled = 1, smooth = 0) + text(s, text_color, valStr, "Courier", h, "bold", "s") + if action == 'east': + #polygon( (center, ne, se), wedge_color, filled = 1, smooth = 0) + text(e, text_color, valStr, "Courier", h, "bold", "e") + if action == 'west': + #polygon( (center, nw, sw), wedge_color, filled = 1, smooth = 0) + text(w, text_color, valStr, "Courier", h, "bold", "w") + + +def getColor(val, minVal, max): + r, g = 0.0, 0.0 + if val < 0 and 
minVal < 0: + r = val * 0.65 / minVal + if val > 0 and max > 0: + g = val * 0.65 / max + return formatColor(r,g,0.0) + + +def square(pos, size, color, filled, width): + x, y = pos + dx, dy = size, size + return polygon([(x - dx, y - dy), (x - dx, y + dy), (x + dx, y + dy), (x + dx, y - dy)], outlineColor=color, fillColor=color, filled=filled, width=width, smoothed=False) + + +def to_screen(point): + ( gamex, gamey ) = point + x = gamex*GRID_SIZE + MARGIN + y = (GRID_HEIGHT - gamey - 1)*GRID_SIZE + MARGIN + return ( x, y ) + +def to_grid(point): + (x, y) = point + x = int ((y - MARGIN + GRID_SIZE * 0.5) / GRID_SIZE) + y = int ((x - MARGIN + GRID_SIZE * 0.5) / GRID_SIZE) + print point, "-->", (x, y) + return (x, y) diff --git a/reinforcement/graphicsUtils.py b/reinforcement/graphicsUtils.py new file mode 100644 index 0000000..a1d8bdc --- /dev/null +++ b/reinforcement/graphicsUtils.py @@ -0,0 +1,398 @@ +# graphicsUtils.py +# ---------------- +# Licensing Information: You are free to use or extend these projects for +# educational purposes provided that (1) you do not distribute or publish +# solutions, (2) you retain this notice, and (3) you provide clear +# attribution to UC Berkeley, including a link to http://ai.berkeley.edu. +# +# Attribution Information: The Pacman AI projects were developed at UC Berkeley. +# The core projects and autograders were primarily created by John DeNero +# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). +# Student side autograding was added by Brad Miller, Nick Hay, and +# Pieter Abbeel (pabbeel@cs.berkeley.edu). + + +import sys +import math +import random +import string +import time +import types +import Tkinter + +_Windows = sys.platform == 'win32' # True if on Win95/98/NT + +_root_window = None # The root window for graphics output +_canvas = None # The canvas which holds graphics +_canvas_xs = None # Size of canvas object +_canvas_ys = None +_canvas_x = None # Current position on canvas +_canvas_y = None +_canvas_col = None # Current colour (set to black below) +_canvas_tsize = 12 +_canvas_tserifs = 0 + +def formatColor(r, g, b): + return '#%02x%02x%02x' % (int(r * 255), int(g * 255), int(b * 255)) + +def colorToVector(color): + return map(lambda x: int(x, 16) / 256.0, [color[1:3], color[3:5], color[5:7]]) + +if _Windows: + _canvas_tfonts = ['times new roman', 'lucida console'] +else: + _canvas_tfonts = ['times', 'lucidasans-24'] + pass # XXX need defaults here + +def sleep(secs): + global _root_window + if _root_window == None: + time.sleep(secs) + else: + _root_window.update_idletasks() + _root_window.after(int(1000 * secs), _root_window.quit) + _root_window.mainloop() + +def begin_graphics(width=640, height=480, color=formatColor(0, 0, 0), title=None): + + global _root_window, _canvas, _canvas_x, _canvas_y, _canvas_xs, _canvas_ys, _bg_color + + # Check for duplicate call + if _root_window is not None: + # Lose the window. 
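The colour scale defined just above maps a cell's value onto a red-to-green gradient capped at 65% intensity (negative values shade red against the minimum, positive values shade green against the maximum). A minimal self-contained sketch of the same interpolation with a couple of hypothetical values; the helper name valueColor is mine and it inlines formatColor so it runs on its own:

def valueColor(val, minVal, maxVal):
    # Same interpolation as getColor above: red for negatives, green for positives.
    r, g = 0.0, 0.0
    if val < 0 and minVal < 0:
        r = val * 0.65 / minVal
    if val > 0 and maxVal > 0:
        g = val * 0.65 / maxVal
    return '#%02x%02x%02x' % (int(r * 255), int(g * 255), 0)

print valueColor(-1.0, -1.0, 1.0)   # prints #a50000 (strongest red)
print valueColor(0.5, -1.0, 1.0)    # prints #005200 (half-strength green)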
+ _root_window.destroy() + + # Save the canvas size parameters + _canvas_xs, _canvas_ys = width - 1, height - 1 + _canvas_x, _canvas_y = 0, _canvas_ys + _bg_color = color + + # Create the root window + _root_window = Tkinter.Tk() + _root_window.protocol('WM_DELETE_WINDOW', _destroy_window) + _root_window.title(title or 'Graphics Window') + _root_window.resizable(0, 0) + + # Create the canvas object + try: + _canvas = Tkinter.Canvas(_root_window, width=width, height=height) + _canvas.pack() + draw_background() + _canvas.update() + except: + _root_window = None + raise + + # Bind to key-down and key-up events + _root_window.bind( "<KeyPress>", _keypress ) + _root_window.bind( "<KeyRelease>", _keyrelease ) + _root_window.bind( "<FocusIn>", _clear_keys ) + _root_window.bind( "<FocusOut>", _clear_keys ) + _root_window.bind( "<Button-1>", _leftclick ) + _root_window.bind( "<Button-2>", _rightclick ) + _root_window.bind( "<Button-3>", _rightclick ) + _root_window.bind( "<Control-Button-1>", _ctrl_leftclick) + _clear_keys() + +_leftclick_loc = None +_rightclick_loc = None +_ctrl_leftclick_loc = None + +def _leftclick(event): + global _leftclick_loc + _leftclick_loc = (event.x, event.y) + +def _rightclick(event): + global _rightclick_loc + _rightclick_loc = (event.x, event.y) + +def _ctrl_leftclick(event): + global _ctrl_leftclick_loc + _ctrl_leftclick_loc = (event.x, event.y) + +def wait_for_click(): + while True: + global _leftclick_loc + global _rightclick_loc + global _ctrl_leftclick_loc + if _leftclick_loc != None: + val = _leftclick_loc + _leftclick_loc = None + return val, 'left' + if _rightclick_loc != None: + val = _rightclick_loc + _rightclick_loc = None + return val, 'right' + if _ctrl_leftclick_loc != None: + val = _ctrl_leftclick_loc + _ctrl_leftclick_loc = None + return val, 'ctrl_left' + sleep(0.05) + +def draw_background(): + corners = [(0,0), (0, _canvas_ys), (_canvas_xs, _canvas_ys), (_canvas_xs, 0)] + polygon(corners, _bg_color, fillColor=_bg_color, filled=True, smoothed=False) + +def _destroy_window(event=None): + sys.exit(0) +# global _root_window +# _root_window.destroy() +# _root_window = None + #print "DESTROY" + +def end_graphics(): + global _root_window, _canvas, _mouse_enabled + try: + try: + sleep(1) + if _root_window != None: + _root_window.destroy() + except SystemExit, e: + print 'Ending graphics raised an exception:', e + finally: + _root_window = None + _canvas = None + _mouse_enabled = 0 + _clear_keys() + +def clear_screen(background=None): + global _canvas_x, _canvas_y + _canvas.delete('all') + draw_background() + _canvas_x, _canvas_y = 0, _canvas_ys + +def polygon(coords, outlineColor, fillColor=None, filled=1, smoothed=1, behind=0, width=1): + c = [] + for coord in coords: + c.append(coord[0]) + c.append(coord[1]) + if fillColor == None: fillColor = outlineColor + if filled == 0: fillColor = "" + poly = _canvas.create_polygon(c, outline=outlineColor, fill=fillColor, smooth=smoothed, width=width) + if behind > 0: + _canvas.tag_lower(poly, behind) # Higher should be more visible + return poly + +def square(pos, r, color, filled=1, behind=0): + x, y = pos + coords = [(x - r, y - r), (x + r, y - r), (x + r, y + r), (x - r, y + r)] + return polygon(coords, color, color, filled, 0, behind=behind) + +def circle(pos, r, outlineColor, fillColor, endpoints=None, style='pieslice', width=2): + x, y = pos + x0, x1 = x - r - 1, x + r + y0, y1 = y - r - 1, y + r + if endpoints == None: + e = [0, 359] + else: + e = list(endpoints) + while e[0] > e[1]: e[1] = e[1] + 360 + + return _canvas.create_arc(x0, y0, x1, y1, outline=outlineColor, fill=fillColor, + extent=e[1]
- e[0], start=e[0], style=style, width=width) + +def image(pos, file="../../blueghost.gif"): + x, y = pos + # img = PhotoImage(file=file) + return _canvas.create_image(x, y, image = Tkinter.PhotoImage(file=file), anchor = Tkinter.NW) + + +def refresh(): + _canvas.update_idletasks() + +def moveCircle(id, pos, r, endpoints=None): + global _canvas_x, _canvas_y + + x, y = pos +# x0, x1 = x - r, x + r + 1 +# y0, y1 = y - r, y + r + 1 + x0, x1 = x - r - 1, x + r + y0, y1 = y - r - 1, y + r + if endpoints == None: + e = [0, 359] + else: + e = list(endpoints) + while e[0] > e[1]: e[1] = e[1] + 360 + + edit(id, ('start', e[0]), ('extent', e[1] - e[0])) + move_to(id, x0, y0) + +def edit(id, *args): + _canvas.itemconfigure(id, **dict(args)) + +def text(pos, color, contents, font='Helvetica', size=12, style='normal', anchor="nw"): + global _canvas_x, _canvas_y + x, y = pos + font = (font, str(size), style) + return _canvas.create_text(x, y, fill=color, text=contents, font=font, anchor=anchor) + +def changeText(id, newText, font=None, size=12, style='normal'): + _canvas.itemconfigure(id, text=newText) + if font != None: + _canvas.itemconfigure(id, font=(font, '-%d' % size, style)) + +def changeColor(id, newColor): + _canvas.itemconfigure(id, fill=newColor) + +def line(here, there, color=formatColor(0, 0, 0), width=2): + x0, y0 = here[0], here[1] + x1, y1 = there[0], there[1] + return _canvas.create_line(x0, y0, x1, y1, fill=color, width=width) + +############################################################################## +### Keypress handling ######################################################## +############################################################################## + +# We bind to key-down and key-up events. + +_keysdown = {} +_keyswaiting = {} +# This holds an unprocessed key release. We delay key releases by up to +# one call to keys_pressed() to get round a problem with auto repeat. +_got_release = None + +def _keypress(event): + global _got_release + #remap_arrows(event) + _keysdown[event.keysym] = 1 + _keyswaiting[event.keysym] = 1 +# print event.char, event.keycode + _got_release = None + +def _keyrelease(event): + global _got_release + #remap_arrows(event) + try: + del _keysdown[event.keysym] + except: + pass + _got_release = 1 + +def remap_arrows(event): + # TURN ARROW PRESSES INTO LETTERS (SHOULD BE IN KEYBOARD AGENT) + if event.char in ['a', 's', 'd', 'w']: + return + if event.keycode in [37, 101]: # LEFT ARROW (win / x) + event.char = 'a' + if event.keycode in [38, 99]: # UP ARROW + event.char = 'w' + if event.keycode in [39, 102]: # RIGHT ARROW + event.char = 'd' + if event.keycode in [40, 104]: # DOWN ARROW + event.char = 's' + +def _clear_keys(event=None): + global _keysdown, _got_release, _keyswaiting + _keysdown = {} + _keyswaiting = {} + _got_release = None + +def keys_pressed(d_o_e=Tkinter.tkinter.dooneevent, + d_w=Tkinter.tkinter.DONT_WAIT): + d_o_e(d_w) + if _got_release: + d_o_e(d_w) + return _keysdown.keys() + +def keys_waiting(): + global _keyswaiting + keys = _keyswaiting.keys() + _keyswaiting = {} + return keys + +# Block for a list of keys... 
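The handlers above buffer keyboard input in two dictionaries: _keysdown tracks keys currently held and _keyswaiting tracks presses seen since the last poll, with releases delayed by one poll to smooth over auto-repeat. A hedged sketch of how a caller polls them (essentially what the keyboard agents later in this patch do); it assumes begin_graphics() has been called so the Tkinter bindings are live:

from graphicsUtils import begin_graphics, keys_waiting, keys_pressed, sleep

begin_graphics(title='key test')
keys = []
while 'q' not in keys:
    # keys_waiting() drains buffered presses, keys_pressed() reports held keys
    keys = keys_waiting() + keys_pressed()
    if keys:
        print keys
    sleep(0.05)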
+ +def wait_for_keys(): + keys = [] + while keys == []: + keys = keys_pressed() + sleep(0.05) + return keys + +def remove_from_screen(x, + d_o_e=Tkinter.tkinter.dooneevent, + d_w=Tkinter.tkinter.DONT_WAIT): + _canvas.delete(x) + d_o_e(d_w) + +def _adjust_coords(coord_list, x, y): + for i in range(0, len(coord_list), 2): + coord_list[i] = coord_list[i] + x + coord_list[i + 1] = coord_list[i + 1] + y + return coord_list + +def move_to(object, x, y=None, + d_o_e=Tkinter.tkinter.dooneevent, + d_w=Tkinter.tkinter.DONT_WAIT): + if y is None: + try: x, y = x + except: raise 'incomprehensible coordinates' + + horiz = True + newCoords = [] + current_x, current_y = _canvas.coords(object)[0:2] # first point + for coord in _canvas.coords(object): + if horiz: + inc = x - current_x + else: + inc = y - current_y + horiz = not horiz + + newCoords.append(coord + inc) + + _canvas.coords(object, *newCoords) + d_o_e(d_w) + +def move_by(object, x, y=None, + d_o_e=Tkinter.tkinter.dooneevent, + d_w=Tkinter.tkinter.DONT_WAIT, lift=False): + if y is None: + try: x, y = x + except: raise Exception, 'incomprehensible coordinates' + + horiz = True + newCoords = [] + for coord in _canvas.coords(object): + if horiz: + inc = x + else: + inc = y + horiz = not horiz + + newCoords.append(coord + inc) + + _canvas.coords(object, *newCoords) + d_o_e(d_w) + if lift: + _canvas.tag_raise(object) + +def writePostscript(filename): + "Writes the current canvas to a postscript file." + psfile = file(filename, 'w') + psfile.write(_canvas.postscript(pageanchor='sw', + y='0.c', + x='0.c')) + psfile.close() + +ghost_shape = [ + (0, - 0.5), + (0.25, - 0.75), + (0.5, - 0.5), + (0.75, - 0.75), + (0.75, 0.5), + (0.5, 0.75), + (- 0.5, 0.75), + (- 0.75, 0.5), + (- 0.75, - 0.75), + (- 0.5, - 0.5), + (- 0.25, - 0.75) + ] + +if __name__ == '__main__': + begin_graphics() + clear_screen() + ghost_shape = [(x * 10 + 20, y * 10 + 20) for x, y in ghost_shape] + g = polygon(ghost_shape, formatColor(1, 1, 1)) + move_to(g, (50, 50)) + circle((150, 150), 20, formatColor(0.7, 0.3, 0.0), endpoints=[15, - 15]) + sleep(2) diff --git a/reinforcement/gridworld.py b/reinforcement/gridworld.py new file mode 100644 index 0000000..6e1e16b --- /dev/null +++ b/reinforcement/gridworld.py @@ -0,0 +1,585 @@ +# gridworld.py +# ------------ +# Licensing Information: You are free to use or extend these projects for +# educational purposes provided that (1) you do not distribute or publish +# solutions, (2) you retain this notice, and (3) you provide clear +# attribution to UC Berkeley, including a link to http://ai.berkeley.edu. +# +# Attribution Information: The Pacman AI projects were developed at UC Berkeley. +# The core projects and autograders were primarily created by John DeNero +# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). +# Student side autograding was added by Brad Miller, Nick Hay, and +# Pieter Abbeel (pabbeel@cs.berkeley.edu). + + +import random +import sys +import mdp +import environment +import util +import optparse + +class Gridworld(mdp.MarkovDecisionProcess): + """ + Gridworld + """ + def __init__(self, grid): + # layout + if type(grid) == type([]): grid = makeGrid(grid) + self.grid = grid + + # parameters + self.livingReward = 0.0 + self.noise = 0.2 + + def setLivingReward(self, reward): + """ + The (negative) reward for exiting "normal" states. + + Note that in the R+N text, this reward is on entering + a state and therefore is not clearly part of the state's + future rewards. 
+ """ + self.livingReward = reward + + def setNoise(self, noise): + """ + The probability of moving in an unintended direction. + """ + self.noise = noise + + + def getPossibleActions(self, state): + """ + Returns list of valid actions for 'state'. + + Note that you can request moves into walls and + that "exit" states transition to the terminal + state under the special action "done". + """ + if state == self.grid.terminalState: + return () + x,y = state + if type(self.grid[x][y]) == int: + return ('exit',) + return ('north','west','south','east') + + def getStates(self): + """ + Return list of all states. + """ + # The true terminal state. + states = [self.grid.terminalState] + for x in range(self.grid.width): + for y in range(self.grid.height): + if self.grid[x][y] != '#': + state = (x,y) + states.append(state) + return states + + def getReward(self, state, action, nextState): + """ + Get reward for state, action, nextState transition. + + Note that the reward depends only on the state being + departed (as in the R+N book examples, which more or + less use this convention). + """ + if state == self.grid.terminalState: + return 0.0 + x, y = state + cell = self.grid[x][y] + if type(cell) == int or type(cell) == float: + return cell + return self.livingReward + + def getStartState(self): + for x in range(self.grid.width): + for y in range(self.grid.height): + if self.grid[x][y] == 'S': + return (x, y) + raise 'Grid has no start state' + + def isTerminal(self, state): + """ + Only the TERMINAL_STATE state is *actually* a terminal state. + The other "exit" states are technically non-terminals with + a single action "exit" which leads to the true terminal state. + This convention is to make the grids line up with the examples + in the R+N textbook. + """ + return state == self.grid.terminalState + + + def getTransitionStatesAndProbs(self, state, action): + """ + Returns list of (nextState, prob) pairs + representing the states reachable + from 'state' by taking 'action' along + with their transition probabilities. + """ + + if action not in self.getPossibleActions(state): + raise "Illegal action!" 
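setNoise controls how much probability mass leaks to the two perpendicular directions; getTransitionStatesAndProbs below splits that mass evenly and folds any move into a wall or off the grid back onto the current cell. A small sketch of the default 0.2 noise on the book grid defined later in this file, assuming the file is importable as gridworld (as it is in this patch):

from gridworld import getBookGrid

mdp = getBookGrid()
mdp.setNoise(0.2)   # 0.8 to the intended direction, 0.1 to each side
for nextState, prob in mdp.getTransitionStatesAndProbs((0, 0), 'north'):
    print nextState, prob   # (0, 1) 0.8, (1, 0) 0.1, and (0, 0) 0.1 since west is off the grid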
+ + if self.isTerminal(state): + return [] + + x, y = state + + if type(self.grid[x][y]) == int or type(self.grid[x][y]) == float: + termState = self.grid.terminalState + return [(termState, 1.0)] + + successors = [] + + northState = (self.__isAllowed(y+1,x) and (x,y+1)) or state + westState = (self.__isAllowed(y,x-1) and (x-1,y)) or state + southState = (self.__isAllowed(y-1,x) and (x,y-1)) or state + eastState = (self.__isAllowed(y,x+1) and (x+1,y)) or state + + if action == 'north' or action == 'south': + if action == 'north': + successors.append((northState,1-self.noise)) + else: + successors.append((southState,1-self.noise)) + + massLeft = self.noise + successors.append((westState,massLeft/2.0)) + successors.append((eastState,massLeft/2.0)) + + if action == 'west' or action == 'east': + if action == 'west': + successors.append((westState,1-self.noise)) + else: + successors.append((eastState,1-self.noise)) + + massLeft = self.noise + successors.append((northState,massLeft/2.0)) + successors.append((southState,massLeft/2.0)) + + successors = self.__aggregate(successors) + + return successors + + def __aggregate(self, statesAndProbs): + counter = util.Counter() + for state, prob in statesAndProbs: + counter[state] += prob + newStatesAndProbs = [] + for state, prob in counter.items(): + newStatesAndProbs.append((state, prob)) + return newStatesAndProbs + + def __isAllowed(self, y, x): + if y < 0 or y >= self.grid.height: return False + if x < 0 or x >= self.grid.width: return False + return self.grid[x][y] != '#' + +class GridworldEnvironment(environment.Environment): + + def __init__(self, gridWorld): + self.gridWorld = gridWorld + self.reset() + + def getCurrentState(self): + return self.state + + def getPossibleActions(self, state): + return self.gridWorld.getPossibleActions(state) + + def doAction(self, action): + state = self.getCurrentState() + (nextState, reward) = self.getRandomNextState(state, action) + self.state = nextState + return (nextState, reward) + + def getRandomNextState(self, state, action, randObj=None): + rand = -1.0 + if randObj is None: + rand = random.random() + else: + rand = randObj.random() + sum = 0.0 + successors = self.gridWorld.getTransitionStatesAndProbs(state, action) + for nextState, prob in successors: + sum += prob + if sum > 1.0: + raise 'Total transition probability more than one; sample failure.' + if rand < sum: + reward = self.gridWorld.getReward(state, action, nextState) + return (nextState, reward) + raise 'Total transition probability less than one; sample failure.' + + def reset(self): + self.state = self.gridWorld.getStartState() + +class Grid: + """ + A 2-dimensional array of immutables backed by a list of lists. Data is accessed + via grid[x][y] where (x,y) are cartesian coordinates with x horizontal, + y vertical and the origin (0,0) in the bottom left corner. + + The __str__ method constructs an output that is oriented appropriately. 
+ """ + def __init__(self, width, height, initialValue=' '): + self.width = width + self.height = height + self.data = [[initialValue for y in range(height)] for x in range(width)] + self.terminalState = 'TERMINAL_STATE' + + def __getitem__(self, i): + return self.data[i] + + def __setitem__(self, key, item): + self.data[key] = item + + def __eq__(self, other): + if other == None: return False + return self.data == other.data + + def __hash__(self): + return hash(self.data) + + def copy(self): + g = Grid(self.width, self.height) + g.data = [x[:] for x in self.data] + return g + + def deepCopy(self): + return self.copy() + + def shallowCopy(self): + g = Grid(self.width, self.height) + g.data = self.data + return g + + def _getLegacyText(self): + t = [[self.data[x][y] for x in range(self.width)] for y in range(self.height)] + t.reverse() + return t + + def __str__(self): + return str(self._getLegacyText()) + +def makeGrid(gridString): + width, height = len(gridString[0]), len(gridString) + grid = Grid(width, height) + for ybar, line in enumerate(gridString): + y = height - ybar - 1 + for x, el in enumerate(line): + grid[x][y] = el + return grid + +def getCliffGrid(): + grid = [[' ',' ',' ',' ',' '], + ['S',' ',' ',' ',10], + [-100,-100, -100, -100, -100]] + return Gridworld(makeGrid(grid)) + +def getCliffGrid2(): + grid = [[' ',' ',' ',' ',' '], + [8,'S',' ',' ',10], + [-100,-100, -100, -100, -100]] + return Gridworld(grid) + +def getDiscountGrid(): + grid = [[' ',' ',' ',' ',' '], + [' ','#',' ',' ',' '], + [' ','#', 1,'#', 10], + ['S',' ',' ',' ',' '], + [-10,-10, -10, -10, -10]] + return Gridworld(grid) + +def getBridgeGrid(): + grid = [[ '#',-100, -100, -100, -100, -100, '#'], + [ 1, 'S', ' ', ' ', ' ', ' ', 10], + [ '#',-100, -100, -100, -100, -100, '#']] + return Gridworld(grid) + +def getBookGrid(): + grid = [[' ',' ',' ',+1], + [' ','#',' ',-1], + ['S',' ',' ',' ']] + return Gridworld(grid) + +def getMazeGrid(): + grid = [[' ',' ',' ',+1], + ['#','#',' ','#'], + [' ','#',' ',' '], + [' ','#','#',' '], + ['S',' ',' ',' ']] + return Gridworld(grid) + + + +def getUserAction(state, actionFunction): + """ + Get an action from the user (rather than the agent). + + Used for debugging and lecture demos. 
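Grid stores the layout so that grid[x][y] reads as Cartesian coordinates with the origin in the bottom-left, and makeGrid flips the row order of the text representation to match. A quick illustration with a hypothetical two-by-two layout (again assuming the module is importable as gridworld):

from gridworld import makeGrid

g = makeGrid(['##',
              'S.'])          # top text row becomes y = 1, bottom row y = 0
print g.width, g.height        # prints: 2 2
print g[0][0], g[0][1]         # prints: S #   -- (x, y) indexing, origin bottom-left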
+ """ + import graphicsUtils + action = None + while True: + keys = graphicsUtils.wait_for_keys() + if 'Up' in keys: action = 'north' + if 'Down' in keys: action = 'south' + if 'Left' in keys: action = 'west' + if 'Right' in keys: action = 'east' + if 'q' in keys: sys.exit(0) + if action == None: continue + break + actions = actionFunction(state) + if action not in actions: + action = actions[0] + return action + +def printString(x): print x + +def runEpisode(agent, environment, discount, decision, display, message, pause, episode): + returns = 0 + totalDiscount = 1.0 + environment.reset() + if 'startEpisode' in dir(agent): agent.startEpisode() + message("BEGINNING EPISODE: "+str(episode)+"\n") + while True: + + # DISPLAY CURRENT STATE + state = environment.getCurrentState() + display(state) + pause() + + # END IF IN A TERMINAL STATE + actions = environment.getPossibleActions(state) + if len(actions) == 0: + message("EPISODE "+str(episode)+" COMPLETE: RETURN WAS "+str(returns)+"\n") + return returns + + # GET ACTION (USUALLY FROM AGENT) + action = decision(state) + if action == None: + raise 'Error: Agent returned None action' + + # EXECUTE ACTION + nextState, reward = environment.doAction(action) + message("Started in state: "+str(state)+ + "\nTook action: "+str(action)+ + "\nEnded in state: "+str(nextState)+ + "\nGot reward: "+str(reward)+"\n") + # UPDATE LEARNER + if 'observeTransition' in dir(agent): + agent.observeTransition(state, action, nextState, reward) + + returns += reward * totalDiscount + totalDiscount *= discount + + if 'stopEpisode' in dir(agent): + agent.stopEpisode() + +def parseOptions(): + optParser = optparse.OptionParser() + optParser.add_option('-d', '--discount',action='store', + type='float',dest='discount',default=0.9, + help='Discount on future (default %default)') + optParser.add_option('-r', '--livingReward',action='store', + type='float',dest='livingReward',default=0.0, + metavar="R", help='Reward for living for a time step (default %default)') + optParser.add_option('-n', '--noise',action='store', + type='float',dest='noise',default=0.2, + metavar="P", help='How often action results in ' + + 'unintended direction (default %default)' ) + optParser.add_option('-e', '--epsilon',action='store', + type='float',dest='epsilon',default=0.3, + metavar="E", help='Chance of taking a random action in q-learning (default %default)') + optParser.add_option('-l', '--learningRate',action='store', + type='float',dest='learningRate',default=0.5, + metavar="P", help='TD learning rate (default %default)' ) + optParser.add_option('-i', '--iterations',action='store', + type='int',dest='iters',default=10, + metavar="K", help='Number of rounds of value iteration (default %default)') + optParser.add_option('-k', '--episodes',action='store', + type='int',dest='episodes',default=1, + metavar="K", help='Number of epsiodes of the MDP to run (default %default)') + optParser.add_option('-g', '--grid',action='store', + metavar="G", type='string',dest='grid',default="BookGrid", + help='Grid to use (case sensitive; options are BookGrid, BridgeGrid, CliffGrid, MazeGrid, default %default)' ) + optParser.add_option('-w', '--windowSize', metavar="X", type='int',dest='gridSize',default=150, + help='Request a window width of X pixels *per grid cell* (default %default)') + optParser.add_option('-a', '--agent',action='store', metavar="A", + type='string',dest='agent',default="random", + help='Agent type (options are \'random\', \'value\' and \'q\', default %default)') + optParser.add_option('-t', 
'--text',action='store_true', + dest='textDisplay',default=False, + help='Use text-only ASCII display') + optParser.add_option('-p', '--pause',action='store_true', + dest='pause',default=False, + help='Pause GUI after each time step when running the MDP') + optParser.add_option('-q', '--quiet',action='store_true', + dest='quiet',default=False, + help='Skip display of any learning episodes') + optParser.add_option('-s', '--speed',action='store', metavar="S", type=float, + dest='speed',default=1.0, + help='Speed of animation, S > 1.0 is faster, 0.0 < S < 1.0 is slower (default %default)') + optParser.add_option('-m', '--manual',action='store_true', + dest='manual',default=False, + help='Manually control agent') + optParser.add_option('-v', '--valueSteps',action='store_true' ,default=False, + help='Display each step of value iteration') + + opts, args = optParser.parse_args() + + if opts.manual and opts.agent != 'q': + print '## Disabling Agents in Manual Mode (-m) ##' + opts.agent = None + + # MANAGE CONFLICTS + if opts.textDisplay or opts.quiet: + # if opts.quiet: + opts.pause = False + # opts.manual = False + + if opts.manual: + opts.pause = True + + return opts + + +if __name__ == '__main__': + + opts = parseOptions() + + ########################### + # GET THE GRIDWORLD + ########################### + + import gridworld + mdpFunction = getattr(gridworld, "get"+opts.grid) + mdp = mdpFunction() + mdp.setLivingReward(opts.livingReward) + mdp.setNoise(opts.noise) + env = gridworld.GridworldEnvironment(mdp) + + + ########################### + # GET THE DISPLAY ADAPTER + ########################### + + import textGridworldDisplay + display = textGridworldDisplay.TextGridworldDisplay(mdp) + if not opts.textDisplay: + import graphicsGridworldDisplay + display = graphicsGridworldDisplay.GraphicsGridworldDisplay(mdp, opts.gridSize, opts.speed) + try: + display.start() + except KeyboardInterrupt: + sys.exit(0) + + ########################### + # GET THE AGENT + ########################### + + import valueIterationAgents, qlearningAgents + a = None + if opts.agent == 'value': + a = valueIterationAgents.ValueIterationAgent(mdp, opts.discount, opts.iters) + elif opts.agent == 'q': + #env.getPossibleActions, opts.discount, opts.learningRate, opts.epsilon + #simulationFn = lambda agent, state: simulation.GridworldSimulation(agent,state,mdp) + gridWorldEnv = GridworldEnvironment(mdp) + actionFn = lambda state: mdp.getPossibleActions(state) + qLearnOpts = {'gamma': opts.discount, + 'alpha': opts.learningRate, + 'epsilon': opts.epsilon, + 'actionFn': actionFn} + a = qlearningAgents.QLearningAgent(**qLearnOpts) + elif opts.agent == 'random': + # # No reason to use the random agent without episodes + if opts.episodes == 0: + opts.episodes = 10 + class RandomAgent: + def getAction(self, state): + return random.choice(mdp.getPossibleActions(state)) + def getValue(self, state): + return 0.0 + def getQValue(self, state, action): + return 0.0 + def getPolicy(self, state): + "NOTE: 'random' is a special policy value; don't use it in your code." 
+ return 'random' + def update(self, state, action, nextState, reward): + pass + a = RandomAgent() + else: + if not opts.manual: raise 'Unknown agent type: '+opts.agent + + + ########################### + # RUN EPISODES + ########################### + # DISPLAY Q/V VALUES BEFORE SIMULATION OF EPISODES + try: + if not opts.manual and opts.agent == 'value': + if opts.valueSteps: + for i in range(opts.iters): + tempAgent = valueIterationAgents.ValueIterationAgent(mdp, opts.discount, i) + display.displayValues(tempAgent, message = "VALUES AFTER "+str(i)+" ITERATIONS") + display.pause() + + display.displayValues(a, message = "VALUES AFTER "+str(opts.iters)+" ITERATIONS") + display.pause() + display.displayQValues(a, message = "Q-VALUES AFTER "+str(opts.iters)+" ITERATIONS") + display.pause() + except KeyboardInterrupt: + sys.exit(0) + + + + # FIGURE OUT WHAT TO DISPLAY EACH TIME STEP (IF ANYTHING) + displayCallback = lambda x: None + if not opts.quiet: + if opts.manual and opts.agent == None: + displayCallback = lambda state: display.displayNullValues(state) + else: + if opts.agent == 'random': displayCallback = lambda state: display.displayValues(a, state, "CURRENT VALUES") + if opts.agent == 'value': displayCallback = lambda state: display.displayValues(a, state, "CURRENT VALUES") + if opts.agent == 'q': displayCallback = lambda state: display.displayQValues(a, state, "CURRENT Q-VALUES") + + messageCallback = lambda x: printString(x) + if opts.quiet: + messageCallback = lambda x: None + + # FIGURE OUT WHETHER TO WAIT FOR A KEY PRESS AFTER EACH TIME STEP + pauseCallback = lambda : None + if opts.pause: + pauseCallback = lambda : display.pause() + + # FIGURE OUT WHETHER THE USER WANTS MANUAL CONTROL (FOR DEBUGGING AND DEMOS) + if opts.manual: + decisionCallback = lambda state : getUserAction(state, mdp.getPossibleActions) + else: + decisionCallback = a.getAction + + # RUN EPISODES + if opts.episodes > 0: + print + print "RUNNING", opts.episodes, "EPISODES" + print + returns = 0 + for episode in range(1, opts.episodes+1): + returns += runEpisode(a, env, opts.discount, decisionCallback, displayCallback, messageCallback, pauseCallback, episode) + if opts.episodes > 0: + print + print "AVERAGE RETURNS FROM START STATE: "+str((returns+0.0) / opts.episodes) + print + print + + # DISPLAY POST-LEARNING VALUES / Q-VALUES + if opts.agent == 'q' and not opts.manual: + try: + display.displayQValues(a, message = "Q-VALUES AFTER "+str(opts.episodes)+" EPISODES") + display.pause() + display.displayValues(a, message = "VALUES AFTER "+str(opts.episodes)+" EPISODES") + display.pause() + except KeyboardInterrupt: + sys.exit(0) diff --git a/reinforcement/keyboardAgents.py b/reinforcement/keyboardAgents.py new file mode 100644 index 0000000..c7d9fcf --- /dev/null +++ b/reinforcement/keyboardAgents.py @@ -0,0 +1,84 @@ +# keyboardAgents.py +# ----------------- +# Licensing Information: You are free to use or extend these projects for +# educational purposes provided that (1) you do not distribute or publish +# solutions, (2) you retain this notice, and (3) you provide clear +# attribution to UC Berkeley, including a link to http://ai.berkeley.edu. +# +# Attribution Information: The Pacman AI projects were developed at UC Berkeley. +# The core projects and autograders were primarily created by John DeNero +# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). +# Student side autograding was added by Brad Miller, Nick Hay, and +# Pieter Abbeel (pabbeel@cs.berkeley.edu). 
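runEpisode above accumulates the discounted return r0 + discount*r1 + discount^2*r2 + ... by carrying a running totalDiscount, and the average of these returns over episodes is what gets printed at the end of the run. A tiny worked example with hypothetical per-step rewards, mirroring that loop:

discount = 0.9
rewards = [0.0, 0.0, 1.0]           # hypothetical rewards observed during one episode
returns, totalDiscount = 0.0, 1.0
for reward in rewards:
    returns += reward * totalDiscount
    totalDiscount *= discount
print returns                        # 0.81, i.e. 1.0 * 0.9**2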
+ + +from game import Agent +from game import Directions +import random + +class KeyboardAgent(Agent): + """ + An agent controlled by the keyboard. + """ + # NOTE: Arrow keys also work. + WEST_KEY = 'a' + EAST_KEY = 'd' + NORTH_KEY = 'w' + SOUTH_KEY = 's' + STOP_KEY = 'q' + + def __init__( self, index = 0 ): + + self.lastMove = Directions.STOP + self.index = index + self.keys = [] + + def getAction( self, state): + from graphicsUtils import keys_waiting + from graphicsUtils import keys_pressed + keys = keys_waiting() + keys_pressed() + if keys != []: + self.keys = keys + + legal = state.getLegalActions(self.index) + move = self.getMove(legal) + + if move == Directions.STOP: + # Try to move in the same direction as before + if self.lastMove in legal: + move = self.lastMove + + if (self.STOP_KEY in self.keys) and Directions.STOP in legal: move = Directions.STOP + + if move not in legal: + move = random.choice(legal) + + self.lastMove = move + return move + + def getMove(self, legal): + move = Directions.STOP + if (self.WEST_KEY in self.keys or 'Left' in self.keys) and Directions.WEST in legal: move = Directions.WEST + if (self.EAST_KEY in self.keys or 'Right' in self.keys) and Directions.EAST in legal: move = Directions.EAST + if (self.NORTH_KEY in self.keys or 'Up' in self.keys) and Directions.NORTH in legal: move = Directions.NORTH + if (self.SOUTH_KEY in self.keys or 'Down' in self.keys) and Directions.SOUTH in legal: move = Directions.SOUTH + return move + +class KeyboardAgent2(KeyboardAgent): + """ + A second agent controlled by the keyboard. + """ + # NOTE: Arrow keys also work. + WEST_KEY = 'j' + EAST_KEY = "l" + NORTH_KEY = 'i' + SOUTH_KEY = 'k' + STOP_KEY = 'u' + + def getMove(self, legal): + move = Directions.STOP + if (self.WEST_KEY in self.keys) and Directions.WEST in legal: move = Directions.WEST + if (self.EAST_KEY in self.keys) and Directions.EAST in legal: move = Directions.EAST + if (self.NORTH_KEY in self.keys) and Directions.NORTH in legal: move = Directions.NORTH + if (self.SOUTH_KEY in self.keys) and Directions.SOUTH in legal: move = Directions.SOUTH + return move diff --git a/reinforcement/layout.py b/reinforcement/layout.py new file mode 100644 index 0000000..c6b377d --- /dev/null +++ b/reinforcement/layout.py @@ -0,0 +1,149 @@ +# layout.py +# --------- +# Licensing Information: You are free to use or extend these projects for +# educational purposes provided that (1) you do not distribute or publish +# solutions, (2) you retain this notice, and (3) you provide clear +# attribution to UC Berkeley, including a link to http://ai.berkeley.edu. +# +# Attribution Information: The Pacman AI projects were developed at UC Berkeley. +# The core projects and autograders were primarily created by John DeNero +# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). +# Student side autograding was added by Brad Miller, Nick Hay, and +# Pieter Abbeel (pabbeel@cs.berkeley.edu). + + +from util import manhattanDistance +from game import Grid +import os +import random + +VISIBILITY_MATRIX_CACHE = {} + +class Layout: + """ + A Layout manages the static information about the game board. 
+ """ + + def __init__(self, layoutText): + self.width = len(layoutText[0]) + self.height= len(layoutText) + self.walls = Grid(self.width, self.height, False) + self.food = Grid(self.width, self.height, False) + self.capsules = [] + self.agentPositions = [] + self.numGhosts = 0 + self.processLayoutText(layoutText) + self.layoutText = layoutText + self.totalFood = len(self.food.asList()) + # self.initializeVisibilityMatrix() + + def getNumGhosts(self): + return self.numGhosts + + def initializeVisibilityMatrix(self): + global VISIBILITY_MATRIX_CACHE + if reduce(str.__add__, self.layoutText) not in VISIBILITY_MATRIX_CACHE: + from game import Directions + vecs = [(-0.5,0), (0.5,0),(0,-0.5),(0,0.5)] + dirs = [Directions.NORTH, Directions.SOUTH, Directions.WEST, Directions.EAST] + vis = Grid(self.width, self.height, {Directions.NORTH:set(), Directions.SOUTH:set(), Directions.EAST:set(), Directions.WEST:set(), Directions.STOP:set()}) + for x in range(self.width): + for y in range(self.height): + if self.walls[x][y] == False: + for vec, direction in zip(vecs, dirs): + dx, dy = vec + nextx, nexty = x + dx, y + dy + while (nextx + nexty) != int(nextx) + int(nexty) or not self.walls[int(nextx)][int(nexty)] : + vis[x][y][direction].add((nextx, nexty)) + nextx, nexty = x + dx, y + dy + self.visibility = vis + VISIBILITY_MATRIX_CACHE[reduce(str.__add__, self.layoutText)] = vis + else: + self.visibility = VISIBILITY_MATRIX_CACHE[reduce(str.__add__, self.layoutText)] + + def isWall(self, pos): + x, col = pos + return self.walls[x][col] + + def getRandomLegalPosition(self): + x = random.choice(range(self.width)) + y = random.choice(range(self.height)) + while self.isWall( (x, y) ): + x = random.choice(range(self.width)) + y = random.choice(range(self.height)) + return (x,y) + + def getRandomCorner(self): + poses = [(1,1), (1, self.height - 2), (self.width - 2, 1), (self.width - 2, self.height - 2)] + return random.choice(poses) + + def getFurthestCorner(self, pacPos): + poses = [(1,1), (1, self.height - 2), (self.width - 2, 1), (self.width - 2, self.height - 2)] + dist, pos = max([(manhattanDistance(p, pacPos), p) for p in poses]) + return pos + + def isVisibleFrom(self, ghostPos, pacPos, pacDirection): + row, col = [int(x) for x in pacPos] + return ghostPos in self.visibility[row][col][pacDirection] + + def __str__(self): + return "\n".join(self.layoutText) + + def deepCopy(self): + return Layout(self.layoutText[:]) + + def processLayoutText(self, layoutText): + """ + Coordinates are flipped from the input format to the (x,y) convention here + + The shape of the maze. Each character + represents a different type of object. + % - Wall + . - Food + o - Capsule + G - Ghost + P - Pacman + Other characters are ignored. 
+ """ + maxY = self.height - 1 + for y in range(self.height): + for x in range(self.width): + layoutChar = layoutText[maxY - y][x] + self.processLayoutChar(x, y, layoutChar) + self.agentPositions.sort() + self.agentPositions = [ ( i == 0, pos) for i, pos in self.agentPositions] + + def processLayoutChar(self, x, y, layoutChar): + if layoutChar == '%': + self.walls[x][y] = True + elif layoutChar == '.': + self.food[x][y] = True + elif layoutChar == 'o': + self.capsules.append((x, y)) + elif layoutChar == 'P': + self.agentPositions.append( (0, (x, y) ) ) + elif layoutChar in ['G']: + self.agentPositions.append( (1, (x, y) ) ) + self.numGhosts += 1 + elif layoutChar in ['1', '2', '3', '4']: + self.agentPositions.append( (int(layoutChar), (x,y))) + self.numGhosts += 1 +def getLayout(name, back = 2): + if name.endswith('.lay'): + layout = tryToLoad('layouts/' + name) + if layout == None: layout = tryToLoad(name) + else: + layout = tryToLoad('layouts/' + name + '.lay') + if layout == None: layout = tryToLoad(name + '.lay') + if layout == None and back >= 0: + curdir = os.path.abspath('.') + os.chdir('..') + layout = getLayout(name, back -1) + os.chdir(curdir) + return layout + +def tryToLoad(fullname): + if(not os.path.exists(fullname)): return None + f = open(fullname) + try: return Layout([line.strip() for line in f]) + finally: f.close() diff --git a/reinforcement/layouts/capsuleClassic.lay b/reinforcement/layouts/capsuleClassic.lay new file mode 100644 index 0000000..06a5c51 --- /dev/null +++ b/reinforcement/layouts/capsuleClassic.lay @@ -0,0 +1,7 @@ +%%%%%%%%%%%%%%%%%%% +%G. G ....% +%.% % %%%%%% %.%%.% +%.%o% % o% %.o%.% +%.%%%.% %%% %..%.% +%..... P %..%G% +%%%%%%%%%%%%%%%%%%%% diff --git a/reinforcement/layouts/contestClassic.lay b/reinforcement/layouts/contestClassic.lay new file mode 100644 index 0000000..84c8733 --- /dev/null +++ b/reinforcement/layouts/contestClassic.lay @@ -0,0 +1,9 @@ +%%%%%%%%%%%%%%%%%%%% +%o...%........%...o% +%.%%.%.%%..%%.%.%%.% +%...... G GG%......% +%.%.%%.%% %%%.%%.%.% +%.%....% ooo%.%..%.% +%.%.%%.% %% %.%.%%.% +%o%......P....%....% +%%%%%%%%%%%%%%%%%%%% diff --git a/reinforcement/layouts/mediumClassic.lay b/reinforcement/layouts/mediumClassic.lay new file mode 100644 index 0000000..33c5db8 --- /dev/null +++ b/reinforcement/layouts/mediumClassic.lay @@ -0,0 +1,11 @@ +%%%%%%%%%%%%%%%%%%%% +%o...%........%....% +%.%%.%.%%%%%%.%.%%.% +%.%..............%.% +%.%.%%.%% %%.%%.%.% +%......%G G%......% +%.%.%%.%%%%%%.%%.%.% +%.%..............%.% +%.%%.%.%%%%%%.%.%%.% +%....%...P....%...o% +%%%%%%%%%%%%%%%%%%%% diff --git a/reinforcement/layouts/mediumGrid.lay b/reinforcement/layouts/mediumGrid.lay new file mode 100644 index 0000000..52b2707 --- /dev/null +++ b/reinforcement/layouts/mediumGrid.lay @@ -0,0 +1,7 @@ +%%%%%%%% +%P % +% .% . % +% % % +% .% . % +% G% +%%%%%%%% diff --git a/reinforcement/layouts/minimaxClassic.lay b/reinforcement/layouts/minimaxClassic.lay new file mode 100644 index 0000000..a547397 --- /dev/null +++ b/reinforcement/layouts/minimaxClassic.lay @@ -0,0 +1,5 @@ +%%%%%%%%% +%.P G% +% %.%G%%% +%G %%% +%%%%%%%%% diff --git a/reinforcement/layouts/openClassic.lay b/reinforcement/layouts/openClassic.lay new file mode 100644 index 0000000..6760b42 --- /dev/null +++ b/reinforcement/layouts/openClassic.lay @@ -0,0 +1,9 @@ +%%%%%%%%%%%%%%%%%%%%%%%%% +%.. P .... .... % +%.. ... ... ... ... % +%.. ... ... ... ... % +%.. .... .... G % +%.. ... ... ... ... % +%.. ... ... ... ... % +%.. .... .... 
o% +%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/reinforcement/layouts/originalClassic.lay b/reinforcement/layouts/originalClassic.lay new file mode 100644 index 0000000..b2770c5 --- /dev/null +++ b/reinforcement/layouts/originalClassic.lay @@ -0,0 +1,27 @@ +%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%............%%............% +%.%%%%.%%%%%.%%.%%%%%.%%%%.% +%o%%%%.%%%%%.%%.%%%%%.%%%%o% +%.%%%%.%%%%%.%%.%%%%%.%%%%.% +%..........................% +%.%%%%.%%.%%%%%%%%.%%.%%%%.% +%.%%%%.%%.%%%%%%%%.%%.%%%%.% +%......%%....%%....%%......% +%%%%%%.%%%%% %% %%%%%.%%%%%% +%%%%%%.%%%%% %% %%%%%.%%%%%% +%%%%%%.% %.%%%%%% +%%%%%%.% %%%% %%%% %.%%%%%% +% . %G GG G% . % +%%%%%%.% %%%%%%%%%% %.%%%%%% +%%%%%%.% %.%%%%%% +%%%%%%.% %%%%%%%%%% %.%%%%%% +%............%%............% +%.%%%%.%%%%%.%%.%%%%%.%%%%.% +%.%%%%.%%%%%.%%.%%%%%.%%%%.% +%o..%%....... .......%%..o% +%%%.%%.%%.%%%%%%%%.%%.%%.%%% +%%%.%%.%%.%%%%%%%%.%%.%%.%%% +%......%%....%%....%%......% +%.%%%%%%%%%%.%%.%%%%%%%%%%.% +%.............P............% +%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/reinforcement/layouts/smallClassic.lay b/reinforcement/layouts/smallClassic.lay new file mode 100644 index 0000000..ce6c1d9 --- /dev/null +++ b/reinforcement/layouts/smallClassic.lay @@ -0,0 +1,7 @@ +%%%%%%%%%%%%%%%%%%%% +%......%G G%......% +%.%%...%% %%...%%.% +%.%o.%........%.o%.% +%.%%.%.%%%%%%.%.%%.% +%........P.........% +%%%%%%%%%%%%%%%%%%%% diff --git a/reinforcement/layouts/smallGrid.lay b/reinforcement/layouts/smallGrid.lay new file mode 100644 index 0000000..4bbe2b6 --- /dev/null +++ b/reinforcement/layouts/smallGrid.lay @@ -0,0 +1,7 @@ +%%%%%%% +% P % +% %%% % +% %. % +% %%% % +%. G % +%%%%%%% diff --git a/reinforcement/layouts/testClassic.lay b/reinforcement/layouts/testClassic.lay new file mode 100644 index 0000000..4b3ffca --- /dev/null +++ b/reinforcement/layouts/testClassic.lay @@ -0,0 +1,10 @@ +%%%%% +% . % +%.G.% +% . % +%. .% +% % +% .% +% % +%P .% +%%%%% diff --git a/reinforcement/layouts/trappedClassic.lay b/reinforcement/layouts/trappedClassic.lay new file mode 100644 index 0000000..289557f --- /dev/null +++ b/reinforcement/layouts/trappedClassic.lay @@ -0,0 +1,5 @@ +%%%%%%%% +% P G% +%G%%%%%% +%.... % +%%%%%%%% diff --git a/reinforcement/layouts/trickyClassic.lay b/reinforcement/layouts/trickyClassic.lay new file mode 100644 index 0000000..ffa156c --- /dev/null +++ b/reinforcement/layouts/trickyClassic.lay @@ -0,0 +1,13 @@ +%%%%%%%%%%%%%%%%%%%% +%o...%........%...o% +%.%%.%.%%..%%.%.%%.% +%.%.....%..%.....%.% +%.%.%%.%% %%.%%.%.% +%...... GGGG%.%....% +%.%....%%%%%%.%..%.% +%.%....% oo%.%..%.% +%.%....% %%%%.%..%.% +%.%...........%..%.% +%.%%.%.%%%%%%.%.%%.% +%o...%...P....%...o% +%%%%%%%%%%%%%%%%%%%% diff --git a/reinforcement/learningAgents.py b/reinforcement/learningAgents.py new file mode 100644 index 0000000..ef6c51c --- /dev/null +++ b/reinforcement/learningAgents.py @@ -0,0 +1,258 @@ +# learningAgents.py +# ----------------- +# Licensing Information: You are free to use or extend these projects for +# educational purposes provided that (1) you do not distribute or publish +# solutions, (2) you retain this notice, and (3) you provide clear +# attribution to UC Berkeley, including a link to http://ai.berkeley.edu. +# +# Attribution Information: The Pacman AI projects were developed at UC Berkeley. +# The core projects and autograders were primarily created by John DeNero +# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). 
+# Student side autograding was added by Brad Miller, Nick Hay, and +# Pieter Abbeel (pabbeel@cs.berkeley.edu). + + +from game import Directions, Agent, Actions + +import random,util,time + +class ValueEstimationAgent(Agent): + """ + Abstract agent which assigns values to (state,action) + Q-Values for an environment. As well as a value to a + state and a policy given respectively by, + + V(s) = max_{a in actions} Q(s,a) + policy(s) = arg_max_{a in actions} Q(s,a) + + Both ValueIterationAgent and QLearningAgent inherit + from this agent. While a ValueIterationAgent has + a model of the environment via a MarkovDecisionProcess + (see mdp.py) that is used to estimate Q-Values before + ever actually acting, the QLearningAgent estimates + Q-Values while acting in the environment. + """ + + def __init__(self, alpha=1.0, epsilon=0.05, gamma=0.8, numTraining = 10): + """ + Sets options, which can be passed in via the Pacman command line using -a alpha=0.5,... + alpha - learning rate + epsilon - exploration rate + gamma - discount factor + numTraining - number of training episodes, i.e. no learning after these many episodes + """ + self.alpha = float(alpha) + self.epsilon = float(epsilon) + self.discount = float(gamma) + self.numTraining = int(numTraining) + + #################################### + # Override These Functions # + #################################### + def getQValue(self, state, action): + """ + Should return Q(state,action) + """ + util.raiseNotDefined() + + def getValue(self, state): + """ + What is the value of this state under the best action? + Concretely, this is given by + + V(s) = max_{a in actions} Q(s,a) + """ + util.raiseNotDefined() + + def getPolicy(self, state): + """ + What is the best action to take in the state. Note that because + we might want to explore, this might not coincide with getAction + Concretely, this is given by + + policy(s) = arg_max_{a in actions} Q(s,a) + + If many actions achieve the maximal Q-value, + it doesn't matter which is selected. + """ + util.raiseNotDefined() + + def getAction(self, state): + """ + state: can call state.getLegalActions() + Choose an action and return it. + """ + util.raiseNotDefined() + +class ReinforcementAgent(ValueEstimationAgent): + """ + Abstract Reinforcemnt Agent: A ValueEstimationAgent + which estimates Q-Values (as well as policies) from experience + rather than a model + + What you need to know: + - The environment will call + observeTransition(state,action,nextState,deltaReward), + which will call update(state, action, nextState, deltaReward) + which you should override. + - Use self.getLegalActions(state) to know which actions + are available in a state + """ + #################################### + # Override These Functions # + #################################### + + def update(self, state, action, nextState, reward): + """ + This class will call this function, which you write, after + observing a transition and reward + """ + util.raiseNotDefined() + + #################################### + # Read These Functions # + #################################### + + def getLegalActions(self,state): + """ + Get the actions available for a given + state. This is what you should use to + obtain legal actions for a state + """ + return self.actionFn(state) + + def observeTransition(self, state,action,nextState,deltaReward): + """ + Called by environment to inform agent that a transition has + been observed. 
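The state value and policy named in the docstring below are simply the max and argmax over a state's Q-values. A minimal illustration with made-up numbers:

qValues = {'north': 0.72, 'south': 0.40, 'east': 0.66, 'west': 0.40}
stateValue = max(qValues.values())            # V(s)      = max_a Q(s,a)    -> 0.72
greedyAction = max(qValues, key=qValues.get)  # policy(s) = argmax_a Q(s,a) -> 'north'
print stateValue, greedyAction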
This will result in a call to self.update + on the same arguments + + NOTE: Do *not* override or call this function + """ + self.episodeRewards += deltaReward + self.update(state,action,nextState,deltaReward) + + def startEpisode(self): + """ + Called by environment when new episode is starting + """ + self.lastState = None + self.lastAction = None + self.episodeRewards = 0.0 + + def stopEpisode(self): + """ + Called by environment when episode is done + """ + if self.episodesSoFar < self.numTraining: + self.accumTrainRewards += self.episodeRewards + else: + self.accumTestRewards += self.episodeRewards + self.episodesSoFar += 1 + if self.episodesSoFar >= self.numTraining: + # Take off the training wheels + self.epsilon = 0.0 # no exploration + self.alpha = 0.0 # no learning + + def isInTraining(self): + return self.episodesSoFar < self.numTraining + + def isInTesting(self): + return not self.isInTraining() + + def __init__(self, actionFn = None, numTraining=100, epsilon=0.5, alpha=0.5, gamma=1): + """ + actionFn: Function which takes a state and returns the list of legal actions + + alpha - learning rate + epsilon - exploration rate + gamma - discount factor + numTraining - number of training episodes, i.e. no learning after these many episodes + """ + if actionFn == None: + actionFn = lambda state: state.getLegalActions() + self.actionFn = actionFn + self.episodesSoFar = 0 + self.accumTrainRewards = 0.0 + self.accumTestRewards = 0.0 + self.numTraining = int(numTraining) + self.epsilon = float(epsilon) + self.alpha = float(alpha) + self.discount = float(gamma) + + ################################ + # Controls needed for Crawler # + ################################ + def setEpsilon(self, epsilon): + self.epsilon = epsilon + + def setLearningRate(self, alpha): + self.alpha = alpha + + def setDiscount(self, discount): + self.discount = discount + + def doAction(self,state,action): + """ + Called by inherited class when + an action is taken in a state + """ + self.lastState = state + self.lastAction = action + + ################### + # Pacman Specific # + ################### + def observationFunction(self, state): + """ + This is where we ended up after our last action. 
+ The simulation should somehow ensure this is called + """ + if not self.lastState is None: + reward = state.getScore() - self.lastState.getScore() + self.observeTransition(self.lastState, self.lastAction, state, reward) + return state + + def registerInitialState(self, state): + self.startEpisode() + if self.episodesSoFar == 0: + print 'Beginning %d episodes of Training' % (self.numTraining) + + def final(self, state): + """ + Called by Pacman game at the terminal state + """ + deltaReward = state.getScore() - self.lastState.getScore() + self.observeTransition(self.lastState, self.lastAction, state, deltaReward) + self.stopEpisode() + + # Make sure we have this var + if not 'episodeStartTime' in self.__dict__: + self.episodeStartTime = time.time() + if not 'lastWindowAccumRewards' in self.__dict__: + self.lastWindowAccumRewards = 0.0 + self.lastWindowAccumRewards += state.getScore() + + NUM_EPS_UPDATE = 100 + if self.episodesSoFar % NUM_EPS_UPDATE == 0: + print 'Reinforcement Learning Status:' + windowAvg = self.lastWindowAccumRewards / float(NUM_EPS_UPDATE) + if self.episodesSoFar <= self.numTraining: + trainAvg = self.accumTrainRewards / float(self.episodesSoFar) + print '\tCompleted %d out of %d training episodes' % ( + self.episodesSoFar,self.numTraining) + print '\tAverage Rewards over all training: %.2f' % ( + trainAvg) + else: + testAvg = float(self.accumTestRewards) / (self.episodesSoFar - self.numTraining) + print '\tCompleted %d test episodes' % (self.episodesSoFar - self.numTraining) + print '\tAverage Rewards over testing: %.2f' % testAvg + print '\tAverage Rewards for last %d episodes: %.2f' % ( + NUM_EPS_UPDATE,windowAvg) + print '\tEpisode took %.2f seconds' % (time.time() - self.episodeStartTime) + self.lastWindowAccumRewards = 0.0 + self.episodeStartTime = time.time() + + if self.episodesSoFar == self.numTraining: + msg = 'Training Done (turning off epsilon and alpha)' + print '%s\n%s' % (msg,'-' * len(msg)) diff --git a/reinforcement/mdp.py b/reinforcement/mdp.py new file mode 100644 index 0000000..0ce0c28 --- /dev/null +++ b/reinforcement/mdp.py @@ -0,0 +1,67 @@ +# mdp.py +# ------ +# Licensing Information: You are free to use or extend these projects for +# educational purposes provided that (1) you do not distribute or publish +# solutions, (2) you retain this notice, and (3) you provide clear +# attribution to UC Berkeley, including a link to http://ai.berkeley.edu. +# +# Attribution Information: The Pacman AI projects were developed at UC Berkeley. +# The core projects and autograders were primarily created by John DeNero +# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). +# Student side autograding was added by Brad Miller, Nick Hay, and +# Pieter Abbeel (pabbeel@cs.berkeley.edu). + + +import random + +class MarkovDecisionProcess: + + def getStates(self): + """ + Return a list of all states in the MDP. + Not generally possible for large MDPs. + """ + abstract + + def getStartState(self): + """ + Return the start state of the MDP. + """ + abstract + + def getPossibleActions(self, state): + """ + Return list of possible actions from 'state'. + """ + abstract + + def getTransitionStatesAndProbs(self, state, action): + """ + Returns list of (nextState, prob) pairs + representing the states reachable + from 'state' by taking 'action' along + with their transition probabilities. + + Note that in Q-Learning and reinforcment + learning in general, we do not know these + probabilities nor do we directly model them. 
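The transition and reward methods of this interface are what a model-based planner consumes (a Q-learner, as noted, never sees them). A hedged sketch of a one-step lookahead computed purely through the interface; the function name qValueFromModel and the values dictionary of current state-value estimates are assumptions for illustration:

def qValueFromModel(mdp, values, state, action, discount):
    # Expected one-step return: sum over successors of
    # P(s'|s,a) * (R(s,a,s') + discount * V(s'))
    total = 0.0
    for nextState, prob in mdp.getTransitionStatesAndProbs(state, action):
        reward = mdp.getReward(state, action, nextState)
        total += prob * (reward + discount * values.get(nextState, 0.0))
    return total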
+ """ + abstract + + def getReward(self, state, action, nextState): + """ + Get the reward for the state, action, nextState transition. + + Not available in reinforcement learning. + """ + abstract + + def isTerminal(self, state): + """ + Returns true if the current state is a terminal state. By convention, + a terminal state has zero future rewards. Sometimes the terminal state(s) + may have no possible actions. It is also common to think of the terminal + state as having a self-loop action 'pass' with zero reward; the formulations + are equivalent. + """ + abstract diff --git a/reinforcement/pacman.py b/reinforcement/pacman.py new file mode 100644 index 0000000..740451d --- /dev/null +++ b/reinforcement/pacman.py @@ -0,0 +1,684 @@ +# pacman.py +# --------- +# Licensing Information: You are free to use or extend these projects for +# educational purposes provided that (1) you do not distribute or publish +# solutions, (2) you retain this notice, and (3) you provide clear +# attribution to UC Berkeley, including a link to http://ai.berkeley.edu. +# +# Attribution Information: The Pacman AI projects were developed at UC Berkeley. +# The core projects and autograders were primarily created by John DeNero +# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). +# Student side autograding was added by Brad Miller, Nick Hay, and +# Pieter Abbeel (pabbeel@cs.berkeley.edu). + + +""" +Pacman.py holds the logic for the classic pacman game along with the main +code to run a game. This file is divided into three sections: + + (i) Your interface to the pacman world: + Pacman is a complex environment. You probably don't want to + read through all of the code we wrote to make the game runs + correctly. This section contains the parts of the code + that you will need to understand in order to complete the + project. There is also some code in game.py that you should + understand. + + (ii) The hidden secrets of pacman: + This section contains all of the logic code that the pacman + environment uses to decide who can move where, who dies when + things collide, etc. You shouldn't need to read this section + of code, but you can if you want. + + (iii) Framework to start a game: + The final section contains the code for reading the command + you use to set up the game, then starting up a new game, along with + linking in all the external parts (agent functions, graphics). + Check this section out to see all the options available to you. + +To play your first game, type 'python pacman.py' from the command line. +The keys are 'a', 's', 'd', and 'w' to move (or arrow keys). Have fun! +""" +from game import GameStateData +from game import Game +from game import Directions +from game import Actions +from util import nearestPoint +from util import manhattanDistance +import util, layout +import sys, types, time, random, os + +################################################### +# YOUR INTERFACE TO THE PACMAN WORLD: A GameState # +################################################### + +class GameState: + """ + A GameState specifies the full game state, including the food, capsules, + agent configurations and score changes. + + GameStates are used by the Game object to capture the actual state of the game and + can be used by agents to reason about the game. + + Much of the information in a GameState is stored in a GameStateData object. We + strongly suggest that you access that data via the accessor methods below rather + than referring to the GameStateData object directly. 
+ + Note that in classic Pacman, Pacman is always agent 0. + """ + + #################################################### + # Accessor methods: use these to access state data # + #################################################### + + # static variable keeps track of which states have had getLegalActions called + explored = set() + def getAndResetExplored(): + tmp = GameState.explored.copy() + GameState.explored = set() + return tmp + getAndResetExplored = staticmethod(getAndResetExplored) + + def getLegalActions( self, agentIndex=0 ): + """ + Returns the legal actions for the agent specified. + """ +# GameState.explored.add(self) + if self.isWin() or self.isLose(): return [] + + if agentIndex == 0: # Pacman is moving + return PacmanRules.getLegalActions( self ) + else: + return GhostRules.getLegalActions( self, agentIndex ) + + def generateSuccessor( self, agentIndex, action): + """ + Returns the successor state after the specified agent takes the action. + """ + # Check that successors exist + if self.isWin() or self.isLose(): raise Exception('Can\'t generate a successor of a terminal state.') + + # Copy current state + state = GameState(self) + + # Let agent's logic deal with its action's effects on the board + if agentIndex == 0: # Pacman is moving + state.data._eaten = [False for i in range(state.getNumAgents())] + PacmanRules.applyAction( state, action ) + else: # A ghost is moving + GhostRules.applyAction( state, action, agentIndex ) + + # Time passes + if agentIndex == 0: + state.data.scoreChange += -TIME_PENALTY # Penalty for waiting around + else: + GhostRules.decrementTimer( state.data.agentStates[agentIndex] ) + + # Resolve multi-agent effects + GhostRules.checkDeath( state, agentIndex ) + + # Book keeping + state.data._agentMoved = agentIndex + state.data.score += state.data.scoreChange + GameState.explored.add(self) + GameState.explored.add(state) + return state + + def getLegalPacmanActions( self ): + return self.getLegalActions( 0 ) + + def generatePacmanSuccessor( self, action ): + """ + Generates the successor state after the specified pacman move + """ + return self.generateSuccessor( 0, action ) + + def getPacmanState( self ): + """ + Returns an AgentState object for pacman (in game.py) + + state.pos gives the current position + state.direction gives the travel vector + """ + return self.data.agentStates[0].copy() + + def getPacmanPosition( self ): + return self.data.agentStates[0].getPosition() + + def getGhostStates( self ): + return self.data.agentStates[1:] + + def getGhostState( self, agentIndex ): + if agentIndex == 0 or agentIndex >= self.getNumAgents(): + raise Exception("Invalid index passed to getGhostState") + return self.data.agentStates[agentIndex] + + def getGhostPosition( self, agentIndex ): + if agentIndex == 0: + raise Exception("Pacman's index passed to getGhostPosition") + return self.data.agentStates[agentIndex].getPosition() + + def getGhostPositions(self): + return [s.getPosition() for s in self.getGhostStates()] + + def getNumAgents( self ): + return len( self.data.agentStates ) + + def getScore( self ): + return float(self.data.score) + + def getCapsules(self): + """ + Returns a list of positions (x,y) of the remaining capsules. + """ + return self.data.capsules + + def getNumFood( self ): + return self.data.food.count() + + def getFood(self): + """ + Returns a Grid of boolean food indicator variables. 
+ + Grids can be accessed via list notation, so to check + if there is food at (x,y), just call + + currentFood = state.getFood() + if currentFood[x][y] == True: ... + """ + return self.data.food + + def getWalls(self): + """ + Returns a Grid of boolean wall indicator variables. + + Grids can be accessed via list notation, so to check + if there is a wall at (x,y), just call + + walls = state.getWalls() + if walls[x][y] == True: ... + """ + return self.data.layout.walls + + def hasFood(self, x, y): + return self.data.food[x][y] + + def hasWall(self, x, y): + return self.data.layout.walls[x][y] + + def isLose( self ): + return self.data._lose + + def isWin( self ): + return self.data._win + + ############################################# + # Helper methods: # + # You shouldn't need to call these directly # + ############################################# + + def __init__( self, prevState = None ): + """ + Generates a new state by copying information from its predecessor. + """ + if prevState != None: # Initial state + self.data = GameStateData(prevState.data) + else: + self.data = GameStateData() + + def deepCopy( self ): + state = GameState( self ) + state.data = self.data.deepCopy() + return state + + def __eq__( self, other ): + """ + Allows two states to be compared. + """ + return hasattr(other, 'data') and self.data == other.data + + def __hash__( self ): + """ + Allows states to be keys of dictionaries. + """ + return hash( self.data ) + + def __str__( self ): + + return str(self.data) + + def initialize( self, layout, numGhostAgents=1000 ): + """ + Creates an initial game state from a layout array (see layout.py). + """ + self.data.initialize(layout, numGhostAgents) + +############################################################################ +# THE HIDDEN SECRETS OF PACMAN # +# # +# You shouldn't need to look through the code in this section of the file. # +############################################################################ + +SCARED_TIME = 40 # Moves ghosts are scared +COLLISION_TOLERANCE = 0.7 # How close ghosts must be to Pacman to kill +TIME_PENALTY = 1 # Number of points lost each round + +class ClassicGameRules: + """ + These game rules manage the control flow of a game, deciding when + and how the game starts and ends. + """ + def __init__(self, timeout=30): + self.timeout = timeout + + def newGame( self, layout, pacmanAgent, ghostAgents, display, quiet = False, catchExceptions=False): + agents = [pacmanAgent] + ghostAgents[:layout.getNumGhosts()] + initState = GameState() + initState.initialize( layout, len(ghostAgents) ) + game = Game(agents, display, self, catchExceptions=catchExceptions) + game.state = initState + self.initialState = initState.deepCopy() + self.quiet = quiet + return game + + def process(self, state, game): + """ + Checks to see whether it is time to end the game. + """ + if state.isWin(): self.win(state, game) + if state.isLose(): self.lose(state, game) + + def win( self, state, game ): + if not self.quiet: print "Pacman emerges victorious! Score: %d" % state.data.score + game.gameOver = True + + def lose( self, state, game ): + if not self.quiet: print "Pacman died! 
Score: %d" % state.data.score + game.gameOver = True + + def getProgress(self, game): + return float(game.state.getNumFood()) / self.initialState.getNumFood() + + def agentCrash(self, game, agentIndex): + if agentIndex == 0: + print "Pacman crashed" + else: + print "A ghost crashed" + + def getMaxTotalTime(self, agentIndex): + return self.timeout + + def getMaxStartupTime(self, agentIndex): + return self.timeout + + def getMoveWarningTime(self, agentIndex): + return self.timeout + + def getMoveTimeout(self, agentIndex): + return self.timeout + + def getMaxTimeWarnings(self, agentIndex): + return 0 + +class PacmanRules: + """ + These functions govern how pacman interacts with his environment under + the classic game rules. + """ + PACMAN_SPEED=1 + + def getLegalActions( state ): + """ + Returns a list of possible actions. + """ + return Actions.getPossibleActions( state.getPacmanState().configuration, state.data.layout.walls ) + getLegalActions = staticmethod( getLegalActions ) + + def applyAction( state, action ): + """ + Edits the state to reflect the results of the action. + """ + legal = PacmanRules.getLegalActions( state ) + if action not in legal: + raise Exception("Illegal action " + str(action)) + + pacmanState = state.data.agentStates[0] + + # Update Configuration + vector = Actions.directionToVector( action, PacmanRules.PACMAN_SPEED ) + pacmanState.configuration = pacmanState.configuration.generateSuccessor( vector ) + + # Eat + next = pacmanState.configuration.getPosition() + nearest = nearestPoint( next ) + if manhattanDistance( nearest, next ) <= 0.5 : + # Remove food + PacmanRules.consume( nearest, state ) + applyAction = staticmethod( applyAction ) + + def consume( position, state ): + x,y = position + # Eat food + if state.data.food[x][y]: + state.data.scoreChange += 10 + state.data.food = state.data.food.copy() + state.data.food[x][y] = False + state.data._foodEaten = position + # TODO: cache numFood? + numFood = state.getNumFood() + if numFood == 0 and not state.data._lose: + state.data.scoreChange += 500 + state.data._win = True + # Eat capsule + if( position in state.getCapsules() ): + state.data.capsules.remove( position ) + state.data._capsuleEaten = position + # Reset all ghosts' scared timers + for index in range( 1, len( state.data.agentStates ) ): + state.data.agentStates[index].scaredTimer = SCARED_TIME + consume = staticmethod( consume ) + +class GhostRules: + """ + These functions dictate how ghosts interact with their environment. + """ + GHOST_SPEED=1.0 + def getLegalActions( state, ghostIndex ): + """ + Ghosts cannot stop, and cannot turn around unless they + reach a dead end, but can turn 90 degrees at intersections. 
+ """ + conf = state.getGhostState( ghostIndex ).configuration + possibleActions = Actions.getPossibleActions( conf, state.data.layout.walls ) + reverse = Actions.reverseDirection( conf.direction ) + if Directions.STOP in possibleActions: + possibleActions.remove( Directions.STOP ) + if reverse in possibleActions and len( possibleActions ) > 1: + possibleActions.remove( reverse ) + return possibleActions + getLegalActions = staticmethod( getLegalActions ) + + def applyAction( state, action, ghostIndex): + + legal = GhostRules.getLegalActions( state, ghostIndex ) + if action not in legal: + raise Exception("Illegal ghost action " + str(action)) + + ghostState = state.data.agentStates[ghostIndex] + speed = GhostRules.GHOST_SPEED + if ghostState.scaredTimer > 0: speed /= 2.0 + vector = Actions.directionToVector( action, speed ) + ghostState.configuration = ghostState.configuration.generateSuccessor( vector ) + applyAction = staticmethod( applyAction ) + + def decrementTimer( ghostState): + timer = ghostState.scaredTimer + if timer == 1: + ghostState.configuration.pos = nearestPoint( ghostState.configuration.pos ) + ghostState.scaredTimer = max( 0, timer - 1 ) + decrementTimer = staticmethod( decrementTimer ) + + def checkDeath( state, agentIndex): + pacmanPosition = state.getPacmanPosition() + if agentIndex == 0: # Pacman just moved; Anyone can kill him + for index in range( 1, len( state.data.agentStates ) ): + ghostState = state.data.agentStates[index] + ghostPosition = ghostState.configuration.getPosition() + if GhostRules.canKill( pacmanPosition, ghostPosition ): + GhostRules.collide( state, ghostState, index ) + else: + ghostState = state.data.agentStates[agentIndex] + ghostPosition = ghostState.configuration.getPosition() + if GhostRules.canKill( pacmanPosition, ghostPosition ): + GhostRules.collide( state, ghostState, agentIndex ) + checkDeath = staticmethod( checkDeath ) + + def collide( state, ghostState, agentIndex): + if ghostState.scaredTimer > 0: + state.data.scoreChange += 200 + GhostRules.placeGhost(state, ghostState) + ghostState.scaredTimer = 0 + # Added for first-person + state.data._eaten[agentIndex] = True + else: + if not state.data._win: + state.data.scoreChange -= 500 + state.data._lose = True + collide = staticmethod( collide ) + + def canKill( pacmanPosition, ghostPosition ): + return manhattanDistance( ghostPosition, pacmanPosition ) <= COLLISION_TOLERANCE + canKill = staticmethod( canKill ) + + def placeGhost(state, ghostState): + ghostState.configuration = ghostState.start + placeGhost = staticmethod( placeGhost ) + +############################# +# FRAMEWORK TO START A GAME # +############################# + +def default(str): + return str + ' [Default: %default]' + +def parseAgentArgs(str): + if str == None: return {} + pieces = str.split(',') + opts = {} + for p in pieces: + if '=' in p: + key, val = p.split('=') + else: + key,val = p, 1 + opts[key] = val + return opts + +def readCommand( argv ): + """ + Processes the command used to run pacman from the command line. 
+ """ + from optparse import OptionParser + usageStr = """ + USAGE: python pacman.py + EXAMPLES: (1) python pacman.py + - starts an interactive game + (2) python pacman.py --layout smallClassic --zoom 2 + OR python pacman.py -l smallClassic -z 2 + - starts an interactive game on a smaller board, zoomed in + """ + parser = OptionParser(usageStr) + + parser.add_option('-n', '--numGames', dest='numGames', type='int', + help=default('the number of GAMES to play'), metavar='GAMES', default=1) + parser.add_option('-l', '--layout', dest='layout', + help=default('the LAYOUT_FILE from which to load the map layout'), + metavar='LAYOUT_FILE', default='mediumClassic') + parser.add_option('-p', '--pacman', dest='pacman', + help=default('the agent TYPE in the pacmanAgents module to use'), + metavar='TYPE', default='KeyboardAgent') + parser.add_option('-t', '--textGraphics', action='store_true', dest='textGraphics', + help='Display output as text only', default=False) + parser.add_option('-q', '--quietTextGraphics', action='store_true', dest='quietGraphics', + help='Generate minimal output and no graphics', default=False) + parser.add_option('-g', '--ghosts', dest='ghost', + help=default('the ghost agent TYPE in the ghostAgents module to use'), + metavar = 'TYPE', default='RandomGhost') + parser.add_option('-k', '--numghosts', type='int', dest='numGhosts', + help=default('The maximum number of ghosts to use'), default=4) + parser.add_option('-z', '--zoom', type='float', dest='zoom', + help=default('Zoom the size of the graphics window'), default=1.0) + parser.add_option('-f', '--fixRandomSeed', action='store_true', dest='fixRandomSeed', + help='Fixes the random seed to always play the same game', default=False) + parser.add_option('-r', '--recordActions', action='store_true', dest='record', + help='Writes game histories to a file (named by the time they were played)', default=False) + parser.add_option('--replay', dest='gameToReplay', + help='A recorded game file (pickle) to replay', default=None) + parser.add_option('-a','--agentArgs',dest='agentArgs', + help='Comma separated values sent to agent. e.g. 
"opt1=val1,opt2,opt3=val3"') + parser.add_option('-x', '--numTraining', dest='numTraining', type='int', + help=default('How many episodes are training (suppresses output)'), default=0) + parser.add_option('--frameTime', dest='frameTime', type='float', + help=default('Time to delay between frames; <0 means keyboard'), default=0.1) + parser.add_option('-c', '--catchExceptions', action='store_true', dest='catchExceptions', + help='Turns on exception handling and timeouts during games', default=False) + parser.add_option('--timeout', dest='timeout', type='int', + help=default('Maximum length of time an agent can spend computing in a single game'), default=30) + + options, otherjunk = parser.parse_args(argv) + if len(otherjunk) != 0: + raise Exception('Command line input not understood: ' + str(otherjunk)) + args = dict() + + # Fix the random seed + if options.fixRandomSeed: random.seed('cs188') + + # Choose a layout + args['layout'] = layout.getLayout( options.layout ) + if args['layout'] == None: raise Exception("The layout " + options.layout + " cannot be found") + + # Choose a Pacman agent + noKeyboard = options.gameToReplay == None and (options.textGraphics or options.quietGraphics) + pacmanType = loadAgent(options.pacman, noKeyboard) + agentOpts = parseAgentArgs(options.agentArgs) + if options.numTraining > 0: + args['numTraining'] = options.numTraining + if 'numTraining' not in agentOpts: agentOpts['numTraining'] = options.numTraining + pacman = pacmanType(**agentOpts) # Instantiate Pacman with agentArgs + args['pacman'] = pacman + + # Don't display training games + if 'numTrain' in agentOpts: + options.numQuiet = int(agentOpts['numTrain']) + options.numIgnore = int(agentOpts['numTrain']) + + # Choose a ghost agent + ghostType = loadAgent(options.ghost, noKeyboard) + args['ghosts'] = [ghostType( i+1 ) for i in range( options.numGhosts )] + + # Choose a display format + if options.quietGraphics: + import textDisplay + args['display'] = textDisplay.NullGraphics() + elif options.textGraphics: + import textDisplay + textDisplay.SLEEP_TIME = options.frameTime + args['display'] = textDisplay.PacmanGraphics() + else: + import graphicsDisplay + args['display'] = graphicsDisplay.PacmanGraphics(options.zoom, frameTime = options.frameTime) + args['numGames'] = options.numGames + args['record'] = options.record + args['catchExceptions'] = options.catchExceptions + args['timeout'] = options.timeout + + # Special case: recorded games don't use the runGames method or args structure + if options.gameToReplay != None: + print 'Replaying recorded game %s.' 
% options.gameToReplay + import cPickle + f = open(options.gameToReplay) + try: recorded = cPickle.load(f) + finally: f.close() + recorded['display'] = args['display'] + replayGame(**recorded) + sys.exit(0) + + return args + +def loadAgent(pacman, nographics): + # Looks through all pythonPath Directories for the right module, + pythonPathStr = os.path.expandvars("$PYTHONPATH") + if pythonPathStr.find(';') == -1: + pythonPathDirs = pythonPathStr.split(':') + else: + pythonPathDirs = pythonPathStr.split(';') + pythonPathDirs.append('.') + + for moduleDir in pythonPathDirs: + if not os.path.isdir(moduleDir): continue + moduleNames = [f for f in os.listdir(moduleDir) if f.endswith('gents.py')] + for modulename in moduleNames: + try: + module = __import__(modulename[:-3]) + except ImportError: + continue + if pacman in dir(module): + if nographics and modulename == 'keyboardAgents.py': + raise Exception('Using the keyboard requires graphics (not text display)') + return getattr(module, pacman) + raise Exception('The agent ' + pacman + ' is not specified in any *Agents.py.') + +def replayGame( layout, actions, display ): + import pacmanAgents, ghostAgents + rules = ClassicGameRules() + agents = [pacmanAgents.GreedyAgent()] + [ghostAgents.RandomGhost(i+1) for i in range(layout.getNumGhosts())] + game = rules.newGame( layout, agents[0], agents[1:], display ) + state = game.state + display.initialize(state.data) + + for action in actions: + # Execute the action + state = state.generateSuccessor( *action ) + # Change the display + display.update( state.data ) + # Allow for game specific conditions (winning, losing, etc.) + rules.process(state, game) + + display.finish() + +def runGames( layout, pacman, ghosts, display, numGames, record, numTraining = 0, catchExceptions=False, timeout=30 ): + import __main__ + __main__.__dict__['_display'] = display + + rules = ClassicGameRules(timeout) + games = [] + + for i in range( numGames ): + beQuiet = i < numTraining + if beQuiet: + # Suppress output and graphics + import textDisplay + gameDisplay = textDisplay.NullGraphics() + rules.quiet = True + else: + gameDisplay = display + rules.quiet = False + game = rules.newGame( layout, pacman, ghosts, gameDisplay, beQuiet, catchExceptions) + game.run() + if not beQuiet: games.append(game) + + if record: + import time, cPickle + fname = ('recorded-game-%d' % (i + 1)) + '-'.join([str(t) for t in time.localtime()[1:6]]) + f = file(fname, 'w') + components = {'layout': layout, 'actions': game.moveHistory} + cPickle.dump(components, f) + f.close() + + if (numGames-numTraining) > 0: + scores = [game.state.getScore() for game in games] + wins = [game.state.isWin() for game in games] + winRate = wins.count(True)/ float(len(wins)) + print 'Average Score:', sum(scores) / float(len(scores)) + print 'Scores: ', ', '.join([str(score) for score in scores]) + print 'Win Rate: %d/%d (%.2f)' % (wins.count(True), len(wins), winRate) + print 'Record: ', ', '.join([ ['Loss', 'Win'][int(w)] for w in wins]) + + return games + +if __name__ == '__main__': + """ + The main function called when pacman.py is run + from the command line: + + > python pacman.py + + See the usage string for more details. 
+ + > python pacman.py --help + """ + args = readCommand( sys.argv[1:] ) # Get game components based on input + runGames( **args ) + + # import cProfile + # cProfile.run("runGames( **args )") + pass diff --git a/reinforcement/pacmanAgents.py b/reinforcement/pacmanAgents.py new file mode 100644 index 0000000..ae97634 --- /dev/null +++ b/reinforcement/pacmanAgents.py @@ -0,0 +1,52 @@ +# pacmanAgents.py +# --------------- +# Licensing Information: You are free to use or extend these projects for +# educational purposes provided that (1) you do not distribute or publish +# solutions, (2) you retain this notice, and (3) you provide clear +# attribution to UC Berkeley, including a link to http://ai.berkeley.edu. +# +# Attribution Information: The Pacman AI projects were developed at UC Berkeley. +# The core projects and autograders were primarily created by John DeNero +# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). +# Student side autograding was added by Brad Miller, Nick Hay, and +# Pieter Abbeel (pabbeel@cs.berkeley.edu). + + +from pacman import Directions +from game import Agent +import random +import game +import util + +class LeftTurnAgent(game.Agent): + "An agent that turns left at every opportunity" + + def getAction(self, state): + legal = state.getLegalPacmanActions() + current = state.getPacmanState().configuration.direction + if current == Directions.STOP: current = Directions.NORTH + left = Directions.LEFT[current] + if left in legal: return left + if current in legal: return current + if Directions.RIGHT[current] in legal: return Directions.RIGHT[current] + if Directions.LEFT[left] in legal: return Directions.LEFT[left] + return Directions.STOP + +class GreedyAgent(Agent): + def __init__(self, evalFn="scoreEvaluation"): + self.evaluationFunction = util.lookup(evalFn, globals()) + assert self.evaluationFunction != None + + def getAction(self, state): + # Generate candidate actions + legal = state.getLegalPacmanActions() + if Directions.STOP in legal: legal.remove(Directions.STOP) + + successors = [(state.generateSuccessor(0, action), action) for action in legal] + scored = [(self.evaluationFunction(state), action) for state, action in successors] + bestScore = max(scored)[0] + bestActions = [pair[1] for pair in scored if pair[0] == bestScore] + return random.choice(bestActions) + +def scoreEvaluation(state): + return state.getScore() diff --git a/reinforcement/projectParams.py b/reinforcement/projectParams.py new file mode 100644 index 0000000..7d92e34 --- /dev/null +++ b/reinforcement/projectParams.py @@ -0,0 +1,18 @@ +# projectParams.py +# ---------------- +# Licensing Information: You are free to use or extend these projects for +# educational purposes provided that (1) you do not distribute or publish +# solutions, (2) you retain this notice, and (3) you provide clear +# attribution to UC Berkeley, including a link to http://ai.berkeley.edu. +# +# Attribution Information: The Pacman AI projects were developed at UC Berkeley. +# The core projects and autograders were primarily created by John DeNero +# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). +# Student side autograding was added by Brad Miller, Nick Hay, and +# Pieter Abbeel (pabbeel@cs.berkeley.edu). 
+ + +STUDENT_CODE_DEFAULT = 'analysis.py,qlearningAgents.py,valueIterationAgents.py' +PROJECT_TEST_CLASSES = 'reinforcementTestClasses.py' +PROJECT_NAME = 'Project 3: Reinforcement learning' +BONUS_PIC = False diff --git a/reinforcement/qlearningAgents.py b/reinforcement/qlearningAgents.py new file mode 100644 index 0000000..a417a5f --- /dev/null +++ b/reinforcement/qlearningAgents.py @@ -0,0 +1,186 @@ +# qlearningAgents.py +# ------------------ +# Licensing Information: You are free to use or extend these projects for +# educational purposes provided that (1) you do not distribute or publish +# solutions, (2) you retain this notice, and (3) you provide clear +# attribution to UC Berkeley, including a link to http://ai.berkeley.edu. +# +# Attribution Information: The Pacman AI projects were developed at UC Berkeley. +# The core projects and autograders were primarily created by John DeNero +# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). +# Student side autograding was added by Brad Miller, Nick Hay, and +# Pieter Abbeel (pabbeel@cs.berkeley.edu). + + +from game import * +from learningAgents import ReinforcementAgent +from featureExtractors import * + +import random,util,math + +class QLearningAgent(ReinforcementAgent): + """ + Q-Learning Agent + + Functions you should fill in: + - computeValueFromQValues + - computeActionFromQValues + - getQValue + - getAction + - update + + Instance variables you have access to + - self.epsilon (exploration prob) + - self.alpha (learning rate) + - self.discount (discount rate) + + Functions you should use + - self.getLegalActions(state) + which returns legal actions for a state + """ + def __init__(self, **args): + "You can initialize Q-values here..." + ReinforcementAgent.__init__(self, **args) + + "*** YOUR CODE HERE ***" + + def getQValue(self, state, action): + """ + Returns Q(state,action) + Should return 0.0 if we have never seen a state + or the Q node value otherwise + """ + "*** YOUR CODE HERE ***" + util.raiseNotDefined() + + + def computeValueFromQValues(self, state): + """ + Returns max_action Q(state,action) + where the max is over legal actions. Note that if + there are no legal actions, which is the case at the + terminal state, you should return a value of 0.0. + """ + "*** YOUR CODE HERE ***" + util.raiseNotDefined() + + def computeActionFromQValues(self, state): + """ + Compute the best action to take in a state. Note that if there + are no legal actions, which is the case at the terminal state, + you should return None. + """ + "*** YOUR CODE HERE ***" + util.raiseNotDefined() + + def getAction(self, state): + """ + Compute the action to take in the current state. With + probability self.epsilon, we should take a random action and + take the best policy action otherwise. Note that if there are + no legal actions, which is the case at the terminal state, you + should choose None as the action. + + HINT: You might want to use util.flipCoin(prob) + HINT: To pick randomly from a list, use random.choice(list) + """ + # Pick Action + legalActions = self.getLegalActions(state) + action = None + "*** YOUR CODE HERE ***" + util.raiseNotDefined() + + return action + + def update(self, state, action, nextState, reward): + """ + The parent class calls this to observe a + state = action => nextState and reward transition. 
+ You should do your Q-Value update here + + NOTE: You should never call this function, + it will be called on your behalf + """ + "*** YOUR CODE HERE ***" + util.raiseNotDefined() + + def getPolicy(self, state): + return self.computeActionFromQValues(state) + + def getValue(self, state): + return self.computeValueFromQValues(state) + + +class PacmanQAgent(QLearningAgent): + "Exactly the same as QLearningAgent, but with different default parameters" + + def __init__(self, epsilon=0.05,gamma=0.8,alpha=0.2, numTraining=0, **args): + """ + These default parameters can be changed from the pacman.py command line. + For example, to change the exploration rate, try: + python pacman.py -p PacmanQLearningAgent -a epsilon=0.1 + + alpha - learning rate + epsilon - exploration rate + gamma - discount factor + numTraining - number of training episodes, i.e. no learning after these many episodes + """ + args['epsilon'] = epsilon + args['gamma'] = gamma + args['alpha'] = alpha + args['numTraining'] = numTraining + self.index = 0 # This is always Pacman + QLearningAgent.__init__(self, **args) + + def getAction(self, state): + """ + Simply calls the getAction method of QLearningAgent and then + informs parent of action for Pacman. Do not change or remove this + method. + """ + action = QLearningAgent.getAction(self,state) + self.doAction(state,action) + return action + + +class ApproximateQAgent(PacmanQAgent): + """ + ApproximateQLearningAgent + + You should only have to overwrite getQValue + and update. All other QLearningAgent functions + should work as is. + """ + def __init__(self, extractor='IdentityExtractor', **args): + self.featExtractor = util.lookup(extractor, globals())() + PacmanQAgent.__init__(self, **args) + self.weights = util.Counter() + + def getWeights(self): + return self.weights + + def getQValue(self, state, action): + """ + Should return Q(state,action) = w * featureVector + where * is the dotProduct operator + """ + "*** YOUR CODE HERE ***" + util.raiseNotDefined() + + def update(self, state, action, nextState, reward): + """ + Should update your weights based on transition + """ + "*** YOUR CODE HERE ***" + util.raiseNotDefined() + + def final(self, state): + "Called at the end of each game." + # call the super-class final method + PacmanQAgent.final(self, state) + + # did we finish training? + if self.episodesSoFar == self.numTraining: + # you might want to print your weights here for debugging + "*** YOUR CODE HERE ***" + pass diff --git a/reinforcement/reinforcementTestClasses.py b/reinforcement/reinforcementTestClasses.py new file mode 100644 index 0000000..58db4ac --- /dev/null +++ b/reinforcement/reinforcementTestClasses.py @@ -0,0 +1,924 @@ +# reinforcementTestClasses.py +# --------------------------- +# Licensing Information: You are free to use or extend these projects for +# educational purposes provided that (1) you do not distribute or publish +# solutions, (2) you retain this notice, and (3) you provide clear +# attribution to UC Berkeley, including a link to http://ai.berkeley.edu. +# +# Attribution Information: The Pacman AI projects were developed at UC Berkeley. +# The core projects and autograders were primarily created by John DeNero +# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). +# Student side autograding was added by Brad Miller, Nick Hay, and +# Pieter Abbeel (pabbeel@cs.berkeley.edu). 
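The QLearningAgent docstrings in qlearningAgents.py above describe the temporal-difference Q-learning update and epsilon-greedy action selection without writing the rules out. As a reference only, here is a minimal generic sketch of both rules; it is standalone illustrative Python, not the project's solution, and the names q_values, legal_actions, alpha, epsilon and discount are assumptions of this sketch rather than the project's ReinforcementAgent API (util.flipCoin is not used here).

import random
from collections import defaultdict

def q_learning_update(q_values, state, action, next_state, reward,
                      legal_actions, alpha, discount):
    # Illustrative helper, not part of the project's API.
    # Q(s,a) <- (1 - alpha) * Q(s,a) + alpha * [r + discount * max_a' Q(s',a')]
    next_actions = legal_actions(next_state)
    next_value = max([q_values[(next_state, a)] for a in next_actions]) if next_actions else 0.0
    sample = reward + discount * next_value
    q_values[(state, action)] = (1.0 - alpha) * q_values[(state, action)] + alpha * sample

def epsilon_greedy_action(q_values, state, legal_actions, epsilon):
    # With probability epsilon take a random legal action, otherwise act greedily;
    # return None when there are no legal actions (terminal state).
    actions = legal_actions(state)
    if not actions:
        return None
    if random.random() < epsilon:
        return random.choice(actions)
    return max(actions, key=lambda a: q_values[(state, a)])

# Example on a toy two-state problem; unseen (state, action) pairs default to 0.0.
q_values = defaultdict(float)
legal = lambda s: ['stay'] if s != 'terminal' else []
q_learning_update(q_values, 'A', 'stay', 'terminal', 10.0, legal, alpha=0.5, discount=0.9)
print(q_values[('A', 'stay')])   # 5.0 = 0.5 * 0.0 + 0.5 * (10.0 + 0.9 * 0.0)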
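Similarly, the ApproximateQAgent docstring defines Q(s,a) as the dot product w * featureVector(s,a) but leaves the weight update implicit. The sketch below shows generic linear (approximate) Q-learning under the assumption that features are plain dictionaries mapping feature names to values; the function and variable names are illustrative, not the project's featureExtractors/util.Counter interface.

from collections import defaultdict

def approx_q_value(weights, features):
    # Q(s,a) = w . f(s,a): dot product of the weight and feature vectors.
    return sum(weights[f] * value for f, value in features.items())

def approx_q_update(weights, features, reward, next_value, q_value, alpha, discount):
    # difference = [r + discount * max_a' Q(s',a')] - Q(s,a)
    # w_i <- w_i + alpha * difference * f_i(s,a)
    difference = (reward + discount * next_value) - q_value
    for f, value in features.items():
        weights[f] += alpha * difference * value

# Example: two features, one observed transition (names here are made up for illustration).
weights = defaultdict(float)
features = {'bias': 1.0, 'dist-to-food': 0.5}
q = approx_q_value(weights, features)           # 0.0 initially
approx_q_update(weights, features, reward=10.0, next_value=0.0,
                q_value=q, alpha=0.1, discount=0.9)
print(weights['bias'])                           # 1.0 = 0.1 * 10.0 * 1.0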
+ + +import testClasses +import random, math, traceback, sys, os +import layout, textDisplay, pacman, gridworld +import time +from util import Counter, TimeoutFunction, FixedRandom +from collections import defaultdict +from pprint import PrettyPrinter +from hashlib import sha1 +pp = PrettyPrinter() +VERBOSE = False + +import gridworld + +LIVINGREWARD = -0.1 +NOISE = 0.2 + +class ValueIterationTest(testClasses.TestCase): + + def __init__(self, question, testDict): + super(ValueIterationTest, self).__init__(question, testDict) + self.discount = float(testDict['discount']) + self.grid = gridworld.Gridworld(parseGrid(testDict['grid'])) + iterations = int(testDict['valueIterations']) + if 'noise' in testDict: self.grid.setNoise(float(testDict['noise'])) + if 'livingReward' in testDict: self.grid.setLivingReward(float(testDict['livingReward'])) + maxPreIterations = 10 + self.numsIterationsForDisplay = range(min(iterations, maxPreIterations)) + self.testOutFile = testDict['test_out_file'] + if maxPreIterations < iterations: + self.numsIterationsForDisplay.append(iterations) + + def writeFailureFile(self, string): + with open(self.testOutFile, 'w') as handle: + handle.write(string) + + def removeFailureFileIfExists(self): + if os.path.exists(self.testOutFile): + os.remove(self.testOutFile) + + def execute(self, grades, moduleDict, solutionDict): + failureOutputFileString = '' + failureOutputStdString = '' + for n in self.numsIterationsForDisplay: + checkPolicy = (n == self.numsIterationsForDisplay[-1]) + testPass, stdOutString, fileOutString = self.executeNIterations(grades, moduleDict, solutionDict, n, checkPolicy) + failureOutputStdString += stdOutString + failureOutputFileString += fileOutString + if not testPass: + self.addMessage(failureOutputStdString) + self.addMessage('For more details to help you debug, see test output file %s\n\n' % self.testOutFile) + self.writeFailureFile(failureOutputFileString) + return self.testFail(grades) + self.removeFailureFileIfExists() + return self.testPass(grades) + + def executeNIterations(self, grades, moduleDict, solutionDict, n, checkPolicy): + testPass = True + valuesPretty, qValuesPretty, actions, policyPretty = self.runAgent(moduleDict, n) + stdOutString = '' + fileOutString = '' + valuesKey = "values_k_%d" % n + if self.comparePrettyValues(valuesPretty, solutionDict[valuesKey]): + fileOutString += "Values at iteration %d are correct.\n" % n + fileOutString += " Student/correct solution:\n %s\n" % self.prettyValueSolutionString(valuesKey, valuesPretty) + else: + testPass = False + outString = "Values at iteration %d are NOT correct.\n" % n + outString += " Student solution:\n %s\n" % self.prettyValueSolutionString(valuesKey, valuesPretty) + outString += " Correct solution:\n %s\n" % self.prettyValueSolutionString(valuesKey, solutionDict[valuesKey]) + stdOutString += outString + fileOutString += outString + for action in actions: + qValuesKey = 'q_values_k_%d_action_%s' % (n, action) + qValues = qValuesPretty[action] + if self.comparePrettyValues(qValues, solutionDict[qValuesKey]): + fileOutString += "Q-Values at iteration %d for action %s are correct.\n" % (n, action) + fileOutString += " Student/correct solution:\n %s\n" % self.prettyValueSolutionString(qValuesKey, qValues) + else: + testPass = False + outString = "Q-Values at iteration %d for action %s are NOT correct.\n" % (n, action) + outString += " Student solution:\n %s\n" % self.prettyValueSolutionString(qValuesKey, qValues) + outString += " Correct solution:\n %s\n" % 
self.prettyValueSolutionString(qValuesKey, solutionDict[qValuesKey]) + stdOutString += outString + fileOutString += outString + if checkPolicy: + if not self.comparePrettyValues(policyPretty, solutionDict['policy']): + testPass = False + outString = "Policy is NOT correct.\n" + outString += " Student solution:\n %s\n" % self.prettyValueSolutionString('policy', policyPretty) + outString += " Correct solution:\n %s\n" % self.prettyValueSolutionString('policy', solutionDict['policy']) + stdOutString += outString + fileOutString += outString + return testPass, stdOutString, fileOutString + + def writeSolution(self, moduleDict, filePath): + with open(filePath, 'w') as handle: + policyPretty = '' + actions = [] + for n in self.numsIterationsForDisplay: + valuesPretty, qValuesPretty, actions, policyPretty = self.runAgent(moduleDict, n) + handle.write(self.prettyValueSolutionString('values_k_%d' % n, valuesPretty)) + for action in actions: + handle.write(self.prettyValueSolutionString('q_values_k_%d_action_%s' % (n, action), qValuesPretty[action])) + handle.write(self.prettyValueSolutionString('policy', policyPretty)) + handle.write(self.prettyValueSolutionString('actions', '\n'.join(actions) + '\n')) + return True + + def runAgent(self, moduleDict, numIterations): + agent = moduleDict['valueIterationAgents'].ValueIterationAgent(self.grid, discount=self.discount, iterations=numIterations) + states = self.grid.getStates() + actions = list(reduce(lambda a, b: set(a).union(b), [self.grid.getPossibleActions(state) for state in states])) + values = {} + qValues = {} + policy = {} + for state in states: + values[state] = agent.getValue(state) + policy[state] = agent.computeActionFromValues(state) + possibleActions = self.grid.getPossibleActions(state) + for action in actions: + if not qValues.has_key(action): + qValues[action] = {} + if action in possibleActions: + qValues[action][state] = agent.computeQValueFromValues(state, action) + else: + qValues[action][state] = None + valuesPretty = self.prettyValues(values) + policyPretty = self.prettyPolicy(policy) + qValuesPretty = {} + for action in actions: + qValuesPretty[action] = self.prettyValues(qValues[action]) + return (valuesPretty, qValuesPretty, actions, policyPretty) + + def prettyPrint(self, elements, formatString): + pretty = '' + states = self.grid.getStates() + for ybar in range(self.grid.grid.height): + y = self.grid.grid.height-1-ybar + row = [] + for x in range(self.grid.grid.width): + if (x, y) in states: + value = elements[(x, y)] + if value is None: + row.append(' illegal') + else: + row.append(formatString.format(elements[(x,y)])) + else: + row.append('_' * 10) + pretty += ' %s\n' % (" ".join(row), ) + pretty += '\n' + return pretty + + def prettyValues(self, values): + return self.prettyPrint(values, '{0:10.4f}') + + def prettyPolicy(self, policy): + return self.prettyPrint(policy, '{0:10s}') + + def prettyValueSolutionString(self, name, pretty): + return '%s: """\n%s\n"""\n\n' % (name, pretty.rstrip()) + + def comparePrettyValues(self, aPretty, bPretty, tolerance=0.01): + aList = self.parsePrettyValues(aPretty) + bList = self.parsePrettyValues(bPretty) + if len(aList) != len(bList): + return False + for a, b in zip(aList, bList): + try: + aNum = float(a) + bNum = float(b) + # error = abs((aNum - bNum) / ((aNum + bNum) / 2.0)) + error = abs(aNum - bNum) + if error > tolerance: + return False + except ValueError: + if a.strip() != b.strip(): + return False + return True + + def parsePrettyValues(self, pretty): + values = pretty.split() 
+ return values + + +class ApproximateQLearningTest(testClasses.TestCase): + + def __init__(self, question, testDict): + super(ApproximateQLearningTest, self).__init__(question, testDict) + self.discount = float(testDict['discount']) + self.grid = gridworld.Gridworld(parseGrid(testDict['grid'])) + if 'noise' in testDict: self.grid.setNoise(float(testDict['noise'])) + if 'livingReward' in testDict: self.grid.setLivingReward(float(testDict['livingReward'])) + self.grid = gridworld.Gridworld(parseGrid(testDict['grid'])) + self.env = gridworld.GridworldEnvironment(self.grid) + self.epsilon = float(testDict['epsilon']) + self.learningRate = float(testDict['learningRate']) + self.extractor = 'IdentityExtractor' + if 'extractor' in testDict: + self.extractor = testDict['extractor'] + self.opts = {'actionFn': self.env.getPossibleActions, 'epsilon': self.epsilon, 'gamma': self.discount, 'alpha': self.learningRate} + numExperiences = int(testDict['numExperiences']) + maxPreExperiences = 10 + self.numsExperiencesForDisplay = range(min(numExperiences, maxPreExperiences)) + self.testOutFile = testDict['test_out_file'] + if maxPreExperiences < numExperiences: + self.numsExperiencesForDisplay.append(numExperiences) + + def writeFailureFile(self, string): + with open(self.testOutFile, 'w') as handle: + handle.write(string) + + def removeFailureFileIfExists(self): + if os.path.exists(self.testOutFile): + os.remove(self.testOutFile) + + def execute(self, grades, moduleDict, solutionDict): + failureOutputFileString = '' + failureOutputStdString = '' + for n in self.numsExperiencesForDisplay: + testPass, stdOutString, fileOutString = self.executeNExperiences(grades, moduleDict, solutionDict, n) + failureOutputStdString += stdOutString + failureOutputFileString += fileOutString + if not testPass: + self.addMessage(failureOutputStdString) + self.addMessage('For more details to help you debug, see test output file %s\n\n' % self.testOutFile) + self.writeFailureFile(failureOutputFileString) + return self.testFail(grades) + self.removeFailureFileIfExists() + return self.testPass(grades) + + def executeNExperiences(self, grades, moduleDict, solutionDict, n): + testPass = True + qValuesPretty, weights, actions, lastExperience = self.runAgent(moduleDict, n) + stdOutString = '' + fileOutString = "==================== Iteration %d ====================\n" % n + if lastExperience is not None: + fileOutString += "Agent observed the transition (startState = %s, action = %s, endState = %s, reward = %f)\n\n" % lastExperience + weightsKey = 'weights_k_%d' % n + if weights == eval(solutionDict[weightsKey]): + fileOutString += "Weights at iteration %d are correct." % n + fileOutString += " Student/correct solution:\n\n%s\n\n" % pp.pformat(weights) + for action in actions: + qValuesKey = 'q_values_k_%d_action_%s' % (n, action) + qValues = qValuesPretty[action] + if self.comparePrettyValues(qValues, solutionDict[qValuesKey]): + fileOutString += "Q-Values at iteration %d for action '%s' are correct." % (n, action) + fileOutString += " Student/correct solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, qValues) + else: + testPass = False + outString = "Q-Values at iteration %d for action '%s' are NOT correct." 
% (n, action) + outString += " Student solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, qValues) + outString += " Correct solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, solutionDict[qValuesKey]) + stdOutString += outString + fileOutString += outString + return testPass, stdOutString, fileOutString + + def writeSolution(self, moduleDict, filePath): + with open(filePath, 'w') as handle: + for n in self.numsExperiencesForDisplay: + qValuesPretty, weights, actions, _ = self.runAgent(moduleDict, n) + handle.write(self.prettyValueSolutionString('weights_k_%d' % n, pp.pformat(weights))) + for action in actions: + handle.write(self.prettyValueSolutionString('q_values_k_%d_action_%s' % (n, action), qValuesPretty[action])) + return True + + def runAgent(self, moduleDict, numExperiences): + agent = moduleDict['qlearningAgents'].ApproximateQAgent(extractor=self.extractor, **self.opts) + states = filter(lambda state : len(self.grid.getPossibleActions(state)) > 0, self.grid.getStates()) + states.sort() + randObj = FixedRandom().random + # choose a random start state and a random possible action from that state + # get the next state and reward from the transition function + lastExperience = None + for i in range(numExperiences): + startState = randObj.choice(states) + action = randObj.choice(self.grid.getPossibleActions(startState)) + (endState, reward) = self.env.getRandomNextState(startState, action, randObj=randObj) + lastExperience = (startState, action, endState, reward) + agent.update(*lastExperience) + actions = list(reduce(lambda a, b: set(a).union(b), [self.grid.getPossibleActions(state) for state in states])) + qValues = {} + weights = agent.getWeights() + for state in states: + possibleActions = self.grid.getPossibleActions(state) + for action in actions: + if not qValues.has_key(action): + qValues[action] = {} + if action in possibleActions: + qValues[action][state] = agent.getQValue(state, action) + else: + qValues[action][state] = None + qValuesPretty = {} + for action in actions: + qValuesPretty[action] = self.prettyValues(qValues[action]) + return (qValuesPretty, weights, actions, lastExperience) + + def prettyPrint(self, elements, formatString): + pretty = '' + states = self.grid.getStates() + for ybar in range(self.grid.grid.height): + y = self.grid.grid.height-1-ybar + row = [] + for x in range(self.grid.grid.width): + if (x, y) in states: + value = elements[(x, y)] + if value is None: + row.append(' illegal') + else: + row.append(formatString.format(elements[(x,y)])) + else: + row.append('_' * 10) + pretty += ' %s\n' % (" ".join(row), ) + pretty += '\n' + return pretty + + def prettyValues(self, values): + return self.prettyPrint(values, '{0:10.4f}') + + def prettyPolicy(self, policy): + return self.prettyPrint(policy, '{0:10s}') + + def prettyValueSolutionString(self, name, pretty): + return '%s: """\n%s\n"""\n\n' % (name, pretty.rstrip()) + + def comparePrettyValues(self, aPretty, bPretty, tolerance=0.01): + aList = self.parsePrettyValues(aPretty) + bList = self.parsePrettyValues(bPretty) + if len(aList) != len(bList): + return False + for a, b in zip(aList, bList): + try: + aNum = float(a) + bNum = float(b) + # error = abs((aNum - bNum) / ((aNum + bNum) / 2.0)) + error = abs(aNum - bNum) + if error > tolerance: + return False + except ValueError: + if a.strip() != b.strip(): + return False + return True + + def parsePrettyValues(self, pretty): + values = pretty.split() + return values + + +class QLearningTest(testClasses.TestCase): + + def 
__init__(self, question, testDict): + super(QLearningTest, self).__init__(question, testDict) + self.discount = float(testDict['discount']) + self.grid = gridworld.Gridworld(parseGrid(testDict['grid'])) + if 'noise' in testDict: self.grid.setNoise(float(testDict['noise'])) + if 'livingReward' in testDict: self.grid.setLivingReward(float(testDict['livingReward'])) + self.grid = gridworld.Gridworld(parseGrid(testDict['grid'])) + self.env = gridworld.GridworldEnvironment(self.grid) + self.epsilon = float(testDict['epsilon']) + self.learningRate = float(testDict['learningRate']) + self.opts = {'actionFn': self.env.getPossibleActions, 'epsilon': self.epsilon, 'gamma': self.discount, 'alpha': self.learningRate} + numExperiences = int(testDict['numExperiences']) + maxPreExperiences = 10 + self.numsExperiencesForDisplay = range(min(numExperiences, maxPreExperiences)) + self.testOutFile = testDict['test_out_file'] + if maxPreExperiences < numExperiences: + self.numsExperiencesForDisplay.append(numExperiences) + + def writeFailureFile(self, string): + with open(self.testOutFile, 'w') as handle: + handle.write(string) + + def removeFailureFileIfExists(self): + if os.path.exists(self.testOutFile): + os.remove(self.testOutFile) + + def execute(self, grades, moduleDict, solutionDict): + failureOutputFileString = '' + failureOutputStdString = '' + for n in self.numsExperiencesForDisplay: + checkValuesAndPolicy = (n == self.numsExperiencesForDisplay[-1]) + testPass, stdOutString, fileOutString = self.executeNExperiences(grades, moduleDict, solutionDict, n, checkValuesAndPolicy) + failureOutputStdString += stdOutString + failureOutputFileString += fileOutString + if not testPass: + self.addMessage(failureOutputStdString) + self.addMessage('For more details to help you debug, see test output file %s\n\n' % self.testOutFile) + self.writeFailureFile(failureOutputFileString) + return self.testFail(grades) + self.removeFailureFileIfExists() + return self.testPass(grades) + + def executeNExperiences(self, grades, moduleDict, solutionDict, n, checkValuesAndPolicy): + testPass = True + valuesPretty, qValuesPretty, actions, policyPretty, lastExperience = self.runAgent(moduleDict, n) + stdOutString = '' + fileOutString = "==================== Iteration %d ====================\n" % n + if lastExperience is not None: + fileOutString += "Agent observed the transition (startState = %s, action = %s, endState = %s, reward = %f)\n\n\n" % lastExperience + for action in actions: + qValuesKey = 'q_values_k_%d_action_%s' % (n, action) + qValues = qValuesPretty[action] + if self.comparePrettyValues(qValues, solutionDict[qValuesKey]): + fileOutString += "Q-Values at iteration %d for action '%s' are correct." % (n, action) + fileOutString += " Student/correct solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, qValues) + else: + testPass = False + outString = "Q-Values at iteration %d for action '%s' are NOT correct." % (n, action) + outString += " Student solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, qValues) + outString += " Correct solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, solutionDict[qValuesKey]) + stdOutString += outString + fileOutString += outString + if checkValuesAndPolicy: + if not self.comparePrettyValues(valuesPretty, solutionDict['values']): + testPass = False + outString = "Values are NOT correct." 
+ outString += " Student solution:\n\t%s" % self.prettyValueSolutionString('values', valuesPretty) + outString += " Correct solution:\n\t%s" % self.prettyValueSolutionString('values', solutionDict['values']) + stdOutString += outString + fileOutString += outString + if not self.comparePrettyValues(policyPretty, solutionDict['policy']): + testPass = False + outString = "Policy is NOT correct." + outString += " Student solution:\n\t%s" % self.prettyValueSolutionString('policy', policyPretty) + outString += " Correct solution:\n\t%s" % self.prettyValueSolutionString('policy', solutionDict['policy']) + stdOutString += outString + fileOutString += outString + return testPass, stdOutString, fileOutString + + def writeSolution(self, moduleDict, filePath): + with open(filePath, 'w') as handle: + valuesPretty = '' + policyPretty = '' + for n in self.numsExperiencesForDisplay: + valuesPretty, qValuesPretty, actions, policyPretty, _ = self.runAgent(moduleDict, n) + for action in actions: + handle.write(self.prettyValueSolutionString('q_values_k_%d_action_%s' % (n, action), qValuesPretty[action])) + handle.write(self.prettyValueSolutionString('values', valuesPretty)) + handle.write(self.prettyValueSolutionString('policy', policyPretty)) + return True + + def runAgent(self, moduleDict, numExperiences): + agent = moduleDict['qlearningAgents'].QLearningAgent(**self.opts) + states = filter(lambda state : len(self.grid.getPossibleActions(state)) > 0, self.grid.getStates()) + states.sort() + randObj = FixedRandom().random + # choose a random start state and a random possible action from that state + # get the next state and reward from the transition function + lastExperience = None + for i in range(numExperiences): + startState = randObj.choice(states) + action = randObj.choice(self.grid.getPossibleActions(startState)) + (endState, reward) = self.env.getRandomNextState(startState, action, randObj=randObj) + lastExperience = (startState, action, endState, reward) + agent.update(*lastExperience) + actions = list(reduce(lambda a, b: set(a).union(b), [self.grid.getPossibleActions(state) for state in states])) + values = {} + qValues = {} + policy = {} + for state in states: + values[state] = agent.computeValueFromQValues(state) + policy[state] = agent.computeActionFromQValues(state) + possibleActions = self.grid.getPossibleActions(state) + for action in actions: + if not qValues.has_key(action): + qValues[action] = {} + if action in possibleActions: + qValues[action][state] = agent.getQValue(state, action) + else: + qValues[action][state] = None + valuesPretty = self.prettyValues(values) + policyPretty = self.prettyPolicy(policy) + qValuesPretty = {} + for action in actions: + qValuesPretty[action] = self.prettyValues(qValues[action]) + return (valuesPretty, qValuesPretty, actions, policyPretty, lastExperience) + + def prettyPrint(self, elements, formatString): + pretty = '' + states = self.grid.getStates() + for ybar in range(self.grid.grid.height): + y = self.grid.grid.height-1-ybar + row = [] + for x in range(self.grid.grid.width): + if (x, y) in states: + value = elements[(x, y)] + if value is None: + row.append(' illegal') + else: + row.append(formatString.format(elements[(x,y)])) + else: + row.append('_' * 10) + pretty += ' %s\n' % (" ".join(row), ) + pretty += '\n' + return pretty + + def prettyValues(self, values): + return self.prettyPrint(values, '{0:10.4f}') + + def prettyPolicy(self, policy): + return self.prettyPrint(policy, '{0:10s}') + + def prettyValueSolutionString(self, name, pretty): + 
return '%s: """\n%s\n"""\n\n' % (name, pretty.rstrip()) + + def comparePrettyValues(self, aPretty, bPretty, tolerance=0.01): + aList = self.parsePrettyValues(aPretty) + bList = self.parsePrettyValues(bPretty) + if len(aList) != len(bList): + return False + for a, b in zip(aList, bList): + try: + aNum = float(a) + bNum = float(b) + # error = abs((aNum - bNum) / ((aNum + bNum) / 2.0)) + error = abs(aNum - bNum) + if error > tolerance: + return False + except ValueError: + if a.strip() != b.strip(): + return False + return True + + def parsePrettyValues(self, pretty): + values = pretty.split() + return values + + +class EpsilonGreedyTest(testClasses.TestCase): + + def __init__(self, question, testDict): + super(EpsilonGreedyTest, self).__init__(question, testDict) + self.discount = float(testDict['discount']) + self.grid = gridworld.Gridworld(parseGrid(testDict['grid'])) + if 'noise' in testDict: self.grid.setNoise(float(testDict['noise'])) + if 'livingReward' in testDict: self.grid.setLivingReward(float(testDict['livingReward'])) + + self.grid = gridworld.Gridworld(parseGrid(testDict['grid'])) + self.env = gridworld.GridworldEnvironment(self.grid) + self.epsilon = float(testDict['epsilon']) + self.learningRate = float(testDict['learningRate']) + self.numExperiences = int(testDict['numExperiences']) + self.numIterations = int(testDict['iterations']) + self.opts = {'actionFn': self.env.getPossibleActions, 'epsilon': self.epsilon, 'gamma': self.discount, 'alpha': self.learningRate} + + def execute(self, grades, moduleDict, solutionDict): + if self.testEpsilonGreedy(moduleDict): + return self.testPass(grades) + else: + return self.testFail(grades) + + def writeSolution(self, moduleDict, filePath): + with open(filePath, 'w') as handle: + handle.write('# This is the solution file for %s.\n' % self.path) + handle.write('# File intentionally blank.\n') + return True + + def runAgent(self, moduleDict): + agent = moduleDict['qlearningAgents'].QLearningAgent(**self.opts) + states = filter(lambda state : len(self.grid.getPossibleActions(state)) > 0, self.grid.getStates()) + states.sort() + randObj = FixedRandom().random + # choose a random start state and a random possible action from that state + # get the next state and reward from the transition function + for i in range(self.numExperiences): + startState = randObj.choice(states) + action = randObj.choice(self.grid.getPossibleActions(startState)) + (endState, reward) = self.env.getRandomNextState(startState, action, randObj=randObj) + agent.update(startState, action, endState, reward) + return agent + + def testEpsilonGreedy(self, moduleDict, tolerance=0.025): + agent = self.runAgent(moduleDict) + for state in self.grid.getStates(): + numLegalActions = len(agent.getLegalActions(state)) + if numLegalActions <= 1: + continue + numGreedyChoices = 0 + optimalAction = agent.computeActionFromQValues(state) + for iteration in range(self.numIterations): + # assume that their computeActionFromQValues implementation is correct (q4 tests this) + if agent.getAction(state) == optimalAction: + numGreedyChoices += 1 + # e = epsilon, g = # greedy actions, n = numIterations, k = numLegalActions + # g = n * [(1-e) + e/k] -> e = (n - g) / (n - n/k) + empiricalEpsilonNumerator = self.numIterations - numGreedyChoices + empiricalEpsilonDenominator = self.numIterations - self.numIterations / float(numLegalActions) + empiricalEpsilon = empiricalEpsilonNumerator / empiricalEpsilonDenominator + error = abs(empiricalEpsilon - self.epsilon) + if error > tolerance: + 
self.addMessage("Epsilon-greedy action selection is not correct.") + self.addMessage("Actual epsilon = %f; student empirical epsilon = %f; error = %f > tolerance = %f" % (self.epsilon, empiricalEpsilon, error, tolerance)) + return False + return True + + +### q6 +class Question6Test(testClasses.TestCase): + + def __init__(self, question, testDict): + super(Question6Test, self).__init__(question, testDict) + + def execute(self, grades, moduleDict, solutionDict): + studentSolution = moduleDict['analysis'].question6() + studentSolution = str(studentSolution).strip().lower() + hashedSolution = sha1(studentSolution).hexdigest() + if hashedSolution == '46729c96bb1e4081fdc81a8ff74b3e5db8fba415': + return self.testPass(grades) + else: + self.addMessage("Solution is not correct.") + self.addMessage(" Student solution: %s" % (studentSolution,)) + return self.testFail(grades) + + def writeSolution(self, moduleDict, filePath): + handle = open(filePath, 'w') + handle.write('# This is the solution file for %s.\n' % self.path) + handle.write('# File intentionally blank.\n') + handle.close() + return True + + +### q7/q8 +### ===== +## Average wins of a pacman agent + +class EvalAgentTest(testClasses.TestCase): + + def __init__(self, question, testDict): + super(EvalAgentTest, self).__init__(question, testDict) + self.pacmanParams = testDict['pacmanParams'] + + self.scoreMinimum = int(testDict['scoreMinimum']) if 'scoreMinimum' in testDict else None + self.nonTimeoutMinimum = int(testDict['nonTimeoutMinimum']) if 'nonTimeoutMinimum' in testDict else None + self.winsMinimum = int(testDict['winsMinimum']) if 'winsMinimum' in testDict else None + + self.scoreThresholds = [int(s) for s in testDict.get('scoreThresholds','').split()] + self.nonTimeoutThresholds = [int(s) for s in testDict.get('nonTimeoutThresholds','').split()] + self.winsThresholds = [int(s) for s in testDict.get('winsThresholds','').split()] + + self.maxPoints = sum([len(t) for t in [self.scoreThresholds, self.nonTimeoutThresholds, self.winsThresholds]]) + + + def execute(self, grades, moduleDict, solutionDict): + self.addMessage('Grading agent using command: python pacman.py %s'% (self.pacmanParams,)) + + startTime = time.time() + games = pacman.runGames(** pacman.readCommand(self.pacmanParams.split(' '))) + totalTime = time.time() - startTime + numGames = len(games) + + stats = {'time': totalTime, 'wins': [g.state.isWin() for g in games].count(True), + 'games': games, 'scores': [g.state.getScore() for g in games], + 'timeouts': [g.agentTimeout for g in games].count(True), 'crashes': [g.agentCrashed for g in games].count(True)} + + averageScore = sum(stats['scores']) / float(len(stats['scores'])) + nonTimeouts = numGames - stats['timeouts'] + wins = stats['wins'] + + def gradeThreshold(value, minimum, thresholds, name): + points = 0 + passed = (minimum == None) or (value >= minimum) + if passed: + for t in thresholds: + if value >= t: + points += 1 + return (passed, points, value, minimum, thresholds, name) + + results = [gradeThreshold(averageScore, self.scoreMinimum, self.scoreThresholds, "average score"), + gradeThreshold(nonTimeouts, self.nonTimeoutMinimum, self.nonTimeoutThresholds, "games not timed out"), + gradeThreshold(wins, self.winsMinimum, self.winsThresholds, "wins")] + + totalPoints = 0 + for passed, points, value, minimum, thresholds, name in results: + if minimum == None and len(thresholds)==0: + continue + + # print passed, points, value, minimum, thresholds, name + totalPoints += points + if not passed: + assert points == 0 
+ self.addMessage("%s %s (fail: below minimum value %s)" % (value, name, minimum)) + else: + self.addMessage("%s %s (%s of %s points)" % (value, name, points, len(thresholds))) + + if minimum != None: + self.addMessage(" Grading scheme:") + self.addMessage(" < %s: fail" % (minimum,)) + if len(thresholds)==0 or minimum != thresholds[0]: + self.addMessage(" >= %s: 0 points" % (minimum,)) + for idx, threshold in enumerate(thresholds): + self.addMessage(" >= %s: %s points" % (threshold, idx+1)) + elif len(thresholds) > 0: + self.addMessage(" Grading scheme:") + self.addMessage(" < %s: 0 points" % (thresholds[0],)) + for idx, threshold in enumerate(thresholds): + self.addMessage(" >= %s: %s points" % (threshold, idx+1)) + + if any([not passed for passed, _, _, _, _, _ in results]): + totalPoints = 0 + + return self.testPartial(grades, totalPoints, self.maxPoints) + + def writeSolution(self, moduleDict, filePath): + with open(filePath, 'w') as handle: + handle.write('# This is the solution file for %s.\n' % self.path) + handle.write('# File intentionally blank.\n') + return True + + + + +### q2/q3 +### ===== +## For each parameter setting, compute the optimal policy, see if it satisfies some properties + +def followPath(policy, start, numSteps=100): + state = start + path = [] + for i in range(numSteps): + if state not in policy: + break + action = policy[state] + path.append("(%s,%s)" % state) + if action == 'north': nextState = state[0],state[1]+1 + if action == 'south': nextState = state[0],state[1]-1 + if action == 'east': nextState = state[0]+1,state[1] + if action == 'west': nextState = state[0]-1,state[1] + if action == 'exit' or action == None: + path.append('TERMINAL_STATE') + break + state = nextState + + return path + +def parseGrid(string): + grid = [[entry.strip() for entry in line.split()] for line in string.split('\n')] + for row in grid: + for x, col in enumerate(row): + try: + col = int(col) + except: + pass + if col == "_": + col = ' ' + row[x] = col + return gridworld.makeGrid(grid) + + +def computePolicy(moduleDict, grid, discount): + valueIterator = moduleDict['valueIterationAgents'].ValueIterationAgent(grid, discount=discount) + policy = {} + for state in grid.getStates(): + policy[state] = valueIterator.computeActionFromValues(state) + return policy + + + +class GridPolicyTest(testClasses.TestCase): + + def __init__(self, question, testDict): + super(GridPolicyTest, self).__init__(question, testDict) + + # Function in module in analysis that returns (discount, noise) + self.parameterFn = testDict['parameterFn'] + self.question2 = testDict.get('question2', 'false').lower() == 'true' + + # GridWorld specification + # _ is empty space + # numbers are terminal states with that value + # # is a wall + # S is a start state + # + self.gridText = testDict['grid'] + self.grid = gridworld.Gridworld(parseGrid(testDict['grid'])) + self.gridName = testDict['gridName'] + + # Policy specification + # _ policy choice not checked + # N, E, S, W policy action must be north, east, south, west + # + self.policy = parseGrid(testDict['policy']) + + # State the most probable path must visit + # (x,y) for a particular location; (0,0) is bottom left + # terminal for the terminal state + self.pathVisits = testDict.get('pathVisits', None) + + # State the most probable path must not visit + # (x,y) for a particular location; (0,0) is bottom left + # terminal for the terminal state + self.pathNotVisits = testDict.get('pathNotVisits', None) + + + def execute(self, grades, moduleDict, solutionDict): + 
if not hasattr(moduleDict['analysis'], self.parameterFn): + self.addMessage('Method not implemented: analysis.%s' % (self.parameterFn,)) + return self.testFail(grades) + + result = getattr(moduleDict['analysis'], self.parameterFn)() + + if type(result) == str and result.lower()[0:3] == "not": + self.addMessage('Actually, it is possible!') + return self.testFail(grades) + + if self.question2: + livingReward = None + try: + discount, noise = result + discount = float(discount) + noise = float(noise) + except: + self.addMessage('Did not return a (discount, noise) pair; instead analysis.%s returned: %s' % (self.parameterFn, result)) + return self.testFail(grades) + if discount != 0.9 and noise != 0.2: + self.addMessage('Must change either the discount or the noise, not both. Returned (discount, noise) = %s' % (result,)) + return self.testFail(grades) + else: + try: + discount, noise, livingReward = result + discount = float(discount) + noise = float(noise) + livingReward = float(livingReward) + except: + self.addMessage('Did not return a (discount, noise, living reward) triple; instead analysis.%s returned: %s' % (self.parameterFn, result)) + return self.testFail(grades) + + self.grid.setNoise(noise) + if livingReward != None: + self.grid.setLivingReward(livingReward) + + start = self.grid.getStartState() + policy = computePolicy(moduleDict, self.grid, discount) + + ## check policy + actionMap = {'N': 'north', 'E': 'east', 'S': 'south', 'W': 'west', 'X': 'exit'} + width, height = self.policy.width, self.policy.height + policyPassed = True + for x in range(width): + for y in range(height): + if self.policy[x][y] in actionMap and policy[(x,y)] != actionMap[self.policy[x][y]]: + differPoint = (x,y) + policyPassed = False + + if not policyPassed: + self.addMessage('Policy not correct.') + self.addMessage(' Student policy at %s: %s' % (differPoint, policy[differPoint])) + self.addMessage(' Correct policy at %s: %s' % (differPoint, actionMap[self.policy[differPoint[0]][differPoint[1]]])) + self.addMessage(' Student policy:') + self.printPolicy(policy, False) + self.addMessage(" Legend: N,S,E,W at states which move north etc, X at states which exit,") + self.addMessage(" . at states where the policy is not defined (e.g. walls)") + self.addMessage(' Correct policy specification:') + self.printPolicy(self.policy, True) + self.addMessage(" Legend: N,S,E,W for states in which the student policy must move north etc,") + self.addMessage(" _ for states where it doesn't matter what the student policy does.") + self.printGridworld() + return self.testFail(grades) + + ## check path + path = followPath(policy, self.grid.getStartState()) + + if self.pathVisits != None and self.pathVisits not in path: + self.addMessage('Policy does not visit state %s when moving without noise.' % (self.pathVisits,)) + self.addMessage(' States visited: %s' % (path,)) + self.addMessage(' Student policy:') + self.printPolicy(policy, False) + self.addMessage(" Legend: N,S,E,W at states which move north etc, X at states which exit,") + self.addMessage(" . at states where policy not defined") + self.printGridworld() + return self.testFail(grades) + + if self.pathNotVisits != None and self.pathNotVisits in path: + self.addMessage('Policy visits state %s when moving without noise.' 
% (self.pathNotVisits,)) + self.addMessage(' States visited: %s' % (path,)) + self.addMessage(' Student policy:') + self.printPolicy(policy, False) + self.addMessage(" Legend: N,S,E,W at states which move north etc, X at states which exit,") + self.addMessage(" . at states where policy not defined") + self.printGridworld() + return self.testFail(grades) + + return self.testPass(grades) + + def printGridworld(self): + self.addMessage(' Gridworld:') + for line in self.gridText.split('\n'): + self.addMessage(' ' + line) + self.addMessage(' Legend: # wall, _ empty, S start, numbers terminal states with that reward.') + + def printPolicy(self, policy, policyTypeIsGrid): + if policyTypeIsGrid: + legend = {'N': 'N', 'E': 'E', 'S': 'S', 'W': 'W', ' ': '_'} + else: + legend = {'north': 'N', 'east': 'E', 'south': 'S', 'west': 'W', 'exit': 'X', '.': '.', ' ': '_'} + + for ybar in range(self.grid.grid.height): + y = self.grid.grid.height-1-ybar + if policyTypeIsGrid: + self.addMessage(" %s" % (" ".join([legend[policy[x][y]] for x in range(self.grid.grid.width)]),)) + else: + self.addMessage(" %s" % (" ".join([legend[policy.get((x,y), '.')] for x in range(self.grid.grid.width)]),)) + # for state in sorted(self.grid.getStates()): + # if state != 'TERMINAL_STATE': + # self.addMessage(' (%s,%s) %s' % (state[0], state[1], policy[state])) + + + def writeSolution(self, moduleDict, filePath): + with open(filePath, 'w') as handle: + handle.write('# This is the solution file for %s.\n' % self.path) + handle.write('# File intentionally blank.\n') + return True + diff --git a/reinforcement/testClasses.py b/reinforcement/testClasses.py new file mode 100644 index 0000000..67b76b5 --- /dev/null +++ b/reinforcement/testClasses.py @@ -0,0 +1,189 @@ +# testClasses.py +# -------------- +# Licensing Information: You are free to use or extend these projects for +# educational purposes provided that (1) you do not distribute or publish +# solutions, (2) you retain this notice, and (3) you provide clear +# attribution to UC Berkeley, including a link to http://ai.berkeley.edu. +# +# Attribution Information: The Pacman AI projects were developed at UC Berkeley. +# The core projects and autograders were primarily created by John DeNero +# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). +# Student side autograding was added by Brad Miller, Nick Hay, and +# Pieter Abbeel (pabbeel@cs.berkeley.edu). + + +# import modules from python standard library +import inspect +import re +import sys + + +# Class which models a question in a project. Note that questions have a +# maximum number of points they are worth, and are composed of a series of +# test cases +class Question(object): + + def raiseNotDefined(self): + print 'Method not implemented: %s' % inspect.stack()[1][3] + sys.exit(1) + + def __init__(self, questionDict, display): + self.maxPoints = int(questionDict['max_points']) + self.testCases = [] + self.display = display + + def getDisplay(self): + return self.display + + def getMaxPoints(self): + return self.maxPoints + + # Note that 'thunk' must be a function which accepts a single argument, + # namely a 'grading' object + def addTestCase(self, testCase, thunk): + self.testCases.append((testCase, thunk)) + + def execute(self, grades): + self.raiseNotDefined() + +# Question in which all test cases must be passed in order to receive credit +class PassAllTestsQuestion(Question): + + def execute(self, grades): + # TODO: is this the right way to use grades? The autograder doesn't seem to use it. 
+ testsFailed = False + grades.assignZeroCredit() + for _, f in self.testCases: + if not f(grades): + testsFailed = True + if testsFailed: + grades.fail("Tests failed.") + else: + grades.assignFullCredit() + + +# Question in which partial credit is given for test cases with a ``points'' property. +# All other tests are mandatory and must be passed. +class HackedPartialCreditQuestion(Question): + + def execute(self, grades): + # TODO: is this the right way to use grades? The autograder doesn't seem to use it. + grades.assignZeroCredit() + + points = 0 + passed = True + for testCase, f in self.testCases: + testResult = f(grades) + if "points" in testCase.testDict: + if testResult: points += float(testCase.testDict["points"]) + else: + passed = passed and testResult + + ## FIXME: Below terrible hack to match q3's logic + if int(points) == self.maxPoints and not passed: + grades.assignZeroCredit() + else: + grades.addPoints(int(points)) + + +class Q6PartialCreditQuestion(Question): + """Fails any test which returns False, otherwise doesn't affect the grades object. + Partial credit tests will add the required points.""" + + def execute(self, grades): + grades.assignZeroCredit() + + results = [] + for _, f in self.testCases: + results.append(f(grades)) + if False in results: + grades.assignZeroCredit() + +class PartialCreditQuestion(Question): + """Fails any test which returns False, otherwise doesn't affect the grades object. + Partial credit tests will add the required points.""" + + def execute(self, grades): + grades.assignZeroCredit() + + for _, f in self.testCases: + if not f(grades): + grades.assignZeroCredit() + grades.fail("Tests failed.") + return False + + + +class NumberPassedQuestion(Question): + """Grade is the number of test cases passed.""" + + def execute(self, grades): + grades.addPoints([f(grades) for _, f in self.testCases].count(True)) + + + + + +# Template modeling a generic test case +class TestCase(object): + + def raiseNotDefined(self): + print 'Method not implemented: %s' % inspect.stack()[1][3] + sys.exit(1) + + def getPath(self): + return self.path + + def __init__(self, question, testDict): + self.question = question + self.testDict = testDict + self.path = testDict['path'] + self.messages = [] + + def __str__(self): + self.raiseNotDefined() + + def execute(self, grades, moduleDict, solutionDict): + self.raiseNotDefined() + + def writeSolution(self, moduleDict, filePath): + self.raiseNotDefined() + return True + + # Tests should call the following methods for grading + # to ensure a uniform format for test output. + # + # TODO: this is hairy, but we need to fix grading.py's interface + # to get a nice hierarchical project - question - test structure, + # then these should be moved into Question proper. + def testPass(self, grades): + grades.addMessage('PASS: %s' % (self.path,)) + for line in self.messages: + grades.addMessage(' %s' % (line,)) + return True + + def testFail(self, grades): + grades.addMessage('FAIL: %s' % (self.path,)) + for line in self.messages: + grades.addMessage(' %s' % (line,)) + return False + + # This should really be question level?
+ # + def testPartial(self, grades, points, maxPoints): + grades.addPoints(points) + extraCredit = max(0, points - maxPoints) + regularCredit = points - extraCredit + + grades.addMessage('%s: %s (%s of %s points)' % ("PASS" if points >= maxPoints else "FAIL", self.path, regularCredit, maxPoints)) + if extraCredit > 0: + grades.addMessage('EXTRA CREDIT: %s points' % (extraCredit,)) + + for line in self.messages: + grades.addMessage(' %s' % (line,)) + + return True + + def addMessage(self, message): + self.messages.extend(message.split('\n')) + diff --git a/reinforcement/testParser.py b/reinforcement/testParser.py new file mode 100644 index 0000000..ceedeaf --- /dev/null +++ b/reinforcement/testParser.py @@ -0,0 +1,85 @@ +# testParser.py +# ------------- +# Licensing Information: You are free to use or extend these projects for +# educational purposes provided that (1) you do not distribute or publish +# solutions, (2) you retain this notice, and (3) you provide clear +# attribution to UC Berkeley, including a link to http://ai.berkeley.edu. +# +# Attribution Information: The Pacman AI projects were developed at UC Berkeley. +# The core projects and autograders were primarily created by John DeNero +# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). +# Student side autograding was added by Brad Miller, Nick Hay, and +# Pieter Abbeel (pabbeel@cs.berkeley.edu). + + +import re +import sys + +class TestParser(object): + + def __init__(self, path): + # save the path to the test file + self.path = path + + def removeComments(self, rawlines): + # remove any portion of a line following a '#' symbol + fixed_lines = [] + for l in rawlines: + idx = l.find('#') + if idx == -1: + fixed_lines.append(l) + else: + fixed_lines.append(l[0:idx]) + return '\n'.join(fixed_lines) + + def parse(self): + # read in the test case and remove comments + test = {} + with open(self.path) as handle: + raw_lines = handle.read().split('\n') + + test_text = self.removeComments(raw_lines) + test['__raw_lines__'] = raw_lines + test['path'] = self.path + test['__emit__'] = [] + lines = test_text.split('\n') + i = 0 + # read a property in each loop cycle + while(i < len(lines)): + # skip blank lines + if re.match('\A\s*\Z', lines[i]): + test['__emit__'].append(("raw", raw_lines[i])) + i += 1 + continue + m = re.match('\A([^"]*?):\s*"([^"]*)"\s*\Z', lines[i]) + if m: + test[m.group(1)] = m.group(2) + test['__emit__'].append(("oneline", m.group(1))) + i += 1 + continue + m = re.match('\A([^"]*?):\s*"""\s*\Z', lines[i]) + if m: + msg = [] + i += 1 + while(not re.match('\A\s*"""\s*\Z', lines[i])): + msg.append(raw_lines[i]) + i += 1 + test[m.group(1)] = '\n'.join(msg) + test['__emit__'].append(("multiline", m.group(1))) + i += 1 + continue + print 'error parsing test file: %s' % self.path + sys.exit(1) + return test + + +def emitTestDict(testDict, handle): + for kind, data in testDict['__emit__']: + if kind == "raw": + handle.write(data + "\n") + elif kind == "oneline": + handle.write('%s: "%s"\n' % (data, testDict[data])) + elif kind == "multiline": + handle.write('%s: """\n%s\n"""\n' % (data, testDict[data])) + else: + raise Exception("Bad __emit__") diff --git a/reinforcement/test_cases/CONFIG b/reinforcement/test_cases/CONFIG new file mode 100644 index 0000000..e69de29 diff --git a/reinforcement/test_cases/q1/1-tinygrid.solution b/reinforcement/test_cases/q1/1-tinygrid.solution new file mode 100644 index 0000000..5cbc57f --- /dev/null +++ b/reinforcement/test_cases/q1/1-tinygrid.solution @@ -0,0 +1,410 @@ 
+values_k_0: """ + 0.0000 + 0.0000 + 0.0000 +""" + +q_values_k_0_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_0_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_0_action_exit: """ + -10.0000 + illegal + 10.0000 +""" + +q_values_k_0_action_south: """ + illegal + 0.0000 + illegal +""" + +q_values_k_0_action_west: """ + illegal + 0.0000 + illegal +""" + +values_k_1: """ + -10.0000 + 0.0000 + 10.0000 +""" + +q_values_k_1_action_north: """ + illegal + -5.0000 + illegal +""" + +q_values_k_1_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_1_action_exit: """ + -10.0000 + illegal + 10.0000 +""" + +q_values_k_1_action_south: """ + illegal + 5.0000 + illegal +""" + +q_values_k_1_action_west: """ + illegal + 0.0000 + illegal +""" + +values_k_2: """ + -10.0000 + 5.0000 + 10.0000 +""" + +q_values_k_2_action_north: """ + illegal + -5.0000 + illegal +""" + +q_values_k_2_action_east: """ + illegal + 2.5000 + illegal +""" + +q_values_k_2_action_exit: """ + -10.0000 + illegal + 10.0000 +""" + +q_values_k_2_action_south: """ + illegal + 5.0000 + illegal +""" + +q_values_k_2_action_west: """ + illegal + 2.5000 + illegal +""" + +values_k_3: """ + -10.0000 + 5.0000 + 10.0000 +""" + +q_values_k_3_action_north: """ + illegal + -5.0000 + illegal +""" + +q_values_k_3_action_east: """ + illegal + 2.5000 + illegal +""" + +q_values_k_3_action_exit: """ + -10.0000 + illegal + 10.0000 +""" + +q_values_k_3_action_south: """ + illegal + 5.0000 + illegal +""" + +q_values_k_3_action_west: """ + illegal + 2.5000 + illegal +""" + +values_k_4: """ + -10.0000 + 5.0000 + 10.0000 +""" + +q_values_k_4_action_north: """ + illegal + -5.0000 + illegal +""" + +q_values_k_4_action_east: """ + illegal + 2.5000 + illegal +""" + +q_values_k_4_action_exit: """ + -10.0000 + illegal + 10.0000 +""" + +q_values_k_4_action_south: """ + illegal + 5.0000 + illegal +""" + +q_values_k_4_action_west: """ + illegal + 2.5000 + illegal +""" + +values_k_5: """ + -10.0000 + 5.0000 + 10.0000 +""" + +q_values_k_5_action_north: """ + illegal + -5.0000 + illegal +""" + +q_values_k_5_action_east: """ + illegal + 2.5000 + illegal +""" + +q_values_k_5_action_exit: """ + -10.0000 + illegal + 10.0000 +""" + +q_values_k_5_action_south: """ + illegal + 5.0000 + illegal +""" + +q_values_k_5_action_west: """ + illegal + 2.5000 + illegal +""" + +values_k_6: """ + -10.0000 + 5.0000 + 10.0000 +""" + +q_values_k_6_action_north: """ + illegal + -5.0000 + illegal +""" + +q_values_k_6_action_east: """ + illegal + 2.5000 + illegal +""" + +q_values_k_6_action_exit: """ + -10.0000 + illegal + 10.0000 +""" + +q_values_k_6_action_south: """ + illegal + 5.0000 + illegal +""" + +q_values_k_6_action_west: """ + illegal + 2.5000 + illegal +""" + +values_k_7: """ + -10.0000 + 5.0000 + 10.0000 +""" + +q_values_k_7_action_north: """ + illegal + -5.0000 + illegal +""" + +q_values_k_7_action_east: """ + illegal + 2.5000 + illegal +""" + +q_values_k_7_action_exit: """ + -10.0000 + illegal + 10.0000 +""" + +q_values_k_7_action_south: """ + illegal + 5.0000 + illegal +""" + +q_values_k_7_action_west: """ + illegal + 2.5000 + illegal +""" + +values_k_8: """ + -10.0000 + 5.0000 + 10.0000 +""" + +q_values_k_8_action_north: """ + illegal + -5.0000 + illegal +""" + +q_values_k_8_action_east: """ + illegal + 2.5000 + illegal +""" + +q_values_k_8_action_exit: """ + -10.0000 + illegal + 10.0000 +""" + +q_values_k_8_action_south: """ + illegal + 5.0000 + illegal +""" + +q_values_k_8_action_west: """ + illegal + 2.5000 + illegal +""" + +values_k_9: 
""" + -10.0000 + 5.0000 + 10.0000 +""" + +q_values_k_9_action_north: """ + illegal + -5.0000 + illegal +""" + +q_values_k_9_action_east: """ + illegal + 2.5000 + illegal +""" + +q_values_k_9_action_exit: """ + -10.0000 + illegal + 10.0000 +""" + +q_values_k_9_action_south: """ + illegal + 5.0000 + illegal +""" + +q_values_k_9_action_west: """ + illegal + 2.5000 + illegal +""" + +values_k_100: """ + -10.0000 + 5.0000 + 10.0000 +""" + +q_values_k_100_action_north: """ + illegal + -5.0000 + illegal +""" + +q_values_k_100_action_east: """ + illegal + 2.5000 + illegal +""" + +q_values_k_100_action_exit: """ + -10.0000 + illegal + 10.0000 +""" + +q_values_k_100_action_south: """ + illegal + 5.0000 + illegal +""" + +q_values_k_100_action_west: """ + illegal + 2.5000 + illegal +""" + +policy: """ + exit + south + exit +""" + +actions: """ +north +east +exit +south +west +""" + diff --git a/reinforcement/test_cases/q1/1-tinygrid.test b/reinforcement/test_cases/q1/1-tinygrid.test new file mode 100644 index 0000000..3c01b59 --- /dev/null +++ b/reinforcement/test_cases/q1/1-tinygrid.test @@ -0,0 +1,22 @@ +class: "ValueIterationTest" + +# GridWorld specification +# _ is empty space +# numbers are terminal states with that value +# # is a wall +# S is a start state +# +grid: """ + -10 + S + 10 +""" +discount: "0.5" +noise: "0.0" +livingReward: "0.0" +epsilon: "0.5" +learningRate: "0.1" +numExperiences: "100" +valueIterations: "100" +iterations: "10000" + diff --git a/reinforcement/test_cases/q1/2-tinygrid-noisy.solution b/reinforcement/test_cases/q1/2-tinygrid-noisy.solution new file mode 100644 index 0000000..7ba37ab --- /dev/null +++ b/reinforcement/test_cases/q1/2-tinygrid-noisy.solution @@ -0,0 +1,410 @@ +values_k_0: """ + 0.0000 + 0.0000 + 0.0000 +""" + +q_values_k_0_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_0_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_0_action_exit: """ + -10.0000 + illegal + 10.0000 +""" + +q_values_k_0_action_south: """ + illegal + 0.0000 + illegal +""" + +q_values_k_0_action_west: """ + illegal + 0.0000 + illegal +""" + +values_k_1: """ + -10.0000 + 0.0000 + 10.0000 +""" + +q_values_k_1_action_north: """ + illegal + -5.6250 + illegal +""" + +q_values_k_1_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_1_action_exit: """ + -10.0000 + illegal + 10.0000 +""" + +q_values_k_1_action_south: """ + illegal + 5.6250 + illegal +""" + +q_values_k_1_action_west: """ + illegal + 0.0000 + illegal +""" + +values_k_2: """ + -10.0000 + 5.6250 + 10.0000 +""" + +q_values_k_2_action_north: """ + illegal + -4.5703 + illegal +""" + +q_values_k_2_action_east: """ + illegal + 3.1641 + illegal +""" + +q_values_k_2_action_exit: """ + -10.0000 + illegal + 10.0000 +""" + +q_values_k_2_action_south: """ + illegal + 6.6797 + illegal +""" + +q_values_k_2_action_west: """ + illegal + 3.1641 + illegal +""" + +values_k_3: """ + -10.0000 + 6.6797 + 10.0000 +""" + +q_values_k_3_action_north: """ + illegal + -4.3726 + illegal +""" + +q_values_k_3_action_east: """ + illegal + 3.7573 + illegal +""" + +q_values_k_3_action_exit: """ + -10.0000 + illegal + 10.0000 +""" + +q_values_k_3_action_south: """ + illegal + 6.8774 + illegal +""" + +q_values_k_3_action_west: """ + illegal + 3.7573 + illegal +""" + +values_k_4: """ + -10.0000 + 6.8774 + 10.0000 +""" + +q_values_k_4_action_north: """ + illegal + -4.3355 + illegal +""" + +q_values_k_4_action_east: """ + illegal + 3.8686 + illegal +""" + +q_values_k_4_action_exit: """ + -10.0000 + illegal + 10.0000 +""" + 
+q_values_k_4_action_south: """ + illegal + 6.9145 + illegal +""" + +q_values_k_4_action_west: """ + illegal + 3.8686 + illegal +""" + +values_k_5: """ + -10.0000 + 6.9145 + 10.0000 +""" + +q_values_k_5_action_north: """ + illegal + -4.3285 + illegal +""" + +q_values_k_5_action_east: """ + illegal + 3.8894 + illegal +""" + +q_values_k_5_action_exit: """ + -10.0000 + illegal + 10.0000 +""" + +q_values_k_5_action_south: """ + illegal + 6.9215 + illegal +""" + +q_values_k_5_action_west: """ + illegal + 3.8894 + illegal +""" + +values_k_6: """ + -10.0000 + 6.9215 + 10.0000 +""" + +q_values_k_6_action_north: """ + illegal + -4.3272 + illegal +""" + +q_values_k_6_action_east: """ + illegal + 3.8933 + illegal +""" + +q_values_k_6_action_exit: """ + -10.0000 + illegal + 10.0000 +""" + +q_values_k_6_action_south: """ + illegal + 6.9228 + illegal +""" + +q_values_k_6_action_west: """ + illegal + 3.8933 + illegal +""" + +values_k_7: """ + -10.0000 + 6.9228 + 10.0000 +""" + +q_values_k_7_action_north: """ + illegal + -4.3270 + illegal +""" + +q_values_k_7_action_east: """ + illegal + 3.8941 + illegal +""" + +q_values_k_7_action_exit: """ + -10.0000 + illegal + 10.0000 +""" + +q_values_k_7_action_south: """ + illegal + 6.9230 + illegal +""" + +q_values_k_7_action_west: """ + illegal + 3.8941 + illegal +""" + +values_k_8: """ + -10.0000 + 6.9230 + 10.0000 +""" + +q_values_k_8_action_north: """ + illegal + -4.3269 + illegal +""" + +q_values_k_8_action_east: """ + illegal + 3.8942 + illegal +""" + +q_values_k_8_action_exit: """ + -10.0000 + illegal + 10.0000 +""" + +q_values_k_8_action_south: """ + illegal + 6.9231 + illegal +""" + +q_values_k_8_action_west: """ + illegal + 3.8942 + illegal +""" + +values_k_9: """ + -10.0000 + 6.9231 + 10.0000 +""" + +q_values_k_9_action_north: """ + illegal + -4.3269 + illegal +""" + +q_values_k_9_action_east: """ + illegal + 3.8942 + illegal +""" + +q_values_k_9_action_exit: """ + -10.0000 + illegal + 10.0000 +""" + +q_values_k_9_action_south: """ + illegal + 6.9231 + illegal +""" + +q_values_k_9_action_west: """ + illegal + 3.8942 + illegal +""" + +values_k_100: """ + -10.0000 + 6.9231 + 10.0000 +""" + +q_values_k_100_action_north: """ + illegal + -4.3269 + illegal +""" + +q_values_k_100_action_east: """ + illegal + 3.8942 + illegal +""" + +q_values_k_100_action_exit: """ + -10.0000 + illegal + 10.0000 +""" + +q_values_k_100_action_south: """ + illegal + 6.9231 + illegal +""" + +q_values_k_100_action_west: """ + illegal + 3.8942 + illegal +""" + +policy: """ + exit + south + exit +""" + +actions: """ +north +east +exit +south +west +""" + diff --git a/reinforcement/test_cases/q1/2-tinygrid-noisy.test b/reinforcement/test_cases/q1/2-tinygrid-noisy.test new file mode 100644 index 0000000..edad673 --- /dev/null +++ b/reinforcement/test_cases/q1/2-tinygrid-noisy.test @@ -0,0 +1,22 @@ +class: "ValueIterationTest" + +# GridWorld specification +# _ is empty space +# numbers are terminal states with that value +# # is a wall +# S is a start state +# +grid: """ + -10 + S + 10 +""" +discount: "0.75" +noise: "0.25" +livingReward: "0.0" +epsilon: "0.5" +learningRate: "0.1" +numExperiences: "100" +valueIterations: "100" +iterations: "10000" + diff --git a/reinforcement/test_cases/q1/3-bridge.solution b/reinforcement/test_cases/q1/3-bridge.solution new file mode 100644 index 0000000..088c016 --- /dev/null +++ b/reinforcement/test_cases/q1/3-bridge.solution @@ -0,0 +1,678 @@ +values_k_0: """ + __________ 0.0000 __________ + 0.0000 0.0000 0.0000 + 0.0000 0.0000 0.0000 + 0.0000 0.0000 
0.0000 + 0.0000 0.0000 0.0000 + 0.0000 0.0000 0.0000 + __________ 0.0000 __________ +""" + +q_values_k_0_action_north: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_0_action_east: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_0_action_exit: """ + __________ 10.0000 __________ + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + __________ 1.0000 __________ +""" + +q_values_k_0_action_south: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_0_action_west: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +values_k_1: """ + __________ 10.0000 __________ + -100.0000 0.0000 -100.0000 + -100.0000 0.0000 -100.0000 + -100.0000 0.0000 -100.0000 + -100.0000 0.0000 -100.0000 + -100.0000 0.0000 -100.0000 + __________ 1.0000 __________ +""" + +q_values_k_1_action_north: """ + __________ illegal __________ + illegal -0.8500 illegal + illegal -8.5000 illegal + illegal -8.5000 illegal + illegal -8.5000 illegal + illegal -8.5000 illegal + __________ illegal __________ +""" + +q_values_k_1_action_east: """ + __________ illegal __________ + illegal -76.0750 illegal + illegal -76.5000 illegal + illegal -76.5000 illegal + illegal -76.5000 illegal + illegal -76.4575 illegal + __________ illegal __________ +""" + +q_values_k_1_action_exit: """ + __________ 10.0000 __________ + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + __________ 1.0000 __________ +""" + +q_values_k_1_action_south: """ + __________ illegal __________ + illegal -8.5000 illegal + illegal -8.5000 illegal + illegal -8.5000 illegal + illegal -8.5000 illegal + illegal -7.7350 illegal + __________ illegal __________ +""" + +q_values_k_1_action_west: """ + __________ illegal __________ + illegal -76.0750 illegal + illegal -76.5000 illegal + illegal -76.5000 illegal + illegal -76.5000 illegal + illegal -76.4575 illegal + __________ illegal __________ +""" + +values_k_2: """ + __________ 10.0000 __________ + -100.0000 -0.8500 -100.0000 + -100.0000 -8.5000 -100.0000 + -100.0000 -8.5000 -100.0000 + -100.0000 -8.5000 -100.0000 + -100.0000 -7.7350 -100.0000 + __________ 1.0000 __________ +""" + +q_values_k_2_action_north: """ + __________ illegal __________ + illegal -0.8500 illegal + illegal -9.1502 illegal + illegal -15.0025 illegal + illegal -15.0025 illegal + illegal -15.0025 illegal + __________ illegal __________ +""" + +q_values_k_2_action_east: """ + __________ illegal __________ + illegal -76.4363 illegal + illegal -76.8974 illegal + illegal -77.2225 illegal + illegal -77.1900 illegal + illegal -76.8187 illegal + __________ illegal __________ +""" + +q_values_k_2_action_exit: """ + __________ 10.0000 __________ + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + -100.0000 
illegal -100.0000 + -100.0000 illegal -100.0000 + __________ 1.0000 __________ +""" + +q_values_k_2_action_south: """ + __________ illegal __________ + illegal -15.0025 illegal + illegal -15.0025 illegal + illegal -15.0025 illegal + illegal -14.4173 illegal + illegal -7.7350 illegal + __________ illegal __________ +""" + +q_values_k_2_action_west: """ + __________ illegal __________ + illegal -76.4363 illegal + illegal -76.8974 illegal + illegal -77.2225 illegal + illegal -77.1900 illegal + illegal -76.8187 illegal + __________ illegal __________ +""" + +values_k_3: """ + __________ 10.0000 __________ + -100.0000 -0.8500 -100.0000 + -100.0000 -9.1502 -100.0000 + -100.0000 -15.0025 -100.0000 + -100.0000 -14.4173 -100.0000 + -100.0000 -7.7350 -100.0000 + __________ 1.0000 __________ +""" + +q_values_k_3_action_north: """ + __________ illegal __________ + illegal -0.8500 illegal + illegal -9.1502 illegal + illegal -15.4999 illegal + illegal -19.9769 illegal + illegal -19.5292 illegal + __________ illegal __________ +""" + +q_values_k_3_action_east: """ + __________ illegal __________ + illegal -76.4639 illegal + illegal -77.1737 illegal + illegal -77.5016 illegal + illegal -77.4663 illegal + illegal -77.0702 illegal + __________ illegal __________ +""" + +q_values_k_3_action_exit: """ + __________ 10.0000 __________ + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + __________ 1.0000 __________ +""" + +q_values_k_3_action_south: """ + __________ illegal __________ + illegal -15.4999 illegal + illegal -19.9769 illegal + illegal -19.5292 illegal + illegal -14.4173 illegal + illegal -7.7350 illegal + __________ illegal __________ +""" + +q_values_k_3_action_west: """ + __________ illegal __________ + illegal -76.4639 illegal + illegal -77.1737 illegal + illegal -77.5016 illegal + illegal -77.4663 illegal + illegal -77.0702 illegal + __________ illegal __________ +""" + +values_k_4: """ + __________ 10.0000 __________ + -100.0000 -0.8500 -100.0000 + -100.0000 -9.1502 -100.0000 + -100.0000 -15.4999 -100.0000 + -100.0000 -14.4173 -100.0000 + -100.0000 -7.7350 -100.0000 + __________ 1.0000 __________ +""" + +q_values_k_4_action_north: """ + __________ illegal __________ + illegal -0.8500 illegal + illegal -9.1502 illegal + illegal -15.4999 illegal + illegal -20.3575 illegal + illegal -19.5292 illegal + __________ illegal __________ +""" + +q_values_k_4_action_east: """ + __________ illegal __________ + illegal -76.4639 illegal + illegal -77.1949 illegal + illegal -77.5016 illegal + illegal -77.4875 illegal + illegal -77.0702 illegal + __________ illegal __________ +""" + +q_values_k_4_action_exit: """ + __________ 10.0000 __________ + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + __________ 1.0000 __________ +""" + +q_values_k_4_action_south: """ + __________ illegal __________ + illegal -15.4999 illegal + illegal -20.3575 illegal + illegal -19.5292 illegal + illegal -14.4173 illegal + illegal -7.7350 illegal + __________ illegal __________ +""" + +q_values_k_4_action_west: """ + __________ illegal __________ + illegal -76.4639 illegal + illegal -77.1949 illegal + illegal -77.5016 illegal + illegal -77.4875 illegal + illegal -77.0702 illegal + __________ illegal __________ +""" + +values_k_5: """ + __________ 10.0000 __________ + -100.0000 -0.8500 -100.0000 + -100.0000 -9.1502 -100.0000 + -100.0000 
-15.4999 -100.0000 + -100.0000 -14.4173 -100.0000 + -100.0000 -7.7350 -100.0000 + __________ 1.0000 __________ +""" + +q_values_k_5_action_north: """ + __________ illegal __________ + illegal -0.8500 illegal + illegal -9.1502 illegal + illegal -15.4999 illegal + illegal -20.3575 illegal + illegal -19.5292 illegal + __________ illegal __________ +""" + +q_values_k_5_action_east: """ + __________ illegal __________ + illegal -76.4639 illegal + illegal -77.1949 illegal + illegal -77.5016 illegal + illegal -77.4875 illegal + illegal -77.0702 illegal + __________ illegal __________ +""" + +q_values_k_5_action_exit: """ + __________ 10.0000 __________ + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + __________ 1.0000 __________ +""" + +q_values_k_5_action_south: """ + __________ illegal __________ + illegal -15.4999 illegal + illegal -20.3575 illegal + illegal -19.5292 illegal + illegal -14.4173 illegal + illegal -7.7350 illegal + __________ illegal __________ +""" + +q_values_k_5_action_west: """ + __________ illegal __________ + illegal -76.4639 illegal + illegal -77.1949 illegal + illegal -77.5016 illegal + illegal -77.4875 illegal + illegal -77.0702 illegal + __________ illegal __________ +""" + +values_k_6: """ + __________ 10.0000 __________ + -100.0000 -0.8500 -100.0000 + -100.0000 -9.1502 -100.0000 + -100.0000 -15.4999 -100.0000 + -100.0000 -14.4173 -100.0000 + -100.0000 -7.7350 -100.0000 + __________ 1.0000 __________ +""" + +q_values_k_6_action_north: """ + __________ illegal __________ + illegal -0.8500 illegal + illegal -9.1502 illegal + illegal -15.4999 illegal + illegal -20.3575 illegal + illegal -19.5292 illegal + __________ illegal __________ +""" + +q_values_k_6_action_east: """ + __________ illegal __________ + illegal -76.4639 illegal + illegal -77.1949 illegal + illegal -77.5016 illegal + illegal -77.4875 illegal + illegal -77.0702 illegal + __________ illegal __________ +""" + +q_values_k_6_action_exit: """ + __________ 10.0000 __________ + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + __________ 1.0000 __________ +""" + +q_values_k_6_action_south: """ + __________ illegal __________ + illegal -15.4999 illegal + illegal -20.3575 illegal + illegal -19.5292 illegal + illegal -14.4173 illegal + illegal -7.7350 illegal + __________ illegal __________ +""" + +q_values_k_6_action_west: """ + __________ illegal __________ + illegal -76.4639 illegal + illegal -77.1949 illegal + illegal -77.5016 illegal + illegal -77.4875 illegal + illegal -77.0702 illegal + __________ illegal __________ +""" + +values_k_7: """ + __________ 10.0000 __________ + -100.0000 -0.8500 -100.0000 + -100.0000 -9.1502 -100.0000 + -100.0000 -15.4999 -100.0000 + -100.0000 -14.4173 -100.0000 + -100.0000 -7.7350 -100.0000 + __________ 1.0000 __________ +""" + +q_values_k_7_action_north: """ + __________ illegal __________ + illegal -0.8500 illegal + illegal -9.1502 illegal + illegal -15.4999 illegal + illegal -20.3575 illegal + illegal -19.5292 illegal + __________ illegal __________ +""" + +q_values_k_7_action_east: """ + __________ illegal __________ + illegal -76.4639 illegal + illegal -77.1949 illegal + illegal -77.5016 illegal + illegal -77.4875 illegal + illegal -77.0702 illegal + __________ illegal __________ +""" + +q_values_k_7_action_exit: """ + __________ 10.0000 __________ + -100.0000 illegal 
-100.0000 + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + __________ 1.0000 __________ +""" + +q_values_k_7_action_south: """ + __________ illegal __________ + illegal -15.4999 illegal + illegal -20.3575 illegal + illegal -19.5292 illegal + illegal -14.4173 illegal + illegal -7.7350 illegal + __________ illegal __________ +""" + +q_values_k_7_action_west: """ + __________ illegal __________ + illegal -76.4639 illegal + illegal -77.1949 illegal + illegal -77.5016 illegal + illegal -77.4875 illegal + illegal -77.0702 illegal + __________ illegal __________ +""" + +values_k_8: """ + __________ 10.0000 __________ + -100.0000 -0.8500 -100.0000 + -100.0000 -9.1502 -100.0000 + -100.0000 -15.4999 -100.0000 + -100.0000 -14.4173 -100.0000 + -100.0000 -7.7350 -100.0000 + __________ 1.0000 __________ +""" + +q_values_k_8_action_north: """ + __________ illegal __________ + illegal -0.8500 illegal + illegal -9.1502 illegal + illegal -15.4999 illegal + illegal -20.3575 illegal + illegal -19.5292 illegal + __________ illegal __________ +""" + +q_values_k_8_action_east: """ + __________ illegal __________ + illegal -76.4639 illegal + illegal -77.1949 illegal + illegal -77.5016 illegal + illegal -77.4875 illegal + illegal -77.0702 illegal + __________ illegal __________ +""" + +q_values_k_8_action_exit: """ + __________ 10.0000 __________ + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + __________ 1.0000 __________ +""" + +q_values_k_8_action_south: """ + __________ illegal __________ + illegal -15.4999 illegal + illegal -20.3575 illegal + illegal -19.5292 illegal + illegal -14.4173 illegal + illegal -7.7350 illegal + __________ illegal __________ +""" + +q_values_k_8_action_west: """ + __________ illegal __________ + illegal -76.4639 illegal + illegal -77.1949 illegal + illegal -77.5016 illegal + illegal -77.4875 illegal + illegal -77.0702 illegal + __________ illegal __________ +""" + +values_k_9: """ + __________ 10.0000 __________ + -100.0000 -0.8500 -100.0000 + -100.0000 -9.1502 -100.0000 + -100.0000 -15.4999 -100.0000 + -100.0000 -14.4173 -100.0000 + -100.0000 -7.7350 -100.0000 + __________ 1.0000 __________ +""" + +q_values_k_9_action_north: """ + __________ illegal __________ + illegal -0.8500 illegal + illegal -9.1502 illegal + illegal -15.4999 illegal + illegal -20.3575 illegal + illegal -19.5292 illegal + __________ illegal __________ +""" + +q_values_k_9_action_east: """ + __________ illegal __________ + illegal -76.4639 illegal + illegal -77.1949 illegal + illegal -77.5016 illegal + illegal -77.4875 illegal + illegal -77.0702 illegal + __________ illegal __________ +""" + +q_values_k_9_action_exit: """ + __________ 10.0000 __________ + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + __________ 1.0000 __________ +""" + +q_values_k_9_action_south: """ + __________ illegal __________ + illegal -15.4999 illegal + illegal -20.3575 illegal + illegal -19.5292 illegal + illegal -14.4173 illegal + illegal -7.7350 illegal + __________ illegal __________ +""" + +q_values_k_9_action_west: """ + __________ illegal __________ + illegal -76.4639 illegal + illegal -77.1949 illegal + illegal -77.5016 illegal + illegal -77.4875 illegal + illegal -77.0702 illegal + __________ illegal __________ +""" + +values_k_100: """ + __________ 
10.0000 __________ + -100.0000 -0.8500 -100.0000 + -100.0000 -9.1502 -100.0000 + -100.0000 -15.4999 -100.0000 + -100.0000 -14.4173 -100.0000 + -100.0000 -7.7350 -100.0000 + __________ 1.0000 __________ +""" + +q_values_k_100_action_north: """ + __________ illegal __________ + illegal -0.8500 illegal + illegal -9.1502 illegal + illegal -15.4999 illegal + illegal -20.3575 illegal + illegal -19.5292 illegal + __________ illegal __________ +""" + +q_values_k_100_action_east: """ + __________ illegal __________ + illegal -76.4639 illegal + illegal -77.1949 illegal + illegal -77.5016 illegal + illegal -77.4875 illegal + illegal -77.0702 illegal + __________ illegal __________ +""" + +q_values_k_100_action_exit: """ + __________ 10.0000 __________ + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + -100.0000 illegal -100.0000 + __________ 1.0000 __________ +""" + +q_values_k_100_action_south: """ + __________ illegal __________ + illegal -15.4999 illegal + illegal -20.3575 illegal + illegal -19.5292 illegal + illegal -14.4173 illegal + illegal -7.7350 illegal + __________ illegal __________ +""" + +q_values_k_100_action_west: """ + __________ illegal __________ + illegal -76.4639 illegal + illegal -77.1949 illegal + illegal -77.5016 illegal + illegal -77.4875 illegal + illegal -77.0702 illegal + __________ illegal __________ +""" + +policy: """ + __________ exit __________ + exit north exit + exit north exit + exit north exit + exit south exit + exit south exit + __________ exit __________ +""" + +actions: """ +north +east +exit +south +west +""" + diff --git a/reinforcement/test_cases/q1/3-bridge.test b/reinforcement/test_cases/q1/3-bridge.test new file mode 100644 index 0000000..0ae8fcf --- /dev/null +++ b/reinforcement/test_cases/q1/3-bridge.test @@ -0,0 +1,27 @@ +class: "ValueIterationTest" + +# GridWorld specification +# _ is empty space +# numbers are terminal states with that value +# # is a wall +# S is a start state +# +grid: """ + # 10 # + -100 _ -100 + -100 _ -100 + -100 _ -100 + -100 _ -100 + -100 S -100 + # 1 # +""" +gridName: "bridgeGrid" +discount: "0.85" +noise: "0.1" +livingReward: "0.0" +epsilon: "0.5" +learningRate: "0.1" +numExperiences: "500" +valueIterations: "100" +iterations: "10000" + diff --git a/reinforcement/test_cases/q1/4-discountgrid.solution b/reinforcement/test_cases/q1/4-discountgrid.solution new file mode 100644 index 0000000..a7aff8c --- /dev/null +++ b/reinforcement/test_cases/q1/4-discountgrid.solution @@ -0,0 +1,544 @@ +values_k_0: """ + 0.0000 0.0000 0.0000 0.0000 0.0000 + 0.0000 0.0000 __________ 0.0000 0.0000 + 0.0000 0.0000 0.0000 0.0000 0.0000 + 0.0000 0.0000 __________ __________ 0.0000 + 0.0000 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_0_action_north: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_0_action_east: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_0_action_exit: """ + -10.0000 illegal 10.0000 illegal illegal + -10.0000 illegal __________ illegal illegal + -10.0000 illegal 1.0000 illegal illegal + -10.0000 illegal __________ __________ illegal + -10.0000 illegal illegal illegal illegal +""" + 
+q_values_k_0_action_south: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_0_action_west: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +values_k_1: """ + -10.0000 0.0000 10.0000 0.0000 0.0000 + -10.0000 0.0000 __________ 0.0000 0.0000 + -10.0000 0.0000 1.0000 0.0000 0.0000 + -10.0000 0.0000 __________ __________ 0.0000 + -10.0000 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_1_action_north: """ + illegal 0.0000 illegal 0.9000 0.0000 + illegal -0.9000 __________ 0.0000 0.0000 + illegal -0.8100 illegal 0.0900 0.0000 + illegal -0.9000 __________ __________ 0.0000 + illegal -0.9000 0.0000 0.0000 0.0000 +""" + +q_values_k_1_action_east: """ + illegal 7.2000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.7200 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_1_action_exit: """ + -10.0000 illegal 10.0000 illegal illegal + -10.0000 illegal __________ illegal illegal + -10.0000 illegal 1.0000 illegal illegal + -10.0000 illegal __________ __________ illegal + -10.0000 illegal illegal illegal illegal +""" + +q_values_k_1_action_south: """ + illegal 0.0000 illegal 0.9000 0.0000 + illegal -0.9000 __________ 0.0000 0.0000 + illegal -0.8100 illegal 0.0900 0.0000 + illegal -0.9000 __________ __________ 0.0000 + illegal -0.9000 0.0000 0.0000 0.0000 +""" + +q_values_k_1_action_west: """ + illegal -7.2000 illegal 7.2000 0.0000 + illegal -7.2000 __________ 0.0000 0.0000 + illegal -7.2000 illegal 0.7200 0.0000 + illegal -7.2000 __________ __________ 0.0000 + illegal -7.2000 0.0000 0.0000 0.0000 +""" + +values_k_2: """ + -10.0000 7.2000 10.0000 7.2000 0.0000 + -10.0000 0.0000 __________ 0.0000 0.0000 + -10.0000 0.7200 1.0000 0.7200 0.0000 + -10.0000 0.0000 __________ __________ 0.0000 + -10.0000 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_2_action_north: """ + illegal 5.1840 illegal 6.0840 0.6480 + illegal 4.2840 __________ 5.1840 0.0000 + illegal -0.8100 illegal 0.0900 0.0648 + illegal -0.3816 __________ __________ 0.0000 + illegal -0.9000 0.0000 0.0000 0.0000 +""" + +q_values_k_2_action_east: """ + illegal 7.8480 illegal 0.6480 0.0000 + illegal 0.7128 __________ 0.7128 0.0000 + illegal 0.7200 illegal 0.0648 0.0000 + illegal 0.0648 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_2_action_exit: """ + -10.0000 illegal 10.0000 illegal illegal + -10.0000 illegal __________ illegal illegal + -10.0000 illegal 1.0000 illegal illegal + -10.0000 illegal __________ __________ illegal + -10.0000 illegal illegal illegal illegal +""" + +q_values_k_2_action_south: """ + illegal 0.0000 illegal 0.9000 0.6480 + illegal -0.3816 __________ 0.5184 0.0000 + illegal -0.8100 illegal 0.6084 0.0648 + illegal -0.9000 __________ __________ 0.0000 + illegal -0.9000 0.0000 0.0000 0.0000 +""" + +q_values_k_2_action_west: """ + illegal -6.5520 illegal 7.8480 5.1840 + illegal -6.4872 __________ 0.7128 0.0000 + illegal -7.2000 illegal 0.7848 0.5184 + illegal -7.1352 __________ __________ 0.0000 + illegal -7.2000 0.0000 0.0000 0.0000 +""" + +values_k_3: """ + -10.0000 7.8480 10.0000 7.8480 5.1840 + -10.0000 4.2840 __________ 5.1840 0.0000 + -10.0000 0.7200 
1.0000 0.7848 0.5184 + -10.0000 0.0648 __________ __________ 0.0000 + -10.0000 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_3_action_north: """ + illegal 5.6506 illegal 7.0171 4.9054 + illegal 5.1361 __________ 6.1171 4.1990 + illegal 2.2745 illegal 3.8691 0.1173 + illegal -0.3758 __________ __________ 0.3732 + illegal -0.8533 0.0000 0.0000 0.0000 +""" + +q_values_k_3_action_east: """ + illegal 8.2919 illegal 4.9054 4.1990 + illegal 3.8556 __________ 0.7770 0.5132 + illegal 1.1114 illegal 0.9104 0.3732 + illegal 0.1115 __________ __________ 0.0467 + illegal 0.0058 0.0000 0.0000 0.0000 +""" + +q_values_k_3_action_exit: """ + -10.0000 illegal 10.0000 illegal illegal + -10.0000 illegal __________ illegal illegal + -10.0000 illegal 1.0000 illegal illegal + -10.0000 illegal __________ __________ illegal + -10.0000 illegal illegal illegal illegal +""" + +q_values_k_3_action_south: """ + illegal 3.0845 illegal 5.0990 1.1729 + illegal 0.0040 __________ 1.0316 0.8398 + illegal -0.7633 illegal 0.7017 0.1173 + illegal -0.8942 __________ __________ 0.0000 + illegal -0.9000 0.0000 0.0000 0.0000 +""" + +q_values_k_3_action_west: """ + illegal -6.1081 illegal 8.3729 6.1171 + illegal -6.4289 __________ 4.5094 4.2457 + illegal -6.8086 illegal 1.2572 0.5651 + illegal -7.1352 __________ __________ 0.0467 + illegal -7.1942 0.0000 0.0000 0.0000 +""" + +values_k_4: """ + -10.0000 8.2919 10.0000 8.3729 6.1171 + -10.0000 5.1361 __________ 6.1171 4.2457 + -10.0000 2.2745 1.0000 3.8691 0.5651 + -10.0000 0.1115 __________ __________ 0.3732 + -10.0000 0.0058 0.0000 0.0000 0.0000 +""" + +q_values_k_4_action_north: """ + illegal 5.9702 illegal 7.4790 5.7084 + illegal 5.5324 __________ 6.9611 5.3370 + illegal 2.8880 illegal 4.5452 3.4560 + illegal 0.7477 __________ __________ 0.4740 + illegal -0.8198 0.0005 0.0000 0.2687 +""" + +q_values_k_4_action_east: """ + illegal 8.4085 illegal 5.7084 5.3370 + illegal 4.6490 __________ 4.1587 3.6583 + illegal 1.1923 illegal 1.3056 0.8225 + illegal 0.2855 __________ __________ 0.3196 + illegal 0.0106 0.0000 0.0000 0.0336 +""" + +q_values_k_4_action_exit: """ + -10.0000 illegal 10.0000 illegal illegal + -10.0000 illegal __________ illegal illegal + -10.0000 illegal 1.0000 illegal illegal + -10.0000 illegal __________ __________ illegal + -10.0000 illegal illegal illegal illegal +""" + +q_values_k_4_action_south: """ + illegal 3.6980 illegal 5.8549 4.3610 + illegal 1.1999 __________ 3.7184 1.3395 + illegal -0.7298 illegal 2.9266 0.6678 + illegal -0.8858 __________ __________ 0.0672 + illegal -0.8958 0.0005 0.0000 0.0000 +""" + +q_values_k_4_action_west: """ + illegal -5.9915 illegal 8.5041 6.9611 + illegal -6.2490 __________ 5.5061 5.0057 + illegal -6.7277 illegal 1.6188 3.2015 + illegal -6.9948 __________ __________ 0.3196 + illegal -7.1894 0.0042 0.0000 0.0336 +""" + +values_k_5: """ + -10.0000 8.4085 10.0000 8.5041 6.9611 + -10.0000 5.5324 __________ 6.9611 5.3370 + -10.0000 2.8880 1.0000 4.5452 3.4560 + -10.0000 0.7477 __________ __________ 0.4740 + -10.0000 0.0106 0.0042 0.0000 0.2687 +""" + +q_values_k_5_action_north: """ + illegal 6.0541 illegal 7.6495 6.4039 + illegal 5.6521 __________ 7.2298 6.1188 + illegal 3.1733 illegal 5.4130 4.5627 + illegal 1.2467 __________ __________ 2.5736 + illegal -0.3613 0.0040 0.0246 0.3655 +""" + +q_values_k_5_action_east: """ + illegal 8.4547 illegal 6.4039 6.1188 + illegal 5.0000 __________ 5.0171 4.7802 + illegal 1.2852 illegal 3.5239 3.0113 + illegal 0.7992 __________ __________ 0.6765 + illegal 0.0713 0.0008 0.1935 0.2603 +""" + 
+q_values_k_5_action_exit: """ + -10.0000 illegal 10.0000 illegal illegal + -10.0000 illegal __________ illegal illegal + -10.0000 illegal 1.0000 illegal illegal + -10.0000 illegal __________ __________ illegal + -10.0000 illegal illegal illegal illegal +""" + +q_values_k_5_action_south: """ + illegal 3.9833 illegal 6.5385 5.2345 + illegal 1.6773 __________ 4.3794 3.5951 + illegal -0.2717 illegal 3.6736 1.0614 + illegal -0.8251 __________ __________ 0.2788 + illegal -0.8920 0.0040 0.0246 0.2177 +""" + +q_values_k_5_action_west: """ + illegal -5.9453 illegal 8.5919 7.2298 + illegal -6.1833 __________ 6.1864 5.9496 + illegal -6.6348 illegal 1.7556 3.7955 + illegal -6.9391 __________ __________ 0.6765 + illegal -7.1318 0.0084 0.0030 0.0668 +""" + +values_k_6: """ + -10.0000 8.4547 10.0000 8.5919 7.2298 + -10.0000 5.6521 __________ 7.2298 6.1188 + -10.0000 3.1733 1.0000 5.4130 4.5627 + -10.0000 1.2467 __________ __________ 2.5736 + -10.0000 0.0713 0.0084 0.1935 0.3655 +""" + +q_values_k_6_action_north: """ + illegal 6.0874 illegal 7.7368 6.6294 + illegal 5.6961 __________ 7.3875 6.4068 + illegal 3.2595 illegal 5.7061 5.3034 + illegal 1.4970 __________ __________ 3.7484 + illegal -0.0017 0.0298 0.1730 1.9033 +""" + +q_values_k_6_action_east: """ + illegal 8.4696 illegal 6.6294 6.4068 + illegal 5.1160 __________ 5.6660 5.4669 + illegal 1.3409 illegal 4.4230 4.0675 + illegal 1.1896 __________ __________ 2.2966 + illegal 0.1246 0.1408 0.2980 0.5277 +""" + +q_values_k_6_action_exit: """ + -10.0000 illegal 10.0000 illegal illegal + -10.0000 illegal __________ illegal illegal + -10.0000 illegal 1.0000 illegal illegal + -10.0000 illegal __________ __________ illegal + -10.0000 illegal illegal illegal illegal +""" + +q_values_k_6_action_south: """ + illegal 4.0695 illegal 6.7561 5.8295 + illegal 1.8935 __________ 5.0988 4.4865 + illegal 0.0876 illegal 4.3980 2.7508 + illegal -0.7365 __________ __________ 0.7264 + illegal -0.8479 0.0298 0.1730 0.3135 +""" + +q_values_k_6_action_west: """ + illegal -5.9304 illegal 8.6239 7.3875 + illegal -6.1535 __________ 6.4659 6.2668 + illegal -6.5791 illegal 1.8579 4.6797 + illegal -6.9080 __________ __________ 2.2966 + illegal -7.0814 0.0528 0.0408 0.4038 +""" + +values_k_7: """ + -10.0000 8.4696 10.0000 8.6239 7.3875 + -10.0000 5.6961 __________ 7.3875 6.4068 + -10.0000 3.2595 1.0000 5.7061 5.3034 + -10.0000 1.4970 __________ __________ 3.7484 + -10.0000 0.1246 0.1408 0.2980 1.9033 +""" + +q_values_k_7_action_north: """ + illegal 6.0981 illegal 7.7741 6.7600 + illegal 5.7108 __________ 7.4507 6.5605 + illegal 3.2912 illegal 5.8863 5.6038 + illegal 1.5816 __________ __________ 4.4932 + illegal 0.1905 0.1394 0.3985 2.8970 +""" + +q_values_k_7_action_east: """ + illegal 8.4749 illegal 6.7600 6.5605 + illegal 5.1568 __________ 5.9026 5.7551 + illegal 1.3674 illegal 4.9969 4.7324 + illegal 1.3824 __________ __________ 3.3475 + illegal 0.2473 0.2399 1.4240 1.8790 +""" + +q_values_k_7_action_exit: """ + -10.0000 illegal 10.0000 illegal illegal + -10.0000 illegal __________ illegal illegal + -10.0000 illegal 1.0000 illegal illegal + -10.0000 illegal __________ __________ illegal + -10.0000 illegal illegal illegal illegal +""" + +q_values_k_7_action_south: """ + illegal 4.1012 illegal 6.8839 6.0539 + illegal 1.9595 __________ 5.3499 5.0599 + illegal 0.2678 illegal 4.6757 3.6897 + illegal -0.6755 __________ __________ 2.0451 + illegal -0.7976 0.1394 0.3985 1.5685 +""" + +q_values_k_7_action_west: """ + illegal -5.9251 illegal 8.6410 7.4507 + illegal -6.1444 __________ 6.6087 
6.4612 + illegal -6.5526 illegal 1.8984 5.0224 + illegal -6.8954 __________ __________ 3.3475 + illegal -7.0541 0.1151 0.1550 0.7232 +""" + +values_k_8: """ + -10.0000 8.4749 10.0000 8.6410 7.4507 + -10.0000 5.7108 __________ 7.4507 6.5605 + -10.0000 3.2912 1.0000 5.8863 5.6038 + -10.0000 1.5816 __________ __________ 4.4932 + -10.0000 0.2473 0.2399 1.4240 2.8970 +""" + +q_values_k_8_action_north: """ + illegal 6.1019 illegal 7.7921 6.8128 + illegal 5.7159 __________ 7.4826 6.6255 + illegal 3.3017 illegal 5.9589 5.7577 + illegal 1.6120 __________ __________ 4.8435 + illegal 0.2603 0.3231 1.3076 3.6240 +""" + +q_values_k_8_action_east: """ + illegal 8.4767 illegal 6.8128 6.6255 + illegal 5.1707 __________ 6.0310 5.8985 + illegal 1.3763 illegal 5.2350 5.0295 + illegal 1.4572 __________ __________ 4.0001 + illegal 0.3373 1.0685 2.3421 2.7509 +""" + +q_values_k_8_action_exit: """ + -10.0000 illegal 10.0000 illegal illegal + -10.0000 illegal __________ illegal illegal + -10.0000 illegal 1.0000 illegal illegal + -10.0000 illegal __________ __________ illegal + -10.0000 illegal illegal illegal illegal +""" + +q_values_k_8_action_south: """ + illegal 4.1117 illegal 6.9351 6.1718 + illegal 1.9836 __________ 5.4992 5.2957 + illegal 0.3287 illegal 4.8325 4.2692 + illegal -0.5796 __________ __________ 2.8946 + illegal -0.7003 0.3231 1.3076 2.4747 +""" + +q_values_k_8_action_west: """ + illegal -5.9233 illegal 8.6483 7.4826 + illegal -6.1411 __________ 6.6720 6.5394 + illegal -6.5437 illegal 1.9203 5.2330 + illegal -6.8815 __________ __________ 4.0001 + illegal -7.0354 0.2213 0.4290 1.6904 +""" + +values_k_9: """ + -10.0000 8.4767 10.0000 8.6483 7.4826 + -10.0000 5.7159 __________ 7.4826 6.6255 + -10.0000 3.3017 1.0000 5.9589 5.7577 + -10.0000 1.6120 __________ __________ 4.8435 + -10.0000 0.3373 1.0685 2.3421 3.6240 +""" + +q_values_k_9_action_north: """ + illegal 6.1032 illegal 7.8002 6.8392 + illegal 5.7177 __________ 7.4965 6.6572 + illegal 3.3055 illegal 5.9956 5.8249 + illegal 1.6223 __________ __________ 5.0174 + illegal 0.3568 1.0105 2.1087 4.0243 +""" + +q_values_k_9_action_east: """ + illegal 8.4773 illegal 6.8392 6.6572 + illegal 5.1755 __________ 6.0850 5.9620 + illegal 1.3795 illegal 5.3553 5.1777 + illegal 1.4881 __________ __________ 4.3316 + illegal 0.9447 1.8787 3.0308 3.3713 +""" + +q_values_k_9_action_exit: """ + -10.0000 illegal 10.0000 illegal illegal + -10.0000 illegal __________ illegal illegal + -10.0000 illegal 1.0000 illegal illegal + -10.0000 illegal __________ __________ illegal + -10.0000 illegal illegal illegal illegal +""" + +q_values_k_9_action_south: """ + illegal 4.1155 illegal 6.9609 6.2222 + illegal 1.9917 __________ 5.5601 5.4153 + illegal 0.3506 illegal 4.8986 4.5418 + illegal -0.5121 __________ __________ 3.4811 + illegal -0.5610 1.0105 2.1087 3.1462 +""" + +q_values_k_9_action_west: """ + illegal -5.9227 illegal 8.6518 7.4965 + illegal -6.1399 __________ 6.7021 6.5791 + illegal -6.5405 illegal 1.9297 5.3226 + illegal -6.8725 __________ __________ 4.3316 + illegal -7.0246 0.4352 1.1909 2.4484 +""" + +values_k_100: """ + -10.0000 8.4777 10.0000 8.6547 7.5087 + -10.0000 5.7186 __________ 7.5087 6.6836 + -10.0000 3.3074 1.0000 6.0258 5.8841 + -10.0000 2.0045 __________ __________ 5.1665 + -10.0000 2.9289 3.4513 3.9306 4.4765 +""" + +q_values_k_100_action_north: """ + illegal 6.1039 illegal 7.8072 6.8610 + illegal 5.7186 __________ 7.5087 6.6836 + illegal 3.3074 illegal 6.0258 5.8841 + illegal 1.6617 __________ __________ 5.1665 + illegal 0.8539 3.1023 3.5435 4.4765 
+""" + +q_values_k_100_action_east: """ + illegal 8.4777 illegal 6.8610 6.6836 + illegal 5.1780 __________ 6.1334 6.0175 + illegal 1.4151 illegal 5.4546 5.3030 + illegal 2.0045 __________ __________ 4.6523 + illegal 2.9289 3.4513 3.9306 4.0910 +""" + +q_values_k_100_action_exit: """ + -10.0000 illegal 10.0000 illegal illegal + -10.0000 illegal __________ illegal illegal + -10.0000 illegal 1.0000 illegal illegal + -10.0000 illegal __________ __________ illegal + -10.0000 illegal illegal illegal illegal +""" + +q_values_k_100_action_south: """ + illegal 4.1174 illegal 6.9820 6.2669 + illegal 1.9960 __________ 5.6159 5.5138 + illegal 0.6333 illegal 4.9582 4.7918 + illegal 1.3892 __________ __________ 4.1531 + illegal 1.5194 3.1023 3.5435 3.9797 +""" + +q_values_k_100_action_west: """ + illegal -5.9223 illegal 8.6547 7.5087 + illegal -6.1393 __________ 6.7275 6.6116 + illegal -6.5049 illegal 1.9381 5.4051 + illegal -6.6387 __________ __________ 4.6523 + illegal -6.7560 2.7300 3.1924 3.6979 +""" + +policy: """ + exit east exit west west + exit north __________ north north + exit north exit north north + exit east __________ __________ north + exit east east east north +""" + +actions: """ +north +east +exit +south +west +""" + diff --git a/reinforcement/test_cases/q1/4-discountgrid.test b/reinforcement/test_cases/q1/4-discountgrid.test new file mode 100644 index 0000000..13f071b --- /dev/null +++ b/reinforcement/test_cases/q1/4-discountgrid.test @@ -0,0 +1,24 @@ +class: "ValueIterationTest" + +# GridWorld specification +# _ is empty space +# numbers are terminal states with that value +# # is a wall +# S is a start state +# +grid: """ + -10 _ 10 _ _ + -10 _ # _ _ + -10 _ 1 _ _ + -10 _ # # _ + -10 S _ _ _ +""" +discount: "0.9" +noise: "0.2" +livingReward: "0.0" +epsilon: "0.2" +learningRate: "0.1" +numExperiences: "3000" +valueIterations: "100" +iterations: "10000" + diff --git a/reinforcement/test_cases/q1/CONFIG b/reinforcement/test_cases/q1/CONFIG new file mode 100644 index 0000000..b165d09 --- /dev/null +++ b/reinforcement/test_cases/q1/CONFIG @@ -0,0 +1,2 @@ +max_points: "6" +class: "PassAllTestsQuestion" \ No newline at end of file diff --git a/reinforcement/test_cases/q2/1-bridge-grid.solution b/reinforcement/test_cases/q2/1-bridge-grid.solution new file mode 100644 index 0000000..ffe6254 --- /dev/null +++ b/reinforcement/test_cases/q2/1-bridge-grid.solution @@ -0,0 +1,2 @@ +# This is the solution file for test_cases/q2/1-bridge-grid.test. +# File intentionally blank. 
diff --git a/reinforcement/test_cases/q2/1-bridge-grid.test b/reinforcement/test_cases/q2/1-bridge-grid.test new file mode 100644 index 0000000..fac6812 --- /dev/null +++ b/reinforcement/test_cases/q2/1-bridge-grid.test @@ -0,0 +1,29 @@ +class: "GridPolicyTest" + +# Function in module in analysis that returns (discount, noise) +parameterFn: "question2" +question2: "true" + +# GridWorld specification +# _ is empty space +# numbers are terminal states with that value +# # is a wall +# S is a start state +# +grid: """ + # -100 -100 -100 -100 -100 # + 1 S _ _ _ _ 10 + # -100 -100 -100 -100 -100 # +""" +gridName: "bridgeGrid" + +# Policy specification +# _ policy choice not checked +# N, E, S, W policy action must be north, east, south, west +# +policy: """ + _ _ _ _ _ _ _ + _ E _ _ _ _ _ + _ _ _ _ _ _ _ +""" + diff --git a/reinforcement/test_cases/q2/CONFIG b/reinforcement/test_cases/q2/CONFIG new file mode 100644 index 0000000..279f0f0 --- /dev/null +++ b/reinforcement/test_cases/q2/CONFIG @@ -0,0 +1,2 @@ +max_points: "1" +class: "PassAllTestsQuestion" diff --git a/reinforcement/test_cases/q3/1-question-3.1.solution b/reinforcement/test_cases/q3/1-question-3.1.solution new file mode 100644 index 0000000..768ceea --- /dev/null +++ b/reinforcement/test_cases/q3/1-question-3.1.solution @@ -0,0 +1,2 @@ +# This is the solution file for test_cases/q3/1-question-3.1.test. +# File intentionally blank. diff --git a/reinforcement/test_cases/q3/1-question-3.1.test b/reinforcement/test_cases/q3/1-question-3.1.test new file mode 100644 index 0000000..b1737a1 --- /dev/null +++ b/reinforcement/test_cases/q3/1-question-3.1.test @@ -0,0 +1,31 @@ +class: "GridPolicyTest" + +# Function in module in analysis that returns (discount, noise) +parameterFn: "question3a" + +# GridWorld specification +# _ is empty space +# numbers are terminal states with that value +# # is a wall +# S is a start state +# +grid: """ + _ _ _ _ _ + _ # _ _ _ + _ # 1 # 10 + S _ _ _ _ + -10 -10 -10 -10 -10 +""" +gridName: "discountGrid" + +# Policy specification +# _ policy choice not checked +# N, E, S, W policy action must be north, east, south, west +# +policy: """ + _ _ _ _ _ + _ _ _ _ _ + _ _ _ _ _ + E E N _ _ + _ _ _ _ _ +""" diff --git a/reinforcement/test_cases/q3/2-question-3.2.solution b/reinforcement/test_cases/q3/2-question-3.2.solution new file mode 100644 index 0000000..8098e15 --- /dev/null +++ b/reinforcement/test_cases/q3/2-question-3.2.solution @@ -0,0 +1,2 @@ +# This is the solution file for test_cases/q3/2-question-3.2.test. +# File intentionally blank. 
diff --git a/reinforcement/test_cases/q3/2-question-3.2.test b/reinforcement/test_cases/q3/2-question-3.2.test new file mode 100644 index 0000000..e9a1e0f --- /dev/null +++ b/reinforcement/test_cases/q3/2-question-3.2.test @@ -0,0 +1,31 @@ +class: "GridPolicyTest" + +# Function in module in analysis that returns (discount, noise) +parameterFn: "question3b" + +# GridWorld specification +# _ is empty space +# numbers are terminal states with that value +# # is a wall +# S is a start state +# +grid: """ + _ _ _ _ _ + _ # _ _ _ + _ # 1 # 10 + S _ _ _ _ + -10 -10 -10 -10 -10 +""" +gridName: "discountGrid" + +# Policy specification +# _ policy choice not checked +# N, E, S, W policy action must be north, east, south, west +# +policy: """ + E E S _ _ + N _ S _ _ + N _ _ _ _ + N _ _ _ _ + _ _ _ _ _ +""" diff --git a/reinforcement/test_cases/q3/3-question-3.3.solution b/reinforcement/test_cases/q3/3-question-3.3.solution new file mode 100644 index 0000000..a59112e --- /dev/null +++ b/reinforcement/test_cases/q3/3-question-3.3.solution @@ -0,0 +1,2 @@ +# This is the solution file for test_cases/q3/3-question-3.3.test. +# File intentionally blank. diff --git a/reinforcement/test_cases/q3/3-question-3.3.test b/reinforcement/test_cases/q3/3-question-3.3.test new file mode 100644 index 0000000..39a1490 --- /dev/null +++ b/reinforcement/test_cases/q3/3-question-3.3.test @@ -0,0 +1,31 @@ +class: "GridPolicyTest" + +# Function in module in analysis that returns (discount, noise) +parameterFn: "question3c" + +# GridWorld specification +# _ is empty space +# numbers are terminal states with that value +# # is a wall +# S is a start state +# +grid: """ + _ _ _ _ _ + _ # _ _ _ + _ # 1 # 10 + S _ _ _ _ + -10 -10 -10 -10 -10 +""" +gridName: "discountGrid" + +# Policy specification +# _ policy choice not checked +# N, E, S, W policy action must be north, east, south, west +# +policy: """ + _ _ _ _ _ + _ _ _ _ _ + _ _ _ _ _ + E E E E N + _ _ _ _ _ +""" diff --git a/reinforcement/test_cases/q3/4-question-3.4.solution b/reinforcement/test_cases/q3/4-question-3.4.solution new file mode 100644 index 0000000..3af10dc --- /dev/null +++ b/reinforcement/test_cases/q3/4-question-3.4.solution @@ -0,0 +1,2 @@ +# This is the solution file for test_cases/q3/4-question-3.4.test. +# File intentionally blank. 
diff --git a/reinforcement/test_cases/q3/4-question-3.4.test b/reinforcement/test_cases/q3/4-question-3.4.test new file mode 100644 index 0000000..6728c59 --- /dev/null +++ b/reinforcement/test_cases/q3/4-question-3.4.test @@ -0,0 +1,36 @@ +class: "GridPolicyTest" + +# Function in module in analysis that returns (discount, noise) +parameterFn: "question3d" + +# GridWorld specification +# _ is empty space +# numbers are terminal states with that value +# # is a wall +# S is a start state +# +grid: """ + _ _ _ _ _ + _ # _ _ _ + _ # 1 # 10 + S _ _ _ _ + -10 -10 -10 -10 -10 +""" +gridName: "discountGrid" + +# Policy specification +# _ policy choice not checked +# N, E, S, W policy action must be north, east, south, west +# +policy: """ + _ _ _ _ _ + _ _ _ _ _ + _ _ _ _ _ + N _ _ _ _ + _ _ _ _ _ +""" + +# State the most probable path must visit +# (x,y) for a particular location; (0,0) is bottom left +# TERMINAL_STATE for the terminal state +pathVisits: "(4,2)" diff --git a/reinforcement/test_cases/q3/5-question-3.5.solution b/reinforcement/test_cases/q3/5-question-3.5.solution new file mode 100644 index 0000000..54984fa --- /dev/null +++ b/reinforcement/test_cases/q3/5-question-3.5.solution @@ -0,0 +1,2 @@ +# This is the solution file for test_cases/q3/5-question-3.5.test. +# File intentionally blank. diff --git a/reinforcement/test_cases/q3/5-question-3.5.test b/reinforcement/test_cases/q3/5-question-3.5.test new file mode 100644 index 0000000..7ce8456 --- /dev/null +++ b/reinforcement/test_cases/q3/5-question-3.5.test @@ -0,0 +1,36 @@ +class: "GridPolicyTest" + +# Function in module in analysis that returns (discount, noise) +parameterFn: "question3e" + +# GridWorld specification +# _ is empty space +# numbers are terminal states with that value +# # is a wall +# S is a start state +# +grid: """ + _ _ _ _ _ + _ # _ _ _ + _ # 1 # 10 + S _ _ _ _ + -10 -10 -10 -10 -10 +""" +gridName: "discountGrid" + +# Policy specification +# _ policy choice not checked +# N, E, S, W policy action must be north, east, south, west +# +policy: """ + _ _ _ _ _ + _ _ _ _ _ + _ _ _ _ _ + _ _ _ _ _ + _ _ _ _ _ +""" + +# State the most probable path must not visit +# (x,y) for a particular location; (0,0) is bottom left +# TERMINAL_STATE for the terminal state +pathNotVisits: "TERMINAL_STATE" diff --git a/reinforcement/test_cases/q3/CONFIG b/reinforcement/test_cases/q3/CONFIG new file mode 100644 index 0000000..8a5420a --- /dev/null +++ b/reinforcement/test_cases/q3/CONFIG @@ -0,0 +1,2 @@ +max_points: "5" +class: "NumberPassedQuestion" diff --git a/reinforcement/test_cases/q4/1-tinygrid.solution b/reinforcement/test_cases/q4/1-tinygrid.solution new file mode 100644 index 0000000..ef85cea --- /dev/null +++ b/reinforcement/test_cases/q4/1-tinygrid.solution @@ -0,0 +1,342 @@ +q_values_k_0_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_0_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_0_action_exit: """ + 0.0000 + illegal + 0.0000 +""" + +q_values_k_0_action_south: """ + illegal + 0.0000 + illegal +""" + +q_values_k_0_action_west: """ + illegal + 0.0000 + illegal +""" + +q_values_k_1_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_1_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_1_action_exit: """ + 0.0000 + illegal + 1.0000 +""" + +q_values_k_1_action_south: """ + illegal + 0.0000 + illegal +""" + +q_values_k_1_action_west: """ + illegal + 0.0000 + illegal +""" + +q_values_k_2_action_north: """ + illegal + 0.0000 + illegal +""" + 
+q_values_k_2_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_2_action_exit: """ + 0.0000 + illegal + 1.0000 +""" + +q_values_k_2_action_south: """ + illegal + 0.0000 + illegal +""" + +q_values_k_2_action_west: """ + illegal + 0.0000 + illegal +""" + +q_values_k_3_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_3_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_3_action_exit: """ + 0.0000 + illegal + 1.9000 +""" + +q_values_k_3_action_south: """ + illegal + 0.0000 + illegal +""" + +q_values_k_3_action_west: """ + illegal + 0.0000 + illegal +""" + +q_values_k_4_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_4_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_4_action_exit: """ + 0.0000 + illegal + 2.7100 +""" + +q_values_k_4_action_south: """ + illegal + 0.0000 + illegal +""" + +q_values_k_4_action_west: """ + illegal + 0.0000 + illegal +""" + +q_values_k_5_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_5_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_5_action_exit: """ + -1.0000 + illegal + 2.7100 +""" + +q_values_k_5_action_south: """ + illegal + 0.0000 + illegal +""" + +q_values_k_5_action_west: """ + illegal + 0.0000 + illegal +""" + +q_values_k_6_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_6_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_6_action_exit: """ + -1.0000 + illegal + 3.4390 +""" + +q_values_k_6_action_south: """ + illegal + 0.0000 + illegal +""" + +q_values_k_6_action_west: """ + illegal + 0.0000 + illegal +""" + +q_values_k_7_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_7_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_7_action_exit: """ + -1.0000 + illegal + 3.4390 +""" + +q_values_k_7_action_south: """ + illegal + 0.1720 + illegal +""" + +q_values_k_7_action_west: """ + illegal + 0.0000 + illegal +""" + +q_values_k_8_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_8_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_8_action_exit: """ + -1.0000 + illegal + 4.0951 +""" + +q_values_k_8_action_south: """ + illegal + 0.1720 + illegal +""" + +q_values_k_8_action_west: """ + illegal + 0.0000 + illegal +""" + +q_values_k_9_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_9_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_9_action_exit: """ + -1.0000 + illegal + 4.6856 +""" + +q_values_k_9_action_south: """ + illegal + 0.1720 + illegal +""" + +q_values_k_9_action_west: """ + illegal + 0.0000 + illegal +""" + +q_values_k_100_action_north: """ + illegal + -0.4534 + illegal +""" + +q_values_k_100_action_east: """ + illegal + 0.4063 + illegal +""" + +q_values_k_100_action_exit: """ + -9.4767 + illegal + 9.8175 +""" + +q_values_k_100_action_south: """ + illegal + 2.1267 + illegal +""" + +q_values_k_100_action_west: """ + illegal + 0.3919 + illegal +""" + +values: """ + -9.4767 + 2.1267 + 9.8175 +""" + +policy: """ + exit + south + exit +""" + diff --git a/reinforcement/test_cases/q4/1-tinygrid.test b/reinforcement/test_cases/q4/1-tinygrid.test new file mode 100644 index 0000000..30c9254 --- /dev/null +++ b/reinforcement/test_cases/q4/1-tinygrid.test @@ -0,0 +1,22 @@ +class: "QLearningTest" + +# GridWorld specification +# _ is empty space +# numbers are terminal states with that value +# # is a wall +# S is a start state +# +grid: """ + -10 + S + 10 +""" +discount: "0.5" +noise: "0.0" +livingReward: "0.0" +epsilon: "0.5" 
+learningRate: "0.1" +numExperiences: "100" +valueIterations: "100" +iterations: "10000" + diff --git a/reinforcement/test_cases/q4/2-tinygrid-noisy.solution b/reinforcement/test_cases/q4/2-tinygrid-noisy.solution new file mode 100644 index 0000000..84cb531 --- /dev/null +++ b/reinforcement/test_cases/q4/2-tinygrid-noisy.solution @@ -0,0 +1,342 @@ +q_values_k_0_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_0_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_0_action_exit: """ + 0.0000 + illegal + 0.0000 +""" + +q_values_k_0_action_south: """ + illegal + 0.0000 + illegal +""" + +q_values_k_0_action_west: """ + illegal + 0.0000 + illegal +""" + +q_values_k_1_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_1_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_1_action_exit: """ + 0.0000 + illegal + 1.0000 +""" + +q_values_k_1_action_south: """ + illegal + 0.0000 + illegal +""" + +q_values_k_1_action_west: """ + illegal + 0.0000 + illegal +""" + +q_values_k_2_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_2_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_2_action_exit: """ + 0.0000 + illegal + 1.0000 +""" + +q_values_k_2_action_south: """ + illegal + 0.0000 + illegal +""" + +q_values_k_2_action_west: """ + illegal + 0.0000 + illegal +""" + +q_values_k_3_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_3_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_3_action_exit: """ + 0.0000 + illegal + 1.9000 +""" + +q_values_k_3_action_south: """ + illegal + 0.0000 + illegal +""" + +q_values_k_3_action_west: """ + illegal + 0.0000 + illegal +""" + +q_values_k_4_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_4_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_4_action_exit: """ + 0.0000 + illegal + 2.7100 +""" + +q_values_k_4_action_south: """ + illegal + 0.0000 + illegal +""" + +q_values_k_4_action_west: """ + illegal + 0.0000 + illegal +""" + +q_values_k_5_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_5_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_5_action_exit: """ + -1.0000 + illegal + 2.7100 +""" + +q_values_k_5_action_south: """ + illegal + 0.0000 + illegal +""" + +q_values_k_5_action_west: """ + illegal + 0.0000 + illegal +""" + +q_values_k_6_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_6_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_6_action_exit: """ + -1.0000 + illegal + 3.4390 +""" + +q_values_k_6_action_south: """ + illegal + 0.0000 + illegal +""" + +q_values_k_6_action_west: """ + illegal + 0.0000 + illegal +""" + +q_values_k_7_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_7_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_7_action_exit: """ + -1.0000 + illegal + 3.4390 +""" + +q_values_k_7_action_south: """ + illegal + 0.2579 + illegal +""" + +q_values_k_7_action_west: """ + illegal + 0.0000 + illegal +""" + +q_values_k_8_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_8_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_8_action_exit: """ + -1.0000 + illegal + 4.0951 +""" + +q_values_k_8_action_south: """ + illegal + 0.2579 + illegal +""" + +q_values_k_8_action_west: """ + illegal + 0.0000 + illegal +""" + +q_values_k_9_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_9_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_9_action_exit: """ + -1.0000 
+ illegal + 4.6856 +""" + +q_values_k_9_action_south: """ + illegal + 0.2579 + illegal +""" + +q_values_k_9_action_west: """ + illegal + 0.0000 + illegal +""" + +q_values_k_100_action_north: """ + illegal + -0.6670 + illegal +""" + +q_values_k_100_action_east: """ + illegal + 0.9499 + illegal +""" + +q_values_k_100_action_exit: """ + -9.4767 + illegal + 9.8175 +""" + +q_values_k_100_action_south: """ + illegal + 3.2562 + illegal +""" + +q_values_k_100_action_west: """ + illegal + 0.8236 + illegal +""" + +values: """ + -9.4767 + 3.2562 + 9.8175 +""" + +policy: """ + exit + south + exit +""" + diff --git a/reinforcement/test_cases/q4/2-tinygrid-noisy.test b/reinforcement/test_cases/q4/2-tinygrid-noisy.test new file mode 100644 index 0000000..65541b5 --- /dev/null +++ b/reinforcement/test_cases/q4/2-tinygrid-noisy.test @@ -0,0 +1,22 @@ +class: "QLearningTest" + +# GridWorld specification +# _ is empty space +# numbers are terminal states with that value +# # is a wall +# S is a start state +# +grid: """ + -10 + S + 10 +""" +discount: "0.75" +noise: "0.25" +livingReward: "0.0" +epsilon: "0.5" +learningRate: "0.1" +numExperiences: "100" +valueIterations: "100" +iterations: "10000" + diff --git a/reinforcement/test_cases/q4/3-bridge.solution b/reinforcement/test_cases/q4/3-bridge.solution new file mode 100644 index 0000000..f5e3dd0 --- /dev/null +++ b/reinforcement/test_cases/q4/3-bridge.solution @@ -0,0 +1,570 @@ +q_values_k_0_action_north: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_0_action_east: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_0_action_exit: """ + __________ 0.0000 __________ + 0.0000 illegal 0.0000 + 0.0000 illegal 0.0000 + 0.0000 illegal 0.0000 + 0.0000 illegal 0.0000 + 0.0000 illegal 0.0000 + __________ 0.0000 __________ +""" + +q_values_k_0_action_south: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_0_action_west: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_1_action_north: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_1_action_east: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_1_action_exit: """ + __________ 0.0000 __________ + -10.0000 illegal 0.0000 + 0.0000 illegal 0.0000 + 0.0000 illegal 0.0000 + 0.0000 illegal 0.0000 + 0.0000 illegal 0.0000 + __________ 0.0000 __________ +""" + +q_values_k_1_action_south: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_1_action_west: """ + __________ illegal __________ + illegal 0.0000 illegal 
+ illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_2_action_north: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_2_action_east: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_2_action_exit: """ + __________ 0.0000 __________ + -10.0000 illegal 0.0000 + 0.0000 illegal 0.0000 + 0.0000 illegal 0.0000 + 0.0000 illegal 0.0000 + 0.0000 illegal 0.0000 + __________ 0.0000 __________ +""" + +q_values_k_2_action_south: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_2_action_west: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_3_action_north: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_3_action_east: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_3_action_exit: """ + __________ 0.0000 __________ + -10.0000 illegal 0.0000 + 0.0000 illegal 0.0000 + 0.0000 illegal 0.0000 + 0.0000 illegal 0.0000 + -10.0000 illegal 0.0000 + __________ 0.0000 __________ +""" + +q_values_k_3_action_south: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_3_action_west: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_4_action_north: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_4_action_east: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_4_action_exit: """ + __________ 0.0000 __________ + -10.0000 illegal 0.0000 + -10.0000 illegal 0.0000 + 0.0000 illegal 0.0000 + 0.0000 illegal 0.0000 + -10.0000 illegal 0.0000 + __________ 0.0000 __________ +""" + +q_values_k_4_action_south: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_4_action_west: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + 
+q_values_k_5_action_north: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_5_action_east: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_5_action_exit: """ + __________ 0.0000 __________ + -10.0000 illegal 0.0000 + -10.0000 illegal 0.0000 + 0.0000 illegal -10.0000 + 0.0000 illegal 0.0000 + -10.0000 illegal 0.0000 + __________ 0.0000 __________ +""" + +q_values_k_5_action_south: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_5_action_west: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_6_action_north: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_6_action_east: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_6_action_exit: """ + __________ 0.0000 __________ + -10.0000 illegal 0.0000 + -10.0000 illegal 0.0000 + -10.0000 illegal -10.0000 + 0.0000 illegal 0.0000 + -10.0000 illegal 0.0000 + __________ 0.0000 __________ +""" + +q_values_k_6_action_south: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_6_action_west: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_7_action_north: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_7_action_east: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_7_action_exit: """ + __________ 0.0000 __________ + -10.0000 illegal 0.0000 + -10.0000 illegal 0.0000 + -10.0000 illegal -10.0000 + 0.0000 illegal 0.0000 + -10.0000 illegal 0.0000 + __________ 0.0000 __________ +""" + +q_values_k_7_action_south: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_7_action_west: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_8_action_north: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 
illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_8_action_east: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_8_action_exit: """ + __________ 0.0000 __________ + -10.0000 illegal 0.0000 + -10.0000 illegal 0.0000 + -10.0000 illegal -10.0000 + -10.0000 illegal 0.0000 + -10.0000 illegal 0.0000 + __________ 0.0000 __________ +""" + +q_values_k_8_action_south: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_8_action_west: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_9_action_north: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_9_action_east: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_9_action_exit: """ + __________ 0.0000 __________ + -10.0000 illegal 0.0000 + -10.0000 illegal 0.0000 + -10.0000 illegal -10.0000 + -10.0000 illegal 0.0000 + -10.0000 illegal 0.0000 + __________ 0.1000 __________ +""" + +q_values_k_9_action_south: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_9_action_west: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_500_action_north: """ + __________ illegal __________ + illegal -5.8648 illegal + illegal -0.7995 illegal + illegal -0.1671 illegal + illegal -1.2642 illegal + illegal -0.5871 illegal + __________ illegal __________ +""" + +q_values_k_500_action_east: """ + __________ illegal __________ + illegal -17.0676 illegal + illegal -26.5534 illegal + illegal -3.6957 illegal + illegal -43.5952 illegal + illegal -31.6884 illegal + __________ illegal __________ +""" + +q_values_k_500_action_exit: """ + __________ 9.3539 __________ + -96.5663 illegal -96.9097 + -97.7472 illegal -94.1850 + -89.0581 illegal -96.9097 + -97.2187 illegal -87.8423 + -92.8210 illegal -97.2187 + __________ 0.9576 __________ +""" + +q_values_k_500_action_south: """ + __________ illegal __________ + illegal -6.8377 illegal + illegal -6.7277 illegal + illegal -3.4723 illegal + illegal -8.4015 illegal + illegal -5.5718 illegal + __________ illegal __________ +""" + +q_values_k_500_action_west: """ + __________ illegal __________ + illegal -27.0626 illegal + illegal -39.0610 illegal + illegal -40.5887 illegal + illegal -16.2839 illegal + illegal -20.7770 illegal + __________ illegal __________ +""" + +values: """ + __________ 9.3539 __________ + -96.5663 -5.8648 -96.9097 + -97.7472 -0.7995 -94.1850 + -89.0581 -0.1671 -96.9097 + -97.2187 -1.2642 -87.8423 + -92.8210 -0.5871 -97.2187 + __________ 0.9576 
__________ +""" + +policy: """ + __________ exit __________ + exit north exit + exit north exit + exit north exit + exit north exit + exit north exit + __________ exit __________ +""" + diff --git a/reinforcement/test_cases/q4/3-bridge.test b/reinforcement/test_cases/q4/3-bridge.test new file mode 100644 index 0000000..4929b42 --- /dev/null +++ b/reinforcement/test_cases/q4/3-bridge.test @@ -0,0 +1,27 @@ +class: "QLearningTest" + +# GridWorld specification +# _ is empty space +# numbers are terminal states with that value +# # is a wall +# S is a start state +# +grid: """ + # 10 # + -100 _ -100 + -100 _ -100 + -100 _ -100 + -100 _ -100 + -100 S -100 + # 1 # +""" +gridName: "bridgeGrid" +discount: "0.85" +noise: "0.1" +livingReward: "0.0" +epsilon: "0.5" +learningRate: "0.1" +numExperiences: "500" +valueIterations: "100" +iterations: "10000" + diff --git a/reinforcement/test_cases/q4/4-discountgrid.solution b/reinforcement/test_cases/q4/4-discountgrid.solution new file mode 100644 index 0000000..7ee77b8 --- /dev/null +++ b/reinforcement/test_cases/q4/4-discountgrid.solution @@ -0,0 +1,456 @@ +q_values_k_0_action_north: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_0_action_east: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_0_action_exit: """ + 0.0000 illegal 0.0000 illegal illegal + 0.0000 illegal __________ illegal illegal + 0.0000 illegal 0.0000 illegal illegal + 0.0000 illegal __________ __________ illegal + 0.0000 illegal illegal illegal illegal +""" + +q_values_k_0_action_south: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_0_action_west: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_1_action_north: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_1_action_east: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_1_action_exit: """ + 0.0000 illegal 0.0000 illegal illegal + 0.0000 illegal __________ illegal illegal + 0.0000 illegal 0.0000 illegal illegal + 0.0000 illegal __________ __________ illegal + 0.0000 illegal illegal illegal illegal +""" + +q_values_k_1_action_south: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_1_action_west: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + 
+q_values_k_2_action_north: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_2_action_east: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_2_action_exit: """ + 0.0000 illegal 0.0000 illegal illegal + 0.0000 illegal __________ illegal illegal + 0.0000 illegal 0.0000 illegal illegal + 0.0000 illegal __________ __________ illegal + 0.0000 illegal illegal illegal illegal +""" + +q_values_k_2_action_south: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_2_action_west: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_3_action_north: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_3_action_east: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_3_action_exit: """ + 0.0000 illegal 0.0000 illegal illegal + 0.0000 illegal __________ illegal illegal + 0.0000 illegal 0.0000 illegal illegal + 0.0000 illegal __________ __________ illegal + -1.0000 illegal illegal illegal illegal +""" + +q_values_k_3_action_south: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_3_action_west: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_4_action_north: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_4_action_east: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_4_action_exit: """ + 0.0000 illegal 0.0000 illegal illegal + 0.0000 illegal __________ illegal illegal + 0.0000 illegal 0.0000 illegal illegal + 0.0000 illegal __________ __________ illegal + -1.0000 illegal illegal illegal illegal +""" + +q_values_k_4_action_south: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_4_action_west: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 
illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_5_action_north: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_5_action_east: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_5_action_exit: """ + 0.0000 illegal 0.0000 illegal illegal + 0.0000 illegal __________ illegal illegal + 0.0000 illegal 0.0000 illegal illegal + 0.0000 illegal __________ __________ illegal + -1.0000 illegal illegal illegal illegal +""" + +q_values_k_5_action_south: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_5_action_west: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_6_action_north: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_6_action_east: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_6_action_exit: """ + 0.0000 illegal 0.0000 illegal illegal + -1.0000 illegal __________ illegal illegal + 0.0000 illegal 0.0000 illegal illegal + 0.0000 illegal __________ __________ illegal + -1.0000 illegal illegal illegal illegal +""" + +q_values_k_6_action_south: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_6_action_west: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_7_action_north: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_7_action_east: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_7_action_exit: """ + 0.0000 illegal 0.0000 illegal illegal + -1.0000 illegal __________ illegal illegal + 0.0000 illegal 0.1000 illegal illegal + 0.0000 illegal __________ __________ illegal + -1.0000 illegal illegal illegal illegal +""" + +q_values_k_7_action_south: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + 
+q_values_k_7_action_west: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_8_action_north: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_8_action_east: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_8_action_exit: """ + 0.0000 illegal 0.0000 illegal illegal + -1.0000 illegal __________ illegal illegal + 0.0000 illegal 0.1000 illegal illegal + -1.0000 illegal __________ __________ illegal + -1.0000 illegal illegal illegal illegal +""" + +q_values_k_8_action_south: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_8_action_west: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_9_action_north: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal -0.0900 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_9_action_east: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_9_action_exit: """ + 0.0000 illegal 0.0000 illegal illegal + -1.0000 illegal __________ illegal illegal + 0.0000 illegal 0.1000 illegal illegal + -1.0000 illegal __________ __________ illegal + -1.0000 illegal illegal illegal illegal +""" + +q_values_k_9_action_south: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_9_action_west: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_3000_action_north: """ + illegal 4.3205 illegal 6.1517 3.8095 + illegal 4.4238 __________ 5.2284 3.5129 + illegal 1.0694 illegal 3.6867 2.0418 + illegal 0.3423 __________ __________ 1.0655 + illegal 0.0073 0.0079 0.0484 0.3768 +""" + +q_values_k_3000_action_east: """ + illegal 8.0584 illegal 3.7245 3.3947 + illegal 2.0499 __________ 3.2373 2.1742 + illegal 0.8687 illegal 1.7398 1.2671 + illegal 0.2927 __________ __________ 0.6669 + illegal 0.0239 0.0097 0.1611 0.2051 +""" + +q_values_k_3000_action_exit: """ + -10.0000 illegal 10.0000 illegal illegal + -10.0000 illegal __________ illegal illegal + -10.0000 illegal 1.0000 illegal illegal + -10.0000 illegal __________ __________ illegal + -9.9999 illegal illegal illegal illegal +""" + +q_values_k_3000_action_south: """ + illegal -0.3521 illegal 3.6948 2.9139 + illegal -0.5605 __________ 2.1346 
1.5674 + illegal 0.2093 illegal 1.5389 0.5521 + illegal -0.5505 __________ __________ 0.1006 + illegal -1.8501 0.0060 0.0514 0.1223 +""" + +q_values_k_3000_action_west: """ + illegal -6.2001 illegal 7.5146 4.9014 + illegal -5.4013 __________ 4.0484 3.4126 + illegal -8.0399 illegal 0.9653 1.6081 + illegal -7.4767 __________ __________ 0.3934 + illegal -6.3432 0.0179 0.0188 0.1028 +""" + +values: """ + -10.0000 8.0584 10.0000 7.5146 4.9014 + -10.0000 4.4238 __________ 5.2284 3.5129 + -10.0000 1.0694 1.0000 3.6867 2.0418 + -10.0000 0.3423 __________ __________ 1.0655 + -9.9999 0.0239 0.0179 0.1611 0.3768 +""" + +policy: """ + exit east exit west west + exit north __________ north north + exit north exit north north + exit north __________ __________ north + exit east west east north +""" + diff --git a/reinforcement/test_cases/q4/4-discountgrid.test b/reinforcement/test_cases/q4/4-discountgrid.test new file mode 100644 index 0000000..a8b7adc --- /dev/null +++ b/reinforcement/test_cases/q4/4-discountgrid.test @@ -0,0 +1,24 @@ +class: "QLearningTest" + +# GridWorld specification +# _ is empty space +# numbers are terminal states with that value +# # is a wall +# S is a start state +# +grid: """ + -10 _ 10 _ _ + -10 _ # _ _ + -10 _ 1 _ _ + -10 _ # # _ + -10 S _ _ _ +""" +discount: "0.9" +noise: "0.2" +livingReward: "0.0" +epsilon: "0.2" +learningRate: "0.1" +numExperiences: "3000" +valueIterations: "100" +iterations: "10000" + diff --git a/reinforcement/test_cases/q4/CONFIG b/reinforcement/test_cases/q4/CONFIG new file mode 100644 index 0000000..a5adc3f --- /dev/null +++ b/reinforcement/test_cases/q4/CONFIG @@ -0,0 +1,2 @@ +max_points: "5" +class: "PassAllTestsQuestion" diff --git a/reinforcement/test_cases/q5/1-tinygrid.solution b/reinforcement/test_cases/q5/1-tinygrid.solution new file mode 100644 index 0000000..bf7fddb --- /dev/null +++ b/reinforcement/test_cases/q5/1-tinygrid.solution @@ -0,0 +1,2 @@ +# This is the solution file for test_cases/q5/1-tinygrid.test. +# File intentionally blank. diff --git a/reinforcement/test_cases/q5/1-tinygrid.test b/reinforcement/test_cases/q5/1-tinygrid.test new file mode 100644 index 0000000..627ed24 --- /dev/null +++ b/reinforcement/test_cases/q5/1-tinygrid.test @@ -0,0 +1,22 @@ +class: "EpsilonGreedyTest" + +# GridWorld specification +# _ is empty space +# numbers are terminal states with that value +# # is a wall +# S is a start state +# +grid: """ + -10 + S + 10 +""" +discount: "0.5" +noise: "0.0" +livingReward: "0.0" +epsilon: "0.5" +learningRate: "0.1" +numExperiences: "100" +valueIterations: "100" +iterations: "10000" + diff --git a/reinforcement/test_cases/q5/2-tinygrid-noisy.solution b/reinforcement/test_cases/q5/2-tinygrid-noisy.solution new file mode 100644 index 0000000..9b9463e --- /dev/null +++ b/reinforcement/test_cases/q5/2-tinygrid-noisy.solution @@ -0,0 +1,2 @@ +# This is the solution file for test_cases/q5/2-tinygrid-noisy.test. +# File intentionally blank. 
diff --git a/reinforcement/test_cases/q5/2-tinygrid-noisy.test b/reinforcement/test_cases/q5/2-tinygrid-noisy.test new file mode 100644 index 0000000..18b1d72 --- /dev/null +++ b/reinforcement/test_cases/q5/2-tinygrid-noisy.test @@ -0,0 +1,22 @@ +class: "EpsilonGreedyTest" + +# GridWorld specification +# _ is empty space +# numbers are terminal states with that value +# # is a wall +# S is a start state +# +grid: """ + -10 + S + 10 +""" +discount: "0.75" +noise: "0.25" +livingReward: "0.0" +epsilon: "0.5" +learningRate: "0.1" +numExperiences: "100" +valueIterations: "100" +iterations: "10000" + diff --git a/reinforcement/test_cases/q5/3-bridge.solution b/reinforcement/test_cases/q5/3-bridge.solution new file mode 100644 index 0000000..3cad43c --- /dev/null +++ b/reinforcement/test_cases/q5/3-bridge.solution @@ -0,0 +1,2 @@ +# This is the solution file for test_cases/q5/3-bridge.test. +# File intentionally blank. diff --git a/reinforcement/test_cases/q5/3-bridge.test b/reinforcement/test_cases/q5/3-bridge.test new file mode 100644 index 0000000..09bece4 --- /dev/null +++ b/reinforcement/test_cases/q5/3-bridge.test @@ -0,0 +1,27 @@ +class: "EpsilonGreedyTest" + +# GridWorld specification +# _ is empty space +# numbers are terminal states with that value +# # is a wall +# S is a start state +# +grid: """ + # 10 # + -100 _ -100 + -100 _ -100 + -100 _ -100 + -100 _ -100 + -100 S -100 + # 1 # +""" +gridName: "bridgeGrid" +discount: "0.85" +noise: "0.1" +livingReward: "0.0" +epsilon: "0.5" +learningRate: "0.1" +numExperiences: "500" +valueIterations: "100" +iterations: "10000" + diff --git a/reinforcement/test_cases/q5/4-discountgrid.solution b/reinforcement/test_cases/q5/4-discountgrid.solution new file mode 100644 index 0000000..33ed510 --- /dev/null +++ b/reinforcement/test_cases/q5/4-discountgrid.solution @@ -0,0 +1,2 @@ +# This is the solution file for test_cases/q5/4-discountgrid.test. +# File intentionally blank. diff --git a/reinforcement/test_cases/q5/4-discountgrid.test b/reinforcement/test_cases/q5/4-discountgrid.test new file mode 100644 index 0000000..2aef636 --- /dev/null +++ b/reinforcement/test_cases/q5/4-discountgrid.test @@ -0,0 +1,24 @@ +class: "EpsilonGreedyTest" + +# GridWorld specification +# _ is empty space +# numbers are terminal states with that value +# # is a wall +# S is a start state +# +grid: """ + -10 _ 10 _ _ + -10 _ # _ _ + -10 _ 1 _ _ + -10 _ # # _ + -10 S _ _ _ +""" +discount: "0.9" +noise: "0.2" +livingReward: "0.0" +epsilon: "0.2" +learningRate: "0.1" +numExperiences: "3000" +valueIterations: "100" +iterations: "10000" + diff --git a/reinforcement/test_cases/q5/CONFIG b/reinforcement/test_cases/q5/CONFIG new file mode 100644 index 0000000..ad7e38a --- /dev/null +++ b/reinforcement/test_cases/q5/CONFIG @@ -0,0 +1,2 @@ +max_points: "3" +class: "PassAllTestsQuestion" diff --git a/reinforcement/test_cases/q6/CONFIG b/reinforcement/test_cases/q6/CONFIG new file mode 100644 index 0000000..279f0f0 --- /dev/null +++ b/reinforcement/test_cases/q6/CONFIG @@ -0,0 +1,2 @@ +max_points: "1" +class: "PassAllTestsQuestion" diff --git a/reinforcement/test_cases/q6/grade-agent.solution b/reinforcement/test_cases/q6/grade-agent.solution new file mode 100644 index 0000000..1b2f7ce --- /dev/null +++ b/reinforcement/test_cases/q6/grade-agent.solution @@ -0,0 +1,2 @@ +# This is the solution file for test_cases/q6/grade-agent.test. +# File intentionally blank. 
diff --git a/reinforcement/test_cases/q6/grade-agent.test b/reinforcement/test_cases/q6/grade-agent.test new file mode 100644 index 0000000..e427f4e --- /dev/null +++ b/reinforcement/test_cases/q6/grade-agent.test @@ -0,0 +1,2 @@ +class: "Question6Test" + diff --git a/reinforcement/test_cases/q7/CONFIG b/reinforcement/test_cases/q7/CONFIG new file mode 100644 index 0000000..63a627c --- /dev/null +++ b/reinforcement/test_cases/q7/CONFIG @@ -0,0 +1,2 @@ +max_points: "1" +class: "PartialCreditQuestion" diff --git a/reinforcement/test_cases/q7/grade-agent.solution b/reinforcement/test_cases/q7/grade-agent.solution new file mode 100644 index 0000000..651ed9d --- /dev/null +++ b/reinforcement/test_cases/q7/grade-agent.solution @@ -0,0 +1,2 @@ +# This is the solution file for test_cases/q7/grade-agent.test. +# File intentionally blank. diff --git a/reinforcement/test_cases/q7/grade-agent.test b/reinforcement/test_cases/q7/grade-agent.test new file mode 100644 index 0000000..f09ba4a --- /dev/null +++ b/reinforcement/test_cases/q7/grade-agent.test @@ -0,0 +1,6 @@ +class: "EvalAgentTest" + +# 100 test games after 2000 training games +pacmanParams: "-p PacmanQAgent -x 2000 -n 2100 -l smallGrid -q -f --fixRandomSeed" + +winsThresholds: "70" diff --git a/reinforcement/test_cases/q8/1-tinygrid.solution b/reinforcement/test_cases/q8/1-tinygrid.solution new file mode 100644 index 0000000..f61a556 --- /dev/null +++ b/reinforcement/test_cases/q8/1-tinygrid.solution @@ -0,0 +1,429 @@ +weights_k_0: """ +{((0, 0), 'exit'): 0, + ((0, 1), 'east'): 0, + ((0, 1), 'north'): 0, + ((0, 1), 'south'): 0, + ((0, 1), 'west'): 0, + ((0, 2), 'exit'): 0} +""" + +q_values_k_0_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_0_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_0_action_exit: """ + 0.0000 + illegal + 0.0000 +""" + +q_values_k_0_action_south: """ + illegal + 0.0000 + illegal +""" + +q_values_k_0_action_west: """ + illegal + 0.0000 + illegal +""" + +weights_k_1: """ +{((0, 0), 'exit'): 1.0, + ((0, 1), 'east'): 0, + ((0, 1), 'north'): 0, + ((0, 1), 'south'): 0, + ((0, 1), 'west'): 0, + ((0, 2), 'exit'): 0} +""" + +q_values_k_1_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_1_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_1_action_exit: """ + 0.0000 + illegal + 1.0000 +""" + +q_values_k_1_action_south: """ + illegal + 0.0000 + illegal +""" + +q_values_k_1_action_west: """ + illegal + 0.0000 + illegal +""" + +weights_k_2: """ +{((0, 0), 'exit'): 1.0, + ((0, 1), 'east'): 0, + ((0, 1), 'north'): 0, + ((0, 1), 'south'): 0.0, + ((0, 1), 'west'): 0, + ((0, 2), 'exit'): 0} +""" + +q_values_k_2_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_2_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_2_action_exit: """ + 0.0000 + illegal + 1.0000 +""" + +q_values_k_2_action_south: """ + illegal + 0.0000 + illegal +""" + +q_values_k_2_action_west: """ + illegal + 0.0000 + illegal +""" + +weights_k_3: """ +{((0, 0), 'exit'): 1.9, + ((0, 1), 'east'): 0, + ((0, 1), 'north'): 0, + ((0, 1), 'south'): 0.0, + ((0, 1), 'west'): 0, + ((0, 2), 'exit'): 0} +""" + +q_values_k_3_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_3_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_3_action_exit: """ + 0.0000 + illegal + 1.9000 +""" + +q_values_k_3_action_south: """ + illegal + 0.0000 + illegal +""" + +q_values_k_3_action_west: """ + illegal + 0.0000 + illegal +""" + +weights_k_4: """ +{((0, 0), 'exit'): 2.71, + ((0, 
1), 'east'): 0, + ((0, 1), 'north'): 0, + ((0, 1), 'south'): 0.0, + ((0, 1), 'west'): 0, + ((0, 2), 'exit'): 0} +""" + +q_values_k_4_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_4_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_4_action_exit: """ + 0.0000 + illegal + 2.7100 +""" + +q_values_k_4_action_south: """ + illegal + 0.0000 + illegal +""" + +q_values_k_4_action_west: """ + illegal + 0.0000 + illegal +""" + +weights_k_5: """ +{((0, 0), 'exit'): 2.71, + ((0, 1), 'east'): 0, + ((0, 1), 'north'): 0, + ((0, 1), 'south'): 0.0, + ((0, 1), 'west'): 0, + ((0, 2), 'exit'): -1.0} +""" + +q_values_k_5_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_5_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_5_action_exit: """ + -1.0000 + illegal + 2.7100 +""" + +q_values_k_5_action_south: """ + illegal + 0.0000 + illegal +""" + +q_values_k_5_action_west: """ + illegal + 0.0000 + illegal +""" + +weights_k_6: """ +{((0, 0), 'exit'): 3.439, + ((0, 1), 'east'): 0, + ((0, 1), 'north'): 0, + ((0, 1), 'south'): 0.0, + ((0, 1), 'west'): 0, + ((0, 2), 'exit'): -1.0} +""" + +q_values_k_6_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_6_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_6_action_exit: """ + -1.0000 + illegal + 3.4390 +""" + +q_values_k_6_action_south: """ + illegal + 0.0000 + illegal +""" + +q_values_k_6_action_west: """ + illegal + 0.0000 + illegal +""" + +weights_k_7: """ +{((0, 0), 'exit'): 3.439, + ((0, 1), 'east'): 0, + ((0, 1), 'north'): 0, + ((0, 1), 'south'): 0.17195000000000002, + ((0, 1), 'west'): 0, + ((0, 2), 'exit'): -1.0} +""" + +q_values_k_7_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_7_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_7_action_exit: """ + -1.0000 + illegal + 3.4390 +""" + +q_values_k_7_action_south: """ + illegal + 0.1720 + illegal +""" + +q_values_k_7_action_west: """ + illegal + 0.0000 + illegal +""" + +weights_k_8: """ +{((0, 0), 'exit'): 4.0951, + ((0, 1), 'east'): 0, + ((0, 1), 'north'): 0, + ((0, 1), 'south'): 0.17195000000000002, + ((0, 1), 'west'): 0, + ((0, 2), 'exit'): -1.0} +""" + +q_values_k_8_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_8_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_8_action_exit: """ + -1.0000 + illegal + 4.0951 +""" + +q_values_k_8_action_south: """ + illegal + 0.1720 + illegal +""" + +q_values_k_8_action_west: """ + illegal + 0.0000 + illegal +""" + +weights_k_9: """ +{((0, 0), 'exit'): 4.68559, + ((0, 1), 'east'): 0, + ((0, 1), 'north'): 0, + ((0, 1), 'south'): 0.17195000000000002, + ((0, 1), 'west'): 0, + ((0, 2), 'exit'): -1.0} +""" + +q_values_k_9_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_9_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_9_action_exit: """ + -1.0000 + illegal + 4.6856 +""" + +q_values_k_9_action_south: """ + illegal + 0.1720 + illegal +""" + +q_values_k_9_action_west: """ + illegal + 0.0000 + illegal +""" + +weights_k_100: """ +{((0, 0), 'exit'): 9.817519963685992, + ((0, 1), 'east'): 0.40629236674335106, + ((0, 1), 'north'): -0.4534185789984799, + ((0, 1), 'south'): 2.126721095524319, + ((0, 1), 'west'): 0.39193283364906867, + ((0, 2), 'exit'): -9.476652366972639} +""" + +q_values_k_100_action_north: """ + illegal + -0.4534 + illegal +""" + +q_values_k_100_action_east: """ + illegal + 0.4063 + illegal +""" + +q_values_k_100_action_exit: """ + -9.4767 + illegal + 9.8175 +""" + 
+q_values_k_100_action_south: """ + illegal + 2.1267 + illegal +""" + +q_values_k_100_action_west: """ + illegal + 0.3919 + illegal +""" + diff --git a/reinforcement/test_cases/q8/1-tinygrid.test b/reinforcement/test_cases/q8/1-tinygrid.test new file mode 100644 index 0000000..3cd9961 --- /dev/null +++ b/reinforcement/test_cases/q8/1-tinygrid.test @@ -0,0 +1,22 @@ +class: "ApproximateQLearningTest" + +# GridWorld specification +# _ is empty space +# numbers are terminal states with that value +# # is a wall +# S is a start state +# +grid: """ + -10 + S + 10 +""" +discount: "0.5" +noise: "0.0" +livingReward: "0.0" +epsilon: "0.5" +learningRate: "0.1" +numExperiences: "100" +valueIterations: "100" +iterations: "10000" + diff --git a/reinforcement/test_cases/q8/2-tinygrid-noisy.solution b/reinforcement/test_cases/q8/2-tinygrid-noisy.solution new file mode 100644 index 0000000..800709b --- /dev/null +++ b/reinforcement/test_cases/q8/2-tinygrid-noisy.solution @@ -0,0 +1,429 @@ +weights_k_0: """ +{((0, 0), 'exit'): 0, + ((0, 1), 'east'): 0, + ((0, 1), 'north'): 0, + ((0, 1), 'south'): 0, + ((0, 1), 'west'): 0, + ((0, 2), 'exit'): 0} +""" + +q_values_k_0_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_0_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_0_action_exit: """ + 0.0000 + illegal + 0.0000 +""" + +q_values_k_0_action_south: """ + illegal + 0.0000 + illegal +""" + +q_values_k_0_action_west: """ + illegal + 0.0000 + illegal +""" + +weights_k_1: """ +{((0, 0), 'exit'): 1.0, + ((0, 1), 'east'): 0, + ((0, 1), 'north'): 0, + ((0, 1), 'south'): 0, + ((0, 1), 'west'): 0, + ((0, 2), 'exit'): 0} +""" + +q_values_k_1_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_1_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_1_action_exit: """ + 0.0000 + illegal + 1.0000 +""" + +q_values_k_1_action_south: """ + illegal + 0.0000 + illegal +""" + +q_values_k_1_action_west: """ + illegal + 0.0000 + illegal +""" + +weights_k_2: """ +{((0, 0), 'exit'): 1.0, + ((0, 1), 'east'): 0, + ((0, 1), 'north'): 0, + ((0, 1), 'south'): 0.0, + ((0, 1), 'west'): 0, + ((0, 2), 'exit'): 0} +""" + +q_values_k_2_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_2_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_2_action_exit: """ + 0.0000 + illegal + 1.0000 +""" + +q_values_k_2_action_south: """ + illegal + 0.0000 + illegal +""" + +q_values_k_2_action_west: """ + illegal + 0.0000 + illegal +""" + +weights_k_3: """ +{((0, 0), 'exit'): 1.9, + ((0, 1), 'east'): 0, + ((0, 1), 'north'): 0, + ((0, 1), 'south'): 0.0, + ((0, 1), 'west'): 0, + ((0, 2), 'exit'): 0} +""" + +q_values_k_3_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_3_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_3_action_exit: """ + 0.0000 + illegal + 1.9000 +""" + +q_values_k_3_action_south: """ + illegal + 0.0000 + illegal +""" + +q_values_k_3_action_west: """ + illegal + 0.0000 + illegal +""" + +weights_k_4: """ +{((0, 0), 'exit'): 2.71, + ((0, 1), 'east'): 0, + ((0, 1), 'north'): 0, + ((0, 1), 'south'): 0.0, + ((0, 1), 'west'): 0, + ((0, 2), 'exit'): 0} +""" + +q_values_k_4_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_4_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_4_action_exit: """ + 0.0000 + illegal + 2.7100 +""" + +q_values_k_4_action_south: """ + illegal + 0.0000 + illegal +""" + +q_values_k_4_action_west: """ + illegal + 0.0000 + illegal +""" + +weights_k_5: """ +{((0, 0), 'exit'): 2.71, + 
((0, 1), 'east'): 0, + ((0, 1), 'north'): 0, + ((0, 1), 'south'): 0.0, + ((0, 1), 'west'): 0, + ((0, 2), 'exit'): -1.0} +""" + +q_values_k_5_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_5_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_5_action_exit: """ + -1.0000 + illegal + 2.7100 +""" + +q_values_k_5_action_south: """ + illegal + 0.0000 + illegal +""" + +q_values_k_5_action_west: """ + illegal + 0.0000 + illegal +""" + +weights_k_6: """ +{((0, 0), 'exit'): 3.439, + ((0, 1), 'east'): 0, + ((0, 1), 'north'): 0, + ((0, 1), 'south'): 0.0, + ((0, 1), 'west'): 0, + ((0, 2), 'exit'): -1.0} +""" + +q_values_k_6_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_6_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_6_action_exit: """ + -1.0000 + illegal + 3.4390 +""" + +q_values_k_6_action_south: """ + illegal + 0.0000 + illegal +""" + +q_values_k_6_action_west: """ + illegal + 0.0000 + illegal +""" + +weights_k_7: """ +{((0, 0), 'exit'): 3.439, + ((0, 1), 'east'): 0, + ((0, 1), 'north'): 0, + ((0, 1), 'south'): 0.257925, + ((0, 1), 'west'): 0, + ((0, 2), 'exit'): -1.0} +""" + +q_values_k_7_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_7_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_7_action_exit: """ + -1.0000 + illegal + 3.4390 +""" + +q_values_k_7_action_south: """ + illegal + 0.2579 + illegal +""" + +q_values_k_7_action_west: """ + illegal + 0.0000 + illegal +""" + +weights_k_8: """ +{((0, 0), 'exit'): 4.0951, + ((0, 1), 'east'): 0, + ((0, 1), 'north'): 0, + ((0, 1), 'south'): 0.257925, + ((0, 1), 'west'): 0, + ((0, 2), 'exit'): -1.0} +""" + +q_values_k_8_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_8_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_8_action_exit: """ + -1.0000 + illegal + 4.0951 +""" + +q_values_k_8_action_south: """ + illegal + 0.2579 + illegal +""" + +q_values_k_8_action_west: """ + illegal + 0.0000 + illegal +""" + +weights_k_9: """ +{((0, 0), 'exit'): 4.68559, + ((0, 1), 'east'): 0, + ((0, 1), 'north'): 0, + ((0, 1), 'south'): 0.257925, + ((0, 1), 'west'): 0, + ((0, 2), 'exit'): -1.0} +""" + +q_values_k_9_action_north: """ + illegal + 0.0000 + illegal +""" + +q_values_k_9_action_east: """ + illegal + 0.0000 + illegal +""" + +q_values_k_9_action_exit: """ + -1.0000 + illegal + 4.6856 +""" + +q_values_k_9_action_south: """ + illegal + 0.2579 + illegal +""" + +q_values_k_9_action_west: """ + illegal + 0.0000 + illegal +""" + +weights_k_100: """ +{((0, 0), 'exit'): 9.817519963685992, + ((0, 1), 'east'): 0.9498968104823575, + ((0, 1), 'north'): -0.66699795412272, + ((0, 1), 'south'): 3.256207905310105, + ((0, 1), 'west'): 0.8236280735014627, + ((0, 2), 'exit'): -9.476652366972639} +""" + +q_values_k_100_action_north: """ + illegal + -0.6670 + illegal +""" + +q_values_k_100_action_east: """ + illegal + 0.9499 + illegal +""" + +q_values_k_100_action_exit: """ + -9.4767 + illegal + 9.8175 +""" + +q_values_k_100_action_south: """ + illegal + 3.2562 + illegal +""" + +q_values_k_100_action_west: """ + illegal + 0.8236 + illegal +""" + diff --git a/reinforcement/test_cases/q8/2-tinygrid-noisy.test b/reinforcement/test_cases/q8/2-tinygrid-noisy.test new file mode 100644 index 0000000..16a809c --- /dev/null +++ b/reinforcement/test_cases/q8/2-tinygrid-noisy.test @@ -0,0 +1,22 @@ +class: "ApproximateQLearningTest" + +# GridWorld specification +# _ is empty space +# numbers are terminal states with that value +# # is a wall +# S is a start state 
+# +grid: """ + -10 + S + 10 +""" +discount: "0.75" +noise: "0.25" +livingReward: "0.0" +epsilon: "0.5" +learningRate: "0.1" +numExperiences: "100" +valueIterations: "100" +iterations: "10000" + diff --git a/reinforcement/test_cases/q8/3-bridge.solution b/reinforcement/test_cases/q8/3-bridge.solution new file mode 100644 index 0000000..3e12553 --- /dev/null +++ b/reinforcement/test_cases/q8/3-bridge.solution @@ -0,0 +1,935 @@ +weights_k_0: """ +{((0, 1), 'exit'): 0, + ((0, 2), 'exit'): 0, + ((0, 3), 'exit'): 0, + ((0, 4), 'exit'): 0, + ((0, 5), 'exit'): 0, + ((1, 0), 'exit'): 0, + ((1, 1), 'east'): 0, + ((1, 1), 'north'): 0, + ((1, 1), 'south'): 0, + ((1, 1), 'west'): 0, + ((1, 2), 'east'): 0, + ((1, 2), 'north'): 0, + ((1, 2), 'south'): 0, + ((1, 2), 'west'): 0, + ((1, 3), 'east'): 0, + ((1, 3), 'north'): 0, + ((1, 3), 'south'): 0, + ((1, 3), 'west'): 0, + ((1, 4), 'east'): 0, + ((1, 4), 'north'): 0, + ((1, 4), 'south'): 0, + ((1, 4), 'west'): 0, + ((1, 5), 'east'): 0, + ((1, 5), 'north'): 0, + ((1, 5), 'south'): 0, + ((1, 5), 'west'): 0, + ((1, 6), 'exit'): 0, + ((2, 1), 'exit'): 0, + ((2, 2), 'exit'): 0, + ((2, 3), 'exit'): 0, + ((2, 4), 'exit'): 0, + ((2, 5), 'exit'): 0} +""" + +q_values_k_0_action_north: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_0_action_east: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_0_action_exit: """ + __________ 0.0000 __________ + 0.0000 illegal 0.0000 + 0.0000 illegal 0.0000 + 0.0000 illegal 0.0000 + 0.0000 illegal 0.0000 + 0.0000 illegal 0.0000 + __________ 0.0000 __________ +""" + +q_values_k_0_action_south: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_0_action_west: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +weights_k_1: """ +{((0, 1), 'exit'): 0, + ((0, 2), 'exit'): 0, + ((0, 3), 'exit'): 0, + ((0, 4), 'exit'): 0, + ((0, 5), 'exit'): -10.0, + ((1, 0), 'exit'): 0, + ((1, 1), 'east'): 0, + ((1, 1), 'north'): 0, + ((1, 1), 'south'): 0, + ((1, 1), 'west'): 0, + ((1, 2), 'east'): 0, + ((1, 2), 'north'): 0, + ((1, 2), 'south'): 0, + ((1, 2), 'west'): 0, + ((1, 3), 'east'): 0, + ((1, 3), 'north'): 0, + ((1, 3), 'south'): 0, + ((1, 3), 'west'): 0, + ((1, 4), 'east'): 0, + ((1, 4), 'north'): 0, + ((1, 4), 'south'): 0, + ((1, 4), 'west'): 0, + ((1, 5), 'east'): 0, + ((1, 5), 'north'): 0, + ((1, 5), 'south'): 0, + ((1, 5), 'west'): 0, + ((1, 6), 'exit'): 0, + ((2, 1), 'exit'): 0, + ((2, 2), 'exit'): 0, + ((2, 3), 'exit'): 0, + ((2, 4), 'exit'): 0, + ((2, 5), 'exit'): 0} +""" + +q_values_k_1_action_north: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_1_action_east: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ 
+""" + +q_values_k_1_action_exit: """ + __________ 0.0000 __________ + -10.0000 illegal 0.0000 + 0.0000 illegal 0.0000 + 0.0000 illegal 0.0000 + 0.0000 illegal 0.0000 + 0.0000 illegal 0.0000 + __________ 0.0000 __________ +""" + +q_values_k_1_action_south: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_1_action_west: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +weights_k_2: """ +{((0, 1), 'exit'): 0, + ((0, 2), 'exit'): 0, + ((0, 3), 'exit'): 0, + ((0, 4), 'exit'): 0, + ((0, 5), 'exit'): -10.0, + ((1, 0), 'exit'): 0, + ((1, 1), 'east'): 0, + ((1, 1), 'north'): 0, + ((1, 1), 'south'): 0, + ((1, 1), 'west'): 0, + ((1, 2), 'east'): 0, + ((1, 2), 'north'): 0, + ((1, 2), 'south'): 0, + ((1, 2), 'west'): 0, + ((1, 3), 'east'): 0, + ((1, 3), 'north'): 0, + ((1, 3), 'south'): 0, + ((1, 3), 'west'): 0, + ((1, 4), 'east'): 0, + ((1, 4), 'north'): 0, + ((1, 4), 'south'): 0, + ((1, 4), 'west'): 0, + ((1, 5), 'east'): 0, + ((1, 5), 'north'): 0, + ((1, 5), 'south'): 0.0, + ((1, 5), 'west'): 0, + ((1, 6), 'exit'): 0, + ((2, 1), 'exit'): 0, + ((2, 2), 'exit'): 0, + ((2, 3), 'exit'): 0, + ((2, 4), 'exit'): 0, + ((2, 5), 'exit'): 0} +""" + +q_values_k_2_action_north: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_2_action_east: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_2_action_exit: """ + __________ 0.0000 __________ + -10.0000 illegal 0.0000 + 0.0000 illegal 0.0000 + 0.0000 illegal 0.0000 + 0.0000 illegal 0.0000 + 0.0000 illegal 0.0000 + __________ 0.0000 __________ +""" + +q_values_k_2_action_south: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_2_action_west: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +weights_k_3: """ +{((0, 1), 'exit'): -10.0, + ((0, 2), 'exit'): 0, + ((0, 3), 'exit'): 0, + ((0, 4), 'exit'): 0, + ((0, 5), 'exit'): -10.0, + ((1, 0), 'exit'): 0, + ((1, 1), 'east'): 0, + ((1, 1), 'north'): 0, + ((1, 1), 'south'): 0, + ((1, 1), 'west'): 0, + ((1, 2), 'east'): 0, + ((1, 2), 'north'): 0, + ((1, 2), 'south'): 0, + ((1, 2), 'west'): 0, + ((1, 3), 'east'): 0, + ((1, 3), 'north'): 0, + ((1, 3), 'south'): 0, + ((1, 3), 'west'): 0, + ((1, 4), 'east'): 0, + ((1, 4), 'north'): 0, + ((1, 4), 'south'): 0, + ((1, 4), 'west'): 0, + ((1, 5), 'east'): 0, + ((1, 5), 'north'): 0, + ((1, 5), 'south'): 0.0, + ((1, 5), 'west'): 0, + ((1, 6), 'exit'): 0, + ((2, 1), 'exit'): 0, + ((2, 2), 'exit'): 0, + ((2, 3), 'exit'): 0, + ((2, 4), 'exit'): 0, + ((2, 5), 'exit'): 0} +""" + +q_values_k_3_action_north: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 
illegal + __________ illegal __________ +""" + +q_values_k_3_action_east: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_3_action_exit: """ + __________ 0.0000 __________ + -10.0000 illegal 0.0000 + 0.0000 illegal 0.0000 + 0.0000 illegal 0.0000 + 0.0000 illegal 0.0000 + -10.0000 illegal 0.0000 + __________ 0.0000 __________ +""" + +q_values_k_3_action_south: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_3_action_west: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +weights_k_4: """ +{((0, 1), 'exit'): -10.0, + ((0, 2), 'exit'): 0, + ((0, 3), 'exit'): 0, + ((0, 4), 'exit'): -10.0, + ((0, 5), 'exit'): -10.0, + ((1, 0), 'exit'): 0, + ((1, 1), 'east'): 0, + ((1, 1), 'north'): 0, + ((1, 1), 'south'): 0, + ((1, 1), 'west'): 0, + ((1, 2), 'east'): 0, + ((1, 2), 'north'): 0, + ((1, 2), 'south'): 0, + ((1, 2), 'west'): 0, + ((1, 3), 'east'): 0, + ((1, 3), 'north'): 0, + ((1, 3), 'south'): 0, + ((1, 3), 'west'): 0, + ((1, 4), 'east'): 0, + ((1, 4), 'north'): 0, + ((1, 4), 'south'): 0, + ((1, 4), 'west'): 0, + ((1, 5), 'east'): 0, + ((1, 5), 'north'): 0, + ((1, 5), 'south'): 0.0, + ((1, 5), 'west'): 0, + ((1, 6), 'exit'): 0, + ((2, 1), 'exit'): 0, + ((2, 2), 'exit'): 0, + ((2, 3), 'exit'): 0, + ((2, 4), 'exit'): 0, + ((2, 5), 'exit'): 0} +""" + +q_values_k_4_action_north: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_4_action_east: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_4_action_exit: """ + __________ 0.0000 __________ + -10.0000 illegal 0.0000 + -10.0000 illegal 0.0000 + 0.0000 illegal 0.0000 + 0.0000 illegal 0.0000 + -10.0000 illegal 0.0000 + __________ 0.0000 __________ +""" + +q_values_k_4_action_south: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_4_action_west: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +weights_k_5: """ +{((0, 1), 'exit'): -10.0, + ((0, 2), 'exit'): 0, + ((0, 3), 'exit'): 0, + ((0, 4), 'exit'): -10.0, + ((0, 5), 'exit'): -10.0, + ((1, 0), 'exit'): 0, + ((1, 1), 'east'): 0, + ((1, 1), 'north'): 0, + ((1, 1), 'south'): 0, + ((1, 1), 'west'): 0, + ((1, 2), 'east'): 0, + ((1, 2), 'north'): 0, + ((1, 2), 'south'): 0, + ((1, 2), 'west'): 0, + ((1, 3), 'east'): 0, + ((1, 3), 'north'): 0, + ((1, 3), 'south'): 0, + ((1, 3), 'west'): 0, + ((1, 4), 'east'): 0, + ((1, 4), 'north'): 0, + ((1, 4), 'south'): 0, + ((1, 4), 'west'): 0, + ((1, 5), 'east'): 0, + ((1, 5), 'north'): 0, + ((1, 5), 'south'): 0.0, + ((1, 5), 'west'): 0, + ((1, 6), 'exit'): 0, + ((2, 1), 
'exit'): 0, + ((2, 2), 'exit'): 0, + ((2, 3), 'exit'): -10.0, + ((2, 4), 'exit'): 0, + ((2, 5), 'exit'): 0} +""" + +q_values_k_5_action_north: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_5_action_east: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_5_action_exit: """ + __________ 0.0000 __________ + -10.0000 illegal 0.0000 + -10.0000 illegal 0.0000 + 0.0000 illegal -10.0000 + 0.0000 illegal 0.0000 + -10.0000 illegal 0.0000 + __________ 0.0000 __________ +""" + +q_values_k_5_action_south: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_5_action_west: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +weights_k_6: """ +{((0, 1), 'exit'): -10.0, + ((0, 2), 'exit'): 0, + ((0, 3), 'exit'): -10.0, + ((0, 4), 'exit'): -10.0, + ((0, 5), 'exit'): -10.0, + ((1, 0), 'exit'): 0, + ((1, 1), 'east'): 0, + ((1, 1), 'north'): 0, + ((1, 1), 'south'): 0, + ((1, 1), 'west'): 0, + ((1, 2), 'east'): 0, + ((1, 2), 'north'): 0, + ((1, 2), 'south'): 0, + ((1, 2), 'west'): 0, + ((1, 3), 'east'): 0, + ((1, 3), 'north'): 0, + ((1, 3), 'south'): 0, + ((1, 3), 'west'): 0, + ((1, 4), 'east'): 0, + ((1, 4), 'north'): 0, + ((1, 4), 'south'): 0, + ((1, 4), 'west'): 0, + ((1, 5), 'east'): 0, + ((1, 5), 'north'): 0, + ((1, 5), 'south'): 0.0, + ((1, 5), 'west'): 0, + ((1, 6), 'exit'): 0, + ((2, 1), 'exit'): 0, + ((2, 2), 'exit'): 0, + ((2, 3), 'exit'): -10.0, + ((2, 4), 'exit'): 0, + ((2, 5), 'exit'): 0} +""" + +q_values_k_6_action_north: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_6_action_east: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_6_action_exit: """ + __________ 0.0000 __________ + -10.0000 illegal 0.0000 + -10.0000 illegal 0.0000 + -10.0000 illegal -10.0000 + 0.0000 illegal 0.0000 + -10.0000 illegal 0.0000 + __________ 0.0000 __________ +""" + +q_values_k_6_action_south: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_6_action_west: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +weights_k_7: """ +{((0, 1), 'exit'): -10.0, + ((0, 2), 'exit'): 0, + ((0, 3), 'exit'): -10.0, + ((0, 4), 'exit'): -10.0, + ((0, 5), 'exit'): -10.0, + ((1, 0), 'exit'): 0, + ((1, 1), 'east'): 0, + ((1, 1), 'north'): 0, + ((1, 1), 'south'): 0, + ((1, 1), 'west'): 0, + ((1, 2), 'east'): 0, + ((1, 2), 'north'): 0, + ((1, 2), 'south'): 0, + ((1, 2), 'west'): 0, + ((1, 
3), 'east'): 0, + ((1, 3), 'north'): 0, + ((1, 3), 'south'): 0.0, + ((1, 3), 'west'): 0, + ((1, 4), 'east'): 0, + ((1, 4), 'north'): 0, + ((1, 4), 'south'): 0, + ((1, 4), 'west'): 0, + ((1, 5), 'east'): 0, + ((1, 5), 'north'): 0, + ((1, 5), 'south'): 0.0, + ((1, 5), 'west'): 0, + ((1, 6), 'exit'): 0, + ((2, 1), 'exit'): 0, + ((2, 2), 'exit'): 0, + ((2, 3), 'exit'): -10.0, + ((2, 4), 'exit'): 0, + ((2, 5), 'exit'): 0} +""" + +q_values_k_7_action_north: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_7_action_east: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_7_action_exit: """ + __________ 0.0000 __________ + -10.0000 illegal 0.0000 + -10.0000 illegal 0.0000 + -10.0000 illegal -10.0000 + 0.0000 illegal 0.0000 + -10.0000 illegal 0.0000 + __________ 0.0000 __________ +""" + +q_values_k_7_action_south: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_7_action_west: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +weights_k_8: """ +{((0, 1), 'exit'): -10.0, + ((0, 2), 'exit'): -10.0, + ((0, 3), 'exit'): -10.0, + ((0, 4), 'exit'): -10.0, + ((0, 5), 'exit'): -10.0, + ((1, 0), 'exit'): 0, + ((1, 1), 'east'): 0, + ((1, 1), 'north'): 0, + ((1, 1), 'south'): 0, + ((1, 1), 'west'): 0, + ((1, 2), 'east'): 0, + ((1, 2), 'north'): 0, + ((1, 2), 'south'): 0, + ((1, 2), 'west'): 0, + ((1, 3), 'east'): 0, + ((1, 3), 'north'): 0, + ((1, 3), 'south'): 0.0, + ((1, 3), 'west'): 0, + ((1, 4), 'east'): 0, + ((1, 4), 'north'): 0, + ((1, 4), 'south'): 0, + ((1, 4), 'west'): 0, + ((1, 5), 'east'): 0, + ((1, 5), 'north'): 0, + ((1, 5), 'south'): 0.0, + ((1, 5), 'west'): 0, + ((1, 6), 'exit'): 0, + ((2, 1), 'exit'): 0, + ((2, 2), 'exit'): 0, + ((2, 3), 'exit'): -10.0, + ((2, 4), 'exit'): 0, + ((2, 5), 'exit'): 0} +""" + +q_values_k_8_action_north: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_8_action_east: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_8_action_exit: """ + __________ 0.0000 __________ + -10.0000 illegal 0.0000 + -10.0000 illegal 0.0000 + -10.0000 illegal -10.0000 + -10.0000 illegal 0.0000 + -10.0000 illegal 0.0000 + __________ 0.0000 __________ +""" + +q_values_k_8_action_south: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_8_action_west: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +weights_k_9: """ +{((0, 1), 'exit'): -10.0, 
+ ((0, 2), 'exit'): -10.0, + ((0, 3), 'exit'): -10.0, + ((0, 4), 'exit'): -10.0, + ((0, 5), 'exit'): -10.0, + ((1, 0), 'exit'): 0.1, + ((1, 1), 'east'): 0, + ((1, 1), 'north'): 0, + ((1, 1), 'south'): 0, + ((1, 1), 'west'): 0, + ((1, 2), 'east'): 0, + ((1, 2), 'north'): 0, + ((1, 2), 'south'): 0, + ((1, 2), 'west'): 0, + ((1, 3), 'east'): 0, + ((1, 3), 'north'): 0, + ((1, 3), 'south'): 0.0, + ((1, 3), 'west'): 0, + ((1, 4), 'east'): 0, + ((1, 4), 'north'): 0, + ((1, 4), 'south'): 0, + ((1, 4), 'west'): 0, + ((1, 5), 'east'): 0, + ((1, 5), 'north'): 0, + ((1, 5), 'south'): 0.0, + ((1, 5), 'west'): 0, + ((1, 6), 'exit'): 0, + ((2, 1), 'exit'): 0, + ((2, 2), 'exit'): 0, + ((2, 3), 'exit'): -10.0, + ((2, 4), 'exit'): 0, + ((2, 5), 'exit'): 0} +""" + +q_values_k_9_action_north: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_9_action_east: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_9_action_exit: """ + __________ 0.0000 __________ + -10.0000 illegal 0.0000 + -10.0000 illegal 0.0000 + -10.0000 illegal -10.0000 + -10.0000 illegal 0.0000 + -10.0000 illegal 0.0000 + __________ 0.1000 __________ +""" + +q_values_k_9_action_south: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +q_values_k_9_action_west: """ + __________ illegal __________ + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + illegal 0.0000 illegal + __________ illegal __________ +""" + +weights_k_500: """ +{((0, 1), 'exit'): -92.82102012308148, + ((0, 2), 'exit'): -97.21871610556306, + ((0, 3), 'exit'): -89.05810108684878, + ((0, 4), 'exit'): -97.74716004550608, + ((0, 5), 'exit'): -96.56631617970748, + ((1, 0), 'exit'): 0.9576088417247839, + ((1, 1), 'east'): -31.68839649871871, + ((1, 1), 'north'): -0.5871409700255297, + ((1, 1), 'south'): -5.571799344704395, + ((1, 1), 'west'): -20.777007017445538, + ((1, 2), 'east'): -43.595242197319, + ((1, 2), 'north'): -1.264202431807023, + ((1, 2), 'south'): -8.401530599975509, + ((1, 2), 'west'): -16.283916171605192, + ((1, 3), 'east'): -3.6956691, + ((1, 3), 'north'): -0.16712710492783758, + ((1, 3), 'south'): -3.4722840178579073, + ((1, 3), 'west'): -40.58867937480968, + ((1, 4), 'east'): -26.553386621338632, + ((1, 4), 'north'): -0.799493322153628, + ((1, 4), 'south'): -6.727671187497919, + ((1, 4), 'west'): -39.06095135014759, + ((1, 5), 'east'): -17.067638934181446, + ((1, 5), 'north'): -5.864753060887024, + ((1, 5), 'south'): -6.83769420759525, + ((1, 5), 'west'): -27.062643066307515, + ((1, 6), 'exit'): 9.353891811077332, + ((2, 1), 'exit'): -97.21871610556306, + ((2, 2), 'exit'): -87.84233454094309, + ((2, 3), 'exit'): -96.90968456173674, + ((2, 4), 'exit'): -94.185026299696, + ((2, 5), 'exit'): -96.90968456173674} +""" + +q_values_k_500_action_north: """ + __________ illegal __________ + illegal -5.8648 illegal + illegal -0.7995 illegal + illegal -0.1671 illegal + illegal -1.2642 illegal + illegal -0.5871 illegal + __________ illegal __________ +""" + +q_values_k_500_action_east: """ + __________ illegal __________ + illegal -17.0676 illegal + illegal 
-26.5534 illegal + illegal -3.6957 illegal + illegal -43.5952 illegal + illegal -31.6884 illegal + __________ illegal __________ +""" + +q_values_k_500_action_exit: """ + __________ 9.3539 __________ + -96.5663 illegal -96.9097 + -97.7472 illegal -94.1850 + -89.0581 illegal -96.9097 + -97.2187 illegal -87.8423 + -92.8210 illegal -97.2187 + __________ 0.9576 __________ +""" + +q_values_k_500_action_south: """ + __________ illegal __________ + illegal -6.8377 illegal + illegal -6.7277 illegal + illegal -3.4723 illegal + illegal -8.4015 illegal + illegal -5.5718 illegal + __________ illegal __________ +""" + +q_values_k_500_action_west: """ + __________ illegal __________ + illegal -27.0626 illegal + illegal -39.0610 illegal + illegal -40.5887 illegal + illegal -16.2839 illegal + illegal -20.7770 illegal + __________ illegal __________ +""" + diff --git a/reinforcement/test_cases/q8/3-bridge.test b/reinforcement/test_cases/q8/3-bridge.test new file mode 100644 index 0000000..7dca67b --- /dev/null +++ b/reinforcement/test_cases/q8/3-bridge.test @@ -0,0 +1,27 @@ +class: "ApproximateQLearningTest" + +# GridWorld specification +# _ is empty space +# numbers are terminal states with that value +# # is a wall +# S is a start state +# +grid: """ + # 10 # + -100 _ -100 + -100 _ -100 + -100 _ -100 + -100 _ -100 + -100 S -100 + # 1 # +""" +gridName: "bridgeGrid" +discount: "0.85" +noise: "0.1" +livingReward: "0.0" +epsilon: "0.5" +learningRate: "0.1" +numExperiences: "500" +valueIterations: "100" +iterations: "10000" + diff --git a/reinforcement/test_cases/q8/4-discountgrid.solution b/reinforcement/test_cases/q8/4-discountgrid.solution new file mode 100644 index 0000000..ddb8bd6 --- /dev/null +++ b/reinforcement/test_cases/q8/4-discountgrid.solution @@ -0,0 +1,1210 @@ +weights_k_0: """ +{((0, 0), 'exit'): 0, + ((0, 1), 'exit'): 0, + ((0, 2), 'exit'): 0, + ((0, 3), 'exit'): 0, + ((0, 4), 'exit'): 0, + ((1, 0), 'east'): 0, + ((1, 0), 'north'): 0, + ((1, 0), 'south'): 0, + ((1, 0), 'west'): 0, + ((1, 1), 'east'): 0, + ((1, 1), 'north'): 0, + ((1, 1), 'south'): 0, + ((1, 1), 'west'): 0, + ((1, 2), 'east'): 0, + ((1, 2), 'north'): 0, + ((1, 2), 'south'): 0, + ((1, 2), 'west'): 0, + ((1, 3), 'east'): 0, + ((1, 3), 'north'): 0, + ((1, 3), 'south'): 0, + ((1, 3), 'west'): 0, + ((1, 4), 'east'): 0, + ((1, 4), 'north'): 0, + ((1, 4), 'south'): 0, + ((1, 4), 'west'): 0, + ((2, 0), 'east'): 0, + ((2, 0), 'north'): 0, + ((2, 0), 'south'): 0, + ((2, 0), 'west'): 0, + ((2, 2), 'exit'): 0, + ((2, 4), 'exit'): 0, + ((3, 0), 'east'): 0, + ((3, 0), 'north'): 0, + ((3, 0), 'south'): 0, + ((3, 0), 'west'): 0, + ((3, 2), 'east'): 0, + ((3, 2), 'north'): 0, + ((3, 2), 'south'): 0, + ((3, 2), 'west'): 0, + ((3, 3), 'east'): 0, + ((3, 3), 'north'): 0, + ((3, 3), 'south'): 0, + ((3, 3), 'west'): 0, + ((3, 4), 'east'): 0, + ((3, 4), 'north'): 0, + ((3, 4), 'south'): 0, + ((3, 4), 'west'): 0, + ((4, 0), 'east'): 0, + ((4, 0), 'north'): 0, + ((4, 0), 'south'): 0, + ((4, 0), 'west'): 0, + ((4, 1), 'east'): 0, + ((4, 1), 'north'): 0, + ((4, 1), 'south'): 0, + ((4, 1), 'west'): 0, + ((4, 2), 'east'): 0, + ((4, 2), 'north'): 0, + ((4, 2), 'south'): 0, + ((4, 2), 'west'): 0, + ((4, 3), 'east'): 0, + ((4, 3), 'north'): 0, + ((4, 3), 'south'): 0, + ((4, 3), 'west'): 0, + ((4, 4), 'east'): 0, + ((4, 4), 'north'): 0, + ((4, 4), 'south'): 0, + ((4, 4), 'west'): 0} +""" + +q_values_k_0_action_north: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 
__________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_0_action_east: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_0_action_exit: """ + 0.0000 illegal 0.0000 illegal illegal + 0.0000 illegal __________ illegal illegal + 0.0000 illegal 0.0000 illegal illegal + 0.0000 illegal __________ __________ illegal + 0.0000 illegal illegal illegal illegal +""" + +q_values_k_0_action_south: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_0_action_west: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +weights_k_1: """ +{((0, 0), 'exit'): 0, + ((0, 1), 'exit'): 0, + ((0, 2), 'exit'): 0, + ((0, 3), 'exit'): 0, + ((0, 4), 'exit'): 0, + ((1, 0), 'east'): 0, + ((1, 0), 'north'): 0, + ((1, 0), 'south'): 0.0, + ((1, 0), 'west'): 0, + ((1, 1), 'east'): 0, + ((1, 1), 'north'): 0, + ((1, 1), 'south'): 0, + ((1, 1), 'west'): 0, + ((1, 2), 'east'): 0, + ((1, 2), 'north'): 0, + ((1, 2), 'south'): 0, + ((1, 2), 'west'): 0, + ((1, 3), 'east'): 0, + ((1, 3), 'north'): 0, + ((1, 3), 'south'): 0, + ((1, 3), 'west'): 0, + ((1, 4), 'east'): 0, + ((1, 4), 'north'): 0, + ((1, 4), 'south'): 0, + ((1, 4), 'west'): 0, + ((2, 0), 'east'): 0, + ((2, 0), 'north'): 0, + ((2, 0), 'south'): 0, + ((2, 0), 'west'): 0, + ((2, 2), 'exit'): 0, + ((2, 4), 'exit'): 0, + ((3, 0), 'east'): 0, + ((3, 0), 'north'): 0, + ((3, 0), 'south'): 0, + ((3, 0), 'west'): 0, + ((3, 2), 'east'): 0, + ((3, 2), 'north'): 0, + ((3, 2), 'south'): 0, + ((3, 2), 'west'): 0, + ((3, 3), 'east'): 0, + ((3, 3), 'north'): 0, + ((3, 3), 'south'): 0, + ((3, 3), 'west'): 0, + ((3, 4), 'east'): 0, + ((3, 4), 'north'): 0, + ((3, 4), 'south'): 0, + ((3, 4), 'west'): 0, + ((4, 0), 'east'): 0, + ((4, 0), 'north'): 0, + ((4, 0), 'south'): 0, + ((4, 0), 'west'): 0, + ((4, 1), 'east'): 0, + ((4, 1), 'north'): 0, + ((4, 1), 'south'): 0, + ((4, 1), 'west'): 0, + ((4, 2), 'east'): 0, + ((4, 2), 'north'): 0, + ((4, 2), 'south'): 0, + ((4, 2), 'west'): 0, + ((4, 3), 'east'): 0, + ((4, 3), 'north'): 0, + ((4, 3), 'south'): 0, + ((4, 3), 'west'): 0, + ((4, 4), 'east'): 0, + ((4, 4), 'north'): 0, + ((4, 4), 'south'): 0, + ((4, 4), 'west'): 0} +""" + +q_values_k_1_action_north: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_1_action_east: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_1_action_exit: """ + 0.0000 illegal 0.0000 illegal illegal + 0.0000 illegal __________ illegal illegal + 0.0000 illegal 0.0000 illegal illegal + 0.0000 illegal __________ __________ illegal + 0.0000 illegal illegal illegal illegal +""" + +q_values_k_1_action_south: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 
__________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_1_action_west: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +weights_k_2: """ +{((0, 0), 'exit'): 0, + ((0, 1), 'exit'): 0, + ((0, 2), 'exit'): 0, + ((0, 3), 'exit'): 0, + ((0, 4), 'exit'): 0, + ((1, 0), 'east'): 0, + ((1, 0), 'north'): 0, + ((1, 0), 'south'): 0.0, + ((1, 0), 'west'): 0, + ((1, 1), 'east'): 0, + ((1, 1), 'north'): 0, + ((1, 1), 'south'): 0, + ((1, 1), 'west'): 0, + ((1, 2), 'east'): 0, + ((1, 2), 'north'): 0, + ((1, 2), 'south'): 0, + ((1, 2), 'west'): 0, + ((1, 3), 'east'): 0, + ((1, 3), 'north'): 0, + ((1, 3), 'south'): 0, + ((1, 3), 'west'): 0, + ((1, 4), 'east'): 0, + ((1, 4), 'north'): 0, + ((1, 4), 'south'): 0, + ((1, 4), 'west'): 0, + ((2, 0), 'east'): 0, + ((2, 0), 'north'): 0, + ((2, 0), 'south'): 0, + ((2, 0), 'west'): 0, + ((2, 2), 'exit'): 0, + ((2, 4), 'exit'): 0, + ((3, 0), 'east'): 0, + ((3, 0), 'north'): 0, + ((3, 0), 'south'): 0.0, + ((3, 0), 'west'): 0, + ((3, 2), 'east'): 0, + ((3, 2), 'north'): 0, + ((3, 2), 'south'): 0, + ((3, 2), 'west'): 0, + ((3, 3), 'east'): 0, + ((3, 3), 'north'): 0, + ((3, 3), 'south'): 0, + ((3, 3), 'west'): 0, + ((3, 4), 'east'): 0, + ((3, 4), 'north'): 0, + ((3, 4), 'south'): 0, + ((3, 4), 'west'): 0, + ((4, 0), 'east'): 0, + ((4, 0), 'north'): 0, + ((4, 0), 'south'): 0, + ((4, 0), 'west'): 0, + ((4, 1), 'east'): 0, + ((4, 1), 'north'): 0, + ((4, 1), 'south'): 0, + ((4, 1), 'west'): 0, + ((4, 2), 'east'): 0, + ((4, 2), 'north'): 0, + ((4, 2), 'south'): 0, + ((4, 2), 'west'): 0, + ((4, 3), 'east'): 0, + ((4, 3), 'north'): 0, + ((4, 3), 'south'): 0, + ((4, 3), 'west'): 0, + ((4, 4), 'east'): 0, + ((4, 4), 'north'): 0, + ((4, 4), 'south'): 0, + ((4, 4), 'west'): 0} +""" + +q_values_k_2_action_north: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_2_action_east: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_2_action_exit: """ + 0.0000 illegal 0.0000 illegal illegal + 0.0000 illegal __________ illegal illegal + 0.0000 illegal 0.0000 illegal illegal + 0.0000 illegal __________ __________ illegal + 0.0000 illegal illegal illegal illegal +""" + +q_values_k_2_action_south: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_2_action_west: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +weights_k_3: """ +{((0, 0), 'exit'): -1.0, + ((0, 1), 'exit'): 0, + ((0, 2), 'exit'): 0, + ((0, 3), 'exit'): 0, + ((0, 4), 'exit'): 0, + ((1, 0), 'east'): 0, + ((1, 0), 'north'): 0, + ((1, 0), 'south'): 0.0, + ((1, 0), 'west'): 0, + ((1, 1), 'east'): 0, + ((1, 1), 'north'): 0, + ((1, 1), 'south'): 0, + ((1, 1), 'west'): 0, + ((1, 2), 'east'): 0, + ((1, 2), 'north'): 0, + ((1, 2), 'south'): 0, + ((1, 2), 'west'): 
0, + ((1, 3), 'east'): 0, + ((1, 3), 'north'): 0, + ((1, 3), 'south'): 0, + ((1, 3), 'west'): 0, + ((1, 4), 'east'): 0, + ((1, 4), 'north'): 0, + ((1, 4), 'south'): 0, + ((1, 4), 'west'): 0, + ((2, 0), 'east'): 0, + ((2, 0), 'north'): 0, + ((2, 0), 'south'): 0, + ((2, 0), 'west'): 0, + ((2, 2), 'exit'): 0, + ((2, 4), 'exit'): 0, + ((3, 0), 'east'): 0, + ((3, 0), 'north'): 0, + ((3, 0), 'south'): 0.0, + ((3, 0), 'west'): 0, + ((3, 2), 'east'): 0, + ((3, 2), 'north'): 0, + ((3, 2), 'south'): 0, + ((3, 2), 'west'): 0, + ((3, 3), 'east'): 0, + ((3, 3), 'north'): 0, + ((3, 3), 'south'): 0, + ((3, 3), 'west'): 0, + ((3, 4), 'east'): 0, + ((3, 4), 'north'): 0, + ((3, 4), 'south'): 0, + ((3, 4), 'west'): 0, + ((4, 0), 'east'): 0, + ((4, 0), 'north'): 0, + ((4, 0), 'south'): 0, + ((4, 0), 'west'): 0, + ((4, 1), 'east'): 0, + ((4, 1), 'north'): 0, + ((4, 1), 'south'): 0, + ((4, 1), 'west'): 0, + ((4, 2), 'east'): 0, + ((4, 2), 'north'): 0, + ((4, 2), 'south'): 0, + ((4, 2), 'west'): 0, + ((4, 3), 'east'): 0, + ((4, 3), 'north'): 0, + ((4, 3), 'south'): 0, + ((4, 3), 'west'): 0, + ((4, 4), 'east'): 0, + ((4, 4), 'north'): 0, + ((4, 4), 'south'): 0, + ((4, 4), 'west'): 0} +""" + +q_values_k_3_action_north: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_3_action_east: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_3_action_exit: """ + 0.0000 illegal 0.0000 illegal illegal + 0.0000 illegal __________ illegal illegal + 0.0000 illegal 0.0000 illegal illegal + 0.0000 illegal __________ __________ illegal + -1.0000 illegal illegal illegal illegal +""" + +q_values_k_3_action_south: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_3_action_west: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +weights_k_4: """ +{((0, 0), 'exit'): -1.0, + ((0, 1), 'exit'): 0, + ((0, 2), 'exit'): 0, + ((0, 3), 'exit'): 0, + ((0, 4), 'exit'): 0, + ((1, 0), 'east'): 0.0, + ((1, 0), 'north'): 0, + ((1, 0), 'south'): 0.0, + ((1, 0), 'west'): 0, + ((1, 1), 'east'): 0, + ((1, 1), 'north'): 0, + ((1, 1), 'south'): 0, + ((1, 1), 'west'): 0, + ((1, 2), 'east'): 0, + ((1, 2), 'north'): 0, + ((1, 2), 'south'): 0, + ((1, 2), 'west'): 0, + ((1, 3), 'east'): 0, + ((1, 3), 'north'): 0, + ((1, 3), 'south'): 0, + ((1, 3), 'west'): 0, + ((1, 4), 'east'): 0, + ((1, 4), 'north'): 0, + ((1, 4), 'south'): 0, + ((1, 4), 'west'): 0, + ((2, 0), 'east'): 0, + ((2, 0), 'north'): 0, + ((2, 0), 'south'): 0, + ((2, 0), 'west'): 0, + ((2, 2), 'exit'): 0, + ((2, 4), 'exit'): 0, + ((3, 0), 'east'): 0, + ((3, 0), 'north'): 0, + ((3, 0), 'south'): 0.0, + ((3, 0), 'west'): 0, + ((3, 2), 'east'): 0, + ((3, 2), 'north'): 0, + ((3, 2), 'south'): 0, + ((3, 2), 'west'): 0, + ((3, 3), 'east'): 0, + ((3, 3), 'north'): 0, + ((3, 3), 'south'): 0, + ((3, 3), 'west'): 0, + ((3, 4), 'east'): 0, + ((3, 4), 'north'): 0, + ((3, 4), 'south'): 0, + ((3, 4), 'west'): 0, + ((4, 0), 'east'): 
0, + ((4, 0), 'north'): 0, + ((4, 0), 'south'): 0, + ((4, 0), 'west'): 0, + ((4, 1), 'east'): 0, + ((4, 1), 'north'): 0, + ((4, 1), 'south'): 0, + ((4, 1), 'west'): 0, + ((4, 2), 'east'): 0, + ((4, 2), 'north'): 0, + ((4, 2), 'south'): 0, + ((4, 2), 'west'): 0, + ((4, 3), 'east'): 0, + ((4, 3), 'north'): 0, + ((4, 3), 'south'): 0, + ((4, 3), 'west'): 0, + ((4, 4), 'east'): 0, + ((4, 4), 'north'): 0, + ((4, 4), 'south'): 0, + ((4, 4), 'west'): 0} +""" + +q_values_k_4_action_north: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_4_action_east: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_4_action_exit: """ + 0.0000 illegal 0.0000 illegal illegal + 0.0000 illegal __________ illegal illegal + 0.0000 illegal 0.0000 illegal illegal + 0.0000 illegal __________ __________ illegal + -1.0000 illegal illegal illegal illegal +""" + +q_values_k_4_action_south: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_4_action_west: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +weights_k_5: """ +{((0, 0), 'exit'): -1.0, + ((0, 1), 'exit'): 0, + ((0, 2), 'exit'): 0, + ((0, 3), 'exit'): 0, + ((0, 4), 'exit'): 0, + ((1, 0), 'east'): 0.0, + ((1, 0), 'north'): 0, + ((1, 0), 'south'): 0.0, + ((1, 0), 'west'): 0, + ((1, 1), 'east'): 0, + ((1, 1), 'north'): 0, + ((1, 1), 'south'): 0, + ((1, 1), 'west'): 0, + ((1, 2), 'east'): 0, + ((1, 2), 'north'): 0, + ((1, 2), 'south'): 0, + ((1, 2), 'west'): 0, + ((1, 3), 'east'): 0, + ((1, 3), 'north'): 0, + ((1, 3), 'south'): 0, + ((1, 3), 'west'): 0, + ((1, 4), 'east'): 0, + ((1, 4), 'north'): 0, + ((1, 4), 'south'): 0, + ((1, 4), 'west'): 0, + ((2, 0), 'east'): 0, + ((2, 0), 'north'): 0, + ((2, 0), 'south'): 0, + ((2, 0), 'west'): 0, + ((2, 2), 'exit'): 0, + ((2, 4), 'exit'): 0, + ((3, 0), 'east'): 0, + ((3, 0), 'north'): 0, + ((3, 0), 'south'): 0.0, + ((3, 0), 'west'): 0, + ((3, 2), 'east'): 0, + ((3, 2), 'north'): 0, + ((3, 2), 'south'): 0, + ((3, 2), 'west'): 0, + ((3, 3), 'east'): 0, + ((3, 3), 'north'): 0, + ((3, 3), 'south'): 0, + ((3, 3), 'west'): 0, + ((3, 4), 'east'): 0, + ((3, 4), 'north'): 0, + ((3, 4), 'south'): 0, + ((3, 4), 'west'): 0, + ((4, 0), 'east'): 0, + ((4, 0), 'north'): 0, + ((4, 0), 'south'): 0, + ((4, 0), 'west'): 0, + ((4, 1), 'east'): 0, + ((4, 1), 'north'): 0, + ((4, 1), 'south'): 0, + ((4, 1), 'west'): 0.0, + ((4, 2), 'east'): 0, + ((4, 2), 'north'): 0, + ((4, 2), 'south'): 0, + ((4, 2), 'west'): 0, + ((4, 3), 'east'): 0, + ((4, 3), 'north'): 0, + ((4, 3), 'south'): 0, + ((4, 3), 'west'): 0, + ((4, 4), 'east'): 0, + ((4, 4), 'north'): 0, + ((4, 4), 'south'): 0, + ((4, 4), 'west'): 0} +""" + +q_values_k_5_action_north: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_5_action_east: 
""" + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_5_action_exit: """ + 0.0000 illegal 0.0000 illegal illegal + 0.0000 illegal __________ illegal illegal + 0.0000 illegal 0.0000 illegal illegal + 0.0000 illegal __________ __________ illegal + -1.0000 illegal illegal illegal illegal +""" + +q_values_k_5_action_south: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_5_action_west: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +weights_k_6: """ +{((0, 0), 'exit'): -1.0, + ((0, 1), 'exit'): 0, + ((0, 2), 'exit'): 0, + ((0, 3), 'exit'): -1.0, + ((0, 4), 'exit'): 0, + ((1, 0), 'east'): 0.0, + ((1, 0), 'north'): 0, + ((1, 0), 'south'): 0.0, + ((1, 0), 'west'): 0, + ((1, 1), 'east'): 0, + ((1, 1), 'north'): 0, + ((1, 1), 'south'): 0, + ((1, 1), 'west'): 0, + ((1, 2), 'east'): 0, + ((1, 2), 'north'): 0, + ((1, 2), 'south'): 0, + ((1, 2), 'west'): 0, + ((1, 3), 'east'): 0, + ((1, 3), 'north'): 0, + ((1, 3), 'south'): 0, + ((1, 3), 'west'): 0, + ((1, 4), 'east'): 0, + ((1, 4), 'north'): 0, + ((1, 4), 'south'): 0, + ((1, 4), 'west'): 0, + ((2, 0), 'east'): 0, + ((2, 0), 'north'): 0, + ((2, 0), 'south'): 0, + ((2, 0), 'west'): 0, + ((2, 2), 'exit'): 0, + ((2, 4), 'exit'): 0, + ((3, 0), 'east'): 0, + ((3, 0), 'north'): 0, + ((3, 0), 'south'): 0.0, + ((3, 0), 'west'): 0, + ((3, 2), 'east'): 0, + ((3, 2), 'north'): 0, + ((3, 2), 'south'): 0, + ((3, 2), 'west'): 0, + ((3, 3), 'east'): 0, + ((3, 3), 'north'): 0, + ((3, 3), 'south'): 0, + ((3, 3), 'west'): 0, + ((3, 4), 'east'): 0, + ((3, 4), 'north'): 0, + ((3, 4), 'south'): 0, + ((3, 4), 'west'): 0, + ((4, 0), 'east'): 0, + ((4, 0), 'north'): 0, + ((4, 0), 'south'): 0, + ((4, 0), 'west'): 0, + ((4, 1), 'east'): 0, + ((4, 1), 'north'): 0, + ((4, 1), 'south'): 0, + ((4, 1), 'west'): 0.0, + ((4, 2), 'east'): 0, + ((4, 2), 'north'): 0, + ((4, 2), 'south'): 0, + ((4, 2), 'west'): 0, + ((4, 3), 'east'): 0, + ((4, 3), 'north'): 0, + ((4, 3), 'south'): 0, + ((4, 3), 'west'): 0, + ((4, 4), 'east'): 0, + ((4, 4), 'north'): 0, + ((4, 4), 'south'): 0, + ((4, 4), 'west'): 0} +""" + +q_values_k_6_action_north: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_6_action_east: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_6_action_exit: """ + 0.0000 illegal 0.0000 illegal illegal + -1.0000 illegal __________ illegal illegal + 0.0000 illegal 0.0000 illegal illegal + 0.0000 illegal __________ __________ illegal + -1.0000 illegal illegal illegal illegal +""" + +q_values_k_6_action_south: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + 
+q_values_k_6_action_west: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +weights_k_7: """ +{((0, 0), 'exit'): -1.0, + ((0, 1), 'exit'): 0, + ((0, 2), 'exit'): 0, + ((0, 3), 'exit'): -1.0, + ((0, 4), 'exit'): 0, + ((1, 0), 'east'): 0.0, + ((1, 0), 'north'): 0, + ((1, 0), 'south'): 0.0, + ((1, 0), 'west'): 0, + ((1, 1), 'east'): 0, + ((1, 1), 'north'): 0, + ((1, 1), 'south'): 0, + ((1, 1), 'west'): 0, + ((1, 2), 'east'): 0, + ((1, 2), 'north'): 0, + ((1, 2), 'south'): 0, + ((1, 2), 'west'): 0, + ((1, 3), 'east'): 0, + ((1, 3), 'north'): 0, + ((1, 3), 'south'): 0, + ((1, 3), 'west'): 0, + ((1, 4), 'east'): 0, + ((1, 4), 'north'): 0, + ((1, 4), 'south'): 0, + ((1, 4), 'west'): 0, + ((2, 0), 'east'): 0, + ((2, 0), 'north'): 0, + ((2, 0), 'south'): 0, + ((2, 0), 'west'): 0, + ((2, 2), 'exit'): 0.1, + ((2, 4), 'exit'): 0, + ((3, 0), 'east'): 0, + ((3, 0), 'north'): 0, + ((3, 0), 'south'): 0.0, + ((3, 0), 'west'): 0, + ((3, 2), 'east'): 0, + ((3, 2), 'north'): 0, + ((3, 2), 'south'): 0, + ((3, 2), 'west'): 0, + ((3, 3), 'east'): 0, + ((3, 3), 'north'): 0, + ((3, 3), 'south'): 0, + ((3, 3), 'west'): 0, + ((3, 4), 'east'): 0, + ((3, 4), 'north'): 0, + ((3, 4), 'south'): 0, + ((3, 4), 'west'): 0, + ((4, 0), 'east'): 0, + ((4, 0), 'north'): 0, + ((4, 0), 'south'): 0, + ((4, 0), 'west'): 0, + ((4, 1), 'east'): 0, + ((4, 1), 'north'): 0, + ((4, 1), 'south'): 0, + ((4, 1), 'west'): 0.0, + ((4, 2), 'east'): 0, + ((4, 2), 'north'): 0, + ((4, 2), 'south'): 0, + ((4, 2), 'west'): 0, + ((4, 3), 'east'): 0, + ((4, 3), 'north'): 0, + ((4, 3), 'south'): 0, + ((4, 3), 'west'): 0, + ((4, 4), 'east'): 0, + ((4, 4), 'north'): 0, + ((4, 4), 'south'): 0, + ((4, 4), 'west'): 0} +""" + +q_values_k_7_action_north: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_7_action_east: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_7_action_exit: """ + 0.0000 illegal 0.0000 illegal illegal + -1.0000 illegal __________ illegal illegal + 0.0000 illegal 0.1000 illegal illegal + 0.0000 illegal __________ __________ illegal + -1.0000 illegal illegal illegal illegal +""" + +q_values_k_7_action_south: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_7_action_west: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +weights_k_8: """ +{((0, 0), 'exit'): -1.0, + ((0, 1), 'exit'): -1.0, + ((0, 2), 'exit'): 0, + ((0, 3), 'exit'): -1.0, + ((0, 4), 'exit'): 0, + ((1, 0), 'east'): 0.0, + ((1, 0), 'north'): 0, + ((1, 0), 'south'): 0.0, + ((1, 0), 'west'): 0, + ((1, 1), 'east'): 0, + ((1, 1), 'north'): 0, + ((1, 1), 'south'): 0, + ((1, 1), 'west'): 0, + ((1, 2), 'east'): 0, + ((1, 2), 'north'): 0, + ((1, 2), 'south'): 0, + ((1, 2), 'west'): 0, + ((1, 3), 'east'): 0, + ((1, 3), 'north'): 0, + 
((1, 3), 'south'): 0, + ((1, 3), 'west'): 0, + ((1, 4), 'east'): 0, + ((1, 4), 'north'): 0, + ((1, 4), 'south'): 0, + ((1, 4), 'west'): 0, + ((2, 0), 'east'): 0, + ((2, 0), 'north'): 0, + ((2, 0), 'south'): 0, + ((2, 0), 'west'): 0, + ((2, 2), 'exit'): 0.1, + ((2, 4), 'exit'): 0, + ((3, 0), 'east'): 0, + ((3, 0), 'north'): 0, + ((3, 0), 'south'): 0.0, + ((3, 0), 'west'): 0, + ((3, 2), 'east'): 0, + ((3, 2), 'north'): 0, + ((3, 2), 'south'): 0, + ((3, 2), 'west'): 0, + ((3, 3), 'east'): 0, + ((3, 3), 'north'): 0, + ((3, 3), 'south'): 0, + ((3, 3), 'west'): 0, + ((3, 4), 'east'): 0, + ((3, 4), 'north'): 0, + ((3, 4), 'south'): 0, + ((3, 4), 'west'): 0, + ((4, 0), 'east'): 0, + ((4, 0), 'north'): 0, + ((4, 0), 'south'): 0, + ((4, 0), 'west'): 0, + ((4, 1), 'east'): 0, + ((4, 1), 'north'): 0, + ((4, 1), 'south'): 0, + ((4, 1), 'west'): 0.0, + ((4, 2), 'east'): 0, + ((4, 2), 'north'): 0, + ((4, 2), 'south'): 0, + ((4, 2), 'west'): 0, + ((4, 3), 'east'): 0, + ((4, 3), 'north'): 0, + ((4, 3), 'south'): 0, + ((4, 3), 'west'): 0, + ((4, 4), 'east'): 0, + ((4, 4), 'north'): 0, + ((4, 4), 'south'): 0, + ((4, 4), 'west'): 0} +""" + +q_values_k_8_action_north: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_8_action_east: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_8_action_exit: """ + 0.0000 illegal 0.0000 illegal illegal + -1.0000 illegal __________ illegal illegal + 0.0000 illegal 0.1000 illegal illegal + -1.0000 illegal __________ __________ illegal + -1.0000 illegal illegal illegal illegal +""" + +q_values_k_8_action_south: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_8_action_west: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +weights_k_9: """ +{((0, 0), 'exit'): -1.0, + ((0, 1), 'exit'): -1.0, + ((0, 2), 'exit'): 0, + ((0, 3), 'exit'): -1.0, + ((0, 4), 'exit'): 0, + ((1, 0), 'east'): 0.0, + ((1, 0), 'north'): 0, + ((1, 0), 'south'): 0.0, + ((1, 0), 'west'): 0, + ((1, 1), 'east'): 0, + ((1, 1), 'north'): -0.09000000000000001, + ((1, 1), 'south'): 0, + ((1, 1), 'west'): 0, + ((1, 2), 'east'): 0, + ((1, 2), 'north'): 0, + ((1, 2), 'south'): 0, + ((1, 2), 'west'): 0, + ((1, 3), 'east'): 0, + ((1, 3), 'north'): 0, + ((1, 3), 'south'): 0, + ((1, 3), 'west'): 0, + ((1, 4), 'east'): 0, + ((1, 4), 'north'): 0, + ((1, 4), 'south'): 0, + ((1, 4), 'west'): 0, + ((2, 0), 'east'): 0, + ((2, 0), 'north'): 0, + ((2, 0), 'south'): 0, + ((2, 0), 'west'): 0, + ((2, 2), 'exit'): 0.1, + ((2, 4), 'exit'): 0, + ((3, 0), 'east'): 0, + ((3, 0), 'north'): 0, + ((3, 0), 'south'): 0.0, + ((3, 0), 'west'): 0, + ((3, 2), 'east'): 0, + ((3, 2), 'north'): 0, + ((3, 2), 'south'): 0, + ((3, 2), 'west'): 0, + ((3, 3), 'east'): 0, + ((3, 3), 'north'): 0, + ((3, 3), 'south'): 0, + ((3, 3), 'west'): 0, + ((3, 4), 'east'): 0, + ((3, 4), 'north'): 0, + ((3, 4), 'south'): 0, + ((3, 4), 'west'): 0, + ((4, 0), 'east'): 0, + ((4, 0), 
'north'): 0, + ((4, 0), 'south'): 0, + ((4, 0), 'west'): 0, + ((4, 1), 'east'): 0, + ((4, 1), 'north'): 0, + ((4, 1), 'south'): 0, + ((4, 1), 'west'): 0.0, + ((4, 2), 'east'): 0, + ((4, 2), 'north'): 0, + ((4, 2), 'south'): 0, + ((4, 2), 'west'): 0, + ((4, 3), 'east'): 0, + ((4, 3), 'north'): 0, + ((4, 3), 'south'): 0, + ((4, 3), 'west'): 0, + ((4, 4), 'east'): 0, + ((4, 4), 'north'): 0, + ((4, 4), 'south'): 0, + ((4, 4), 'west'): 0} +""" + +q_values_k_9_action_north: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal -0.0900 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_9_action_east: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_9_action_exit: """ + 0.0000 illegal 0.0000 illegal illegal + -1.0000 illegal __________ illegal illegal + 0.0000 illegal 0.1000 illegal illegal + -1.0000 illegal __________ __________ illegal + -1.0000 illegal illegal illegal illegal +""" + +q_values_k_9_action_south: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_9_action_west: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +weights_k_3000: """ +{((0, 0), 'exit'): -9.999907386128688, + ((0, 1), 'exit'): -9.999997424145315, + ((0, 2), 'exit'): -9.999960132765212, + ((0, 3), 'exit'): -9.999950781191618, + ((0, 4), 'exit'): -9.999999718152583, + ((1, 0), 'east'): 0.023883944628551798, + ((1, 0), 'north'): 0.007345075517907217, + ((1, 0), 'south'): -1.8500541605036829, + ((1, 0), 'west'): -6.343206820101826, + ((1, 1), 'east'): 0.2926773341159188, + ((1, 1), 'north'): 0.34229408532424677, + ((1, 1), 'south'): -0.5504925565503596, + ((1, 1), 'west'): -7.476747986710549, + ((1, 2), 'east'): 0.8687488956116186, + ((1, 2), 'north'): 1.069417111875237, + ((1, 2), 'south'): 0.20934913434979205, + ((1, 2), 'west'): -8.039885148007473, + ((1, 3), 'east'): 2.0499012629179343, + ((1, 3), 'north'): 4.423789087362333, + ((1, 3), 'south'): -0.5604917324646312, + ((1, 3), 'west'): -5.401323597944641, + ((1, 4), 'east'): 8.058415501251869, + ((1, 4), 'north'): 4.320461066773469, + ((1, 4), 'south'): -0.3521106259982003, + ((1, 4), 'west'): -6.200128408737511, + ((2, 0), 'east'): 0.009741671398482152, + ((2, 0), 'north'): 0.007940007422962705, + ((2, 0), 'south'): 0.005998204691157282, + ((2, 0), 'west'): 0.017876817897686022, + ((2, 2), 'exit'): 0.9999997681730781, + ((2, 4), 'exit'): 9.99998874031536, + ((3, 0), 'east'): 0.16105276299757887, + ((3, 0), 'north'): 0.04837252814060002, + ((3, 0), 'south'): 0.05142911717022169, + ((3, 0), 'west'): 0.018771554676648216, + ((3, 2), 'east'): 1.7398137867471506, + ((3, 2), 'north'): 3.686710364480742, + ((3, 2), 'south'): 1.5388764706778615, + ((3, 2), 'west'): 0.96534651035605, + ((3, 3), 'east'): 3.237326922914182, + ((3, 3), 'north'): 5.228354896238455, + ((3, 3), 'south'): 2.13459124715536, + ((3, 3), 'west'): 4.048386126159169, + ((3, 4), 'east'): 3.724489705852316, + ((3, 4), 'north'): 6.151706012884094, + ((3, 4), 'south'): 
3.6948394494904564, + ((3, 4), 'west'): 7.514601541200661, + ((4, 0), 'east'): 0.20513468944645144, + ((4, 0), 'north'): 0.37681951125732005, + ((4, 0), 'south'): 0.12225019530041295, + ((4, 0), 'west'): 0.1027564434880755, + ((4, 1), 'east'): 0.6668747131568407, + ((4, 1), 'north'): 1.0655192675373433, + ((4, 1), 'south'): 0.10056777985567189, + ((4, 1), 'west'): 0.3933711247168481, + ((4, 2), 'east'): 1.267139958918678, + ((4, 2), 'north'): 2.04180345588135, + ((4, 2), 'south'): 0.5520994720362629, + ((4, 2), 'west'): 1.6080936315813792, + ((4, 3), 'east'): 2.174243240311463, + ((4, 3), 'north'): 3.5128789267557274, + ((4, 3), 'south'): 1.567368624478333, + ((4, 3), 'west'): 3.4125636359365155, + ((4, 4), 'east'): 3.3947156310730717, + ((4, 4), 'north'): 3.809539514332263, + ((4, 4), 'south'): 2.9139369998943274, + ((4, 4), 'west'): 4.901442747463662} +""" + +q_values_k_3000_action_north: """ + illegal 4.3205 illegal 6.1517 3.8095 + illegal 4.4238 __________ 5.2284 3.5129 + illegal 1.0694 illegal 3.6867 2.0418 + illegal 0.3423 __________ __________ 1.0655 + illegal 0.0073 0.0079 0.0484 0.3768 +""" + +q_values_k_3000_action_east: """ + illegal 8.0584 illegal 3.7245 3.3947 + illegal 2.0499 __________ 3.2373 2.1742 + illegal 0.8687 illegal 1.7398 1.2671 + illegal 0.2927 __________ __________ 0.6669 + illegal 0.0239 0.0097 0.1611 0.2051 +""" + +q_values_k_3000_action_exit: """ + -10.0000 illegal 10.0000 illegal illegal + -10.0000 illegal __________ illegal illegal + -10.0000 illegal 1.0000 illegal illegal + -10.0000 illegal __________ __________ illegal + -9.9999 illegal illegal illegal illegal +""" + +q_values_k_3000_action_south: """ + illegal -0.3521 illegal 3.6948 2.9139 + illegal -0.5605 __________ 2.1346 1.5674 + illegal 0.2093 illegal 1.5389 0.5521 + illegal -0.5505 __________ __________ 0.1006 + illegal -1.8501 0.0060 0.0514 0.1223 +""" + +q_values_k_3000_action_west: """ + illegal -6.2001 illegal 7.5146 4.9014 + illegal -5.4013 __________ 4.0484 3.4126 + illegal -8.0399 illegal 0.9653 1.6081 + illegal -7.4767 __________ __________ 0.3934 + illegal -6.3432 0.0179 0.0188 0.1028 +""" + diff --git a/reinforcement/test_cases/q8/4-discountgrid.test b/reinforcement/test_cases/q8/4-discountgrid.test new file mode 100644 index 0000000..f553ba1 --- /dev/null +++ b/reinforcement/test_cases/q8/4-discountgrid.test @@ -0,0 +1,24 @@ +class: "ApproximateQLearningTest" + +# GridWorld specification +# _ is empty space +# numbers are terminal states with that value +# # is a wall +# S is a start state +# +grid: """ + -10 _ 10 _ _ + -10 _ # _ _ + -10 _ 1 _ _ + -10 _ # # _ + -10 S _ _ _ +""" +discount: "0.9" +noise: "0.2" +livingReward: "0.0" +epsilon: "0.2" +learningRate: "0.1" +numExperiences: "3000" +valueIterations: "100" +iterations: "10000" + diff --git a/reinforcement/test_cases/q8/5-coord-extractor.solution b/reinforcement/test_cases/q8/5-coord-extractor.solution new file mode 100644 index 0000000..7e529ac --- /dev/null +++ b/reinforcement/test_cases/q8/5-coord-extractor.solution @@ -0,0 +1,880 @@ +weights_k_0: """ +{'action=east': 0, + 'action=exit': 0, + 'action=north': 0, + 'action=south': 0, + 'action=west': 0, + 'x=0': 0, + 'x=1': 0, + 'x=2': 0, + 'x=3': 0, + 'x=4': 0, + 'y=0': 0, + 'y=1': 0, + 'y=2': 0, + 'y=3': 0, + 'y=4': 0, + (0, 0): 0, + (0, 1): 0, + (0, 2): 0, + (0, 3): 0, + (0, 4): 0, + (1, 0): 0, + (1, 1): 0, + (1, 2): 0, + (1, 3): 0, + (1, 4): 0, + (2, 0): 0, + (2, 2): 0, + (2, 4): 0, + (3, 0): 0, + (3, 2): 0, + (3, 3): 0, + (3, 4): 0, + (4, 0): 0, + (4, 1): 0, + (4, 2): 0, + (4, 
3): 0, + (4, 4): 0} +""" + +q_values_k_0_action_north: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_0_action_east: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_0_action_exit: """ + 0.0000 illegal 0.0000 illegal illegal + 0.0000 illegal __________ illegal illegal + 0.0000 illegal 0.0000 illegal illegal + 0.0000 illegal __________ __________ illegal + 0.0000 illegal illegal illegal illegal +""" + +q_values_k_0_action_south: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_0_action_west: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +weights_k_1: """ +{'action=east': 0, + 'action=exit': 0, + 'action=north': 0, + 'action=south': 0.0, + 'action=west': 0, + 'x=0': 0, + 'x=1': 0.0, + 'x=2': 0, + 'x=3': 0, + 'x=4': 0, + 'y=0': 0, + 'y=1': 0.0, + 'y=2': 0, + 'y=3': 0, + 'y=4': 0, + (0, 0): 0, + (0, 1): 0, + (0, 2): 0, + (0, 3): 0, + (0, 4): 0, + (1, 0): 0.0, + (1, 1): 0, + (1, 2): 0, + (1, 3): 0, + (1, 4): 0, + (2, 0): 0, + (2, 2): 0, + (2, 4): 0, + (3, 0): 0, + (3, 2): 0, + (3, 3): 0, + (3, 4): 0, + (4, 0): 0, + (4, 1): 0, + (4, 2): 0, + (4, 3): 0, + (4, 4): 0} +""" + +q_values_k_1_action_north: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_1_action_east: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_1_action_exit: """ + 0.0000 illegal 0.0000 illegal illegal + 0.0000 illegal __________ illegal illegal + 0.0000 illegal 0.0000 illegal illegal + 0.0000 illegal __________ __________ illegal + 0.0000 illegal illegal illegal illegal +""" + +q_values_k_1_action_south: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_1_action_west: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +weights_k_2: """ +{'action=east': 0, + 'action=exit': 0, + 'action=north': 0, + 'action=south': 0.0, + 'action=west': 0, + 'x=0': 0, + 'x=1': 0.0, + 'x=2': 0, + 'x=3': 0.0, + 'x=4': 0, + 'y=0': 0, + 'y=1': 0.0, + 'y=2': 0, + 'y=3': 0.0, + 'y=4': 0, + (0, 0): 0, + (0, 1): 0, + (0, 2): 0, + (0, 3): 0, + (0, 4): 0, + (1, 0): 0.0, + (1, 1): 0, + (1, 2): 0, + (1, 3): 0, + (1, 4): 0, + (2, 0): 0, + (2, 2): 0, + (2, 4): 0, + (3, 0): 0.0, + (3, 2): 0, + (3, 3): 0, + (3, 4): 0, + (4, 0): 0, + (4, 1): 0, + (4, 2): 0, + (4, 3): 0, + (4, 4): 0} +""" + 
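These dumps trace the weight vector of the approximate Q-learning agent after each successive experience (learningRate 0.1 and discount 0.9 per the corresponding .test file). A minimal sketch of the linear update they appear to follow is given here; the feature names ('x=0', 'action=exit', coordinate tuples) are taken from the dumps themselves, while the helper names below are purely illustrative and not the project's actual API.

def approx_q(weights, features):
    # Q(s, a) = sum_i w_i * f_i(s, a); 'features' maps feature name -> value
    return sum(weights.get(f, 0.0) * v for f, v in features.items())

def approx_q_update(weights, features, reward, max_next_q, alpha=0.1, gamma=0.9):
    # TD error: [r + gamma * max_a' Q(s', a')] - Q(s, a)
    difference = (reward + gamma * max_next_q) - approx_q(weights, features)
    for f, v in features.items():
        # each active feature's weight moves in proportion to the TD error
        weights[f] = weights.get(f, 0.0) + alpha * difference * v
    return weights

For instance, the step from weights_k_2 to weights_k_3 below is consistent with an 'exit' experience from (0, 0) that earns reward -10: the TD error is -10, so each active feature's weight changes by 0.1 * (-10) * 1.0 = -1.0.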
+q_values_k_2_action_north: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_2_action_east: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_2_action_exit: """ + 0.0000 illegal 0.0000 illegal illegal + 0.0000 illegal __________ illegal illegal + 0.0000 illegal 0.0000 illegal illegal + 0.0000 illegal __________ __________ illegal + 0.0000 illegal illegal illegal illegal +""" + +q_values_k_2_action_south: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_2_action_west: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +weights_k_3: """ +{'action=east': 0, + 'action=exit': -1.0, + 'action=north': 0, + 'action=south': 0.0, + 'action=west': 0, + 'x=0': -1.0, + 'x=1': 0.0, + 'x=2': 0, + 'x=3': 0.0, + 'x=4': 0, + 'y=0': -1.0, + 'y=1': 0.0, + 'y=2': 0, + 'y=3': 0.0, + 'y=4': 0, + (0, 0): -1.0, + (0, 1): 0, + (0, 2): 0, + (0, 3): 0, + (0, 4): 0, + (1, 0): 0.0, + (1, 1): 0, + (1, 2): 0, + (1, 3): 0, + (1, 4): 0, + (2, 0): 0, + (2, 2): 0, + (2, 4): 0, + (3, 0): 0.0, + (3, 2): 0, + (3, 3): 0, + (3, 4): 0, + (4, 0): 0, + (4, 1): 0, + (4, 2): 0, + (4, 3): 0, + (4, 4): 0} +""" + +q_values_k_3_action_north: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_3_action_east: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_3_action_exit: """ + -3.0000 illegal -1.0000 illegal illegal + -3.0000 illegal __________ illegal illegal + -3.0000 illegal -1.0000 illegal illegal + -3.0000 illegal __________ __________ illegal + -4.0000 illegal illegal illegal illegal +""" + +q_values_k_3_action_south: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_3_action_west: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +weights_k_4: """ +{'action=east': 0.0, + 'action=exit': -1.0, + 'action=north': 0, + 'action=south': 0.0, + 'action=west': 0, + 'x=0': -1.0, + 'x=1': 0.0, + 'x=2': 0, + 'x=3': 0.0, + 'x=4': 0, + 'y=0': -1.0, + 'y=1': 0.0, + 'y=2': 0, + 'y=3': 0.0, + 'y=4': 0, + (0, 0): -1.0, + (0, 1): 0, + (0, 2): 0, + (0, 3): 0, + (0, 4): 0, + (1, 0): 0.0, + (1, 1): 0, + (1, 2): 0, + (1, 3): 0, + (1, 4): 0, + (2, 0): 0, + (2, 2): 0, + (2, 4): 0, + (3, 0): 0.0, + (3, 2): 0, + (3, 3): 0, + (3, 4): 0, + (4, 0): 0, + (4, 1): 0, + (4, 2): 0, + (4, 3): 0, + (4, 
4): 0} +""" + +q_values_k_4_action_north: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_4_action_east: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_4_action_exit: """ + -3.0000 illegal -1.0000 illegal illegal + -3.0000 illegal __________ illegal illegal + -3.0000 illegal -1.0000 illegal illegal + -3.0000 illegal __________ __________ illegal + -4.0000 illegal illegal illegal illegal +""" + +q_values_k_4_action_south: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_4_action_west: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +weights_k_5: """ +{'action=east': 0.0, + 'action=exit': -1.0, + 'action=north': 0, + 'action=south': 0.0, + 'action=west': 0.0, + 'x=0': -1.0, + 'x=1': 0.0, + 'x=2': 0, + 'x=3': 0.0, + 'x=4': 0.0, + 'y=0': -1.0, + 'y=1': 0.0, + 'y=2': 0, + 'y=3': 0.0, + 'y=4': 0.0, + (0, 0): -1.0, + (0, 1): 0, + (0, 2): 0, + (0, 3): 0, + (0, 4): 0, + (1, 0): 0.0, + (1, 1): 0, + (1, 2): 0, + (1, 3): 0, + (1, 4): 0, + (2, 0): 0, + (2, 2): 0, + (2, 4): 0, + (3, 0): 0.0, + (3, 2): 0, + (3, 3): 0, + (3, 4): 0, + (4, 0): 0, + (4, 1): 0.0, + (4, 2): 0, + (4, 3): 0, + (4, 4): 0} +""" + +q_values_k_5_action_north: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_5_action_east: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_5_action_exit: """ + -3.0000 illegal -1.0000 illegal illegal + -3.0000 illegal __________ illegal illegal + -3.0000 illegal -1.0000 illegal illegal + -3.0000 illegal __________ __________ illegal + -4.0000 illegal illegal illegal illegal +""" + +q_values_k_5_action_south: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_5_action_west: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +weights_k_6: """ +{'action=east': 0.0, + 'action=exit': -1.7000000000000002, + 'action=north': 0, + 'action=south': 0.0, + 'action=west': 0.0, + 'x=0': -1.7000000000000002, + 'x=1': 0.0, + 'x=2': 0, + 'x=3': 0.0, + 'x=4': 0.0, + 'y=0': -1.7000000000000002, + 'y=1': 0.0, + 'y=2': 0, + 'y=3': 0.0, + 'y=4': 0.0, + (0, 0): -1.0, + (0, 1): 0, + (0, 2): 0, + (0, 3): -0.7000000000000001, + (0, 4): 0, + (1, 0): 0.0, + (1, 1): 0, + (1, 2): 0, + (1, 3): 0, + (1, 4): 0, + (2, 0): 0, + (2, 2): 0, + (2, 4): 0, + (3, 0): 
0.0, + (3, 2): 0, + (3, 3): 0, + (3, 4): 0, + (4, 0): 0, + (4, 1): 0.0, + (4, 2): 0, + (4, 3): 0, + (4, 4): 0} +""" + +q_values_k_6_action_north: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_6_action_east: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_6_action_exit: """ + -5.1000 illegal -1.7000 illegal illegal + -5.8000 illegal __________ illegal illegal + -5.1000 illegal -1.7000 illegal illegal + -5.1000 illegal __________ __________ illegal + -6.1000 illegal illegal illegal illegal +""" + +q_values_k_6_action_south: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +q_values_k_6_action_west: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.0000 0.0000 0.0000 +""" + +weights_k_7: """ +{'action=east': 0.0, + 'action=exit': -1.4300000000000002, + 'action=north': 0, + 'action=south': 0.0, + 'action=west': 0.0, + 'x=0': -1.7000000000000002, + 'x=1': 0.0, + 'x=2': 0.27, + 'x=3': 0.0, + 'x=4': 0.0, + 'y=0': -1.7000000000000002, + 'y=1': 0.0, + 'y=2': 0.27, + 'y=3': 0.0, + 'y=4': 0.0, + (0, 0): -1.0, + (0, 1): 0, + (0, 2): 0, + (0, 3): -0.7000000000000001, + (0, 4): 0, + (1, 0): 0.0, + (1, 1): 0, + (1, 2): 0, + (1, 3): 0, + (1, 4): 0, + (2, 0): 0, + (2, 2): 0.27, + (2, 4): 0, + (3, 0): 0.0, + (3, 2): 0, + (3, 3): 0, + (3, 4): 0, + (4, 0): 0, + (4, 1): 0.0, + (4, 2): 0, + (4, 3): 0, + (4, 4): 0} +""" + +q_values_k_7_action_north: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.5400 0.0000 0.0000 +""" + +q_values_k_7_action_east: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.5400 0.0000 0.0000 +""" + +q_values_k_7_action_exit: """ + -4.8300 illegal -0.8900 illegal illegal + -5.5300 illegal __________ illegal illegal + -4.8300 illegal -0.6200 illegal illegal + -4.8300 illegal __________ __________ illegal + -5.8300 illegal illegal illegal illegal +""" + +q_values_k_7_action_south: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.5400 0.0000 0.0000 +""" + +q_values_k_7_action_west: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.5400 0.0000 0.0000 +""" + +weights_k_8: """ +{'action=east': 0.0, + 'action=exit': -1.947, + 'action=north': 0, + 'action=south': 0.0, + 'action=west': 0.0, + 'x=0': -2.217, + 'x=1': 0.0, + 'x=2': 0.27, + 'x=3': 0.0, + 'x=4': 0.0, + 'y=0': -2.217, + 'y=1': 0.0, + 'y=2': 0.27, + 'y=3': 0.0, + 'y=4': 0.0, + (0, 0): -1.0, + (0, 1): -0.517, + (0, 2): 0, + (0, 3): 
-0.7000000000000001, + (0, 4): 0, + (1, 0): 0.0, + (1, 1): 0, + (1, 2): 0, + (1, 3): 0, + (1, 4): 0, + (2, 0): 0, + (2, 2): 0.27, + (2, 4): 0, + (3, 0): 0.0, + (3, 2): 0, + (3, 3): 0, + (3, 4): 0, + (4, 0): 0, + (4, 1): 0.0, + (4, 2): 0, + (4, 3): 0, + (4, 4): 0} +""" + +q_values_k_8_action_north: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.5400 0.0000 0.0000 +""" + +q_values_k_8_action_east: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.5400 0.0000 0.0000 +""" + +q_values_k_8_action_exit: """ + -6.3810 illegal -1.4070 illegal illegal + -7.0810 illegal __________ illegal illegal + -6.3810 illegal -1.1370 illegal illegal + -6.8980 illegal __________ __________ illegal + -7.3810 illegal illegal illegal illegal +""" + +q_values_k_8_action_south: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.5400 0.0000 0.0000 +""" + +q_values_k_8_action_west: """ + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ 0.0000 0.0000 + illegal 0.0000 illegal 0.0000 0.0000 + illegal 0.0000 __________ __________ 0.0000 + illegal 0.0000 0.5400 0.0000 0.0000 +""" + +weights_k_9: """ +{'action=east': 0.0, + 'action=exit': -1.947, + 'action=north': -0.62082, + 'action=south': 0.0, + 'action=west': 0.0, + 'x=0': -2.217, + 'x=1': -0.62082, + 'x=2': 0.27, + 'x=3': 0.0, + 'x=4': 0.0, + 'y=0': -2.217, + 'y=1': -0.62082, + 'y=2': 0.27, + 'y=3': 0.0, + 'y=4': 0.0, + (0, 0): -1.0, + (0, 1): -0.517, + (0, 2): 0, + (0, 3): -0.7000000000000001, + (0, 4): 0, + (1, 0): 0.0, + (1, 1): -0.62082, + (1, 2): 0, + (1, 3): 0, + (1, 4): 0, + (2, 0): 0, + (2, 2): 0.27, + (2, 4): 0, + (3, 0): 0.0, + (3, 2): 0, + (3, 3): 0, + (3, 4): 0, + (4, 0): 0, + (4, 1): 0.0, + (4, 2): 0, + (4, 3): 0, + (4, 4): 0} +""" + +q_values_k_9_action_north: """ + illegal -1.8625 illegal -0.6208 -0.6208 + illegal -1.8625 __________ -0.6208 -0.6208 + illegal -1.8625 illegal -0.6208 -0.6208 + illegal -2.4833 __________ __________ -0.6208 + illegal -1.8625 -0.0808 -0.6208 -0.6208 +""" + +q_values_k_9_action_east: """ + illegal -1.2416 illegal 0.0000 0.0000 + illegal -1.2416 __________ 0.0000 0.0000 + illegal -1.2416 illegal 0.0000 0.0000 + illegal -1.8625 __________ __________ 0.0000 + illegal -1.2416 0.5400 0.0000 0.0000 +""" + +q_values_k_9_action_exit: """ + -6.3810 illegal -1.4070 illegal illegal + -7.0810 illegal __________ illegal illegal + -6.3810 illegal -1.1370 illegal illegal + -6.8980 illegal __________ __________ illegal + -7.3810 illegal illegal illegal illegal +""" + +q_values_k_9_action_south: """ + illegal -1.2416 illegal 0.0000 0.0000 + illegal -1.2416 __________ 0.0000 0.0000 + illegal -1.2416 illegal 0.0000 0.0000 + illegal -1.8625 __________ __________ 0.0000 + illegal -1.2416 0.5400 0.0000 0.0000 +""" + +q_values_k_9_action_west: """ + illegal -1.2416 illegal 0.0000 0.0000 + illegal -1.2416 __________ 0.0000 0.0000 + illegal -1.2416 illegal 0.0000 0.0000 + illegal -1.8625 __________ __________ 0.0000 + illegal -1.2416 0.5400 0.0000 0.0000 +""" + +weights_k_3000: """ +{'action=east': 6.719916513522846, + 'action=exit': -2.2444981376861555, + 'action=north': 4.568574519923728, + 'action=south': 3.761510351874819, 
+ 'action=west': 1.2828606322891556, + 'x=0': -3.604063955849794, + 'x=1': 0.6731476152061693, + 'x=2': 4.000208353074704, + 'x=3': 5.988311380073477, + 'x=4': 7.0307604874198235, + 'y=0': -3.604063955849794, + 'y=1': 0.6731476152061693, + 'y=2': 4.000208353074704, + 'y=3': 5.988311380073477, + 'y=4': 7.0307604874198235, + (0, 0): -0.7073688447583666, + (0, 1): -0.7542862401704076, + (0, 2): -0.7043014501203066, + (0, 3): -0.7433344649617668, + (0, 4): -0.6947729558389527, + (1, 0): 2.364273811399719, + (1, 1): -0.2695405704605499, + (1, 2): -0.7105979212702271, + (1, 3): -1.4866826750327933, + (1, 4): 0.7756949705700219, + (2, 0): 2.64064253491107, + (2, 2): -3.7381118310263166, + (2, 4): 5.097677649189953, + (3, 0): 2.505262939441149, + (3, 2): 0.27218788923837256, + (3, 3): 2.2611084206093195, + (3, 4): 0.9497521307846304, + (4, 0): 1.7330586015291545, + (4, 1): 0.980194046153168, + (4, 2): 0.78786289128181, + (4, 3): 1.493343270762865, + (4, 4): 2.0363016776928333} +""" + +q_values_k_3000_action_north: """ + illegal 6.6906 illegal 17.4949 20.6664 + illegal 4.4282 __________ 18.8063 20.1234 + illegal 5.2043 illegal 16.8174 19.4180 + illegal 5.6453 __________ __________ 19.6103 + illegal 8.2791 15.2096 19.0505 20.3632 +""" + +q_values_k_3000_action_east: """ + illegal 8.8419 illegal 19.6463 22.8177 + illegal 6.5795 __________ 20.9576 22.2748 + illegal 7.3556 illegal 18.9687 21.5693 + illegal 7.7967 __________ __________ 21.7616 + illegal 10.4305 17.3610 21.2018 22.5145 +""" + +q_values_k_3000_action_exit: """ + -10.1474 illegal 10.8536 illegal illegal + -10.1960 illegal __________ illegal illegal + -10.1569 illegal 2.0178 illegal illegal + -10.2069 illegal __________ __________ illegal + -10.1600 illegal illegal illegal illegal +""" + +q_values_k_3000_action_south: """ + illegal 5.8835 illegal 16.6879 19.8593 + illegal 3.6211 __________ 17.9992 19.3164 + illegal 4.3972 illegal 16.0103 18.6109 + illegal 4.8383 __________ __________ 18.8032 + illegal 7.4721 14.4026 18.2434 19.5561 +""" + +q_values_k_3000_action_west: """ + illegal 3.4049 illegal 14.2092 17.3807 + illegal 1.1425 __________ 15.5206 16.8377 + illegal 1.9186 illegal 13.5317 16.1322 + illegal 2.3596 __________ __________ 16.3246 + illegal 4.9934 11.9239 15.7647 17.0774 +""" + diff --git a/reinforcement/test_cases/q8/5-coord-extractor.test b/reinforcement/test_cases/q8/5-coord-extractor.test new file mode 100644 index 0000000..ec06a14 --- /dev/null +++ b/reinforcement/test_cases/q8/5-coord-extractor.test @@ -0,0 +1,25 @@ +class: "ApproximateQLearningTest" + +# GridWorld specification +# _ is empty space +# numbers are terminal states with that value +# # is a wall +# S is a start state +# +grid: """ + -10 _ 10 _ _ + -10 _ # _ _ + -10 _ 1 _ _ + -10 _ # # _ + -10 S _ _ _ +""" +discount: "0.9" +noise: "0.2" +livingReward: "0.0" +epsilon: "0.2" +learningRate: "0.1" +numExperiences: "3000" +valueIterations: "100" +iterations: "10000" +extractor: "CoordinateExtractor" + diff --git a/reinforcement/test_cases/q8/CONFIG b/reinforcement/test_cases/q8/CONFIG new file mode 100644 index 0000000..ad7e38a --- /dev/null +++ b/reinforcement/test_cases/q8/CONFIG @@ -0,0 +1,2 @@ +max_points: "3" +class: "PassAllTestsQuestion" diff --git a/reinforcement/textDisplay.py b/reinforcement/textDisplay.py new file mode 100644 index 0000000..e920ad4 --- /dev/null +++ b/reinforcement/textDisplay.py @@ -0,0 +1,81 @@ +# textDisplay.py +# -------------- +# Licensing Information: You are free to use or extend these projects for +# educational purposes provided 
that (1) you do not distribute or publish +# solutions, (2) you retain this notice, and (3) you provide clear +# attribution to UC Berkeley, including a link to http://ai.berkeley.edu. +# +# Attribution Information: The Pacman AI projects were developed at UC Berkeley. +# The core projects and autograders were primarily created by John DeNero +# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). +# Student side autograding was added by Brad Miller, Nick Hay, and +# Pieter Abbeel (pabbeel@cs.berkeley.edu). + + +import time +try: + import pacman +except: + pass + +DRAW_EVERY = 1 +SLEEP_TIME = 0 # This can be overwritten by __init__ +DISPLAY_MOVES = False +QUIET = False # Supresses output + +class NullGraphics: + def initialize(self, state, isBlue = False): + pass + + def update(self, state): + pass + + def checkNullDisplay(self): + return True + + def pause(self): + time.sleep(SLEEP_TIME) + + def draw(self, state): + print state + + def updateDistributions(self, dist): + pass + + def finish(self): + pass + +class PacmanGraphics: + def __init__(self, speed=None): + if speed != None: + global SLEEP_TIME + SLEEP_TIME = speed + + def initialize(self, state, isBlue = False): + self.draw(state) + self.pause() + self.turn = 0 + self.agentCounter = 0 + + def update(self, state): + numAgents = len(state.agentStates) + self.agentCounter = (self.agentCounter + 1) % numAgents + if self.agentCounter == 0: + self.turn += 1 + if DISPLAY_MOVES: + ghosts = [pacman.nearestPoint(state.getGhostPosition(i)) for i in range(1, numAgents)] + print "%4d) P: %-8s" % (self.turn, str(pacman.nearestPoint(state.getPacmanPosition()))),'| Score: %-5d' % state.score,'| Ghosts:', ghosts + if self.turn % DRAW_EVERY == 0: + self.draw(state) + self.pause() + if state._win or state._lose: + self.draw(state) + + def pause(self): + time.sleep(SLEEP_TIME) + + def draw(self, state): + print state + + def finish(self): + pass diff --git a/reinforcement/textGridworldDisplay.py b/reinforcement/textGridworldDisplay.py new file mode 100644 index 0000000..e014428 --- /dev/null +++ b/reinforcement/textGridworldDisplay.py @@ -0,0 +1,324 @@ +# textGridworldDisplay.py +# ----------------------- +# Licensing Information: You are free to use or extend these projects for +# educational purposes provided that (1) you do not distribute or publish +# solutions, (2) you retain this notice, and (3) you provide clear +# attribution to UC Berkeley, including a link to http://ai.berkeley.edu. +# +# Attribution Information: The Pacman AI projects were developed at UC Berkeley. +# The core projects and autograders were primarily created by John DeNero +# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). +# Student side autograding was added by Brad Miller, Nick Hay, and +# Pieter Abbeel (pabbeel@cs.berkeley.edu). 
+ + +import util + +class TextGridworldDisplay: + + def __init__(self, gridworld): + self.gridworld = gridworld + + def start(self): + pass + + def pause(self): + pass + + def displayValues(self, agent, currentState = None, message = None): + if message != None: + print message + values = util.Counter() + policy = {} + states = self.gridworld.getStates() + for state in states: + values[state] = agent.getValue(state) + policy[state] = agent.getPolicy(state) + prettyPrintValues(self.gridworld, values, policy, currentState) + + def displayNullValues(self, agent, currentState = None, message = None): + if message != None: print message + prettyPrintNullValues(self.gridworld, currentState) + + def displayQValues(self, agent, currentState = None, message = None): + if message != None: print message + qValues = util.Counter() + states = self.gridworld.getStates() + for state in states: + for action in self.gridworld.getPossibleActions(state): + qValues[(state, action)] = agent.getQValue(state, action) + prettyPrintQValues(self.gridworld, qValues, currentState) + + +def prettyPrintValues(gridWorld, values, policy=None, currentState = None): + grid = gridWorld.grid + maxLen = 11 + newRows = [] + for y in range(grid.height): + newRow = [] + for x in range(grid.width): + state = (x, y) + value = values[state] + action = None + if policy != None and state in policy: + action = policy[state] + actions = gridWorld.getPossibleActions(state) + if action not in actions and 'exit' in actions: + action = 'exit' + valString = None + if action == 'exit': + valString = border('%.2f' % value) + else: + valString = '\n\n%.2f\n\n' % value + valString += ' '*maxLen + if grid[x][y] == 'S': + valString = '\n\nS: %.2f\n\n' % value + valString += ' '*maxLen + if grid[x][y] == '#': + valString = '\n#####\n#####\n#####\n' + valString += ' '*maxLen + pieces = [valString] + text = ("\n".join(pieces)).split('\n') + if currentState == state: + l = len(text[1]) + if l == 0: + text[1] = '*' + else: + text[1] = "|" + ' ' * int((l-1)/2-1) + '*' + ' ' * int((l)/2-1) + "|" + if action == 'east': + text[2] = ' ' + text[2] + ' >' + elif action == 'west': + text[2] = '< ' + text[2] + ' ' + elif action == 'north': + text[0] = ' ' * int(maxLen/2) + '^' +' ' * int(maxLen/2) + elif action == 'south': + text[4] = ' ' * int(maxLen/2) + 'v' +' ' * int(maxLen/2) + newCell = "\n".join(text) + newRow.append(newCell) + newRows.append(newRow) + numCols = grid.width + for rowNum, row in enumerate(newRows): + row.insert(0,"\n\n"+str(rowNum)) + newRows.reverse() + colLabels = [str(colNum) for colNum in range(numCols)] + colLabels.insert(0,' ') + finalRows = [colLabels] + newRows + print indent(finalRows,separateRows=True,delim='|', prefix='|',postfix='|', justify='center',hasHeader=True) + + +def prettyPrintNullValues(gridWorld, currentState = None): + grid = gridWorld.grid + maxLen = 11 + newRows = [] + for y in range(grid.height): + newRow = [] + for x in range(grid.width): + state = (x, y) + + # value = values[state] + + action = None + # if policy != None and state in policy: + # action = policy[state] + # + actions = gridWorld.getPossibleActions(state) + + if action not in actions and 'exit' in actions: + action = 'exit' + + valString = None + # if action == 'exit': + # valString = border('%.2f' % value) + # else: + # valString = '\n\n%.2f\n\n' % value + # valString += ' '*maxLen + + if grid[x][y] == 'S': + valString = '\n\nS\n\n' + valString += ' '*maxLen + elif grid[x][y] == '#': + valString = '\n#####\n#####\n#####\n' + valString += ' 
'*maxLen + elif type(grid[x][y]) == float or type(grid[x][y]) == int: + valString = border('%.2f' % float(grid[x][y])) + else: valString = border(' ') + pieces = [valString] + + text = ("\n".join(pieces)).split('\n') + + if currentState == state: + l = len(text[1]) + if l == 0: + text[1] = '*' + else: + text[1] = "|" + ' ' * int((l-1)/2-1) + '*' + ' ' * int((l)/2-1) + "|" + + if action == 'east': + text[2] = ' ' + text[2] + ' >' + elif action == 'west': + text[2] = '< ' + text[2] + ' ' + elif action == 'north': + text[0] = ' ' * int(maxLen/2) + '^' +' ' * int(maxLen/2) + elif action == 'south': + text[4] = ' ' * int(maxLen/2) + 'v' +' ' * int(maxLen/2) + newCell = "\n".join(text) + newRow.append(newCell) + newRows.append(newRow) + numCols = grid.width + for rowNum, row in enumerate(newRows): + row.insert(0,"\n\n"+str(rowNum)) + newRows.reverse() + colLabels = [str(colNum) for colNum in range(numCols)] + colLabels.insert(0,' ') + finalRows = [colLabels] + newRows + print indent(finalRows,separateRows=True,delim='|', prefix='|',postfix='|', justify='center',hasHeader=True) + +def prettyPrintQValues(gridWorld, qValues, currentState=None): + grid = gridWorld.grid + maxLen = 11 + newRows = [] + for y in range(grid.height): + newRow = [] + for x in range(grid.width): + state = (x, y) + actions = gridWorld.getPossibleActions(state) + if actions == None or len(actions) == 0: + actions = [None] + bestQ = max([qValues[(state, action)] for action in actions]) + bestActions = [action for action in actions if qValues[(state, action)] == bestQ] + + # display cell + qStrings = dict([(action, "%.2f" % qValues[(state, action)]) for action in actions]) + northString = ('north' in qStrings and qStrings['north']) or ' ' + southString = ('south' in qStrings and qStrings['south']) or ' ' + eastString = ('east' in qStrings and qStrings['east']) or ' ' + westString = ('west' in qStrings and qStrings['west']) or ' ' + exitString = ('exit' in qStrings and qStrings['exit']) or ' ' + + eastLen = len(eastString) + westLen = len(westString) + if eastLen < westLen: + eastString = ' '*(westLen-eastLen)+eastString + if westLen < eastLen: + westString = westString+' '*(eastLen-westLen) + + if 'north' in bestActions: + northString = '/'+northString+'\\' + if 'south' in bestActions: + southString = '\\'+southString+'/' + if 'east' in bestActions: + eastString = ''+eastString+'>' + else: + eastString = ''+eastString+' ' + if 'west' in bestActions: + westString = '<'+westString+'' + else: + westString = ' '+westString+'' + if 'exit' in bestActions: + exitString = '[ '+exitString+' ]' + + + ewString = westString + " " + eastString + if state == currentState: + ewString = westString + " * " + eastString + if state == gridWorld.getStartState(): + ewString = westString + " S " + eastString + if state == currentState and state == gridWorld.getStartState(): + ewString = westString + " S:* " + eastString + + text = [northString, "\n"+exitString, ewString, ' '*maxLen+"\n", southString] + + if grid[x][y] == '#': + text = ['', '\n#####\n#####\n#####', ''] + + newCell = "\n".join(text) + newRow.append(newCell) + newRows.append(newRow) + numCols = grid.width + for rowNum, row in enumerate(newRows): + row.insert(0,"\n\n\n"+str(rowNum)) + newRows.reverse() + colLabels = [str(colNum) for colNum in range(numCols)] + colLabels.insert(0,' ') + finalRows = [colLabels] + newRows + + print indent(finalRows,separateRows=True,delim='|',prefix='|',postfix='|', justify='center',hasHeader=True) + +def border(text): + length = len(text) + pieces = ['-' 
* (length+2), '|'+' ' * (length+2)+'|', ' | '+text+' | ', '|'+' ' * (length+2)+'|','-' * (length+2)] + return '\n'.join(pieces) + +# INDENTING CODE + +# Indenting code based on a post from George Sakkis +# (http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/267662) + +import cStringIO,operator + +def indent(rows, hasHeader=False, headerChar='-', delim=' | ', justify='left', + separateRows=False, prefix='', postfix='', wrapfunc=lambda x:x): + """Indents a table by column. + - rows: A sequence of sequences of items, one sequence per row. + - hasHeader: True if the first row consists of the columns' names. + - headerChar: Character to be used for the row separator line + (if hasHeader==True or separateRows==True). + - delim: The column delimiter. + - justify: Determines how are data justified in their column. + Valid values are 'left','right' and 'center'. + - separateRows: True if rows are to be separated by a line + of 'headerChar's. + - prefix: A string prepended to each printed row. + - postfix: A string appended to each printed row. + - wrapfunc: A function f(text) for wrapping text; each element in + the table is first wrapped by this function.""" + # closure for breaking logical rows to physical, using wrapfunc + def rowWrapper(row): + newRows = [wrapfunc(item).split('\n') for item in row] + return [[substr or '' for substr in item] for item in map(None,*newRows)] + # break each logical row into one or more physical ones + logicalRows = [rowWrapper(row) for row in rows] + # columns of physical rows + columns = map(None,*reduce(operator.add,logicalRows)) + # get the maximum of each column by the string length of its items + maxWidths = [max([len(str(item)) for item in column]) for column in columns] + rowSeparator = headerChar * (len(prefix) + len(postfix) + sum(maxWidths) + \ + len(delim)*(len(maxWidths)-1)) + # select the appropriate justify method + justify = {'center':str.center, 'right':str.rjust, 'left':str.ljust}[justify.lower()] + output=cStringIO.StringIO() + if separateRows: print >> output, rowSeparator + for physicalRows in logicalRows: + for row in physicalRows: + print >> output, \ + prefix \ + + delim.join([justify(str(item),width) for (item,width) in zip(row,maxWidths)]) \ + + postfix + if separateRows or hasHeader: print >> output, rowSeparator; hasHeader=False + return output.getvalue() + +import math +def wrap_always(text, width): + """A simple word-wrap function that wraps text on exactly width characters. 
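    For example, wrap_always('abcdefgh', 3) gives 'abc\ndef\ngh'.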
+ It doesn't split the text in words.""" + return '\n'.join([ text[width*i:width*(i+1)] \ + for i in xrange(int(math.ceil(1.*len(text)/width))) ]) + + +# TEST OF DISPLAY CODE + +if __name__ == '__main__': + import gridworld, util + + grid = gridworld.getCliffGrid3() + print grid.getStates() + + policy = dict([(state,'east') for state in grid.getStates()]) + values = util.Counter(dict([(state,1000.23) for state in grid.getStates()])) + prettyPrintValues(grid, values, policy, currentState = (0,0)) + + stateCrossActions = [[(state, action) for action in grid.getPossibleActions(state)] for state in grid.getStates()] + qStates = reduce(lambda x,y: x+y, stateCrossActions, []) + qValues = util.Counter(dict([((state, action), 10.5) for state, action in qStates])) + qValues = util.Counter(dict([((state, action), 10.5) for state, action in reduce(lambda x,y: x+y, stateCrossActions, [])])) + prettyPrintQValues(grid, qValues, currentState = (0,0)) diff --git a/reinforcement/util.py b/reinforcement/util.py new file mode 100644 index 0000000..7b0cf93 --- /dev/null +++ b/reinforcement/util.py @@ -0,0 +1,653 @@ +# util.py +# ------- +# Licensing Information: You are free to use or extend these projects for +# educational purposes provided that (1) you do not distribute or publish +# solutions, (2) you retain this notice, and (3) you provide clear +# attribution to UC Berkeley, including a link to http://ai.berkeley.edu. +# +# Attribution Information: The Pacman AI projects were developed at UC Berkeley. +# The core projects and autograders were primarily created by John DeNero +# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). +# Student side autograding was added by Brad Miller, Nick Hay, and +# Pieter Abbeel (pabbeel@cs.berkeley.edu). + + +import sys +import inspect +import heapq, random +import cStringIO + + +class FixedRandom: + def __init__(self): + fixedState = (3, (2147483648L, 507801126L, 683453281L, 310439348L, 2597246090L, \ + 2209084787L, 2267831527L, 979920060L, 3098657677L, 37650879L, 807947081L, 3974896263L, \ + 881243242L, 3100634921L, 1334775171L, 3965168385L, 746264660L, 4074750168L, 500078808L, \ + 776561771L, 702988163L, 1636311725L, 2559226045L, 157578202L, 2498342920L, 2794591496L, \ + 4130598723L, 496985844L, 2944563015L, 3731321600L, 3514814613L, 3362575829L, 3038768745L, \ + 2206497038L, 1108748846L, 1317460727L, 3134077628L, 988312410L, 1674063516L, 746456451L, \ + 3958482413L, 1857117812L, 708750586L, 1583423339L, 3466495450L, 1536929345L, 1137240525L, \ + 3875025632L, 2466137587L, 1235845595L, 4214575620L, 3792516855L, 657994358L, 1241843248L, \ + 1695651859L, 3678946666L, 1929922113L, 2351044952L, 2317810202L, 2039319015L, 460787996L, \ + 3654096216L, 4068721415L, 1814163703L, 2904112444L, 1386111013L, 574629867L, 2654529343L, \ + 3833135042L, 2725328455L, 552431551L, 4006991378L, 1331562057L, 3710134542L, 303171486L, \ + 1203231078L, 2670768975L, 54570816L, 2679609001L, 578983064L, 1271454725L, 3230871056L, \ + 2496832891L, 2944938195L, 1608828728L, 367886575L, 2544708204L, 103775539L, 1912402393L, \ + 1098482180L, 2738577070L, 3091646463L, 1505274463L, 2079416566L, 659100352L, 839995305L, \ + 1696257633L, 274389836L, 3973303017L, 671127655L, 1061109122L, 517486945L, 1379749962L, \ + 3421383928L, 3116950429L, 2165882425L, 2346928266L, 2892678711L, 2936066049L, 1316407868L, \ + 2873411858L, 4279682888L, 2744351923L, 3290373816L, 1014377279L, 955200944L, 4220990860L, \ + 2386098930L, 1772997650L, 3757346974L, 1621616438L, 2877097197L, 442116595L, 2010480266L, \ + 
2867861469L, 2955352695L, 605335967L, 2222936009L, 2067554933L, 4129906358L, 1519608541L, \ + 1195006590L, 1942991038L, 2736562236L, 279162408L, 1415982909L, 4099901426L, 1732201505L, \ + 2934657937L, 860563237L, 2479235483L, 3081651097L, 2244720867L, 3112631622L, 1636991639L, \ + 3860393305L, 2312061927L, 48780114L, 1149090394L, 2643246550L, 1764050647L, 3836789087L, \ + 3474859076L, 4237194338L, 1735191073L, 2150369208L, 92164394L, 756974036L, 2314453957L, \ + 323969533L, 4267621035L, 283649842L, 810004843L, 727855536L, 1757827251L, 3334960421L, \ + 3261035106L, 38417393L, 2660980472L, 1256633965L, 2184045390L, 811213141L, 2857482069L, \ + 2237770878L, 3891003138L, 2787806886L, 2435192790L, 2249324662L, 3507764896L, 995388363L, \ + 856944153L, 619213904L, 3233967826L, 3703465555L, 3286531781L, 3863193356L, 2992340714L, \ + 413696855L, 3865185632L, 1704163171L, 3043634452L, 2225424707L, 2199018022L, 3506117517L, \ + 3311559776L, 3374443561L, 1207829628L, 668793165L, 1822020716L, 2082656160L, 1160606415L, \ + 3034757648L, 741703672L, 3094328738L, 459332691L, 2702383376L, 1610239915L, 4162939394L, \ + 557861574L, 3805706338L, 3832520705L, 1248934879L, 3250424034L, 892335058L, 74323433L, \ + 3209751608L, 3213220797L, 3444035873L, 3743886725L, 1783837251L, 610968664L, 580745246L, \ + 4041979504L, 201684874L, 2673219253L, 1377283008L, 3497299167L, 2344209394L, 2304982920L, \ + 3081403782L, 2599256854L, 3184475235L, 3373055826L, 695186388L, 2423332338L, 222864327L, \ + 1258227992L, 3627871647L, 3487724980L, 4027953808L, 3053320360L, 533627073L, 3026232514L, \ + 2340271949L, 867277230L, 868513116L, 2158535651L, 2487822909L, 3428235761L, 3067196046L, \ + 3435119657L, 1908441839L, 788668797L, 3367703138L, 3317763187L, 908264443L, 2252100381L, \ + 764223334L, 4127108988L, 384641349L, 3377374722L, 1263833251L, 1958694944L, 3847832657L, \ + 1253909612L, 1096494446L, 555725445L, 2277045895L, 3340096504L, 1383318686L, 4234428127L, \ + 1072582179L, 94169494L, 1064509968L, 2681151917L, 2681864920L, 734708852L, 1338914021L, \ + 1270409500L, 1789469116L, 4191988204L, 1716329784L, 2213764829L, 3712538840L, 919910444L, \ + 1318414447L, 3383806712L, 3054941722L, 3378649942L, 1205735655L, 1268136494L, 2214009444L, \ + 2532395133L, 3232230447L, 230294038L, 342599089L, 772808141L, 4096882234L, 3146662953L, \ + 2784264306L, 1860954704L, 2675279609L, 2984212876L, 2466966981L, 2627986059L, 2985545332L, \ + 2578042598L, 1458940786L, 2944243755L, 3959506256L, 1509151382L, 325761900L, 942251521L, \ + 4184289782L, 2756231555L, 3297811774L, 1169708099L, 3280524138L, 3805245319L, 3227360276L, \ + 3199632491L, 2235795585L, 2865407118L, 36763651L, 2441503575L, 3314890374L, 1755526087L, \ + 17915536L, 1196948233L, 949343045L, 3815841867L, 489007833L, 2654997597L, 2834744136L, \ + 417688687L, 2843220846L, 85621843L, 747339336L, 2043645709L, 3520444394L, 1825470818L, \ + 647778910L, 275904777L, 1249389189L, 3640887431L, 4200779599L, 323384601L, 3446088641L, \ + 4049835786L, 1718989062L, 3563787136L, 44099190L, 3281263107L, 22910812L, 1826109246L, \ + 745118154L, 3392171319L, 1571490704L, 354891067L, 815955642L, 1453450421L, 940015623L, \ + 796817754L, 1260148619L, 3898237757L, 176670141L, 1870249326L, 3317738680L, 448918002L, \ + 4059166594L, 2003827551L, 987091377L, 224855998L, 3520570137L, 789522610L, 2604445123L, \ + 454472869L, 475688926L, 2990723466L, 523362238L, 3897608102L, 806637149L, 2642229586L, \ + 2928614432L, 1564415411L, 1691381054L, 3816907227L, 4082581003L, 1895544448L, 3728217394L, \ + 3214813157L, 
4054301607L, 1882632454L, 2873728645L, 3694943071L, 1297991732L, 2101682438L, \ + 3952579552L, 678650400L, 1391722293L, 478833748L, 2976468591L, 158586606L, 2576499787L, \ + 662690848L, 3799889765L, 3328894692L, 2474578497L, 2383901391L, 1718193504L, 3003184595L, \ + 3630561213L, 1929441113L, 3848238627L, 1594310094L, 3040359840L, 3051803867L, 2462788790L, \ + 954409915L, 802581771L, 681703307L, 545982392L, 2738993819L, 8025358L, 2827719383L, \ + 770471093L, 3484895980L, 3111306320L, 3900000891L, 2116916652L, 397746721L, 2087689510L, \ + 721433935L, 1396088885L, 2751612384L, 1998988613L, 2135074843L, 2521131298L, 707009172L, \ + 2398321482L, 688041159L, 2264560137L, 482388305L, 207864885L, 3735036991L, 3490348331L, \ + 1963642811L, 3260224305L, 3493564223L, 1939428454L, 1128799656L, 1366012432L, 2858822447L, \ + 1428147157L, 2261125391L, 1611208390L, 1134826333L, 2374102525L, 3833625209L, 2266397263L, \ + 3189115077L, 770080230L, 2674657172L, 4280146640L, 3604531615L, 4235071805L, 3436987249L, \ + 509704467L, 2582695198L, 4256268040L, 3391197562L, 1460642842L, 1617931012L, 457825497L, \ + 1031452907L, 1330422862L, 4125947620L, 2280712485L, 431892090L, 2387410588L, 2061126784L, \ + 896457479L, 3480499461L, 2488196663L, 4021103792L, 1877063114L, 2744470201L, 1046140599L, \ + 2129952955L, 3583049218L, 4217723693L, 2720341743L, 820661843L, 1079873609L, 3360954200L, \ + 3652304997L, 3335838575L, 2178810636L, 1908053374L, 4026721976L, 1793145418L, 476541615L, \ + 973420250L, 515553040L, 919292001L, 2601786155L, 1685119450L, 3030170809L, 1590676150L, \ + 1665099167L, 651151584L, 2077190587L, 957892642L, 646336572L, 2743719258L, 866169074L, \ + 851118829L, 4225766285L, 963748226L, 799549420L, 1955032629L, 799460000L, 2425744063L, \ + 2441291571L, 1928963772L, 528930629L, 2591962884L, 3495142819L, 1896021824L, 901320159L, \ + 3181820243L, 843061941L, 3338628510L, 3782438992L, 9515330L, 1705797226L, 953535929L, \ + 764833876L, 3202464965L, 2970244591L, 519154982L, 3390617541L, 566616744L, 3438031503L, \ + 1853838297L, 170608755L, 1393728434L, 676900116L, 3184965776L, 1843100290L, 78995357L, \ + 2227939888L, 3460264600L, 1745705055L, 1474086965L, 572796246L, 4081303004L, 882828851L, \ + 1295445825L, 137639900L, 3304579600L, 2722437017L, 4093422709L, 273203373L, 2666507854L, \ + 3998836510L, 493829981L, 1623949669L, 3482036755L, 3390023939L, 833233937L, 1639668730L, \ + 1499455075L, 249728260L, 1210694006L, 3836497489L, 1551488720L, 3253074267L, 3388238003L, \ + 2372035079L, 3945715164L, 2029501215L, 3362012634L, 2007375355L, 4074709820L, 631485888L, \ + 3135015769L, 4273087084L, 3648076204L, 2739943601L, 1374020358L, 1760722448L, 3773939706L, \ + 1313027823L, 1895251226L, 4224465911L, 421382535L, 1141067370L, 3660034846L, 3393185650L, \ + 1850995280L, 1451917312L, 3841455409L, 3926840308L, 1397397252L, 2572864479L, 2500171350L, \ + 3119920613L, 531400869L, 1626487579L, 1099320497L, 407414753L, 2438623324L, 99073255L, \ + 3175491512L, 656431560L, 1153671785L, 236307875L, 2824738046L, 2320621382L, 892174056L, \ + 230984053L, 719791226L, 2718891946L, 624L), None) + self.random = random.Random() + self.random.setstate(fixedState) + +""" + Data structures useful for implementing SearchAgents +""" + +class Stack: + "A container with a last-in-first-out (LIFO) queuing policy." 
+ def __init__(self): + self.list = [] + + def push(self,item): + "Push 'item' onto the stack" + self.list.append(item) + + def pop(self): + "Pop the most recently pushed item from the stack" + return self.list.pop() + + def isEmpty(self): + "Returns true if the stack is empty" + return len(self.list) == 0 + +class Queue: + "A container with a first-in-first-out (FIFO) queuing policy." + def __init__(self): + self.list = [] + + def push(self,item): + "Enqueue the 'item' into the queue" + self.list.insert(0,item) + + def pop(self): + """ + Dequeue the earliest enqueued item still in the queue. This + operation removes the item from the queue. + """ + return self.list.pop() + + def isEmpty(self): + "Returns true if the queue is empty" + return len(self.list) == 0 + +class PriorityQueue: + """ + Implements a priority queue data structure. Each inserted item + has a priority associated with it and the client is usually interested + in quick retrieval of the lowest-priority item in the queue. This + data structure allows O(1) access to the lowest-priority item. + + Note that this PriorityQueue does not allow you to change the priority + of an item. However, you may insert the same item multiple times with + different priorities. + """ + def __init__(self): + self.heap = [] + self.count = 0 + + def push(self, item, priority): + # FIXME: restored old behaviour to check against old results better + # FIXED: restored to stable behaviour + entry = (priority, self.count, item) + # entry = (priority, item) + heapq.heappush(self.heap, entry) + self.count += 1 + + def pop(self): + (_, _, item) = heapq.heappop(self.heap) + # (_, item) = heapq.heappop(self.heap) + return item + + def isEmpty(self): + return len(self.heap) == 0 + +class PriorityQueueWithFunction(PriorityQueue): + """ + Implements a priority queue with the same push/pop signature of the + Queue and the Stack classes. This is designed for drop-in replacement for + those two classes. The caller has to provide a priority function, which + extracts each item's priority. + """ + def __init__(self, priorityFunction): + "priorityFunction (item) -> priority" + self.priorityFunction = priorityFunction # store the priority function + PriorityQueue.__init__(self) # super-class initializer + + def push(self, item): + "Adds an item to the queue with priority from the priority function" + PriorityQueue.push(self, item, self.priorityFunction(item)) + + +def manhattanDistance( xy1, xy2 ): + "Returns the Manhattan distance between points xy1 and xy2" + return abs( xy1[0] - xy2[0] ) + abs( xy1[1] - xy2[1] ) + +""" + Data structures and functions useful for various course projects + + The search project should not need anything below this line. +""" + +class Counter(dict): + """ + A counter keeps track of counts for a set of keys. + + The counter class is an extension of the standard python + dictionary type. It is specialized to have number values + (integers or floats), and includes a handful of additional + functions to ease the task of counting data. In particular, + all keys are defaulted to have value 0. Using a dictionary: + + a = {} + print a['test'] + + would give an error, while the Counter class analogue: + + >>> a = Counter() + >>> print a['test'] + 0 + + returns the default 0 value. 
Note that to reference a key + that you know is contained in the counter, + you can still use the dictionary syntax: + + >>> a = Counter() + >>> a['test'] = 2 + >>> print a['test'] + 2 + + This is very useful for counting things without initializing their counts, + see for example: + + >>> a['blah'] += 1 + >>> print a['blah'] + 1 + + The counter also includes additional functionality useful in implementing + the classifiers for this assignment. Two counters can be added, + subtracted or multiplied together. See below for details. They can + also be normalized and their total count and arg max can be extracted. + """ + def __getitem__(self, idx): + self.setdefault(idx, 0) + return dict.__getitem__(self, idx) + + def incrementAll(self, keys, count): + """ + Increments all elements of keys by the same count. + + >>> a = Counter() + >>> a.incrementAll(['one','two', 'three'], 1) + >>> a['one'] + 1 + >>> a['two'] + 1 + """ + for key in keys: + self[key] += count + + def argMax(self): + """ + Returns the key with the highest value. + """ + if len(self.keys()) == 0: return None + all = self.items() + values = [x[1] for x in all] + maxIndex = values.index(max(values)) + return all[maxIndex][0] + + def sortedKeys(self): + """ + Returns a list of keys sorted by their values. Keys + with the highest values will appear first. + + >>> a = Counter() + >>> a['first'] = -2 + >>> a['second'] = 4 + >>> a['third'] = 1 + >>> a.sortedKeys() + ['second', 'third', 'first'] + """ + sortedItems = self.items() + compare = lambda x, y: sign(y[1] - x[1]) + sortedItems.sort(cmp=compare) + return [x[0] for x in sortedItems] + + def totalCount(self): + """ + Returns the sum of counts for all keys. + """ + return sum(self.values()) + + def normalize(self): + """ + Edits the counter such that the total count of all + keys sums to 1. The ratio of counts for all keys + will remain the same. Note that normalizing an empty + Counter will result in an error. + """ + total = float(self.totalCount()) + if total == 0: return + for key in self.keys(): + self[key] = self[key] / total + + def divideAll(self, divisor): + """ + Divides all counts by divisor + """ + divisor = float(divisor) + for key in self: + self[key] /= divisor + + def copy(self): + """ + Returns a copy of the counter + """ + return Counter(dict.copy(self)) + + def __mul__(self, y ): + """ + Multiplying two counters gives the dot product of their vectors where + each unique label is a vector element. + + >>> a = Counter() + >>> b = Counter() + >>> a['first'] = -2 + >>> a['second'] = 4 + >>> b['first'] = 3 + >>> b['second'] = 5 + >>> a['third'] = 1.5 + >>> a['fourth'] = 2.5 + >>> a * b + 14 + """ + sum = 0 + x = self + if len(x) > len(y): + x,y = y,x + for key in x: + if key not in y: + continue + sum += x[key] * y[key] + return sum + + def __radd__(self, y): + """ + Adding another counter to a counter increments the current counter + by the values stored in the second counter. + + >>> a = Counter() + >>> b = Counter() + >>> a['first'] = -2 + >>> a['second'] = 4 + >>> b['first'] = 3 + >>> b['third'] = 1 + >>> a += b + >>> a['first'] + 1 + """ + for key, value in y.items(): + self[key] += value + + def __add__( self, y ): + """ + Adding two counters gives a counter with the union of all keys and + counts of the second added to counts of the first. 
+ + >>> a = Counter() + >>> b = Counter() + >>> a['first'] = -2 + >>> a['second'] = 4 + >>> b['first'] = 3 + >>> b['third'] = 1 + >>> (a + b)['first'] + 1 + """ + addend = Counter() + for key in self: + if key in y: + addend[key] = self[key] + y[key] + else: + addend[key] = self[key] + for key in y: + if key in self: + continue + addend[key] = y[key] + return addend + + def __sub__( self, y ): + """ + Subtracting a counter from another gives a counter with the union of all keys and + counts of the second subtracted from counts of the first. + + >>> a = Counter() + >>> b = Counter() + >>> a['first'] = -2 + >>> a['second'] = 4 + >>> b['first'] = 3 + >>> b['third'] = 1 + >>> (a - b)['first'] + -5 + """ + addend = Counter() + for key in self: + if key in y: + addend[key] = self[key] - y[key] + else: + addend[key] = self[key] + for key in y: + if key in self: + continue + addend[key] = -1 * y[key] + return addend + +def raiseNotDefined(): + fileName = inspect.stack()[1][1] + line = inspect.stack()[1][2] + method = inspect.stack()[1][3] + + print "*** Method not implemented: %s at line %s of %s" % (method, line, fileName) + sys.exit(1) + +def normalize(vectorOrCounter): + """ + normalize a vector or counter by dividing each value by the sum of all values + """ + normalizedCounter = Counter() + if type(vectorOrCounter) == type(normalizedCounter): + counter = vectorOrCounter + total = float(counter.totalCount()) + if total == 0: return counter + for key in counter.keys(): + value = counter[key] + normalizedCounter[key] = value / total + return normalizedCounter + else: + vector = vectorOrCounter + s = float(sum(vector)) + if s == 0: return vector + return [el / s for el in vector] + +def nSample(distribution, values, n): + if sum(distribution) != 1: + distribution = normalize(distribution) + rand = [random.random() for i in range(n)] + rand.sort() + samples = [] + samplePos, distPos, cdf = 0,0, distribution[0] + while samplePos < n: + if rand[samplePos] < cdf: + samplePos += 1 + samples.append(values[distPos]) + else: + distPos += 1 + cdf += distribution[distPos] + return samples + +def sample(distribution, values = None): + if type(distribution) == Counter: + items = sorted(distribution.items()) + distribution = [i[1] for i in items] + values = [i[0] for i in items] + if sum(distribution) != 1: + distribution = normalize(distribution) + choice = random.random() + i, total= 0, distribution[0] + while choice > total: + i += 1 + total += distribution[i] + return values[i] + +def sampleFromCounter(ctr): + items = sorted(ctr.items()) + return sample([v for k,v in items], [k for k,v in items]) + +def getProbability(value, distribution, values): + """ + Gives the probability of a value under a discrete distribution + defined by (distributions, values). + """ + total = 0.0 + for prob, val in zip(distribution, values): + if val == value: + total += prob + return total + +def flipCoin( p ): + r = random.random() + return r < p + +def chooseFromDistribution( distribution ): + "Takes either a counter or a list of (prob, key) pairs and samples" + if type(distribution) == dict or type(distribution) == Counter: + return sample(distribution) + r = random.random() + base = 0.0 + for prob, element in distribution: + base += prob + if r <= base: return element + +def nearestPoint( pos ): + """ + Finds the nearest grid point to a position (discretizes). 
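    For example, nearestPoint((3.4, 7.8)) gives (3, 8): each coordinate is
    shifted by 0.5 and then truncated by int().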
+ """ + ( current_row, current_col ) = pos + + grid_row = int( current_row + 0.5 ) + grid_col = int( current_col + 0.5 ) + return ( grid_row, grid_col ) + +def sign( x ): + """ + Returns 1 or -1 depending on the sign of x + """ + if( x >= 0 ): + return 1 + else: + return -1 + +def arrayInvert(array): + """ + Inverts a matrix stored as a list of lists. + """ + result = [[] for i in array] + for outer in array: + for inner in range(len(outer)): + result[inner].append(outer[inner]) + return result + +def matrixAsList( matrix, value = True ): + """ + Turns a matrix into a list of coordinates matching the specified value + """ + rows, cols = len( matrix ), len( matrix[0] ) + cells = [] + for row in range( rows ): + for col in range( cols ): + if matrix[row][col] == value: + cells.append( ( row, col ) ) + return cells + +def lookup(name, namespace): + """ + Get a method or class from any imported module from its name. + Usage: lookup(functionName, globals()) + """ + dots = name.count('.') + if dots > 0: + moduleName, objName = '.'.join(name.split('.')[:-1]), name.split('.')[-1] + module = __import__(moduleName) + return getattr(module, objName) + else: + modules = [obj for obj in namespace.values() if str(type(obj)) == ""] + options = [getattr(module, name) for module in modules if name in dir(module)] + options += [obj[1] for obj in namespace.items() if obj[0] == name ] + if len(options) == 1: return options[0] + if len(options) > 1: raise Exception, 'Name conflict for %s' + raise Exception, '%s not found as a method or class' % name + +def pause(): + """ + Pauses the output stream awaiting user feedback. + """ + print "" + raw_input() + + +# code to handle timeouts +# +# FIXME +# NOTE: TimeoutFuncton is NOT reentrant. Later timeouts will silently +# disable earlier timeouts. Could be solved by maintaining a global list +# of active time outs. Currently, questions which have test cases calling +# this have all student code so wrapped. +# +import signal +import time +class TimeoutFunctionException(Exception): + """Exception to raise on a timeout""" + pass + + +class TimeoutFunction: + def __init__(self, function, timeout): + self.timeout = timeout + self.function = function + + def handle_timeout(self, signum, frame): + raise TimeoutFunctionException() + + def __call__(self, *args, **keyArgs): + # If we have SIGALRM signal, use it to cause an exception if and + # when this function runs too long. Otherwise check the time taken + # after the method has returned, and throw an exception then. 
+ if hasattr(signal, 'SIGALRM'): + old = signal.signal(signal.SIGALRM, self.handle_timeout) + signal.alarm(self.timeout) + try: + result = self.function(*args, **keyArgs) + finally: + signal.signal(signal.SIGALRM, old) + signal.alarm(0) + else: + startTime = time.time() + result = self.function(*args, **keyArgs) + timeElapsed = time.time() - startTime + if timeElapsed >= self.timeout: + self.handle_timeout(None, None) + return result + + + +_ORIGINAL_STDOUT = None +_ORIGINAL_STDERR = None +_MUTED = False + +class WritableNull: + def write(self, string): + pass + +def mutePrint(): + global _ORIGINAL_STDOUT, _ORIGINAL_STDERR, _MUTED + if _MUTED: + return + _MUTED = True + + _ORIGINAL_STDOUT = sys.stdout + #_ORIGINAL_STDERR = sys.stderr + sys.stdout = WritableNull() + #sys.stderr = WritableNull() + +def unmutePrint(): + global _ORIGINAL_STDOUT, _ORIGINAL_STDERR, _MUTED + if not _MUTED: + return + _MUTED = False + + sys.stdout = _ORIGINAL_STDOUT + #sys.stderr = _ORIGINAL_STDERR + diff --git a/reinforcement/valueIterationAgents.py b/reinforcement/valueIterationAgents.py new file mode 100644 index 0000000..f56bd51 --- /dev/null +++ b/reinforcement/valueIterationAgents.py @@ -0,0 +1,123 @@ +# -*- coding: utf-8 -*- +# valueIterationAgents.py +# ----------------------- +# Licensing Information: You are free to use or extend these projects for +# educational purposes provided that (1) you do not distribute or publish +# solutions, (2) you retain this notice, and (3) you provide clear +# attribution to UC Berkeley, including a link to http://ai.berkeley.edu. +# +# Attribution Information: The Pacman AI projects were developed at UC Berkeley. +# The core projects and autograders were primarily created by John DeNero +# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). +# Student side autograding was added by Brad Miller, Nick Hay, and +# Pieter Abbeel (pabbeel@cs.berkeley.edu). + + +import mdp, util + +from learningAgents import ValueEstimationAgent + + +class ValueIterationAgent(ValueEstimationAgent): + """ + * Please read learningAgents.py before reading this.* + + A ValueIterationAgent takes a Markov decision process + (see mdp.py) on initialization and runs value iteration + for a given number of iterations using the supplied + discount factor. + """ + + def __init__(self, mdp, discount=0.9, iterations=100): + """ + Your value iteration agent should take an mdp on + construction, run the indicated number of iterations + and then act according to the resulting policy. + + Some useful mdp methods you will use: + mdp.getStates() + mdp.getPossibleActions(state) + mdp.getTransitionStatesAndProbs(state, action) + mdp.getReward(state, action, nextState) + mdp.isTerminal(state) + """ + self.mdp = mdp + self.discount = discount + self.iterations = iterations + self.values = util.Counter() # A Counter is a dict with default 0 + + # Write value iteration code here + "*** YOUR CODE HERE ***" + states = self.mdp.getStates() + + print "__init__ ... 
states: " + str(states) + + for i in range(iterations): + # On reprend les valeurs de l'itération précédente comme référence + # Copie pour batch + q_copy = self.values.copy() + for state in states: + q_new = None + for action in self.mdp.getPossibleActions(state): + q = self.computeQValueFromValues(state, action) + # Garder la meilleure Q value + if q_new is None or q_new < q: + q_new = q + # Gérer le cas sans successeurs + if q_new is None: + q_copy[state] = 0 + else: + q_copy[state] = q_new + # On met à jour pout les prochaines itérations + self.values = q_copy + + def getValue(self, state): + """ + Return the value of the state (computed in __init__). + """ + return self.values[state] + + def computeQValueFromValues(self, state, action): + """ + Compute the Q-value of action in state from the + value function stored in self.values. + """ + "*** YOUR CODE HERE ***" + values = [] + for nextState, prob in self.mdp.getTransitionStatesAndProbs(state,action): + reward = self.mdp.getReward(state, action, nextState) + discount = self.discount + next_state_value = self.values[nextState] + values.append(prob*(reward+discount*next_state_value)) + return sum(values) + + def computeActionFromValues(self, state): + """ + The policy is the best action in the given state + according to the values currently stored in self.values. + + You may break ties any way you see fit. Note that if + there are no legal actions, which is the case at the + terminal state, you should return None. + """ + "*** YOUR CODE HERE ***" + possibleActions = self.mdp.getPossibleActions(state) + + if len(possibleActions) == 0: + return None + + q_values = [self.computeQValueFromValues(state, action) for action in possibleActions] + print "computeActionFromValues ... q_values: "+str(q_values) + print "index:"+str(q_values.index(max(q_values))) + print "action:"+str(possibleActions[q_values.index(max(q_values))]) + return possibleActions[q_values.index(max(q_values))] + + def getPolicy(self, state): + return self.computeActionFromValues(state) + + def getAction(self, state): + "Returns the policy at the state (no exploration)." + return self.computeActionFromValues(state) + + def getQValue(self, state, action): + return self.computeQValueFromValues(state, action) diff --git a/search/search.py b/search/search.py index 76be37b..beb4f2f 100644 --- a/search/search.py +++ b/search/search.py @@ -1,4 +1,4 @@ -# search.py +al.ca# search.py # --------- # Licensing Information: You are free to use or extend these projects for # educational purposes provided that (1) you do not distribute or publish