ajout des fichiers du tp 2 q1 répondue
This commit is contained in:
parent
e4cca7e1fa
commit
c3b9ff7050
101 changed files with 16680 additions and 1 deletions
1
reinforcement/VERSION
Normal file
1
reinforcement/VERSION
Normal file
|
@ -0,0 +1 @@
|
|||
v1.001
|
73
reinforcement/analysis.py
Normal file
73
reinforcement/analysis.py
Normal file
|
@ -0,0 +1,73 @@
|
|||
# analysis.py
|
||||
# -----------
|
||||
# Licensing Information: You are free to use or extend these projects for
|
||||
# educational purposes provided that (1) you do not distribute or publish
|
||||
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||
#
|
||||
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||
# The core projects and autograders were primarily created by John DeNero
|
||||
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||
|
||||
|
||||
######################
|
||||
# ANALYSIS QUESTIONS #
|
||||
######################
|
||||
|
||||
# Set the given parameters to obtain the specified policies through
|
||||
# value iteration.
|
||||
|
||||
def question2():
    """Parameters for question 2 (bridge crossing).

    Returns the (discount, noise) pair that makes the value-iteration
    agent cross the bridge.
    """
    # Low noise makes the risky bridge crossing worthwhile.
    answerDiscount, answerNoise = 0.9, 0.2
    return answerDiscount, answerNoise
|
||||
|
||||
def question3a():
    """Parameters for the question 3a target policy.

    Returns a (discount, noise, livingReward) triple, or the string
    'NOT POSSIBLE' if no setting produces the policy.
    """
    # Not answered yet: placeholder values.
    answerDiscount, answerNoise, answerLivingReward = None, None, None
    return answerDiscount, answerNoise, answerLivingReward
    # If not possible, return 'NOT POSSIBLE'
|
||||
|
||||
def question3b():
    """Parameters for the question 3b target policy.

    Returns a (discount, noise, livingReward) triple, or the string
    'NOT POSSIBLE' if no setting produces the policy.
    """
    # Not answered yet: placeholder values.
    answerDiscount, answerNoise, answerLivingReward = None, None, None
    return answerDiscount, answerNoise, answerLivingReward
    # If not possible, return 'NOT POSSIBLE'
|
||||
|
||||
def question3c():
    """Parameters for the question 3c target policy.

    Returns a (discount, noise, livingReward) triple, or the string
    'NOT POSSIBLE' if no setting produces the policy.
    """
    # Not answered yet: placeholder values.
    answerDiscount, answerNoise, answerLivingReward = None, None, None
    return answerDiscount, answerNoise, answerLivingReward
    # If not possible, return 'NOT POSSIBLE'
|
||||
|
||||
def question3d():
    """Parameters for the question 3d target policy.

    Returns a (discount, noise, livingReward) triple, or the string
    'NOT POSSIBLE' if no setting produces the policy.
    """
    # Not answered yet: placeholder values.
    answerDiscount, answerNoise, answerLivingReward = None, None, None
    return answerDiscount, answerNoise, answerLivingReward
    # If not possible, return 'NOT POSSIBLE'
|
||||
|
||||
def question3e():
    """Parameters for the question 3e target policy.

    Returns a (discount, noise, livingReward) triple, or the string
    'NOT POSSIBLE' if no setting produces the policy.
    """
    # Not answered yet: placeholder values.
    answerDiscount, answerNoise, answerLivingReward = None, None, None
    return answerDiscount, answerNoise, answerLivingReward
    # If not possible, return 'NOT POSSIBLE'
|
||||
|
||||
def question6():
    """Parameters for question 6 (epsilon / learning rate for Q-learning).

    Returns an (epsilon, learningRate) pair, or the string
    'NOT POSSIBLE' if no setting works.
    """
    # Not answered yet: placeholder values.
    answerEpsilon, answerLearningRate = None, None
    return answerEpsilon, answerLearningRate
    # If not possible, return 'NOT POSSIBLE'
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: import this module under its own name and print
    # the return value of every function whose name starts with 'question'.
    # NOTE: Python 2 print statements — this file targets Python 2.
    print 'Answers to analysis questions:'
    import analysis
    for q in [q for q in dir(analysis) if q.startswith('question')]:
        response = getattr(analysis, q)()
        print ' Question %s:\t%s' % (q, str(response))
|
351
reinforcement/autograder.py
Normal file
351
reinforcement/autograder.py
Normal file
|
@ -0,0 +1,351 @@
|
|||
# autograder.py
|
||||
# -------------
|
||||
# Licensing Information: You are free to use or extend these projects for
|
||||
# educational purposes provided that (1) you do not distribute or publish
|
||||
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||
#
|
||||
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||
# The core projects and autograders were primarily created by John DeNero
|
||||
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||
|
||||
|
||||
# imports from python standard library
|
||||
import grading
|
||||
import imp
|
||||
import optparse
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import projectParams
|
||||
import random
|
||||
random.seed(0)
|
||||
try:
|
||||
from pacman import GameState
|
||||
except:
|
||||
pass
|
||||
|
||||
# register arguments and set default values
|
||||
def readCommand(argv):
    """Parse autograder command-line flags from argv.

    Returns the optparse `options` object; positional args are ignored.
    Defaults for the student-code and test-class files come from the
    project-local `projectParams` module.
    """
    parser = optparse.OptionParser(description = 'Run public tests on student code')
    # All boolean flags default to off.
    parser.set_defaults(generateSolutions=False, edxOutput=False, muteOutput=False, printTestCase=False, noGraphics=False)
    parser.add_option('--test-directory',
                      dest = 'testRoot',
                      default = 'test_cases',
                      help = 'Root test directory which contains subdirectories corresponding to each question')
    parser.add_option('--student-code',
                      dest = 'studentCode',
                      default = projectParams.STUDENT_CODE_DEFAULT,
                      help = 'comma separated list of student code files')
    parser.add_option('--code-directory',
                      dest = 'codeRoot',
                      default = "",
                      help = 'Root directory containing the student and testClass code')
    parser.add_option('--test-case-code',
                      dest = 'testCaseCode',
                      default = projectParams.PROJECT_TEST_CLASSES,
                      help = 'class containing testClass classes for this project')
    parser.add_option('--generate-solutions',
                      dest = 'generateSolutions',
                      action = 'store_true',
                      help = 'Write solutions generated to .solution file')
    parser.add_option('--edx-output',
                      dest = 'edxOutput',
                      action = 'store_true',
                      help = 'Generate edX output files')
    parser.add_option('--mute',
                      dest = 'muteOutput',
                      action = 'store_true',
                      help = 'Mute output from executing tests')
    parser.add_option('--print-tests', '-p',
                      dest = 'printTestCase',
                      action = 'store_true',
                      help = 'Print each test case before running them.')
    parser.add_option('--test', '-t',
                      dest = 'runTest',
                      default = None,
                      help = 'Run one particular test. Relative to test root.')
    parser.add_option('--question', '-q',
                      dest = 'gradeQuestion',
                      default = None,
                      help = 'Grade one particular question.')
    parser.add_option('--no-graphics',
                      dest = 'noGraphics',
                      action = 'store_true',
                      help = 'No graphics display for pacman games.')
    (options, args) = parser.parse_args(argv)
    return options
|
||||
|
||||
|
||||
# confirm we should author solution files
|
||||
def confirmGenerate():
    """Interactively confirm on stdin before overwriting solution files.

    Loops until the user types exactly 'yes' (returns) or 'no'
    (exits the process with status 0).
    """
    # NOTE: Python 2 print statements — this file targets Python 2.
    print 'WARNING: this action will overwrite any solution files.'
    print 'Are you sure you want to proceed? (yes/no)'
    while True:
        ans = sys.stdin.readline().strip()
        if ans == 'yes':
            break
        elif ans == 'no':
            sys.exit(0)
        else:
            print 'please answer either "yes" or "no"'
|
||||
|
||||
|
||||
# TODO: Fix this so that it tracebacks work correctly
|
||||
# Looking at source of the traceback module, presuming it works
|
||||
# the same as the intepreters, it uses co_filename. This is,
|
||||
# however, a readonly attribute.
|
||||
# TODO: Fix this so that it tracebacks work correctly
# Looking at source of the traceback module, presuming it works
# the same as the intepreters, it uses co_filename. This is,
# however, a readonly attribute.
def setModuleName(module, filename):
    """Stamp `filename` onto every function and class in `module` as __file__.

    Objects that already expose a __file__ attribute are left untouched.
    Intended to make tracebacks from dynamically-created modules point at
    a real file name (see TODO above — co_filename itself is read-only).
    """
    # Sample a function object and a class object to get their types
    # (Python 2: optparse.Option is an old-style class).
    functionType = type(confirmGenerate)
    classType = type(optparse.Option)

    for i in dir(module):
        o = getattr(module, i)
        # Skip anything that already carries a __file__ (e.g. imported modules).
        if hasattr(o, '__file__'): continue

        if type(o) == functionType:
            setattr(o, '__file__', filename)
        elif type(o) == classType:
            setattr(o, '__file__', filename)
            # TODO: assign member __file__'s?
        #print i, type(o)
|
||||
|
||||
|
||||
#from cStringIO import StringIO
|
||||
|
||||
#from cStringIO import StringIO

def loadModuleString(moduleSource, moduleName='loadedModule'):
    """Build a module object from Python source given as a string.

    Args:
        moduleSource: the module's source code as a string.
        moduleName: name to register the new module under (new optional
            parameter; defaults keep the old call signature working).

    Returns the populated module object.
    """
    # Below broken, imp doesn't believe its being passed a file:
    # ValueError: load_module arg#2 should be a file or None
    #
    #f = StringIO(moduleCodeDict[k])
    #tmp = imp.load_module(k, f, k, (".py", "r", imp.PY_SOURCE))
    tmp = imp.new_module(moduleName)
    # BUG FIX: the original body referenced the undefined names `k` and
    # `moduleCodeDict` (leftovers from a loop in the caller), so every
    # call raised NameError. Execute the actual parameter instead.
    # exec(src, ns) works in both Python 2 (tuple form) and Python 3.
    exec(moduleSource, tmp.__dict__)
    setModuleName(tmp, moduleName)
    return tmp
|
||||
|
||||
import py_compile
|
||||
|
||||
def loadModuleFile(moduleName, filePath):
    """Import the Python source at `filePath` as module `moduleName` and return it.

    Uses the legacy `imp.load_module` API (this codebase targets Python 2;
    `imp` was removed in Python 3.12).
    """
    with open(filePath, 'r') as f:
        return imp.load_module(moduleName, f, "%s.py" % moduleName, (".py", "r", imp.PY_SOURCE))
|
||||
|
||||
|
||||
def readFile(path, root=""):
    """Read file from disk at specified path and return as string.

    `path` is joined onto the optional `root` directory before opening.
    """
    fullPath = os.path.join(root, path)
    handle = open(fullPath, 'r')
    try:
        return handle.read()
    finally:
        handle.close()
|
||||
|
||||
|
||||
#######################################################################
|
||||
# Error Hint Map
|
||||
#######################################################################
|
||||
|
||||
# TODO: use these
|
||||
ERROR_HINT_MAP = {
|
||||
'q1': {
|
||||
"<type 'exceptions.IndexError'>": """
|
||||
We noticed that your project threw an IndexError on q1.
|
||||
While many things may cause this, it may have been from
|
||||
assuming a certain number of successors from a state space
|
||||
or assuming a certain number of actions available from a given
|
||||
state. Try making your code more general (no hardcoded indices)
|
||||
and submit again!
|
||||
"""
|
||||
},
|
||||
'q3': {
|
||||
"<type 'exceptions.AttributeError'>": """
|
||||
We noticed that your project threw an AttributeError on q3.
|
||||
While many things may cause this, it may have been from assuming
|
||||
a certain size or structure to the state space. For example, if you have
|
||||
a line of code assuming that the state is (x, y) and we run your code
|
||||
on a state space with (x, y, z), this error could be thrown. Try
|
||||
making your code more general and submit again!
|
||||
|
||||
"""
|
||||
}
|
||||
}
|
||||
|
||||
import pprint
|
||||
|
||||
def splitStrings(d):
    """Return a copy of dict `d` prepared for pretty-printing.

    Keys starting with '__' are dropped; any string value containing a
    newline is replaced by its list of lines.
    """
    result = dict(d)
    for key in d:
        if key.startswith("__"):
            # Internal bookkeeping entry: omit it from the copy.
            del result[key]
        elif "\n" in result[key]:
            result[key] = result[key].split("\n")
    return result
|
||||
|
||||
|
||||
def printTest(testDict, solutionDict):
    """Print the raw text of a parsed test case followed by its solution.

    Both dicts are expected to carry their original file lines under the
    '__raw_lines__' key (as produced by testParser.TestParser.parse).
    """
    # NOTE: Python 2 print statements — this file targets Python 2.
    pp = pprint.PrettyPrinter(indent=4)
    print "Test case:"
    for line in testDict["__raw_lines__"]:
        print " |", line
    print "Solution:"
    for line in solutionDict["__raw_lines__"]:
        print " |", line
|
||||
|
||||
|
||||
def runTest(testName, moduleDict, printTestCase=False, display=None):
    """Run a single test case (no grading aggregation).

    Args:
        testName: path of the test relative to the test root, WITHOUT the
            '.test' extension; '<testName>.solution' must exist beside it.
        moduleDict: mapping of module name -> loaded student/test modules;
            each is injected into this module's namespace so test code can
            reference them by name.
        printTestCase: if True, echo the test and solution text first.
        display: graphics object handed to the Question stub.
    """
    import testParser
    import testClasses
    # Expose the student modules (and projectTestClasses) as globals here.
    for module in moduleDict:
        setattr(sys.modules[__name__], module, moduleDict[module])

    testDict = testParser.TestParser(testName + ".test").parse()
    solutionDict = testParser.TestParser(testName + ".solution").parse()
    test_out_file = os.path.join('%s.test_output' % testName)
    testDict['test_out_file'] = test_out_file
    # The test file names the test-case class to instantiate.
    testClass = getattr(projectTestClasses, testDict['class'])

    questionClass = getattr(testClasses, 'Question')
    question = questionClass({'max_points': 0}, display)
    testCase = testClass(question, testDict)

    if printTestCase:
        printTest(testDict, solutionDict)

    # This is a fragile hack to create a stub grades object
    grades = grading.Grades(projectParams.PROJECT_NAME, [(None,0)])
    testCase.execute(grades, moduleDict, solutionDict)
|
||||
|
||||
|
||||
# returns all the tests you need to run in order to run question
|
||||
# returns all the tests you need to run in order to run question
def getDepends(testParser, testRoot, question):
    """Return the list of questions to run for `question`, dependencies first.

    Reads 'depends' from the question's CONFIG file and recurses; the
    requested question is always the last element. Note: no cycle
    detection and shared dependencies may appear more than once.
    """
    allDeps = [question]
    questionDict = testParser.TestParser(os.path.join(testRoot, question, 'CONFIG')).parse()
    if 'depends' in questionDict:
        depends = questionDict['depends'].split()
        for d in depends:
            # run dependencies first
            allDeps = getDepends(testParser, testRoot, d) + allDeps
    return allDeps
|
||||
|
||||
# get list of questions to grade
|
||||
# get list of questions to grade
def getTestSubdirs(testParser, testRoot, questionToGrade):
    """Return the ordered list of question subdirectories to grade.

    If a single question was requested, its dependency chain is returned
    (dependencies first). Otherwise the root CONFIG's 'order' entry is
    used, falling back to a sorted directory listing.
    """
    problemDict = testParser.TestParser(os.path.join(testRoot, 'CONFIG')).parse()
    if questionToGrade != None:
        questions = getDepends(testParser, testRoot, questionToGrade)
        if len(questions) > 1:
            # NOTE: Python 2 print statement — this file targets Python 2.
            print 'Note: due to dependencies, the following tests will be run: %s' % ' '.join(questions)
        return questions
    if 'order' in problemDict:
        return problemDict['order'].split()
    return sorted(os.listdir(testRoot))
|
||||
|
||||
|
||||
# evaluate student code
|
||||
def evaluate(generateSolutions, testRoot, moduleDict, exceptionMap=ERROR_HINT_MAP, edxOutput=False, muteOutput=False,
             printTestCase=False, questionToGrade=None, display=None):
    """Grade the student code (or generate solution files) and return the points.

    Walks every question subdirectory under `testRoot`, builds Question and
    test-case objects from their CONFIG/.test files, registers them on this
    module, then hands everything to grading.Grades.

    Args:
        generateSolutions: write .solution files instead of grading.
        testRoot: root directory of the test-case tree.
        moduleDict: name -> loaded module mapping (student code + projectTestClasses).
        exceptionMap: hint text keyed by question/exception (currently unused here).
        edxOutput/muteOutput/printTestCase: passed through to the grader.
        questionToGrade: grade only this question (plus dependencies) if set.
        display: graphics object handed to each Question.

    Returns grades.points after grading completes.
    """
    # imports of testbench code. note that the testClasses import must follow
    # the import of student code due to dependencies
    import testParser
    import testClasses
    for module in moduleDict:
        setattr(sys.modules[__name__], module, moduleDict[module])

    questions = []
    questionDicts = {}
    test_subdirs = getTestSubdirs(testParser, testRoot, questionToGrade)
    for q in test_subdirs:
        subdir_path = os.path.join(testRoot, q)
        # Skip stray files and hidden directories.
        if not os.path.isdir(subdir_path) or q[0] == '.':
            continue

        # create a question object
        questionDict = testParser.TestParser(os.path.join(subdir_path, 'CONFIG')).parse()
        questionClass = getattr(testClasses, questionDict['class'])
        question = questionClass(questionDict, display)
        questionDicts[q] = questionDict

        # load test cases into question
        # Test files: anything matching *.test not starting with '#', '~' or '.'.
        tests = filter(lambda t: re.match('[^#~.].*\.test\Z', t), os.listdir(subdir_path))
        tests = map(lambda t: re.match('(.*)\.test\Z', t).group(1), tests)
        for t in sorted(tests):
            test_file = os.path.join(subdir_path, '%s.test' % t)
            solution_file = os.path.join(subdir_path, '%s.solution' % t)
            test_out_file = os.path.join(subdir_path, '%s.test_output' % t)
            testDict = testParser.TestParser(test_file).parse()
            if testDict.get("disabled", "false").lower() == "true":
                continue
            testDict['test_out_file'] = test_out_file
            testClass = getattr(projectTestClasses, testDict['class'])
            testCase = testClass(question, testDict)
            # makefun freezes testCase/solution_file per iteration; without it
            # every closure would see only the loop's final values.
            def makefun(testCase, solution_file):
                if generateSolutions:
                    # write solution file to disk
                    return lambda grades: testCase.writeSolution(moduleDict, solution_file)
                else:
                    # read in solution dictionary and pass as an argument
                    testDict = testParser.TestParser(test_file).parse()
                    solutionDict = testParser.TestParser(solution_file).parse()
                    if printTestCase:
                        return lambda grades: printTest(testDict, solutionDict) or testCase.execute(grades, moduleDict, solutionDict)
                    else:
                        return lambda grades: testCase.execute(grades, moduleDict, solutionDict)
            question.addTestCase(testCase, makefun(testCase, solution_file))

        # Note extra function is necessary for scoping reasons
        def makefun(question):
            return lambda grades: question.execute(grades)
        # The grader looks up each question by name on this module.
        setattr(sys.modules[__name__], q, makefun(question))
        questions.append((q, question.getMaxPoints()))

    grades = grading.Grades(projectParams.PROJECT_NAME, questions, edxOutput=edxOutput, muteOutput=muteOutput)
    # Prerequisites only matter when grading the full set of questions.
    if questionToGrade == None:
        for q in questionDicts:
            for prereq in questionDicts[q].get('depends', '').split():
                grades.addPrereq(q, prereq)

    grades.grade(sys.modules[__name__], bonusPic = projectParams.BONUS_PIC)
    return grades.points
|
||||
|
||||
|
||||
|
||||
def getDisplay(graphicsByDefault, options=None):
    """Pick a pacman display object.

    Uses the graphical display when `graphicsByDefault` is true and the
    command-line options did not request --no-graphics; otherwise (or if
    the graphics module cannot be imported) falls back to the null text
    display.
    """
    wantGraphics = graphicsByDefault
    if options is not None and options.noGraphics:
        wantGraphics = False
    if wantGraphics:
        try:
            import graphicsDisplay
            return graphicsDisplay.PacmanGraphics(1, frameTime=.05)
        except ImportError:
            # Graphics unavailable (e.g. no tkinter): use the text fallback.
            pass
    import textDisplay
    return textDisplay.NullGraphics()
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: parse flags, load the student code modules, then
    # either run one test (-t) or grade the requested question(s).
    options = readCommand(sys.argv)
    if options.generateSolutions:
        confirmGenerate()
    codePaths = options.studentCode.split(',')
    # moduleCodeDict = {}
    # for cp in codePaths:
    # moduleName = re.match('.*?([^/]*)\.py', cp).group(1)
    # moduleCodeDict[moduleName] = readFile(cp, root=options.codeRoot)
    # moduleCodeDict['projectTestClasses'] = readFile(options.testCaseCode, root=options.codeRoot)
    # moduleDict = loadModuleDict(moduleCodeDict)

    moduleDict = {}
    for cp in codePaths:
        # Module name = file basename without the .py extension.
        moduleName = re.match('.*?([^/]*)\.py', cp).group(1)
        moduleDict[moduleName] = loadModuleFile(moduleName, os.path.join(options.codeRoot, cp))
    moduleName = re.match('.*?([^/]*)\.py', options.testCaseCode).group(1)
    moduleDict['projectTestClasses'] = loadModuleFile(moduleName, os.path.join(options.codeRoot, options.testCaseCode))

    if options.runTest != None:
        runTest(options.runTest, moduleDict, printTestCase=options.printTestCase, display=getDisplay(True, options))
    else:
        # Graphics default on only when grading a single question.
        evaluate(options.generateSolutions, options.testRoot, moduleDict,
                 edxOutput=options.edxOutput, muteOutput=options.muteOutput, printTestCase=options.printTestCase,
                 questionToGrade=options.gradeQuestion, display=getDisplay(options.gradeQuestion!=None, options))
|
384
reinforcement/crawler.py
Normal file
384
reinforcement/crawler.py
Normal file
|
@ -0,0 +1,384 @@
|
|||
# crawler.py
|
||||
# ----------
|
||||
# Licensing Information: You are free to use or extend these projects for
|
||||
# educational purposes provided that (1) you do not distribute or publish
|
||||
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||
#
|
||||
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||
# The core projects and autograders were primarily created by John DeNero
|
||||
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||
|
||||
|
||||
#!/usr/bin/python
|
||||
import math
|
||||
from math import pi as PI
|
||||
import time
|
||||
import environment
|
||||
import random
|
||||
|
||||
class CrawlingRobotEnvironment(environment.Environment):
    """MDP-style environment wrapping a CrawlingRobot.

    States are (armBucket, handBucket) pairs of indices into discretized
    arm/hand angle ranges; reward is horizontal displacement of the robot.
    """

    def __init__(self, crawlingRobot):
        # The concrete robot whose angles/position this environment drives.
        self.crawlingRobot = crawlingRobot

        # The state is of the form (armAngle, handAngle)
        # where the angles are bucket numbers, not actual
        # degree measurements
        self.state = None

        self.nArmStates = 9
        self.nHandStates = 13

        # create a list of arm buckets and hand buckets to
        # discretize the state space
        minArmAngle,maxArmAngle = self.crawlingRobot.getMinAndMaxArmAngles()
        minHandAngle,maxHandAngle = self.crawlingRobot.getMinAndMaxHandAngles()
        armIncrement = (maxArmAngle - minArmAngle) / (self.nArmStates-1)
        handIncrement = (maxHandAngle - minHandAngle) / (self.nHandStates-1)
        self.armBuckets = [minArmAngle+(armIncrement*i) \
           for i in range(self.nArmStates)]
        self.handBuckets = [minHandAngle+(handIncrement*i) \
           for i in range(self.nHandStates)]

        # Reset
        self.reset()

    def getCurrentState(self):
        """
        Return the current state
        of the crawling robot
        """
        return self.state

    def getPossibleActions(self, state):
        """
        Returns possible actions
        for the states in the
        current state
        """
        actions = list()

        currArmBucket,currHandBucket = state
        # Each joint may move one bucket up/down, clipped at the range ends.
        if currArmBucket > 0: actions.append('arm-down')
        if currArmBucket < self.nArmStates-1: actions.append('arm-up')
        if currHandBucket > 0: actions.append('hand-down')
        if currHandBucket < self.nHandStates-1: actions.append('hand-up')

        return actions

    def doAction(self, action):
        """
        Perform the action and update
        the current state of the Environment
        and return the reward for the
        current state, the next state
        and the taken action.

        Returns:
          nextState, reward
        """
        nextState, reward = None, None

        oldX,oldY = self.crawlingRobot.getRobotPosition()

        armBucket,handBucket = self.state
        armAngle,handAngle = self.crawlingRobot.getAngles()
        # Move the corresponding joint one bucket and record the new state.
        # NOTE(review): an unrecognized action leaves nextState as None.
        if action == 'arm-up':
            newArmAngle = self.armBuckets[armBucket+1]
            self.crawlingRobot.moveArm(newArmAngle)
            nextState = (armBucket+1,handBucket)
        if action == 'arm-down':
            newArmAngle = self.armBuckets[armBucket-1]
            self.crawlingRobot.moveArm(newArmAngle)
            nextState = (armBucket-1,handBucket)
        if action == 'hand-up':
            newHandAngle = self.handBuckets[handBucket+1]
            self.crawlingRobot.moveHand(newHandAngle)
            nextState = (armBucket,handBucket+1)
        if action == 'hand-down':
            newHandAngle = self.handBuckets[handBucket-1]
            self.crawlingRobot.moveHand(newHandAngle)
            nextState = (armBucket,handBucket-1)

        newX,newY = self.crawlingRobot.getRobotPosition()

        # a simple reward function
        reward = newX - oldX

        self.state = nextState
        return nextState, reward

    def reset(self):
        """
        Resets the Environment to the initial state
        """
        ## Initialize the state to be the middle
        ## value for each parameter e.g. if there are 13 and 19
        ## buckets for the arm and hand parameters, then the intial
        ## state should be (6,9)
        ##
        ## Also call self.crawlingRobot.setAngles()
        ## to the initial arm and hand angle

        # NOTE(review): '/' on ints is floor division under Python 2; under
        # Python 3 these would become float indices — confirm before porting.
        armState = self.nArmStates/2
        handState = self.nHandStates/2
        self.state = armState,handState
        self.crawlingRobot.setAngles(self.armBuckets[armState],self.handBuckets[handState])
        self.crawlingRobot.positions = [20,self.crawlingRobot.getRobotPosition()[0]]
|
||||
|
||||
|
||||
class CrawlingRobot:
    """Two-joint crawling robot drawn on a canvas.

    Tracks arm/hand angles and the robot's x position; moving a joint
    displaces the robot horizontally via `displacement`. The canvas is
    presumably a Tkinter canvas (create_line/create_polygon/coords are
    used) — confirm against the graphics module.
    """

    def setAngles(self, armAngle, handAngle):
        """
        set the robot's arm and hand angles
        to the passed in values
        """
        self.armAngle = armAngle
        self.handAngle = handAngle

    def getAngles(self):
        """
        returns the pair of (armAngle, handAngle)
        """
        return self.armAngle, self.handAngle

    def getRobotPosition(self):
        """
        returns the (x,y) coordinates
        of the lower-left point of the
        robot
        """
        return self.robotPos

    def moveArm(self, newArmAngle):
        """
        move the robot arm to 'newArmAngle'
        """
        oldArmAngle = self.armAngle
        # NOTE(review): raising a string is a Python-2-only idiom and is a
        # TypeError even on late Python 2 — should be an Exception subclass.
        if newArmAngle > self.maxArmAngle:
            raise 'Crawling Robot: Arm Raised too high. Careful!'
        if newArmAngle < self.minArmAngle:
            raise 'Crawling Robot: Arm Raised too low. Careful!'
        disp = self.displacement(self.armAngle, self.handAngle,
                                 newArmAngle, self.handAngle)
        curXPos = self.robotPos[0]
        self.robotPos = (curXPos+disp, self.robotPos[1])
        self.armAngle = newArmAngle

        # Position and Velocity Sign Post
        # Keep only the last 100 x positions for the velocity display.
        self.positions.append(self.getRobotPosition()[0])
        # self.angleSums.append(abs(math.degrees(oldArmAngle)-math.degrees(newArmAngle)))
        if len(self.positions) > 100:
            self.positions.pop(0)
            # self.angleSums.pop(0)

    def moveHand(self, newHandAngle):
        """
        move the robot hand to 'newArmAngle'
        """
        oldHandAngle = self.handAngle

        if newHandAngle > self.maxHandAngle:
            raise 'Crawling Robot: Hand Raised too high. Careful!'
        if newHandAngle < self.minHandAngle:
            raise 'Crawling Robot: Hand Raised too low. Careful!'
        disp = self.displacement(self.armAngle, self.handAngle, self.armAngle, newHandAngle)
        curXPos = self.robotPos[0]
        self.robotPos = (curXPos+disp, self.robotPos[1])
        self.handAngle = newHandAngle

        # Position and Velocity Sign Post
        self.positions.append(self.getRobotPosition()[0])
        # self.angleSums.append(abs(math.degrees(oldHandAngle)-math.degrees(newHandAngle)))
        if len(self.positions) > 100:
            self.positions.pop(0)
            # self.angleSums.pop(0)

    def getMinAndMaxArmAngles(self):
        """
        get the lower- and upper- bound
        for the arm angles returns (min,max) pair
        """
        return self.minArmAngle, self.maxArmAngle

    def getMinAndMaxHandAngles(self):
        """
        get the lower- and upper- bound
        for the hand angles returns (min,max) pair
        """
        return self.minHandAngle, self.maxHandAngle

    def getRotationAngle(self):
        """
        get the current angle the
        robot body is rotated off the ground
        """
        armCos, armSin = self.__getCosAndSin(self.armAngle)
        handCos, handSin = self.__getCosAndSin(self.handAngle)
        x = self.armLength * armCos + self.handLength * handCos + self.robotWidth
        y = self.armLength * armSin + self.handLength * handSin + self.robotHeight
        # Body tilts only when the hand tip would otherwise be below ground.
        if y < 0:
            return math.atan(-y/x)
        return 0.0

    ## You shouldn't need methods below here

    def __getCosAndSin(self, angle):
        # Small helper: (cos, sin) of an angle in radians.
        return math.cos(angle), math.sin(angle)

    def displacement(self, oldArmDegree, oldHandDegree, armDegree, handDegree):
        # Horizontal displacement of the body caused by moving the joints
        # from the old angles to the new ones (hand tip pivots on the ground
        # when below it; zero displacement when it stays airborne).
        oldArmCos, oldArmSin = self.__getCosAndSin(oldArmDegree)
        armCos, armSin = self.__getCosAndSin(armDegree)
        oldHandCos, oldHandSin = self.__getCosAndSin(oldHandDegree)
        handCos, handSin = self.__getCosAndSin(handDegree)

        xOld = self.armLength * oldArmCos + self.handLength * oldHandCos + self.robotWidth
        yOld = self.armLength * oldArmSin + self.handLength * oldHandSin + self.robotHeight

        x = self.armLength * armCos + self.handLength * handCos + self.robotWidth
        y = self.armLength * armSin + self.handLength * handSin + self.robotHeight

        if y < 0:
            if yOld <= 0:
                return math.sqrt(xOld*xOld + yOld*yOld) - math.sqrt(x*x + y*y)
            return (xOld - yOld*(x-xOld) / (y - yOld)) - math.sqrt(x*x + y*y)
        else:
            if yOld >= 0:
                return 0.0
            return -(x - y * (xOld-x)/(yOld-y)) + math.sqrt(xOld*xOld + yOld*yOld)

        # Unreachable: both branches above return.
        raise 'Never Should See This!'

    def draw(self, stepCount, stepDelay):
        # Redraw body, arm, hand, and the velocity/position text readouts.
        x1, y1 = self.getRobotPosition()
        x1 = x1 % self.totWidth

        ## Check Lower Still on the ground
        if y1 != self.groundY:
            raise 'Flying Robot!!'

        rotationAngle = self.getRotationAngle()
        cosRot, sinRot = self.__getCosAndSin(rotationAngle)

        x2 = x1 + self.robotWidth * cosRot
        y2 = y1 - self.robotWidth * sinRot

        x3 = x1 - self.robotHeight * sinRot
        y3 = y1 - self.robotHeight * cosRot

        x4 = x3 + cosRot*self.robotWidth
        y4 = y3 - sinRot*self.robotWidth

        self.canvas.coords(self.robotBody,x1,y1,x2,y2,x4,y4,x3,y3)

        armCos, armSin = self.__getCosAndSin(rotationAngle+self.armAngle)
        xArm = x4 + self.armLength * armCos
        yArm = y4 - self.armLength * armSin

        self.canvas.coords(self.robotArm,x4,y4,xArm,yArm)

        handCos, handSin = self.__getCosAndSin(self.handAngle+rotationAngle)
        xHand = xArm + self.handLength * handCos
        yHand = yArm - self.handLength * handSin

        self.canvas.coords(self.robotHand,xArm,yArm,xHand,yHand)

        # Position and Velocity Sign Post
        # time = len(self.positions) + 0.5 * sum(self.angleSums)
        # velocity = (self.positions[-1]-self.positions[0]) / time
        # if len(self.positions) == 1: return
        steps = (stepCount - self.lastStep)
        if steps==0:return
        # pos = self.positions[-1]
        # velocity = (pos - self.lastPos) / steps
        # g = .9 ** (10 * stepDelay)
        # g = .99 ** steps
        # self.velAvg = g * self.velAvg + (1 - g) * velocity
        # g = .999 ** steps
        # self.velAvg2 = g * self.velAvg2 + (1 - g) * velocity
        pos = self.positions[-1]
        velocity = pos - self.positions[-2]
        vel2 = (pos - self.positions[0]) / len(self.positions)
        # Exponential moving average of the windowed velocity.
        self.velAvg = .9 * self.velAvg + .1 * vel2
        velMsg = '100-step Avg Velocity: %.2f' % self.velAvg
        # velMsg2 = '1000-step Avg Velocity: %.2f' % self.velAvg2
        velocityMsg = 'Velocity: %.2f' % velocity
        positionMsg = 'Position: %2.f' % pos
        stepMsg = 'Step: %d' % stepCount
        # Delete the previous text items before drawing fresh ones.
        if 'vel_msg' in dir(self):
            self.canvas.delete(self.vel_msg)
            self.canvas.delete(self.pos_msg)
            self.canvas.delete(self.step_msg)
            self.canvas.delete(self.velavg_msg)
            # self.canvas.delete(self.velavg2_msg)
        # self.velavg2_msg = self.canvas.create_text(850,190,text=velMsg2)
        self.velavg_msg = self.canvas.create_text(650,190,text=velMsg)
        self.vel_msg = self.canvas.create_text(450,190,text=velocityMsg)
        self.pos_msg = self.canvas.create_text(250,190,text=positionMsg)
        self.step_msg = self.canvas.create_text(50,190,text=stepMsg)
        # self.lastPos = pos
        self.lastStep = stepCount
        # self.lastVel = velocity

    def __init__(self, canvas):

        ## Canvas ##
        self.canvas = canvas
        self.velAvg = 0
        # self.velAvg2 = 0
        # self.lastPos = 0
        self.lastStep = 0
        # self.lastVel = 0

        ## Arm and Hand Degrees ##
        self.armAngle = self.oldArmDegree = 0.0
        self.handAngle = self.oldHandDegree = -PI/6

        self.maxArmAngle = PI/6
        self.minArmAngle = -PI/6

        self.maxHandAngle = 0
        self.minHandAngle = -(5.0/6.0) * PI

        ## Draw Ground ##
        self.totWidth = canvas.winfo_reqwidth()
        self.totHeight = canvas.winfo_reqheight()
        self.groundHeight = 40
        self.groundY = self.totHeight - self.groundHeight

        self.ground = canvas.create_rectangle(0,
            self.groundY,self.totWidth,self.totHeight, fill='blue')

        ## Robot Body ##
        self.robotWidth = 80
        self.robotHeight = 40
        self.robotPos = (20, self.groundY)
        self.robotBody = canvas.create_polygon(0,0,0,0,0,0,0,0, fill='green')

        ## Robot Arm ##
        self.armLength = 60
        self.robotArm = canvas.create_line(0,0,0,0,fill='orange',width=5)

        ## Robot Hand ##
        self.handLength = 40
        self.robotHand = canvas.create_line(0,0,0,0,fill='red',width=3)

        # Rolling window of recent x positions for the velocity readout.
        self.positions = [0,0]
        # self.angleSums = [0,0]
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: launch the interactive crawler GUI
    # (graphicsCrawlerDisplay supplies run()).
    from graphicsCrawlerDisplay import *
    run()
|
56
reinforcement/environment.py
Normal file
56
reinforcement/environment.py
Normal file
|
@ -0,0 +1,56 @@
|
|||
# environment.py
|
||||
# --------------
|
||||
# Licensing Information: You are free to use or extend these projects for
|
||||
# educational purposes provided that (1) you do not distribute or publish
|
||||
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||
#
|
||||
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||
# The core projects and autograders were primarily created by John DeNero
|
||||
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||
|
||||
|
||||
#!/usr/bin/python
|
||||
|
||||
class Environment:
    """Abstract base class for reinforcement-learning environments.

    Subclasses must implement getCurrentState, getPossibleActions,
    doAction and reset; isTerminal is provided in terms of those.
    """

    def getCurrentState(self):
        """
        Returns the current state of environment
        """
        # BUG FIX: the original body was the bare name `abstract`, an
        # undefined identifier that raised NameError; raise the intended
        # exception explicitly instead.
        raise NotImplementedError('Environment.getCurrentState is abstract')

    def getPossibleActions(self, state):
        """
        Returns possible actions the agent
        can take in the given state. Can
        return the empty list if we are in
        a terminal state.
        """
        raise NotImplementedError('Environment.getPossibleActions is abstract')

    def doAction(self, action):
        """
        Performs the given action in the current
        environment state and updates the environment.

        Returns a (reward, nextState) pair
        """
        raise NotImplementedError('Environment.doAction is abstract')

    def reset(self):
        """
        Resets the current state to the start state
        """
        raise NotImplementedError('Environment.reset is abstract')

    def isTerminal(self):
        """
        Has the environment entered a terminal
        state? This means there are no successors
        """
        # Terminal iff no action is available from the current state.
        state = self.getCurrentState()
        actions = self.getPossibleActions(state)
        return len(actions) == 0
|
103
reinforcement/featureExtractors.py
Normal file
103
reinforcement/featureExtractors.py
Normal file
|
@ -0,0 +1,103 @@
|
|||
# featureExtractors.py
|
||||
# --------------------
|
||||
# Licensing Information: You are free to use or extend these projects for
|
||||
# educational purposes provided that (1) you do not distribute or publish
|
||||
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||
#
|
||||
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||
# The core projects and autograders were primarily created by John DeNero
|
||||
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||
|
||||
|
||||
"Feature extractors for Pacman game states"
|
||||
|
||||
from game import Directions, Actions
|
||||
import util
|
||||
|
||||
class FeatureExtractor:
    def getFeatures(self, state, action):
        """
        Return a util.Counter mapping feature names to counts; for
        indicator features the count is usually just 1.0.  Subclasses
        must override this.
        """
        util.raiseNotDefined()
|
||||
|
||||
class IdentityExtractor(FeatureExtractor):
    """A single indicator feature per (state, action) pair."""
    def getFeatures(self, state, action):
        features = util.Counter()
        features[(state, action)] = 1.0
        return features
|
||||
|
||||
class CoordinateExtractor(FeatureExtractor):
    """Indicator features for the state, each coordinate, and the action."""
    def getFeatures(self, state, action):
        feats = util.Counter()
        feats[state] = 1.0
        feats['x=%d' % state[0]] = 1.0
        # Bug fix: the y-feature previously used state[0] (the x
        # coordinate), so the 'x=' and 'y=' indicators were always
        # identical and carried no extra information.
        feats['y=%d' % state[1]] = 1.0
        feats['action=%s' % action] = 1.0
        return feats
|
||||
|
||||
def closestFood(pos, food, walls):
    """
    Breadth-first search for the maze distance from ``pos`` to the
    nearest food pellet; returns None when no food is reachable.
    (Same idea as the search-project helper, kept in one place here.)
    """
    frontier = [(pos[0], pos[1], 0)]
    visited = set()
    while frontier:
        cx, cy, depth = frontier.pop(0)
        if (cx, cy) in visited:
            continue
        visited.add((cx, cy))
        # First time a food cell is dequeued, depth is its shortest distance.
        if food[cx][cy]:
            return depth
        for nx, ny in Actions.getLegalNeighbors((cx, cy), walls):
            frontier.append((nx, ny, depth + 1))
    # Exhausted the reachable maze without finding food.
    return None
|
||||
|
||||
class SimpleExtractor(FeatureExtractor):
    """
    Returns simple features for a basic reflex Pacman:
    - whether food will be eaten
    - how far away the next food is
    - whether a ghost collision is imminent
    - whether a ghost is one step away
    """

    def getFeatures(self, state, action):
        """Compute the feature counter for taking ``action`` in ``state``."""
        # Pull the board layout and ghost locations out of the state once.
        foodGrid = state.getFood()
        wallGrid = state.getWalls()
        ghostPositions = state.getGhostPositions()

        features = util.Counter()
        features["bias"] = 1.0

        # Where does Pacman land after the proposed action?
        px, py = state.getPacmanPosition()
        dx, dy = Actions.directionToVector(action)
        nx, ny = int(px + dx), int(py + dy)

        # How many ghosts could step onto that landing square next move?
        features["#-of-ghosts-1-step-away"] = sum(
            (nx, ny) in Actions.getLegalNeighbors(g, wallGrid)
            for g in ghostPositions)

        # Only reward eating when no ghost threatens the landing square.
        if not features["#-of-ghosts-1-step-away"] and foodGrid[nx][ny]:
            features["eats-food"] = 1.0

        dist = closestFood((nx, ny), foodGrid, wallGrid)
        if dist is not None:
            # Keep the distance below one so the weight update stays stable.
            features["closest-food"] = float(dist) / (wallGrid.width * wallGrid.height)
        features.divideAll(10.0)
        return features
|
729
reinforcement/game.py
Normal file
729
reinforcement/game.py
Normal file
|
@ -0,0 +1,729 @@
|
|||
# game.py
|
||||
# -------
|
||||
# Licensing Information: You are free to use or extend these projects for
|
||||
# educational purposes provided that (1) you do not distribute or publish
|
||||
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||
#
|
||||
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||
# The core projects and autograders were primarily created by John DeNero
|
||||
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||
|
||||
|
||||
# game.py
|
||||
# -------
|
||||
# Licensing Information: Please do not distribute or publish solutions to this
|
||||
# project. You are free to use and extend these projects for educational
|
||||
# purposes. The Pacman AI projects were developed at UC Berkeley, primarily by
|
||||
# John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||
# For more info, see http://inst.eecs.berkeley.edu/~cs188/sp09/pacman.html
|
||||
|
||||
from util import *
|
||||
import time, os
|
||||
import traceback
|
||||
import sys
|
||||
|
||||
#######################
|
||||
# Parts worth reading #
|
||||
#######################
|
||||
|
||||
class Agent:
    """
    Base class for all agents.  An agent must define getAction; it may
    also define the following optional hook, called when present:

        def registerInitialState(self, state): # inspects the starting state
    """
    def __init__(self, index=0):
        # Position of this agent in the game's agent list (0 = Pacman).
        self.index = index

    def getAction(self, state):
        """
        Given a GameState (from either {pacman, capture, sonar}.py),
        return an action from Directions.{North, South, East, West, Stop}.
        """
        raiseNotDefined()
|
||||
|
||||
class Directions:
    """Compass-direction constants plus left/right/reverse lookup tables."""
    NORTH = 'North'
    SOUTH = 'South'
    EAST = 'East'
    WEST = 'West'
    STOP = 'Stop'

    # Direction reached by turning 90 degrees counter-clockwise.
    LEFT = {NORTH: WEST,
            SOUTH: EAST,
            EAST: NORTH,
            WEST: SOUTH,
            STOP: STOP}

    # Turning clockwise is simply the inverse mapping of LEFT.
    RIGHT = {turned: original for original, turned in LEFT.items()}

    # Each direction paired with its opposite.
    REVERSE = {NORTH: SOUTH,
               SOUTH: NORTH,
               EAST: WEST,
               WEST: EAST,
               STOP: STOP}
|
||||
|
||||
class Configuration:
    """
    Holds the (x, y) coordinate of a character together with its
    traveling direction.

    By convention (0, 0) is the lower-left corner; x increases
    horizontally and y vertically, so North is the direction of
    increasing y, i.e. (0, 1).
    """

    def __init__(self, pos, direction):
        self.pos = pos
        self.direction = direction

    def getPosition(self):
        return self.pos

    def getDirection(self):
        return self.direction

    def isInteger(self):
        """True when the position lies exactly on a grid point."""
        x, y = self.pos
        return x == int(x) and y == int(y)

    def __eq__(self, other):
        if other is None:
            return False
        return self.pos == other.pos and self.direction == other.direction

    def __hash__(self):
        return hash(hash(self.pos) + 13 * hash(self.direction))

    def __str__(self):
        return "(x,y)=" + str(self.pos) + ", " + str(self.direction)

    def generateSuccessor(self, vector):
        """
        Return the configuration reached by translating this one by the
        action ``vector``.  This is a low-level call that does not check
        the legality of the movement.

        A zero vector keeps the old heading, since there is no Stop
        direction.
        """
        x, y = self.pos
        dx, dy = vector
        newDirection = Actions.vectorToDirection(vector)
        if newDirection == Directions.STOP:
            newDirection = self.direction
        return Configuration((x + dx, y + dy), newDirection)
|
||||
|
||||
class AgentState:
    """
    Holds the state of one agent: its configuration, scared timer, and
    (for contest layouts) food-carrying bookkeeping.
    """

    def __init__(self, startConfiguration, isPacman):
        self.start = startConfiguration
        self.configuration = startConfiguration
        self.isPacman = isPacman
        self.scaredTimer = 0
        # Contest-mode counters: pellets currently carried / banked.
        self.numCarrying = 0
        self.numReturned = 0

    def __str__(self):
        prefix = "Pacman: " if self.isPacman else "Ghost: "
        return prefix + str(self.configuration)

    def __eq__(self, other):
        if other is None:
            return False
        return (self.configuration == other.configuration
                and self.scaredTimer == other.scaredTimer)

    def __hash__(self):
        return hash(hash(self.configuration) + 13 * hash(self.scaredTimer))

    def copy(self):
        duplicate = AgentState(self.start, self.isPacman)
        duplicate.configuration = self.configuration
        duplicate.scaredTimer = self.scaredTimer
        duplicate.numCarrying = self.numCarrying
        duplicate.numReturned = self.numReturned
        return duplicate

    def getPosition(self):
        if self.configuration is None:
            return None
        return self.configuration.getPosition()

    def getDirection(self):
        return self.configuration.getDirection()
|
||||
|
||||
class Grid:
    """
    A 2-dimensional array of booleans backed by a list of lists.  Data is
    accessed via grid[x][y] where (x, y) are positions on a Pacman map with
    x horizontal, y vertical and the origin (0, 0) in the bottom-left corner.

    The __str__ method constructs an output that is oriented like a Pacman board.
    """
    def __init__(self, width, height, initialValue=False, bitRepresentation=None):
        if initialValue not in [False, True]:
            raise Exception('Grids can only contain booleans')
        # Number of cells packed into one integer by packBits/_unpackBits.
        self.CELLS_PER_INT = 30

        self.width = width
        self.height = height
        self.data = [[initialValue for y in range(height)] for x in range(width)]
        if bitRepresentation:
            self._unpackBits(bitRepresentation)

    def __getitem__(self, i):
        return self.data[i]

    def __setitem__(self, key, item):
        self.data[key] = item

    def __str__(self):
        # Transpose and flip so the first text row is the top of the board.
        out = [[str(self.data[x][y])[0] for x in range(self.width)] for y in range(self.height)]
        out.reverse()
        return '\n'.join([''.join(x) for x in out])

    def __eq__(self, other):
        if other is None:
            return False
        return self.data == other.data

    def __hash__(self):
        # Fold the boolean grid into a single integer (cheaper than
        # hashing the string rendering).
        base = 1
        h = 0
        for column in self.data:
            for cell in column:
                if cell:
                    h += base
                base *= 2
        return hash(h)

    def copy(self):
        """Return a copy whose columns are independent of this grid's."""
        g = Grid(self.width, self.height)
        g.data = [x[:] for x in self.data]
        return g

    def deepCopy(self):
        return self.copy()

    def shallowCopy(self):
        # Shares the underlying data: mutations are visible to both grids.
        g = Grid(self.width, self.height)
        g.data = self.data
        return g

    def count(self, item=True):
        """Return how many cells hold ``item``."""
        return sum([x.count(item) for x in self.data])

    def asList(self, key=True):
        """Return the (x, y) positions whose cell equals ``key``."""
        # Renamed from 'list', which shadowed the builtin.
        cells = []
        for x in range(self.width):
            for y in range(self.height):
                if self[x][y] == key:
                    cells.append((x, y))
        return cells

    def packBits(self):
        """
        Returns an efficient int list representation

        (width, height, bitPackedInts...)
        """
        bits = [self.width, self.height]
        currentInt = 0
        for i in range(self.height * self.width):
            bit = self.CELLS_PER_INT - (i % self.CELLS_PER_INT) - 1
            x, y = self._cellIndexToPosition(i)
            if self[x][y]:
                currentInt += 2 ** bit
            if (i + 1) % self.CELLS_PER_INT == 0:
                bits.append(currentInt)
                currentInt = 0
        bits.append(currentInt)
        return tuple(bits)

    def _cellIndexToPosition(self, index):
        # Floor division keeps the result an int on Python 3 as well
        # (plain '/' would yield a float there and break list indexing);
        # identical to the original '/' under Python 2 int semantics.
        x = index // self.height
        y = index % self.height
        return x, y

    def _unpackBits(self, bits):
        """
        Fills in data from a bit-level representation
        """
        cell = 0
        for packed in bits:
            for bit in self._unpackInt(packed, self.CELLS_PER_INT):
                if cell == self.width * self.height:
                    break
                x, y = self._cellIndexToPosition(cell)
                self[x][y] = bit
                cell += 1

    def _unpackInt(self, packed, size):
        bools = []
        if packed < 0:
            # Call-style raise: valid on both Python 2 and 3 (the original
            # 'raise ValueError, "..."' is a Python 3 syntax error).
            raise ValueError("must be a positive integer")
        for i in range(size):
            n = 2 ** (self.CELLS_PER_INT - i - 1)
            if packed >= n:
                bools.append(True)
                packed -= n
            else:
                bools.append(False)
        return bools
|
||||
|
||||
def reconstituteGrid(bitRep):
    """Rebuild a Grid from packBits() output; non-tuples pass through unchanged."""
    if type(bitRep) is not tuple:
        return bitRep
    width, height = bitRep[:2]
    return Grid(width, height, bitRepresentation=bitRep[2:])
|
||||
|
||||
####################################
|
||||
# Parts you shouldn't have to read #
|
||||
####################################
|
||||
|
||||
class Actions:
    """
    A collection of static methods for manipulating move actions.
    """
    # Unit movement vector for each compass direction.
    _directions = {Directions.NORTH: (0, 1),
                   Directions.SOUTH: (0, -1),
                   Directions.EAST: (1, 0),
                   Directions.WEST: (-1, 0),
                   Directions.STOP: (0, 0)}

    _directionsAsList = _directions.items()

    # Agents this close to a grid point are treated as being on it.
    TOLERANCE = .001

    @staticmethod
    def reverseDirection(action):
        """Return the opposite direction; unknown actions pass through."""
        opposites = {Directions.NORTH: Directions.SOUTH,
                     Directions.SOUTH: Directions.NORTH,
                     Directions.EAST: Directions.WEST,
                     Directions.WEST: Directions.EAST}
        return opposites.get(action, action)

    @staticmethod
    def vectorToDirection(vector):
        """Map a movement vector to a direction (y checked first, zero -> STOP)."""
        dx, dy = vector
        if dy > 0:
            return Directions.NORTH
        if dy < 0:
            return Directions.SOUTH
        if dx < 0:
            return Directions.WEST
        if dx > 0:
            return Directions.EAST
        return Directions.STOP

    @staticmethod
    def directionToVector(direction, speed=1.0):
        """Scale the unit vector for ``direction`` by ``speed``."""
        dx, dy = Actions._directions[direction]
        return (dx * speed, dy * speed)

    @staticmethod
    def getPossibleActions(config, walls):
        """Directions legal from ``config`` given the wall grid."""
        x, y = config.pos
        x_int, y_int = int(x + 0.5), int(y + 0.5)

        # In between grid points, all agents must continue straight.
        if abs(x - x_int) + abs(y - y_int) > Actions.TOLERANCE:
            return [config.getDirection()]

        possible = []
        for direction, (dx, dy) in Actions._directionsAsList:
            if not walls[x_int + dx][y_int + dy]:
                possible.append(direction)
        return possible

    @staticmethod
    def getLegalNeighbors(position, walls):
        """In-bounds, non-wall cells one step (or STOP) from ``position``."""
        x, y = position
        x_int, y_int = int(x + 0.5), int(y + 0.5)
        neighbors = []
        for direction, (dx, dy) in Actions._directionsAsList:
            next_x = x_int + dx
            if next_x < 0 or next_x == walls.width:
                continue
            next_y = y_int + dy
            if next_y < 0 or next_y == walls.height:
                continue
            if not walls[next_x][next_y]:
                neighbors.append((next_x, next_y))
        return neighbors

    @staticmethod
    def getSuccessor(position, action):
        """Position reached by applying ``action`` to ``position``."""
        dx, dy = Actions.directionToVector(action)
        x, y = position
        return (x + dx, y + dy)
|
||||
|
||||
class GameStateData:
    """
    Bundles everything that defines one game position: the food grid,
    capsule list, per-agent states, layout, and score bookkeeping, plus
    per-move change flags consumed by the display layer.
    """
    def __init__(self, prevState=None):
        """
        Generates a new data packet by copying information from its predecessor.
        """
        if prevState is not None:
            self.food = prevState.food.shallowCopy()
            self.capsules = prevState.capsules[:]
            self.agentStates = self.copyAgentStates(prevState.agentStates)
            self.layout = prevState.layout
            self._eaten = prevState._eaten
            self.score = prevState.score

        # Per-move change flags, reset on every new packet.
        self._foodEaten = None
        self._foodAdded = None
        self._capsuleEaten = None
        self._agentMoved = None
        self._lose = False
        self._win = False
        self.scoreChange = 0

    def deepCopy(self):
        """Return an independent copy (food and layout duplicated)."""
        state = GameStateData(self)
        state.food = self.food.deepCopy()
        state.layout = self.layout.deepCopy()
        state._agentMoved = self._agentMoved
        state._foodEaten = self._foodEaten
        state._foodAdded = self._foodAdded
        state._capsuleEaten = self._capsuleEaten
        return state

    def copyAgentStates(self, agentStates):
        """Return per-element copies of ``agentStates``."""
        return [agentState.copy() for agentState in agentStates]

    def __eq__(self, other):
        """
        Allows two states to be compared.
        """
        if other is None:
            return False
        # TODO Check for type of other
        return (self.agentStates == other.agentStates
                and self.food == other.food
                and self.capsules == other.capsules
                and self.score == other.score)

    def __hash__(self):
        """
        Allows states to be keys of dictionaries.
        """
        for i, state in enumerate(self.agentStates):
            try:
                int(hash(state))
            # 'except ... as' / print() are valid on Python 2.6+ and 3;
            # the original 'except TypeError, e' / 'print e' failed to
            # parse under Python 3.
            except TypeError as e:
                print(e)
                #hash(state)
        return int((hash(tuple(self.agentStates)) + 13 * hash(self.food) + 113 * hash(tuple(self.capsules)) + 7 * hash(self.score)) % 1048575)

    def __str__(self):
        width, height = self.layout.width, self.layout.height
        # 'board' renamed from 'map', which shadowed the builtin.
        board = Grid(width, height)
        if type(self.food) == type((1, 2)):
            # Food may still be bit-packed; expand it first.
            self.food = reconstituteGrid(self.food)
        for x in range(width):
            for y in range(height):
                food, walls = self.food, self.layout.walls
                board[x][y] = self._foodWallStr(food[x][y], walls[x][y])

        for agentState in self.agentStates:
            if agentState is None: continue
            if agentState.configuration is None: continue
            x, y = [int(i) for i in nearestPoint(agentState.configuration.pos)]
            agent_dir = agentState.configuration.direction
            if agentState.isPacman:
                board[x][y] = self._pacStr(agent_dir)
            else:
                board[x][y] = self._ghostStr(agent_dir)

        for x, y in self.capsules:
            board[x][y] = 'o'

        return str(board) + ("\nScore: %d\n" % self.score)

    def _foodWallStr(self, hasFood, hasWall):
        """One board character: food '.', wall '%', otherwise space."""
        if hasFood:
            return '.'
        elif hasWall:
            return '%'
        else:
            return ' '

    def _pacStr(self, dir):
        """Pacman glyph; the mouth opens opposite the travel direction."""
        if dir == Directions.NORTH:
            return 'v'
        if dir == Directions.SOUTH:
            return '^'
        if dir == Directions.WEST:
            return '>'
        return '<'

    def _ghostStr(self, dir):
        # Directional ghost glyphs were disabled upstream: everything
        # after the unconditional 'return' was unreachable and has been
        # removed.  All ghosts draw as 'G'.
        return 'G'

    def initialize(self, layout, numGhostAgents):
        """
        Creates an initial game state from a layout array (see layout.py).
        """
        self.food = layout.food.copy()
        #self.capsules = []
        self.capsules = layout.capsules[:]
        self.layout = layout
        self.score = 0
        self.scoreChange = 0

        self.agentStates = []
        numGhosts = 0
        for isPacman, pos in layout.agentPositions:
            if not isPacman:
                if numGhosts == numGhostAgents: continue # Max ghosts reached already
                else: numGhosts += 1
            self.agentStates.append(AgentState(Configuration(pos, Directions.STOP), isPacman))
        self._eaten = [False for a in self.agentStates]
|
||||
|
||||
# BOINC (volunteer-computing) support is optional; detect it once at
# import time.  Narrowed the original bare 'except:' to ImportError so
# real errors inside an installed boinc module are not silently hidden.
try:
    import boinc
    _BOINC_ENABLED = True
except ImportError:
    _BOINC_ENABLED = False
|
||||
|
||||
class Game:
    """
    The Game manages the control flow, soliciting actions from agents.

    NOTE(review): self.state is not assigned in __init__; it appears to be
    set externally (presumably by the rules object) before run() is
    called -- confirm against the callers.
    """

    def __init__( self, agents, display, rules, startingIndex=0, muteAgents=False, catchExceptions=False ):
        self.agentCrashed = False
        self.agents = agents
        self.display = display
        self.rules = rules
        self.startingIndex = startingIndex
        self.gameOver = False
        # When True, agent stdout/stderr is redirected into per-agent buffers.
        self.muteAgents = muteAgents
        # When True, agent exceptions/timeouts are caught and turn into crashes.
        self.catchExceptions = catchExceptions
        self.moveHistory = []
        # Per-agent cumulative move time and warning counts (indexed like agents).
        self.totalAgentTimes = [0 for agent in agents]
        self.totalAgentTimeWarnings = [0 for agent in agents]
        self.agentTimeout = False
        import cStringIO
        # One capture buffer per agent for mute()/unmute().
        self.agentOutput = [cStringIO.StringIO() for agent in agents]

    def getProgress(self):
        """Fraction of the game completed, in [0, 1]; 1.0 once the game is over."""
        if self.gameOver:
            return 1.0
        else:
            return self.rules.getProgress(self)

    def _agentCrash( self, agentIndex, quiet=False):
        "Helper method for handling agent crashes"
        if not quiet: traceback.print_exc()
        self.gameOver = True
        self.agentCrashed = True
        # Delegate scoring/announcement of the crash to the rules object.
        self.rules.agentCrash(self, agentIndex)

    OLD_STDOUT = None
    OLD_STDERR = None

    def mute(self, agentIndex):
        """Redirect stdout/stderr into the given agent's capture buffer."""
        if not self.muteAgents: return
        # NOTE(review): 'global' binds module-level names here, not the
        # class attributes OLD_STDOUT/OLD_STDERR declared above -- the
        # class attributes are never actually updated.
        global OLD_STDOUT, OLD_STDERR
        import cStringIO
        OLD_STDOUT = sys.stdout
        OLD_STDERR = sys.stderr
        sys.stdout = self.agentOutput[agentIndex]
        sys.stderr = self.agentOutput[agentIndex]

    def unmute(self):
        """Restore stdout/stderr saved by the matching mute() call."""
        if not self.muteAgents: return
        global OLD_STDOUT, OLD_STDERR
        # Revert stdout/stderr to originals
        sys.stdout = OLD_STDOUT
        sys.stderr = OLD_STDERR


    def run( self ):
        """
        Main control loop for game play.
        """
        self.display.initialize(self.state.data)
        self.numMoves = 0

        ###self.display.initialize(self.state.makeObservation(1).data)
        # inform learning agents of the game start
        for i in range(len(self.agents)):
            agent = self.agents[i]
            if not agent:
                self.mute(i)
                # this is a null agent, meaning it failed to load
                # the other team wins
                print >>sys.stderr, "Agent %d failed to load" % i
                self.unmute()
                self._agentCrash(i, quiet=True)
                return
            if ("registerInitialState" in dir(agent)):
                self.mute(i)
                if self.catchExceptions:
                    try:
                        # Enforce the per-agent startup time budget.
                        timed_func = TimeoutFunction(agent.registerInitialState, int(self.rules.getMaxStartupTime(i)))
                        try:
                            start_time = time.time()
                            timed_func(self.state.deepCopy())
                            time_taken = time.time() - start_time
                            self.totalAgentTimes[i] += time_taken
                        except TimeoutFunctionException:
                            print >>sys.stderr, "Agent %d ran out of time on startup!" % i
                            self.unmute()
                            self.agentTimeout = True
                            self._agentCrash(i, quiet=True)
                            return
                    except Exception,data:
                        self._agentCrash(i, quiet=False)
                        self.unmute()
                        return
                else:
                    agent.registerInitialState(self.state.deepCopy())
                ## TODO: could this exceed the total time
                self.unmute()

        agentIndex = self.startingIndex
        numAgents = len( self.agents )

        # Main turn loop: observe -> choose action -> apply -> redraw.
        while not self.gameOver:
            # Fetch the next agent
            agent = self.agents[agentIndex]
            move_time = 0
            skip_action = False
            # Generate an observation of the state
            if 'observationFunction' in dir( agent ):
                self.mute(agentIndex)
                if self.catchExceptions:
                    try:
                        timed_func = TimeoutFunction(agent.observationFunction, int(self.rules.getMoveTimeout(agentIndex)))
                        try:
                            start_time = time.time()
                            observation = timed_func(self.state.deepCopy())
                        except TimeoutFunctionException:
                            # The observation used the whole budget; the
                            # action phase below is forced to time out too.
                            skip_action = True
                        move_time += time.time() - start_time
                        self.unmute()
                    except Exception,data:
                        self._agentCrash(agentIndex, quiet=False)
                        self.unmute()
                        return
                else:
                    observation = agent.observationFunction(self.state.deepCopy())
                self.unmute()
            else:
                # Agents without an observation function see the full state.
                observation = self.state.deepCopy()

            # Solicit an action
            action = None
            self.mute(agentIndex)
            if self.catchExceptions:
                try:
                    # Remaining budget = move timeout minus observation time.
                    timed_func = TimeoutFunction(agent.getAction, int(self.rules.getMoveTimeout(agentIndex)) - int(move_time))
                    try:
                        start_time = time.time()
                        if skip_action:
                            raise TimeoutFunctionException()
                        action = timed_func( observation )
                    except TimeoutFunctionException:
                        print >>sys.stderr, "Agent %d timed out on a single move!" % agentIndex
                        self.agentTimeout = True
                        self._agentCrash(agentIndex, quiet=True)
                        self.unmute()
                        return

                    move_time += time.time() - start_time

                    # Slow-but-not-timed-out moves accumulate warnings;
                    # too many warnings ends the game as a crash.
                    if move_time > self.rules.getMoveWarningTime(agentIndex):
                        self.totalAgentTimeWarnings[agentIndex] += 1
                        print >>sys.stderr, "Agent %d took too long to make a move! This is warning %d" % (agentIndex, self.totalAgentTimeWarnings[agentIndex])
                        if self.totalAgentTimeWarnings[agentIndex] > self.rules.getMaxTimeWarnings(agentIndex):
                            print >>sys.stderr, "Agent %d exceeded the maximum number of warnings: %d" % (agentIndex, self.totalAgentTimeWarnings[agentIndex])
                            self.agentTimeout = True
                            self._agentCrash(agentIndex, quiet=True)
                            self.unmute()
                            return

                    self.totalAgentTimes[agentIndex] += move_time
                    #print "Agent: %d, time: %f, total: %f" % (agentIndex, move_time, self.totalAgentTimes[agentIndex])
                    if self.totalAgentTimes[agentIndex] > self.rules.getMaxTotalTime(agentIndex):
                        print >>sys.stderr, "Agent %d ran out of time! (time: %1.2f)" % (agentIndex, self.totalAgentTimes[agentIndex])
                        self.agentTimeout = True
                        self._agentCrash(agentIndex, quiet=True)
                        self.unmute()
                        return
                    self.unmute()
                except Exception,data:
                    self._agentCrash(agentIndex)
                    self.unmute()
                    return
            else:
                action = agent.getAction(observation)
            self.unmute()

            # Execute the action
            self.moveHistory.append( (agentIndex, action) )
            if self.catchExceptions:
                try:
                    self.state = self.state.generateSuccessor( agentIndex, action )
                except Exception,data:
                    self.mute(agentIndex)
                    self._agentCrash(agentIndex)
                    self.unmute()
                    return
            else:
                self.state = self.state.generateSuccessor( agentIndex, action )

            # Change the display
            self.display.update( self.state.data )
            ###idx = agentIndex - agentIndex % 2 + 1
            ###self.display.update( self.state.makeObservation(idx).data )

            # Allow for game specific conditions (winning, losing, etc.)
            self.rules.process(self.state, self)
            # Track progress
            # NOTE(review): agentIndex is always < numAgents at this point
            # (it is advanced modulo numAgents below), so this condition
            # never fires and numMoves stays 0 -- looks like a latent bug.
            if agentIndex == numAgents + 1: self.numMoves += 1
            # Next agent
            agentIndex = ( agentIndex + 1 ) % numAgents

            if _BOINC_ENABLED:
                boinc.set_fraction_done(self.getProgress())

        # inform a learning agent of the game result
        for agentIndex, agent in enumerate(self.agents):
            if "final" in dir( agent ) :
                try:
                    self.mute(agentIndex)
                    agent.final( self.state )
                    self.unmute()
                except Exception,data:
                    if not self.catchExceptions: raise
                    self._agentCrash(agentIndex)
                    self.unmute()
                    return
        self.display.finish()
|
81
reinforcement/ghostAgents.py
Normal file
81
reinforcement/ghostAgents.py
Normal file
|
@ -0,0 +1,81 @@
|
|||
# ghostAgents.py
|
||||
# --------------
|
||||
# Licensing Information: You are free to use or extend these projects for
|
||||
# educational purposes provided that (1) you do not distribute or publish
|
||||
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||
#
|
||||
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||
# The core projects and autograders were primarily created by John DeNero
|
||||
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||
|
||||
|
||||
from game import Agent
|
||||
from game import Actions
|
||||
from game import Directions
|
||||
import random
|
||||
from util import manhattanDistance
|
||||
import util
|
||||
|
||||
class GhostAgent( Agent ):
    """Base class: a ghost acts by sampling from an action distribution."""
    def __init__(self, index):
        self.index = index

    def getAction(self, state):
        # Sample from the subclass-provided distribution; STOP when empty.
        dist = self.getDistribution(state)
        if len(dist) == 0:
            return Directions.STOP
        return util.chooseFromDistribution(dist)

    def getDistribution(self, state):
        "Returns a Counter encoding a distribution over actions from the provided state."
        util.raiseNotDefined()
|
||||
|
||||
class RandomGhost( GhostAgent ):
    "A ghost that chooses a legal action uniformly at random."
    def getDistribution(self, state):
        # Equal weight on every legal action, then normalize.
        dist = util.Counter()
        for action in state.getLegalActions(self.index):
            dist[action] = 1.0
        dist.normalize()
        return dist
|
||||
|
||||
class DirectionalGhost( GhostAgent ):
    "A ghost that prefers to rush Pacman, or flee when scared."
    def __init__(self, index, prob_attack=0.8, prob_scaredFlee=0.8):
        self.index = index
        self.prob_attack = prob_attack
        self.prob_scaredFlee = prob_scaredFlee

    def getDistribution(self, state):
        # Read variables from state
        ghostState = state.getGhostState(self.index)
        legalActions = state.getLegalActions(self.index)
        pos = state.getGhostPosition(self.index)
        isScared = ghostState.scaredTimer > 0

        # Scared ghosts move at half speed.
        speed = 0.5 if isScared else 1

        actionVectors = [Actions.directionToVector(a, speed) for a in legalActions]
        newPositions = [(pos[0] + dx, pos[1] + dy) for dx, dy in actionVectors]
        pacmanPosition = state.getPacmanPosition()

        # Score each action by the resulting distance to Pacman: scared
        # ghosts prefer the farthest square, hungry ghosts the nearest.
        distancesToPacman = [manhattanDistance(p, pacmanPosition) for p in newPositions]
        if isScared:
            bestScore = max(distancesToPacman)
            bestProb = self.prob_scaredFlee
        else:
            bestScore = min(distancesToPacman)
            bestProb = self.prob_attack
        bestActions = [a for a, d in zip(legalActions, distancesToPacman) if d == bestScore]

        # Mix: bestProb mass spread over the best actions, the rest uniform.
        dist = util.Counter()
        for a in bestActions:
            dist[a] = bestProb / len(bestActions)
        for a in legalActions:
            dist[a] += (1 - bestProb) / len(legalActions)
        dist.normalize()
        return dist
|
282
reinforcement/grading.py
Normal file
282
reinforcement/grading.py
Normal file
|
@ -0,0 +1,282 @@
|
|||
# grading.py
|
||||
# ----------
|
||||
# Licensing Information: You are free to use or extend these projects for
|
||||
# educational purposes provided that (1) you do not distribute or publish
|
||||
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||
#
|
||||
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||
# The core projects and autograders were primarily created by John DeNero
|
||||
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||
|
||||
|
||||
"Common code for autograders"
|
||||
|
||||
import cgi
|
||||
import time
|
||||
import sys
|
||||
import traceback
|
||||
import pdb
|
||||
from collections import defaultdict
|
||||
import util
|
||||
|
||||
class Grades:
    "A data structure for project grades, along with formatting code to display them"

    def __init__(self, projectName, questionsAndMaxesList, edxOutput=False, muteOutput=False):
        """
        Defines the grading scheme for a project
          projectName: project name
          questionsAndMaxesDict: a list of (question name, max points per question)
        """
        self.questions = [el[0] for el in questionsAndMaxesList]
        self.maxes = dict(questionsAndMaxesList)
        self.points = Counter()  # points earned per question (Counter defined below in this module)
        self.messages = dict([(q, []) for q in self.questions])
        self.project = projectName
        self.start = time.localtime()[1:6]
        self.sane = True # Sanity checks
        self.currentQuestion = None # Which question we're grading
        self.edxOutput = edxOutput
        self.mute = muteOutput
        self.prereqs = defaultdict(set)  # question name -> set of prerequisite question names

        #print 'Autograder transcript for %s' % self.project
        print 'Starting on %d-%d at %d:%02d:%02d' % self.start

    def addPrereq(self, question, prereq):
        # Declare that `question` requires `prereq` to be fully solved first.
        self.prereqs[question].add(prereq)

    def grade(self, gradingModule, exceptionMap = {}, bonusPic = False):
        """
        Grades each question
          gradingModule: the module with all the grading functions (pass in with sys.modules[__name__])
        """

        completedQuestions = set([])
        for q in self.questions:
            print '\nQuestion %s' % q
            print '=' * (9 + len(q))
            print
            self.currentQuestion = q

            # Skip a question whose prerequisites have not been fully completed.
            incompleted = self.prereqs[q].difference(completedQuestions)
            if len(incompleted) > 0:
                prereq = incompleted.pop()
                print \
"""*** NOTE: Make sure to complete Question %s before working on Question %s,
*** because Question %s builds upon your answer for Question %s.
""" % (prereq, q, q, prereq)
                continue

            if self.mute: util.mutePrint()
            try:
                util.TimeoutFunction(getattr(gradingModule, q),300)(self)  # Call the question's function
                #TimeoutFunction(getattr(gradingModule, q),1200)(self) # Call the question's function
            except Exception, inst:
                self.addExceptionMessage(q, inst, traceback)
                self.addErrorHints(exceptionMap, inst, q[1])
            except:
                self.fail('FAIL: Terminated with a string exception.')
            finally:
                if self.mute: util.unmutePrint()

            # Only a full score counts as "completed" for prerequisite tracking.
            if self.points[q] >= self.maxes[q]:
                completedQuestions.add(q)

            print '\n### Question %s: %d/%d ###\n' % (q, self.points[q], self.maxes[q])

        print '\nFinished at %d:%02d:%02d' % time.localtime()[3:6]
        print "\nProvisional grades\n=================="

        for q in self.questions:
            print 'Question %s: %d/%d' % (q, self.points[q], self.maxes[q])
        print '------------------'
        print 'Total: %d/%d' % (self.points.totalCount(), sum(self.maxes.values()))
        # NOTE(review): ASCII-art spacing below is approximate — the original
        # rendering was mangled in transit; confirm against upstream grading.py.
        if bonusPic and self.points.totalCount() == 25:
            print """

                     ALL HAIL GRANDPAC.
              LONG LIVE THE GHOSTBUSTING KING.

                  ---      ----      ---
                  |  \    /  + \    /  |
                  | + \--/      \--/ + |
                  |   +     +          |
                  | +     +        +   |
                @@@@@@@@@@@@@@@@@@@@@@@@@@
              @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
            @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
           @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
       \   @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
        \ /  @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
         V     \   @@@@@@@@@@@@@@@@@@@@@@@@@@@@
                \ /  @@@@@@@@@@@@@@@@@@@@@@@@@@
                 V     @@@@@@@@@@@@@@@@@@@@@@@@
                         @@@@@@@@@@@@@@@@@@@@@@
          /\      @@@@@@@@@@@@@@@@@@@@@@
         /  \  @@@@@@@@@@@@@@@@@@@@@@@@@
    /\  /    @@@@@@@@@@@@@@@@@@@@@@@@@@@
   /  \  @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
  /    @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
        @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
         @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
           @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
             @@@@@@@@@@@@@@@@@@@@@@@@@@
                 @@@@@@@@@@@@@@@@@@

"""
        print """
Your grades are NOT yet registered. To register your grades, make sure
to follow your instructor's guidelines to receive credit on your project.
"""

        if self.edxOutput:
            self.produceOutput()

    def addExceptionMessage(self, q, inst, traceback):
        """
        Method to format the exception message, this is more complicated because
        we need to cgi.escape the traceback but wrap the exception in a <pre> tag
        """
        self.fail('FAIL: Exception raised: %s' % inst)
        self.addMessage('')
        for line in traceback.format_exc().split('\n'):
            self.addMessage(line)

    def addErrorHints(self, exceptionMap, errorInstance, questionNum):
        # Look up a hint first by question ('q' + questionNum), then by exception type.
        typeOf = str(type(errorInstance))
        questionName = 'q' + questionNum
        errorHint = ''

        # question specific error hints
        if exceptionMap.get(questionName):
            questionMap = exceptionMap.get(questionName)
            if (questionMap.get(typeOf)):
                errorHint = questionMap.get(typeOf)
        # fall back to general error messages if a question specific
        # one does not exist
        if (exceptionMap.get(typeOf)):
            errorHint = exceptionMap.get(typeOf)

        # dont include the HTML if we have no error hint
        if not errorHint:
            return ''

        for line in errorHint.split('\n'):
            self.addMessage(line)

    def produceOutput(self):
        # Write edX-style HTML results plus a plain-text total-score file.
        edxOutput = open('edx_response.html', 'w')
        edxOutput.write("<div>")

        # first sum
        total_possible = sum(self.maxes.values())
        total_score = sum(self.points.values())
        checkOrX = '<span class="incorrect"/>'
        if (total_score >= total_possible):
            checkOrX = '<span class="correct"/>'
        header = """
        <h3>
            Total score ({total_score} / {total_possible})
        </h3>
    """.format(total_score = total_score,
               total_possible = total_possible,
               checkOrX = checkOrX
               )
        edxOutput.write(header)

        for q in self.questions:
            if len(q) == 2:
                name = q[1]
            else:
                name = q
            checkOrX = '<span class="incorrect"/>'
            if (self.points[q] == self.maxes[q]):
                checkOrX = '<span class="correct"/>'
            #messages = '\n<br/>\n'.join(self.messages[q])
            messages = "<pre>%s</pre>" % '\n'.join(self.messages[q])
            output = """
        <div class="test">
          <section>
          <div class="shortform">
            Question {q} ({points}/{max}) {checkOrX}
          </div>
        <div class="longform">
          {messages}
        </div>
        </section>
      </div>
    """.format(q = name,
               max = self.maxes[q],
               messages = messages,
               checkOrX = checkOrX,
               points = self.points[q]
               )
            # print "*** output for Question %s " % q[1]
            # print output
            edxOutput.write(output)
        edxOutput.write("</div>")
        edxOutput.close()
        edxOutput = open('edx_grade', 'w')
        edxOutput.write(str(self.points.totalCount()))
        edxOutput.close()

    def fail(self, message, raw=False):
        "Sets sanity check bit to false and outputs a message"
        self.sane = False
        self.assignZeroCredit()
        self.addMessage(message, raw)

    def assignZeroCredit(self):
        self.points[self.currentQuestion] = 0

    def addPoints(self, amt):
        self.points[self.currentQuestion] += amt

    def deductPoints(self, amt):
        self.points[self.currentQuestion] -= amt

    def assignFullCredit(self, message="", raw=False):
        self.points[self.currentQuestion] = self.maxes[self.currentQuestion]
        if message != "":
            self.addMessage(message, raw)

    def addMessage(self, message, raw=False):
        if not raw:
            # We assume raw messages, formatted for HTML, are printed separately
            if self.mute: util.unmutePrint()
            print '*** ' + message
            if self.mute: util.mutePrint()
            # NOTE(review): cgi.escape was removed in Python 3.8 (html.escape) —
            # fine here since this file is Python 2 throughout.
            message = cgi.escape(message)
        self.messages[self.currentQuestion].append(message)

    def addMessageToEmail(self, message):
        # Deprecated: retained only for backward compatibility; body is a no-op loop.
        print "WARNING**** addMessageToEmail is deprecated %s" % message
        for line in message.split('\n'):
            pass
            #print '%%% ' + line + ' %%%'
            #self.messages[self.currentQuestion].append(line)
class Counter(dict):
    """A dict whose missing keys read as 0 (lightweight tally helper)."""

    def __getitem__(self, idx):
        # Fall back to 0 for any key not present.
        return dict.get(self, idx, 0)

    def totalCount(self):
        """Return the sum of counts over all keys."""
        return sum(self.values())
333
reinforcement/graphicsCrawlerDisplay.py
Normal file
333
reinforcement/graphicsCrawlerDisplay.py
Normal file
|
@ -0,0 +1,333 @@
|
|||
# graphicsCrawlerDisplay.py
|
||||
# -------------------------
|
||||
# Licensing Information: You are free to use or extend these projects for
|
||||
# educational purposes provided that (1) you do not distribute or publish
|
||||
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||
#
|
||||
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||
# The core projects and autograders were primarily created by John DeNero
|
||||
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||
|
||||
|
||||
# graphicsCrawlerDisplay.py
|
||||
# -------------------------
|
||||
# Licensing Information: Please do not distribute or publish solutions to this
|
||||
# project. You are free to use and extend these projects for educational
|
||||
# purposes. The Pacman AI projects were developed at UC Berkeley, primarily by
|
||||
# John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||
# Student side autograding was added by Brad Miller, Nick Hay, and Pieter
|
||||
# Abbeel in Spring 2013.
|
||||
# For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html
|
||||
|
||||
import Tkinter
|
||||
import qlearningAgents
|
||||
import time
|
||||
import threading
|
||||
import sys
|
||||
import crawler
|
||||
#import pendulum
|
||||
import math
|
||||
from math import pi as PI
|
||||
|
||||
robotType = 'crawler'
|
||||
|
||||
class Application:
    # GUI controller for the crawler reinforcement-learning demo: wires Tk
    # buttons to the Q-learning agent's hyper-parameters and steps the
    # simulation on a background thread.

    def sigmoid(self, x):
        # Squash an unbounded knob value into (0, 1); base-2 logistic.
        return 1.0 / (1.0 + 2.0 ** (-x))

    def incrementSpeed(self, inc):
        # Multiplicative change: inc < 1 speeds up the sim, inc > 1 slows it.
        self.tickTime *= inc
        # self.epsilon = min(1.0, self.epsilon)
        # self.epsilon = max(0.0,self.epsilon)
        # self.learner.setSpeed(self.epsilon)
        self.speed_label['text'] = 'Step Delay: %.5f' % (self.tickTime)

    def incrementEpsilon(self, inc):
        # Shift the raw knob, re-squash, push to the learner and the label.
        self.ep += inc
        self.epsilon = self.sigmoid(self.ep)
        self.learner.setEpsilon(self.epsilon)
        self.epsilon_label['text'] = 'Epsilon: %.3f' % (self.epsilon)

    def incrementGamma(self, inc):
        self.ga += inc
        self.gamma = self.sigmoid(self.ga)
        self.learner.setDiscount(self.gamma)
        self.gamma_label['text'] = 'Discount: %.3f' % (self.gamma)

    def incrementAlpha(self, inc):
        self.al += inc
        self.alpha = self.sigmoid(self.al)
        self.learner.setLearningRate(self.alpha)
        self.alpha_label['text'] = 'Learning Rate: %.3f' % (self.alpha)

    def __initGUI(self, win):
        # Lay out speed/epsilon/gamma/alpha controls and the drawing canvas.
        ## Window ##
        self.win = win

        ## Initialize Frame ##
        win.grid()
        self.dec = -.5
        self.inc = .5
        self.tickTime = 0.1

        ## Epsilon Button + Label ##
        self.setupSpeedButtonAndLabel(win)

        self.setupEpsilonButtonAndLabel(win)

        ## Gamma Button + Label ##
        self.setUpGammaButtonAndLabel(win)

        ## Alpha Button + Label ##
        self.setupAlphaButtonAndLabel(win)

        ## Exit Button ##
        #self.exit_button = Tkinter.Button(win,text='Quit', command=self.exit)
        #self.exit_button.grid(row=0, column=9)

        ## Simulation Buttons ##
        # self.setupSimulationButtons(win)

        ## Canvas ##
        # NOTE(review): uses the module-global `root` (set in run()), not `win`.
        self.canvas = Tkinter.Canvas(root, height=200, width=1000)
        self.canvas.grid(row=2,columnspan=10)

    def setupAlphaButtonAndLabel(self, win):
        # Learning-rate (-)/label/(+) widgets on row 1, columns 3-5.
        self.alpha_minus = Tkinter.Button(win,
        text="-",command=(lambda: self.incrementAlpha(self.dec)))
        self.alpha_minus.grid(row=1, column=3, padx=10)

        self.alpha = self.sigmoid(self.al)
        self.alpha_label = Tkinter.Label(win, text='Learning Rate: %.3f' % (self.alpha))
        self.alpha_label.grid(row=1, column=4)

        self.alpha_plus = Tkinter.Button(win,
        text="+",command=(lambda: self.incrementAlpha(self.inc)))
        self.alpha_plus.grid(row=1, column=5, padx=10)

    def setUpGammaButtonAndLabel(self, win):
        # Discount (-)/label/(+) widgets on row 1, columns 0-2.
        self.gamma_minus = Tkinter.Button(win,
        text="-",command=(lambda: self.incrementGamma(self.dec)))
        self.gamma_minus.grid(row=1, column=0, padx=10)

        self.gamma = self.sigmoid(self.ga)
        self.gamma_label = Tkinter.Label(win, text='Discount: %.3f' % (self.gamma))
        self.gamma_label.grid(row=1, column=1)

        self.gamma_plus = Tkinter.Button(win,
        text="+",command=(lambda: self.incrementGamma(self.inc)))
        self.gamma_plus.grid(row=1, column=2, padx=10)

    def setupEpsilonButtonAndLabel(self, win):
        # Exploration-rate (-)/label/(+) widgets on row 0, columns 3-5.
        self.epsilon_minus = Tkinter.Button(win,
        text="-",command=(lambda: self.incrementEpsilon(self.dec)))
        self.epsilon_minus.grid(row=0, column=3)

        self.epsilon = self.sigmoid(self.ep)
        self.epsilon_label = Tkinter.Label(win, text='Epsilon: %.3f' % (self.epsilon))
        self.epsilon_label.grid(row=0, column=4)

        self.epsilon_plus = Tkinter.Button(win,
        text="+",command=(lambda: self.incrementEpsilon(self.inc)))
        self.epsilon_plus.grid(row=0, column=5)

    def setupSpeedButtonAndLabel(self, win):
        # Step-delay (-)/label/(+) widgets on row 0, columns 0-2.
        self.speed_minus = Tkinter.Button(win,
        text="-",command=(lambda: self.incrementSpeed(.5)))
        self.speed_minus.grid(row=0, column=0)

        self.speed_label = Tkinter.Label(win, text='Step Delay: %.5f' % (self.tickTime))
        self.speed_label.grid(row=0, column=1)

        self.speed_plus = Tkinter.Button(win,
        text="+",command=(lambda: self.incrementSpeed(2)))
        self.speed_plus.grid(row=0, column=2)

    def skip5kSteps(self):
        # Request 5000 un-drawn simulation steps from the run loop.
        self.stepsToSkip = 5000

    def __init__(self, win):
        # Raw hyper-parameter knobs; sigmoid() maps them into (0, 1).
        self.ep = 0
        self.ga = 2
        self.al = 2
        self.stepCount = 0
        ## Init Gui

        self.__initGUI(win)

        # Init environment
        if robotType == 'crawler':
            self.robot = crawler.CrawlingRobot(self.canvas)
            self.robotEnvironment = crawler.CrawlingRobotEnvironment(self.robot)
        elif robotType == 'pendulum':
            # NOTE(review): `pendulum` is never imported (import is commented
            # out at the top of the file); this branch would raise NameError.
            # Dead path while robotType == 'crawler'.
            self.robot = pendulum.PendulumRobot(self.canvas)
            self.robotEnvironment = \
                pendulum.PendulumRobotEnvironment(self.robot)
        else:
            # NOTE(review): string exceptions are a TypeError in modern Python;
            # unreachable while robotType is hard-coded above.
            raise "Unknown RobotType"

        # Init Agent
        # NOTE(review): simulationFn is unused and references an undefined
        # `simulation` module — presumably leftover; verify before removing.
        simulationFn = lambda agent: \
            simulation.SimulationEnvironment(self.robotEnvironment,agent)
        actionFn = lambda state: \
            self.robotEnvironment.getPossibleActions(state)
        self.learner = qlearningAgents.QLearningAgent(actionFn=actionFn)

        self.learner.setEpsilon(self.epsilon)
        self.learner.setLearningRate(self.alpha)
        self.learner.setDiscount(self.gamma)

        # Start GUI: the run loop executes on a daemon-less background thread.
        self.running = True
        self.stopped = False
        self.stepsToSkip = 0
        self.thread = threading.Thread(target=self.run)
        self.thread.start()

    def exit(self):
        # Signal the run loop to stop, wait briefly, then tear down Tk.
        self.running = False
        for i in range(5):
            if not self.stopped:
                time.sleep(0.1)
        try:
            self.win.destroy()
        except:
            pass
        sys.exit(0)

    def step(self):
        # One agent-environment transition; resets the env on terminal states.

        self.stepCount += 1

        state = self.robotEnvironment.getCurrentState()
        actions = self.robotEnvironment.getPossibleActions(state)
        if len(actions) == 0.0:  # NOTE(review): comparing a length to 0.0 — works, but `== 0` is meant
            self.robotEnvironment.reset()
            state = self.robotEnvironment.getCurrentState()
            actions = self.robotEnvironment.getPossibleActions(state)
            print 'Reset!'
        action = self.learner.getAction(state)
        if action == None:
            # NOTE(review): string exception — TypeError in modern Python.
            raise 'None action returned: Code Not Complete'
        nextState, reward = self.robotEnvironment.doAction(action)
        self.learner.observeTransition(state, action, nextState, reward)

    def animatePolicy(self):
        # Paint a 100x100 (angle, velocity) grid colored by the greedy action.
        # Pendulum-only; unreachable in the crawler configuration.
        if robotType != 'pendulum':
            raise 'Only pendulum can animatePolicy'

        totWidth = self.canvas.winfo_reqwidth()
        totHeight = self.canvas.winfo_reqheight()

        length = 0.48 * min(totWidth, totHeight)
        x,y = totWidth-length-30, length+10

        angleMin, angleMax = self.robot.getMinAndMaxAngle()
        velMin, velMax = self.robot.getMinAndMaxAngleVelocity()

        # Draw the bounding box and legend only once.
        if not 'animatePolicyBox' in dir(self):
            self.canvas.create_line(x,y,x+length,y)
            self.canvas.create_line(x+length,y,x+length,y-length)
            self.canvas.create_line(x+length,y-length,x,y-length)
            self.canvas.create_line(x,y-length,x,y)
            self.animatePolicyBox = 1
            self.canvas.create_text(x+length/2,y+10,text='angle')
            self.canvas.create_text(x-30,y-length/2,text='velocity')
            self.canvas.create_text(x-60,y-length/4,text='Blue = kickLeft')
            self.canvas.create_text(x-60,y-length/4+20,text='Red = kickRight')
            self.canvas.create_text(x-60,y-length/4+40,text='White = doNothing')

        angleDelta = (angleMax-angleMin) / 100
        velDelta = (velMax-velMin) / 100
        for i in range(100):
            angle = angleMin + i * angleDelta

            for j in range(100):
                vel = velMin + j * velDelta
                state = self.robotEnvironment.getState(angle,vel)
                # NOTE(review): `max` shadows the builtin within this scope.
                max, argMax = None, None
                if not self.learner.seenState(state):
                    argMax = 'unseen'
                else:
                    for action in ('kickLeft','kickRight','doNothing'):
                        qVal = self.learner.getQValue(state, action)
                        if max == None or qVal > max:
                            max, argMax = qVal, action
                if argMax != 'unseen':
                    if argMax == 'kickLeft':
                        color = 'blue'
                    elif argMax == 'kickRight':
                        color = 'red'
                    elif argMax == 'doNothing':
                        color = 'white'
                    dx = length / 100.0
                    dy = length / 100.0
                    x0, y0 = x+i*dx, y-j*dy
                    self.canvas.create_rectangle(x0,y0,x0+dx,y0+dy,fill=color)

    def run(self):
        # Background simulation loop: sleep per tick, optionally fast-forward
        # skipped steps, stop cleanly when `running` goes False.
        self.stepCount = 0
        self.learner.startEpisode()
        while True:
            minSleep = .01
            tm = max(minSleep, self.tickTime)
            time.sleep(tm)
            self.stepsToSkip = int(tm / self.tickTime) - 1

            if not self.running:
                self.stopped = True
                return
            for i in range(self.stepsToSkip):
                self.step()
            self.stepsToSkip = 0
            self.step()
            # self.robot.draw()
        self.learner.stopEpisode()  # NOTE(review): unreachable — the loop only exits via `return`

    def start(self):
        # Hand control to the Tk main loop (blocks until the window closes).
        self.win.mainloop()
def run():
    """Create the Tk root window, launch the crawler Application, and enter
    the GUI event loop (blocks until the window is closed)."""
    global root
    root = Tkinter.Tk()
    root.title('Crawler GUI')
    root.resizable(0, 0)

    app = Application(root)

    def update_gui():
        # Redraw the robot, then reschedule this callback on the Tk event loop.
        app.robot.draw(app.stepCount, app.tickTime)
        root.after(10, update_gui)
    update_gui()

    root.protocol('WM_DELETE_WINDOW', app.exit)
    try:
        app.start()
    except:
        app.exit()
679
reinforcement/graphicsDisplay.py
Normal file
679
reinforcement/graphicsDisplay.py
Normal file
|
@ -0,0 +1,679 @@
|
|||
# graphicsDisplay.py
|
||||
# ------------------
|
||||
# Licensing Information: You are free to use or extend these projects for
|
||||
# educational purposes provided that (1) you do not distribute or publish
|
||||
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||
#
|
||||
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||
# The core projects and autograders were primarily created by John DeNero
|
||||
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||
|
||||
|
||||
from graphicsUtils import *
|
||||
import math, time
|
||||
from game import Directions
|
||||
|
||||
###########################
|
||||
# GRAPHICS DISPLAY CODE #
|
||||
###########################
|
||||
|
||||
# Most code by Dan Klein and John Denero written or rewritten for cs188, UC Berkeley.
|
||||
# Some code from a Pacman implementation by LiveWires, and used / modified with permission.
|
||||
|
||||
# Geometry and palette constants for the Pacman graphics display.

DEFAULT_GRID_SIZE = 30.0   # pixels per board cell at zoom 1.0
INFO_PANE_HEIGHT = 35      # pixels reserved below the maze for the score bar
BACKGROUND_COLOR = formatColor(0,0,0)
WALL_COLOR = formatColor(0.0/255.0, 51.0/255.0, 255.0/255.0)
INFO_PANE_COLOR = formatColor(.4,.4,0)
SCORE_COLOR = formatColor(.9, .9, .9)
PACMAN_OUTLINE_WIDTH = 2
PACMAN_CAPTURE_OUTLINE_WIDTH = 4   # thicker outline distinguishes capture mode

# One color per ghost index; index wraps implicitly for >6 ghosts elsewhere.
GHOST_COLORS = []
GHOST_COLORS.append(formatColor(.9,0,0)) # Red
GHOST_COLORS.append(formatColor(0,.3,.9)) # Blue
GHOST_COLORS.append(formatColor(.98,.41,.07)) # Orange
GHOST_COLORS.append(formatColor(.1,.75,.7)) # Green
GHOST_COLORS.append(formatColor(1.0,0.6,0.0)) # Yellow
GHOST_COLORS.append(formatColor(.4,0.13,0.91)) # Purple

TEAM_COLORS = GHOST_COLORS[:2]   # red vs. blue for capture games

# Ghost body outline as (x, y) offsets in cell units, scaled by GHOST_SIZE.
GHOST_SHAPE = [
    ( 0, 0.3 ),
    ( 0.25, 0.75 ),
    ( 0.5, 0.3 ),
    ( 0.75, 0.75 ),
    ( 0.75, -0.5 ),
    ( 0.5, -0.75 ),
    (-0.5, -0.75 ),
    (-0.75, -0.5 ),
    (-0.75, 0.75 ),
    (-0.5, 0.3 ),
    (-0.25, 0.75 )
]
GHOST_SIZE = 0.65
SCARED_COLOR = formatColor(1,1,1)

# NOTE: Python 2 map() returns a list here; under Python 3 this would be a lazy iterator.
GHOST_VEC_COLORS = map(colorToVector, GHOST_COLORS)

PACMAN_COLOR = formatColor(255.0/255.0,255.0/255.0,61.0/255)
PACMAN_SCALE = 0.5
#pacman_speed = 0.25

# Food
FOOD_COLOR = formatColor(1,1,1)
FOOD_SIZE = 0.1

# Laser
LASER_COLOR = formatColor(1,0,0)
LASER_SIZE = 0.02

# Capsule graphics
CAPSULE_COLOR = formatColor(1,1,1)
CAPSULE_SIZE = 0.25

# Drawing walls
WALL_RADIUS = 0.15
class InfoPane:
    """The score/status bar drawn beneath the maze.

    Coordinates passed to toScreen() are relative to the pane's bottom-left
    corner; the pane sits one grid row below the board.
    """

    def __init__(self, layout, gridSize):
        self.gridSize = gridSize
        self.width = (layout.width) * gridSize
        self.base = (layout.height + 1) * gridSize   # y offset of the pane's origin
        self.height = INFO_PANE_HEIGHT
        self.fontSize = 24
        self.textColor = PACMAN_COLOR
        self.drawPane()

    def toScreen(self, pos, y = None):
        """
        Translates a point relative from the bottom left of the info pane.
        Accepts either toScreen((x, y)) or toScreen(x, y).
        """
        if y == None:
            x, y = pos
        else:
            x = pos

        x = self.gridSize + x # Margin
        y = self.base + y
        return x, y

    def drawPane(self):
        # Initial score readout; updated in place via updateScore().
        self.scoreText = text( self.toScreen(0, 0), self.textColor, "SCORE: 0", "Times", self.fontSize, "bold")

    def initializeGhostDistances(self, distances):
        # Create one text item per ghost distance, shrinking the font on
        # narrow boards so the readouts fit.
        self.ghostDistanceText = []

        size = 20
        if self.width < 240:
            size = 12
        if self.width < 160:
            size = 10

        for i, d in enumerate(distances):
            t = text( self.toScreen(self.width/2 + self.width/8 * i, 0), GHOST_COLORS[i+1], d, "Times", size, "bold")
            self.ghostDistanceText.append(t)

    def updateScore(self, score):
        changeText(self.scoreText, "SCORE: % 4d" % score)

    def setTeam(self, isBlue):
        # BUG FIX: the original assigned the label to a local named `text`,
        # shadowing the graphicsUtils text() drawing function it then tried
        # to call — a guaranteed "str is not callable" TypeError. Use a
        # distinct local name for the label string.
        label = "RED TEAM"
        if isBlue: label = "BLUE TEAM"
        self.teamText = text( self.toScreen(300, 0), self.textColor, label, "Times", self.fontSize, "bold")

    def updateGhostDistances(self, distances):
        # Lazily create the distance readouts on first use, then update in place.
        if len(distances) == 0: return
        if 'ghostDistanceText' not in dir(self): self.initializeGhostDistances(distances)
        else:
            for i, d in enumerate(distances):
                changeText(self.ghostDistanceText[i], d)

    # The remaining hooks are intentionally no-ops in this display.
    def drawGhost(self):
        pass

    def drawPacman(self):
        pass

    def drawWarning(self):
        pass

    def clearIcon(self):
        pass

    def updateMessage(self, message):
        pass

    def clearMessage(self):
        pass
class PacmanGraphics:
|
||||
def __init__(self, zoom=1.0, frameTime=0.0, capture=False):
|
||||
self.have_window = 0
|
||||
self.currentGhostImages = {}
|
||||
self.pacmanImage = None
|
||||
self.zoom = zoom
|
||||
self.gridSize = DEFAULT_GRID_SIZE * zoom
|
||||
self.capture = capture
|
||||
self.frameTime = frameTime
|
||||
|
||||
def checkNullDisplay(self):
|
||||
return False
|
||||
|
||||
def initialize(self, state, isBlue = False):
|
||||
self.isBlue = isBlue
|
||||
self.startGraphics(state)
|
||||
|
||||
# self.drawDistributions(state)
|
||||
self.distributionImages = None # Initialized lazily
|
||||
self.drawStaticObjects(state)
|
||||
self.drawAgentObjects(state)
|
||||
|
||||
# Information
|
||||
self.previousState = state
|
||||
|
||||
def startGraphics(self, state):
|
||||
self.layout = state.layout
|
||||
layout = self.layout
|
||||
self.width = layout.width
|
||||
self.height = layout.height
|
||||
self.make_window(self.width, self.height)
|
||||
self.infoPane = InfoPane(layout, self.gridSize)
|
||||
self.currentState = layout
|
||||
|
||||
def drawDistributions(self, state):
|
||||
walls = state.layout.walls
|
||||
dist = []
|
||||
for x in range(walls.width):
|
||||
distx = []
|
||||
dist.append(distx)
|
||||
for y in range(walls.height):
|
||||
( screen_x, screen_y ) = self.to_screen( (x, y) )
|
||||
block = square( (screen_x, screen_y),
|
||||
0.5 * self.gridSize,
|
||||
color = BACKGROUND_COLOR,
|
||||
filled = 1, behind=2)
|
||||
distx.append(block)
|
||||
self.distributionImages = dist
|
||||
|
||||
def drawStaticObjects(self, state):
|
||||
layout = self.layout
|
||||
self.drawWalls(layout.walls)
|
||||
self.food = self.drawFood(layout.food)
|
||||
self.capsules = self.drawCapsules(layout.capsules)
|
||||
refresh()
|
||||
|
||||
def drawAgentObjects(self, state):
|
||||
self.agentImages = [] # (agentState, image)
|
||||
for index, agent in enumerate(state.agentStates):
|
||||
if agent.isPacman:
|
||||
image = self.drawPacman(agent, index)
|
||||
self.agentImages.append( (agent, image) )
|
||||
else:
|
||||
image = self.drawGhost(agent, index)
|
||||
self.agentImages.append( (agent, image) )
|
||||
refresh()
|
||||
|
||||
def swapImages(self, agentIndex, newState):
|
||||
"""
|
||||
Changes an image from a ghost to a pacman or vis versa (for capture)
|
||||
"""
|
||||
prevState, prevImage = self.agentImages[agentIndex]
|
||||
for item in prevImage: remove_from_screen(item)
|
||||
if newState.isPacman:
|
||||
image = self.drawPacman(newState, agentIndex)
|
||||
self.agentImages[agentIndex] = (newState, image )
|
||||
else:
|
||||
image = self.drawGhost(newState, agentIndex)
|
||||
self.agentImages[agentIndex] = (newState, image )
|
||||
refresh()
|
||||
|
||||
def update(self, newState):
|
||||
agentIndex = newState._agentMoved
|
||||
agentState = newState.agentStates[agentIndex]
|
||||
|
||||
if self.agentImages[agentIndex][0].isPacman != agentState.isPacman: self.swapImages(agentIndex, agentState)
|
||||
prevState, prevImage = self.agentImages[agentIndex]
|
||||
if agentState.isPacman:
|
||||
self.animatePacman(agentState, prevState, prevImage)
|
||||
else:
|
||||
self.moveGhost(agentState, agentIndex, prevState, prevImage)
|
||||
self.agentImages[agentIndex] = (agentState, prevImage)
|
||||
|
||||
if newState._foodEaten != None:
|
||||
self.removeFood(newState._foodEaten, self.food)
|
||||
if newState._capsuleEaten != None:
|
||||
self.removeCapsule(newState._capsuleEaten, self.capsules)
|
||||
self.infoPane.updateScore(newState.score)
|
||||
if 'ghostDistances' in dir(newState):
|
||||
self.infoPane.updateGhostDistances(newState.ghostDistances)
|
||||
|
||||
def make_window(self, width, height):
|
||||
grid_width = (width-1) * self.gridSize
|
||||
grid_height = (height-1) * self.gridSize
|
||||
screen_width = 2*self.gridSize + grid_width
|
||||
screen_height = 2*self.gridSize + grid_height + INFO_PANE_HEIGHT
|
||||
|
||||
begin_graphics(screen_width,
|
||||
screen_height,
|
||||
BACKGROUND_COLOR,
|
||||
"CS188 Pacman")
|
||||
|
||||
def drawPacman(self, pacman, index):
|
||||
position = self.getPosition(pacman)
|
||||
screen_point = self.to_screen(position)
|
||||
endpoints = self.getEndpoints(self.getDirection(pacman))
|
||||
|
||||
width = PACMAN_OUTLINE_WIDTH
|
||||
outlineColor = PACMAN_COLOR
|
||||
fillColor = PACMAN_COLOR
|
||||
|
||||
if self.capture:
|
||||
outlineColor = TEAM_COLORS[index % 2]
|
||||
fillColor = GHOST_COLORS[index]
|
||||
width = PACMAN_CAPTURE_OUTLINE_WIDTH
|
||||
|
||||
return [circle(screen_point, PACMAN_SCALE * self.gridSize,
|
||||
fillColor = fillColor, outlineColor = outlineColor,
|
||||
endpoints = endpoints,
|
||||
width = width)]
|
||||
|
||||
def getEndpoints(self, direction, position=(0, 0)):
    """Return the (start, stop) arc angles (degrees) for Pacman's mouth.

    The opening width oscillates with the fractional part of the
    position, so the mouth chomps as Pacman travels between cells.
    """
    x, y = position
    frac = x - int(x) + y - int(y)
    mouth = 30 + 80 * math.sin(math.pi * frac)
    half = mouth / 2
    # The arc is centered on the angle of the travel direction;
    # East (and Stop) default to 0 degrees.
    centers = {'West': 180, 'North': 90, 'South': 270}
    center = centers.get(direction, 0)
    return (center + half, center - half)
|
||||
|
||||
def movePacman(self, position, direction, image):
    """Reposition Pacman's circle/mouth arc for one animation frame.

    image is the 1-element handle list produced by drawPacman.
    """
    screenPosition = self.to_screen(position)
    endpoints = self.getEndpoints( direction, position )
    r = PACMAN_SCALE * self.gridSize
    moveCircle(image[0], screenPosition, r, endpoints)
    refresh()
|
||||
|
||||
def animatePacman(self, pacman, prevPacman, image):
    """Animate Pacman from his previous position to the new one.

    frameTime < 0 means single-step mode (wait for a key press each
    move; 'q' switches back to continuous play).  Small positive frame
    times skip interpolation and just jump to the new cell.
    """
    if self.frameTime < 0:
        print 'Press any key to step forward, "q" to play'
        keys = wait_for_keys()
        if 'q' in keys:
            self.frameTime = 0.1
    if self.frameTime > 0.01 or self.frameTime < 0:
        start = time.time()
        fx, fy = self.getPosition(prevPacman)
        px, py = self.getPosition(pacman)
        frames = 4.0
        # Linear interpolation between the old and new cell centers.
        for i in range(1,int(frames) + 1):
            pos = px*i/frames + fx*(frames-i)/frames, py*i/frames + fy*(frames-i)/frames
            self.movePacman(pos, self.getDirection(pacman), image)
            refresh()
            sleep(abs(self.frameTime) / frames)
    else:
        self.movePacman(self.getPosition(pacman), self.getDirection(pacman), image)
    refresh()
|
||||
|
||||
def getGhostColor(self, ghost, ghostIndex):
    """Color for a ghost: the shared 'scared' color while frightened,
    otherwise this ghost's own color from the palette."""
    scared = ghost.scaredTimer > 0
    return SCARED_COLOR if scared else GHOST_COLORS[ghostIndex]
|
||||
|
||||
def drawGhost(self, ghost, agentIndex):
    """Draw a ghost (body polygon + two eyes + two pupils).

    Returns the list of canvas handles in the fixed order
    [body, leftEye, rightEye, leftPupil, rightPupil]; moveGhost and
    moveEyes depend on this ordering.
    """
    pos = self.getPosition(ghost)
    dir = self.getDirection(ghost)
    (screen_x, screen_y) = (self.to_screen(pos) )
    coords = []
    # Scale the unit-square GHOST_SHAPE outline to screen coordinates.
    for (x, y) in GHOST_SHAPE:
        coords.append((x*self.gridSize*GHOST_SIZE + screen_x, y*self.gridSize*GHOST_SIZE + screen_y))

    colour = self.getGhostColor(ghost, agentIndex)
    body = polygon(coords, colour, filled = 1)
    WHITE = formatColor(1.0, 1.0, 1.0)
    BLACK = formatColor(0.0, 0.0, 0.0)

    # Offset the eyes/pupils slightly toward the direction of travel.
    dx = 0
    dy = 0
    if dir == 'North':
        dy = -0.2
    if dir == 'South':
        dy = 0.2
    if dir == 'East':
        dx = 0.2
    if dir == 'West':
        dx = -0.2
    leftEye = circle((screen_x+self.gridSize*GHOST_SIZE*(-0.3+dx/1.5), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy/1.5)), self.gridSize*GHOST_SIZE*0.2, WHITE, WHITE)
    rightEye = circle((screen_x+self.gridSize*GHOST_SIZE*(0.3+dx/1.5), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy/1.5)), self.gridSize*GHOST_SIZE*0.2, WHITE, WHITE)
    leftPupil = circle((screen_x+self.gridSize*GHOST_SIZE*(-0.3+dx), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy)), self.gridSize*GHOST_SIZE*0.08, BLACK, BLACK)
    rightPupil = circle((screen_x+self.gridSize*GHOST_SIZE*(0.3+dx), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy)), self.gridSize*GHOST_SIZE*0.08, BLACK, BLACK)
    ghostImageParts = []
    ghostImageParts.append(body)
    ghostImageParts.append(leftEye)
    ghostImageParts.append(rightEye)
    ghostImageParts.append(leftPupil)
    ghostImageParts.append(rightPupil)

    return ghostImageParts
|
||||
|
||||
def moveEyes(self, pos, dir, eyes):
    """Reposition a ghost's eyes/pupils at pos, looking toward dir.

    eyes is the 4-element slice [leftEye, rightEye, leftPupil,
    rightPupil] of the handles returned by drawGhost; offsets mirror
    the ones used there.
    """
    (screen_x, screen_y) = (self.to_screen(pos) )
    dx = 0
    dy = 0
    if dir == 'North':
        dy = -0.2
    if dir == 'South':
        dy = 0.2
    if dir == 'East':
        dx = 0.2
    if dir == 'West':
        dx = -0.2
    moveCircle(eyes[0],(screen_x+self.gridSize*GHOST_SIZE*(-0.3+dx/1.5), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy/1.5)), self.gridSize*GHOST_SIZE*0.2)
    moveCircle(eyes[1],(screen_x+self.gridSize*GHOST_SIZE*(0.3+dx/1.5), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy/1.5)), self.gridSize*GHOST_SIZE*0.2)
    moveCircle(eyes[2],(screen_x+self.gridSize*GHOST_SIZE*(-0.3+dx), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy)), self.gridSize*GHOST_SIZE*0.08)
    moveCircle(eyes[3],(screen_x+self.gridSize*GHOST_SIZE*(0.3+dx), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy)), self.gridSize*GHOST_SIZE*0.08)
|
||||
|
||||
def moveGhost(self, ghost, ghostIndex, prevGhost, ghostImageParts):
    """Translate a ghost's image parts to the new position, recolor the
    body for scared/normal state, and re-aim the eyes."""
    old_x, old_y = self.to_screen(self.getPosition(prevGhost))
    new_x, new_y = self.to_screen(self.getPosition(ghost))
    delta = new_x - old_x, new_y - old_y

    for ghostImagePart in ghostImageParts:
        move_by(ghostImagePart, delta)
    refresh()

    if ghost.scaredTimer > 0:
        color = SCARED_COLOR
    else:
        color = GHOST_COLORS[ghostIndex]
    # ghostImageParts[0] is the body polygon (see drawGhost ordering).
    edit(ghostImageParts[0], ('fill', color), ('outline', color))
    self.moveEyes(self.getPosition(ghost), self.getDirection(ghost), ghostImageParts[-4:])
    refresh()
|
||||
|
||||
def getPosition(self, agentState):
    """Grid (x, y) of an agent; agents with no configuration are parked
    far off-screen so they are effectively invisible."""
    if agentState.configuration is not None:
        return agentState.getPosition()
    return (-1000, -1000)
|
||||
|
||||
def getDirection(self, agentState):
    """Facing direction of an agent; STOP when it has no configuration."""
    config = agentState.configuration
    if config is None:
        return Directions.STOP
    return config.getDirection()
|
||||
|
||||
def finish(self):
    """Tear down the graphics window at the end of the game."""
    end_graphics()
|
||||
|
||||
def to_screen(self, point):
    """Map a maze coordinate to a pixel coordinate.

    One gridSize of left margin is added, and the y axis is flipped so
    that maze row 0 ends up at the bottom of the window.
    """
    grid_x, grid_y = point
    pixel_x = (grid_x + 1) * self.gridSize
    pixel_y = (self.height - grid_y) * self.gridSize
    return (pixel_x, pixel_y)
|
||||
|
||||
# Fixes some TK issue with off-center circles
def to_screen2(self, point):
    """Same mapping as to_screen; kept as a deliberately separate entry
    point because of the Tk off-center-circle issue noted above.
    Do not merge the two without re-testing the wall rendering.
    """
    ( x, y ) = point
    #y = self.height - y
    x = (x + 1)*self.gridSize
    y = (self.height - y)*self.gridSize
    return ( x, y )
|
||||
|
||||
def drawWalls(self, wallMatrix):
    """Draw the maze walls as rounded outlines.

    Each wall cell is split into four quadrants (NE/NW/SE/SW); for each
    quadrant the neighboring wall cells determine whether to draw an
    inner corner arc, a straight edge segment, or an outer corner arc,
    so adjacent walls join into smooth continuous outlines.
    In capture mode the left half of the maze uses team 0's color and
    the right half team 1's.
    """
    wallColor = WALL_COLOR
    for xNum, x in enumerate(wallMatrix):
        if self.capture and (xNum * 2) < wallMatrix.width: wallColor = TEAM_COLORS[0]
        if self.capture and (xNum * 2) >= wallMatrix.width: wallColor = TEAM_COLORS[1]

        for yNum, cell in enumerate(x):
            if cell: # There's a wall here
                pos = (xNum, yNum)
                screen = self.to_screen(pos)
                screen2 = self.to_screen2(pos)

                # draw each quadrant of the square based on adjacent walls
                wIsWall = self.isWall(xNum-1, yNum, wallMatrix)
                eIsWall = self.isWall(xNum+1, yNum, wallMatrix)
                nIsWall = self.isWall(xNum, yNum+1, wallMatrix)
                sIsWall = self.isWall(xNum, yNum-1, wallMatrix)
                nwIsWall = self.isWall(xNum-1, yNum+1, wallMatrix)
                swIsWall = self.isWall(xNum-1, yNum-1, wallMatrix)
                neIsWall = self.isWall(xNum+1, yNum+1, wallMatrix)
                seIsWall = self.isWall(xNum+1, yNum-1, wallMatrix)

                # NE quadrant
                if (not nIsWall) and (not eIsWall):
                    # inner circle
                    circle(screen2, WALL_RADIUS * self.gridSize, wallColor, wallColor, (0,91), 'arc')
                if (nIsWall) and (not eIsWall):
                    # vertical line
                    line(add(screen, (self.gridSize*WALL_RADIUS, 0)), add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(-0.5)-1)), wallColor)
                if (not nIsWall) and (eIsWall):
                    # horizontal line
                    line(add(screen, (0, self.gridSize*(-1)*WALL_RADIUS)), add(screen, (self.gridSize*0.5+1, self.gridSize*(-1)*WALL_RADIUS)), wallColor)
                if (nIsWall) and (eIsWall) and (not neIsWall):
                    # outer circle
                    circle(add(screen2, (self.gridSize*2*WALL_RADIUS, self.gridSize*(-2)*WALL_RADIUS)), WALL_RADIUS * self.gridSize-1, wallColor, wallColor, (180,271), 'arc')
                    line(add(screen, (self.gridSize*2*WALL_RADIUS-1, self.gridSize*(-1)*WALL_RADIUS)), add(screen, (self.gridSize*0.5+1, self.gridSize*(-1)*WALL_RADIUS)), wallColor)
                    line(add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(-2)*WALL_RADIUS+1)), add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(-0.5))), wallColor)

                # NW quadrant
                if (not nIsWall) and (not wIsWall):
                    # inner circle
                    circle(screen2, WALL_RADIUS * self.gridSize, wallColor, wallColor, (90,181), 'arc')
                if (nIsWall) and (not wIsWall):
                    # vertical line
                    line(add(screen, (self.gridSize*(-1)*WALL_RADIUS, 0)), add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(-0.5)-1)), wallColor)
                if (not nIsWall) and (wIsWall):
                    # horizontal line
                    line(add(screen, (0, self.gridSize*(-1)*WALL_RADIUS)), add(screen, (self.gridSize*(-0.5)-1, self.gridSize*(-1)*WALL_RADIUS)), wallColor)
                if (nIsWall) and (wIsWall) and (not nwIsWall):
                    # outer circle
                    circle(add(screen2, (self.gridSize*(-2)*WALL_RADIUS, self.gridSize*(-2)*WALL_RADIUS)), WALL_RADIUS * self.gridSize-1, wallColor, wallColor, (270,361), 'arc')
                    line(add(screen, (self.gridSize*(-2)*WALL_RADIUS+1, self.gridSize*(-1)*WALL_RADIUS)), add(screen, (self.gridSize*(-0.5), self.gridSize*(-1)*WALL_RADIUS)), wallColor)
                    line(add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(-2)*WALL_RADIUS+1)), add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(-0.5))), wallColor)

                # SE quadrant
                if (not sIsWall) and (not eIsWall):
                    # inner circle
                    circle(screen2, WALL_RADIUS * self.gridSize, wallColor, wallColor, (270,361), 'arc')
                if (sIsWall) and (not eIsWall):
                    # vertical line
                    line(add(screen, (self.gridSize*WALL_RADIUS, 0)), add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(0.5)+1)), wallColor)
                if (not sIsWall) and (eIsWall):
                    # horizontal line
                    line(add(screen, (0, self.gridSize*(1)*WALL_RADIUS)), add(screen, (self.gridSize*0.5+1, self.gridSize*(1)*WALL_RADIUS)), wallColor)
                if (sIsWall) and (eIsWall) and (not seIsWall):
                    # outer circle
                    circle(add(screen2, (self.gridSize*2*WALL_RADIUS, self.gridSize*(2)*WALL_RADIUS)), WALL_RADIUS * self.gridSize-1, wallColor, wallColor, (90,181), 'arc')
                    line(add(screen, (self.gridSize*2*WALL_RADIUS-1, self.gridSize*(1)*WALL_RADIUS)), add(screen, (self.gridSize*0.5, self.gridSize*(1)*WALL_RADIUS)), wallColor)
                    line(add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(2)*WALL_RADIUS-1)), add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(0.5))), wallColor)

                # SW quadrant
                if (not sIsWall) and (not wIsWall):
                    # inner circle
                    circle(screen2, WALL_RADIUS * self.gridSize, wallColor, wallColor, (180,271), 'arc')
                if (sIsWall) and (not wIsWall):
                    # vertical line
                    line(add(screen, (self.gridSize*(-1)*WALL_RADIUS, 0)), add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(0.5)+1)), wallColor)
                if (not sIsWall) and (wIsWall):
                    # horizontal line
                    line(add(screen, (0, self.gridSize*(1)*WALL_RADIUS)), add(screen, (self.gridSize*(-0.5)-1, self.gridSize*(1)*WALL_RADIUS)), wallColor)
                if (sIsWall) and (wIsWall) and (not swIsWall):
                    # outer circle
                    circle(add(screen2, (self.gridSize*(-2)*WALL_RADIUS, self.gridSize*(2)*WALL_RADIUS)), WALL_RADIUS * self.gridSize-1, wallColor, wallColor, (0,91), 'arc')
                    line(add(screen, (self.gridSize*(-2)*WALL_RADIUS+1, self.gridSize*(1)*WALL_RADIUS)), add(screen, (self.gridSize*(-0.5), self.gridSize*(1)*WALL_RADIUS)), wallColor)
                    line(add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(2)*WALL_RADIUS-1)), add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(0.5))), wallColor)
|
||||
|
||||
def isWall(self, x, y, walls):
    """True iff (x, y) lies inside the grid and that cell is a wall.

    Out-of-bounds coordinates are treated as open space so that the
    quadrant logic in drawWalls works at the maze border.
    """
    inside = 0 <= x < walls.width and 0 <= y < walls.height
    return walls[x][y] if inside else False
|
||||
|
||||
def drawFood(self, foodMatrix ):
    """Draw all food dots; return a 2-D list of canvas handles indexed
    [x][y] (None where there is no food) for later removal.

    NOTE(review): the capture-mode midline test here uses <= / > while
    drawWalls uses < / >= -- looks inconsistent for the exact midline
    column; confirm which is intended before unifying.
    """
    foodImages = []
    color = FOOD_COLOR
    for xNum, x in enumerate(foodMatrix):
        if self.capture and (xNum * 2) <= foodMatrix.width: color = TEAM_COLORS[0]
        if self.capture and (xNum * 2) > foodMatrix.width: color = TEAM_COLORS[1]
        imageRow = []
        foodImages.append(imageRow)
        for yNum, cell in enumerate(x):
            if cell: # There's food here
                screen = self.to_screen((xNum, yNum ))
                dot = circle( screen,
                              FOOD_SIZE * self.gridSize,
                              outlineColor = color, fillColor = color,
                              width = 1)
                imageRow.append(dot)
            else:
                imageRow.append(None)
    return foodImages
|
||||
|
||||
def drawCapsules(self, capsules ):
    """Draw the power capsules; return a dict mapping each capsule's
    grid position to its canvas handle for later removal."""
    capsuleImages = {}
    for capsule in capsules:
        ( screen_x, screen_y ) = self.to_screen(capsule)
        dot = circle( (screen_x, screen_y),
                      CAPSULE_SIZE * self.gridSize,
                      outlineColor = CAPSULE_COLOR,
                      fillColor = CAPSULE_COLOR,
                      width = 1)
        capsuleImages[capsule] = dot
    return capsuleImages
|
||||
|
||||
def removeFood(self, cell, foodImages ):
    """Erase the food dot at grid position cell (an (x, y) tuple)."""
    x, y = cell
    remove_from_screen(foodImages[x][y])
|
||||
|
||||
def removeCapsule(self, cell, capsuleImages ):
    """Erase the capsule at grid position cell (an (x, y) tuple)."""
    x, y = cell
    remove_from_screen(capsuleImages[(x, y)])
|
||||
|
||||
def drawExpandedCells(self, cells):
    """
    Draws an overlay of expanded grid positions for search agents.

    Earlier-expanded cells are drawn brighter red, fading toward the
    most recent expansion.  Replaces any previous overlay.
    """
    n = float(len(cells))
    baseColor = [1.0, 0.0, 0.0]
    self.clearExpandedCells()
    self.expandedCells = []
    for k, cell in enumerate(cells):
        screenPos = self.to_screen( cell)
        # Scale brightness by expansion order (k of n).
        cellColor = formatColor(*[(n-k) * c * .5 / n + .25 for c in baseColor])
        block = square(screenPos,
                       0.5 * self.gridSize,
                       color = cellColor,
                       filled = 1, behind=2)
        self.expandedCells.append(block)
        # In single-step mode, show each expansion as it happens.
        if self.frameTime < 0:
            refresh()
|
||||
|
||||
def clearExpandedCells(self):
    """Remove any existing search-expansion overlay.

    The dir() check guards the first call, before drawExpandedCells has
    ever created the expandedCells attribute.
    """
    if 'expandedCells' in dir(self) and len(self.expandedCells) > 0:
        for cell in self.expandedCells:
            remove_from_screen(cell)
|
||||
|
||||
|
||||
def updateDistributions(self, distributions):
    "Draws an agent's belief distributions"
    # copy all distributions so we don't change their state
    distributions = map(lambda x: x.copy(), distributions)
    # Lazily create the background squares on first use.
    if self.distributionImages == None:
        self.drawDistributions(self.previousState)
    for x in range(len(self.distributionImages)):
        for y in range(len(self.distributionImages[0])):
            image = self.distributionImages[x][y]
            # One belief weight per tracked agent for this cell.
            weights = [dist[ (x,y) ] for dist in distributions]

            # NOTE(review): this branch is an intentional no-op -- the
            # cell is recolored below whether or not any weight is
            # nonzero ("fog of war" keeps zero-weight cells black).
            if sum(weights) != 0:
                pass
            # Fog of war
            color = [0.0,0.0,0.0]
            colors = GHOST_VEC_COLORS[1:] # With Pacman
            if self.capture: colors = GHOST_VEC_COLORS
            # Blend each agent's color in, brighter for higher belief.
            for weight, gcolor in zip(weights, colors):
                color = [min(1.0, c + 0.95 * g * weight ** .3) for c,g in zip(color, gcolor)]
            changeColor(image, formatColor(*color))
    refresh()
|
||||
|
||||
class FirstPersonPacmanGraphics(PacmanGraphics):
    """Pacman's-eye view: ghosts can be hidden unless 'visible', and
    scared ghosts are drawn in their normal colors."""

    def __init__(self, zoom = 1.0, showGhosts = True, capture = False, frameTime=0):
        PacmanGraphics.__init__(self, zoom, frameTime=frameTime)
        self.showGhosts = showGhosts
        self.capture = capture

    def initialize(self, state, isBlue = False):
        """Set up the display for a new game from the initial state."""
        self.isBlue = isBlue
        PacmanGraphics.startGraphics(self, state)
        # Initialize distribution images
        walls = state.layout.walls
        dist = []
        self.layout = state.layout

        # Draw the rest
        self.distributionImages = None  # initialize lazily
        self.drawStaticObjects(state)
        self.drawAgentObjects(state)

        # Information
        self.previousState = state

    def lookAhead(self, config, state):
        """Draw only the ghosts currently visible to Pacman."""
        if config.getDirection() == 'Stop':
            return
        else:
            pass
            # Draw relevant ghosts
            allGhosts = state.getGhostStates()
            visibleGhosts = state.getVisibleGhosts()
            for i, ghost in enumerate(allGhosts):
                if ghost in visibleGhosts:
                    self.drawGhost(ghost, i)
                else:
                    self.currentGhostImages[i] = None

    def getGhostColor(self, ghost, ghostIndex):
        # Override: never show the scared color in first-person mode.
        return GHOST_COLORS[ghostIndex]

    def getPosition(self, ghostState):
        # Hide non-visible ghosts by parking them far off-screen
        # (rows > 1 only, so ghosts in the home row remain shown).
        if not self.showGhosts and not ghostState.isPacman and ghostState.getPosition()[1] > 1:
            return (-1000, -1000)
        else:
            return PacmanGraphics.getPosition(self, ghostState)
|
||||
|
||||
def add(x, y):
    """Component-wise sum of two 2-vectors, returned as a tuple."""
    (x0, y0), (x1, y1) = x, y
    return (x0 + x1, y0 + y1)
|
||||
|
||||
|
||||
# Saving graphical output
# -----------------------
# Note: to make an animated gif from this postscript output, try the command:
# convert -delay 7 -loop 1 -compress lzw -layers optimize frame* out.gif
# convert is part of imagemagick (freeware)

SAVE_POSTSCRIPT = False          # flip to True to dump one .ps per frame
POSTSCRIPT_OUTPUT_DIR = 'frames' # created on demand by saveFrame
FRAME_NUMBER = 0                 # monotonically increasing frame counter
import os

def saveFrame():
    "Saves the current graphical output as a postscript file"
    global SAVE_POSTSCRIPT, FRAME_NUMBER, POSTSCRIPT_OUTPUT_DIR
    if not SAVE_POSTSCRIPT: return
    if not os.path.exists(POSTSCRIPT_OUTPUT_DIR): os.mkdir(POSTSCRIPT_OUTPUT_DIR)
    name = os.path.join(POSTSCRIPT_OUTPUT_DIR, 'frame_%08d.ps' % FRAME_NUMBER)
    FRAME_NUMBER += 1
    writePostscript(name) # writes the current canvas
|
348
reinforcement/graphicsGridworldDisplay.py
Normal file
348
reinforcement/graphicsGridworldDisplay.py
Normal file
|
@ -0,0 +1,348 @@
|
|||
# graphicsGridworldDisplay.py
|
||||
# ---------------------------
|
||||
# Licensing Information: You are free to use or extend these projects for
|
||||
# educational purposes provided that (1) you do not distribute or publish
|
||||
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||
#
|
||||
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||
# The core projects and autograders were primarily created by John DeNero
|
||||
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||
|
||||
|
||||
import util
|
||||
from graphicsUtils import *
|
||||
|
||||
class GraphicsGridworldDisplay:
    """Graphical front-end for a gridworld MDP: renders state values,
    Q-values, or a bare grid using the module-level draw* helpers."""

    def __init__(self, gridworld, size=120, speed=1.0):
        self.gridworld = gridworld
        self.size = size     # pixel size of one grid square
        self.speed = speed   # > 1.0 shortens the post-draw pause

    def start(self):
        """Open the display window sized for this gridworld."""
        setup(self.gridworld, size=self.size)

    def pause(self):
        """Block until the user presses a key."""
        wait_for_keys()

    def displayValues(self, agent, currentState = None, message = 'Agent Values'):
        """Render the agent's state values and greedy policy arrows."""
        values = util.Counter()
        policy = {}
        states = self.gridworld.getStates()
        for state in states:
            values[state] = agent.getValue(state)
            policy[state] = agent.getPolicy(state)
        drawValues(self.gridworld, values, policy, currentState, message)
        sleep(0.05 / self.speed)

    def displayNullValues(self, currentState = None, message = ''):
        """Render the bare grid (all values zero, no policy)."""
        values = util.Counter()
        #policy = {}
        states = self.gridworld.getStates()
        for state in states:
            values[state] = 0.0
            #policy[state] = agent.getPolicy(state)
        drawNullValues(self.gridworld, currentState,'')
        # drawValues(self.gridworld, values, policy, currentState, message)
        sleep(0.05 / self.speed)

    def displayQValues(self, agent, currentState = None, message = 'Agent Q-Values'):
        """Render the agent's Q-values as colored wedges per action."""
        qValues = util.Counter()
        states = self.gridworld.getStates()
        for state in states:
            for action in self.gridworld.getPossibleActions(state):
                qValues[(state, action)] = agent.getQValue(state, action)
        drawQValues(self.gridworld, qValues, currentState, message)
        sleep(0.05 / self.speed)
|
||||
|
||||
# Color scheme for the gridworld renderer.
BACKGROUND_COLOR = formatColor(0,0,0)
EDGE_COLOR = formatColor(1,1,1)
OBSTACLE_COLOR = formatColor(0.5,0.5,0.5)
TEXT_COLOR = formatColor(1,1,1)
MUTED_TEXT_COLOR = formatColor(0.7,0.7,0.7)
LOCATION_COLOR = formatColor(0,0,1)

# Layout metrics; -1 means "not initialized" -- real values are
# installed by setup() before anything is drawn.
WINDOW_SIZE = -1
GRID_SIZE = -1
GRID_HEIGHT = -1
MARGIN = -1
||||
|
||||
def setup(gridworld, title = "Gridworld Display", size = 120):
    """Open the display window and install the module-level geometry
    globals used by to_screen and the draw* helpers.

    NOTE(review): WINDOW_SIZE is assigned here but is NOT in the global
    statement, so the module-level WINDOW_SIZE stays -1; the global list
    also names SCREEN_WIDTH/SCREEN_HEIGHT which are never assigned.
    Harmless as long as nothing reads them -- confirm before relying on
    WINDOW_SIZE elsewhere.
    """
    global GRID_SIZE, MARGIN, SCREEN_WIDTH, SCREEN_HEIGHT, GRID_HEIGHT
    grid = gridworld.grid
    WINDOW_SIZE = size
    GRID_SIZE = size
    GRID_HEIGHT = grid.height
    MARGIN = GRID_SIZE * 0.75
    screen_width = (grid.width - 1) * GRID_SIZE + MARGIN * 2
    screen_height = (grid.height - 0.5) * GRID_SIZE + MARGIN * 2

    begin_graphics(screen_width,
                   screen_height,
                   BACKGROUND_COLOR, title=title)
|
||||
|
||||
def drawNullValues(gridworld, currentState = None, message = ''):
    """Draw the bare grid: obstacles as gray squares, every other cell
    as an empty (zero-valued) square, plus a caption below the grid."""
    grid = gridworld.grid
    blank()
    for x in range(grid.width):
        for y in range(grid.height):
            state = (x, y)
            gridType = grid[x][y]
            # Exit cells hold a numeric reward rather than a character.
            isExit = (str(gridType) != gridType)
            isCurrent = (currentState == state)
            if gridType == '#':
                drawSquare(x, y, 0, 0, 0, None, None, True, False, isCurrent)
            else:
                drawNullSquare(gridworld.grid, x, y, False, isExit, isCurrent)
    pos = to_screen(((grid.width - 1.0) / 2.0, - 0.8))
    text( pos, TEXT_COLOR, message, "Courier", -32, "bold", "c")
|
||||
|
||||
|
||||
def drawValues(gridworld, values, policy, currentState = None, message = 'State Values'):
    """Draw every cell colored by its value (red negative, green
    positive) with the policy's action arrow, plus a caption.

    The color scale is normalized to the min/max over all states
    (0.0 is always included so an all-positive board stays anchored).
    """
    grid = gridworld.grid
    blank()
    valueList = [values[state] for state in gridworld.getStates()] + [0.0]
    minValue = min(valueList)
    maxValue = max(valueList)
    for x in range(grid.width):
        for y in range(grid.height):
            state = (x, y)
            gridType = grid[x][y]
            # Exit cells hold a numeric reward rather than a character.
            isExit = (str(gridType) != gridType)
            isCurrent = (currentState == state)
            if gridType == '#':
                drawSquare(x, y, 0, 0, 0, None, None, True, False, isCurrent)
            else:
                value = values[state]
                action = None
                if policy != None and state in policy:
                    action = policy[state]
                    actions = gridworld.getPossibleActions(state)
                # Terminal-bound cells only allow 'exit'; show that.
                if action not in actions and 'exit' in actions:
                    action = 'exit'
                valString = '%.2f' % value
                drawSquare(x, y, value, minValue, maxValue, valString, action, False, isExit, isCurrent)
    pos = to_screen(((grid.width - 1.0) / 2.0, - 0.8))
    text( pos, TEXT_COLOR, message, "Courier", -32, "bold", "c")
|
||||
|
||||
def drawQValues(gridworld, qValues, currentState = None, message = 'State-Action Q-Values'):
    """Draw every cell split into four triangular wedges, one per
    action, colored by that action's Q-value; exit cells are drawn as
    plain value squares.  A caption is added below the grid.
    """
    grid = gridworld.grid
    blank()
    stateCrossActions = [[(state, action) for action in gridworld.getPossibleActions(state)] for state in gridworld.getStates()]
    qStates = reduce(lambda x,y: x+y, stateCrossActions, [])
    # Include 0.0 so the color scale is anchored at zero.
    qValueList = [qValues[(state, action)] for state, action in qStates] + [0.0]
    minValue = min(qValueList)
    maxValue = max(qValueList)
    for x in range(grid.width):
        for y in range(grid.height):
            state = (x, y)
            gridType = grid[x][y]
            # Exit cells hold a numeric reward rather than a character.
            isExit = (str(gridType) != gridType)
            isCurrent = (currentState == state)
            actions = gridworld.getPossibleActions(state)
            if actions == None or len(actions) == 0:
                actions = [None]
            bestQ = max([qValues[(state, action)] for action in actions])
            bestActions = [action for action in actions if qValues[(state, action)] == bestQ]

            q = util.Counter()
            valStrings = {}
            for action in actions:
                v = qValues[(state, action)]
                q[action] += v
                valStrings[action] = '%.2f' % v
            if gridType == '#':
                drawSquare(x, y, 0, 0, 0, None, None, True, False, isCurrent)
            elif isExit:
                action = 'exit'
                value = q[action]
                valString = '%.2f' % value
                drawSquare(x, y, value, minValue, maxValue, valString, action, False, isExit, isCurrent)
            else:
                drawSquareQ(x, y, q, minValue, maxValue, valStrings, actions, isCurrent)
    pos = to_screen(((grid.width - 1.0) / 2.0, - 0.8))
    text( pos, TEXT_COLOR, message, "Courier", -32, "bold", "c")
|
||||
|
||||
|
||||
def blank():
    """Wipe the canvas before a full redraw."""
    clear_screen()
|
||||
|
||||
def drawNullSquare(grid,x, y, isObstacle, isTerminal, isCurrent):
    """Draw one cell of the bare (no-values) grid: a dark square with a
    white border, an inner box + reward text for terminal cells, and a
    blue dot marking the agent's current location."""

    square_color = getColor(0, -1, 1)

    if isObstacle:
        square_color = OBSTACLE_COLOR

    (screen_x, screen_y) = to_screen((x, y))
    # Filled background.
    square( (screen_x, screen_y),
            0.5* GRID_SIZE,
            color = square_color,
            filled = 1,
            width = 1)
    # Cell border.
    square( (screen_x, screen_y),
            0.5* GRID_SIZE,
            color = EDGE_COLOR,
            filled = 0,
            width = 3)

    if isTerminal and not isObstacle:
        # Inner box plus the terminal reward value.
        square( (screen_x, screen_y),
                0.4* GRID_SIZE,
                color = EDGE_COLOR,
                filled = 0,
                width = 2)
        text( (screen_x, screen_y),
              TEXT_COLOR,
              str(grid[x][y]),
              "Courier", -24, "bold", "c")

    text_color = TEXT_COLOR

    if not isObstacle and isCurrent:
        circle( (screen_x, screen_y), 0.1*GRID_SIZE, LOCATION_COLOR, fillColor=LOCATION_COLOR )

    # if not isObstacle:
    #   text( (screen_x, screen_y), text_color, valStr, "Courier", 24, "bold", "c")
|
||||
|
||||
def drawSquare(x, y, val, min, max, valStr, action, isObstacle, isTerminal, isCurrent):
    """Draw one value cell: background color scaled by val within
    [min, max], a white border, an inner box for terminals, a small
    triangle pointing in the policy action's direction, the value text,
    and a blue dot on the current state.

    NOTE: the min/max parameters shadow the builtins of the same name
    inside this function (kept for interface compatibility).
    """

    square_color = getColor(val, min, max)

    if isObstacle:
        square_color = OBSTACLE_COLOR

    (screen_x, screen_y) = to_screen((x, y))
    # Filled background.
    square( (screen_x, screen_y),
            0.5* GRID_SIZE,
            color = square_color,
            filled = 1,
            width = 1)
    # Cell border.
    square( (screen_x, screen_y),
            0.5* GRID_SIZE,
            color = EDGE_COLOR,
            filled = 0,
            width = 3)
    if isTerminal and not isObstacle:
        square( (screen_x, screen_y),
                0.4* GRID_SIZE,
                color = EDGE_COLOR,
                filled = 0,
                width = 2)

    # Policy arrow: a small triangle on the edge matching the action.
    if action == 'north':
        polygon( [(screen_x, screen_y - 0.45*GRID_SIZE), (screen_x+0.05*GRID_SIZE, screen_y-0.40*GRID_SIZE), (screen_x-0.05*GRID_SIZE, screen_y-0.40*GRID_SIZE)], EDGE_COLOR, filled = 1, smoothed = False)
    if action == 'south':
        polygon( [(screen_x, screen_y + 0.45*GRID_SIZE), (screen_x+0.05*GRID_SIZE, screen_y+0.40*GRID_SIZE), (screen_x-0.05*GRID_SIZE, screen_y+0.40*GRID_SIZE)], EDGE_COLOR, filled = 1, smoothed = False)
    if action == 'west':
        polygon( [(screen_x-0.45*GRID_SIZE, screen_y), (screen_x-0.4*GRID_SIZE, screen_y+0.05*GRID_SIZE), (screen_x-0.4*GRID_SIZE, screen_y-0.05*GRID_SIZE)], EDGE_COLOR, filled = 1, smoothed = False)
    if action == 'east':
        polygon( [(screen_x+0.45*GRID_SIZE, screen_y), (screen_x+0.4*GRID_SIZE, screen_y+0.05*GRID_SIZE), (screen_x+0.4*GRID_SIZE, screen_y-0.05*GRID_SIZE)], EDGE_COLOR, filled = 1, smoothed = False)

    text_color = TEXT_COLOR

    if not isObstacle and isCurrent:
        circle( (screen_x, screen_y), 0.1*GRID_SIZE, outlineColor=LOCATION_COLOR, fillColor=LOCATION_COLOR )

    if not isObstacle:
        text( (screen_x, screen_y), text_color, valStr, "Courier", -30, "bold", "c")
|
||||
|
||||
|
||||
def drawSquareQ(x, y, qVals, minVal, maxVal, valStrs, bestActions, isCurrent):
    """Draw one Q-value cell: four triangular wedges (N/S/E/W) colored
    by each action's Q-value, diagonal separators, the numeric Q-value
    text per wedge (non-best actions in muted color), and a blue dot on
    the current state.

    NOTE(review): the bestActions parameter is unused -- the best action
    is re-derived below via max(qVals.values()); confirm before removing
    it from the signature.
    """

    (screen_x, screen_y) = to_screen((x, y))

    # Corner and edge anchor points of the cell.
    center = (screen_x, screen_y)
    nw = (screen_x-0.5*GRID_SIZE, screen_y-0.5*GRID_SIZE)
    ne = (screen_x+0.5*GRID_SIZE, screen_y-0.5*GRID_SIZE)
    se = (screen_x+0.5*GRID_SIZE, screen_y+0.5*GRID_SIZE)
    sw = (screen_x-0.5*GRID_SIZE, screen_y+0.5*GRID_SIZE)
    n = (screen_x, screen_y-0.5*GRID_SIZE+5)
    s = (screen_x, screen_y+0.5*GRID_SIZE-5)
    w = (screen_x-0.5*GRID_SIZE+5, screen_y)
    e = (screen_x+0.5*GRID_SIZE-5, screen_y)

    actions = qVals.keys()
    # Pass 1: colored wedges.
    for action in actions:

        wedge_color = getColor(qVals[action], minVal, maxVal)

        if action == 'north':
            polygon( (center, nw, ne), wedge_color, filled = 1, smoothed = False)
            #text(n, text_color, valStr, "Courier", 8, "bold", "n")
        if action == 'south':
            polygon( (center, sw, se), wedge_color, filled = 1, smoothed = False)
            #text(s, text_color, valStr, "Courier", 8, "bold", "s")
        if action == 'east':
            polygon( (center, ne, se), wedge_color, filled = 1, smoothed = False)
            #text(e, text_color, valStr, "Courier", 8, "bold", "e")
        if action == 'west':
            polygon( (center, nw, sw), wedge_color, filled = 1, smoothed = False)
            #text(w, text_color, valStr, "Courier", 8, "bold", "w")

    # Border and the two diagonals separating the wedges.
    square( (screen_x, screen_y),
            0.5* GRID_SIZE,
            color = EDGE_COLOR,
            filled = 0,
            width = 3)
    line(ne, sw, color = EDGE_COLOR)
    line(nw, se, color = EDGE_COLOR)

    if isCurrent:
        circle( (screen_x, screen_y), 0.1*GRID_SIZE, LOCATION_COLOR, fillColor=LOCATION_COLOR )

    # Pass 2: Q-value labels (muted for non-maximal actions).
    for action in actions:
        text_color = TEXT_COLOR
        if qVals[action] < max(qVals.values()): text_color = MUTED_TEXT_COLOR
        valStr = ""
        if action in valStrs:
            valStr = valStrs[action]
        h = -20
        if action == 'north':
            #polygon( (center, nw, ne), wedge_color, filled = 1, smooth = 0)
            text(n, text_color, valStr, "Courier", h, "bold", "n")
        if action == 'south':
            #polygon( (center, sw, se), wedge_color, filled = 1, smooth = 0)
            text(s, text_color, valStr, "Courier", h, "bold", "s")
        if action == 'east':
            #polygon( (center, ne, se), wedge_color, filled = 1, smooth = 0)
            text(e, text_color, valStr, "Courier", h, "bold", "e")
        if action == 'west':
            #polygon( (center, nw, sw), wedge_color, filled = 1, smooth = 0)
            text(w, text_color, valStr, "Courier", h, "bold", "w")
|
||||
|
||||
|
||||
def getColor(val, minVal, max):
    """Map a value to a shade: red intensity for negatives (scaled by
    minVal), green for positives (scaled by max), black at zero.

    NOTE: the third parameter shadows the builtin max (kept for
    interface compatibility).
    """
    red = green = 0.0
    if val < 0 and minVal < 0:
        red = val * 0.65 / minVal
    if val > 0 and max > 0:
        green = val * 0.65 / max
    return formatColor(red, green, 0.0)
|
||||
|
||||
|
||||
def square(pos, size, color, filled, width):
    """Draw an axis-aligned square centered at pos with half-side size;
    returns the underlying polygon's canvas handle."""
    x, y = pos
    dx, dy = size, size
    return polygon([(x - dx, y - dy), (x - dx, y + dy), (x + dx, y + dy), (x + dx, y - dy)], outlineColor=color, fillColor=color, filled=filled, width=width, smoothed=False)
|
||||
|
||||
|
||||
def to_screen(point):
    """Gridworld (x, y) -> canvas pixel position.

    Flips the y axis (row 0 at the bottom) and offsets by the window
    margin; uses the module globals installed by setup().
    """
    game_x, game_y = point
    pixel_x = game_x * GRID_SIZE + MARGIN
    pixel_y = (GRID_HEIGHT - game_y - 1) * GRID_SIZE + MARGIN
    return (pixel_x, pixel_y)
|
||||
|
||||
def to_grid(point):
|
||||
(x, y) = point
|
||||
x = int ((y - MARGIN + GRID_SIZE * 0.5) / GRID_SIZE)
|
||||
y = int ((x - MARGIN + GRID_SIZE * 0.5) / GRID_SIZE)
|
||||
print point, "-->", (x, y)
|
||||
return (x, y)
|
398
reinforcement/graphicsUtils.py
Normal file
398
reinforcement/graphicsUtils.py
Normal file
|
@ -0,0 +1,398 @@
|
|||
# graphicsUtils.py
|
||||
# ----------------
|
||||
# Licensing Information: You are free to use or extend these projects for
|
||||
# educational purposes provided that (1) you do not distribute or publish
|
||||
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||
#
|
||||
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||
# The core projects and autograders were primarily created by John DeNero
|
||||
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||
|
||||
|
||||
import sys
|
||||
import math
|
||||
import random
|
||||
import string
|
||||
import time
|
||||
import types
|
||||
import Tkinter
|
||||
|
||||
_Windows = sys.platform == 'win32'  # True if on Win95/98/NT

# Module-level GUI state shared by every drawing function below.
_root_window = None      # The root window for graphics output
_canvas = None      # The canvas which holds graphics
_canvas_xs = None      # Size of canvas object
_canvas_ys = None
_canvas_x = None      # Current position on canvas
_canvas_y = None
_canvas_col = None      # Current colour (set to black below)
_canvas_tsize = 12       # default text size
_canvas_tserifs = 0
|
||||
|
||||
def formatColor(r, g, b):
    """Convert r, g, b floats in [0, 1] to a Tk '#rrggbb' hex color string."""
    channels = [int(component * 255) for component in (r, g, b)]
    return '#%02x%02x%02x' % tuple(channels)
|
||||
|
||||
def colorToVector(color):
    """Inverse of formatColor: '#rrggbb' -> [r, g, b] floats, each in [0, 1)."""
    # A list comprehension (rather than map) always yields a real list, which
    # stays correct under Python 3 where map returns a lazy iterator.
    return [int(component, 16) / 256.0 for component in (color[1:3], color[3:5], color[5:7])]
|
||||
|
||||
# Platform-specific candidate font families for canvas text.
if _Windows:
    _canvas_tfonts = ['times new roman', 'lucida console']
else:
    _canvas_tfonts = ['times', 'lucidasans-24']
    pass # XXX need defaults here
|
||||
|
||||
def sleep(secs):
    """Pause for `secs` seconds, keeping the Tk event loop alive if a window exists."""
    global _root_window
    if _root_window is None:
        # No GUI yet: a plain blocking sleep is fine.
        time.sleep(secs)
    else:
        # Let Tk process pending redraws, then run its loop until the timer
        # fires and quit() breaks us out of mainloop().
        _root_window.update_idletasks()
        _root_window.after(int(1000 * secs), _root_window.quit)
        _root_window.mainloop()
|
||||
|
||||
def begin_graphics(width=640, height=480, color=formatColor(0, 0, 0), title=None):
    """Create (or re-create) the Tk root window and drawing canvas.

    Must be called before any other drawing function; destroys any window
    left over from a previous call.
    """
    global _root_window, _canvas, _canvas_x, _canvas_y, _canvas_xs, _canvas_ys, _bg_color

    # Check for duplicate call
    if _root_window is not None:
        # Lose the window.
        _root_window.destroy()

    # Save the canvas size parameters
    _canvas_xs, _canvas_ys = width - 1, height - 1
    _canvas_x, _canvas_y = 0, _canvas_ys
    _bg_color = color

    # Create the root window
    _root_window = Tkinter.Tk()
    _root_window.protocol('WM_DELETE_WINDOW', _destroy_window)
    _root_window.title(title or 'Graphics Window')
    _root_window.resizable(0, 0)

    # Create the canvas object; on failure reset the global so a later
    # begin_graphics() call can try again cleanly.
    try:
        _canvas = Tkinter.Canvas(_root_window, width=width, height=height)
        _canvas.pack()
        draw_background()
        _canvas.update()
    except:
        _root_window = None
        raise

    # Bind to key-down and key-up events
    _root_window.bind( "<KeyPress>", _keypress )
    _root_window.bind( "<KeyRelease>", _keyrelease )
    _root_window.bind( "<FocusIn>", _clear_keys )
    _root_window.bind( "<FocusOut>", _clear_keys )
    _root_window.bind( "<Button-1>", _leftclick )
    _root_window.bind( "<Button-2>", _rightclick )
    _root_window.bind( "<Button-3>", _rightclick )
    _root_window.bind( "<Control-Button-1>", _ctrl_leftclick)
    _clear_keys()
|
||||
|
||||
# Latched locations of the most recent (unconsumed) mouse clicks; each is a
# (x, y) pixel pair or None, and is reset by wait_for_click() once read.
_leftclick_loc = None
_rightclick_loc = None
_ctrl_leftclick_loc = None

def _leftclick(event):
    # Tk callback: record where the left button was pressed.
    global _leftclick_loc
    _leftclick_loc = (event.x, event.y)

def _rightclick(event):
    # Tk callback: record where the right (or middle) button was pressed.
    global _rightclick_loc
    _rightclick_loc = (event.x, event.y)

def _ctrl_leftclick(event):
    # Tk callback: record where Control+left-click was pressed.
    global _ctrl_leftclick_loc
    _ctrl_leftclick_loc = (event.x, event.y)
|
||||
|
||||
def wait_for_click():
    """Block until the user clicks, returning ((x, y), kind) where kind is
    'left', 'right', or 'ctrl_left'."""
    while True:
        global _leftclick_loc
        global _rightclick_loc
        global _ctrl_leftclick_loc
        # Consume the first pending click, checking in a fixed priority order.
        if _leftclick_loc is not None:
            where = _leftclick_loc
            _leftclick_loc = None
            return where, 'left'
        if _rightclick_loc is not None:
            where = _rightclick_loc
            _rightclick_loc = None
            return where, 'right'
        if _ctrl_leftclick_loc is not None:
            where = _ctrl_leftclick_loc
            _ctrl_leftclick_loc = None
            return where, 'ctrl_left'
        sleep(0.05)
|
||||
|
||||
def draw_background():
    """Fill the whole canvas with the saved background color."""
    right, bottom = _canvas_xs, _canvas_ys
    corners = [(0, 0), (0, bottom), (right, bottom), (right, 0)]
    polygon(corners, _bg_color, fillColor=_bg_color, filled=True, smoothed=False)
|
||||
|
||||
def _destroy_window(event=None):
    # Bound to WM_DELETE_WINDOW: closing the window terminates the whole
    # program rather than just tearing the window down.
    sys.exit(0)
|
||||
|
||||
def end_graphics():
    """Tear down the graphics window and reset all module-level GUI state."""
    global _root_window, _canvas, _mouse_enabled
    try:
        try:
            # Give the user a moment to see the final frame.
            sleep(1)
            if _root_window != None:
                _root_window.destroy()
        except SystemExit, e:
            print 'Ending graphics raised an exception:', e
    finally:
        # Always clear the globals so graphics can be restarted cleanly.
        _root_window = None
        _canvas = None
        _mouse_enabled = 0
        _clear_keys()
|
||||
|
||||
def clear_screen(background=None):
    """Erase every canvas item, repaint the background, reset the cursor."""
    global _canvas_x, _canvas_y
    _canvas.delete('all')
    draw_background()
    _canvas_x = 0
    _canvas_y = _canvas_ys
|
||||
|
||||
def polygon(coords, outlineColor, fillColor=None, filled=1, smoothed=1, behind=0, width=1):
    """Draw a polygon through `coords` [(x, y), ...] and return its canvas id.

    fillColor defaults to the outline color; filled=0 draws only the outline.
    behind > 0 lowers the new item beneath that canvas item in stacking order.
    """
    # Tk wants a flat coordinate list: [x0, y0, x1, y1, ...]
    c = []
    for coord in coords:
        c.append(coord[0])
        c.append(coord[1])
    if fillColor is None:
        fillColor = outlineColor
    if filled == 0:
        fillColor = ""  # empty string means a transparent fill in Tk
    poly = _canvas.create_polygon(c, outline=outlineColor, fill=fillColor, smooth=smoothed, width=width)
    if behind > 0:
        _canvas.tag_lower(poly, behind) # Higher should be more visible
    return poly
|
||||
|
||||
def square(pos, r, color, filled=1, behind=0):
    """Draw an unsmoothed square of half-width r centered at pos."""
    (x, y) = pos
    corners = [(x - r, y - r), (x + r, y - r), (x + r, y + r), (x - r, y + r)]
    return polygon(corners, color, color, filled, 0, behind=behind)
|
||||
|
||||
def circle(pos, r, outlineColor, fillColor, endpoints=None, style='pieslice', width=2):
    """Draw an arc/pie slice of radius r centered at pos; endpoints gives the
    start/end angles in degrees (defaults to a nearly full circle)."""
    x, y = pos
    x0, y0 = x - r - 1, y - r - 1
    x1, y1 = x + r, y + r
    if endpoints is None:
        e = [0, 359]
    else:
        e = list(endpoints)
    # Normalize so the extent is non-negative (Tk needs start <= end).
    while e[0] > e[1]:
        e[1] = e[1] + 360

    return _canvas.create_arc(x0, y0, x1, y1, outline=outlineColor, fill=fillColor,
                              extent=e[1] - e[0], start=e[0], style=style, width=width)
|
||||
|
||||
def image(pos, file="../../blueghost.gif"):
    """Draw a GIF anchored at its top-left corner; returns the canvas item id."""
    x, y = pos
    photo = Tkinter.PhotoImage(file=file)
    # Tkinter does not keep a reference to the PhotoImage itself; without one
    # the image is garbage-collected and the canvas shows a blank area.
    if not hasattr(image, '_cache'):
        image._cache = []
    image._cache.append(photo)
    return _canvas.create_image(x, y, image = photo, anchor = Tkinter.NW)
|
||||
|
||||
|
||||
def refresh():
    # Flush pending canvas redraws without processing user input events.
    _canvas.update_idletasks()
|
||||
|
||||
def moveCircle(id, pos, r, endpoints=None):
    """Reposition an existing arc item `id` (as made by circle()) so it is
    centered at `pos`, optionally updating its start/end angles."""
    global _canvas_x, _canvas_y

    x, y = pos
    # Bounding box matching the one used by circle().
    x0, x1 = x - r - 1, x + r
    y0, y1 = y - r - 1, y + r
    if endpoints == None:
        e = [0, 359]
    else:
        e = list(endpoints)
    # Normalize so the extent is non-negative.
    while e[0] > e[1]: e[1] = e[1] + 360

    edit(id, ('start', e[0]), ('extent', e[1] - e[0]))
    move_to(id, x0, y0)
|
||||
|
||||
def edit(id, *args):
    # Update canvas item `id`; args are (option, value) pairs.
    _canvas.itemconfigure(id, **dict(args))
|
||||
|
||||
def text(pos, color, contents, font='Helvetica', size=12, style='normal', anchor="nw"):
    """Draw a text item on the canvas and return its id."""
    global _canvas_x, _canvas_y
    x, y = pos
    font_spec = (font, str(size), style)
    return _canvas.create_text(x, y, fill=color, text=contents, font=font_spec, anchor=anchor)
|
||||
|
||||
def changeText(id, newText, font=None, size=12, style='normal'):
    """Replace the text of canvas item `id`, optionally changing its font."""
    _canvas.itemconfigure(id, text=newText)
    if font is not None:
        # A negative size selects pixel (rather than point) units in Tk.
        _canvas.itemconfigure(id, font=(font, '-%d' % size, style))
|
||||
|
||||
def changeColor(id, newColor):
    # Recolor an existing canvas item in place.
    _canvas.itemconfigure(id, fill=newColor)
|
||||
|
||||
def line(here, there, color=formatColor(0, 0, 0), width=2):
    """Draw a line segment from `here` to `there`; returns the canvas item id."""
    start_x, start_y = here[0], here[1]
    end_x, end_y = there[0], there[1]
    return _canvas.create_line(start_x, start_y, end_x, end_y, fill=color, width=width)
|
||||
|
||||
##############################################################################
|
||||
### Keypress handling ########################################################
|
||||
##############################################################################
|
||||
|
||||
# We bind to key-down and key-up events.

_keysdown = {}     # keys currently held down (keysym -> 1)
_keyswaiting = {}  # keys pressed since the last keys_waiting() call
# This holds an unprocessed key release.  We delay key releases by up to
# one call to keys_pressed() to get round a problem with auto repeat.
_got_release = None
|
||||
|
||||
def _keypress(event):
    # Tk callback: mark the key as both currently-down and waiting-to-be-read,
    # and cancel any pending (possibly auto-repeat) release.
    global _got_release
    _keysdown[event.keysym] = 1
    _keyswaiting[event.keysym] = 1
    _got_release = None
|
||||
|
||||
def _keyrelease(event):
    """Tk callback: mark a key as released.

    The release is latched in _got_release so keys_pressed() can compensate
    for keyboard auto-repeat.
    """
    global _got_release
    try:
        del _keysdown[event.keysym]
    except KeyError:
        # Release without a recorded press (e.g. after a focus change); ignore.
        pass
    _got_release = 1
|
||||
|
||||
def remap_arrows(event):
    """Translate arrow-key events into the 'w'/'a'/'s'/'d' letters the
    keyboard agents understand (mutates event.char in place)."""
    # TURN ARROW PRESSES INTO LETTERS (SHOULD BE IN KEYBOARD AGENT)
    if event.char in ['a', 's', 'd', 'w']:
        return  # already a movement letter
    arrow_to_letter = {
        37: 'a', 101: 'a',  # LEFT ARROW (win / x)
        38: 'w', 99: 'w',   # UP ARROW
        39: 'd', 102: 'd',  # RIGHT ARROW
        40: 's', 104: 's',  # DOWN ARROW
    }
    if event.keycode in arrow_to_letter:
        event.char = arrow_to_letter[event.keycode]
|
||||
|
||||
def _clear_keys(event=None):
    # Forget all recorded key state; bound to focus-change events so stale
    # presses don't linger when the window loses/gains focus.
    global _keysdown, _got_release, _keyswaiting
    _keysdown = {}
    _keyswaiting = {}
    _got_release = None
|
||||
|
||||
def keys_pressed(d_o_e=Tkinter.tkinter.dooneevent,
                 d_w=Tkinter.tkinter.DONT_WAIT):
    """Return the list of keys currently held down, pumping the Tk event
    loop (non-blocking) so pending key events are processed first."""
    d_o_e(d_w)
    if _got_release:
        # A release is pending; pump once more so an auto-repeat press that
        # immediately follows it is seen before we report.
        d_o_e(d_w)
    return _keysdown.keys()
|
||||
|
||||
def keys_waiting():
    """Return the keys pressed since the last call, then reset the queue."""
    global _keyswaiting
    pending = _keyswaiting.keys()
    _keyswaiting = {}
    return pending
|
||||
|
||||
# Block for a list of keys...
|
||||
|
||||
def wait_for_keys():
    """Block (polling at ~20 Hz) until at least one key is down; return them."""
    keys = []
    while not keys:
        keys = keys_pressed()
        sleep(0.05)
    return keys
|
||||
|
||||
def remove_from_screen(x,
                       d_o_e=Tkinter.tkinter.dooneevent,
                       d_w=Tkinter.tkinter.DONT_WAIT):
    """Delete canvas item `x` and pump the Tk event loop once."""
    _canvas.delete(x)
    d_o_e(d_w)
|
||||
|
||||
def _adjust_coords(coord_list, x, y):
|
||||
for i in range(0, len(coord_list), 2):
|
||||
coord_list[i] = coord_list[i] + x
|
||||
coord_list[i + 1] = coord_list[i + 1] + y
|
||||
return coord_list
|
||||
|
||||
def move_to(object, x, y=None,
            d_o_e=Tkinter.tkinter.dooneevent,
            d_w=Tkinter.tkinter.DONT_WAIT):
    """Move a canvas item so its first coordinate lands on (x, y).

    Accepts either move_to(obj, x, y) or move_to(obj, (x, y)).
    """
    if y is None:
        try:
            x, y = x
        except (TypeError, ValueError):
            # Raising a bare string is invalid in modern Python; raise a real
            # exception object (matching move_by's behavior).
            raise Exception('incomprehensible coordinates')

    horiz = True
    newCoords = []
    current_x, current_y = _canvas.coords(object)[0:2] # first point
    for coord in _canvas.coords(object):
        # Canvas coordinates alternate x, y, x, y, ...; shift every point by
        # the same delta computed from the first point.
        if horiz:
            inc = x - current_x
        else:
            inc = y - current_y
        horiz = not horiz

        newCoords.append(coord + inc)

    _canvas.coords(object, *newCoords)
    d_o_e(d_w)
|
||||
|
||||
def move_by(object, x, y=None,
            d_o_e=Tkinter.tkinter.dooneevent,
            d_w=Tkinter.tkinter.DONT_WAIT, lift=False):
    """Shift a canvas item by the offset (x, y); also accepts a single (x, y)
    pair.  If lift is True, raise the item to the top of the stacking order."""
    if y is None:
        try:
            x, y = x
        except (TypeError, ValueError):
            # Narrowed from a bare except; uses the portable call form of
            # raise (works under both Python 2 and 3).
            raise Exception('incomprehensible coordinates')

    horiz = True
    newCoords = []
    for coord in _canvas.coords(object):
        # Canvas coordinates alternate x, y; apply the matching component.
        if horiz:
            inc = x
        else:
            inc = y
        horiz = not horiz

        newCoords.append(coord + inc)

    _canvas.coords(object, *newCoords)
    d_o_e(d_w)
    if lift:
        _canvas.tag_raise(object)
|
||||
|
||||
def writePostscript(filename):
    "Writes the current canvas to a postscript file."
    # open() instead of the file() builtin (removed in Python 3), and a
    # try/finally so the handle is closed even if postscript export fails.
    psfile = open(filename, 'w')
    try:
        psfile.write(_canvas.postscript(pageanchor='sw',
                                        y='0.c',
                                        x='0.c'))
    finally:
        psfile.close()
|
||||
|
||||
# Polygon outline used by the demo below (presumably the Pacman ghost sprite),
# in unit coordinates; callers scale/translate it before drawing.
ghost_shape = [
    (0, - 0.5),
    (0.25, - 0.75),
    (0.5, - 0.5),
    (0.75, - 0.75),
    (0.75, 0.5),
    (0.5, 0.75),
    (- 0.5, 0.75),
    (- 0.75, 0.5),
    (- 0.75, - 0.75),
    (- 0.5, - 0.5),
    (- 0.25, - 0.75)
  ]
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: draw a ghost polygon and an arc, then linger briefly.
    begin_graphics()
    clear_screen()
    ghost_shape = [(x * 10 + 20, y * 10 + 20) for x, y in ghost_shape]
    g = polygon(ghost_shape, formatColor(1, 1, 1))
    move_to(g, (50, 50))
    circle((150, 150), 20, formatColor(0.7, 0.3, 0.0), endpoints=[15, - 15])
    sleep(2)
|
585
reinforcement/gridworld.py
Normal file
585
reinforcement/gridworld.py
Normal file
|
@ -0,0 +1,585 @@
|
|||
# gridworld.py
|
||||
# ------------
|
||||
# Licensing Information: You are free to use or extend these projects for
|
||||
# educational purposes provided that (1) you do not distribute or publish
|
||||
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||
#
|
||||
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||
# The core projects and autograders were primarily created by John DeNero
|
||||
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||
|
||||
|
||||
import random
|
||||
import sys
|
||||
import mdp
|
||||
import environment
|
||||
import util
|
||||
import optparse
|
||||
|
||||
class Gridworld(mdp.MarkovDecisionProcess):
    """
    Gridworld MDP.

    States are (x, y) cells of a Grid plus one special terminal state.
    Cells holding a number are "exit" cells whose single action leads to
    the terminal state, paying that number as reward; '#' cells are walls
    and 'S' marks the start.
    """
    def __init__(self, grid):
        # layout: accept either a raw list-of-rows (converted here) or a Grid
        if type(grid) == type([]): grid = makeGrid(grid)
        self.grid = grid

        # parameters
        self.livingReward = 0.0  # reward paid when leaving any normal cell
        self.noise = 0.2         # chance of slipping perpendicular to the move

    def setLivingReward(self, reward):
        """
        The (negative) reward for exiting "normal" states.

        Note that in the R+N text, this reward is on entering
        a state and therefore is not clearly part of the state's
        future rewards.
        """
        self.livingReward = reward

    def setNoise(self, noise):
        """
        The probability of moving in an unintended direction.
        """
        self.noise = noise


    def getPossibleActions(self, state):
        """
        Returns list of valid actions for 'state'.

        Note that you can request moves into walls and
        that "exit" states transition to the terminal
        state under the special action "done".
        """
        if state == self.grid.terminalState:
            return ()
        x,y = state
        if type(self.grid[x][y]) == int:
            return ('exit',)
        return ('north','west','south','east')

    def getStates(self):
        """
        Return list of all states.
        """
        # The true terminal state.
        states = [self.grid.terminalState]
        for x in range(self.grid.width):
            for y in range(self.grid.height):
                if self.grid[x][y] != '#':
                    state = (x,y)
                    states.append(state)
        return states

    def getReward(self, state, action, nextState):
        """
        Get reward for state, action, nextState transition.

        Note that the reward depends only on the state being
        departed (as in the R+N book examples, which more or
        less use this convention).
        """
        if state == self.grid.terminalState:
            return 0.0
        x, y = state
        cell = self.grid[x][y]
        if type(cell) == int or type(cell) == float:
            return cell
        return self.livingReward

    def getStartState(self):
        """Return the (x, y) cell marked 'S'; raises if the grid has none."""
        for x in range(self.grid.width):
            for y in range(self.grid.height):
                if self.grid[x][y] == 'S':
                    return (x, y)
        # Raising a plain string is a TypeError in modern Python; raise a
        # real exception object instead.
        raise Exception('Grid has no start state')

    def isTerminal(self, state):
        """
        Only the TERMINAL_STATE state is *actually* a terminal state.
        The other "exit" states are technically non-terminals with
        a single action "exit" which leads to the true terminal state.
        This convention is to make the grids line up with the examples
        in the R+N textbook.
        """
        return state == self.grid.terminalState


    def getTransitionStatesAndProbs(self, state, action):
        """
        Returns list of (nextState, prob) pairs
        representing the states reachable
        from 'state' by taking 'action' along
        with their transition probabilities.
        """

        if action not in self.getPossibleActions(state):
            # String raises are invalid in modern Python.
            raise Exception("Illegal action!")

        if self.isTerminal(state):
            return []

        x, y = state

        if type(self.grid[x][y]) == int or type(self.grid[x][y]) == float:
            termState = self.grid.terminalState
            return [(termState, 1.0)]

        successors = []

        # Target cell for each direction, or the current cell when blocked.
        northState = (self.__isAllowed(y+1,x) and (x,y+1)) or state
        westState = (self.__isAllowed(y,x-1) and (x-1,y)) or state
        southState = (self.__isAllowed(y-1,x) and (x,y-1)) or state
        eastState = (self.__isAllowed(y,x+1) and (x+1,y)) or state

        if action == 'north' or action == 'south':
            if action == 'north':
                successors.append((northState,1-self.noise))
            else:
                successors.append((southState,1-self.noise))

            # Remaining probability mass slips to the perpendicular cells.
            massLeft = self.noise
            successors.append((westState,massLeft/2.0))
            successors.append((eastState,massLeft/2.0))

        if action == 'west' or action == 'east':
            if action == 'west':
                successors.append((westState,1-self.noise))
            else:
                successors.append((eastState,1-self.noise))

            massLeft = self.noise
            successors.append((northState,massLeft/2.0))
            successors.append((southState,massLeft/2.0))

        # Merge duplicate successors (e.g. several moves bump the same wall).
        successors = self.__aggregate(successors)

        return successors

    def __aggregate(self, statesAndProbs):
        # Sum the probabilities of duplicate states into one entry each.
        counter = util.Counter()
        for state, prob in statesAndProbs:
            counter[state] += prob
        newStatesAndProbs = []
        for state, prob in counter.items():
            newStatesAndProbs.append((state, prob))
        return newStatesAndProbs

    def __isAllowed(self, y, x):
        # True if (x, y) is on the grid and not a wall.
        if y < 0 or y >= self.grid.height: return False
        if x < 0 or x >= self.grid.width: return False
        return self.grid[x][y] != '#'
|
||||
|
||||
class GridworldEnvironment(environment.Environment):
    """Stateful environment wrapper around a Gridworld MDP: tracks the current
    state and samples stochastic transitions from it."""

    def __init__(self, gridWorld):
        self.gridWorld = gridWorld
        self.reset()

    def getCurrentState(self):
        return self.state

    def getPossibleActions(self, state):
        return self.gridWorld.getPossibleActions(state)

    def doAction(self, action):
        """Sample a successor for `action`, advance the current state, and
        return (nextState, reward)."""
        state = self.getCurrentState()
        (nextState, reward) = self.getRandomNextState(state, action)
        self.state = nextState
        return (nextState, reward)

    def getRandomNextState(self, state, action, randObj=None):
        """Sample (nextState, reward) from the transition distribution.

        Pass `randObj` (anything with a .random() method) for reproducible
        draws; otherwise the global random module is used.
        """
        rand = -1.0
        if randObj is None:
            rand = random.random()
        else:
            rand = randObj.random()
        sum = 0.0
        successors = self.gridWorld.getTransitionStatesAndProbs(state, action)
        for nextState, prob in successors:
            sum += prob
            if sum > 1.0:
                # String raises are invalid in modern Python; raise real
                # exception objects instead.
                raise Exception('Total transition probability more than one; sample failure.')
            if rand < sum:
                reward = self.gridWorld.getReward(state, action, nextState)
                return (nextState, reward)
        raise Exception('Total transition probability less than one; sample failure.')

    def reset(self):
        # Start every episode from the grid's 'S' cell.
        self.state = self.gridWorld.getStartState()
|
||||
|
||||
class Grid:
    """
    A 2-dimensional array of immutables backed by a list of lists. Data is accessed
    via grid[x][y] where (x,y) are cartesian coordinates with x horizontal,
    y vertical and the origin (0,0) in the bottom left corner.

    The __str__ method constructs an output that is oriented appropriately.
    """
    def __init__(self, width, height, initialValue=' '):
        self.width = width    # number of columns
        self.height = height  # number of rows
        # Column-major storage: data[x][y]
        self.data = [[initialValue for y in range(height)] for x in range(width)]
        self.terminalState = 'TERMINAL_STATE'  # sentinel terminal state name

    def __getitem__(self, i):
        return self.data[i]

    def __setitem__(self, key, item):
        self.data[key] = item

    def __eq__(self, other):
        # Guard so comparisons against None or non-Grid objects return False
        # instead of raising AttributeError on the missing .data attribute.
        if other is None or not hasattr(other, 'data'):
            return False
        return self.data == other.data

    def __hash__(self):
        # hash(self.data) raised TypeError (lists are unhashable); hash an
        # immutable tuple-of-tuples snapshot of the contents instead.
        # NOTE: Grids are mutable, so only hash one that will not change
        # while it is in use as a key.
        return hash(tuple(tuple(column) for column in self.data))

    def copy(self):
        """Return a new Grid with a (one-level) copy of the contents."""
        g = Grid(self.width, self.height)
        g.data = [x[:] for x in self.data]
        return g

    def deepCopy(self):
        return self.copy()

    def shallowCopy(self):
        """Return a new Grid sharing this one's underlying data lists."""
        g = Grid(self.width, self.height)
        g.data = self.data
        return g

    def _getLegacyText(self):
        # Transpose to row-major and flip vertically so row 0 prints on top.
        t = [[self.data[x][y] for x in range(self.width)] for y in range(self.height)]
        t.reverse()
        return t

    def __str__(self):
        return str(self._getLegacyText())
|
||||
|
||||
def makeGrid(gridString):
    """Build a Grid from a list of rows given top row first, flipping
    vertically so that (0, 0) ends up at the bottom-left."""
    width, height = len(gridString[0]), len(gridString)
    grid = Grid(width, height)
    for row_index, row in enumerate(gridString):
        y = height - row_index - 1
        for x, cell in enumerate(row):
            grid[x][y] = cell
    return grid
|
||||
|
||||
def getCliffGrid():
    """5x3 cliff world: a +10 exit beyond a row of -100 cliff cells."""
    grid = [[' ',' ',' ',' ',' '],
            ['S',' ',' ',' ',10],
            [-100,-100, -100, -100, -100]]
    # Gridworld.__init__ already converts raw lists via makeGrid, so pass the
    # list directly (consistent with the other grid factories below).
    return Gridworld(grid)
|
||||
|
||||
def getCliffGrid2():
    """Cliff world variant with an additional +8 exit beside the start."""
    top = [' '] * 5
    middle = [8, 'S', ' ', ' ', 10]
    cliff = [-100] * 5
    return Gridworld([top, middle, cliff])
|
||||
|
||||
def getDiscountGrid():
    """World contrasting a near +1 exit with a distant +10 exit, above a row
    of -10 cliff cells."""
    layout = [
        [' ', ' ', ' ', ' ', ' '],
        [' ', '#', ' ', ' ', ' '],
        [' ', '#', 1, '#', 10],
        ['S', ' ', ' ', ' ', ' '],
        [-10, -10, -10, -10, -10],
    ]
    return Gridworld(layout)
|
||||
|
||||
def getBridgeGrid():
    """Narrow bridge between a +1 and a +10 exit, flanked by -100 cells."""
    layout = [
        ['#', -100, -100, -100, -100, -100, '#'],
        [1, 'S', ' ', ' ', ' ', ' ', 10],
        ['#', -100, -100, -100, -100, -100, '#'],
    ]
    return Gridworld(layout)
|
||||
|
||||
def getBookGrid():
    """The 4x3 example world from the Russell & Norvig textbook."""
    layout = [
        [' ', ' ', ' ', +1],
        [' ', '#', ' ', -1],
        ['S', ' ', ' ', ' '],
    ]
    return Gridworld(layout)
|
||||
|
||||
def getMazeGrid():
    """Small maze with a single +1 exit in the far corner."""
    layout = [
        [' ', ' ', ' ', +1],
        ['#', '#', ' ', '#'],
        [' ', '#', ' ', ' '],
        [' ', '#', '#', ' '],
        ['S', ' ', ' ', ' '],
    ]
    return Gridworld(layout)
|
||||
|
||||
|
||||
|
||||
def getUserAction(state, actionFunction):
    """
    Get an action from the user (rather than the agent).

    Used for debugging and lecture demos.
    """
    import graphicsUtils
    action = None
    # Poll the keyboard until a direction (or quit) key is seen.
    while True:
        keys = graphicsUtils.wait_for_keys()
        if 'Up' in keys: action = 'north'
        if 'Down' in keys: action = 'south'
        if 'Left' in keys: action = 'west'
        if 'Right' in keys: action = 'east'
        if 'q' in keys: sys.exit(0)
        if action == None: continue
        break
    actions = actionFunction(state)
    if action not in actions:
        # Requested move is illegal here; fall back to the first legal action.
        action = actions[0]
    return action
|
||||
|
||||
def printString(x): print x  # default message callback: echo to stdout
|
||||
|
||||
def runEpisode(agent, environment, discount, decision, display, message, pause, episode):
    """Run one episode in `environment`, choosing actions via `decision` and
    reporting through the display/message/pause callbacks.

    Returns the discounted return accumulated over the episode.
    """
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    if 'startEpisode' in dir(agent): agent.startEpisode()
    message("BEGINNING EPISODE: "+str(episode)+"\n")
    while True:

        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        display(state)
        pause()

        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE "+str(episode)+" COMPLETE: RETURN WAS "+str(returns)+"\n")
            return returns

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action == None:
            # Raising a string is a TypeError in modern Python; use a real
            # exception object.
            raise Exception('Error: Agent returned None action')

        # EXECUTE ACTION
        nextState, reward = environment.doAction(action)
        message("Started in state: "+str(state)+
                "\nTook action: "+str(action)+
                "\nEnded in state: "+str(nextState)+
                "\nGot reward: "+str(reward)+"\n")
        # UPDATE LEARNER
        if 'observeTransition' in dir(agent):
            agent.observeTransition(state, action, nextState, reward)

        returns += reward * totalDiscount
        totalDiscount *= discount

    # NOTE(review): the loop above only exits via `return returns`, so this
    # stopEpisode call is unreachable as written -- preserved for fidelity;
    # confirm whether it was ever meant to run.
    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()
|
||||
|
||||
def parseOptions():
    """Parse command-line flags for the gridworld demo and resolve
    conflicting flag combinations before returning the options object."""
    optParser = optparse.OptionParser()
    optParser.add_option('-d', '--discount',action='store',
                         type='float',dest='discount',default=0.9,
                         help='Discount on future (default %default)')
    optParser.add_option('-r', '--livingReward',action='store',
                         type='float',dest='livingReward',default=0.0,
                         metavar="R", help='Reward for living for a time step (default %default)')
    optParser.add_option('-n', '--noise',action='store',
                         type='float',dest='noise',default=0.2,
                         metavar="P", help='How often action results in ' +
                         'unintended direction (default %default)' )
    optParser.add_option('-e', '--epsilon',action='store',
                         type='float',dest='epsilon',default=0.3,
                         metavar="E", help='Chance of taking a random action in q-learning (default %default)')
    optParser.add_option('-l', '--learningRate',action='store',
                         type='float',dest='learningRate',default=0.5,
                         metavar="P", help='TD learning rate (default %default)' )
    optParser.add_option('-i', '--iterations',action='store',
                         type='int',dest='iters',default=10,
                         metavar="K", help='Number of rounds of value iteration (default %default)')
    optParser.add_option('-k', '--episodes',action='store',
                         type='int',dest='episodes',default=1,
                         metavar="K", help='Number of epsiodes of the MDP to run (default %default)')
    optParser.add_option('-g', '--grid',action='store',
                         metavar="G", type='string',dest='grid',default="BookGrid",
                         help='Grid to use (case sensitive; options are BookGrid, BridgeGrid, CliffGrid, MazeGrid, default %default)' )
    optParser.add_option('-w', '--windowSize', metavar="X", type='int',dest='gridSize',default=150,
                         help='Request a window width of X pixels *per grid cell* (default %default)')
    optParser.add_option('-a', '--agent',action='store', metavar="A",
                         type='string',dest='agent',default="random",
                         help='Agent type (options are \'random\', \'value\' and \'q\', default %default)')
    optParser.add_option('-t', '--text',action='store_true',
                         dest='textDisplay',default=False,
                         help='Use text-only ASCII display')
    optParser.add_option('-p', '--pause',action='store_true',
                         dest='pause',default=False,
                         help='Pause GUI after each time step when running the MDP')
    optParser.add_option('-q', '--quiet',action='store_true',
                         dest='quiet',default=False,
                         help='Skip display of any learning episodes')
    optParser.add_option('-s', '--speed',action='store', metavar="S", type=float,
                         dest='speed',default=1.0,
                         help='Speed of animation, S > 1.0 is faster, 0.0 < S < 1.0 is slower (default %default)')
    optParser.add_option('-m', '--manual',action='store_true',
                         dest='manual',default=False,
                         help='Manually control agent')
    optParser.add_option('-v', '--valueSteps',action='store_true' ,default=False,
                         help='Display each step of value iteration')

    opts, args = optParser.parse_args()

    if opts.manual and opts.agent != 'q':
        print '## Disabling Agents in Manual Mode (-m) ##'
        opts.agent = None

    # MANAGE CONFLICTS
    if opts.textDisplay or opts.quiet:
    # if opts.quiet:
        opts.pause = False
        # opts.manual = False

    if opts.manual:
        # Manual control implies pausing each step for user input.
        opts.pause = True

    return opts
|
||||
|
||||
|
||||
if __name__ == '__main__':

    opts = parseOptions()

    ###########################
    # GET THE GRIDWORLD
    ###########################

    import gridworld
    # Look up the grid factory by name, e.g. "BookGrid" -> getBookGrid().
    mdpFunction = getattr(gridworld, "get"+opts.grid)
    mdp = mdpFunction()
    mdp.setLivingReward(opts.livingReward)
    mdp.setNoise(opts.noise)
    env = gridworld.GridworldEnvironment(mdp)


    ###########################
    # GET THE DISPLAY ADAPTER
    ###########################

    import textGridworldDisplay
    display = textGridworldDisplay.TextGridworldDisplay(mdp)
    if not opts.textDisplay:
        import graphicsGridworldDisplay
        display = graphicsGridworldDisplay.GraphicsGridworldDisplay(mdp, opts.gridSize, opts.speed)
    try:
        display.start()
    except KeyboardInterrupt:
        sys.exit(0)

    ###########################
    # GET THE AGENT
    ###########################

    import valueIterationAgents, qlearningAgents
    a = None
    if opts.agent == 'value':
        a = valueIterationAgents.ValueIterationAgent(mdp, opts.discount, opts.iters)
    elif opts.agent == 'q':
        #env.getPossibleActions, opts.discount, opts.learningRate, opts.epsilon
        #simulationFn = lambda agent, state: simulation.GridworldSimulation(agent,state,mdp)
        gridWorldEnv = GridworldEnvironment(mdp)
        actionFn = lambda state: mdp.getPossibleActions(state)
        qLearnOpts = {'gamma': opts.discount,
                      'alpha': opts.learningRate,
                      'epsilon': opts.epsilon,
                      'actionFn': actionFn}
        a = qlearningAgents.QLearningAgent(**qLearnOpts)
    elif opts.agent == 'random':
        # # No reason to use the random agent without episodes
        if opts.episodes == 0:
            opts.episodes = 10
        class RandomAgent:
            def getAction(self, state):
                return random.choice(mdp.getPossibleActions(state))
            def getValue(self, state):
                return 0.0
            def getQValue(self, state, action):
                return 0.0
            def getPolicy(self, state):
                "NOTE: 'random' is a special policy value; don't use it in your code."
                return 'random'
            def update(self, state, action, nextState, reward):
                pass
        a = RandomAgent()
    else:
        if not opts.manual: raise 'Unknown agent type: '+opts.agent


    ###########################
    # RUN EPISODES
    ###########################
    # DISPLAY Q/V VALUES BEFORE SIMULATION OF EPISODES
    try:
        if not opts.manual and opts.agent == 'value':
            if opts.valueSteps:
                for i in range(opts.iters):
                    tempAgent = valueIterationAgents.ValueIterationAgent(mdp, opts.discount, i)
                    display.displayValues(tempAgent, message = "VALUES AFTER "+str(i)+" ITERATIONS")
                    display.pause()

            display.displayValues(a, message = "VALUES AFTER "+str(opts.iters)+" ITERATIONS")
            display.pause()
            display.displayQValues(a, message = "Q-VALUES AFTER "+str(opts.iters)+" ITERATIONS")
            display.pause()
    except KeyboardInterrupt:
        sys.exit(0)



    # FIGURE OUT WHAT TO DISPLAY EACH TIME STEP (IF ANYTHING)
    displayCallback = lambda x: None
    if not opts.quiet:
        if opts.manual and opts.agent == None:
            displayCallback = lambda state: display.displayNullValues(state)
        else:
            if opts.agent == 'random': displayCallback = lambda state: display.displayValues(a, state, "CURRENT VALUES")
            if opts.agent == 'value': displayCallback = lambda state: display.displayValues(a, state, "CURRENT VALUES")
            if opts.agent == 'q': displayCallback = lambda state: display.displayQValues(a, state, "CURRENT Q-VALUES")

    messageCallback = lambda x: printString(x)
    if opts.quiet:
        messageCallback = lambda x: None

    # FIGURE OUT WHETHER TO WAIT FOR A KEY PRESS AFTER EACH TIME STEP
    pauseCallback = lambda : None
    if opts.pause:
        pauseCallback = lambda : display.pause()

    # FIGURE OUT WHETHER THE USER WANTS MANUAL CONTROL (FOR DEBUGGING AND DEMOS)
    if opts.manual:
        decisionCallback = lambda state : getUserAction(state, mdp.getPossibleActions)
    else:
        decisionCallback = a.getAction

    # RUN EPISODES
    if opts.episodes > 0:
        print
        print "RUNNING", opts.episodes, "EPISODES"
        print
    returns = 0
    for episode in range(1, opts.episodes+1):
        returns += runEpisode(a, env, opts.discount, decisionCallback, displayCallback, messageCallback, pauseCallback, episode)
    if opts.episodes > 0:
        print
        print "AVERAGE RETURNS FROM START STATE: "+str((returns+0.0) / opts.episodes)
        print
        print

    # DISPLAY POST-LEARNING VALUES / Q-VALUES
    if opts.agent == 'q' and not opts.manual:
        try:
            display.displayQValues(a, message = "Q-VALUES AFTER "+str(opts.episodes)+" EPISODES")
            display.pause()
            display.displayValues(a, message = "VALUES AFTER "+str(opts.episodes)+" EPISODES")
            display.pause()
        except KeyboardInterrupt:
            sys.exit(0)
|
84
reinforcement/keyboardAgents.py
Normal file
84
reinforcement/keyboardAgents.py
Normal file
|
@ -0,0 +1,84 @@
|
|||
# keyboardAgents.py
|
||||
# -----------------
|
||||
# Licensing Information: You are free to use or extend these projects for
|
||||
# educational purposes provided that (1) you do not distribute or publish
|
||||
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||
#
|
||||
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||
# The core projects and autograders were primarily created by John DeNero
|
||||
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||
|
||||
|
||||
from game import Agent
|
||||
from game import Directions
|
||||
import random
|
||||
|
||||
class KeyboardAgent(Agent):
    """
    An agent controlled by the keyboard.

    The most recent non-empty batch of pressed/waiting keys is cached in
    self.keys.  When no held key maps to a legal action the agent keeps
    moving in its previous direction if it can, and otherwise picks a
    random legal action.
    """
    # NOTE: Arrow keys also work.
    WEST_KEY  = 'a'
    EAST_KEY  = 'd'
    NORTH_KEY = 'w'
    SOUTH_KEY = 's'
    STOP_KEY = 'q'

    def __init__( self, index = 0 ):
        self.lastMove = Directions.STOP   # direction chosen on the previous turn
        self.index = index                # which agent this keyboard controls
        self.keys = []                    # last non-empty key snapshot seen

    def getAction( self, state):
        from graphicsUtils import keys_waiting
        from graphicsUtils import keys_pressed
        pressed = keys_waiting() + keys_pressed()
        if pressed != []:
            self.keys = pressed

        legal = state.getLegalActions(self.index)
        move = self.getMove(legal)

        if move == Directions.STOP:
            # No directional key held: keep going the same way if possible.
            if self.lastMove in legal:
                move = self.lastMove

        # Explicit stop request overrides everything (when stopping is legal).
        if (self.STOP_KEY in self.keys) and Directions.STOP in legal:
            move = Directions.STOP

        if move not in legal:
            move = random.choice(legal)

        self.lastMove = move
        return move

    def getMove(self, legal):
        # Translate the cached keys into a direction; later checks win when
        # several keys are held simultaneously.
        move = Directions.STOP
        if (self.WEST_KEY in self.keys or 'Left' in self.keys) and Directions.WEST in legal:
            move = Directions.WEST
        if (self.EAST_KEY in self.keys or 'Right' in self.keys) and Directions.EAST in legal:
            move = Directions.EAST
        if (self.NORTH_KEY in self.keys or 'Up' in self.keys) and Directions.NORTH in legal:
            move = Directions.NORTH
        if (self.SOUTH_KEY in self.keys or 'Down' in self.keys) and Directions.SOUTH in legal:
            move = Directions.SOUTH
        return move
|
||||
|
||||
class KeyboardAgent2(KeyboardAgent):
    """
    A second agent controlled by the keyboard, bound to i/j/k/l (with 'u'
    to stop) so two players can share one keyboard.  Unlike KeyboardAgent,
    no arrow-key aliases are accepted.
    """
    WEST_KEY  = 'j'
    EAST_KEY  = "l"
    NORTH_KEY = 'i'
    SOUTH_KEY = 'k'
    STOP_KEY = 'u'

    def getMove(self, legal):
        # Ordered bindings reproduce the original if-cascade: when several
        # keys are held, entries later in this list take precedence.
        bindings = [
            (self.WEST_KEY,  Directions.WEST),
            (self.EAST_KEY,  Directions.EAST),
            (self.NORTH_KEY, Directions.NORTH),
            (self.SOUTH_KEY, Directions.SOUTH),
        ]
        move = Directions.STOP
        for key, direction in bindings:
            if key in self.keys and direction in legal:
                move = direction
        return move
|
149
reinforcement/layout.py
Normal file
149
reinforcement/layout.py
Normal file
|
@ -0,0 +1,149 @@
|
|||
# layout.py
|
||||
# ---------
|
||||
# Licensing Information: You are free to use or extend these projects for
|
||||
# educational purposes provided that (1) you do not distribute or publish
|
||||
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||
#
|
||||
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||
# The core projects and autograders were primarily created by John DeNero
|
||||
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||
|
||||
|
||||
from util import manhattanDistance
|
||||
from game import Grid
|
||||
import os
|
||||
import random
|
||||
|
||||
VISIBILITY_MATRIX_CACHE = {}
|
||||
|
||||
class Layout:
    """
    A Layout manages the static information about the game board.
    """

    def __init__(self, layoutText):
        # layoutText: a list of equal-length strings, one per maze row.
        # Row 0 of the text is the TOP of the board; processLayoutText
        # flips it into the (x, y) convention used everywhere else.
        self.width = len(layoutText[0])
        self.height= len(layoutText)
        self.walls = Grid(self.width, self.height, False)
        self.food = Grid(self.width, self.height, False)
        self.capsules = []
        self.agentPositions = []
        self.numGhosts = 0
        self.processLayoutText(layoutText)
        self.layoutText = layoutText
        self.totalFood = len(self.food.asList())
        # self.initializeVisibilityMatrix()

    def getNumGhosts(self):
        # Count of ghost start positions found while parsing the layout.
        return self.numGhosts

    def initializeVisibilityMatrix(self):
        # Builds, for every open cell and direction, the set of half-step
        # positions on the line of sight before a wall is hit.  Memoized per
        # layout string in VISIBILITY_MATRIX_CACHE.
        # NOTE(review): currently dead code -- the call in __init__ is
        # commented out -- and Python-2-only (bare `reduce`).
        global VISIBILITY_MATRIX_CACHE
        if reduce(str.__add__, self.layoutText) not in VISIBILITY_MATRIX_CACHE:
            from game import Directions
            vecs = [(-0.5,0), (0.5,0),(0,-0.5),(0,0.5)]
            dirs = [Directions.NORTH, Directions.SOUTH, Directions.WEST, Directions.EAST]
            vis = Grid(self.width, self.height, {Directions.NORTH:set(), Directions.SOUTH:set(), Directions.EAST:set(), Directions.WEST:set(), Directions.STOP:set()})
            for x in range(self.width):
                for y in range(self.height):
                    if self.walls[x][y] == False:
                        for vec, direction in zip(vecs, dirs):
                            dx, dy = vec
                            nextx, nexty = x + dx, y + dy
                            while (nextx + nexty) != int(nextx) + int(nexty) or not self.walls[int(nextx)][int(nexty)] :
                                vis[x][y][direction].add((nextx, nexty))
                                # NOTE(review): this re-assigns the FIRST step
                                # (x + dx, y + dy) rather than advancing from
                                # (nextx, nexty), so the walk never progresses;
                                # presumably `nextx + dx, nexty + dy` was
                                # intended.  Confirm before re-enabling.
                                nextx, nexty = x + dx, y + dy
            self.visibility = vis
            VISIBILITY_MATRIX_CACHE[reduce(str.__add__, self.layoutText)] = vis
        else:
            self.visibility = VISIBILITY_MATRIX_CACHE[reduce(str.__add__, self.layoutText)]

    def isWall(self, pos):
        # pos: an (x, y) pair of grid coordinates.
        x, col = pos
        return self.walls[x][col]

    def getRandomLegalPosition(self):
        # Rejection-sample uniformly until a non-wall cell is drawn.
        x = random.choice(range(self.width))
        y = random.choice(range(self.height))
        while self.isWall( (x, y) ):
            x = random.choice(range(self.width))
            y = random.choice(range(self.height))
        return (x,y)

    def getRandomCorner(self):
        # The four cells just inside the outer wall ring.
        poses = [(1,1), (1, self.height - 2), (self.width - 2, 1), (self.width - 2, self.height - 2)]
        return random.choice(poses)

    def getFurthestCorner(self, pacPos):
        # Corner maximizing the Manhattan distance from pacPos.
        poses = [(1,1), (1, self.height - 2), (self.width - 2, 1), (self.width - 2, self.height - 2)]
        dist, pos = max([(manhattanDistance(p, pacPos), p) for p in poses])
        return pos

    def isVisibleFrom(self, ghostPos, pacPos, pacDirection):
        # Requires self.visibility, which only exists after
        # initializeVisibilityMatrix() has run (disabled in __init__).
        row, col = [int(x) for x in pacPos]
        return ghostPos in self.visibility[row][col][pacDirection]

    def __str__(self):
        return "\n".join(self.layoutText)

    def deepCopy(self):
        # A shallow copy of the text list suffices: strings are immutable and
        # the constructor re-derives every other field from the text.
        return Layout(self.layoutText[:])

    def processLayoutText(self, layoutText):
        """
        Coordinates are flipped from the input format to the (x,y) convention here

        The shape of the maze.  Each character
        represents a different type of object.
         % - Wall
         . - Food
         o - Capsule
         G - Ghost
         P - Pacman
        Other characters are ignored.
        """
        maxY = self.height - 1
        for y in range(self.height):
            for x in range(self.width):
                layoutChar = layoutText[maxY - y][x]
                self.processLayoutChar(x, y, layoutChar)
        self.agentPositions.sort()
        # After sorting by index, entry 0 is Pacman; the boolean flag marks
        # "is Pacman" (True) vs ghost (False).
        self.agentPositions = [ ( i == 0, pos) for i, pos in self.agentPositions]

    def processLayoutChar(self, x, y, layoutChar):
        # Record a single board character at the already-flipped (x, y).
        if layoutChar == '%':
            self.walls[x][y] = True
        elif layoutChar == '.':
            self.food[x][y] = True
        elif layoutChar == 'o':
            self.capsules.append((x, y))
        elif layoutChar == 'P':
            # Pacman is always agent index 0.
            self.agentPositions.append( (0, (x, y) ) )
        elif layoutChar in ['G']:
            self.agentPositions.append( (1, (x, y) ) )
            self.numGhosts += 1
        elif layoutChar in ['1', '2', '3', '4']:
            # Digits place ghosts with explicit agent indices.
            self.agentPositions.append( (int(layoutChar), (x,y)))
            self.numGhosts += 1
|
||||
def getLayout(name, back = 2):
    """
    Load a layout by name: try 'layouts/<name>.lay' first, then '<name>.lay'
    in the current directory (the '.lay' suffix is appended unless already
    present).  On failure, retry from up to `back` parent directories,
    restoring the working directory afterwards.  Returns None if nothing
    is found.
    """
    fname = name if name.endswith('.lay') else name + '.lay'
    layout = tryToLoad('layouts/' + fname)
    if layout == None:
        layout = tryToLoad(fname)
    if layout == None and back >= 0:
        # Not found here: look one directory up, then come back.
        curdir = os.path.abspath('.')
        os.chdir('..')
        layout = getLayout(name, back - 1)
        os.chdir(curdir)
    return layout
|
||||
|
||||
def tryToLoad(fullname):
    """Return a Layout parsed from `fullname`, or None if the file is absent."""
    if not os.path.exists(fullname):
        return None
    handle = open(fullname)
    try:
        rows = [line.strip() for line in handle]
    finally:
        handle.close()
    return Layout(rows)
|
7
reinforcement/layouts/capsuleClassic.lay
Normal file
7
reinforcement/layouts/capsuleClassic.lay
Normal file
|
@ -0,0 +1,7 @@
|
|||
%%%%%%%%%%%%%%%%%%%
|
||||
%G. G ....%
|
||||
%.% % %%%%%% %.%%.%
|
||||
%.%o% % o% %.o%.%
|
||||
%.%%%.% %%% %..%.%
|
||||
%..... P %..%G%
|
||||
%%%%%%%%%%%%%%%%%%%%
|
9
reinforcement/layouts/contestClassic.lay
Normal file
9
reinforcement/layouts/contestClassic.lay
Normal file
|
@ -0,0 +1,9 @@
|
|||
%%%%%%%%%%%%%%%%%%%%
|
||||
%o...%........%...o%
|
||||
%.%%.%.%%..%%.%.%%.%
|
||||
%...... G GG%......%
|
||||
%.%.%%.%% %%%.%%.%.%
|
||||
%.%....% ooo%.%..%.%
|
||||
%.%.%%.% %% %.%.%%.%
|
||||
%o%......P....%....%
|
||||
%%%%%%%%%%%%%%%%%%%%
|
11
reinforcement/layouts/mediumClassic.lay
Normal file
11
reinforcement/layouts/mediumClassic.lay
Normal file
|
@ -0,0 +1,11 @@
|
|||
%%%%%%%%%%%%%%%%%%%%
|
||||
%o...%........%....%
|
||||
%.%%.%.%%%%%%.%.%%.%
|
||||
%.%..............%.%
|
||||
%.%.%%.%% %%.%%.%.%
|
||||
%......%G G%......%
|
||||
%.%.%%.%%%%%%.%%.%.%
|
||||
%.%..............%.%
|
||||
%.%%.%.%%%%%%.%.%%.%
|
||||
%....%...P....%...o%
|
||||
%%%%%%%%%%%%%%%%%%%%
|
7
reinforcement/layouts/mediumGrid.lay
Normal file
7
reinforcement/layouts/mediumGrid.lay
Normal file
|
@ -0,0 +1,7 @@
|
|||
%%%%%%%%
|
||||
%P %
|
||||
% .% . %
|
||||
% % %
|
||||
% .% . %
|
||||
% G%
|
||||
%%%%%%%%
|
5
reinforcement/layouts/minimaxClassic.lay
Normal file
5
reinforcement/layouts/minimaxClassic.lay
Normal file
|
@ -0,0 +1,5 @@
|
|||
%%%%%%%%%
|
||||
%.P G%
|
||||
% %.%G%%%
|
||||
%G %%%
|
||||
%%%%%%%%%
|
9
reinforcement/layouts/openClassic.lay
Normal file
9
reinforcement/layouts/openClassic.lay
Normal file
|
@ -0,0 +1,9 @@
|
|||
%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%.. P .... .... %
|
||||
%.. ... ... ... ... %
|
||||
%.. ... ... ... ... %
|
||||
%.. .... .... G %
|
||||
%.. ... ... ... ... %
|
||||
%.. ... ... ... ... %
|
||||
%.. .... .... o%
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%
|
27
reinforcement/layouts/originalClassic.lay
Normal file
27
reinforcement/layouts/originalClassic.lay
Normal file
|
@ -0,0 +1,27 @@
|
|||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%............%%............%
|
||||
%.%%%%.%%%%%.%%.%%%%%.%%%%.%
|
||||
%o%%%%.%%%%%.%%.%%%%%.%%%%o%
|
||||
%.%%%%.%%%%%.%%.%%%%%.%%%%.%
|
||||
%..........................%
|
||||
%.%%%%.%%.%%%%%%%%.%%.%%%%.%
|
||||
%.%%%%.%%.%%%%%%%%.%%.%%%%.%
|
||||
%......%%....%%....%%......%
|
||||
%%%%%%.%%%%% %% %%%%%.%%%%%%
|
||||
%%%%%%.%%%%% %% %%%%%.%%%%%%
|
||||
%%%%%%.% %.%%%%%%
|
||||
%%%%%%.% %%%% %%%% %.%%%%%%
|
||||
% . %G GG G% . %
|
||||
%%%%%%.% %%%%%%%%%% %.%%%%%%
|
||||
%%%%%%.% %.%%%%%%
|
||||
%%%%%%.% %%%%%%%%%% %.%%%%%%
|
||||
%............%%............%
|
||||
%.%%%%.%%%%%.%%.%%%%%.%%%%.%
|
||||
%.%%%%.%%%%%.%%.%%%%%.%%%%.%
|
||||
%o..%%....... .......%%..o%
|
||||
%%%.%%.%%.%%%%%%%%.%%.%%.%%%
|
||||
%%%.%%.%%.%%%%%%%%.%%.%%.%%%
|
||||
%......%%....%%....%%......%
|
||||
%.%%%%%%%%%%.%%.%%%%%%%%%%.%
|
||||
%.............P............%
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
7
reinforcement/layouts/smallClassic.lay
Normal file
7
reinforcement/layouts/smallClassic.lay
Normal file
|
@ -0,0 +1,7 @@
|
|||
%%%%%%%%%%%%%%%%%%%%
|
||||
%......%G G%......%
|
||||
%.%%...%% %%...%%.%
|
||||
%.%o.%........%.o%.%
|
||||
%.%%.%.%%%%%%.%.%%.%
|
||||
%........P.........%
|
||||
%%%%%%%%%%%%%%%%%%%%
|
7
reinforcement/layouts/smallGrid.lay
Normal file
7
reinforcement/layouts/smallGrid.lay
Normal file
|
@ -0,0 +1,7 @@
|
|||
%%%%%%%
|
||||
% P %
|
||||
% %%% %
|
||||
% %. %
|
||||
% %%% %
|
||||
%. G %
|
||||
%%%%%%%
|
10
reinforcement/layouts/testClassic.lay
Normal file
10
reinforcement/layouts/testClassic.lay
Normal file
|
@ -0,0 +1,10 @@
|
|||
%%%%%
|
||||
% . %
|
||||
%.G.%
|
||||
% . %
|
||||
%. .%
|
||||
% %
|
||||
% .%
|
||||
% %
|
||||
%P .%
|
||||
%%%%%
|
5
reinforcement/layouts/trappedClassic.lay
Normal file
5
reinforcement/layouts/trappedClassic.lay
Normal file
|
@ -0,0 +1,5 @@
|
|||
%%%%%%%%
|
||||
% P G%
|
||||
%G%%%%%%
|
||||
%.... %
|
||||
%%%%%%%%
|
13
reinforcement/layouts/trickyClassic.lay
Normal file
13
reinforcement/layouts/trickyClassic.lay
Normal file
|
@ -0,0 +1,13 @@
|
|||
%%%%%%%%%%%%%%%%%%%%
|
||||
%o...%........%...o%
|
||||
%.%%.%.%%..%%.%.%%.%
|
||||
%.%.....%..%.....%.%
|
||||
%.%.%%.%% %%.%%.%.%
|
||||
%...... GGGG%.%....%
|
||||
%.%....%%%%%%.%..%.%
|
||||
%.%....% oo%.%..%.%
|
||||
%.%....% %%%%.%..%.%
|
||||
%.%...........%..%.%
|
||||
%.%%.%.%%%%%%.%.%%.%
|
||||
%o...%...P....%...o%
|
||||
%%%%%%%%%%%%%%%%%%%%
|
258
reinforcement/learningAgents.py
Normal file
258
reinforcement/learningAgents.py
Normal file
|
@ -0,0 +1,258 @@
|
|||
# learningAgents.py
|
||||
# -----------------
|
||||
# Licensing Information: You are free to use or extend these projects for
|
||||
# educational purposes provided that (1) you do not distribute or publish
|
||||
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||
#
|
||||
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||
# The core projects and autograders were primarily created by John DeNero
|
||||
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||
|
||||
|
||||
from game import Directions, Agent, Actions
|
||||
|
||||
import random,util,time
|
||||
|
||||
class ValueEstimationAgent(Agent):
    """
    Abstract base for agents that assign Q-values Q(s, a) to state/action
    pairs, from which a state value and a policy follow:

        V(s)      = max_{a in actions} Q(s, a)
        policy(s) = arg_max_{a in actions} Q(s, a)

    Both ValueIterationAgent (model-based: plans with a
    MarkovDecisionProcess before ever acting, see mdp.py) and
    QLearningAgent (model-free: estimates Q-values while acting) inherit
    from this class.
    """

    def __init__(self, alpha=1.0, epsilon=0.05, gamma=0.8, numTraining = 10):
        """
        Store the learning options; these can be passed in via the Pacman
        command line using -a alpha=0.5,...

        alpha       -- learning rate
        epsilon     -- exploration rate
        gamma       -- discount factor
        numTraining -- number of training episodes (no learning afterwards)
        """
        self.numTraining = int(numTraining)
        self.discount = float(gamma)
        self.epsilon = float(epsilon)
        self.alpha = float(alpha)

    ####################################
    #    Override These Functions      #
    ####################################
    def getQValue(self, state, action):
        """Return Q(state, action).  Must be overridden by subclasses."""
        util.raiseNotDefined()

    def getValue(self, state):
        """
        Return the value of `state` under its best action:

            V(s) = max_{a in actions} Q(s, a)

        Must be overridden by subclasses.
        """
        util.raiseNotDefined()

    def getPolicy(self, state):
        """
        Return the best action in `state`:

            policy(s) = arg_max_{a in actions} Q(s, a)

        If several actions achieve the maximal Q-value, any of them may be
        returned.  Because of exploration, getAction need not agree with
        this.  Must be overridden by subclasses.
        """
        util.raiseNotDefined()

    def getAction(self, state):
        """
        Choose an action for `state` (which supports
        state.getLegalActions()) and return it.  Must be overridden by
        subclasses.
        """
        util.raiseNotDefined()
|
||||
|
||||
class ReinforcementAgent(ValueEstimationAgent):
    """
    Abstract Reinforcement Agent: a ValueEstimationAgent which estimates
    Q-values (as well as policies) from experience rather than a model.

    What you need to know:
    - The environment will call
      observeTransition(state, action, nextState, deltaReward),
      which will call update(state, action, nextState, deltaReward),
      which you should override.
    - Use self.getLegalActions(state) to know which actions
      are available in a state.
    """
    ####################################
    # Override These Functions #
    ####################################

    def update(self, state, action, nextState, reward):
        """
        This class will call this function, which you write, after
        observing a transition and reward
        """
        util.raiseNotDefined()

    ####################################
    # Read These Functions #
    ####################################

    def getLegalActions(self,state):
        """
        Get the actions available for a given
        state. This is what you should use to
        obtain legal actions for a state
        """
        # Delegates to the actionFn supplied at construction time.
        return self.actionFn(state)

    def observeTransition(self, state,action,nextState,deltaReward):
        """
        Called by environment to inform agent that a transition has
        been observed. This will result in a call to self.update
        on the same arguments

        NOTE: Do *not* override or call this function
        """
        # Accumulate the episode's reward before handing off to update().
        self.episodeRewards += deltaReward
        self.update(state,action,nextState,deltaReward)

    def startEpisode(self):
        """
        Called by environment when new episode is starting
        """
        self.lastState = None
        self.lastAction = None
        self.episodeRewards = 0.0

    def stopEpisode(self):
        """
        Called by environment when episode is done
        """
        # Rewards are tallied into separate training/testing accumulators.
        if self.episodesSoFar < self.numTraining:
            self.accumTrainRewards += self.episodeRewards
        else:
            self.accumTestRewards += self.episodeRewards
        self.episodesSoFar += 1
        if self.episodesSoFar >= self.numTraining:
            # Take off the training wheels
            self.epsilon = 0.0    # no exploration
            self.alpha = 0.0      # no learning

    def isInTraining(self):
        # True while fewer than numTraining episodes have completed.
        return self.episodesSoFar < self.numTraining

    def isInTesting(self):
        return not self.isInTraining()

    def __init__(self, actionFn = None, numTraining=100, epsilon=0.5, alpha=0.5, gamma=1):
        """
        actionFn: Function which takes a state and returns the list of legal actions

        alpha    - learning rate
        epsilon  - exploration rate
        gamma    - discount factor
        numTraining - number of training episodes, i.e. no learning after these many episodes
        """
        # Default to asking the state itself for its legal actions.
        if actionFn == None:
            actionFn = lambda state: state.getLegalActions()
        self.actionFn = actionFn
        self.episodesSoFar = 0
        self.accumTrainRewards = 0.0
        self.accumTestRewards = 0.0
        self.numTraining = int(numTraining)
        self.epsilon = float(epsilon)
        self.alpha = float(alpha)
        self.discount = float(gamma)

    ################################
    # Controls needed for Crawler  #
    ################################
    def setEpsilon(self, epsilon):
        self.epsilon = epsilon

    def setLearningRate(self, alpha):
        self.alpha = alpha

    def setDiscount(self, discount):
        self.discount = discount

    def doAction(self,state,action):
        """
        Called by inherited class when
        an action is taken in a state
        """
        # Remembered so observationFunction/final can form the transition.
        self.lastState = state
        self.lastAction = action

    ###################
    # Pacman Specific #
    ###################
    def observationFunction(self, state):
        """
        This is where we ended up after our last action.
        The simulation should somehow ensure this is called
        """
        if not self.lastState is None:
            # Reward is the change in game score since the last observation.
            reward = state.getScore() - self.lastState.getScore()
            self.observeTransition(self.lastState, self.lastAction, state, reward)
        return state

    def registerInitialState(self, state):
        self.startEpisode()
        if self.episodesSoFar == 0:
            print 'Beginning %d episodes of Training' % (self.numTraining)

    def final(self, state):
        """
        Called by Pacman game at the terminal state
        """
        deltaReward = state.getScore() - self.lastState.getScore()
        self.observeTransition(self.lastState, self.lastAction, state, deltaReward)
        self.stopEpisode()

        # Make sure we have this var
        if not 'episodeStartTime' in self.__dict__:
            self.episodeStartTime = time.time()
        if not 'lastWindowAccumRewards' in self.__dict__:
            self.lastWindowAccumRewards = 0.0
        self.lastWindowAccumRewards += state.getScore()

        # Print a progress report every NUM_EPS_UPDATE episodes.
        NUM_EPS_UPDATE = 100
        if self.episodesSoFar % NUM_EPS_UPDATE == 0:
            print 'Reinforcement Learning Status:'
            windowAvg = self.lastWindowAccumRewards / float(NUM_EPS_UPDATE)
            if self.episodesSoFar <= self.numTraining:
                trainAvg = self.accumTrainRewards / float(self.episodesSoFar)
                print '\tCompleted %d out of %d training episodes' % (
                       self.episodesSoFar,self.numTraining)
                print '\tAverage Rewards over all training: %.2f' % (
                        trainAvg)
            else:
                testAvg = float(self.accumTestRewards) / (self.episodesSoFar - self.numTraining)
                print '\tCompleted %d test episodes' % (self.episodesSoFar - self.numTraining)
                print '\tAverage Rewards over testing: %.2f' % testAvg
            print '\tAverage Rewards for last %d episodes: %.2f' % (
                    NUM_EPS_UPDATE,windowAvg)
            print '\tEpisode took %.2f seconds' % (time.time() - self.episodeStartTime)
            self.lastWindowAccumRewards = 0.0
            self.episodeStartTime = time.time()

        if self.episodesSoFar == self.numTraining:
            msg = 'Training Done (turning off epsilon and alpha)'
            print '%s\n%s' % (msg,'-' * len(msg))
|
67
reinforcement/mdp.py
Normal file
67
reinforcement/mdp.py
Normal file
|
@ -0,0 +1,67 @@
|
|||
# mdp.py
|
||||
# ------
|
||||
# Licensing Information: You are free to use or extend these projects for
|
||||
# educational purposes provided that (1) you do not distribute or publish
|
||||
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||
#
|
||||
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||
# The core projects and autograders were primarily created by John DeNero
|
||||
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||
|
||||
|
||||
import random
|
||||
|
||||
class MarkovDecisionProcess:
    """
    Abstract interface for a Markov Decision Process: states, actions,
    transition model, rewards, and a terminal test.  Concrete environments
    subclass this and implement every method.

    NOTE: each method body is the bare name `abstract`, which is undefined
    on purpose -- evaluating it raises NameError, serving as a poor-man's
    abstract-method marker.
    """

    def getStates(self):
        """
        Return a list of all states in the MDP.
        Not generally possible for large MDPs.
        """
        abstract

    def getStartState(self):
        """
        Return the start state of the MDP.
        """
        abstract

    def getPossibleActions(self, state):
        """
        Return list of possible actions from 'state'.
        """
        abstract

    def getTransitionStatesAndProbs(self, state, action):
        """
        Returns list of (nextState, prob) pairs
        representing the states reachable
        from 'state' by taking 'action' along
        with their transition probabilities.

        Note that in Q-Learning and reinforcment
        learning in general, we do not know these
        probabilities nor do we directly model them.
        """
        abstract

    def getReward(self, state, action, nextState):
        """
        Get the reward for the state, action, nextState transition.

        Not available in reinforcement learning.
        """
        abstract

    def isTerminal(self, state):
        """
        Returns true if the current state is a terminal state.  By convention,
        a terminal state has zero future rewards.  Sometimes the terminal state(s)
        may have no possible actions.  It is also common to think of the terminal
        state as having a self-loop action 'pass' with zero reward; the formulations
        are equivalent.
        """
        abstract
|
684
reinforcement/pacman.py
Normal file
684
reinforcement/pacman.py
Normal file
|
@ -0,0 +1,684 @@
|
|||
# pacman.py
|
||||
# ---------
|
||||
# Licensing Information: You are free to use or extend these projects for
|
||||
# educational purposes provided that (1) you do not distribute or publish
|
||||
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||
#
|
||||
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||
# The core projects and autograders were primarily created by John DeNero
|
||||
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||
|
||||
|
||||
"""
|
||||
Pacman.py holds the logic for the classic pacman game along with the main
|
||||
code to run a game. This file is divided into three sections:
|
||||
|
||||
(i) Your interface to the pacman world:
|
||||
Pacman is a complex environment. You probably don't want to
|
||||
read through all of the code we wrote to make the game runs
|
||||
correctly. This section contains the parts of the code
|
||||
that you will need to understand in order to complete the
|
||||
project. There is also some code in game.py that you should
|
||||
understand.
|
||||
|
||||
(ii) The hidden secrets of pacman:
|
||||
This section contains all of the logic code that the pacman
|
||||
environment uses to decide who can move where, who dies when
|
||||
things collide, etc. You shouldn't need to read this section
|
||||
of code, but you can if you want.
|
||||
|
||||
(iii) Framework to start a game:
|
||||
The final section contains the code for reading the command
|
||||
you use to set up the game, then starting up a new game, along with
|
||||
linking in all the external parts (agent functions, graphics).
|
||||
Check this section out to see all the options available to you.
|
||||
|
||||
To play your first game, type 'python pacman.py' from the command line.
|
||||
The keys are 'a', 's', 'd', and 'w' to move (or arrow keys). Have fun!
|
||||
"""
|
||||
from game import GameStateData
|
||||
from game import Game
|
||||
from game import Directions
|
||||
from game import Actions
|
||||
from util import nearestPoint
|
||||
from util import manhattanDistance
|
||||
import util, layout
|
||||
import sys, types, time, random, os
|
||||
|
||||
###################################################
|
||||
# YOUR INTERFACE TO THE PACMAN WORLD: A GameState #
|
||||
###################################################
|
||||
|
||||
class GameState:
|
||||
"""
|
||||
A GameState specifies the full game state, including the food, capsules,
|
||||
agent configurations and score changes.
|
||||
|
||||
GameStates are used by the Game object to capture the actual state of the game and
|
||||
can be used by agents to reason about the game.
|
||||
|
||||
Much of the information in a GameState is stored in a GameStateData object. We
|
||||
strongly suggest that you access that data via the accessor methods below rather
|
||||
than referring to the GameStateData object directly.
|
||||
|
||||
Note that in classic Pacman, Pacman is always agent 0.
|
||||
"""
|
||||
|
||||
####################################################
|
||||
# Accessor methods: use these to access state data #
|
||||
####################################################
|
||||
|
||||
# static variable keeps track of which states have had getLegalActions called
|
||||
explored = set()
|
||||
def getAndResetExplored():
|
||||
tmp = GameState.explored.copy()
|
||||
GameState.explored = set()
|
||||
return tmp
|
||||
getAndResetExplored = staticmethod(getAndResetExplored)
|
||||
|
||||
def getLegalActions( self, agentIndex=0 ):
|
||||
"""
|
||||
Returns the legal actions for the agent specified.
|
||||
"""
|
||||
# GameState.explored.add(self)
|
||||
if self.isWin() or self.isLose(): return []
|
||||
|
||||
if agentIndex == 0: # Pacman is moving
|
||||
return PacmanRules.getLegalActions( self )
|
||||
else:
|
||||
return GhostRules.getLegalActions( self, agentIndex )
|
||||
|
||||
def generateSuccessor( self, agentIndex, action):
|
||||
"""
|
||||
Returns the successor state after the specified agent takes the action.
|
||||
"""
|
||||
# Check that successors exist
|
||||
if self.isWin() or self.isLose(): raise Exception('Can\'t generate a successor of a terminal state.')
|
||||
|
||||
# Copy current state
|
||||
state = GameState(self)
|
||||
|
||||
# Let agent's logic deal with its action's effects on the board
|
||||
if agentIndex == 0: # Pacman is moving
|
||||
state.data._eaten = [False for i in range(state.getNumAgents())]
|
||||
PacmanRules.applyAction( state, action )
|
||||
else: # A ghost is moving
|
||||
GhostRules.applyAction( state, action, agentIndex )
|
||||
|
||||
# Time passes
|
||||
if agentIndex == 0:
|
||||
state.data.scoreChange += -TIME_PENALTY # Penalty for waiting around
|
||||
else:
|
||||
GhostRules.decrementTimer( state.data.agentStates[agentIndex] )
|
||||
|
||||
# Resolve multi-agent effects
|
||||
GhostRules.checkDeath( state, agentIndex )
|
||||
|
||||
# Book keeping
|
||||
state.data._agentMoved = agentIndex
|
||||
state.data.score += state.data.scoreChange
|
||||
GameState.explored.add(self)
|
||||
GameState.explored.add(state)
|
||||
return state
|
||||
|
||||
def getLegalPacmanActions( self ):
|
||||
return self.getLegalActions( 0 )
|
||||
|
||||
def generatePacmanSuccessor( self, action ):
|
||||
"""
|
||||
Generates the successor state after the specified pacman move
|
||||
"""
|
||||
return self.generateSuccessor( 0, action )
|
||||
|
||||
def getPacmanState( self ):
|
||||
"""
|
||||
Returns an AgentState object for pacman (in game.py)
|
||||
|
||||
state.pos gives the current position
|
||||
state.direction gives the travel vector
|
||||
"""
|
||||
return self.data.agentStates[0].copy()
|
||||
|
||||
def getPacmanPosition( self ):
|
||||
return self.data.agentStates[0].getPosition()
|
||||
|
||||
def getGhostStates( self ):
|
||||
return self.data.agentStates[1:]
|
||||
|
||||
def getGhostState( self, agentIndex ):
|
||||
if agentIndex == 0 or agentIndex >= self.getNumAgents():
|
||||
raise Exception("Invalid index passed to getGhostState")
|
||||
return self.data.agentStates[agentIndex]
|
||||
|
||||
def getGhostPosition(self, agentIndex):
    """Return the (x, y) position of the ghost with the given agent index.

    Raises an Exception when index 0 (Pacman) is passed.
    """
    if agentIndex == 0:
        raise Exception("Pacman's index passed to getGhostPosition")
    return self.data.agentStates[agentIndex].getPosition()
|
||||
|
||||
def getGhostPositions(self):
    """Return the (x, y) positions of all ghosts, in agent-index order."""
    return [ghost.getPosition() for ghost in self.getGhostStates()]
|
||||
|
||||
def getNumAgents(self):
    """Return the total number of agents (Pacman plus all ghosts)."""
    return len(self.data.agentStates)
|
||||
|
||||
def getScore(self):
    """Return the current game score as a float."""
    return float(self.data.score)
|
||||
|
||||
def getCapsules(self):
    """Return the list of (x, y) positions of the remaining capsules."""
    return self.data.capsules
|
||||
|
||||
def getNumFood(self):
    """Return how many food pellets remain on the board."""
    return self.data.food.count()
|
||||
|
||||
def getFood(self):
    """Return a Grid of boolean food indicator variables.

    Grids can be accessed via list notation, so to check for food
    at (x, y), just call:

        currentFood = state.getFood()
        if currentFood[x][y] == True: ...
    """
    return self.data.food
|
||||
|
||||
def getWalls(self):
    """Return a Grid of boolean wall indicator variables.

    Grids can be accessed via list notation, so to check for a wall
    at (x, y), just call:

        walls = state.getWalls()
        if walls[x][y] == True: ...
    """
    return self.data.layout.walls
|
||||
|
||||
def hasFood(self, x, y):
    """Return True when there is food at board position (x, y)."""
    return self.data.food[x][y]
|
||||
|
||||
def hasWall(self, x, y):
    """Return True when there is a wall at board position (x, y)."""
    return self.data.layout.walls[x][y]
|
||||
|
||||
def isLose(self):
    """Return True when this state is a losing terminal state."""
    return self.data._lose
|
||||
|
||||
def isWin(self):
    """Return True when this state is a winning terminal state."""
    return self.data._win
|
||||
|
||||
#############################################
|
||||
# Helper methods: #
|
||||
# You shouldn't need to call these directly #
|
||||
#############################################
|
||||
|
||||
def __init__( self, prevState = None ):
    """
    Generates a new state by copying information from its predecessor.

    With no predecessor, a blank GameStateData is created; the caller
    is then expected to call initialize() to populate it.
    """
    # Bug fix: the original comment labelled the prevState branch
    # "# Initial state", which is backwards — copying a predecessor
    # produces a *successor* state.  Also use identity comparison for None.
    if prevState is not None:  # Successor state: copy the predecessor's data
        self.data = GameStateData(prevState.data)
    else:  # Initial (blank) state
        self.data = GameStateData()
|
||||
|
||||
def deepCopy(self):
    """Return a fully independent deep copy of this state."""
    copied = GameState(self)
    copied.data = self.data.deepCopy()
    return copied
|
||||
|
||||
def __eq__(self, other):
    """Two states compare equal when their underlying data objects are equal."""
    return hasattr(other, 'data') and self.data == other.data
|
||||
|
||||
def __hash__(self):
    """Hash on the underlying data so states can be used as dictionary keys."""
    return hash(self.data)
|
||||
|
||||
def __str__(self):
    """Delegate board rendering to the underlying GameStateData."""
    return str(self.data)
|
||||
|
||||
def initialize(self, layout, numGhostAgents=1000):
    """Create an initial game state from a layout array (see layout.py)."""
    self.data.initialize(layout, numGhostAgents)
|
||||
|
||||
############################################################################
|
||||
# THE HIDDEN SECRETS OF PACMAN #
|
||||
# #
|
||||
# You shouldn't need to look through the code in this section of the file. #
|
||||
############################################################################
|
||||
|
||||
SCARED_TIME = 40    # Moves ghosts are scared (set when Pacman eats a capsule)
COLLISION_TOLERANCE = 0.7 # How close ghosts must be to Pacman to kill (manhattan distance)
TIME_PENALTY = 1 # Number of points lost each round Pacman moves
|
||||
|
||||
class ClassicGameRules:
    """
    These game rules manage the control flow of a game, deciding when
    and how the game starts and ends.
    """
    def __init__(self, timeout=30):
        # Time budget (seconds) handed to agents for startup and moves.
        self.timeout = timeout

    def newGame( self, layout, pacmanAgent, ghostAgents, display, quiet = False, catchExceptions=False):
        # Only as many ghost agents as the layout has ghost start slots.
        agents = [pacmanAgent] + ghostAgents[:layout.getNumGhosts()]
        initState = GameState()
        initState.initialize( layout, len(ghostAgents) )
        game = Game(agents, display, self, catchExceptions=catchExceptions)
        game.state = initState
        # Keep a pristine copy so getProgress can compare food counts later.
        self.initialState = initState.deepCopy()
        self.quiet = quiet
        return game

    def process(self, state, game):
        """
        Checks to see whether it is time to end the game.
        """
        if state.isWin(): self.win(state, game)
        if state.isLose(): self.lose(state, game)

    def win( self, state, game ):
        # Announce the win (unless quiet) and stop the game loop.
        if not self.quiet: print "Pacman emerges victorious! Score: %d" % state.data.score
        game.gameOver = True

    def lose( self, state, game ):
        # Announce the loss (unless quiet) and stop the game loop.
        if not self.quiet: print "Pacman died! Score: %d" % state.data.score
        game.gameOver = True

    def getProgress(self, game):
        # Fraction of the initial food still remaining (1.0 = none eaten).
        return float(game.state.getNumFood()) / self.initialState.getNumFood()

    def agentCrash(self, game, agentIndex):
        # Called by the Game framework when an agent raises an exception.
        if agentIndex == 0:
            print "Pacman crashed"
        else:
            print "A ghost crashed"

    # The following accessors all report the same configured timeout; the
    # Game framework queries them separately per budget category.
    def getMaxTotalTime(self, agentIndex):
        return self.timeout

    def getMaxStartupTime(self, agentIndex):
        return self.timeout

    def getMoveWarningTime(self, agentIndex):
        return self.timeout

    def getMoveTimeout(self, agentIndex):
        return self.timeout

    def getMaxTimeWarnings(self, agentIndex):
        return 0
|
||||
|
||||
class PacmanRules:
    """
    These functions govern how pacman interacts with his environment under
    the classic game rules.
    """
    # Distance Pacman travels per move.
    PACMAN_SPEED=1

    def getLegalActions( state ):
        """
        Returns a list of possible actions.
        """
        return Actions.getPossibleActions( state.getPacmanState().configuration, state.data.layout.walls )
    getLegalActions = staticmethod( getLegalActions )

    def applyAction( state, action ):
        """
        Edits the state to reflect the results of the action.

        Raises an Exception if the action is not legal in this state.
        """
        legal = PacmanRules.getLegalActions( state )
        if action not in legal:
            raise Exception("Illegal action " + str(action))

        pacmanState = state.data.agentStates[0]

        # Update Configuration
        vector = Actions.directionToVector( action, PacmanRules.PACMAN_SPEED )
        pacmanState.configuration = pacmanState.configuration.generateSuccessor( vector )

        # Eat: only consume when Pacman is (nearly) centred on a grid point.
        next = pacmanState.configuration.getPosition()
        nearest = nearestPoint( next )
        if manhattanDistance( nearest, next ) <= 0.5 :
            # Remove food
            PacmanRules.consume( nearest, state )
    applyAction = staticmethod( applyAction )

    def consume( position, state ):
        """Apply the effects of eating whatever sits at `position` (food and/or capsule)."""
        x,y = position
        # Eat food
        if state.data.food[x][y]:
            state.data.scoreChange += 10
            # Copy the grid before mutating it, so states sharing the same
            # food grid are not affected by this successor's change.
            state.data.food = state.data.food.copy()
            state.data.food[x][y] = False
            state.data._foodEaten = position
            # TODO: cache numFood?
            numFood = state.getNumFood()
            if numFood == 0 and not state.data._lose:
                state.data.scoreChange += 500
                state.data._win = True
        # Eat capsule
        if( position in state.getCapsules() ):
            state.data.capsules.remove( position )
            state.data._capsuleEaten = position
            # Reset all ghosts' scared timers
            for index in range( 1, len( state.data.agentStates ) ):
                state.data.agentStates[index].scaredTimer = SCARED_TIME
    consume = staticmethod( consume )
|
||||
|
||||
class GhostRules:
    """
    These functions dictate how ghosts interact with their environment.
    """
    # Base distance a ghost travels per move (halved while scared).
    GHOST_SPEED=1.0
    def getLegalActions( state, ghostIndex ):
        """
        Ghosts cannot stop, and cannot turn around unless they
        reach a dead end, but can turn 90 degrees at intersections.
        """
        conf = state.getGhostState( ghostIndex ).configuration
        possibleActions = Actions.getPossibleActions( conf, state.data.layout.walls )
        reverse = Actions.reverseDirection( conf.direction )
        if Directions.STOP in possibleActions:
            possibleActions.remove( Directions.STOP )
        # Forbid the U-turn unless it is the only remaining option (dead end).
        if reverse in possibleActions and len( possibleActions ) > 1:
            possibleActions.remove( reverse )
        return possibleActions
    getLegalActions = staticmethod( getLegalActions )

    def applyAction( state, action, ghostIndex):
        """Move the given ghost one step; scared ghosts move at half speed."""
        legal = GhostRules.getLegalActions( state, ghostIndex )
        if action not in legal:
            raise Exception("Illegal ghost action " + str(action))

        ghostState = state.data.agentStates[ghostIndex]
        speed = GhostRules.GHOST_SPEED
        if ghostState.scaredTimer > 0: speed /= 2.0
        vector = Actions.directionToVector( action, speed )
        ghostState.configuration = ghostState.configuration.generateSuccessor( vector )
    applyAction = staticmethod( applyAction )

    def decrementTimer( ghostState):
        # Count the scared timer down by one (never below zero); on the last
        # scared move, snap the ghost's position back to the nearest grid
        # point (half-speed moves can leave it between cells).
        timer = ghostState.scaredTimer
        if timer == 1:
            ghostState.configuration.pos = nearestPoint( ghostState.configuration.pos )
        ghostState.scaredTimer = max( 0, timer - 1 )
    decrementTimer = staticmethod( decrementTimer )

    def checkDeath( state, agentIndex):
        # Resolve Pacman/ghost collisions after agent `agentIndex` has moved.
        pacmanPosition = state.getPacmanPosition()
        if agentIndex == 0: # Pacman just moved; Anyone can kill him
            for index in range( 1, len( state.data.agentStates ) ):
                ghostState = state.data.agentStates[index]
                ghostPosition = ghostState.configuration.getPosition()
                if GhostRules.canKill( pacmanPosition, ghostPosition ):
                    GhostRules.collide( state, ghostState, index )
        else:
            # A ghost just moved; only that ghost can collide with Pacman.
            ghostState = state.data.agentStates[agentIndex]
            ghostPosition = ghostState.configuration.getPosition()
            if GhostRules.canKill( pacmanPosition, ghostPosition ):
                GhostRules.collide( state, ghostState, agentIndex )
    checkDeath = staticmethod( checkDeath )

    def collide( state, ghostState, agentIndex):
        # Scared ghost: Pacman eats it for 200 points and it respawns.
        # Otherwise Pacman dies (unless the game was already won).
        if ghostState.scaredTimer > 0:
            state.data.scoreChange += 200
            GhostRules.placeGhost(state, ghostState)
            ghostState.scaredTimer = 0
            # Added for first-person
            state.data._eaten[agentIndex] = True
        else:
            if not state.data._win:
                state.data.scoreChange -= 500
                state.data._lose = True
    collide = staticmethod( collide )

    def canKill( pacmanPosition, ghostPosition ):
        # A collision occurs when the two agents are within COLLISION_TOLERANCE.
        return manhattanDistance( ghostPosition, pacmanPosition ) <= COLLISION_TOLERANCE
    canKill = staticmethod( canKill )

    def placeGhost(state, ghostState):
        # Send the ghost back to its starting configuration.
        ghostState.configuration = ghostState.start
    placeGhost = staticmethod( placeGhost )
|
||||
|
||||
#############################
|
||||
# FRAMEWORK TO START A GAME #
|
||||
#############################
|
||||
|
||||
def default(str):
    """Append the optparse default-value marker to a help string."""
    return str + ' [Default: %default]'
|
||||
|
||||
def parseAgentArgs(str):
    """Parse a comma-separated agent-option string into a dict.

    Input has the form "opt1=val1,opt2,opt3=val3"; keys without an '='
    receive the value 1.  Returns an empty dict for None input.

    Bug fix: split each piece on the *first* '=' only, so values that
    themselves contain '=' no longer raise a ValueError.
    """
    if str is None:
        return {}
    opts = {}
    for piece in str.split(','):
        if '=' in piece:
            key, val = piece.split('=', 1)  # maxsplit=1: value may contain '='
        else:
            key, val = piece, 1
        opts[key] = val
    return opts
|
||||
|
||||
def readCommand( argv ):
    """
    Processes the command used to run pacman from the command line.

    Returns a dict of keyword arguments for runGames(); when --replay is
    given, replays the recorded game and exits instead of returning.
    """
    from optparse import OptionParser
    usageStr = """
    USAGE:      python pacman.py <options>
    EXAMPLES:   (1) python pacman.py
                    - starts an interactive game
                (2) python pacman.py --layout smallClassic --zoom 2
                OR  python pacman.py -l smallClassic -z 2
                    - starts an interactive game on a smaller board, zoomed in
    """
    parser = OptionParser(usageStr)

    parser.add_option('-n', '--numGames', dest='numGames', type='int',
                      help=default('the number of GAMES to play'), metavar='GAMES', default=1)
    parser.add_option('-l', '--layout', dest='layout',
                      help=default('the LAYOUT_FILE from which to load the map layout'),
                      metavar='LAYOUT_FILE', default='mediumClassic')
    parser.add_option('-p', '--pacman', dest='pacman',
                      help=default('the agent TYPE in the pacmanAgents module to use'),
                      metavar='TYPE', default='KeyboardAgent')
    parser.add_option('-t', '--textGraphics', action='store_true', dest='textGraphics',
                      help='Display output as text only', default=False)
    parser.add_option('-q', '--quietTextGraphics', action='store_true', dest='quietGraphics',
                      help='Generate minimal output and no graphics', default=False)
    parser.add_option('-g', '--ghosts', dest='ghost',
                      help=default('the ghost agent TYPE in the ghostAgents module to use'),
                      metavar = 'TYPE', default='RandomGhost')
    parser.add_option('-k', '--numghosts', type='int', dest='numGhosts',
                      help=default('The maximum number of ghosts to use'), default=4)
    parser.add_option('-z', '--zoom', type='float', dest='zoom',
                      help=default('Zoom the size of the graphics window'), default=1.0)
    parser.add_option('-f', '--fixRandomSeed', action='store_true', dest='fixRandomSeed',
                      help='Fixes the random seed to always play the same game', default=False)
    parser.add_option('-r', '--recordActions', action='store_true', dest='record',
                      help='Writes game histories to a file (named by the time they were played)', default=False)
    parser.add_option('--replay', dest='gameToReplay',
                      help='A recorded game file (pickle) to replay', default=None)
    parser.add_option('-a','--agentArgs',dest='agentArgs',
                      help='Comma separated values sent to agent. e.g. "opt1=val1,opt2,opt3=val3"')
    parser.add_option('-x', '--numTraining', dest='numTraining', type='int',
                      help=default('How many episodes are training (suppresses output)'), default=0)
    parser.add_option('--frameTime', dest='frameTime', type='float',
                      help=default('Time to delay between frames; <0 means keyboard'), default=0.1)
    parser.add_option('-c', '--catchExceptions', action='store_true', dest='catchExceptions',
                      help='Turns on exception handling and timeouts during games', default=False)
    parser.add_option('--timeout', dest='timeout', type='int',
                      help=default('Maximum length of time an agent can spend computing in a single game'), default=30)

    options, otherjunk = parser.parse_args(argv)
    if len(otherjunk) != 0:
        raise Exception('Command line input not understood: ' + str(otherjunk))
    args = dict()

    # Fix the random seed
    if options.fixRandomSeed: random.seed('cs188')

    # Choose a layout
    args['layout'] = layout.getLayout( options.layout )
    if args['layout'] == None: raise Exception("The layout " + options.layout + " cannot be found")

    # Choose a Pacman agent (keyboard agents are disallowed without graphics)
    noKeyboard = options.gameToReplay == None and (options.textGraphics or options.quietGraphics)
    pacmanType = loadAgent(options.pacman, noKeyboard)
    agentOpts = parseAgentArgs(options.agentArgs)
    if options.numTraining > 0:
        args['numTraining'] = options.numTraining
        # Forward the training-episode count to the agent unless already set.
        if 'numTraining' not in agentOpts: agentOpts['numTraining'] = options.numTraining
    pacman = pacmanType(**agentOpts) # Instantiate Pacman with agentArgs
    args['pacman'] = pacman

    # Don't display training games
    if 'numTrain' in agentOpts:
        options.numQuiet = int(agentOpts['numTrain'])
        options.numIgnore = int(agentOpts['numTrain'])

    # Choose a ghost agent (ghost indices start at 1; 0 is Pacman)
    ghostType = loadAgent(options.ghost, noKeyboard)
    args['ghosts'] = [ghostType( i+1 ) for i in range( options.numGhosts )]

    # Choose a display format
    if options.quietGraphics:
        import textDisplay
        args['display'] = textDisplay.NullGraphics()
    elif options.textGraphics:
        import textDisplay
        textDisplay.SLEEP_TIME = options.frameTime
        args['display'] = textDisplay.PacmanGraphics()
    else:
        import graphicsDisplay
        args['display'] = graphicsDisplay.PacmanGraphics(options.zoom, frameTime = options.frameTime)
    args['numGames'] = options.numGames
    args['record'] = options.record
    args['catchExceptions'] = options.catchExceptions
    args['timeout'] = options.timeout

    # Special case: recorded games don't use the runGames method or args structure
    if options.gameToReplay != None:
        print 'Replaying recorded game %s.' % options.gameToReplay
        import cPickle
        f = open(options.gameToReplay)
        try: recorded = cPickle.load(f)
        finally: f.close()
        recorded['display'] = args['display']
        replayGame(**recorded)
        sys.exit(0)

    return args
|
||||
|
||||
def loadAgent(pacman, nographics):
    """Locate and return the agent class named `pacman`.

    Searches every directory on $PYTHONPATH (plus the current directory)
    for modules whose filename ends in 'gents.py' and returns the first
    one that defines the requested class.  Raises an Exception when the
    agent cannot be found, or when a keyboard agent is requested while
    graphics are unavailable.
    """
    pythonPathStr = os.path.expandvars("$PYTHONPATH")
    # Windows-style paths use ';' as the separator, Unix-style use ':'.
    separator = ';' if ';' in pythonPathStr else ':'
    pythonPathDirs = pythonPathStr.split(separator)
    pythonPathDirs.append('.')

    for moduleDir in pythonPathDirs:
        if not os.path.isdir(moduleDir):
            continue
        candidates = [f for f in os.listdir(moduleDir) if f.endswith('gents.py')]
        for modulename in candidates:
            try:
                module = __import__(modulename[:-3])
            except ImportError:
                continue
            if pacman in dir(module):
                if nographics and modulename == 'keyboardAgents.py':
                    raise Exception('Using the keyboard requires graphics (not text display)')
                return getattr(module, pacman)
    raise Exception('The agent ' + pacman + ' is not specified in any *Agents.py.')
|
||||
|
||||
def replayGame( layout, actions, display ):
    """Replay a recorded list of (agentIndex, action) moves on the given display."""
    import pacmanAgents, ghostAgents
    rules = ClassicGameRules()
    # The concrete agent types don't matter for a replay: only the recorded
    # actions drive the state transitions below.
    agents = [pacmanAgents.GreedyAgent()] + [ghostAgents.RandomGhost(i+1) for i in range(layout.getNumGhosts())]
    game = rules.newGame( layout, agents[0], agents[1:], display )
    state = game.state
    display.initialize(state.data)

    for action in actions:
        # Execute the action
        state = state.generateSuccessor( *action )
        # Change the display
        display.update( state.data )
        # Allow for game specific conditions (winning, losing, etc.)
        rules.process(state, game)

    display.finish()
|
||||
|
||||
def runGames( layout, pacman, ghosts, display, numGames, record, numTraining = 0, catchExceptions=False, timeout=30 ):
    """
    Run `numGames` games; the first `numTraining` run silently with no
    graphics.  Optionally records each game's move history to a file and
    prints summary statistics over the non-training games.

    Returns the list of non-training Game objects.
    """
    import __main__
    # Expose the display globally so other modules can locate it.
    __main__.__dict__['_display'] = display

    rules = ClassicGameRules(timeout)
    games = []

    for i in range( numGames ):
        beQuiet = i < numTraining
        if beQuiet:
            # Suppress output and graphics
            import textDisplay
            gameDisplay = textDisplay.NullGraphics()
            rules.quiet = True
        else:
            gameDisplay = display
            rules.quiet = False
        game = rules.newGame( layout, pacman, ghosts, gameDisplay, beQuiet, catchExceptions)
        game.run()
        if not beQuiet: games.append(game)

        if record:
            # Pickle the layout and move history, timestamped by local time.
            import time, cPickle
            fname = ('recorded-game-%d' % (i + 1)) + '-'.join([str(t) for t in time.localtime()[1:6]])
            f = file(fname, 'w')
            components = {'layout': layout, 'actions': game.moveHistory}
            cPickle.dump(components, f)
            f.close()

    if (numGames-numTraining) > 0:
        # Summary statistics over the non-training games only.
        scores = [game.state.getScore() for game in games]
        wins = [game.state.isWin() for game in games]
        winRate = wins.count(True)/ float(len(wins))
        print 'Average Score:', sum(scores) / float(len(scores))
        print 'Scores:       ', ', '.join([str(score) for score in scores])
        print 'Win Rate:      %d/%d (%.2f)' % (wins.count(True), len(wins), winRate)
        print 'Record:       ', ', '.join([ ['Loss', 'Win'][int(w)] for w in wins])

    return games
|
||||
|
||||
if __name__ == '__main__':
    """
    The main function called when pacman.py is run
    from the command line:

    > python pacman.py

    See the usage string for more details.

    > python pacman.py --help
    """
    args = readCommand( sys.argv[1:] ) # Get game components based on input
    runGames( **args )

    # import cProfile
    # cProfile.run("runGames( **args )")
    pass
|
52
reinforcement/pacmanAgents.py
Normal file
52
reinforcement/pacmanAgents.py
Normal file
|
@ -0,0 +1,52 @@
|
|||
# pacmanAgents.py
|
||||
# ---------------
|
||||
# Licensing Information: You are free to use or extend these projects for
|
||||
# educational purposes provided that (1) you do not distribute or publish
|
||||
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||
#
|
||||
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||
# The core projects and autograders were primarily created by John DeNero
|
||||
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||
|
||||
|
||||
from pacman import Directions
|
||||
from game import Agent
|
||||
import random
|
||||
import game
|
||||
import util
|
||||
|
||||
class LeftTurnAgent(game.Agent):
    """An agent that turns left at every opportunity."""

    def getAction(self, state):
        legal = state.getLegalPacmanActions()
        heading = state.getPacmanState().configuration.direction
        if heading == Directions.STOP:
            heading = Directions.NORTH
        left = Directions.LEFT[heading]
        # Preference order: left turn, straight ahead, right turn, U-turn.
        preferences = (left, heading, Directions.RIGHT[heading], Directions.LEFT[left])
        for choice in preferences:
            if choice in legal:
                return choice
        return Directions.STOP
|
||||
|
||||
class GreedyAgent(Agent):
    """Picks the legal action whose successor state evaluates best (ties broken randomly)."""

    def __init__(self, evalFn="scoreEvaluation"):
        # Resolve the evaluation function by name at construction time.
        self.evaluationFunction = util.lookup(evalFn, globals())
        assert self.evaluationFunction is not None

    def getAction(self, state):
        # Candidate actions; never voluntarily stop.
        legal = state.getLegalPacmanActions()
        if Directions.STOP in legal:
            legal.remove(Directions.STOP)

        # Score each action by evaluating the successor state it produces.
        scored = [(self.evaluationFunction(state.generateSuccessor(0, action)), action)
                  for action in legal]
        bestScore = max(scored)[0]
        bestActions = [action for score, action in scored if score == bestScore]
        return random.choice(bestActions)
|
||||
|
||||
def scoreEvaluation(state):
    """Evaluation function: simply the state's current score."""
    return state.getScore()
|
18
reinforcement/projectParams.py
Normal file
18
reinforcement/projectParams.py
Normal file
|
@ -0,0 +1,18 @@
|
|||
# projectParams.py
|
||||
# ----------------
|
||||
# Licensing Information: You are free to use or extend these projects for
|
||||
# educational purposes provided that (1) you do not distribute or publish
|
||||
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||
#
|
||||
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||
# The core projects and autograders were primarily created by John DeNero
|
||||
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||
|
||||
|
||||
# Comma-separated list of files the student edits and submits.
STUDENT_CODE_DEFAULT = 'analysis.py,qlearningAgents.py,valueIterationAgents.py'
# Module containing this project's test classes.
PROJECT_TEST_CLASSES = 'reinforcementTestClasses.py'
PROJECT_NAME = 'Project 3: Reinforcement learning'
BONUS_PIC = False  # No bonus picture for this project.
|
186
reinforcement/qlearningAgents.py
Normal file
186
reinforcement/qlearningAgents.py
Normal file
|
@ -0,0 +1,186 @@
|
|||
# qlearningAgents.py
|
||||
# ------------------
|
||||
# Licensing Information: You are free to use or extend these projects for
|
||||
# educational purposes provided that (1) you do not distribute or publish
|
||||
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||
#
|
||||
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||
# The core projects and autograders were primarily created by John DeNero
|
||||
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||
|
||||
|
||||
from game import *
|
||||
from learningAgents import ReinforcementAgent
|
||||
from featureExtractors import *
|
||||
|
||||
import random,util,math
|
||||
|
||||
class QLearningAgent(ReinforcementAgent):
    """
    Q-Learning Agent

    Functions you should fill in:
        - computeValueFromQValues
        - computeActionFromQValues
        - getQValue
        - getAction
        - update

    Instance variables you have access to
        - self.epsilon (exploration prob)
        - self.alpha (learning rate)
        - self.discount (discount rate)

    Functions you should use
        - self.getLegalActions(state)
          which returns legal actions for a state
    """
    def __init__(self, **args):
        "You can initialize Q-values here..."
        ReinforcementAgent.__init__(self, **args)

        "*** YOUR CODE HERE ***"

    def getQValue(self, state, action):
        """
        Returns Q(state,action)
        Should return 0.0 if we have never seen a state
        or the Q node value otherwise
        """
        "*** YOUR CODE HERE ***"
        util.raiseNotDefined()


    def computeValueFromQValues(self, state):
        """
        Returns max_action Q(state,action)
        where the max is over legal actions.  Note that if
        there are no legal actions, which is the case at the
        terminal state, you should return a value of 0.0.
        """
        "*** YOUR CODE HERE ***"
        util.raiseNotDefined()

    def computeActionFromQValues(self, state):
        """
        Compute the best action to take in a state.  Note that if there
        are no legal actions, which is the case at the terminal state,
        you should return None.
        """
        "*** YOUR CODE HERE ***"
        util.raiseNotDefined()

    def getAction(self, state):
        """
        Compute the action to take in the current state.  With
        probability self.epsilon, we should take a random action and
        take the best policy action otherwise.  Note that if there are
        no legal actions, which is the case at the terminal state, you
        should choose None as the action.

        HINT: You might want to use util.flipCoin(prob)
        HINT: To pick randomly from a list, use random.choice(list)
        """
        # Pick Action
        legalActions = self.getLegalActions(state)
        action = None
        "*** YOUR CODE HERE ***"
        util.raiseNotDefined()

        return action

    def update(self, state, action, nextState, reward):
        """
        The parent class calls this to observe a
        state = action => nextState and reward transition.
        You should do your Q-Value update here

        NOTE: You should never call this function,
        it will be called on your behalf
        """
        "*** YOUR CODE HERE ***"
        util.raiseNotDefined()

    def getPolicy(self, state):
        # Convenience alias for computeActionFromQValues.
        return self.computeActionFromQValues(state)

    def getValue(self, state):
        # Convenience alias for computeValueFromQValues.
        return self.computeValueFromQValues(state)
|
||||
|
||||
|
||||
class PacmanQAgent(QLearningAgent):
    """Exactly the same as QLearningAgent, but with different default parameters."""

    def __init__(self, epsilon=0.05, gamma=0.8, alpha=0.2, numTraining=0, **args):
        """
        These default parameters can be changed from the pacman.py command line.
        For example, to change the exploration rate, try:
            python pacman.py -p PacmanQLearningAgent -a epsilon=0.1

        alpha       - learning rate
        epsilon     - exploration rate
        gamma       - discount factor
        numTraining - number of training episodes, i.e. no learning after these many episodes
        """
        # Forward the Pacman-specific defaults to the generic constructor.
        for key, value in (('epsilon', epsilon), ('gamma', gamma),
                           ('alpha', alpha), ('numTraining', numTraining)):
            args[key] = value
        self.index = 0  # This is always Pacman
        QLearningAgent.__init__(self, **args)

    def getAction(self, state):
        """
        Simply calls the getAction method of QLearningAgent and then
        informs parent of action for Pacman.  Do not change or remove this
        method.
        """
        chosen = QLearningAgent.getAction(self, state)
        self.doAction(state, chosen)
        return chosen
|
||||
|
||||
|
||||
class ApproximateQAgent(PacmanQAgent):
    """
    ApproximateQLearningAgent

    You should only have to overwrite getQValue
    and update.  All other QLearningAgent functions
    should work as is.
    """
    def __init__(self, extractor='IdentityExtractor', **args):
        # The feature-extractor class is looked up by name and instantiated.
        self.featExtractor = util.lookup(extractor, globals())()
        PacmanQAgent.__init__(self, **args)
        # Weight vector, keyed by feature name (util.Counter).
        self.weights = util.Counter()

    def getWeights(self):
        return self.weights

    def getQValue(self, state, action):
        """
        Should return Q(state,action) = w * featureVector
        where * is the dotProduct operator
        """
        "*** YOUR CODE HERE ***"
        util.raiseNotDefined()

    def update(self, state, action, nextState, reward):
        """
        Should update your weights based on transition
        """
        "*** YOUR CODE HERE ***"
        util.raiseNotDefined()

    def final(self, state):
        "Called at the end of each game."
        # call the super-class final method
        PacmanQAgent.final(self, state)

        # did we finish training?
        if self.episodesSoFar == self.numTraining:
            # you might want to print your weights here for debugging
            "*** YOUR CODE HERE ***"
            pass
|
924
reinforcement/reinforcementTestClasses.py
Normal file
924
reinforcement/reinforcementTestClasses.py
Normal file
|
@ -0,0 +1,924 @@
|
|||
# reinforcementTestClasses.py
|
||||
# ---------------------------
|
||||
# Licensing Information: You are free to use or extend these projects for
|
||||
# educational purposes provided that (1) you do not distribute or publish
|
||||
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||
#
|
||||
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||
# The core projects and autograders were primarily created by John DeNero
|
||||
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||
|
||||
|
||||
import testClasses
|
||||
import random, math, traceback, sys, os
|
||||
import layout, textDisplay, pacman, gridworld
|
||||
import time
|
||||
from util import Counter, TimeoutFunction, FixedRandom
|
||||
from collections import defaultdict
|
||||
from pprint import PrettyPrinter
|
||||
from hashlib import sha1
|
||||
pp = PrettyPrinter()
|
||||
VERBOSE = False
|
||||
|
||||
import gridworld
|
||||
|
||||
LIVINGREWARD = -0.1
|
||||
NOISE = 0.2
|
||||
|
||||
class ValueIterationTest(testClasses.TestCase):
    """Autograder test: runs the student's ValueIterationAgent for a range of
    iteration counts and compares its values, Q-values, and final policy
    against a recorded solution file."""

    def __init__(self, question, testDict):
        super(ValueIterationTest, self).__init__(question, testDict)
        self.discount = float(testDict['discount'])
        self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
        iterations = int(testDict['valueIterations'])
        if 'noise' in testDict: self.grid.setNoise(float(testDict['noise']))
        if 'livingReward' in testDict: self.grid.setLivingReward(float(testDict['livingReward']))
        maxPreIterations = 10
        # Check every iteration count up to maxPreIterations, then (if the
        # test asks for more) jump straight to the final count.
        # list(...) keeps the .append below working on Python 3, where
        # range() returns a lazy object instead of a list.
        self.numsIterationsForDisplay = list(range(min(iterations, maxPreIterations)))
        self.testOutFile = testDict['test_out_file']
        if maxPreIterations < iterations:
            self.numsIterationsForDisplay.append(iterations)

    def writeFailureFile(self, string):
        """Record failure diagnostics to the test's output file."""
        with open(self.testOutFile, 'w') as handle:
            handle.write(string)

    def removeFailureFileIfExists(self):
        """Delete any stale failure file left over from a previous run."""
        if os.path.exists(self.testOutFile):
            os.remove(self.testOutFile)

    def execute(self, grades, moduleDict, solutionDict):
        """Run the comparison at each display iteration count; the policy is
        only checked on the last one.  Fails fast on the first mismatch."""
        failureOutputFileString = ''
        failureOutputStdString = ''
        for n in self.numsIterationsForDisplay:
            checkPolicy = (n == self.numsIterationsForDisplay[-1])
            testPass, stdOutString, fileOutString = self.executeNIterations(grades, moduleDict, solutionDict, n, checkPolicy)
            failureOutputStdString += stdOutString
            failureOutputFileString += fileOutString
            if not testPass:
                self.addMessage(failureOutputStdString)
                self.addMessage('For more details to help you debug, see test output file %s\n\n' % self.testOutFile)
                self.writeFailureFile(failureOutputFileString)
                return self.testFail(grades)
        self.removeFailureFileIfExists()
        return self.testPass(grades)

    def executeNIterations(self, grades, moduleDict, solutionDict, n, checkPolicy):
        """Compare values and Q-values after n iterations (and the policy when
        checkPolicy is set) against the solution dict.

        Returns (testPass, stdout text, failure-file text)."""
        testPass = True
        valuesPretty, qValuesPretty, actions, policyPretty = self.runAgent(moduleDict, n)
        stdOutString = ''
        fileOutString = ''
        valuesKey = "values_k_%d" % n
        if self.comparePrettyValues(valuesPretty, solutionDict[valuesKey]):
            fileOutString += "Values at iteration %d are correct.\n" % n
            fileOutString += "   Student/correct solution:\n %s\n" % self.prettyValueSolutionString(valuesKey, valuesPretty)
        else:
            testPass = False
            outString = "Values at iteration %d are NOT correct.\n" % n
            outString += "   Student solution:\n %s\n" % self.prettyValueSolutionString(valuesKey, valuesPretty)
            outString += "   Correct solution:\n %s\n" % self.prettyValueSolutionString(valuesKey, solutionDict[valuesKey])
            stdOutString += outString
            fileOutString += outString
        for action in actions:
            qValuesKey = 'q_values_k_%d_action_%s' % (n, action)
            qValues = qValuesPretty[action]
            if self.comparePrettyValues(qValues, solutionDict[qValuesKey]):
                fileOutString += "Q-Values at iteration %d for action %s are correct.\n" % (n, action)
                fileOutString += "   Student/correct solution:\n %s\n" % self.prettyValueSolutionString(qValuesKey, qValues)
            else:
                testPass = False
                outString = "Q-Values at iteration %d for action %s are NOT correct.\n" % (n, action)
                outString += "   Student solution:\n %s\n" % self.prettyValueSolutionString(qValuesKey, qValues)
                outString += "   Correct solution:\n %s\n" % self.prettyValueSolutionString(qValuesKey, solutionDict[qValuesKey])
                stdOutString += outString
                fileOutString += outString
        if checkPolicy:
            if not self.comparePrettyValues(policyPretty, solutionDict['policy']):
                testPass = False
                outString = "Policy is NOT correct.\n"
                outString += "   Student solution:\n %s\n" % self.prettyValueSolutionString('policy', policyPretty)
                outString += "   Correct solution:\n %s\n" % self.prettyValueSolutionString('policy', solutionDict['policy'])
                stdOutString += outString
                fileOutString += outString
        return testPass, stdOutString, fileOutString

    def writeSolution(self, moduleDict, filePath):
        """Run the reference agent and record values/Q-values/policy to filePath."""
        with open(filePath, 'w') as handle:
            policyPretty = ''
            actions = []
            for n in self.numsIterationsForDisplay:
                valuesPretty, qValuesPretty, actions, policyPretty = self.runAgent(moduleDict, n)
                handle.write(self.prettyValueSolutionString('values_k_%d' % n, valuesPretty))
                for action in actions:
                    handle.write(self.prettyValueSolutionString('q_values_k_%d_action_%s' % (n, action), qValuesPretty[action]))
            handle.write(self.prettyValueSolutionString('policy', policyPretty))
            handle.write(self.prettyValueSolutionString('actions', '\n'.join(actions) + '\n'))
        return True

    def runAgent(self, moduleDict, numIterations):
        """Run the student's ValueIterationAgent for numIterations and return
        (pretty values, pretty Q-values per action, actions, pretty policy)."""
        agent = moduleDict['valueIterationAgents'].ValueIterationAgent(self.grid, discount=self.discount, iterations=numIterations)
        states = self.grid.getStates()
        # Union of legal actions over all states.  Replaces the Python-2-only
        # bare reduce(); ordering is arbitrary, as it was before.
        actionSet = set()
        for state in states:
            actionSet.update(self.grid.getPossibleActions(state))
        actions = list(actionSet)
        values = {}
        qValues = {}
        policy = {}
        for state in states:
            values[state] = agent.getValue(state)
            policy[state] = agent.computeActionFromValues(state)
            possibleActions = self.grid.getPossibleActions(state)
            for action in actions:
                if action not in qValues:  # dict.has_key is Python-2-only
                    qValues[action] = {}
                if action in possibleActions:
                    qValues[action][state] = agent.computeQValueFromValues(state, action)
                else:
                    # None marks actions that are illegal in this state.
                    qValues[action][state] = None
        valuesPretty = self.prettyValues(values)
        policyPretty = self.prettyPolicy(policy)
        qValuesPretty = {}
        for action in actions:
            qValuesPretty[action] = self.prettyValues(qValues[action])
        return (valuesPretty, qValuesPretty, actions, policyPretty)

    def prettyPrint(self, elements, formatString):
        """Render a {state: value} map as a grid-shaped text block.

        Cells not in the state set print as underscores; None prints as
        'illegal'.  Rows are emitted top (highest y) to bottom."""
        pretty = ''
        states = self.grid.getStates()
        for ybar in range(self.grid.grid.height):
            y = self.grid.grid.height - 1 - ybar
            row = []
            for x in range(self.grid.grid.width):
                if (x, y) in states:
                    value = elements[(x, y)]
                    if value is None:
                        row.append('   illegal')
                    else:
                        row.append(formatString.format(elements[(x, y)]))
                else:
                    row.append('_' * 10)
            pretty += '        %s\n' % ("   ".join(row),)
        pretty += '\n'
        return pretty

    def prettyValues(self, values):
        """Format numeric values in fixed-width columns."""
        return self.prettyPrint(values, '{0:10.4f}')

    def prettyPolicy(self, policy):
        """Format action names in fixed-width columns."""
        return self.prettyPrint(policy, '{0:10s}')

    def prettyValueSolutionString(self, name, pretty):
        """Wrap a pretty-printed block in the solution-file key syntax."""
        return '%s: """\n%s\n"""\n\n' % (name, pretty.rstrip())

    def comparePrettyValues(self, aPretty, bPretty, tolerance=0.01):
        """Token-wise comparison of two pretty-printed blocks: numeric tokens
        must agree within tolerance, other tokens must match exactly."""
        aList = self.parsePrettyValues(aPretty)
        bList = self.parsePrettyValues(bPretty)
        if len(aList) != len(bList):
            return False
        for a, b in zip(aList, bList):
            try:
                aNum = float(a)
                bNum = float(b)
                # error = abs((aNum - bNum) / ((aNum + bNum) / 2.0))
                error = abs(aNum - bNum)
                if error > tolerance:
                    return False
            except ValueError:
                # Non-numeric tokens (e.g. 'illegal', action names).
                if a.strip() != b.strip():
                    return False
        return True

    def parsePrettyValues(self, pretty):
        """Split a pretty-printed block into whitespace-separated tokens."""
        values = pretty.split()
        return values
||||
|
||||
class ApproximateQLearningTest(testClasses.TestCase):
    """Autograder test for ApproximateQAgent: feeds the agent a fixed-seed
    sequence of random experiences and compares its feature weights and
    Q-values against the recorded solution."""

    def __init__(self, question, testDict):
        super(ApproximateQLearningTest, self).__init__(question, testDict)
        self.discount = float(testDict['discount'])
        self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
        if 'noise' in testDict: self.grid.setNoise(float(testDict['noise']))
        if 'livingReward' in testDict: self.grid.setLivingReward(float(testDict['livingReward']))
        # NOTE(review): rebuilding the grid here discards the noise and
        # livingReward just applied; kept as-is because the recorded solution
        # files were generated with this behavior.
        self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
        self.env = gridworld.GridworldEnvironment(self.grid)
        self.epsilon = float(testDict['epsilon'])
        self.learningRate = float(testDict['learningRate'])
        self.extractor = 'IdentityExtractor'
        if 'extractor' in testDict:
            self.extractor = testDict['extractor']
        self.opts = {'actionFn': self.env.getPossibleActions, 'epsilon': self.epsilon, 'gamma': self.discount, 'alpha': self.learningRate}
        numExperiences = int(testDict['numExperiences'])
        maxPreExperiences = 10
        # list(...) keeps the .append below working on Python 3 (lazy range).
        self.numsExperiencesForDisplay = list(range(min(numExperiences, maxPreExperiences)))
        self.testOutFile = testDict['test_out_file']
        if maxPreExperiences < numExperiences:
            self.numsExperiencesForDisplay.append(numExperiences)

    def writeFailureFile(self, string):
        """Record failure diagnostics to the test's output file."""
        with open(self.testOutFile, 'w') as handle:
            handle.write(string)

    def removeFailureFileIfExists(self):
        """Delete any stale failure file left over from a previous run."""
        if os.path.exists(self.testOutFile):
            os.remove(self.testOutFile)

    def execute(self, grades, moduleDict, solutionDict):
        """Check the agent after each experience count; fail fast on mismatch."""
        failureOutputFileString = ''
        failureOutputStdString = ''
        for n in self.numsExperiencesForDisplay:
            testPass, stdOutString, fileOutString = self.executeNExperiences(grades, moduleDict, solutionDict, n)
            failureOutputStdString += stdOutString
            failureOutputFileString += fileOutString
            if not testPass:
                self.addMessage(failureOutputStdString)
                self.addMessage('For more details to help you debug, see test output file %s\n\n' % self.testOutFile)
                self.writeFailureFile(failureOutputFileString)
                return self.testFail(grades)
        self.removeFailureFileIfExists()
        return self.testPass(grades)

    def executeNExperiences(self, grades, moduleDict, solutionDict, n):
        """Compare weights and Q-values after n experiences.

        Returns (testPass, stdout text, failure-file text)."""
        testPass = True
        qValuesPretty, weights, actions, lastExperience = self.runAgent(moduleDict, n)
        stdOutString = ''
        fileOutString = "==================== Iteration %d ====================\n" % n
        if lastExperience is not None:
            fileOutString += "Agent observed the transition (startState = %s, action = %s, endState = %s, reward = %f)\n\n" % lastExperience
        weightsKey = 'weights_k_%d' % n
        # NOTE(review): eval() of the solution file is safe only because the
        # file is autograder-generated, not user input.
        if weights == eval(solutionDict[weightsKey]):
            fileOutString += "Weights at iteration %d are correct." % n
            fileOutString += "   Student/correct solution:\n\n%s\n\n" % pp.pformat(weights)
        for action in actions:
            qValuesKey = 'q_values_k_%d_action_%s' % (n, action)
            qValues = qValuesPretty[action]
            if self.comparePrettyValues(qValues, solutionDict[qValuesKey]):
                fileOutString += "Q-Values at iteration %d for action '%s' are correct." % (n, action)
                fileOutString += "   Student/correct solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, qValues)
            else:
                testPass = False
                outString = "Q-Values at iteration %d for action '%s' are NOT correct." % (n, action)
                outString += "   Student solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, qValues)
                outString += "   Correct solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, solutionDict[qValuesKey])
                stdOutString += outString
                fileOutString += outString
        return testPass, stdOutString, fileOutString

    def writeSolution(self, moduleDict, filePath):
        """Run the reference agent and record weights and Q-values to filePath."""
        with open(filePath, 'w') as handle:
            for n in self.numsExperiencesForDisplay:
                qValuesPretty, weights, actions, _ = self.runAgent(moduleDict, n)
                handle.write(self.prettyValueSolutionString('weights_k_%d' % n, pp.pformat(weights)))
                for action in actions:
                    handle.write(self.prettyValueSolutionString('q_values_k_%d_action_%s' % (n, action), qValuesPretty[action]))
        return True

    def runAgent(self, moduleDict, numExperiences):
        """Feed the agent numExperiences fixed-seed random transitions; return
        (pretty Q-values per action, weights, actions, last experience)."""
        agent = moduleDict['qlearningAgents'].ApproximateQAgent(extractor=self.extractor, **self.opts)
        # sorted(...) replaces filter(...)+list.sort(), which breaks on
        # Python 3 where filter returns an iterator.
        states = sorted(state for state in self.grid.getStates()
                        if len(self.grid.getPossibleActions(state)) > 0)
        randObj = FixedRandom().random
        # choose a random start state and a random possible action from that state
        # get the next state and reward from the transition function
        lastExperience = None
        for i in range(numExperiences):
            startState = randObj.choice(states)
            action = randObj.choice(self.grid.getPossibleActions(startState))
            (endState, reward) = self.env.getRandomNextState(startState, action, randObj=randObj)
            lastExperience = (startState, action, endState, reward)
            agent.update(*lastExperience)
        # Union of legal actions over all states (replaces Python-2-only
        # bare reduce(); ordering stays arbitrary, as before).
        actionSet = set()
        for state in states:
            actionSet.update(self.grid.getPossibleActions(state))
        actions = list(actionSet)
        qValues = {}
        weights = agent.getWeights()
        for state in states:
            possibleActions = self.grid.getPossibleActions(state)
            for action in actions:
                if action not in qValues:  # dict.has_key is Python-2-only
                    qValues[action] = {}
                if action in possibleActions:
                    qValues[action][state] = agent.getQValue(state, action)
                else:
                    # None marks actions that are illegal in this state.
                    qValues[action][state] = None
        qValuesPretty = {}
        for action in actions:
            qValuesPretty[action] = self.prettyValues(qValues[action])
        return (qValuesPretty, weights, actions, lastExperience)

    def prettyPrint(self, elements, formatString):
        """Render a {state: value} map as a grid-shaped text block.

        Cells not in the state set print as underscores; None prints as
        'illegal'.  Rows are emitted top (highest y) to bottom."""
        pretty = ''
        states = self.grid.getStates()
        for ybar in range(self.grid.grid.height):
            y = self.grid.grid.height - 1 - ybar
            row = []
            for x in range(self.grid.grid.width):
                if (x, y) in states:
                    value = elements[(x, y)]
                    if value is None:
                        row.append('   illegal')
                    else:
                        row.append(formatString.format(elements[(x, y)]))
                else:
                    row.append('_' * 10)
            pretty += '        %s\n' % ("   ".join(row),)
        pretty += '\n'
        return pretty

    def prettyValues(self, values):
        """Format numeric values in fixed-width columns."""
        return self.prettyPrint(values, '{0:10.4f}')

    def prettyPolicy(self, policy):
        """Format action names in fixed-width columns."""
        return self.prettyPrint(policy, '{0:10s}')

    def prettyValueSolutionString(self, name, pretty):
        """Wrap a pretty-printed block in the solution-file key syntax."""
        return '%s: """\n%s\n"""\n\n' % (name, pretty.rstrip())

    def comparePrettyValues(self, aPretty, bPretty, tolerance=0.01):
        """Token-wise comparison: numeric tokens must agree within tolerance,
        other tokens must match exactly."""
        aList = self.parsePrettyValues(aPretty)
        bList = self.parsePrettyValues(bPretty)
        if len(aList) != len(bList):
            return False
        for a, b in zip(aList, bList):
            try:
                aNum = float(a)
                bNum = float(b)
                # error = abs((aNum - bNum) / ((aNum + bNum) / 2.0))
                error = abs(aNum - bNum)
                if error > tolerance:
                    return False
            except ValueError:
                # Non-numeric tokens (e.g. 'illegal', action names).
                if a.strip() != b.strip():
                    return False
        return True

    def parsePrettyValues(self, pretty):
        """Split a pretty-printed block into whitespace-separated tokens."""
        values = pretty.split()
        return values
||||
|
||||
class QLearningTest(testClasses.TestCase):
    """Autograder test for QLearningAgent: feeds a fixed-seed sequence of
    random experiences and compares Q-values (and, at the final count, values
    and policy) against the recorded solution."""

    def __init__(self, question, testDict):
        super(QLearningTest, self).__init__(question, testDict)
        self.discount = float(testDict['discount'])
        self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
        if 'noise' in testDict: self.grid.setNoise(float(testDict['noise']))
        if 'livingReward' in testDict: self.grid.setLivingReward(float(testDict['livingReward']))
        # NOTE(review): rebuilding the grid here discards the noise and
        # livingReward just applied; kept as-is because the recorded solution
        # files were generated with this behavior.
        self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
        self.env = gridworld.GridworldEnvironment(self.grid)
        self.epsilon = float(testDict['epsilon'])
        self.learningRate = float(testDict['learningRate'])
        self.opts = {'actionFn': self.env.getPossibleActions, 'epsilon': self.epsilon, 'gamma': self.discount, 'alpha': self.learningRate}
        numExperiences = int(testDict['numExperiences'])
        maxPreExperiences = 10
        # list(...) keeps the .append below working on Python 3 (lazy range).
        self.numsExperiencesForDisplay = list(range(min(numExperiences, maxPreExperiences)))
        self.testOutFile = testDict['test_out_file']
        if maxPreExperiences < numExperiences:
            self.numsExperiencesForDisplay.append(numExperiences)

    def writeFailureFile(self, string):
        """Record failure diagnostics to the test's output file."""
        with open(self.testOutFile, 'w') as handle:
            handle.write(string)

    def removeFailureFileIfExists(self):
        """Delete any stale failure file left over from a previous run."""
        if os.path.exists(self.testOutFile):
            os.remove(self.testOutFile)

    def execute(self, grades, moduleDict, solutionDict):
        """Check the agent after each experience count; values and policy are
        only compared at the final count.  Fails fast on mismatch."""
        failureOutputFileString = ''
        failureOutputStdString = ''
        for n in self.numsExperiencesForDisplay:
            checkValuesAndPolicy = (n == self.numsExperiencesForDisplay[-1])
            testPass, stdOutString, fileOutString = self.executeNExperiences(grades, moduleDict, solutionDict, n, checkValuesAndPolicy)
            failureOutputStdString += stdOutString
            failureOutputFileString += fileOutString
            if not testPass:
                self.addMessage(failureOutputStdString)
                self.addMessage('For more details to help you debug, see test output file %s\n\n' % self.testOutFile)
                self.writeFailureFile(failureOutputFileString)
                return self.testFail(grades)
        self.removeFailureFileIfExists()
        return self.testPass(grades)

    def executeNExperiences(self, grades, moduleDict, solutionDict, n, checkValuesAndPolicy):
        """Compare Q-values (and optionally values/policy) after n experiences.

        Returns (testPass, stdout text, failure-file text)."""
        testPass = True
        valuesPretty, qValuesPretty, actions, policyPretty, lastExperience = self.runAgent(moduleDict, n)
        stdOutString = ''
        fileOutString = "==================== Iteration %d ====================\n" % n
        if lastExperience is not None:
            fileOutString += "Agent observed the transition (startState = %s, action = %s, endState = %s, reward = %f)\n\n\n" % lastExperience
        for action in actions:
            qValuesKey = 'q_values_k_%d_action_%s' % (n, action)
            qValues = qValuesPretty[action]
            if self.comparePrettyValues(qValues, solutionDict[qValuesKey]):
                fileOutString += "Q-Values at iteration %d for action '%s' are correct." % (n, action)
                fileOutString += "   Student/correct solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, qValues)
            else:
                testPass = False
                outString = "Q-Values at iteration %d for action '%s' are NOT correct." % (n, action)
                outString += "   Student solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, qValues)
                outString += "   Correct solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, solutionDict[qValuesKey])
                stdOutString += outString
                fileOutString += outString
        if checkValuesAndPolicy:
            if not self.comparePrettyValues(valuesPretty, solutionDict['values']):
                testPass = False
                outString = "Values are NOT correct."
                outString += "   Student solution:\n\t%s" % self.prettyValueSolutionString('values', valuesPretty)
                outString += "   Correct solution:\n\t%s" % self.prettyValueSolutionString('values', solutionDict['values'])
                stdOutString += outString
                fileOutString += outString
            if not self.comparePrettyValues(policyPretty, solutionDict['policy']):
                testPass = False
                outString = "Policy is NOT correct."
                outString += "   Student solution:\n\t%s" % self.prettyValueSolutionString('policy', policyPretty)
                outString += "   Correct solution:\n\t%s" % self.prettyValueSolutionString('policy', solutionDict['policy'])
                stdOutString += outString
                fileOutString += outString
        return testPass, stdOutString, fileOutString

    def writeSolution(self, moduleDict, filePath):
        """Run the reference agent and record Q-values, values, and policy."""
        with open(filePath, 'w') as handle:
            valuesPretty = ''
            policyPretty = ''
            for n in self.numsExperiencesForDisplay:
                valuesPretty, qValuesPretty, actions, policyPretty, _ = self.runAgent(moduleDict, n)
                for action in actions:
                    handle.write(self.prettyValueSolutionString('q_values_k_%d_action_%s' % (n, action), qValuesPretty[action]))
            handle.write(self.prettyValueSolutionString('values', valuesPretty))
            handle.write(self.prettyValueSolutionString('policy', policyPretty))
        return True

    def runAgent(self, moduleDict, numExperiences):
        """Feed the agent numExperiences fixed-seed random transitions; return
        (pretty values, pretty Q-values per action, actions, pretty policy,
        last experience)."""
        agent = moduleDict['qlearningAgents'].QLearningAgent(**self.opts)
        # sorted(...) replaces filter(...)+list.sort(), which breaks on
        # Python 3 where filter returns an iterator.
        states = sorted(state for state in self.grid.getStates()
                        if len(self.grid.getPossibleActions(state)) > 0)
        randObj = FixedRandom().random
        # choose a random start state and a random possible action from that state
        # get the next state and reward from the transition function
        lastExperience = None
        for i in range(numExperiences):
            startState = randObj.choice(states)
            action = randObj.choice(self.grid.getPossibleActions(startState))
            (endState, reward) = self.env.getRandomNextState(startState, action, randObj=randObj)
            lastExperience = (startState, action, endState, reward)
            agent.update(*lastExperience)
        # Union of legal actions over all states (replaces Python-2-only
        # bare reduce(); ordering stays arbitrary, as before).
        actionSet = set()
        for state in states:
            actionSet.update(self.grid.getPossibleActions(state))
        actions = list(actionSet)
        values = {}
        qValues = {}
        policy = {}
        for state in states:
            values[state] = agent.computeValueFromQValues(state)
            policy[state] = agent.computeActionFromQValues(state)
            possibleActions = self.grid.getPossibleActions(state)
            for action in actions:
                if action not in qValues:  # dict.has_key is Python-2-only
                    qValues[action] = {}
                if action in possibleActions:
                    qValues[action][state] = agent.getQValue(state, action)
                else:
                    # None marks actions that are illegal in this state.
                    qValues[action][state] = None
        valuesPretty = self.prettyValues(values)
        policyPretty = self.prettyPolicy(policy)
        qValuesPretty = {}
        for action in actions:
            qValuesPretty[action] = self.prettyValues(qValues[action])
        return (valuesPretty, qValuesPretty, actions, policyPretty, lastExperience)

    def prettyPrint(self, elements, formatString):
        """Render a {state: value} map as a grid-shaped text block.

        Cells not in the state set print as underscores; None prints as
        'illegal'.  Rows are emitted top (highest y) to bottom."""
        pretty = ''
        states = self.grid.getStates()
        for ybar in range(self.grid.grid.height):
            y = self.grid.grid.height - 1 - ybar
            row = []
            for x in range(self.grid.grid.width):
                if (x, y) in states:
                    value = elements[(x, y)]
                    if value is None:
                        row.append('   illegal')
                    else:
                        row.append(formatString.format(elements[(x, y)]))
                else:
                    row.append('_' * 10)
            pretty += '        %s\n' % ("   ".join(row),)
        pretty += '\n'
        return pretty

    def prettyValues(self, values):
        """Format numeric values in fixed-width columns."""
        return self.prettyPrint(values, '{0:10.4f}')

    def prettyPolicy(self, policy):
        """Format action names in fixed-width columns."""
        return self.prettyPrint(policy, '{0:10s}')

    def prettyValueSolutionString(self, name, pretty):
        """Wrap a pretty-printed block in the solution-file key syntax."""
        return '%s: """\n%s\n"""\n\n' % (name, pretty.rstrip())

    def comparePrettyValues(self, aPretty, bPretty, tolerance=0.01):
        """Token-wise comparison: numeric tokens must agree within tolerance,
        other tokens must match exactly."""
        aList = self.parsePrettyValues(aPretty)
        bList = self.parsePrettyValues(bPretty)
        if len(aList) != len(bList):
            return False
        for a, b in zip(aList, bList):
            try:
                aNum = float(a)
                bNum = float(b)
                # error = abs((aNum - bNum) / ((aNum + bNum) / 2.0))
                error = abs(aNum - bNum)
                if error > tolerance:
                    return False
            except ValueError:
                # Non-numeric tokens (e.g. 'illegal', action names).
                if a.strip() != b.strip():
                    return False
        return True

    def parsePrettyValues(self, pretty):
        """Split a pretty-printed block into whitespace-separated tokens."""
        values = pretty.split()
        return values
||||
|
||||
class EpsilonGreedyTest(testClasses.TestCase):
    """Checks that QLearningAgent.getAction explores with (approximately) the
    configured epsilon, by sampling actions repeatedly and measuring the
    empirical exploration rate per state."""

    def __init__(self, question, testDict):
        super(EpsilonGreedyTest, self).__init__(question, testDict)
        self.discount = float(testDict['discount'])
        self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
        if 'noise' in testDict: self.grid.setNoise(float(testDict['noise']))
        if 'livingReward' in testDict: self.grid.setLivingReward(float(testDict['livingReward']))

        # NOTE(review): rebuilding the grid here discards the noise and
        # livingReward just applied; kept as-is for parity with how the
        # course's recorded results were generated.
        self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
        self.env = gridworld.GridworldEnvironment(self.grid)
        self.epsilon = float(testDict['epsilon'])
        self.learningRate = float(testDict['learningRate'])
        self.numExperiences = int(testDict['numExperiences'])
        self.numIterations = int(testDict['iterations'])
        self.opts = {'actionFn': self.env.getPossibleActions, 'epsilon': self.epsilon, 'gamma': self.discount, 'alpha': self.learningRate}

    def execute(self, grades, moduleDict, solutionDict):
        """Pass iff the empirical epsilon matches the configured one."""
        if self.testEpsilonGreedy(moduleDict):
            return self.testPass(grades)
        else:
            return self.testFail(grades)

    def writeSolution(self, moduleDict, filePath):
        """No recorded solution is needed; write a placeholder file."""
        with open(filePath, 'w') as handle:
            handle.write('# This is the solution file for %s.\n' % self.path)
            handle.write('# File intentionally blank.\n')
        return True

    def runAgent(self, moduleDict):
        """Train a fresh QLearningAgent on fixed-seed random experiences."""
        agent = moduleDict['qlearningAgents'].QLearningAgent(**self.opts)
        # sorted(...) replaces filter(...)+list.sort(), which breaks on
        # Python 3 where filter returns an iterator.
        states = sorted(state for state in self.grid.getStates()
                        if len(self.grid.getPossibleActions(state)) > 0)
        randObj = FixedRandom().random
        # choose a random start state and a random possible action from that state
        # get the next state and reward from the transition function
        for i in range(self.numExperiences):
            startState = randObj.choice(states)
            action = randObj.choice(self.grid.getPossibleActions(startState))
            (endState, reward) = self.env.getRandomNextState(startState, action, randObj=randObj)
            agent.update(startState, action, endState, reward)
        return agent

    def testEpsilonGreedy(self, moduleDict, tolerance=0.025):
        """Sample getAction numIterations times per state and compare the
        measured exploration rate with self.epsilon, within tolerance."""
        agent = self.runAgent(moduleDict)
        for state in self.grid.getStates():
            numLegalActions = len(agent.getLegalActions(state))
            if numLegalActions <= 1:
                # With at most one action, greedy and random choices coincide,
                # so no epsilon estimate is possible here.
                continue
            numGreedyChoices = 0
            optimalAction = agent.computeActionFromQValues(state)
            for iteration in range(self.numIterations):
                # assume that their computeActionFromQValues implementation is correct (q4 tests this)
                if agent.getAction(state) == optimalAction:
                    numGreedyChoices += 1
            # e = epsilon, g = # greedy actions, n = numIterations, k = numLegalActions
            # g = n * [(1-e) + e/k] -> e = (n - g) / (n - n/k)
            empiricalEpsilonNumerator = self.numIterations - numGreedyChoices
            empiricalEpsilonDenominator = self.numIterations - self.numIterations / float(numLegalActions)
            empiricalEpsilon = empiricalEpsilonNumerator / empiricalEpsilonDenominator
            error = abs(empiricalEpsilon - self.epsilon)
            if error > tolerance:
                self.addMessage("Epsilon-greedy action selection is not correct.")
                self.addMessage("Actual epsilon = %f; student empirical epsilon = %f; error = %f > tolerance = %f" % (self.epsilon, empiricalEpsilon, error, tolerance))
                return False
        return True
|
||||
|
||||
### q6
|
||||
class Question6Test(testClasses.TestCase):
    """Checks analysis.question6() against a hashed reference answer (the
    expected answer is never stored in plain text)."""

    def __init__(self, question, testDict):
        super(Question6Test, self).__init__(question, testDict)

    def execute(self, grades, moduleDict, solutionDict):
        """Normalize the student's answer, hash it, and compare digests."""
        studentSolution = moduleDict['analysis'].question6()
        studentSolution = str(studentSolution).strip().lower()
        # encode() so sha1 accepts the answer on Python 3 as well; on
        # Python 2 this is a byte-identical no-op for ASCII answers.
        hashedSolution = sha1(studentSolution.encode('utf-8')).hexdigest()
        if hashedSolution == '46729c96bb1e4081fdc81a8ff74b3e5db8fba415':
            return self.testPass(grades)
        else:
            self.addMessage("Solution is not correct.")
            self.addMessage("   Student solution: %s" % (studentSolution,))
            return self.testFail(grades)

    def writeSolution(self, moduleDict, filePath):
        """The answer is hashed in-code, so the solution file is a stub."""
        # with-statement guarantees the handle is closed even on error
        # (the original used explicit open/close).
        with open(filePath, 'w') as handle:
            handle.write('# This is the solution file for %s.\n' % self.path)
            handle.write('# File intentionally blank.\n')
        return True
|
||||
|
||||
### q7/q8
|
||||
### =====
|
||||
## Average wins of a pacman agent
|
||||
|
||||
class EvalAgentTest(testClasses.TestCase):
    """Runs a pacman agent for several games and grades it against threshold
    tables over average score, games not timed out, and wins.  Failing any
    hard minimum forfeits all points."""

    def __init__(self, question, testDict):
        super(EvalAgentTest, self).__init__(question, testDict)
        self.pacmanParams = testDict['pacmanParams']

        # Hard minimums: failing any of these zeroes the whole test.
        self.scoreMinimum = int(testDict['scoreMinimum']) if 'scoreMinimum' in testDict else None
        self.nonTimeoutMinimum = int(testDict['nonTimeoutMinimum']) if 'nonTimeoutMinimum' in testDict else None
        self.winsMinimum = int(testDict['winsMinimum']) if 'winsMinimum' in testDict else None

        # Graded thresholds: one point per threshold met.
        self.scoreThresholds = [int(s) for s in testDict.get('scoreThresholds', '').split()]
        self.nonTimeoutThresholds = [int(s) for s in testDict.get('nonTimeoutThresholds', '').split()]
        self.winsThresholds = [int(s) for s in testDict.get('winsThresholds', '').split()]

        self.maxPoints = sum([len(t) for t in [self.scoreThresholds, self.nonTimeoutThresholds, self.winsThresholds]])

    def execute(self, grades, moduleDict, solutionDict):
        """Play the configured games, collect statistics, and award points
        per the threshold tables."""
        self.addMessage('Grading agent using command:  python pacman.py %s' % (self.pacmanParams,))

        startTime = time.time()
        games = pacman.runGames(** pacman.readCommand(self.pacmanParams.split(' ')))
        totalTime = time.time() - startTime
        numGames = len(games)

        stats = {'time': totalTime, 'wins': [g.state.isWin() for g in games].count(True),
                 'games': games, 'scores': [g.state.getScore() for g in games],
                 'timeouts': [g.agentTimeout for g in games].count(True), 'crashes': [g.agentCrashed for g in games].count(True)}

        averageScore = sum(stats['scores']) / float(len(stats['scores']))
        nonTimeouts = numGames - stats['timeouts']
        wins = stats['wins']

        def gradeThreshold(value, minimum, thresholds, name):
            # One point per threshold met, but only if the hard minimum
            # (when present) is satisfied.
            points = 0
            passed = (minimum is None) or (value >= minimum)
            if passed:
                for t in thresholds:
                    if value >= t:
                        points += 1
            return (passed, points, value, minimum, thresholds, name)

        results = [gradeThreshold(averageScore, self.scoreMinimum, self.scoreThresholds, "average score"),
                   gradeThreshold(nonTimeouts, self.nonTimeoutMinimum, self.nonTimeoutThresholds, "games not timed out"),
                   gradeThreshold(wins, self.winsMinimum, self.winsThresholds, "wins")]

        totalPoints = 0
        for passed, points, value, minimum, thresholds, name in results:
            if minimum is None and len(thresholds) == 0:
                # Metric not graded for this test; skip the report entirely.
                continue

            # print passed, points, value, minimum, thresholds, name
            totalPoints += points
            if not passed:
                assert points == 0
                self.addMessage("%s %s (fail: below minimum value %s)" % (value, name, minimum))
            else:
                self.addMessage("%s %s (%s of %s points)" % (value, name, points, len(thresholds)))

            if minimum is not None:
                self.addMessage("    Grading scheme:")
                self.addMessage("     < %s:  fail" % (minimum,))
                if len(thresholds) == 0 or minimum != thresholds[0]:
                    self.addMessage("    >= %s:  0 points" % (minimum,))
                for idx, threshold in enumerate(thresholds):
                    self.addMessage("    >= %s:  %s points" % (threshold, idx + 1))
            elif len(thresholds) > 0:
                self.addMessage("    Grading scheme:")
                self.addMessage("     < %s:  0 points" % (thresholds[0],))
                for idx, threshold in enumerate(thresholds):
                    self.addMessage("    >= %s:  %s points" % (threshold, idx + 1))

        if any([not passed for passed, _, _, _, _, _ in results]):
            # Any hard-minimum failure forfeits all points for this test.
            totalPoints = 0

        return self.testPartial(grades, totalPoints, self.maxPoints)

    def writeSolution(self, moduleDict, filePath):
        """No recorded solution is needed; write a placeholder file."""
        with open(filePath, 'w') as handle:
            handle.write('# This is the solution file for %s.\n' % self.path)
            handle.write('# File intentionally blank.\n')
        return True
|
||||
|
||||
|
||||
|
||||
### q2/q3
|
||||
### =====
|
||||
## For each parameter setting, compute the optimal policy, see if it satisfies some properties
|
||||
|
||||
def followPath(policy, start, numSteps=100):
    """Follow `policy` deterministically (ignoring noise) from `start`.

    Returns the list of visited states rendered as "(x,y)" strings, with
    'TERMINAL_STATE' appended when an 'exit' (or undefined) action is taken.
    `numSteps` bounds the walk so cyclic policies still terminate.
    """
    # Displacement produced by each movement action.
    deltas = {
        'north': (0, 1),
        'south': (0, -1),
        'east': (1, 0),
        'west': (-1, 0),
    }
    state = start
    path = []
    for _ in range(numSteps):
        if state not in policy:
            break
        action = policy[state]
        path.append("(%s,%s)" % state)
        if action == 'exit' or action is None:
            path.append('TERMINAL_STATE')
            break
        if action not in deltas:
            # BUG FIX: the original non-exclusive `if` chain left nextState
            # undefined (NameError) or stale for unrecognized actions; stop
            # the walk cleanly instead.
            break
        dx, dy = deltas[action]
        state = (state[0] + dx, state[1] + dy)

    return path
|
||||
|
||||
def parseGrid(string):
    """Parse a whitespace-separated grid specification into a gridworld grid.

    Numeric cells become ints; '_' becomes a space (empty cell); every other
    token (e.g. 'S', '#', policy letters) is kept as a string.
    """
    grid = [[entry.strip() for entry in line.split()] for line in string.split('\n')]
    for row in grid:
        for x, col in enumerate(row):
            try:
                col = int(col)
            except ValueError:
                # Non-numeric cells stay as strings.  (Narrowed from a bare
                # `except:` which would also have swallowed KeyboardInterrupt
                # and SystemExit.)
                pass
            if col == "_":
                col = ' '
            row[x] = col
    return gridworld.makeGrid(grid)
|
||||
|
||||
|
||||
def computePolicy(moduleDict, grid, discount):
    """Run the student's ValueIterationAgent on `grid` and return the greedy
    policy as a dict mapping each state to its chosen action."""
    agent = moduleDict['valueIterationAgents'].ValueIterationAgent(grid, discount=discount)
    return dict((state, agent.computeActionFromValues(state)) for state in grid.getStates())
|
||||
|
||||
|
||||
|
||||
class GridPolicyTest(testClasses.TestCase):
    """Grades an analysis.py parameter-choice question.

    Computes the optimal policy induced by the student's returned parameters
    on a Gridworld and checks it against a reference policy grid, then checks
    which states the noise-free path visits / avoids.
    """

    def __init__(self, question, testDict):
        super(GridPolicyTest, self).__init__(question, testDict)

        # Name of the function in module `analysis` that returns
        # (discount, noise) — or (discount, noise, livingReward) for q3.
        self.parameterFn = testDict['parameterFn']
        self.question2 = testDict.get('question2', 'false').lower() == 'true'

        # GridWorld specification
        #    _ is empty space
        #    numbers are terminal states with that value
        #    # is a wall
        #    S is a start state
        self.gridText = testDict['grid']
        self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
        self.gridName = testDict['gridName']

        # Policy specification
        #    _            policy choice not checked
        #    N, E, S, W   policy action must be north, east, south, west
        self.policy = parseGrid(testDict['policy'])

        # State the most probable path must visit
        #    (x,y) for a particular location; (0,0) is bottom left
        #    terminal for the terminal state
        self.pathVisits = testDict.get('pathVisits', None)

        # State the most probable path must not visit
        #    (x,y) for a particular location; (0,0) is bottom left
        #    terminal for the terminal state
        self.pathNotVisits = testDict.get('pathNotVisits', None)

    def execute(self, grades, moduleDict, solutionDict):
        """Fetch the student's parameters, compute the resulting optimal
        policy, and verify it against the policy and path constraints."""
        if not hasattr(moduleDict['analysis'], self.parameterFn):
            self.addMessage('Method not implemented: analysis.%s' % (self.parameterFn,))
            return self.testFail(grades)

        result = getattr(moduleDict['analysis'], self.parameterFn)()

        # A string starting with "not" means the student claims NOT POSSIBLE.
        if isinstance(result, str) and result.lower()[0:3] == "not":
            self.addMessage('Actually, it is possible!')
            return self.testFail(grades)

        if self.question2:
            livingReward = None
            try:
                discount, noise = result
                discount = float(discount)
                noise = float(noise)
            except (TypeError, ValueError):
                # Narrowed from a bare `except:`; unpacking and float() raise
                # TypeError/ValueError on malformed returns.
                self.addMessage('Did not return a (discount, noise) pair; instead analysis.%s returned: %s' % (self.parameterFn, result))
                return self.testFail(grades)
            if discount != 0.9 and noise != 0.2:
                self.addMessage('Must change either the discount or the noise, not both. Returned (discount, noise) = %s' % (result,))
                return self.testFail(grades)
        else:
            try:
                discount, noise, livingReward = result
                discount = float(discount)
                noise = float(noise)
                livingReward = float(livingReward)
            except (TypeError, ValueError):
                self.addMessage('Did not return a (discount, noise, living reward) triple; instead analysis.%s returned: %s' % (self.parameterFn, result))
                return self.testFail(grades)

        self.grid.setNoise(noise)
        if livingReward is not None:
            self.grid.setLivingReward(livingReward)

        policy = computePolicy(moduleDict, self.grid, discount)

        ## check policy
        actionMap = {'N': 'north', 'E': 'east', 'S': 'south', 'W': 'west', 'X': 'exit'}
        width, height = self.policy.width, self.policy.height
        policyPassed = True
        differPoint = None  # defensive init; set whenever policyPassed flips
        for x in range(width):
            for y in range(height):
                if self.policy[x][y] in actionMap and policy[(x, y)] != actionMap[self.policy[x][y]]:
                    differPoint = (x, y)
                    policyPassed = False

        if not policyPassed:
            self.addMessage('Policy not correct.')
            self.addMessage(' Student policy at %s: %s' % (differPoint, policy[differPoint]))
            self.addMessage(' Correct policy at %s: %s' % (differPoint, actionMap[self.policy[differPoint[0]][differPoint[1]]]))
            self.addMessage(' Student policy:')
            self.printPolicy(policy, False)
            self.addMessage(" Legend: N,S,E,W at states which move north etc, X at states which exit,")
            self.addMessage(" . at states where the policy is not defined (e.g. walls)")
            self.addMessage(' Correct policy specification:')
            self.printPolicy(self.policy, True)
            self.addMessage(" Legend: N,S,E,W for states in which the student policy must move north etc,")
            self.addMessage(" _ for states where it doesn't matter what the student policy does.")
            self.printGridworld()
            return self.testFail(grades)

        ## check path
        path = followPath(policy, self.grid.getStartState())

        if self.pathVisits is not None and self.pathVisits not in path:
            self.addMessage('Policy does not visit state %s when moving without noise.' % (self.pathVisits,))
            self.addMessage(' States visited: %s' % (path,))
            self.addMessage(' Student policy:')
            self.printPolicy(policy, False)
            self.addMessage(" Legend: N,S,E,W at states which move north etc, X at states which exit,")
            self.addMessage(" . at states where policy not defined")
            self.printGridworld()
            return self.testFail(grades)

        if self.pathNotVisits is not None and self.pathNotVisits in path:
            self.addMessage('Policy visits state %s when moving without noise.' % (self.pathNotVisits,))
            self.addMessage(' States visited: %s' % (path,))
            self.addMessage(' Student policy:')
            self.printPolicy(policy, False)
            self.addMessage(" Legend: N,S,E,W at states which move north etc, X at states which exit,")
            self.addMessage(" . at states where policy not defined")
            self.printGridworld()
            return self.testFail(grades)

        return self.testPass(grades)

    def printGridworld(self):
        """Append the ASCII grid specification to the test messages."""
        self.addMessage(' Gridworld:')
        for line in self.gridText.split('\n'):
            self.addMessage(' ' + line)
        self.addMessage(' Legend: # wall, _ empty, S start, numbers terminal states with that reward.')

    def printPolicy(self, policy, policyTypeIsGrid):
        """Append a rendering of `policy` to the messages.

        policyTypeIsGrid selects between the reference grid spec (indexed
        policy[x][y]) and the student's state->action dict.
        """
        if policyTypeIsGrid:
            legend = {'N': 'N', 'E': 'E', 'S': 'S', 'W': 'W', ' ': '_'}
        else:
            legend = {'north': 'N', 'east': 'E', 'south': 'S', 'west': 'W', 'exit': 'X', '.': '.', ' ': '_'}

        # Render rows top-down so (0,0) appears at the bottom left.
        for ybar in range(self.grid.grid.height):
            y = self.grid.grid.height - 1 - ybar
            if policyTypeIsGrid:
                self.addMessage(" %s" % (" ".join([legend[self.policy[x][y]] for x in range(self.grid.grid.width)]),))
            else:
                self.addMessage(" %s" % (" ".join([legend[policy.get((x, y), '.')] for x in range(self.grid.grid.width)]),))

    def writeSolution(self, moduleDict, filePath):
        """This test is self-checking; write a placeholder solution file."""
        with open(filePath, 'w') as handle:
            handle.write('# This is the solution file for %s.\n' % self.path)
            handle.write('# File intentionally blank.\n')
        return True
|
||||
|
189
reinforcement/testClasses.py
Normal file
189
reinforcement/testClasses.py
Normal file
|
@ -0,0 +1,189 @@
|
|||
# testClasses.py
|
||||
# --------------
|
||||
# Licensing Information: You are free to use or extend these projects for
|
||||
# educational purposes provided that (1) you do not distribute or publish
|
||||
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||
#
|
||||
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||
# The core projects and autograders were primarily created by John DeNero
|
||||
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||
|
||||
|
||||
# import modules from python standard library
|
||||
import inspect
|
||||
import re
|
||||
import sys
|
||||
|
||||
|
||||
# Class which models a question in a project. Note that questions have a
|
||||
# maximum number of points they are worth, and are composed of a series of
|
||||
# test cases
|
||||
# Class which models a question in a project.  Note that questions have a
# maximum number of points they are worth, and are composed of a series of
# test cases
class Question(object):
    """A question in a project: worth a maximum number of points and composed
    of a series of (testCase, thunk) pairs run by execute()."""

    def raiseNotDefined(self):
        # Abstract-method guard: subclasses must override execute().
        # Uses print() so the module runs under both Python 2 and 3.
        print('Method not implemented: %s' % inspect.stack()[1][3])
        sys.exit(1)

    def __init__(self, questionDict, display):
        self.maxPoints = int(questionDict['max_points'])
        self.testCases = []     # list of (testCase, thunk) pairs
        self.display = display  # graphics/display object shared by test cases

    def getDisplay(self):
        return self.display

    def getMaxPoints(self):
        return self.maxPoints

    def addTestCase(self, testCase, thunk):
        # Note that 'thunk' must be a function which accepts a single
        # argument, namely a 'grading' object.
        self.testCases.append((testCase, thunk))

    def execute(self, grades):
        self.raiseNotDefined()
|
||||
|
||||
# Question in which all test cases must be passed in order to receive credit
|
||||
# Question in which all test cases must be passed in order to receive credit
class PassAllTestsQuestion(Question):
    """Awards full credit only when every test case passes."""

    def execute(self, grades):
        # TODO: is this the right way to use grades? The autograder doesn't seem to use it.
        grades.assignZeroCredit()
        allPassed = True
        for _, runTest in self.testCases:
            if not runTest(grades):
                allPassed = False
        if allPassed:
            grades.assignFullCredit()
        else:
            grades.fail("Tests failed.")
|
||||
|
||||
|
||||
# Question in which predict credit is given for test cases with a ``points'' property.
|
||||
# All other tests are mandatory and must be passed.
|
||||
# Question in which partial credit is given for test cases with a ``points''
# property.  All other tests are mandatory and must be passed.
class HackedPartialCreditQuestion(Question):
    """Gives per-test partial credit via a ``points'' property; tests without
    one are mandatory."""

    def execute(self, grades):
        # TODO: is this the right way to use grades? The autograder doesn't seem to use it.
        grades.assignZeroCredit()

        earned = 0
        mandatoryOk = True
        for testCase, runTest in self.testCases:
            outcome = runTest(grades)
            if "points" in testCase.testDict:
                if outcome:
                    earned += float(testCase.testDict["points"])
            else:
                mandatoryOk = mandatoryOk and outcome

        ## FIXME: Below terrible hack to match q3's logic
        if int(earned) == self.maxPoints and not mandatoryOk:
            grades.assignZeroCredit()
        else:
            grades.addPoints(int(earned))
|
||||
|
||||
|
||||
class Q6PartialCreditQuestion(Question):
    """Fails any test which returns False, otherwise doesn't effect the grades object.
    Partial credit tests will add the required points."""

    def execute(self, grades):
        grades.assignZeroCredit()

        outcomes = [runTest(grades) for _, runTest in self.testCases]
        if False in outcomes:
            grades.assignZeroCredit()
|
||||
|
||||
class PartialCreditQuestion(Question):
    """Fails any test which returns False, otherwise doesn't effect the grades object.
    Partial credit tests will add the required points."""

    def execute(self, grades):
        grades.assignZeroCredit()

        for _, runTest in self.testCases:
            if runTest(grades):
                continue
            # First failure zeroes the grade and stops the question.
            grades.assignZeroCredit()
            grades.fail("Tests failed.")
            return False
|
||||
|
||||
|
||||
|
||||
class NumberPassedQuestion(Question):
    """Grade is the number of test cases passed."""

    def execute(self, grades):
        outcomes = [runTest(grades) for _, runTest in self.testCases]
        grades.addPoints(outcomes.count(True))
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# Template modeling a generic test case
|
||||
# Template modeling a generic test case
class TestCase(object):
    """Base class for a single test case; subclasses implement execute()
    and writeSolution().  Accumulates per-test messages for the grader."""

    def raiseNotDefined(self):
        # Abstract-method guard.  Uses print() so the module runs under both
        # Python 2 and 3.
        print('Method not implemented: %s' % inspect.stack()[1][3])
        sys.exit(1)

    def getPath(self):
        return self.path

    def __init__(self, question, testDict):
        self.question = question    # owning Question instance
        self.testDict = testDict    # parsed .test file properties
        self.path = testDict['path']
        self.messages = []          # lines reported under PASS/FAIL

    def __str__(self):
        self.raiseNotDefined()

    def execute(self, grades, moduleDict, solutionDict):
        self.raiseNotDefined()

    def writeSolution(self, moduleDict, filePath):
        self.raiseNotDefined()
        return True

    # Tests should call the following messages for grading
    # to ensure a uniform format for test output.
    #
    # TODO: this is hairy, but we need to fix grading.py's interface
    # to get a nice hierarchical project - question - test structure,
    # then these should be moved into Question proper.
    def testPass(self, grades):
        grades.addMessage('PASS: %s' % (self.path,))
        for line in self.messages:
            grades.addMessage(' %s' % (line,))
        return True

    def testFail(self, grades):
        grades.addMessage('FAIL: %s' % (self.path,))
        for line in self.messages:
            grades.addMessage(' %s' % (line,))
        return False

    # This should really be question level?
    #
    def testPartial(self, grades, points, maxPoints):
        grades.addPoints(points)
        # Points above maxPoints are reported separately as extra credit.
        extraCredit = max(0, points - maxPoints)
        regularCredit = points - extraCredit

        grades.addMessage('%s: %s (%s of %s points)' % ("PASS" if points >= maxPoints else "FAIL", self.path, regularCredit, maxPoints))
        if extraCredit > 0:
            grades.addMessage('EXTRA CREDIT: %s points' % (extraCredit,))

        for line in self.messages:
            grades.addMessage(' %s' % (line,))

        return True

    def addMessage(self, message):
        # Split multi-line messages so each line is emitted individually.
        self.messages.extend(message.split('\n'))
|
||||
|
85
reinforcement/testParser.py
Normal file
85
reinforcement/testParser.py
Normal file
|
@ -0,0 +1,85 @@
|
|||
# testParser.py
|
||||
# -------------
|
||||
# Licensing Information: You are free to use or extend these projects for
|
||||
# educational purposes provided that (1) you do not distribute or publish
|
||||
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||
#
|
||||
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||
# The core projects and autograders were primarily created by John DeNero
|
||||
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||
|
||||
|
||||
import re
|
||||
import sys
|
||||
|
||||
class TestParser(object):
    """Parses a .test file into a dict of properties.

    Supported syntax per line: comments after '#', blank lines, one-line
    properties (key: "value"), and multi-line properties delimited by
    triple-quote lines.  Order is recorded in __emit__ for round-tripping.
    """

    def __init__(self, path):
        # save the path to the test file
        self.path = path

    def removeComments(self, rawlines):
        """Join the lines with newlines, removing any portion of a line
        following a '#' symbol."""
        fixed_lines = []
        for l in rawlines:
            idx = l.find('#')
            if idx == -1:
                fixed_lines.append(l)
            else:
                fixed_lines.append(l[0:idx])
        return '\n'.join(fixed_lines)

    def parse(self):
        """Read and parse the test file; return a dict of properties plus the
        bookkeeping keys __raw_lines__, path and __emit__."""
        test = {}
        with open(self.path) as handle:
            raw_lines = handle.read().split('\n')

        test_text = self.removeComments(raw_lines)
        test['__raw_lines__'] = raw_lines
        test['path'] = self.path
        test['__emit__'] = []
        lines = test_text.split('\n')
        i = 0
        # read a property in each loop cycle
        # (regexes are raw strings: '\A', '\s' etc. are invalid string
        # escapes under Python 3)
        while i < len(lines):
            # skip blank lines
            if re.match(r'\A\s*\Z', lines[i]):
                test['__emit__'].append(("raw", raw_lines[i]))
                i += 1
                continue
            m = re.match(r'\A([^"]*?):\s*"([^"]*)"\s*\Z', lines[i])
            if m:
                test[m.group(1)] = m.group(2)
                test['__emit__'].append(("oneline", m.group(1)))
                i += 1
                continue
            m = re.match(r'\A([^"]*?):\s*"""\s*\Z', lines[i])
            if m:
                # Multi-line value: collect raw lines until the closing """.
                msg = []
                i += 1
                while not re.match(r'\A\s*"""\s*\Z', lines[i]):
                    msg.append(raw_lines[i])
                    i += 1
                test[m.group(1)] = '\n'.join(msg)
                test['__emit__'].append(("multiline", m.group(1)))
                i += 1
                continue
            # Uses print() so the module runs under both Python 2 and 3.
            print('error parsing test file: %s' % self.path)
            sys.exit(1)
        return test
|
||||
|
||||
|
||||
def emitTestDict(testDict, handle):
    """Write a parsed test dict back out through `handle`, replaying the
    recorded __emit__ events (raw lines, one-line and multi-line properties)
    in their original order."""
    for eventKind, key in testDict['__emit__']:
        if eventKind == "raw":
            handle.write(key + "\n")
        elif eventKind == "oneline":
            handle.write('%s: "%s"\n' % (key, testDict[key]))
        elif eventKind == "multiline":
            handle.write('%s: """\n%s\n"""\n' % (key, testDict[key]))
        else:
            raise Exception("Bad __emit__")
|
0
reinforcement/test_cases/CONFIG
Normal file
0
reinforcement/test_cases/CONFIG
Normal file
410
reinforcement/test_cases/q1/1-tinygrid.solution
Normal file
410
reinforcement/test_cases/q1/1-tinygrid.solution
Normal file
|
@ -0,0 +1,410 @@
|
|||
values_k_0: """
|
||||
0.0000
|
||||
0.0000
|
||||
0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_1: """
|
||||
-10.0000
|
||||
0.0000
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_north: """
|
||||
illegal
|
||||
-5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_south: """
|
||||
illegal
|
||||
5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_2: """
|
||||
-10.0000
|
||||
5.0000
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_north: """
|
||||
illegal
|
||||
-5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_east: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_south: """
|
||||
illegal
|
||||
5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_west: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_3: """
|
||||
-10.0000
|
||||
5.0000
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_north: """
|
||||
illegal
|
||||
-5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_east: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_south: """
|
||||
illegal
|
||||
5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_west: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_4: """
|
||||
-10.0000
|
||||
5.0000
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_4_action_north: """
|
||||
illegal
|
||||
-5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_east: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_4_action_south: """
|
||||
illegal
|
||||
5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_west: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_5: """
|
||||
-10.0000
|
||||
5.0000
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_5_action_north: """
|
||||
illegal
|
||||
-5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_east: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_5_action_south: """
|
||||
illegal
|
||||
5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_west: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_6: """
|
||||
-10.0000
|
||||
5.0000
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_6_action_north: """
|
||||
illegal
|
||||
-5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_east: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_6_action_south: """
|
||||
illegal
|
||||
5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_west: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_7: """
|
||||
-10.0000
|
||||
5.0000
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_7_action_north: """
|
||||
illegal
|
||||
-5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_east: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_7_action_south: """
|
||||
illegal
|
||||
5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_west: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_8: """
|
||||
-10.0000
|
||||
5.0000
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_8_action_north: """
|
||||
illegal
|
||||
-5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_east: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_8_action_south: """
|
||||
illegal
|
||||
5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_west: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_9: """
|
||||
-10.0000
|
||||
5.0000
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_9_action_north: """
|
||||
illegal
|
||||
-5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_east: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_9_action_south: """
|
||||
illegal
|
||||
5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_west: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_100: """
|
||||
-10.0000
|
||||
5.0000
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_100_action_north: """
|
||||
illegal
|
||||
-5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_east: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_100_action_south: """
|
||||
illegal
|
||||
5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_west: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
policy: """
|
||||
exit
|
||||
south
|
||||
exit
|
||||
"""
|
||||
|
||||
actions: """
|
||||
north
|
||||
east
|
||||
exit
|
||||
south
|
||||
west
|
||||
"""
|
||||
|
22
reinforcement/test_cases/q1/1-tinygrid.test
Normal file
22
reinforcement/test_cases/q1/1-tinygrid.test
Normal file
|
@ -0,0 +1,22 @@
|
|||
class: "ValueIterationTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
-10
|
||||
S
|
||||
10
|
||||
"""
|
||||
discount: "0.5"
|
||||
noise: "0.0"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.5"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "100"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
410
reinforcement/test_cases/q1/2-tinygrid-noisy.solution
Normal file
410
reinforcement/test_cases/q1/2-tinygrid-noisy.solution
Normal file
|
@ -0,0 +1,410 @@
|
|||
values_k_0: """
|
||||
0.0000
|
||||
0.0000
|
||||
0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_1: """
|
||||
-10.0000
|
||||
0.0000
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_north: """
|
||||
illegal
|
||||
-5.6250
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_south: """
|
||||
illegal
|
||||
5.6250
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_2: """
|
||||
-10.0000
|
||||
5.6250
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_north: """
|
||||
illegal
|
||||
-4.5703
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_east: """
|
||||
illegal
|
||||
3.1641
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_south: """
|
||||
illegal
|
||||
6.6797
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_west: """
|
||||
illegal
|
||||
3.1641
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_3: """
|
||||
-10.0000
|
||||
6.6797
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_north: """
|
||||
illegal
|
||||
-4.3726
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_east: """
|
||||
illegal
|
||||
3.7573
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_south: """
|
||||
illegal
|
||||
6.8774
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_west: """
|
||||
illegal
|
||||
3.7573
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_4: """
|
||||
-10.0000
|
||||
6.8774
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_4_action_north: """
|
||||
illegal
|
||||
-4.3355
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_east: """
|
||||
illegal
|
||||
3.8686
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_4_action_south: """
|
||||
illegal
|
||||
6.9145
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_west: """
|
||||
illegal
|
||||
3.8686
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_5: """
|
||||
-10.0000
|
||||
6.9145
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_5_action_north: """
|
||||
illegal
|
||||
-4.3285
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_east: """
|
||||
illegal
|
||||
3.8894
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_5_action_south: """
|
||||
illegal
|
||||
6.9215
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_west: """
|
||||
illegal
|
||||
3.8894
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_6: """
|
||||
-10.0000
|
||||
6.9215
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_6_action_north: """
|
||||
illegal
|
||||
-4.3272
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_east: """
|
||||
illegal
|
||||
3.8933
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_6_action_south: """
|
||||
illegal
|
||||
6.9228
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_west: """
|
||||
illegal
|
||||
3.8933
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_7: """
|
||||
-10.0000
|
||||
6.9228
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_7_action_north: """
|
||||
illegal
|
||||
-4.3270
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_east: """
|
||||
illegal
|
||||
3.8941
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_7_action_south: """
|
||||
illegal
|
||||
6.9230
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_west: """
|
||||
illegal
|
||||
3.8941
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_8: """
|
||||
-10.0000
|
||||
6.9230
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_8_action_north: """
|
||||
illegal
|
||||
-4.3269
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_east: """
|
||||
illegal
|
||||
3.8942
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_8_action_south: """
|
||||
illegal
|
||||
6.9231
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_west: """
|
||||
illegal
|
||||
3.8942
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_9: """
|
||||
-10.0000
|
||||
6.9231
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_9_action_north: """
|
||||
illegal
|
||||
-4.3269
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_east: """
|
||||
illegal
|
||||
3.8942
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_9_action_south: """
|
||||
illegal
|
||||
6.9231
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_west: """
|
||||
illegal
|
||||
3.8942
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_100: """
|
||||
-10.0000
|
||||
6.9231
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_100_action_north: """
|
||||
illegal
|
||||
-4.3269
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_east: """
|
||||
illegal
|
||||
3.8942
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_100_action_south: """
|
||||
illegal
|
||||
6.9231
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_west: """
|
||||
illegal
|
||||
3.8942
|
||||
illegal
|
||||
"""
|
||||
|
||||
policy: """
|
||||
exit
|
||||
south
|
||||
exit
|
||||
"""
|
||||
|
||||
actions: """
|
||||
north
|
||||
east
|
||||
exit
|
||||
south
|
||||
west
|
||||
"""
|
||||
|
22
reinforcement/test_cases/q1/2-tinygrid-noisy.test
Normal file
22
reinforcement/test_cases/q1/2-tinygrid-noisy.test
Normal file
|
@ -0,0 +1,22 @@
|
|||
class: "ValueIterationTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
-10
|
||||
S
|
||||
10
|
||||
"""
|
||||
discount: "0.75"
|
||||
noise: "0.25"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.5"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "100"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
678
reinforcement/test_cases/q1/3-bridge.solution
Normal file
678
reinforcement/test_cases/q1/3-bridge.solution
Normal file
|
@ -0,0 +1,678 @@
|
|||
values_k_0: """
|
||||
__________ 0.0000 __________
|
||||
0.0000 0.0000 0.0000
|
||||
0.0000 0.0000 0.0000
|
||||
0.0000 0.0000 0.0000
|
||||
0.0000 0.0000 0.0000
|
||||
0.0000 0.0000 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_0_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_0_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_0_action_exit: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_0_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_0_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
values_k_1: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 0.0000 -100.0000
|
||||
-100.0000 0.0000 -100.0000
|
||||
-100.0000 0.0000 -100.0000
|
||||
-100.0000 0.0000 -100.0000
|
||||
-100.0000 0.0000 -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_1_action_north: """
|
||||
__________ illegal __________
|
||||
illegal -0.8500 illegal
|
||||
illegal -8.5000 illegal
|
||||
illegal -8.5000 illegal
|
||||
illegal -8.5000 illegal
|
||||
illegal -8.5000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_1_action_east: """
|
||||
__________ illegal __________
|
||||
illegal -76.0750 illegal
|
||||
illegal -76.5000 illegal
|
||||
illegal -76.5000 illegal
|
||||
illegal -76.5000 illegal
|
||||
illegal -76.4575 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_1_action_exit: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_1_action_south: """
|
||||
__________ illegal __________
|
||||
illegal -8.5000 illegal
|
||||
illegal -8.5000 illegal
|
||||
illegal -8.5000 illegal
|
||||
illegal -8.5000 illegal
|
||||
illegal -7.7350 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_1_action_west: """
|
||||
__________ illegal __________
|
||||
illegal -76.0750 illegal
|
||||
illegal -76.5000 illegal
|
||||
illegal -76.5000 illegal
|
||||
illegal -76.5000 illegal
|
||||
illegal -76.4575 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
values_k_2: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 -0.8500 -100.0000
|
||||
-100.0000 -8.5000 -100.0000
|
||||
-100.0000 -8.5000 -100.0000
|
||||
-100.0000 -8.5000 -100.0000
|
||||
-100.0000 -7.7350 -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_2_action_north: """
|
||||
__________ illegal __________
|
||||
illegal -0.8500 illegal
|
||||
illegal -9.1502 illegal
|
||||
illegal -15.0025 illegal
|
||||
illegal -15.0025 illegal
|
||||
illegal -15.0025 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_2_action_east: """
|
||||
__________ illegal __________
|
||||
illegal -76.4363 illegal
|
||||
illegal -76.8974 illegal
|
||||
illegal -77.2225 illegal
|
||||
illegal -77.1900 illegal
|
||||
illegal -76.8187 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_2_action_exit: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_2_action_south: """
|
||||
__________ illegal __________
|
||||
illegal -15.0025 illegal
|
||||
illegal -15.0025 illegal
|
||||
illegal -15.0025 illegal
|
||||
illegal -14.4173 illegal
|
||||
illegal -7.7350 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_2_action_west: """
|
||||
__________ illegal __________
|
||||
illegal -76.4363 illegal
|
||||
illegal -76.8974 illegal
|
||||
illegal -77.2225 illegal
|
||||
illegal -77.1900 illegal
|
||||
illegal -76.8187 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
values_k_3: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 -0.8500 -100.0000
|
||||
-100.0000 -9.1502 -100.0000
|
||||
-100.0000 -15.0025 -100.0000
|
||||
-100.0000 -14.4173 -100.0000
|
||||
-100.0000 -7.7350 -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_3_action_north: """
|
||||
__________ illegal __________
|
||||
illegal -0.8500 illegal
|
||||
illegal -9.1502 illegal
|
||||
illegal -15.4999 illegal
|
||||
illegal -19.9769 illegal
|
||||
illegal -19.5292 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_3_action_east: """
|
||||
__________ illegal __________
|
||||
illegal -76.4639 illegal
|
||||
illegal -77.1737 illegal
|
||||
illegal -77.5016 illegal
|
||||
illegal -77.4663 illegal
|
||||
illegal -77.0702 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_3_action_exit: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_3_action_south: """
|
||||
__________ illegal __________
|
||||
illegal -15.4999 illegal
|
||||
illegal -19.9769 illegal
|
||||
illegal -19.5292 illegal
|
||||
illegal -14.4173 illegal
|
||||
illegal -7.7350 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_3_action_west: """
|
||||
__________ illegal __________
|
||||
illegal -76.4639 illegal
|
||||
illegal -77.1737 illegal
|
||||
illegal -77.5016 illegal
|
||||
illegal -77.4663 illegal
|
||||
illegal -77.0702 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
values_k_4: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 -0.8500 -100.0000
|
||||
-100.0000 -9.1502 -100.0000
|
||||
-100.0000 -15.4999 -100.0000
|
||||
-100.0000 -14.4173 -100.0000
|
||||
-100.0000 -7.7350 -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_4_action_north: """
|
||||
__________ illegal __________
|
||||
illegal -0.8500 illegal
|
||||
illegal -9.1502 illegal
|
||||
illegal -15.4999 illegal
|
||||
illegal -20.3575 illegal
|
||||
illegal -19.5292 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_4_action_east: """
|
||||
__________ illegal __________
|
||||
illegal -76.4639 illegal
|
||||
illegal -77.1949 illegal
|
||||
illegal -77.5016 illegal
|
||||
illegal -77.4875 illegal
|
||||
illegal -77.0702 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_4_action_exit: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_4_action_south: """
|
||||
__________ illegal __________
|
||||
illegal -15.4999 illegal
|
||||
illegal -20.3575 illegal
|
||||
illegal -19.5292 illegal
|
||||
illegal -14.4173 illegal
|
||||
illegal -7.7350 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_4_action_west: """
|
||||
__________ illegal __________
|
||||
illegal -76.4639 illegal
|
||||
illegal -77.1949 illegal
|
||||
illegal -77.5016 illegal
|
||||
illegal -77.4875 illegal
|
||||
illegal -77.0702 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
values_k_5: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 -0.8500 -100.0000
|
||||
-100.0000 -9.1502 -100.0000
|
||||
-100.0000 -15.4999 -100.0000
|
||||
-100.0000 -14.4173 -100.0000
|
||||
-100.0000 -7.7350 -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_5_action_north: """
|
||||
__________ illegal __________
|
||||
illegal -0.8500 illegal
|
||||
illegal -9.1502 illegal
|
||||
illegal -15.4999 illegal
|
||||
illegal -20.3575 illegal
|
||||
illegal -19.5292 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_5_action_east: """
|
||||
__________ illegal __________
|
||||
illegal -76.4639 illegal
|
||||
illegal -77.1949 illegal
|
||||
illegal -77.5016 illegal
|
||||
illegal -77.4875 illegal
|
||||
illegal -77.0702 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_5_action_exit: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_5_action_south: """
|
||||
__________ illegal __________
|
||||
illegal -15.4999 illegal
|
||||
illegal -20.3575 illegal
|
||||
illegal -19.5292 illegal
|
||||
illegal -14.4173 illegal
|
||||
illegal -7.7350 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_5_action_west: """
|
||||
__________ illegal __________
|
||||
illegal -76.4639 illegal
|
||||
illegal -77.1949 illegal
|
||||
illegal -77.5016 illegal
|
||||
illegal -77.4875 illegal
|
||||
illegal -77.0702 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
values_k_6: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 -0.8500 -100.0000
|
||||
-100.0000 -9.1502 -100.0000
|
||||
-100.0000 -15.4999 -100.0000
|
||||
-100.0000 -14.4173 -100.0000
|
||||
-100.0000 -7.7350 -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_6_action_north: """
|
||||
__________ illegal __________
|
||||
illegal -0.8500 illegal
|
||||
illegal -9.1502 illegal
|
||||
illegal -15.4999 illegal
|
||||
illegal -20.3575 illegal
|
||||
illegal -19.5292 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_6_action_east: """
|
||||
__________ illegal __________
|
||||
illegal -76.4639 illegal
|
||||
illegal -77.1949 illegal
|
||||
illegal -77.5016 illegal
|
||||
illegal -77.4875 illegal
|
||||
illegal -77.0702 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_6_action_exit: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_6_action_south: """
|
||||
__________ illegal __________
|
||||
illegal -15.4999 illegal
|
||||
illegal -20.3575 illegal
|
||||
illegal -19.5292 illegal
|
||||
illegal -14.4173 illegal
|
||||
illegal -7.7350 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_6_action_west: """
|
||||
__________ illegal __________
|
||||
illegal -76.4639 illegal
|
||||
illegal -77.1949 illegal
|
||||
illegal -77.5016 illegal
|
||||
illegal -77.4875 illegal
|
||||
illegal -77.0702 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
values_k_7: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 -0.8500 -100.0000
|
||||
-100.0000 -9.1502 -100.0000
|
||||
-100.0000 -15.4999 -100.0000
|
||||
-100.0000 -14.4173 -100.0000
|
||||
-100.0000 -7.7350 -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_7_action_north: """
|
||||
__________ illegal __________
|
||||
illegal -0.8500 illegal
|
||||
illegal -9.1502 illegal
|
||||
illegal -15.4999 illegal
|
||||
illegal -20.3575 illegal
|
||||
illegal -19.5292 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_7_action_east: """
|
||||
__________ illegal __________
|
||||
illegal -76.4639 illegal
|
||||
illegal -77.1949 illegal
|
||||
illegal -77.5016 illegal
|
||||
illegal -77.4875 illegal
|
||||
illegal -77.0702 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_7_action_exit: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_7_action_south: """
|
||||
__________ illegal __________
|
||||
illegal -15.4999 illegal
|
||||
illegal -20.3575 illegal
|
||||
illegal -19.5292 illegal
|
||||
illegal -14.4173 illegal
|
||||
illegal -7.7350 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_7_action_west: """
|
||||
__________ illegal __________
|
||||
illegal -76.4639 illegal
|
||||
illegal -77.1949 illegal
|
||||
illegal -77.5016 illegal
|
||||
illegal -77.4875 illegal
|
||||
illegal -77.0702 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
values_k_8: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 -0.8500 -100.0000
|
||||
-100.0000 -9.1502 -100.0000
|
||||
-100.0000 -15.4999 -100.0000
|
||||
-100.0000 -14.4173 -100.0000
|
||||
-100.0000 -7.7350 -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_8_action_north: """
|
||||
__________ illegal __________
|
||||
illegal -0.8500 illegal
|
||||
illegal -9.1502 illegal
|
||||
illegal -15.4999 illegal
|
||||
illegal -20.3575 illegal
|
||||
illegal -19.5292 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_8_action_east: """
|
||||
__________ illegal __________
|
||||
illegal -76.4639 illegal
|
||||
illegal -77.1949 illegal
|
||||
illegal -77.5016 illegal
|
||||
illegal -77.4875 illegal
|
||||
illegal -77.0702 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_8_action_exit: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_8_action_south: """
|
||||
__________ illegal __________
|
||||
illegal -15.4999 illegal
|
||||
illegal -20.3575 illegal
|
||||
illegal -19.5292 illegal
|
||||
illegal -14.4173 illegal
|
||||
illegal -7.7350 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_8_action_west: """
|
||||
__________ illegal __________
|
||||
illegal -76.4639 illegal
|
||||
illegal -77.1949 illegal
|
||||
illegal -77.5016 illegal
|
||||
illegal -77.4875 illegal
|
||||
illegal -77.0702 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
values_k_9: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 -0.8500 -100.0000
|
||||
-100.0000 -9.1502 -100.0000
|
||||
-100.0000 -15.4999 -100.0000
|
||||
-100.0000 -14.4173 -100.0000
|
||||
-100.0000 -7.7350 -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_9_action_north: """
|
||||
__________ illegal __________
|
||||
illegal -0.8500 illegal
|
||||
illegal -9.1502 illegal
|
||||
illegal -15.4999 illegal
|
||||
illegal -20.3575 illegal
|
||||
illegal -19.5292 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_9_action_east: """
|
||||
__________ illegal __________
|
||||
illegal -76.4639 illegal
|
||||
illegal -77.1949 illegal
|
||||
illegal -77.5016 illegal
|
||||
illegal -77.4875 illegal
|
||||
illegal -77.0702 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_9_action_exit: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_9_action_south: """
|
||||
__________ illegal __________
|
||||
illegal -15.4999 illegal
|
||||
illegal -20.3575 illegal
|
||||
illegal -19.5292 illegal
|
||||
illegal -14.4173 illegal
|
||||
illegal -7.7350 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_9_action_west: """
|
||||
__________ illegal __________
|
||||
illegal -76.4639 illegal
|
||||
illegal -77.1949 illegal
|
||||
illegal -77.5016 illegal
|
||||
illegal -77.4875 illegal
|
||||
illegal -77.0702 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
values_k_100: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 -0.8500 -100.0000
|
||||
-100.0000 -9.1502 -100.0000
|
||||
-100.0000 -15.4999 -100.0000
|
||||
-100.0000 -14.4173 -100.0000
|
||||
-100.0000 -7.7350 -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_100_action_north: """
|
||||
__________ illegal __________
|
||||
illegal -0.8500 illegal
|
||||
illegal -9.1502 illegal
|
||||
illegal -15.4999 illegal
|
||||
illegal -20.3575 illegal
|
||||
illegal -19.5292 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_100_action_east: """
|
||||
__________ illegal __________
|
||||
illegal -76.4639 illegal
|
||||
illegal -77.1949 illegal
|
||||
illegal -77.5016 illegal
|
||||
illegal -77.4875 illegal
|
||||
illegal -77.0702 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_100_action_exit: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_100_action_south: """
|
||||
__________ illegal __________
|
||||
illegal -15.4999 illegal
|
||||
illegal -20.3575 illegal
|
||||
illegal -19.5292 illegal
|
||||
illegal -14.4173 illegal
|
||||
illegal -7.7350 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_100_action_west: """
|
||||
__________ illegal __________
|
||||
illegal -76.4639 illegal
|
||||
illegal -77.1949 illegal
|
||||
illegal -77.5016 illegal
|
||||
illegal -77.4875 illegal
|
||||
illegal -77.0702 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
policy: """
|
||||
__________ exit __________
|
||||
exit north exit
|
||||
exit north exit
|
||||
exit north exit
|
||||
exit south exit
|
||||
exit south exit
|
||||
__________ exit __________
|
||||
"""
|
||||
|
||||
actions: """
|
||||
north
|
||||
east
|
||||
exit
|
||||
south
|
||||
west
|
||||
"""
|
||||
|
27
reinforcement/test_cases/q1/3-bridge.test
Normal file
27
reinforcement/test_cases/q1/3-bridge.test
Normal file
|
@ -0,0 +1,27 @@
|
|||
class: "ValueIterationTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
# 10 #
|
||||
-100 _ -100
|
||||
-100 _ -100
|
||||
-100 _ -100
|
||||
-100 _ -100
|
||||
-100 S -100
|
||||
# 1 #
|
||||
"""
|
||||
gridName: "bridgeGrid"
|
||||
discount: "0.85"
|
||||
noise: "0.1"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.5"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "500"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
544
reinforcement/test_cases/q1/4-discountgrid.solution
Normal file
544
reinforcement/test_cases/q1/4-discountgrid.solution
Normal file
|
@ -0,0 +1,544 @@
|
|||
values_k_0: """
|
||||
0.0000 0.0000 0.0000 0.0000 0.0000
|
||||
0.0000 0.0000 __________ 0.0000 0.0000
|
||||
0.0000 0.0000 0.0000 0.0000 0.0000
|
||||
0.0000 0.0000 __________ __________ 0.0000
|
||||
0.0000 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_exit: """
|
||||
-10.0000 illegal 10.0000 illegal illegal
|
||||
-10.0000 illegal __________ illegal illegal
|
||||
-10.0000 illegal 1.0000 illegal illegal
|
||||
-10.0000 illegal __________ __________ illegal
|
||||
-10.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
values_k_1: """
|
||||
-10.0000 0.0000 10.0000 0.0000 0.0000
|
||||
-10.0000 0.0000 __________ 0.0000 0.0000
|
||||
-10.0000 0.0000 1.0000 0.0000 0.0000
|
||||
-10.0000 0.0000 __________ __________ 0.0000
|
||||
-10.0000 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_north: """
|
||||
illegal 0.0000 illegal 0.9000 0.0000
|
||||
illegal -0.9000 __________ 0.0000 0.0000
|
||||
illegal -0.8100 illegal 0.0900 0.0000
|
||||
illegal -0.9000 __________ __________ 0.0000
|
||||
illegal -0.9000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_east: """
|
||||
illegal 7.2000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.7200 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_exit: """
|
||||
-10.0000 illegal 10.0000 illegal illegal
|
||||
-10.0000 illegal __________ illegal illegal
|
||||
-10.0000 illegal 1.0000 illegal illegal
|
||||
-10.0000 illegal __________ __________ illegal
|
||||
-10.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_south: """
|
||||
illegal 0.0000 illegal 0.9000 0.0000
|
||||
illegal -0.9000 __________ 0.0000 0.0000
|
||||
illegal -0.8100 illegal 0.0900 0.0000
|
||||
illegal -0.9000 __________ __________ 0.0000
|
||||
illegal -0.9000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_west: """
|
||||
illegal -7.2000 illegal 7.2000 0.0000
|
||||
illegal -7.2000 __________ 0.0000 0.0000
|
||||
illegal -7.2000 illegal 0.7200 0.0000
|
||||
illegal -7.2000 __________ __________ 0.0000
|
||||
illegal -7.2000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
values_k_2: """
|
||||
-10.0000 7.2000 10.0000 7.2000 0.0000
|
||||
-10.0000 0.0000 __________ 0.0000 0.0000
|
||||
-10.0000 0.7200 1.0000 0.7200 0.0000
|
||||
-10.0000 0.0000 __________ __________ 0.0000
|
||||
-10.0000 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_north: """
|
||||
illegal 5.1840 illegal 6.0840 0.6480
|
||||
illegal 4.2840 __________ 5.1840 0.0000
|
||||
illegal -0.8100 illegal 0.0900 0.0648
|
||||
illegal -0.3816 __________ __________ 0.0000
|
||||
illegal -0.9000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_east: """
|
||||
illegal 7.8480 illegal 0.6480 0.0000
|
||||
illegal 0.7128 __________ 0.7128 0.0000
|
||||
illegal 0.7200 illegal 0.0648 0.0000
|
||||
illegal 0.0648 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_exit: """
|
||||
-10.0000 illegal 10.0000 illegal illegal
|
||||
-10.0000 illegal __________ illegal illegal
|
||||
-10.0000 illegal 1.0000 illegal illegal
|
||||
-10.0000 illegal __________ __________ illegal
|
||||
-10.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_south: """
|
||||
illegal 0.0000 illegal 0.9000 0.6480
|
||||
illegal -0.3816 __________ 0.5184 0.0000
|
||||
illegal -0.8100 illegal 0.6084 0.0648
|
||||
illegal -0.9000 __________ __________ 0.0000
|
||||
illegal -0.9000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_west: """
|
||||
illegal -6.5520 illegal 7.8480 5.1840
|
||||
illegal -6.4872 __________ 0.7128 0.0000
|
||||
illegal -7.2000 illegal 0.7848 0.5184
|
||||
illegal -7.1352 __________ __________ 0.0000
|
||||
illegal -7.2000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
values_k_3: """
|
||||
-10.0000 7.8480 10.0000 7.8480 5.1840
|
||||
-10.0000 4.2840 __________ 5.1840 0.0000
|
||||
-10.0000 0.7200 1.0000 0.7848 0.5184
|
||||
-10.0000 0.0648 __________ __________ 0.0000
|
||||
-10.0000 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_north: """
|
||||
illegal 5.6506 illegal 7.0171 4.9054
|
||||
illegal 5.1361 __________ 6.1171 4.1990
|
||||
illegal 2.2745 illegal 3.8691 0.1173
|
||||
illegal -0.3758 __________ __________ 0.3732
|
||||
illegal -0.8533 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_east: """
|
||||
illegal 8.2919 illegal 4.9054 4.1990
|
||||
illegal 3.8556 __________ 0.7770 0.5132
|
||||
illegal 1.1114 illegal 0.9104 0.3732
|
||||
illegal 0.1115 __________ __________ 0.0467
|
||||
illegal 0.0058 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_exit: """
|
||||
-10.0000 illegal 10.0000 illegal illegal
|
||||
-10.0000 illegal __________ illegal illegal
|
||||
-10.0000 illegal 1.0000 illegal illegal
|
||||
-10.0000 illegal __________ __________ illegal
|
||||
-10.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_south: """
|
||||
illegal 3.0845 illegal 5.0990 1.1729
|
||||
illegal 0.0040 __________ 1.0316 0.8398
|
||||
illegal -0.7633 illegal 0.7017 0.1173
|
||||
illegal -0.8942 __________ __________ 0.0000
|
||||
illegal -0.9000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_west: """
|
||||
illegal -6.1081 illegal 8.3729 6.1171
|
||||
illegal -6.4289 __________ 4.5094 4.2457
|
||||
illegal -6.8086 illegal 1.2572 0.5651
|
||||
illegal -7.1352 __________ __________ 0.0467
|
||||
illegal -7.1942 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
values_k_4: """
|
||||
-10.0000 8.2919 10.0000 8.3729 6.1171
|
||||
-10.0000 5.1361 __________ 6.1171 4.2457
|
||||
-10.0000 2.2745 1.0000 3.8691 0.5651
|
||||
-10.0000 0.1115 __________ __________ 0.3732
|
||||
-10.0000 0.0058 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_4_action_north: """
|
||||
illegal 5.9702 illegal 7.4790 5.7084
|
||||
illegal 5.5324 __________ 6.9611 5.3370
|
||||
illegal 2.8880 illegal 4.5452 3.4560
|
||||
illegal 0.7477 __________ __________ 0.4740
|
||||
illegal -0.8198 0.0005 0.0000 0.2687
|
||||
"""
|
||||
|
||||
q_values_k_4_action_east: """
|
||||
illegal 8.4085 illegal 5.7084 5.3370
|
||||
illegal 4.6490 __________ 4.1587 3.6583
|
||||
illegal 1.1923 illegal 1.3056 0.8225
|
||||
illegal 0.2855 __________ __________ 0.3196
|
||||
illegal 0.0106 0.0000 0.0000 0.0336
|
||||
"""
|
||||
|
||||
q_values_k_4_action_exit: """
|
||||
-10.0000 illegal 10.0000 illegal illegal
|
||||
-10.0000 illegal __________ illegal illegal
|
||||
-10.0000 illegal 1.0000 illegal illegal
|
||||
-10.0000 illegal __________ __________ illegal
|
||||
-10.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_south: """
|
||||
illegal 3.6980 illegal 5.8549 4.3610
|
||||
illegal 1.1999 __________ 3.7184 1.3395
|
||||
illegal -0.7298 illegal 2.9266 0.6678
|
||||
illegal -0.8858 __________ __________ 0.0672
|
||||
illegal -0.8958 0.0005 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_4_action_west: """
|
||||
illegal -5.9915 illegal 8.5041 6.9611
|
||||
illegal -6.2490 __________ 5.5061 5.0057
|
||||
illegal -6.7277 illegal 1.6188 3.2015
|
||||
illegal -6.9948 __________ __________ 0.3196
|
||||
illegal -7.1894 0.0042 0.0000 0.0336
|
||||
"""
|
||||
|
||||
values_k_5: """
|
||||
-10.0000 8.4085 10.0000 8.5041 6.9611
|
||||
-10.0000 5.5324 __________ 6.9611 5.3370
|
||||
-10.0000 2.8880 1.0000 4.5452 3.4560
|
||||
-10.0000 0.7477 __________ __________ 0.4740
|
||||
-10.0000 0.0106 0.0042 0.0000 0.2687
|
||||
"""
|
||||
|
||||
q_values_k_5_action_north: """
|
||||
illegal 6.0541 illegal 7.6495 6.4039
|
||||
illegal 5.6521 __________ 7.2298 6.1188
|
||||
illegal 3.1733 illegal 5.4130 4.5627
|
||||
illegal 1.2467 __________ __________ 2.5736
|
||||
illegal -0.3613 0.0040 0.0246 0.3655
|
||||
"""
|
||||
|
||||
q_values_k_5_action_east: """
|
||||
illegal 8.4547 illegal 6.4039 6.1188
|
||||
illegal 5.0000 __________ 5.0171 4.7802
|
||||
illegal 1.2852 illegal 3.5239 3.0113
|
||||
illegal 0.7992 __________ __________ 0.6765
|
||||
illegal 0.0713 0.0008 0.1935 0.2603
|
||||
"""
|
||||
|
||||
q_values_k_5_action_exit: """
|
||||
-10.0000 illegal 10.0000 illegal illegal
|
||||
-10.0000 illegal __________ illegal illegal
|
||||
-10.0000 illegal 1.0000 illegal illegal
|
||||
-10.0000 illegal __________ __________ illegal
|
||||
-10.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_south: """
|
||||
illegal 3.9833 illegal 6.5385 5.2345
|
||||
illegal 1.6773 __________ 4.3794 3.5951
|
||||
illegal -0.2717 illegal 3.6736 1.0614
|
||||
illegal -0.8251 __________ __________ 0.2788
|
||||
illegal -0.8920 0.0040 0.0246 0.2177
|
||||
"""
|
||||
|
||||
q_values_k_5_action_west: """
|
||||
illegal -5.9453 illegal 8.5919 7.2298
|
||||
illegal -6.1833 __________ 6.1864 5.9496
|
||||
illegal -6.6348 illegal 1.7556 3.7955
|
||||
illegal -6.9391 __________ __________ 0.6765
|
||||
illegal -7.1318 0.0084 0.0030 0.0668
|
||||
"""
|
||||
|
||||
values_k_6: """
|
||||
-10.0000 8.4547 10.0000 8.5919 7.2298
|
||||
-10.0000 5.6521 __________ 7.2298 6.1188
|
||||
-10.0000 3.1733 1.0000 5.4130 4.5627
|
||||
-10.0000 1.2467 __________ __________ 2.5736
|
||||
-10.0000 0.0713 0.0084 0.1935 0.3655
|
||||
"""
|
||||
|
||||
q_values_k_6_action_north: """
|
||||
illegal 6.0874 illegal 7.7368 6.6294
|
||||
illegal 5.6961 __________ 7.3875 6.4068
|
||||
illegal 3.2595 illegal 5.7061 5.3034
|
||||
illegal 1.4970 __________ __________ 3.7484
|
||||
illegal -0.0017 0.0298 0.1730 1.9033
|
||||
"""
|
||||
|
||||
q_values_k_6_action_east: """
|
||||
illegal 8.4696 illegal 6.6294 6.4068
|
||||
illegal 5.1160 __________ 5.6660 5.4669
|
||||
illegal 1.3409 illegal 4.4230 4.0675
|
||||
illegal 1.1896 __________ __________ 2.2966
|
||||
illegal 0.1246 0.1408 0.2980 0.5277
|
||||
"""
|
||||
|
||||
q_values_k_6_action_exit: """
|
||||
-10.0000 illegal 10.0000 illegal illegal
|
||||
-10.0000 illegal __________ illegal illegal
|
||||
-10.0000 illegal 1.0000 illegal illegal
|
||||
-10.0000 illegal __________ __________ illegal
|
||||
-10.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_south: """
|
||||
illegal 4.0695 illegal 6.7561 5.8295
|
||||
illegal 1.8935 __________ 5.0988 4.4865
|
||||
illegal 0.0876 illegal 4.3980 2.7508
|
||||
illegal -0.7365 __________ __________ 0.7264
|
||||
illegal -0.8479 0.0298 0.1730 0.3135
|
||||
"""
|
||||
|
||||
q_values_k_6_action_west: """
|
||||
illegal -5.9304 illegal 8.6239 7.3875
|
||||
illegal -6.1535 __________ 6.4659 6.2668
|
||||
illegal -6.5791 illegal 1.8579 4.6797
|
||||
illegal -6.9080 __________ __________ 2.2966
|
||||
illegal -7.0814 0.0528 0.0408 0.4038
|
||||
"""
|
||||
|
||||
values_k_7: """
|
||||
-10.0000 8.4696 10.0000 8.6239 7.3875
|
||||
-10.0000 5.6961 __________ 7.3875 6.4068
|
||||
-10.0000 3.2595 1.0000 5.7061 5.3034
|
||||
-10.0000 1.4970 __________ __________ 3.7484
|
||||
-10.0000 0.1246 0.1408 0.2980 1.9033
|
||||
"""
|
||||
|
||||
q_values_k_7_action_north: """
|
||||
illegal 6.0981 illegal 7.7741 6.7600
|
||||
illegal 5.7108 __________ 7.4507 6.5605
|
||||
illegal 3.2912 illegal 5.8863 5.6038
|
||||
illegal 1.5816 __________ __________ 4.4932
|
||||
illegal 0.1905 0.1394 0.3985 2.8970
|
||||
"""
|
||||
|
||||
q_values_k_7_action_east: """
|
||||
illegal 8.4749 illegal 6.7600 6.5605
|
||||
illegal 5.1568 __________ 5.9026 5.7551
|
||||
illegal 1.3674 illegal 4.9969 4.7324
|
||||
illegal 1.3824 __________ __________ 3.3475
|
||||
illegal 0.2473 0.2399 1.4240 1.8790
|
||||
"""
|
||||
|
||||
q_values_k_7_action_exit: """
|
||||
-10.0000 illegal 10.0000 illegal illegal
|
||||
-10.0000 illegal __________ illegal illegal
|
||||
-10.0000 illegal 1.0000 illegal illegal
|
||||
-10.0000 illegal __________ __________ illegal
|
||||
-10.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_south: """
|
||||
illegal 4.1012 illegal 6.8839 6.0539
|
||||
illegal 1.9595 __________ 5.3499 5.0599
|
||||
illegal 0.2678 illegal 4.6757 3.6897
|
||||
illegal -0.6755 __________ __________ 2.0451
|
||||
illegal -0.7976 0.1394 0.3985 1.5685
|
||||
"""
|
||||
|
||||
q_values_k_7_action_west: """
|
||||
illegal -5.9251 illegal 8.6410 7.4507
|
||||
illegal -6.1444 __________ 6.6087 6.4612
|
||||
illegal -6.5526 illegal 1.8984 5.0224
|
||||
illegal -6.8954 __________ __________ 3.3475
|
||||
illegal -7.0541 0.1151 0.1550 0.7232
|
||||
"""
|
||||
|
||||
values_k_8: """
|
||||
-10.0000 8.4749 10.0000 8.6410 7.4507
|
||||
-10.0000 5.7108 __________ 7.4507 6.5605
|
||||
-10.0000 3.2912 1.0000 5.8863 5.6038
|
||||
-10.0000 1.5816 __________ __________ 4.4932
|
||||
-10.0000 0.2473 0.2399 1.4240 2.8970
|
||||
"""
|
||||
|
||||
q_values_k_8_action_north: """
|
||||
illegal 6.1019 illegal 7.7921 6.8128
|
||||
illegal 5.7159 __________ 7.4826 6.6255
|
||||
illegal 3.3017 illegal 5.9589 5.7577
|
||||
illegal 1.6120 __________ __________ 4.8435
|
||||
illegal 0.2603 0.3231 1.3076 3.6240
|
||||
"""
|
||||
|
||||
q_values_k_8_action_east: """
|
||||
illegal 8.4767 illegal 6.8128 6.6255
|
||||
illegal 5.1707 __________ 6.0310 5.8985
|
||||
illegal 1.3763 illegal 5.2350 5.0295
|
||||
illegal 1.4572 __________ __________ 4.0001
|
||||
illegal 0.3373 1.0685 2.3421 2.7509
|
||||
"""
|
||||
|
||||
q_values_k_8_action_exit: """
|
||||
-10.0000 illegal 10.0000 illegal illegal
|
||||
-10.0000 illegal __________ illegal illegal
|
||||
-10.0000 illegal 1.0000 illegal illegal
|
||||
-10.0000 illegal __________ __________ illegal
|
||||
-10.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_south: """
|
||||
illegal 4.1117 illegal 6.9351 6.1718
|
||||
illegal 1.9836 __________ 5.4992 5.2957
|
||||
illegal 0.3287 illegal 4.8325 4.2692
|
||||
illegal -0.5796 __________ __________ 2.8946
|
||||
illegal -0.7003 0.3231 1.3076 2.4747
|
||||
"""
|
||||
|
||||
q_values_k_8_action_west: """
|
||||
illegal -5.9233 illegal 8.6483 7.4826
|
||||
illegal -6.1411 __________ 6.6720 6.5394
|
||||
illegal -6.5437 illegal 1.9203 5.2330
|
||||
illegal -6.8815 __________ __________ 4.0001
|
||||
illegal -7.0354 0.2213 0.4290 1.6904
|
||||
"""
|
||||
|
||||
values_k_9: """
|
||||
-10.0000 8.4767 10.0000 8.6483 7.4826
|
||||
-10.0000 5.7159 __________ 7.4826 6.6255
|
||||
-10.0000 3.3017 1.0000 5.9589 5.7577
|
||||
-10.0000 1.6120 __________ __________ 4.8435
|
||||
-10.0000 0.3373 1.0685 2.3421 3.6240
|
||||
"""
|
||||
|
||||
q_values_k_9_action_north: """
|
||||
illegal 6.1032 illegal 7.8002 6.8392
|
||||
illegal 5.7177 __________ 7.4965 6.6572
|
||||
illegal 3.3055 illegal 5.9956 5.8249
|
||||
illegal 1.6223 __________ __________ 5.0174
|
||||
illegal 0.3568 1.0105 2.1087 4.0243
|
||||
"""
|
||||
|
||||
q_values_k_9_action_east: """
|
||||
illegal 8.4773 illegal 6.8392 6.6572
|
||||
illegal 5.1755 __________ 6.0850 5.9620
|
||||
illegal 1.3795 illegal 5.3553 5.1777
|
||||
illegal 1.4881 __________ __________ 4.3316
|
||||
illegal 0.9447 1.8787 3.0308 3.3713
|
||||
"""
|
||||
|
||||
q_values_k_9_action_exit: """
|
||||
-10.0000 illegal 10.0000 illegal illegal
|
||||
-10.0000 illegal __________ illegal illegal
|
||||
-10.0000 illegal 1.0000 illegal illegal
|
||||
-10.0000 illegal __________ __________ illegal
|
||||
-10.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_south: """
|
||||
illegal 4.1155 illegal 6.9609 6.2222
|
||||
illegal 1.9917 __________ 5.5601 5.4153
|
||||
illegal 0.3506 illegal 4.8986 4.5418
|
||||
illegal -0.5121 __________ __________ 3.4811
|
||||
illegal -0.5610 1.0105 2.1087 3.1462
|
||||
"""
|
||||
|
||||
q_values_k_9_action_west: """
|
||||
illegal -5.9227 illegal 8.6518 7.4965
|
||||
illegal -6.1399 __________ 6.7021 6.5791
|
||||
illegal -6.5405 illegal 1.9297 5.3226
|
||||
illegal -6.8725 __________ __________ 4.3316
|
||||
illegal -7.0246 0.4352 1.1909 2.4484
|
||||
"""
|
||||
|
||||
values_k_100: """
|
||||
-10.0000 8.4777 10.0000 8.6547 7.5087
|
||||
-10.0000 5.7186 __________ 7.5087 6.6836
|
||||
-10.0000 3.3074 1.0000 6.0258 5.8841
|
||||
-10.0000 2.0045 __________ __________ 5.1665
|
||||
-10.0000 2.9289 3.4513 3.9306 4.4765
|
||||
"""
|
||||
|
||||
q_values_k_100_action_north: """
|
||||
illegal 6.1039 illegal 7.8072 6.8610
|
||||
illegal 5.7186 __________ 7.5087 6.6836
|
||||
illegal 3.3074 illegal 6.0258 5.8841
|
||||
illegal 1.6617 __________ __________ 5.1665
|
||||
illegal 0.8539 3.1023 3.5435 4.4765
|
||||
"""
|
||||
|
||||
q_values_k_100_action_east: """
|
||||
illegal 8.4777 illegal 6.8610 6.6836
|
||||
illegal 5.1780 __________ 6.1334 6.0175
|
||||
illegal 1.4151 illegal 5.4546 5.3030
|
||||
illegal 2.0045 __________ __________ 4.6523
|
||||
illegal 2.9289 3.4513 3.9306 4.0910
|
||||
"""
|
||||
|
||||
q_values_k_100_action_exit: """
|
||||
-10.0000 illegal 10.0000 illegal illegal
|
||||
-10.0000 illegal __________ illegal illegal
|
||||
-10.0000 illegal 1.0000 illegal illegal
|
||||
-10.0000 illegal __________ __________ illegal
|
||||
-10.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_south: """
|
||||
illegal 4.1174 illegal 6.9820 6.2669
|
||||
illegal 1.9960 __________ 5.6159 5.5138
|
||||
illegal 0.6333 illegal 4.9582 4.7918
|
||||
illegal 1.3892 __________ __________ 4.1531
|
||||
illegal 1.5194 3.1023 3.5435 3.9797
|
||||
"""
|
||||
|
||||
q_values_k_100_action_west: """
|
||||
illegal -5.9223 illegal 8.6547 7.5087
|
||||
illegal -6.1393 __________ 6.7275 6.6116
|
||||
illegal -6.5049 illegal 1.9381 5.4051
|
||||
illegal -6.6387 __________ __________ 4.6523
|
||||
illegal -6.7560 2.7300 3.1924 3.6979
|
||||
"""
|
||||
|
||||
policy: """
|
||||
exit east exit west west
|
||||
exit north __________ north north
|
||||
exit north exit north north
|
||||
exit east __________ __________ north
|
||||
exit east east east north
|
||||
"""
|
||||
|
||||
actions: """
|
||||
north
|
||||
east
|
||||
exit
|
||||
south
|
||||
west
|
||||
"""
|
||||
|
24
reinforcement/test_cases/q1/4-discountgrid.test
Normal file
24
reinforcement/test_cases/q1/4-discountgrid.test
Normal file
|
@ -0,0 +1,24 @@
|
|||
class: "ValueIterationTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
-10 _ 10 _ _
|
||||
-10 _ # _ _
|
||||
-10 _ 1 _ _
|
||||
-10 _ # # _
|
||||
-10 S _ _ _
|
||||
"""
|
||||
discount: "0.9"
|
||||
noise: "0.2"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.2"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "3000"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
2
reinforcement/test_cases/q1/CONFIG
Normal file
2
reinforcement/test_cases/q1/CONFIG
Normal file
|
@ -0,0 +1,2 @@
|
|||
max_points: "6"
|
||||
class: "PassAllTestsQuestion"
|
2
reinforcement/test_cases/q2/1-bridge-grid.solution
Normal file
2
reinforcement/test_cases/q2/1-bridge-grid.solution
Normal file
|
@ -0,0 +1,2 @@
|
|||
# This is the solution file for test_cases/q2/1-bridge-grid.test.
|
||||
# File intentionally blank.
|
29
reinforcement/test_cases/q2/1-bridge-grid.test
Normal file
29
reinforcement/test_cases/q2/1-bridge-grid.test
Normal file
|
@ -0,0 +1,29 @@
|
|||
class: "GridPolicyTest"
|
||||
|
||||
# Function in module in analysis that returns (discount, noise)
|
||||
parameterFn: "question2"
|
||||
question2: "true"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
# -100 -100 -100 -100 -100 #
|
||||
1 S _ _ _ _ 10
|
||||
# -100 -100 -100 -100 -100 #
|
||||
"""
|
||||
gridName: "bridgeGrid"
|
||||
|
||||
# Policy specification
|
||||
# _ policy choice not checked
|
||||
# N, E, S, W policy action must be north, east, south, west
|
||||
#
|
||||
policy: """
|
||||
_ _ _ _ _ _ _
|
||||
_ E _ _ _ _ _
|
||||
_ _ _ _ _ _ _
|
||||
"""
|
||||
|
2
reinforcement/test_cases/q2/CONFIG
Normal file
2
reinforcement/test_cases/q2/CONFIG
Normal file
|
@ -0,0 +1,2 @@
|
|||
max_points: "1"
|
||||
class: "PassAllTestsQuestion"
|
2
reinforcement/test_cases/q3/1-question-3.1.solution
Normal file
2
reinforcement/test_cases/q3/1-question-3.1.solution
Normal file
|
@ -0,0 +1,2 @@
|
|||
# This is the solution file for test_cases/q3/1-question-3.1.test.
|
||||
# File intentionally blank.
|
31
reinforcement/test_cases/q3/1-question-3.1.test
Normal file
31
reinforcement/test_cases/q3/1-question-3.1.test
Normal file
|
@ -0,0 +1,31 @@
|
|||
class: "GridPolicyTest"
|
||||
|
||||
# Function in module in analysis that returns (discount, noise)
|
||||
parameterFn: "question3a"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
_ _ _ _ _
|
||||
_ # _ _ _
|
||||
_ # 1 # 10
|
||||
S _ _ _ _
|
||||
-10 -10 -10 -10 -10
|
||||
"""
|
||||
gridName: "discountGrid"
|
||||
|
||||
# Policy specification
|
||||
# _ policy choice not checked
|
||||
# N, E, S, W policy action must be north, east, south, west
|
||||
#
|
||||
policy: """
|
||||
_ _ _ _ _
|
||||
_ _ _ _ _
|
||||
_ _ _ _ _
|
||||
E E N _ _
|
||||
_ _ _ _ _
|
||||
"""
|
2
reinforcement/test_cases/q3/2-question-3.2.solution
Normal file
2
reinforcement/test_cases/q3/2-question-3.2.solution
Normal file
|
@ -0,0 +1,2 @@
|
|||
# This is the solution file for test_cases/q3/2-question-3.2.test.
|
||||
# File intentionally blank.
|
31
reinforcement/test_cases/q3/2-question-3.2.test
Normal file
31
reinforcement/test_cases/q3/2-question-3.2.test
Normal file
|
@ -0,0 +1,31 @@
|
|||
class: "GridPolicyTest"
|
||||
|
||||
# Function in module in analysis that returns (discount, noise)
|
||||
parameterFn: "question3b"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
_ _ _ _ _
|
||||
_ # _ _ _
|
||||
_ # 1 # 10
|
||||
S _ _ _ _
|
||||
-10 -10 -10 -10 -10
|
||||
"""
|
||||
gridName: "discountGrid"
|
||||
|
||||
# Policy specification
|
||||
# _ policy choice not checked
|
||||
# N, E, S, W policy action must be north, east, south, west
|
||||
#
|
||||
policy: """
|
||||
E E S _ _
|
||||
N _ S _ _
|
||||
N _ _ _ _
|
||||
N _ _ _ _
|
||||
_ _ _ _ _
|
||||
"""
|
2
reinforcement/test_cases/q3/3-question-3.3.solution
Normal file
2
reinforcement/test_cases/q3/3-question-3.3.solution
Normal file
|
@ -0,0 +1,2 @@
|
|||
# This is the solution file for test_cases/q3/3-question-3.3.test.
|
||||
# File intentionally blank.
|
31
reinforcement/test_cases/q3/3-question-3.3.test
Normal file
31
reinforcement/test_cases/q3/3-question-3.3.test
Normal file
|
@ -0,0 +1,31 @@
|
|||
class: "GridPolicyTest"
|
||||
|
||||
# Function in module in analysis that returns (discount, noise)
|
||||
parameterFn: "question3c"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
_ _ _ _ _
|
||||
_ # _ _ _
|
||||
_ # 1 # 10
|
||||
S _ _ _ _
|
||||
-10 -10 -10 -10 -10
|
||||
"""
|
||||
gridName: "discountGrid"
|
||||
|
||||
# Policy specification
|
||||
# _ policy choice not checked
|
||||
# N, E, S, W policy action must be north, east, south, west
|
||||
#
|
||||
policy: """
|
||||
_ _ _ _ _
|
||||
_ _ _ _ _
|
||||
_ _ _ _ _
|
||||
E E E E N
|
||||
_ _ _ _ _
|
||||
"""
|
2
reinforcement/test_cases/q3/4-question-3.4.solution
Normal file
2
reinforcement/test_cases/q3/4-question-3.4.solution
Normal file
|
@ -0,0 +1,2 @@
|
|||
# This is the solution file for test_cases/q3/4-question-3.4.test.
|
||||
# File intentionally blank.
|
36
reinforcement/test_cases/q3/4-question-3.4.test
Normal file
36
reinforcement/test_cases/q3/4-question-3.4.test
Normal file
|
@ -0,0 +1,36 @@
|
|||
class: "GridPolicyTest"
|
||||
|
||||
# Function in module in analysis that returns (discount, noise)
|
||||
parameterFn: "question3d"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
_ _ _ _ _
|
||||
_ # _ _ _
|
||||
_ # 1 # 10
|
||||
S _ _ _ _
|
||||
-10 -10 -10 -10 -10
|
||||
"""
|
||||
gridName: "discountGrid"
|
||||
|
||||
# Policy specification
|
||||
# _ policy choice not checked
|
||||
# N, E, S, W policy action must be north, east, south, west
|
||||
#
|
||||
policy: """
|
||||
_ _ _ _ _
|
||||
_ _ _ _ _
|
||||
_ _ _ _ _
|
||||
N _ _ _ _
|
||||
_ _ _ _ _
|
||||
"""
|
||||
|
||||
# State the most probable path must visit
|
||||
# (x,y) for a particular location; (0,0) is bottom left
|
||||
# TERMINAL_STATE for the terminal state
|
||||
pathVisits: "(4,2)"
|
2
reinforcement/test_cases/q3/5-question-3.5.solution
Normal file
2
reinforcement/test_cases/q3/5-question-3.5.solution
Normal file
|
@ -0,0 +1,2 @@
|
|||
# This is the solution file for test_cases/q3/5-question-3.5.test.
|
||||
# File intentionally blank.
|
36
reinforcement/test_cases/q3/5-question-3.5.test
Normal file
36
reinforcement/test_cases/q3/5-question-3.5.test
Normal file
|
@ -0,0 +1,36 @@
|
|||
class: "GridPolicyTest"
|
||||
|
||||
# Function in module in analysis that returns (discount, noise)
|
||||
parameterFn: "question3e"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
_ _ _ _ _
|
||||
_ # _ _ _
|
||||
_ # 1 # 10
|
||||
S _ _ _ _
|
||||
-10 -10 -10 -10 -10
|
||||
"""
|
||||
gridName: "discountGrid"
|
||||
|
||||
# Policy specification
|
||||
# _ policy choice not checked
|
||||
# N, E, S, W policy action must be north, east, south, west
|
||||
#
|
||||
policy: """
|
||||
_ _ _ _ _
|
||||
_ _ _ _ _
|
||||
_ _ _ _ _
|
||||
_ _ _ _ _
|
||||
_ _ _ _ _
|
||||
"""
|
||||
|
||||
# State the most probable path must not visit
|
||||
# (x,y) for a particular location; (0,0) is bottom left
|
||||
# TERMINAL_STATE for the terminal state
|
||||
pathNotVisits: "TERMINAL_STATE"
|
2
reinforcement/test_cases/q3/CONFIG
Normal file
2
reinforcement/test_cases/q3/CONFIG
Normal file
|
@ -0,0 +1,2 @@
|
|||
max_points: "5"
|
||||
class: "NumberPassedQuestion"
|
342
reinforcement/test_cases/q4/1-tinygrid.solution
Normal file
342
reinforcement/test_cases/q4/1-tinygrid.solution
Normal file
|
@ -0,0 +1,342 @@
|
|||
q_values_k_0_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
1.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
1.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
1.9000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
2.7100
|
||||
"""
|
||||
|
||||
q_values_k_4_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
2.7100
|
||||
"""
|
||||
|
||||
q_values_k_5_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
3.4390
|
||||
"""
|
||||
|
||||
q_values_k_6_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
3.4390
|
||||
"""
|
||||
|
||||
q_values_k_7_action_south: """
|
||||
illegal
|
||||
0.1720
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
4.0951
|
||||
"""
|
||||
|
||||
q_values_k_8_action_south: """
|
||||
illegal
|
||||
0.1720
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
4.6856
|
||||
"""
|
||||
|
||||
q_values_k_9_action_south: """
|
||||
illegal
|
||||
0.1720
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_north: """
|
||||
illegal
|
||||
-0.4534
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_east: """
|
||||
illegal
|
||||
0.4063
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_exit: """
|
||||
-9.4767
|
||||
illegal
|
||||
9.8175
|
||||
"""
|
||||
|
||||
q_values_k_100_action_south: """
|
||||
illegal
|
||||
2.1267
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_west: """
|
||||
illegal
|
||||
0.3919
|
||||
illegal
|
||||
"""
|
||||
|
||||
values: """
|
||||
-9.4767
|
||||
2.1267
|
||||
9.8175
|
||||
"""
|
||||
|
||||
policy: """
|
||||
exit
|
||||
south
|
||||
exit
|
||||
"""
|
||||
|
22
reinforcement/test_cases/q4/1-tinygrid.test
Normal file
22
reinforcement/test_cases/q4/1-tinygrid.test
Normal file
|
@ -0,0 +1,22 @@
|
|||
class: "QLearningTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
-10
|
||||
S
|
||||
10
|
||||
"""
|
||||
discount: "0.5"
|
||||
noise: "0.0"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.5"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "100"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
342
reinforcement/test_cases/q4/2-tinygrid-noisy.solution
Normal file
342
reinforcement/test_cases/q4/2-tinygrid-noisy.solution
Normal file
|
@ -0,0 +1,342 @@
|
|||
q_values_k_0_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
1.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
1.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
1.9000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
2.7100
|
||||
"""
|
||||
|
||||
q_values_k_4_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
2.7100
|
||||
"""
|
||||
|
||||
q_values_k_5_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
3.4390
|
||||
"""
|
||||
|
||||
q_values_k_6_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
3.4390
|
||||
"""
|
||||
|
||||
q_values_k_7_action_south: """
|
||||
illegal
|
||||
0.2579
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
4.0951
|
||||
"""
|
||||
|
||||
q_values_k_8_action_south: """
|
||||
illegal
|
||||
0.2579
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
4.6856
|
||||
"""
|
||||
|
||||
q_values_k_9_action_south: """
|
||||
illegal
|
||||
0.2579
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_north: """
|
||||
illegal
|
||||
-0.6670
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_east: """
|
||||
illegal
|
||||
0.9499
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_exit: """
|
||||
-9.4767
|
||||
illegal
|
||||
9.8175
|
||||
"""
|
||||
|
||||
q_values_k_100_action_south: """
|
||||
illegal
|
||||
3.2562
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_west: """
|
||||
illegal
|
||||
0.8236
|
||||
illegal
|
||||
"""
|
||||
|
||||
values: """
|
||||
-9.4767
|
||||
3.2562
|
||||
9.8175
|
||||
"""
|
||||
|
||||
policy: """
|
||||
exit
|
||||
south
|
||||
exit
|
||||
"""
|
||||
|
22
reinforcement/test_cases/q4/2-tinygrid-noisy.test
Normal file
22
reinforcement/test_cases/q4/2-tinygrid-noisy.test
Normal file
|
@ -0,0 +1,22 @@
|
|||
class: "QLearningTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
-10
|
||||
S
|
||||
10
|
||||
"""
|
||||
discount: "0.75"
|
||||
noise: "0.25"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.5"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "100"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
570
reinforcement/test_cases/q4/3-bridge.solution
Normal file
570
reinforcement/test_cases/q4/3-bridge.solution
Normal file
|
@ -0,0 +1,570 @@
|
|||
q_values_k_0_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_0_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_0_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_0_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_0_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_1_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_1_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_1_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_1_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_1_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_2_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_2_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_2_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_2_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_2_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_3_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_3_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_3_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_3_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_3_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_4_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_4_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_4_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_4_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_4_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_5_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_5_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_5_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
0.0000 illegal -10.0000
|
||||
0.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_5_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_5_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_6_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_6_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_6_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal -10.0000
|
||||
0.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_6_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_6_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_7_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_7_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_7_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal -10.0000
|
||||
0.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_7_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_7_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_8_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_8_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_8_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal -10.0000
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_8_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_8_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_9_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_9_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_9_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal -10.0000
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
__________ 0.1000 __________
|
||||
"""
|
||||
|
||||
q_values_k_9_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_9_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_500_action_north: """
|
||||
__________ illegal __________
|
||||
illegal -5.8648 illegal
|
||||
illegal -0.7995 illegal
|
||||
illegal -0.1671 illegal
|
||||
illegal -1.2642 illegal
|
||||
illegal -0.5871 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_500_action_east: """
|
||||
__________ illegal __________
|
||||
illegal -17.0676 illegal
|
||||
illegal -26.5534 illegal
|
||||
illegal -3.6957 illegal
|
||||
illegal -43.5952 illegal
|
||||
illegal -31.6884 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_500_action_exit: """
|
||||
__________ 9.3539 __________
|
||||
-96.5663 illegal -96.9097
|
||||
-97.7472 illegal -94.1850
|
||||
-89.0581 illegal -96.9097
|
||||
-97.2187 illegal -87.8423
|
||||
-92.8210 illegal -97.2187
|
||||
__________ 0.9576 __________
|
||||
"""
|
||||
|
||||
q_values_k_500_action_south: """
|
||||
__________ illegal __________
|
||||
illegal -6.8377 illegal
|
||||
illegal -6.7277 illegal
|
||||
illegal -3.4723 illegal
|
||||
illegal -8.4015 illegal
|
||||
illegal -5.5718 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_500_action_west: """
|
||||
__________ illegal __________
|
||||
illegal -27.0626 illegal
|
||||
illegal -39.0610 illegal
|
||||
illegal -40.5887 illegal
|
||||
illegal -16.2839 illegal
|
||||
illegal -20.7770 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
values: """
|
||||
__________ 9.3539 __________
|
||||
-96.5663 -5.8648 -96.9097
|
||||
-97.7472 -0.7995 -94.1850
|
||||
-89.0581 -0.1671 -96.9097
|
||||
-97.2187 -1.2642 -87.8423
|
||||
-92.8210 -0.5871 -97.2187
|
||||
__________ 0.9576 __________
|
||||
"""
|
||||
|
||||
policy: """
|
||||
__________ exit __________
|
||||
exit north exit
|
||||
exit north exit
|
||||
exit north exit
|
||||
exit north exit
|
||||
exit north exit
|
||||
__________ exit __________
|
||||
"""
|
||||
|
27
reinforcement/test_cases/q4/3-bridge.test
Normal file
27
reinforcement/test_cases/q4/3-bridge.test
Normal file
|
@ -0,0 +1,27 @@
|
|||
class: "QLearningTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
# 10 #
|
||||
-100 _ -100
|
||||
-100 _ -100
|
||||
-100 _ -100
|
||||
-100 _ -100
|
||||
-100 S -100
|
||||
# 1 #
|
||||
"""
|
||||
gridName: "bridgeGrid"
|
||||
discount: "0.85"
|
||||
noise: "0.1"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.5"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "500"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
456
reinforcement/test_cases/q4/4-discountgrid.solution
Normal file
456
reinforcement/test_cases/q4/4-discountgrid.solution
Normal file
|
@ -0,0 +1,456 @@
|
|||
q_values_k_0_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ __________ illegal
|
||||
0.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ __________ illegal
|
||||
0.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ __________ illegal
|
||||
0.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ __________ illegal
|
||||
-1.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_4_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_4_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_4_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ __________ illegal
|
||||
-1.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_4_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_5_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_5_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_5_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ __________ illegal
|
||||
-1.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_5_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_6_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_6_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_6_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
-1.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ __________ illegal
|
||||
-1.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_6_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_7_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_7_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_7_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
-1.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.1000 illegal illegal
|
||||
0.0000 illegal __________ __________ illegal
|
||||
-1.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_7_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_8_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_8_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_8_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
-1.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.1000 illegal illegal
|
||||
-1.0000 illegal __________ __________ illegal
|
||||
-1.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_8_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_9_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal -0.0900 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_9_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_9_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
-1.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.1000 illegal illegal
|
||||
-1.0000 illegal __________ __________ illegal
|
||||
-1.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_9_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_3000_action_north: """
|
||||
illegal 4.3205 illegal 6.1517 3.8095
|
||||
illegal 4.4238 __________ 5.2284 3.5129
|
||||
illegal 1.0694 illegal 3.6867 2.0418
|
||||
illegal 0.3423 __________ __________ 1.0655
|
||||
illegal 0.0073 0.0079 0.0484 0.3768
|
||||
"""
|
||||
|
||||
q_values_k_3000_action_east: """
|
||||
illegal 8.0584 illegal 3.7245 3.3947
|
||||
illegal 2.0499 __________ 3.2373 2.1742
|
||||
illegal 0.8687 illegal 1.7398 1.2671
|
||||
illegal 0.2927 __________ __________ 0.6669
|
||||
illegal 0.0239 0.0097 0.1611 0.2051
|
||||
"""
|
||||
|
||||
q_values_k_3000_action_exit: """
|
||||
-10.0000 illegal 10.0000 illegal illegal
|
||||
-10.0000 illegal __________ illegal illegal
|
||||
-10.0000 illegal 1.0000 illegal illegal
|
||||
-10.0000 illegal __________ __________ illegal
|
||||
-9.9999 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_3000_action_south: """
|
||||
illegal -0.3521 illegal 3.6948 2.9139
|
||||
illegal -0.5605 __________ 2.1346 1.5674
|
||||
illegal 0.2093 illegal 1.5389 0.5521
|
||||
illegal -0.5505 __________ __________ 0.1006
|
||||
illegal -1.8501 0.0060 0.0514 0.1223
|
||||
"""
|
||||
|
||||
q_values_k_3000_action_west: """
|
||||
illegal -6.2001 illegal 7.5146 4.9014
|
||||
illegal -5.4013 __________ 4.0484 3.4126
|
||||
illegal -8.0399 illegal 0.9653 1.6081
|
||||
illegal -7.4767 __________ __________ 0.3934
|
||||
illegal -6.3432 0.0179 0.0188 0.1028
|
||||
"""
|
||||
|
||||
values: """
|
||||
-10.0000 8.0584 10.0000 7.5146 4.9014
|
||||
-10.0000 4.4238 __________ 5.2284 3.5129
|
||||
-10.0000 1.0694 1.0000 3.6867 2.0418
|
||||
-10.0000 0.3423 __________ __________ 1.0655
|
||||
-9.9999 0.0239 0.0179 0.1611 0.3768
|
||||
"""
|
||||
|
||||
policy: """
|
||||
exit east exit west west
|
||||
exit north __________ north north
|
||||
exit north exit north north
|
||||
exit north __________ __________ north
|
||||
exit east west east north
|
||||
"""
|
||||
|
24
reinforcement/test_cases/q4/4-discountgrid.test
Normal file
24
reinforcement/test_cases/q4/4-discountgrid.test
Normal file
|
@ -0,0 +1,24 @@
|
|||
class: "QLearningTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
-10 _ 10 _ _
|
||||
-10 _ # _ _
|
||||
-10 _ 1 _ _
|
||||
-10 _ # # _
|
||||
-10 S _ _ _
|
||||
"""
|
||||
discount: "0.9"
|
||||
noise: "0.2"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.2"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "3000"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
2
reinforcement/test_cases/q4/CONFIG
Normal file
2
reinforcement/test_cases/q4/CONFIG
Normal file
|
@ -0,0 +1,2 @@
|
|||
max_points: "5"
|
||||
class: "PassAllTestsQuestion"
|
2
reinforcement/test_cases/q5/1-tinygrid.solution
Normal file
2
reinforcement/test_cases/q5/1-tinygrid.solution
Normal file
|
@ -0,0 +1,2 @@
|
|||
# This is the solution file for test_cases/q5/1-tinygrid.test.
|
||||
# File intentionally blank.
|
22
reinforcement/test_cases/q5/1-tinygrid.test
Normal file
22
reinforcement/test_cases/q5/1-tinygrid.test
Normal file
|
@ -0,0 +1,22 @@
|
|||
class: "EpsilonGreedyTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
-10
|
||||
S
|
||||
10
|
||||
"""
|
||||
discount: "0.5"
|
||||
noise: "0.0"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.5"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "100"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
2
reinforcement/test_cases/q5/2-tinygrid-noisy.solution
Normal file
2
reinforcement/test_cases/q5/2-tinygrid-noisy.solution
Normal file
|
@ -0,0 +1,2 @@
|
|||
# This is the solution file for test_cases/q5/2-tinygrid-noisy.test.
|
||||
# File intentionally blank.
|
22
reinforcement/test_cases/q5/2-tinygrid-noisy.test
Normal file
22
reinforcement/test_cases/q5/2-tinygrid-noisy.test
Normal file
|
@ -0,0 +1,22 @@
|
|||
class: "EpsilonGreedyTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
-10
|
||||
S
|
||||
10
|
||||
"""
|
||||
discount: "0.75"
|
||||
noise: "0.25"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.5"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "100"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
2
reinforcement/test_cases/q5/3-bridge.solution
Normal file
2
reinforcement/test_cases/q5/3-bridge.solution
Normal file
|
@ -0,0 +1,2 @@
|
|||
# This is the solution file for test_cases/q5/3-bridge.test.
|
||||
# File intentionally blank.
|
27
reinforcement/test_cases/q5/3-bridge.test
Normal file
27
reinforcement/test_cases/q5/3-bridge.test
Normal file
|
@ -0,0 +1,27 @@
|
|||
class: "EpsilonGreedyTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
# 10 #
|
||||
-100 _ -100
|
||||
-100 _ -100
|
||||
-100 _ -100
|
||||
-100 _ -100
|
||||
-100 S -100
|
||||
# 1 #
|
||||
"""
|
||||
gridName: "bridgeGrid"
|
||||
discount: "0.85"
|
||||
noise: "0.1"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.5"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "500"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
2
reinforcement/test_cases/q5/4-discountgrid.solution
Normal file
2
reinforcement/test_cases/q5/4-discountgrid.solution
Normal file
|
@ -0,0 +1,2 @@
|
|||
# This is the solution file for test_cases/q5/4-discountgrid.test.
|
||||
# File intentionally blank.
|
24
reinforcement/test_cases/q5/4-discountgrid.test
Normal file
24
reinforcement/test_cases/q5/4-discountgrid.test
Normal file
|
@ -0,0 +1,24 @@
|
|||
class: "EpsilonGreedyTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
-10 _ 10 _ _
|
||||
-10 _ # _ _
|
||||
-10 _ 1 _ _
|
||||
-10 _ # # _
|
||||
-10 S _ _ _
|
||||
"""
|
||||
discount: "0.9"
|
||||
noise: "0.2"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.2"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "3000"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
2
reinforcement/test_cases/q5/CONFIG
Normal file
2
reinforcement/test_cases/q5/CONFIG
Normal file
|
@ -0,0 +1,2 @@
|
|||
max_points: "3"
|
||||
class: "PassAllTestsQuestion"
|
2
reinforcement/test_cases/q6/CONFIG
Normal file
2
reinforcement/test_cases/q6/CONFIG
Normal file
|
@ -0,0 +1,2 @@
|
|||
max_points: "1"
|
||||
class: "PassAllTestsQuestion"
|
2
reinforcement/test_cases/q6/grade-agent.solution
Normal file
2
reinforcement/test_cases/q6/grade-agent.solution
Normal file
|
@ -0,0 +1,2 @@
|
|||
# This is the solution file for test_cases/q6/grade-agent.test.
|
||||
# File intentionally blank.
|
2
reinforcement/test_cases/q6/grade-agent.test
Normal file
2
reinforcement/test_cases/q6/grade-agent.test
Normal file
|
@ -0,0 +1,2 @@
|
|||
class: "Question6Test"
|
||||
|
2
reinforcement/test_cases/q7/CONFIG
Normal file
2
reinforcement/test_cases/q7/CONFIG
Normal file
|
@ -0,0 +1,2 @@
|
|||
max_points: "1"
|
||||
class: "PartialCreditQuestion"
|
2
reinforcement/test_cases/q7/grade-agent.solution
Normal file
2
reinforcement/test_cases/q7/grade-agent.solution
Normal file
|
@ -0,0 +1,2 @@
|
|||
# This is the solution file for test_cases/q7/grade-agent.test.
|
||||
# File intentionally blank.
|
6
reinforcement/test_cases/q7/grade-agent.test
Normal file
6
reinforcement/test_cases/q7/grade-agent.test
Normal file
|
@ -0,0 +1,6 @@
|
|||
class: "EvalAgentTest"
|
||||
|
||||
# 100 test games after 2000 training games
|
||||
pacmanParams: "-p PacmanQAgent -x 2000 -n 2100 -l smallGrid -q -f --fixRandomSeed"
|
||||
|
||||
winsThresholds: "70"
|
429
reinforcement/test_cases/q8/1-tinygrid.solution
Normal file
429
reinforcement/test_cases/q8/1-tinygrid.solution
Normal file
|
@ -0,0 +1,429 @@
|
|||
weights_k_0: """
|
||||
{((0, 0), 'exit'): 0,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_0_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_1: """
|
||||
{((0, 0), 'exit'): 1.0,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_1_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
1.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_2: """
|
||||
{((0, 0), 'exit'): 1.0,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0.0,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_2_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
1.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_3: """
|
||||
{((0, 0), 'exit'): 1.9,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0.0,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_3_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
1.9000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_4: """
|
||||
{((0, 0), 'exit'): 2.71,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0.0,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_4_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
2.7100
|
||||
"""
|
||||
|
||||
q_values_k_4_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_5: """
|
||||
{((0, 0), 'exit'): 2.71,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0.0,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): -1.0}
|
||||
"""
|
||||
|
||||
q_values_k_5_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
2.7100
|
||||
"""
|
||||
|
||||
q_values_k_5_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_6: """
|
||||
{((0, 0), 'exit'): 3.439,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0.0,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): -1.0}
|
||||
"""
|
||||
|
||||
q_values_k_6_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
3.4390
|
||||
"""
|
||||
|
||||
q_values_k_6_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_7: """
|
||||
{((0, 0), 'exit'): 3.439,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0.17195000000000002,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): -1.0}
|
||||
"""
|
||||
|
||||
q_values_k_7_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
3.4390
|
||||
"""
|
||||
|
||||
q_values_k_7_action_south: """
|
||||
illegal
|
||||
0.1720
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_8: """
|
||||
{((0, 0), 'exit'): 4.0951,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0.17195000000000002,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): -1.0}
|
||||
"""
|
||||
|
||||
q_values_k_8_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
4.0951
|
||||
"""
|
||||
|
||||
q_values_k_8_action_south: """
|
||||
illegal
|
||||
0.1720
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_9: """
|
||||
{((0, 0), 'exit'): 4.68559,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0.17195000000000002,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): -1.0}
|
||||
"""
|
||||
|
||||
q_values_k_9_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
4.6856
|
||||
"""
|
||||
|
||||
q_values_k_9_action_south: """
|
||||
illegal
|
||||
0.1720
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_100: """
|
||||
{((0, 0), 'exit'): 9.817519963685992,
|
||||
((0, 1), 'east'): 0.40629236674335106,
|
||||
((0, 1), 'north'): -0.4534185789984799,
|
||||
((0, 1), 'south'): 2.126721095524319,
|
||||
((0, 1), 'west'): 0.39193283364906867,
|
||||
((0, 2), 'exit'): -9.476652366972639}
|
||||
"""
|
||||
|
||||
q_values_k_100_action_north: """
|
||||
illegal
|
||||
-0.4534
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_east: """
|
||||
illegal
|
||||
0.4063
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_exit: """
|
||||
-9.4767
|
||||
illegal
|
||||
9.8175
|
||||
"""
|
||||
|
||||
q_values_k_100_action_south: """
|
||||
illegal
|
||||
2.1267
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_west: """
|
||||
illegal
|
||||
0.3919
|
||||
illegal
|
||||
"""
|
||||
|
22
reinforcement/test_cases/q8/1-tinygrid.test
Normal file
22
reinforcement/test_cases/q8/1-tinygrid.test
Normal file
|
@ -0,0 +1,22 @@
|
|||
class: "ApproximateQLearningTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
-10
|
||||
S
|
||||
10
|
||||
"""
|
||||
discount: "0.5"
|
||||
noise: "0.0"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.5"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "100"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
429
reinforcement/test_cases/q8/2-tinygrid-noisy.solution
Normal file
429
reinforcement/test_cases/q8/2-tinygrid-noisy.solution
Normal file
|
@ -0,0 +1,429 @@
|
|||
weights_k_0: """
|
||||
{((0, 0), 'exit'): 0,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_0_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_1: """
|
||||
{((0, 0), 'exit'): 1.0,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_1_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
1.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_2: """
|
||||
{((0, 0), 'exit'): 1.0,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0.0,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_2_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
1.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_3: """
|
||||
{((0, 0), 'exit'): 1.9,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0.0,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_3_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
1.9000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_4: """
|
||||
{((0, 0), 'exit'): 2.71,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0.0,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_4_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
2.7100
|
||||
"""
|
||||
|
||||
q_values_k_4_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_5: """
|
||||
{((0, 0), 'exit'): 2.71,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0.0,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): -1.0}
|
||||
"""
|
||||
|
||||
q_values_k_5_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
2.7100
|
||||
"""
|
||||
|
||||
q_values_k_5_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_6: """
|
||||
{((0, 0), 'exit'): 3.439,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0.0,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): -1.0}
|
||||
"""
|
||||
|
||||
q_values_k_6_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
3.4390
|
||||
"""
|
||||
|
||||
q_values_k_6_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_7: """
|
||||
{((0, 0), 'exit'): 3.439,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0.257925,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): -1.0}
|
||||
"""
|
||||
|
||||
q_values_k_7_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
3.4390
|
||||
"""
|
||||
|
||||
q_values_k_7_action_south: """
|
||||
illegal
|
||||
0.2579
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_8: """
|
||||
{((0, 0), 'exit'): 4.0951,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0.257925,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): -1.0}
|
||||
"""
|
||||
|
||||
q_values_k_8_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
4.0951
|
||||
"""
|
||||
|
||||
q_values_k_8_action_south: """
|
||||
illegal
|
||||
0.2579
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_9: """
|
||||
{((0, 0), 'exit'): 4.68559,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0.257925,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): -1.0}
|
||||
"""
|
||||
|
||||
q_values_k_9_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
4.6856
|
||||
"""
|
||||
|
||||
q_values_k_9_action_south: """
|
||||
illegal
|
||||
0.2579
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_100: """
|
||||
{((0, 0), 'exit'): 9.817519963685992,
|
||||
((0, 1), 'east'): 0.9498968104823575,
|
||||
((0, 1), 'north'): -0.66699795412272,
|
||||
((0, 1), 'south'): 3.256207905310105,
|
||||
((0, 1), 'west'): 0.8236280735014627,
|
||||
((0, 2), 'exit'): -9.476652366972639}
|
||||
"""
|
||||
|
||||
q_values_k_100_action_north: """
|
||||
illegal
|
||||
-0.6670
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_east: """
|
||||
illegal
|
||||
0.9499
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_exit: """
|
||||
-9.4767
|
||||
illegal
|
||||
9.8175
|
||||
"""
|
||||
|
||||
q_values_k_100_action_south: """
|
||||
illegal
|
||||
3.2562
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_west: """
|
||||
illegal
|
||||
0.8236
|
||||
illegal
|
||||
"""
|
||||
|
22
reinforcement/test_cases/q8/2-tinygrid-noisy.test
Normal file
22
reinforcement/test_cases/q8/2-tinygrid-noisy.test
Normal file
|
@ -0,0 +1,22 @@
|
|||
class: "ApproximateQLearningTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
-10
|
||||
S
|
||||
10
|
||||
"""
|
||||
discount: "0.75"
|
||||
noise: "0.25"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.5"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "100"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
935
reinforcement/test_cases/q8/3-bridge.solution
Normal file
935
reinforcement/test_cases/q8/3-bridge.solution
Normal file
|
@ -0,0 +1,935 @@
|
|||
weights_k_0: """
|
||||
{((0, 1), 'exit'): 0,
|
||||
((0, 2), 'exit'): 0,
|
||||
((0, 3), 'exit'): 0,
|
||||
((0, 4), 'exit'): 0,
|
||||
((0, 5), 'exit'): 0,
|
||||
((1, 0), 'exit'): 0,
|
||||
((1, 1), 'east'): 0,
|
||||
((1, 1), 'north'): 0,
|
||||
((1, 1), 'south'): 0,
|
||||
((1, 1), 'west'): 0,
|
||||
((1, 2), 'east'): 0,
|
||||
((1, 2), 'north'): 0,
|
||||
((1, 2), 'south'): 0,
|
||||
((1, 2), 'west'): 0,
|
||||
((1, 3), 'east'): 0,
|
||||
((1, 3), 'north'): 0,
|
||||
((1, 3), 'south'): 0,
|
||||
((1, 3), 'west'): 0,
|
||||
((1, 4), 'east'): 0,
|
||||
((1, 4), 'north'): 0,
|
||||
((1, 4), 'south'): 0,
|
||||
((1, 4), 'west'): 0,
|
||||
((1, 5), 'east'): 0,
|
||||
((1, 5), 'north'): 0,
|
||||
((1, 5), 'south'): 0,
|
||||
((1, 5), 'west'): 0,
|
||||
((1, 6), 'exit'): 0,
|
||||
((2, 1), 'exit'): 0,
|
||||
((2, 2), 'exit'): 0,
|
||||
((2, 3), 'exit'): 0,
|
||||
((2, 4), 'exit'): 0,
|
||||
((2, 5), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_0_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_0_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_0_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_0_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_0_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
weights_k_1: """
|
||||
{((0, 1), 'exit'): 0,
|
||||
((0, 2), 'exit'): 0,
|
||||
((0, 3), 'exit'): 0,
|
||||
((0, 4), 'exit'): 0,
|
||||
((0, 5), 'exit'): -10.0,
|
||||
((1, 0), 'exit'): 0,
|
||||
((1, 1), 'east'): 0,
|
||||
((1, 1), 'north'): 0,
|
||||
((1, 1), 'south'): 0,
|
||||
((1, 1), 'west'): 0,
|
||||
((1, 2), 'east'): 0,
|
||||
((1, 2), 'north'): 0,
|
||||
((1, 2), 'south'): 0,
|
||||
((1, 2), 'west'): 0,
|
||||
((1, 3), 'east'): 0,
|
||||
((1, 3), 'north'): 0,
|
||||
((1, 3), 'south'): 0,
|
||||
((1, 3), 'west'): 0,
|
||||
((1, 4), 'east'): 0,
|
||||
((1, 4), 'north'): 0,
|
||||
((1, 4), 'south'): 0,
|
||||
((1, 4), 'west'): 0,
|
||||
((1, 5), 'east'): 0,
|
||||
((1, 5), 'north'): 0,
|
||||
((1, 5), 'south'): 0,
|
||||
((1, 5), 'west'): 0,
|
||||
((1, 6), 'exit'): 0,
|
||||
((2, 1), 'exit'): 0,
|
||||
((2, 2), 'exit'): 0,
|
||||
((2, 3), 'exit'): 0,
|
||||
((2, 4), 'exit'): 0,
|
||||
((2, 5), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_1_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_1_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_1_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_1_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_1_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
weights_k_2: """
|
||||
{((0, 1), 'exit'): 0,
|
||||
((0, 2), 'exit'): 0,
|
||||
((0, 3), 'exit'): 0,
|
||||
((0, 4), 'exit'): 0,
|
||||
((0, 5), 'exit'): -10.0,
|
||||
((1, 0), 'exit'): 0,
|
||||
((1, 1), 'east'): 0,
|
||||
((1, 1), 'north'): 0,
|
||||
((1, 1), 'south'): 0,
|
||||
((1, 1), 'west'): 0,
|
||||
((1, 2), 'east'): 0,
|
||||
((1, 2), 'north'): 0,
|
||||
((1, 2), 'south'): 0,
|
||||
((1, 2), 'west'): 0,
|
||||
((1, 3), 'east'): 0,
|
||||
((1, 3), 'north'): 0,
|
||||
((1, 3), 'south'): 0,
|
||||
((1, 3), 'west'): 0,
|
||||
((1, 4), 'east'): 0,
|
||||
((1, 4), 'north'): 0,
|
||||
((1, 4), 'south'): 0,
|
||||
((1, 4), 'west'): 0,
|
||||
((1, 5), 'east'): 0,
|
||||
((1, 5), 'north'): 0,
|
||||
((1, 5), 'south'): 0.0,
|
||||
((1, 5), 'west'): 0,
|
||||
((1, 6), 'exit'): 0,
|
||||
((2, 1), 'exit'): 0,
|
||||
((2, 2), 'exit'): 0,
|
||||
((2, 3), 'exit'): 0,
|
||||
((2, 4), 'exit'): 0,
|
||||
((2, 5), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_2_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_2_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_2_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_2_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_2_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
weights_k_3: """
|
||||
{((0, 1), 'exit'): -10.0,
|
||||
((0, 2), 'exit'): 0,
|
||||
((0, 3), 'exit'): 0,
|
||||
((0, 4), 'exit'): 0,
|
||||
((0, 5), 'exit'): -10.0,
|
||||
((1, 0), 'exit'): 0,
|
||||
((1, 1), 'east'): 0,
|
||||
((1, 1), 'north'): 0,
|
||||
((1, 1), 'south'): 0,
|
||||
((1, 1), 'west'): 0,
|
||||
((1, 2), 'east'): 0,
|
||||
((1, 2), 'north'): 0,
|
||||
((1, 2), 'south'): 0,
|
||||
((1, 2), 'west'): 0,
|
||||
((1, 3), 'east'): 0,
|
||||
((1, 3), 'north'): 0,
|
||||
((1, 3), 'south'): 0,
|
||||
((1, 3), 'west'): 0,
|
||||
((1, 4), 'east'): 0,
|
||||
((1, 4), 'north'): 0,
|
||||
((1, 4), 'south'): 0,
|
||||
((1, 4), 'west'): 0,
|
||||
((1, 5), 'east'): 0,
|
||||
((1, 5), 'north'): 0,
|
||||
((1, 5), 'south'): 0.0,
|
||||
((1, 5), 'west'): 0,
|
||||
((1, 6), 'exit'): 0,
|
||||
((2, 1), 'exit'): 0,
|
||||
((2, 2), 'exit'): 0,
|
||||
((2, 3), 'exit'): 0,
|
||||
((2, 4), 'exit'): 0,
|
||||
((2, 5), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_3_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_3_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_3_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_3_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_3_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
weights_k_4: """
|
||||
{((0, 1), 'exit'): -10.0,
|
||||
((0, 2), 'exit'): 0,
|
||||
((0, 3), 'exit'): 0,
|
||||
((0, 4), 'exit'): -10.0,
|
||||
((0, 5), 'exit'): -10.0,
|
||||
((1, 0), 'exit'): 0,
|
||||
((1, 1), 'east'): 0,
|
||||
((1, 1), 'north'): 0,
|
||||
((1, 1), 'south'): 0,
|
||||
((1, 1), 'west'): 0,
|
||||
((1, 2), 'east'): 0,
|
||||
((1, 2), 'north'): 0,
|
||||
((1, 2), 'south'): 0,
|
||||
((1, 2), 'west'): 0,
|
||||
((1, 3), 'east'): 0,
|
||||
((1, 3), 'north'): 0,
|
||||
((1, 3), 'south'): 0,
|
||||
((1, 3), 'west'): 0,
|
||||
((1, 4), 'east'): 0,
|
||||
((1, 4), 'north'): 0,
|
||||
((1, 4), 'south'): 0,
|
||||
((1, 4), 'west'): 0,
|
||||
((1, 5), 'east'): 0,
|
||||
((1, 5), 'north'): 0,
|
||||
((1, 5), 'south'): 0.0,
|
||||
((1, 5), 'west'): 0,
|
||||
((1, 6), 'exit'): 0,
|
||||
((2, 1), 'exit'): 0,
|
||||
((2, 2), 'exit'): 0,
|
||||
((2, 3), 'exit'): 0,
|
||||
((2, 4), 'exit'): 0,
|
||||
((2, 5), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_4_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_4_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_4_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_4_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_4_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
weights_k_5: """
|
||||
{((0, 1), 'exit'): -10.0,
|
||||
((0, 2), 'exit'): 0,
|
||||
((0, 3), 'exit'): 0,
|
||||
((0, 4), 'exit'): -10.0,
|
||||
((0, 5), 'exit'): -10.0,
|
||||
((1, 0), 'exit'): 0,
|
||||
((1, 1), 'east'): 0,
|
||||
((1, 1), 'north'): 0,
|
||||
((1, 1), 'south'): 0,
|
||||
((1, 1), 'west'): 0,
|
||||
((1, 2), 'east'): 0,
|
||||
((1, 2), 'north'): 0,
|
||||
((1, 2), 'south'): 0,
|
||||
((1, 2), 'west'): 0,
|
||||
((1, 3), 'east'): 0,
|
||||
((1, 3), 'north'): 0,
|
||||
((1, 3), 'south'): 0,
|
||||
((1, 3), 'west'): 0,
|
||||
((1, 4), 'east'): 0,
|
||||
((1, 4), 'north'): 0,
|
||||
((1, 4), 'south'): 0,
|
||||
((1, 4), 'west'): 0,
|
||||
((1, 5), 'east'): 0,
|
||||
((1, 5), 'north'): 0,
|
||||
((1, 5), 'south'): 0.0,
|
||||
((1, 5), 'west'): 0,
|
||||
((1, 6), 'exit'): 0,
|
||||
((2, 1), 'exit'): 0,
|
||||
((2, 2), 'exit'): 0,
|
||||
((2, 3), 'exit'): -10.0,
|
||||
((2, 4), 'exit'): 0,
|
||||
((2, 5), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_5_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_5_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_5_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
0.0000 illegal -10.0000
|
||||
0.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_5_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_5_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
weights_k_6: """
|
||||
{((0, 1), 'exit'): -10.0,
|
||||
((0, 2), 'exit'): 0,
|
||||
((0, 3), 'exit'): -10.0,
|
||||
((0, 4), 'exit'): -10.0,
|
||||
((0, 5), 'exit'): -10.0,
|
||||
((1, 0), 'exit'): 0,
|
||||
((1, 1), 'east'): 0,
|
||||
((1, 1), 'north'): 0,
|
||||
((1, 1), 'south'): 0,
|
||||
((1, 1), 'west'): 0,
|
||||
((1, 2), 'east'): 0,
|
||||
((1, 2), 'north'): 0,
|
||||
((1, 2), 'south'): 0,
|
||||
((1, 2), 'west'): 0,
|
||||
((1, 3), 'east'): 0,
|
||||
((1, 3), 'north'): 0,
|
||||
((1, 3), 'south'): 0,
|
||||
((1, 3), 'west'): 0,
|
||||
((1, 4), 'east'): 0,
|
||||
((1, 4), 'north'): 0,
|
||||
((1, 4), 'south'): 0,
|
||||
((1, 4), 'west'): 0,
|
||||
((1, 5), 'east'): 0,
|
||||
((1, 5), 'north'): 0,
|
||||
((1, 5), 'south'): 0.0,
|
||||
((1, 5), 'west'): 0,
|
||||
((1, 6), 'exit'): 0,
|
||||
((2, 1), 'exit'): 0,
|
||||
((2, 2), 'exit'): 0,
|
||||
((2, 3), 'exit'): -10.0,
|
||||
((2, 4), 'exit'): 0,
|
||||
((2, 5), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_6_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_6_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_6_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal -10.0000
|
||||
0.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_6_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_6_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
weights_k_7: """
|
||||
{((0, 1), 'exit'): -10.0,
|
||||
((0, 2), 'exit'): 0,
|
||||
((0, 3), 'exit'): -10.0,
|
||||
((0, 4), 'exit'): -10.0,
|
||||
((0, 5), 'exit'): -10.0,
|
||||
((1, 0), 'exit'): 0,
|
||||
((1, 1), 'east'): 0,
|
||||
((1, 1), 'north'): 0,
|
||||
((1, 1), 'south'): 0,
|
||||
((1, 1), 'west'): 0,
|
||||
((1, 2), 'east'): 0,
|
||||
((1, 2), 'north'): 0,
|
||||
((1, 2), 'south'): 0,
|
||||
((1, 2), 'west'): 0,
|
||||
((1, 3), 'east'): 0,
|
||||
((1, 3), 'north'): 0,
|
||||
((1, 3), 'south'): 0.0,
|
||||
((1, 3), 'west'): 0,
|
||||
((1, 4), 'east'): 0,
|
||||
((1, 4), 'north'): 0,
|
||||
((1, 4), 'south'): 0,
|
||||
((1, 4), 'west'): 0,
|
||||
((1, 5), 'east'): 0,
|
||||
((1, 5), 'north'): 0,
|
||||
((1, 5), 'south'): 0.0,
|
||||
((1, 5), 'west'): 0,
|
||||
((1, 6), 'exit'): 0,
|
||||
((2, 1), 'exit'): 0,
|
||||
((2, 2), 'exit'): 0,
|
||||
((2, 3), 'exit'): -10.0,
|
||||
((2, 4), 'exit'): 0,
|
||||
((2, 5), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_7_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_7_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_7_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal -10.0000
|
||||
0.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_7_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_7_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
weights_k_8: """
|
||||
{((0, 1), 'exit'): -10.0,
|
||||
((0, 2), 'exit'): -10.0,
|
||||
((0, 3), 'exit'): -10.0,
|
||||
((0, 4), 'exit'): -10.0,
|
||||
((0, 5), 'exit'): -10.0,
|
||||
((1, 0), 'exit'): 0,
|
||||
((1, 1), 'east'): 0,
|
||||
((1, 1), 'north'): 0,
|
||||
((1, 1), 'south'): 0,
|
||||
((1, 1), 'west'): 0,
|
||||
((1, 2), 'east'): 0,
|
||||
((1, 2), 'north'): 0,
|
||||
((1, 2), 'south'): 0,
|
||||
((1, 2), 'west'): 0,
|
||||
((1, 3), 'east'): 0,
|
||||
((1, 3), 'north'): 0,
|
||||
((1, 3), 'south'): 0.0,
|
||||
((1, 3), 'west'): 0,
|
||||
((1, 4), 'east'): 0,
|
||||
((1, 4), 'north'): 0,
|
||||
((1, 4), 'south'): 0,
|
||||
((1, 4), 'west'): 0,
|
||||
((1, 5), 'east'): 0,
|
||||
((1, 5), 'north'): 0,
|
||||
((1, 5), 'south'): 0.0,
|
||||
((1, 5), 'west'): 0,
|
||||
((1, 6), 'exit'): 0,
|
||||
((2, 1), 'exit'): 0,
|
||||
((2, 2), 'exit'): 0,
|
||||
((2, 3), 'exit'): -10.0,
|
||||
((2, 4), 'exit'): 0,
|
||||
((2, 5), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_8_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_8_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_8_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal -10.0000
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_8_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_8_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
weights_k_9: """
|
||||
{((0, 1), 'exit'): -10.0,
|
||||
((0, 2), 'exit'): -10.0,
|
||||
((0, 3), 'exit'): -10.0,
|
||||
((0, 4), 'exit'): -10.0,
|
||||
((0, 5), 'exit'): -10.0,
|
||||
((1, 0), 'exit'): 0.1,
|
||||
((1, 1), 'east'): 0,
|
||||
((1, 1), 'north'): 0,
|
||||
((1, 1), 'south'): 0,
|
||||
((1, 1), 'west'): 0,
|
||||
((1, 2), 'east'): 0,
|
||||
((1, 2), 'north'): 0,
|
||||
((1, 2), 'south'): 0,
|
||||
((1, 2), 'west'): 0,
|
||||
((1, 3), 'east'): 0,
|
||||
((1, 3), 'north'): 0,
|
||||
((1, 3), 'south'): 0.0,
|
||||
((1, 3), 'west'): 0,
|
||||
((1, 4), 'east'): 0,
|
||||
((1, 4), 'north'): 0,
|
||||
((1, 4), 'south'): 0,
|
||||
((1, 4), 'west'): 0,
|
||||
((1, 5), 'east'): 0,
|
||||
((1, 5), 'north'): 0,
|
||||
((1, 5), 'south'): 0.0,
|
||||
((1, 5), 'west'): 0,
|
||||
((1, 6), 'exit'): 0,
|
||||
((2, 1), 'exit'): 0,
|
||||
((2, 2), 'exit'): 0,
|
||||
((2, 3), 'exit'): -10.0,
|
||||
((2, 4), 'exit'): 0,
|
||||
((2, 5), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_9_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_9_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_9_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal -10.0000
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
__________ 0.1000 __________
|
||||
"""
|
||||
|
||||
q_values_k_9_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_9_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
weights_k_500: """
|
||||
{((0, 1), 'exit'): -92.82102012308148,
|
||||
((0, 2), 'exit'): -97.21871610556306,
|
||||
((0, 3), 'exit'): -89.05810108684878,
|
||||
((0, 4), 'exit'): -97.74716004550608,
|
||||
((0, 5), 'exit'): -96.56631617970748,
|
||||
((1, 0), 'exit'): 0.9576088417247839,
|
||||
((1, 1), 'east'): -31.68839649871871,
|
||||
((1, 1), 'north'): -0.5871409700255297,
|
||||
((1, 1), 'south'): -5.571799344704395,
|
||||
((1, 1), 'west'): -20.777007017445538,
|
||||
((1, 2), 'east'): -43.595242197319,
|
||||
((1, 2), 'north'): -1.264202431807023,
|
||||
((1, 2), 'south'): -8.401530599975509,
|
||||
((1, 2), 'west'): -16.283916171605192,
|
||||
((1, 3), 'east'): -3.6956691,
|
||||
((1, 3), 'north'): -0.16712710492783758,
|
||||
((1, 3), 'south'): -3.4722840178579073,
|
||||
((1, 3), 'west'): -40.58867937480968,
|
||||
((1, 4), 'east'): -26.553386621338632,
|
||||
((1, 4), 'north'): -0.799493322153628,
|
||||
((1, 4), 'south'): -6.727671187497919,
|
||||
((1, 4), 'west'): -39.06095135014759,
|
||||
((1, 5), 'east'): -17.067638934181446,
|
||||
((1, 5), 'north'): -5.864753060887024,
|
||||
((1, 5), 'south'): -6.83769420759525,
|
||||
((1, 5), 'west'): -27.062643066307515,
|
||||
((1, 6), 'exit'): 9.353891811077332,
|
||||
((2, 1), 'exit'): -97.21871610556306,
|
||||
((2, 2), 'exit'): -87.84233454094309,
|
||||
((2, 3), 'exit'): -96.90968456173674,
|
||||
((2, 4), 'exit'): -94.185026299696,
|
||||
((2, 5), 'exit'): -96.90968456173674}
|
||||
"""
|
||||
|
||||
q_values_k_500_action_north: """
|
||||
__________ illegal __________
|
||||
illegal -5.8648 illegal
|
||||
illegal -0.7995 illegal
|
||||
illegal -0.1671 illegal
|
||||
illegal -1.2642 illegal
|
||||
illegal -0.5871 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_500_action_east: """
|
||||
__________ illegal __________
|
||||
illegal -17.0676 illegal
|
||||
illegal -26.5534 illegal
|
||||
illegal -3.6957 illegal
|
||||
illegal -43.5952 illegal
|
||||
illegal -31.6884 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_500_action_exit: """
|
||||
__________ 9.3539 __________
|
||||
-96.5663 illegal -96.9097
|
||||
-97.7472 illegal -94.1850
|
||||
-89.0581 illegal -96.9097
|
||||
-97.2187 illegal -87.8423
|
||||
-92.8210 illegal -97.2187
|
||||
__________ 0.9576 __________
|
||||
"""
|
||||
|
||||
q_values_k_500_action_south: """
|
||||
__________ illegal __________
|
||||
illegal -6.8377 illegal
|
||||
illegal -6.7277 illegal
|
||||
illegal -3.4723 illegal
|
||||
illegal -8.4015 illegal
|
||||
illegal -5.5718 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_500_action_west: """
|
||||
__________ illegal __________
|
||||
illegal -27.0626 illegal
|
||||
illegal -39.0610 illegal
|
||||
illegal -40.5887 illegal
|
||||
illegal -16.2839 illegal
|
||||
illegal -20.7770 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
27
reinforcement/test_cases/q8/3-bridge.test
Normal file
27
reinforcement/test_cases/q8/3-bridge.test
Normal file
|
@ -0,0 +1,27 @@
|
|||
class: "ApproximateQLearningTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
# 10 #
|
||||
-100 _ -100
|
||||
-100 _ -100
|
||||
-100 _ -100
|
||||
-100 _ -100
|
||||
-100 S -100
|
||||
# 1 #
|
||||
"""
|
||||
gridName: "bridgeGrid"
|
||||
discount: "0.85"
|
||||
noise: "0.1"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.5"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "500"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
1210
reinforcement/test_cases/q8/4-discountgrid.solution
Normal file
1210
reinforcement/test_cases/q8/4-discountgrid.solution
Normal file
File diff suppressed because it is too large
Load diff
24
reinforcement/test_cases/q8/4-discountgrid.test
Normal file
24
reinforcement/test_cases/q8/4-discountgrid.test
Normal file
|
@ -0,0 +1,24 @@
|
|||
class: "ApproximateQLearningTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
-10 _ 10 _ _
|
||||
-10 _ # _ _
|
||||
-10 _ 1 _ _
|
||||
-10 _ # # _
|
||||
-10 S _ _ _
|
||||
"""
|
||||
discount: "0.9"
|
||||
noise: "0.2"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.2"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "3000"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
880
reinforcement/test_cases/q8/5-coord-extractor.solution
Normal file
880
reinforcement/test_cases/q8/5-coord-extractor.solution
Normal file
|
@ -0,0 +1,880 @@
|
|||
weights_k_0: """
|
||||
{'action=east': 0,
|
||||
'action=exit': 0,
|
||||
'action=north': 0,
|
||||
'action=south': 0,
|
||||
'action=west': 0,
|
||||
'x=0': 0,
|
||||
'x=1': 0,
|
||||
'x=2': 0,
|
||||
'x=3': 0,
|
||||
'x=4': 0,
|
||||
'y=0': 0,
|
||||
'y=1': 0,
|
||||
'y=2': 0,
|
||||
'y=3': 0,
|
||||
'y=4': 0,
|
||||
(0, 0): 0,
|
||||
(0, 1): 0,
|
||||
(0, 2): 0,
|
||||
(0, 3): 0,
|
||||
(0, 4): 0,
|
||||
(1, 0): 0,
|
||||
(1, 1): 0,
|
||||
(1, 2): 0,
|
||||
(1, 3): 0,
|
||||
(1, 4): 0,
|
||||
(2, 0): 0,
|
||||
(2, 2): 0,
|
||||
(2, 4): 0,
|
||||
(3, 0): 0,
|
||||
(3, 2): 0,
|
||||
(3, 3): 0,
|
||||
(3, 4): 0,
|
||||
(4, 0): 0,
|
||||
(4, 1): 0,
|
||||
(4, 2): 0,
|
||||
(4, 3): 0,
|
||||
(4, 4): 0}
|
||||
"""
|
||||
|
||||
q_values_k_0_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ __________ illegal
|
||||
0.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
weights_k_1: """
|
||||
{'action=east': 0,
|
||||
'action=exit': 0,
|
||||
'action=north': 0,
|
||||
'action=south': 0.0,
|
||||
'action=west': 0,
|
||||
'x=0': 0,
|
||||
'x=1': 0.0,
|
||||
'x=2': 0,
|
||||
'x=3': 0,
|
||||
'x=4': 0,
|
||||
'y=0': 0,
|
||||
'y=1': 0.0,
|
||||
'y=2': 0,
|
||||
'y=3': 0,
|
||||
'y=4': 0,
|
||||
(0, 0): 0,
|
||||
(0, 1): 0,
|
||||
(0, 2): 0,
|
||||
(0, 3): 0,
|
||||
(0, 4): 0,
|
||||
(1, 0): 0.0,
|
||||
(1, 1): 0,
|
||||
(1, 2): 0,
|
||||
(1, 3): 0,
|
||||
(1, 4): 0,
|
||||
(2, 0): 0,
|
||||
(2, 2): 0,
|
||||
(2, 4): 0,
|
||||
(3, 0): 0,
|
||||
(3, 2): 0,
|
||||
(3, 3): 0,
|
||||
(3, 4): 0,
|
||||
(4, 0): 0,
|
||||
(4, 1): 0,
|
||||
(4, 2): 0,
|
||||
(4, 3): 0,
|
||||
(4, 4): 0}
|
||||
"""
|
||||
|
||||
q_values_k_1_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ __________ illegal
|
||||
0.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
weights_k_2: """
|
||||
{'action=east': 0,
|
||||
'action=exit': 0,
|
||||
'action=north': 0,
|
||||
'action=south': 0.0,
|
||||
'action=west': 0,
|
||||
'x=0': 0,
|
||||
'x=1': 0.0,
|
||||
'x=2': 0,
|
||||
'x=3': 0.0,
|
||||
'x=4': 0,
|
||||
'y=0': 0,
|
||||
'y=1': 0.0,
|
||||
'y=2': 0,
|
||||
'y=3': 0.0,
|
||||
'y=4': 0,
|
||||
(0, 0): 0,
|
||||
(0, 1): 0,
|
||||
(0, 2): 0,
|
||||
(0, 3): 0,
|
||||
(0, 4): 0,
|
||||
(1, 0): 0.0,
|
||||
(1, 1): 0,
|
||||
(1, 2): 0,
|
||||
(1, 3): 0,
|
||||
(1, 4): 0,
|
||||
(2, 0): 0,
|
||||
(2, 2): 0,
|
||||
(2, 4): 0,
|
||||
(3, 0): 0.0,
|
||||
(3, 2): 0,
|
||||
(3, 3): 0,
|
||||
(3, 4): 0,
|
||||
(4, 0): 0,
|
||||
(4, 1): 0,
|
||||
(4, 2): 0,
|
||||
(4, 3): 0,
|
||||
(4, 4): 0}
|
||||
"""
|
||||
|
||||
q_values_k_2_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ __________ illegal
|
||||
0.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
weights_k_3: """
|
||||
{'action=east': 0,
|
||||
'action=exit': -1.0,
|
||||
'action=north': 0,
|
||||
'action=south': 0.0,
|
||||
'action=west': 0,
|
||||
'x=0': -1.0,
|
||||
'x=1': 0.0,
|
||||
'x=2': 0,
|
||||
'x=3': 0.0,
|
||||
'x=4': 0,
|
||||
'y=0': -1.0,
|
||||
'y=1': 0.0,
|
||||
'y=2': 0,
|
||||
'y=3': 0.0,
|
||||
'y=4': 0,
|
||||
(0, 0): -1.0,
|
||||
(0, 1): 0,
|
||||
(0, 2): 0,
|
||||
(0, 3): 0,
|
||||
(0, 4): 0,
|
||||
(1, 0): 0.0,
|
||||
(1, 1): 0,
|
||||
(1, 2): 0,
|
||||
(1, 3): 0,
|
||||
(1, 4): 0,
|
||||
(2, 0): 0,
|
||||
(2, 2): 0,
|
||||
(2, 4): 0,
|
||||
(3, 0): 0.0,
|
||||
(3, 2): 0,
|
||||
(3, 3): 0,
|
||||
(3, 4): 0,
|
||||
(4, 0): 0,
|
||||
(4, 1): 0,
|
||||
(4, 2): 0,
|
||||
(4, 3): 0,
|
||||
(4, 4): 0}
|
||||
"""
|
||||
|
||||
q_values_k_3_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_exit: """
|
||||
-3.0000 illegal -1.0000 illegal illegal
|
||||
-3.0000 illegal __________ illegal illegal
|
||||
-3.0000 illegal -1.0000 illegal illegal
|
||||
-3.0000 illegal __________ __________ illegal
|
||||
-4.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
weights_k_4: """
|
||||
{'action=east': 0.0,
|
||||
'action=exit': -1.0,
|
||||
'action=north': 0,
|
||||
'action=south': 0.0,
|
||||
'action=west': 0,
|
||||
'x=0': -1.0,
|
||||
'x=1': 0.0,
|
||||
'x=2': 0,
|
||||
'x=3': 0.0,
|
||||
'x=4': 0,
|
||||
'y=0': -1.0,
|
||||
'y=1': 0.0,
|
||||
'y=2': 0,
|
||||
'y=3': 0.0,
|
||||
'y=4': 0,
|
||||
(0, 0): -1.0,
|
||||
(0, 1): 0,
|
||||
(0, 2): 0,
|
||||
(0, 3): 0,
|
||||
(0, 4): 0,
|
||||
(1, 0): 0.0,
|
||||
(1, 1): 0,
|
||||
(1, 2): 0,
|
||||
(1, 3): 0,
|
||||
(1, 4): 0,
|
||||
(2, 0): 0,
|
||||
(2, 2): 0,
|
||||
(2, 4): 0,
|
||||
(3, 0): 0.0,
|
||||
(3, 2): 0,
|
||||
(3, 3): 0,
|
||||
(3, 4): 0,
|
||||
(4, 0): 0,
|
||||
(4, 1): 0,
|
||||
(4, 2): 0,
|
||||
(4, 3): 0,
|
||||
(4, 4): 0}
|
||||
"""
|
||||
|
||||
q_values_k_4_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_4_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_4_action_exit: """
|
||||
-3.0000 illegal -1.0000 illegal illegal
|
||||
-3.0000 illegal __________ illegal illegal
|
||||
-3.0000 illegal -1.0000 illegal illegal
|
||||
-3.0000 illegal __________ __________ illegal
|
||||
-4.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_4_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
weights_k_5: """
|
||||
{'action=east': 0.0,
|
||||
'action=exit': -1.0,
|
||||
'action=north': 0,
|
||||
'action=south': 0.0,
|
||||
'action=west': 0.0,
|
||||
'x=0': -1.0,
|
||||
'x=1': 0.0,
|
||||
'x=2': 0,
|
||||
'x=3': 0.0,
|
||||
'x=4': 0.0,
|
||||
'y=0': -1.0,
|
||||
'y=1': 0.0,
|
||||
'y=2': 0,
|
||||
'y=3': 0.0,
|
||||
'y=4': 0.0,
|
||||
(0, 0): -1.0,
|
||||
(0, 1): 0,
|
||||
(0, 2): 0,
|
||||
(0, 3): 0,
|
||||
(0, 4): 0,
|
||||
(1, 0): 0.0,
|
||||
(1, 1): 0,
|
||||
(1, 2): 0,
|
||||
(1, 3): 0,
|
||||
(1, 4): 0,
|
||||
(2, 0): 0,
|
||||
(2, 2): 0,
|
||||
(2, 4): 0,
|
||||
(3, 0): 0.0,
|
||||
(3, 2): 0,
|
||||
(3, 3): 0,
|
||||
(3, 4): 0,
|
||||
(4, 0): 0,
|
||||
(4, 1): 0.0,
|
||||
(4, 2): 0,
|
||||
(4, 3): 0,
|
||||
(4, 4): 0}
|
||||
"""
|
||||
|
||||
q_values_k_5_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_5_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_5_action_exit: """
|
||||
-3.0000 illegal -1.0000 illegal illegal
|
||||
-3.0000 illegal __________ illegal illegal
|
||||
-3.0000 illegal -1.0000 illegal illegal
|
||||
-3.0000 illegal __________ __________ illegal
|
||||
-4.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_5_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
weights_k_6: """
|
||||
{'action=east': 0.0,
|
||||
'action=exit': -1.7000000000000002,
|
||||
'action=north': 0,
|
||||
'action=south': 0.0,
|
||||
'action=west': 0.0,
|
||||
'x=0': -1.7000000000000002,
|
||||
'x=1': 0.0,
|
||||
'x=2': 0,
|
||||
'x=3': 0.0,
|
||||
'x=4': 0.0,
|
||||
'y=0': -1.7000000000000002,
|
||||
'y=1': 0.0,
|
||||
'y=2': 0,
|
||||
'y=3': 0.0,
|
||||
'y=4': 0.0,
|
||||
(0, 0): -1.0,
|
||||
(0, 1): 0,
|
||||
(0, 2): 0,
|
||||
(0, 3): -0.7000000000000001,
|
||||
(0, 4): 0,
|
||||
(1, 0): 0.0,
|
||||
(1, 1): 0,
|
||||
(1, 2): 0,
|
||||
(1, 3): 0,
|
||||
(1, 4): 0,
|
||||
(2, 0): 0,
|
||||
(2, 2): 0,
|
||||
(2, 4): 0,
|
||||
(3, 0): 0.0,
|
||||
(3, 2): 0,
|
||||
(3, 3): 0,
|
||||
(3, 4): 0,
|
||||
(4, 0): 0,
|
||||
(4, 1): 0.0,
|
||||
(4, 2): 0,
|
||||
(4, 3): 0,
|
||||
(4, 4): 0}
|
||||
"""
|
||||
|
||||
q_values_k_6_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_6_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_6_action_exit: """
|
||||
-5.1000 illegal -1.7000 illegal illegal
|
||||
-5.8000 illegal __________ illegal illegal
|
||||
-5.1000 illegal -1.7000 illegal illegal
|
||||
-5.1000 illegal __________ __________ illegal
|
||||
-6.1000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_6_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
weights_k_7: """
|
||||
{'action=east': 0.0,
|
||||
'action=exit': -1.4300000000000002,
|
||||
'action=north': 0,
|
||||
'action=south': 0.0,
|
||||
'action=west': 0.0,
|
||||
'x=0': -1.7000000000000002,
|
||||
'x=1': 0.0,
|
||||
'x=2': 0.27,
|
||||
'x=3': 0.0,
|
||||
'x=4': 0.0,
|
||||
'y=0': -1.7000000000000002,
|
||||
'y=1': 0.0,
|
||||
'y=2': 0.27,
|
||||
'y=3': 0.0,
|
||||
'y=4': 0.0,
|
||||
(0, 0): -1.0,
|
||||
(0, 1): 0,
|
||||
(0, 2): 0,
|
||||
(0, 3): -0.7000000000000001,
|
||||
(0, 4): 0,
|
||||
(1, 0): 0.0,
|
||||
(1, 1): 0,
|
||||
(1, 2): 0,
|
||||
(1, 3): 0,
|
||||
(1, 4): 0,
|
||||
(2, 0): 0,
|
||||
(2, 2): 0.27,
|
||||
(2, 4): 0,
|
||||
(3, 0): 0.0,
|
||||
(3, 2): 0,
|
||||
(3, 3): 0,
|
||||
(3, 4): 0,
|
||||
(4, 0): 0,
|
||||
(4, 1): 0.0,
|
||||
(4, 2): 0,
|
||||
(4, 3): 0,
|
||||
(4, 4): 0}
|
||||
"""
|
||||
|
||||
q_values_k_7_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.5400 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_7_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.5400 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_7_action_exit: """
|
||||
-4.8300 illegal -0.8900 illegal illegal
|
||||
-5.5300 illegal __________ illegal illegal
|
||||
-4.8300 illegal -0.6200 illegal illegal
|
||||
-4.8300 illegal __________ __________ illegal
|
||||
-5.8300 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.5400 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_7_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.5400 0.0000 0.0000
|
||||
"""
|
||||
|
||||
weights_k_8: """
|
||||
{'action=east': 0.0,
|
||||
'action=exit': -1.947,
|
||||
'action=north': 0,
|
||||
'action=south': 0.0,
|
||||
'action=west': 0.0,
|
||||
'x=0': -2.217,
|
||||
'x=1': 0.0,
|
||||
'x=2': 0.27,
|
||||
'x=3': 0.0,
|
||||
'x=4': 0.0,
|
||||
'y=0': -2.217,
|
||||
'y=1': 0.0,
|
||||
'y=2': 0.27,
|
||||
'y=3': 0.0,
|
||||
'y=4': 0.0,
|
||||
(0, 0): -1.0,
|
||||
(0, 1): -0.517,
|
||||
(0, 2): 0,
|
||||
(0, 3): -0.7000000000000001,
|
||||
(0, 4): 0,
|
||||
(1, 0): 0.0,
|
||||
(1, 1): 0,
|
||||
(1, 2): 0,
|
||||
(1, 3): 0,
|
||||
(1, 4): 0,
|
||||
(2, 0): 0,
|
||||
(2, 2): 0.27,
|
||||
(2, 4): 0,
|
||||
(3, 0): 0.0,
|
||||
(3, 2): 0,
|
||||
(3, 3): 0,
|
||||
(3, 4): 0,
|
||||
(4, 0): 0,
|
||||
(4, 1): 0.0,
|
||||
(4, 2): 0,
|
||||
(4, 3): 0,
|
||||
(4, 4): 0}
|
||||
"""
|
||||
|
||||
q_values_k_8_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.5400 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_8_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.5400 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_8_action_exit: """
|
||||
-6.3810 illegal -1.4070 illegal illegal
|
||||
-7.0810 illegal __________ illegal illegal
|
||||
-6.3810 illegal -1.1370 illegal illegal
|
||||
-6.8980 illegal __________ __________ illegal
|
||||
-7.3810 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.5400 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_8_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.5400 0.0000 0.0000
|
||||
"""
|
||||
|
||||
weights_k_9: """
|
||||
{'action=east': 0.0,
|
||||
'action=exit': -1.947,
|
||||
'action=north': -0.62082,
|
||||
'action=south': 0.0,
|
||||
'action=west': 0.0,
|
||||
'x=0': -2.217,
|
||||
'x=1': -0.62082,
|
||||
'x=2': 0.27,
|
||||
'x=3': 0.0,
|
||||
'x=4': 0.0,
|
||||
'y=0': -2.217,
|
||||
'y=1': -0.62082,
|
||||
'y=2': 0.27,
|
||||
'y=3': 0.0,
|
||||
'y=4': 0.0,
|
||||
(0, 0): -1.0,
|
||||
(0, 1): -0.517,
|
||||
(0, 2): 0,
|
||||
(0, 3): -0.7000000000000001,
|
||||
(0, 4): 0,
|
||||
(1, 0): 0.0,
|
||||
(1, 1): -0.62082,
|
||||
(1, 2): 0,
|
||||
(1, 3): 0,
|
||||
(1, 4): 0,
|
||||
(2, 0): 0,
|
||||
(2, 2): 0.27,
|
||||
(2, 4): 0,
|
||||
(3, 0): 0.0,
|
||||
(3, 2): 0,
|
||||
(3, 3): 0,
|
||||
(3, 4): 0,
|
||||
(4, 0): 0,
|
||||
(4, 1): 0.0,
|
||||
(4, 2): 0,
|
||||
(4, 3): 0,
|
||||
(4, 4): 0}
|
||||
"""
|
||||
|
||||
q_values_k_9_action_north: """
|
||||
illegal -1.8625 illegal -0.6208 -0.6208
|
||||
illegal -1.8625 __________ -0.6208 -0.6208
|
||||
illegal -1.8625 illegal -0.6208 -0.6208
|
||||
illegal -2.4833 __________ __________ -0.6208
|
||||
illegal -1.8625 -0.0808 -0.6208 -0.6208
|
||||
"""
|
||||
|
||||
q_values_k_9_action_east: """
|
||||
illegal -1.2416 illegal 0.0000 0.0000
|
||||
illegal -1.2416 __________ 0.0000 0.0000
|
||||
illegal -1.2416 illegal 0.0000 0.0000
|
||||
illegal -1.8625 __________ __________ 0.0000
|
||||
illegal -1.2416 0.5400 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_9_action_exit: """
|
||||
-6.3810 illegal -1.4070 illegal illegal
|
||||
-7.0810 illegal __________ illegal illegal
|
||||
-6.3810 illegal -1.1370 illegal illegal
|
||||
-6.8980 illegal __________ __________ illegal
|
||||
-7.3810 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_south: """
|
||||
illegal -1.2416 illegal 0.0000 0.0000
|
||||
illegal -1.2416 __________ 0.0000 0.0000
|
||||
illegal -1.2416 illegal 0.0000 0.0000
|
||||
illegal -1.8625 __________ __________ 0.0000
|
||||
illegal -1.2416 0.5400 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_9_action_west: """
|
||||
illegal -1.2416 illegal 0.0000 0.0000
|
||||
illegal -1.2416 __________ 0.0000 0.0000
|
||||
illegal -1.2416 illegal 0.0000 0.0000
|
||||
illegal -1.8625 __________ __________ 0.0000
|
||||
illegal -1.2416 0.5400 0.0000 0.0000
|
||||
"""
|
||||
|
||||
weights_k_3000: """
|
||||
{'action=east': 6.719916513522846,
|
||||
'action=exit': -2.2444981376861555,
|
||||
'action=north': 4.568574519923728,
|
||||
'action=south': 3.761510351874819,
|
||||
'action=west': 1.2828606322891556,
|
||||
'x=0': -3.604063955849794,
|
||||
'x=1': 0.6731476152061693,
|
||||
'x=2': 4.000208353074704,
|
||||
'x=3': 5.988311380073477,
|
||||
'x=4': 7.0307604874198235,
|
||||
'y=0': -3.604063955849794,
|
||||
'y=1': 0.6731476152061693,
|
||||
'y=2': 4.000208353074704,
|
||||
'y=3': 5.988311380073477,
|
||||
'y=4': 7.0307604874198235,
|
||||
(0, 0): -0.7073688447583666,
|
||||
(0, 1): -0.7542862401704076,
|
||||
(0, 2): -0.7043014501203066,
|
||||
(0, 3): -0.7433344649617668,
|
||||
(0, 4): -0.6947729558389527,
|
||||
(1, 0): 2.364273811399719,
|
||||
(1, 1): -0.2695405704605499,
|
||||
(1, 2): -0.7105979212702271,
|
||||
(1, 3): -1.4866826750327933,
|
||||
(1, 4): 0.7756949705700219,
|
||||
(2, 0): 2.64064253491107,
|
||||
(2, 2): -3.7381118310263166,
|
||||
(2, 4): 5.097677649189953,
|
||||
(3, 0): 2.505262939441149,
|
||||
(3, 2): 0.27218788923837256,
|
||||
(3, 3): 2.2611084206093195,
|
||||
(3, 4): 0.9497521307846304,
|
||||
(4, 0): 1.7330586015291545,
|
||||
(4, 1): 0.980194046153168,
|
||||
(4, 2): 0.78786289128181,
|
||||
(4, 3): 1.493343270762865,
|
||||
(4, 4): 2.0363016776928333}
|
||||
"""
|
||||
|
||||
q_values_k_3000_action_north: """
|
||||
illegal 6.6906 illegal 17.4949 20.6664
|
||||
illegal 4.4282 __________ 18.8063 20.1234
|
||||
illegal 5.2043 illegal 16.8174 19.4180
|
||||
illegal 5.6453 __________ __________ 19.6103
|
||||
illegal 8.2791 15.2096 19.0505 20.3632
|
||||
"""
|
||||
|
||||
q_values_k_3000_action_east: """
|
||||
illegal 8.8419 illegal 19.6463 22.8177
|
||||
illegal 6.5795 __________ 20.9576 22.2748
|
||||
illegal 7.3556 illegal 18.9687 21.5693
|
||||
illegal 7.7967 __________ __________ 21.7616
|
||||
illegal 10.4305 17.3610 21.2018 22.5145
|
||||
"""
|
||||
|
||||
q_values_k_3000_action_exit: """
|
||||
-10.1474 illegal 10.8536 illegal illegal
|
||||
-10.1960 illegal __________ illegal illegal
|
||||
-10.1569 illegal 2.0178 illegal illegal
|
||||
-10.2069 illegal __________ __________ illegal
|
||||
-10.1600 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_3000_action_south: """
|
||||
illegal 5.8835 illegal 16.6879 19.8593
|
||||
illegal 3.6211 __________ 17.9992 19.3164
|
||||
illegal 4.3972 illegal 16.0103 18.6109
|
||||
illegal 4.8383 __________ __________ 18.8032
|
||||
illegal 7.4721 14.4026 18.2434 19.5561
|
||||
"""
|
||||
|
||||
q_values_k_3000_action_west: """
|
||||
illegal 3.4049 illegal 14.2092 17.3807
|
||||
illegal 1.1425 __________ 15.5206 16.8377
|
||||
illegal 1.9186 illegal 13.5317 16.1322
|
||||
illegal 2.3596 __________ __________ 16.3246
|
||||
illegal 4.9934 11.9239 15.7647 17.0774
|
||||
"""
|
||||
|
25
reinforcement/test_cases/q8/5-coord-extractor.test
Normal file
25
reinforcement/test_cases/q8/5-coord-extractor.test
Normal file
|
@ -0,0 +1,25 @@
|
|||
class: "ApproximateQLearningTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
-10 _ 10 _ _
|
||||
-10 _ # _ _
|
||||
-10 _ 1 _ _
|
||||
-10 _ # # _
|
||||
-10 S _ _ _
|
||||
"""
|
||||
discount: "0.9"
|
||||
noise: "0.2"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.2"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "3000"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
extractor: "CoordinateExtractor"
|
||||
|
2
reinforcement/test_cases/q8/CONFIG
Normal file
2
reinforcement/test_cases/q8/CONFIG
Normal file
|
@ -0,0 +1,2 @@
|
|||
max_points: "3"
|
||||
class: "PassAllTestsQuestion"
|
81
reinforcement/textDisplay.py
Normal file
81
reinforcement/textDisplay.py
Normal file
|
@ -0,0 +1,81 @@
|
|||
# textDisplay.py
|
||||
# --------------
|
||||
# Licensing Information: You are free to use or extend these projects for
|
||||
# educational purposes provided that (1) you do not distribute or publish
|
||||
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||
#
|
||||
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||
# The core projects and autograders were primarily created by John DeNero
|
||||
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||
|
||||
|
||||
import time
|
||||
try:
|
||||
import pacman
|
||||
except:
|
||||
pass
|
||||
|
||||
DRAW_EVERY = 1
|
||||
SLEEP_TIME = 0 # This can be overwritten by __init__
|
||||
DISPLAY_MOVES = False
|
||||
QUIET = False # Supresses output
|
||||
|
||||
class NullGraphics:
|
||||
def initialize(self, state, isBlue = False):
|
||||
pass
|
||||
|
||||
def update(self, state):
|
||||
pass
|
||||
|
||||
def checkNullDisplay(self):
|
||||
return True
|
||||
|
||||
def pause(self):
|
||||
time.sleep(SLEEP_TIME)
|
||||
|
||||
def draw(self, state):
|
||||
print state
|
||||
|
||||
def updateDistributions(self, dist):
|
||||
pass
|
||||
|
||||
def finish(self):
|
||||
pass
|
||||
|
||||
class PacmanGraphics:
|
||||
def __init__(self, speed=None):
|
||||
if speed != None:
|
||||
global SLEEP_TIME
|
||||
SLEEP_TIME = speed
|
||||
|
||||
def initialize(self, state, isBlue = False):
|
||||
self.draw(state)
|
||||
self.pause()
|
||||
self.turn = 0
|
||||
self.agentCounter = 0
|
||||
|
||||
def update(self, state):
|
||||
numAgents = len(state.agentStates)
|
||||
self.agentCounter = (self.agentCounter + 1) % numAgents
|
||||
if self.agentCounter == 0:
|
||||
self.turn += 1
|
||||
if DISPLAY_MOVES:
|
||||
ghosts = [pacman.nearestPoint(state.getGhostPosition(i)) for i in range(1, numAgents)]
|
||||
print "%4d) P: %-8s" % (self.turn, str(pacman.nearestPoint(state.getPacmanPosition()))),'| Score: %-5d' % state.score,'| Ghosts:', ghosts
|
||||
if self.turn % DRAW_EVERY == 0:
|
||||
self.draw(state)
|
||||
self.pause()
|
||||
if state._win or state._lose:
|
||||
self.draw(state)
|
||||
|
||||
def pause(self):
|
||||
time.sleep(SLEEP_TIME)
|
||||
|
||||
def draw(self, state):
|
||||
print state
|
||||
|
||||
def finish(self):
|
||||
pass
|
324
reinforcement/textGridworldDisplay.py
Normal file
324
reinforcement/textGridworldDisplay.py
Normal file
|
@ -0,0 +1,324 @@
|
|||
# textGridworldDisplay.py
|
||||
# -----------------------
|
||||
# Licensing Information: You are free to use or extend these projects for
|
||||
# educational purposes provided that (1) you do not distribute or publish
|
||||
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||
#
|
||||
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||
# The core projects and autograders were primarily created by John DeNero
|
||||
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||
|
||||
|
||||
import util
|
||||
|
||||
class TextGridworldDisplay:
|
||||
|
||||
def __init__(self, gridworld):
|
||||
self.gridworld = gridworld
|
||||
|
||||
def start(self):
|
||||
pass
|
||||
|
||||
def pause(self):
|
||||
pass
|
||||
|
||||
def displayValues(self, agent, currentState = None, message = None):
|
||||
if message != None:
|
||||
print message
|
||||
values = util.Counter()
|
||||
policy = {}
|
||||
states = self.gridworld.getStates()
|
||||
for state in states:
|
||||
values[state] = agent.getValue(state)
|
||||
policy[state] = agent.getPolicy(state)
|
||||
prettyPrintValues(self.gridworld, values, policy, currentState)
|
||||
|
||||
def displayNullValues(self, agent, currentState = None, message = None):
|
||||
if message != None: print message
|
||||
prettyPrintNullValues(self.gridworld, currentState)
|
||||
|
||||
def displayQValues(self, agent, currentState = None, message = None):
|
||||
if message != None: print message
|
||||
qValues = util.Counter()
|
||||
states = self.gridworld.getStates()
|
||||
for state in states:
|
||||
for action in self.gridworld.getPossibleActions(state):
|
||||
qValues[(state, action)] = agent.getQValue(state, action)
|
||||
prettyPrintQValues(self.gridworld, qValues, currentState)
|
||||
|
||||
|
||||
def prettyPrintValues(gridWorld, values, policy=None, currentState = None):
|
||||
grid = gridWorld.grid
|
||||
maxLen = 11
|
||||
newRows = []
|
||||
for y in range(grid.height):
|
||||
newRow = []
|
||||
for x in range(grid.width):
|
||||
state = (x, y)
|
||||
value = values[state]
|
||||
action = None
|
||||
if policy != None and state in policy:
|
||||
action = policy[state]
|
||||
actions = gridWorld.getPossibleActions(state)
|
||||
if action not in actions and 'exit' in actions:
|
||||
action = 'exit'
|
||||
valString = None
|
||||
if action == 'exit':
|
||||
valString = border('%.2f' % value)
|
||||
else:
|
||||
valString = '\n\n%.2f\n\n' % value
|
||||
valString += ' '*maxLen
|
||||
if grid[x][y] == 'S':
|
||||
valString = '\n\nS: %.2f\n\n' % value
|
||||
valString += ' '*maxLen
|
||||
if grid[x][y] == '#':
|
||||
valString = '\n#####\n#####\n#####\n'
|
||||
valString += ' '*maxLen
|
||||
pieces = [valString]
|
||||
text = ("\n".join(pieces)).split('\n')
|
||||
if currentState == state:
|
||||
l = len(text[1])
|
||||
if l == 0:
|
||||
text[1] = '*'
|
||||
else:
|
||||
text[1] = "|" + ' ' * int((l-1)/2-1) + '*' + ' ' * int((l)/2-1) + "|"
|
||||
if action == 'east':
|
||||
text[2] = ' ' + text[2] + ' >'
|
||||
elif action == 'west':
|
||||
text[2] = '< ' + text[2] + ' '
|
||||
elif action == 'north':
|
||||
text[0] = ' ' * int(maxLen/2) + '^' +' ' * int(maxLen/2)
|
||||
elif action == 'south':
|
||||
text[4] = ' ' * int(maxLen/2) + 'v' +' ' * int(maxLen/2)
|
||||
newCell = "\n".join(text)
|
||||
newRow.append(newCell)
|
||||
newRows.append(newRow)
|
||||
numCols = grid.width
|
||||
for rowNum, row in enumerate(newRows):
|
||||
row.insert(0,"\n\n"+str(rowNum))
|
||||
newRows.reverse()
|
||||
colLabels = [str(colNum) for colNum in range(numCols)]
|
||||
colLabels.insert(0,' ')
|
||||
finalRows = [colLabels] + newRows
|
||||
print indent(finalRows,separateRows=True,delim='|', prefix='|',postfix='|', justify='center',hasHeader=True)
|
||||
|
||||
|
||||
def prettyPrintNullValues(gridWorld, currentState = None):
|
||||
grid = gridWorld.grid
|
||||
maxLen = 11
|
||||
newRows = []
|
||||
for y in range(grid.height):
|
||||
newRow = []
|
||||
for x in range(grid.width):
|
||||
state = (x, y)
|
||||
|
||||
# value = values[state]
|
||||
|
||||
action = None
|
||||
# if policy != None and state in policy:
|
||||
# action = policy[state]
|
||||
#
|
||||
actions = gridWorld.getPossibleActions(state)
|
||||
|
||||
if action not in actions and 'exit' in actions:
|
||||
action = 'exit'
|
||||
|
||||
valString = None
|
||||
# if action == 'exit':
|
||||
# valString = border('%.2f' % value)
|
||||
# else:
|
||||
# valString = '\n\n%.2f\n\n' % value
|
||||
# valString += ' '*maxLen
|
||||
|
||||
if grid[x][y] == 'S':
|
||||
valString = '\n\nS\n\n'
|
||||
valString += ' '*maxLen
|
||||
elif grid[x][y] == '#':
|
||||
valString = '\n#####\n#####\n#####\n'
|
||||
valString += ' '*maxLen
|
||||
elif type(grid[x][y]) == float or type(grid[x][y]) == int:
|
||||
valString = border('%.2f' % float(grid[x][y]))
|
||||
else: valString = border(' ')
|
||||
pieces = [valString]
|
||||
|
||||
text = ("\n".join(pieces)).split('\n')
|
||||
|
||||
if currentState == state:
|
||||
l = len(text[1])
|
||||
if l == 0:
|
||||
text[1] = '*'
|
||||
else:
|
||||
text[1] = "|" + ' ' * int((l-1)/2-1) + '*' + ' ' * int((l)/2-1) + "|"
|
||||
|
||||
if action == 'east':
|
||||
text[2] = ' ' + text[2] + ' >'
|
||||
elif action == 'west':
|
||||
text[2] = '< ' + text[2] + ' '
|
||||
elif action == 'north':
|
||||
text[0] = ' ' * int(maxLen/2) + '^' +' ' * int(maxLen/2)
|
||||
elif action == 'south':
|
||||
text[4] = ' ' * int(maxLen/2) + 'v' +' ' * int(maxLen/2)
|
||||
newCell = "\n".join(text)
|
||||
newRow.append(newCell)
|
||||
newRows.append(newRow)
|
||||
numCols = grid.width
|
||||
for rowNum, row in enumerate(newRows):
|
||||
row.insert(0,"\n\n"+str(rowNum))
|
||||
newRows.reverse()
|
||||
colLabels = [str(colNum) for colNum in range(numCols)]
|
||||
colLabels.insert(0,' ')
|
||||
finalRows = [colLabels] + newRows
|
||||
print indent(finalRows,separateRows=True,delim='|', prefix='|',postfix='|', justify='center',hasHeader=True)
|
||||
|
||||
def prettyPrintQValues(gridWorld, qValues, currentState=None):
|
||||
grid = gridWorld.grid
|
||||
maxLen = 11
|
||||
newRows = []
|
||||
for y in range(grid.height):
|
||||
newRow = []
|
||||
for x in range(grid.width):
|
||||
state = (x, y)
|
||||
actions = gridWorld.getPossibleActions(state)
|
||||
if actions == None or len(actions) == 0:
|
||||
actions = [None]
|
||||
bestQ = max([qValues[(state, action)] for action in actions])
|
||||
bestActions = [action for action in actions if qValues[(state, action)] == bestQ]
|
||||
|
||||
# display cell
|
||||
qStrings = dict([(action, "%.2f" % qValues[(state, action)]) for action in actions])
|
||||
northString = ('north' in qStrings and qStrings['north']) or ' '
|
||||
southString = ('south' in qStrings and qStrings['south']) or ' '
|
||||
eastString = ('east' in qStrings and qStrings['east']) or ' '
|
||||
westString = ('west' in qStrings and qStrings['west']) or ' '
|
||||
exitString = ('exit' in qStrings and qStrings['exit']) or ' '
|
||||
|
||||
eastLen = len(eastString)
|
||||
westLen = len(westString)
|
||||
if eastLen < westLen:
|
||||
eastString = ' '*(westLen-eastLen)+eastString
|
||||
if westLen < eastLen:
|
||||
westString = westString+' '*(eastLen-westLen)
|
||||
|
||||
if 'north' in bestActions:
|
||||
northString = '/'+northString+'\\'
|
||||
if 'south' in bestActions:
|
||||
southString = '\\'+southString+'/'
|
||||
if 'east' in bestActions:
|
||||
eastString = ''+eastString+'>'
|
||||
else:
|
||||
eastString = ''+eastString+' '
|
||||
if 'west' in bestActions:
|
||||
westString = '<'+westString+''
|
||||
else:
|
||||
westString = ' '+westString+''
|
||||
if 'exit' in bestActions:
|
||||
exitString = '[ '+exitString+' ]'
|
||||
|
||||
|
||||
ewString = westString + " " + eastString
|
||||
if state == currentState:
|
||||
ewString = westString + " * " + eastString
|
||||
if state == gridWorld.getStartState():
|
||||
ewString = westString + " S " + eastString
|
||||
if state == currentState and state == gridWorld.getStartState():
|
||||
ewString = westString + " S:* " + eastString
|
||||
|
||||
text = [northString, "\n"+exitString, ewString, ' '*maxLen+"\n", southString]
|
||||
|
||||
if grid[x][y] == '#':
|
||||
text = ['', '\n#####\n#####\n#####', '']
|
||||
|
||||
newCell = "\n".join(text)
|
||||
newRow.append(newCell)
|
||||
newRows.append(newRow)
|
||||
numCols = grid.width
|
||||
for rowNum, row in enumerate(newRows):
|
||||
row.insert(0,"\n\n\n"+str(rowNum))
|
||||
newRows.reverse()
|
||||
colLabels = [str(colNum) for colNum in range(numCols)]
|
||||
colLabels.insert(0,' ')
|
||||
finalRows = [colLabels] + newRows
|
||||
|
||||
print indent(finalRows,separateRows=True,delim='|',prefix='|',postfix='|', justify='center',hasHeader=True)
|
||||
|
||||
def border(text):
|
||||
length = len(text)
|
||||
pieces = ['-' * (length+2), '|'+' ' * (length+2)+'|', ' | '+text+' | ', '|'+' ' * (length+2)+'|','-' * (length+2)]
|
||||
return '\n'.join(pieces)
|
||||
|
||||
# INDENTING CODE
|
||||
|
||||
# Indenting code based on a post from George Sakkis
|
||||
# (http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/267662)
|
||||
|
||||
import cStringIO,operator
|
||||
|
||||
def indent(rows, hasHeader=False, headerChar='-', delim=' | ', justify='left',
|
||||
separateRows=False, prefix='', postfix='', wrapfunc=lambda x:x):
|
||||
"""Indents a table by column.
|
||||
- rows: A sequence of sequences of items, one sequence per row.
|
||||
- hasHeader: True if the first row consists of the columns' names.
|
||||
- headerChar: Character to be used for the row separator line
|
||||
(if hasHeader==True or separateRows==True).
|
||||
- delim: The column delimiter.
|
||||
- justify: Determines how are data justified in their column.
|
||||
Valid values are 'left','right' and 'center'.
|
||||
- separateRows: True if rows are to be separated by a line
|
||||
of 'headerChar's.
|
||||
- prefix: A string prepended to each printed row.
|
||||
- postfix: A string appended to each printed row.
|
||||
- wrapfunc: A function f(text) for wrapping text; each element in
|
||||
the table is first wrapped by this function."""
|
||||
# closure for breaking logical rows to physical, using wrapfunc
|
||||
def rowWrapper(row):
|
||||
newRows = [wrapfunc(item).split('\n') for item in row]
|
||||
return [[substr or '' for substr in item] for item in map(None,*newRows)]
|
||||
# break each logical row into one or more physical ones
|
||||
logicalRows = [rowWrapper(row) for row in rows]
|
||||
# columns of physical rows
|
||||
columns = map(None,*reduce(operator.add,logicalRows))
|
||||
# get the maximum of each column by the string length of its items
|
||||
maxWidths = [max([len(str(item)) for item in column]) for column in columns]
|
||||
rowSeparator = headerChar * (len(prefix) + len(postfix) + sum(maxWidths) + \
|
||||
len(delim)*(len(maxWidths)-1))
|
||||
# select the appropriate justify method
|
||||
justify = {'center':str.center, 'right':str.rjust, 'left':str.ljust}[justify.lower()]
|
||||
output=cStringIO.StringIO()
|
||||
if separateRows: print >> output, rowSeparator
|
||||
for physicalRows in logicalRows:
|
||||
for row in physicalRows:
|
||||
print >> output, \
|
||||
prefix \
|
||||
+ delim.join([justify(str(item),width) for (item,width) in zip(row,maxWidths)]) \
|
||||
+ postfix
|
||||
if separateRows or hasHeader: print >> output, rowSeparator; hasHeader=False
|
||||
return output.getvalue()
|
||||
|
||||
import math
|
||||
def wrap_always(text, width):
|
||||
"""A simple word-wrap function that wraps text on exactly width characters.
|
||||
It doesn't split the text in words."""
|
||||
return '\n'.join([ text[width*i:width*(i+1)] \
|
||||
for i in xrange(int(math.ceil(1.*len(text)/width))) ])
|
||||
|
||||
|
||||
# TEST OF DISPLAY CODE
|
||||
|
||||
if __name__ == '__main__':
|
||||
import gridworld, util
|
||||
|
||||
grid = gridworld.getCliffGrid3()
|
||||
print grid.getStates()
|
||||
|
||||
policy = dict([(state,'east') for state in grid.getStates()])
|
||||
values = util.Counter(dict([(state,1000.23) for state in grid.getStates()]))
|
||||
prettyPrintValues(grid, values, policy, currentState = (0,0))
|
||||
|
||||
stateCrossActions = [[(state, action) for action in grid.getPossibleActions(state)] for state in grid.getStates()]
|
||||
qStates = reduce(lambda x,y: x+y, stateCrossActions, [])
|
||||
qValues = util.Counter(dict([((state, action), 10.5) for state, action in qStates]))
|
||||
qValues = util.Counter(dict([((state, action), 10.5) for state, action in reduce(lambda x,y: x+y, stateCrossActions, [])]))
|
||||
prettyPrintQValues(grid, qValues, currentState = (0,0))
|
653
reinforcement/util.py
Normal file
653
reinforcement/util.py
Normal file
|
@ -0,0 +1,653 @@
|
|||
# util.py
|
||||
# -------
|
||||
# Licensing Information: You are free to use or extend these projects for
|
||||
# educational purposes provided that (1) you do not distribute or publish
|
||||
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||
#
|
||||
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||
# The core projects and autograders were primarily created by John DeNero
|
||||
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||
|
||||
|
||||
import sys
|
||||
import inspect
|
||||
import heapq, random
|
||||
import cStringIO
|
||||
|
||||
|
||||
class FixedRandom:
    """
    Wraps a random.Random whose internal Mersenne Twister state is
    hard-coded, so every run reproduces the identical "random" sequence
    (keeps autograding deterministic).
    """
    def __init__(self):
        # State tuple in the format of random.getstate() under Python 2:
        # (version, (624 MT words + position), gauss_next).  Do not edit.
        fixedState = (3, (2147483648L, 507801126L, 683453281L, 310439348L, 2597246090L, \
            2209084787L, 2267831527L, 979920060L, 3098657677L, 37650879L, 807947081L, 3974896263L, \
            881243242L, 3100634921L, 1334775171L, 3965168385L, 746264660L, 4074750168L, 500078808L, \
            776561771L, 702988163L, 1636311725L, 2559226045L, 157578202L, 2498342920L, 2794591496L, \
            4130598723L, 496985844L, 2944563015L, 3731321600L, 3514814613L, 3362575829L, 3038768745L, \
            2206497038L, 1108748846L, 1317460727L, 3134077628L, 988312410L, 1674063516L, 746456451L, \
            3958482413L, 1857117812L, 708750586L, 1583423339L, 3466495450L, 1536929345L, 1137240525L, \
            3875025632L, 2466137587L, 1235845595L, 4214575620L, 3792516855L, 657994358L, 1241843248L, \
            1695651859L, 3678946666L, 1929922113L, 2351044952L, 2317810202L, 2039319015L, 460787996L, \
            3654096216L, 4068721415L, 1814163703L, 2904112444L, 1386111013L, 574629867L, 2654529343L, \
            3833135042L, 2725328455L, 552431551L, 4006991378L, 1331562057L, 3710134542L, 303171486L, \
            1203231078L, 2670768975L, 54570816L, 2679609001L, 578983064L, 1271454725L, 3230871056L, \
            2496832891L, 2944938195L, 1608828728L, 367886575L, 2544708204L, 103775539L, 1912402393L, \
            1098482180L, 2738577070L, 3091646463L, 1505274463L, 2079416566L, 659100352L, 839995305L, \
            1696257633L, 274389836L, 3973303017L, 671127655L, 1061109122L, 517486945L, 1379749962L, \
            3421383928L, 3116950429L, 2165882425L, 2346928266L, 2892678711L, 2936066049L, 1316407868L, \
            2873411858L, 4279682888L, 2744351923L, 3290373816L, 1014377279L, 955200944L, 4220990860L, \
            2386098930L, 1772997650L, 3757346974L, 1621616438L, 2877097197L, 442116595L, 2010480266L, \
            2867861469L, 2955352695L, 605335967L, 2222936009L, 2067554933L, 4129906358L, 1519608541L, \
            1195006590L, 1942991038L, 2736562236L, 279162408L, 1415982909L, 4099901426L, 1732201505L, \
            2934657937L, 860563237L, 2479235483L, 3081651097L, 2244720867L, 3112631622L, 1636991639L, \
            3860393305L, 2312061927L, 48780114L, 1149090394L, 2643246550L, 1764050647L, 3836789087L, \
            3474859076L, 4237194338L, 1735191073L, 2150369208L, 92164394L, 756974036L, 2314453957L, \
            323969533L, 4267621035L, 283649842L, 810004843L, 727855536L, 1757827251L, 3334960421L, \
            3261035106L, 38417393L, 2660980472L, 1256633965L, 2184045390L, 811213141L, 2857482069L, \
            2237770878L, 3891003138L, 2787806886L, 2435192790L, 2249324662L, 3507764896L, 995388363L, \
            856944153L, 619213904L, 3233967826L, 3703465555L, 3286531781L, 3863193356L, 2992340714L, \
            413696855L, 3865185632L, 1704163171L, 3043634452L, 2225424707L, 2199018022L, 3506117517L, \
            3311559776L, 3374443561L, 1207829628L, 668793165L, 1822020716L, 2082656160L, 1160606415L, \
            3034757648L, 741703672L, 3094328738L, 459332691L, 2702383376L, 1610239915L, 4162939394L, \
            557861574L, 3805706338L, 3832520705L, 1248934879L, 3250424034L, 892335058L, 74323433L, \
            3209751608L, 3213220797L, 3444035873L, 3743886725L, 1783837251L, 610968664L, 580745246L, \
            4041979504L, 201684874L, 2673219253L, 1377283008L, 3497299167L, 2344209394L, 2304982920L, \
            3081403782L, 2599256854L, 3184475235L, 3373055826L, 695186388L, 2423332338L, 222864327L, \
            1258227992L, 3627871647L, 3487724980L, 4027953808L, 3053320360L, 533627073L, 3026232514L, \
            2340271949L, 867277230L, 868513116L, 2158535651L, 2487822909L, 3428235761L, 3067196046L, \
            3435119657L, 1908441839L, 788668797L, 3367703138L, 3317763187L, 908264443L, 2252100381L, \
            764223334L, 4127108988L, 384641349L, 3377374722L, 1263833251L, 1958694944L, 3847832657L, \
            1253909612L, 1096494446L, 555725445L, 2277045895L, 3340096504L, 1383318686L, 4234428127L, \
            1072582179L, 94169494L, 1064509968L, 2681151917L, 2681864920L, 734708852L, 1338914021L, \
            1270409500L, 1789469116L, 4191988204L, 1716329784L, 2213764829L, 3712538840L, 919910444L, \
            1318414447L, 3383806712L, 3054941722L, 3378649942L, 1205735655L, 1268136494L, 2214009444L, \
            2532395133L, 3232230447L, 230294038L, 342599089L, 772808141L, 4096882234L, 3146662953L, \
            2784264306L, 1860954704L, 2675279609L, 2984212876L, 2466966981L, 2627986059L, 2985545332L, \
            2578042598L, 1458940786L, 2944243755L, 3959506256L, 1509151382L, 325761900L, 942251521L, \
            4184289782L, 2756231555L, 3297811774L, 1169708099L, 3280524138L, 3805245319L, 3227360276L, \
            3199632491L, 2235795585L, 2865407118L, 36763651L, 2441503575L, 3314890374L, 1755526087L, \
            17915536L, 1196948233L, 949343045L, 3815841867L, 489007833L, 2654997597L, 2834744136L, \
            417688687L, 2843220846L, 85621843L, 747339336L, 2043645709L, 3520444394L, 1825470818L, \
            647778910L, 275904777L, 1249389189L, 3640887431L, 4200779599L, 323384601L, 3446088641L, \
            4049835786L, 1718989062L, 3563787136L, 44099190L, 3281263107L, 22910812L, 1826109246L, \
            745118154L, 3392171319L, 1571490704L, 354891067L, 815955642L, 1453450421L, 940015623L, \
            796817754L, 1260148619L, 3898237757L, 176670141L, 1870249326L, 3317738680L, 448918002L, \
            4059166594L, 2003827551L, 987091377L, 224855998L, 3520570137L, 789522610L, 2604445123L, \
            454472869L, 475688926L, 2990723466L, 523362238L, 3897608102L, 806637149L, 2642229586L, \
            2928614432L, 1564415411L, 1691381054L, 3816907227L, 4082581003L, 1895544448L, 3728217394L, \
            3214813157L, 4054301607L, 1882632454L, 2873728645L, 3694943071L, 1297991732L, 2101682438L, \
            3952579552L, 678650400L, 1391722293L, 478833748L, 2976468591L, 158586606L, 2576499787L, \
            662690848L, 3799889765L, 3328894692L, 2474578497L, 2383901391L, 1718193504L, 3003184595L, \
            3630561213L, 1929441113L, 3848238627L, 1594310094L, 3040359840L, 3051803867L, 2462788790L, \
            954409915L, 802581771L, 681703307L, 545982392L, 2738993819L, 8025358L, 2827719383L, \
            770471093L, 3484895980L, 3111306320L, 3900000891L, 2116916652L, 397746721L, 2087689510L, \
            721433935L, 1396088885L, 2751612384L, 1998988613L, 2135074843L, 2521131298L, 707009172L, \
            2398321482L, 688041159L, 2264560137L, 482388305L, 207864885L, 3735036991L, 3490348331L, \
            1963642811L, 3260224305L, 3493564223L, 1939428454L, 1128799656L, 1366012432L, 2858822447L, \
            1428147157L, 2261125391L, 1611208390L, 1134826333L, 2374102525L, 3833625209L, 2266397263L, \
            3189115077L, 770080230L, 2674657172L, 4280146640L, 3604531615L, 4235071805L, 3436987249L, \
            509704467L, 2582695198L, 4256268040L, 3391197562L, 1460642842L, 1617931012L, 457825497L, \
            1031452907L, 1330422862L, 4125947620L, 2280712485L, 431892090L, 2387410588L, 2061126784L, \
            896457479L, 3480499461L, 2488196663L, 4021103792L, 1877063114L, 2744470201L, 1046140599L, \
            2129952955L, 3583049218L, 4217723693L, 2720341743L, 820661843L, 1079873609L, 3360954200L, \
            3652304997L, 3335838575L, 2178810636L, 1908053374L, 4026721976L, 1793145418L, 476541615L, \
            973420250L, 515553040L, 919292001L, 2601786155L, 1685119450L, 3030170809L, 1590676150L, \
            1665099167L, 651151584L, 2077190587L, 957892642L, 646336572L, 2743719258L, 866169074L, \
            851118829L, 4225766285L, 963748226L, 799549420L, 1955032629L, 799460000L, 2425744063L, \
            2441291571L, 1928963772L, 528930629L, 2591962884L, 3495142819L, 1896021824L, 901320159L, \
            3181820243L, 843061941L, 3338628510L, 3782438992L, 9515330L, 1705797226L, 953535929L, \
            764833876L, 3202464965L, 2970244591L, 519154982L, 3390617541L, 566616744L, 3438031503L, \
            1853838297L, 170608755L, 1393728434L, 676900116L, 3184965776L, 1843100290L, 78995357L, \
            2227939888L, 3460264600L, 1745705055L, 1474086965L, 572796246L, 4081303004L, 882828851L, \
            1295445825L, 137639900L, 3304579600L, 2722437017L, 4093422709L, 273203373L, 2666507854L, \
            3998836510L, 493829981L, 1623949669L, 3482036755L, 3390023939L, 833233937L, 1639668730L, \
            1499455075L, 249728260L, 1210694006L, 3836497489L, 1551488720L, 3253074267L, 3388238003L, \
            2372035079L, 3945715164L, 2029501215L, 3362012634L, 2007375355L, 4074709820L, 631485888L, \
            3135015769L, 4273087084L, 3648076204L, 2739943601L, 1374020358L, 1760722448L, 3773939706L, \
            1313027823L, 1895251226L, 4224465911L, 421382535L, 1141067370L, 3660034846L, 3393185650L, \
            1850995280L, 1451917312L, 3841455409L, 3926840308L, 1397397252L, 2572864479L, 2500171350L, \
            3119920613L, 531400869L, 1626487579L, 1099320497L, 407414753L, 2438623324L, 99073255L, \
            3175491512L, 656431560L, 1153671785L, 236307875L, 2824738046L, 2320621382L, 892174056L, \
            230984053L, 719791226L, 2718891946L, 624L), None)
        self.random = random.Random()
        # Install the frozen state; every FixedRandom instance now yields
        # exactly the same stream of draws.
        self.random.setstate(fixedState)
|
||||
|
||||
"""
|
||||
Data structures useful for implementing SearchAgents
|
||||
"""
|
||||
|
||||
class Stack:
    """Last-in-first-out (LIFO) container backed by a plain list."""

    def __init__(self):
        # The top of the stack is the end of the list.
        self.list = []

    def push(self, item):
        """Place 'item' on top of the stack."""
        self.list.append(item)

    def pop(self):
        """Remove and return the most recently pushed item."""
        return self.list.pop()

    def isEmpty(self):
        """Return True when the stack holds no items."""
        return not self.list
|
||||
|
||||
class Queue:
    """First-in-first-out (FIFO) container backed by a plain list."""

    def __init__(self):
        # Newest item lives at index 0; the oldest sits at the end.
        self.list = []

    def push(self, item):
        """Enqueue 'item' at the back of the queue."""
        self.list.insert(0, item)

    def pop(self):
        """
        Remove and return the earliest enqueued item still in the
        queue.
        """
        return self.list.pop()

    def isEmpty(self):
        """Return True when the queue holds no items."""
        return not self.list
|
||||
|
||||
class PriorityQueue:
    """
    Min-priority queue built on heapq: pop() always returns the item
    whose priority value is lowest, with O(1) access to the minimum.

    Priorities are fixed at push time; an item's priority cannot be
    changed, but the same item may be pushed again with a different
    priority.
    """

    def __init__(self):
        self.heap = []
        self.count = 0  # monotonically increasing insertion sequence number

    def push(self, item, priority):
        """Insert 'item' with the given priority."""
        # The sequence number breaks priority ties in insertion order and
        # keeps heapq from ever having to compare the items themselves.
        record = (priority, self.count, item)
        heapq.heappush(self.heap, record)
        self.count += 1

    def pop(self):
        """Remove and return the lowest-priority item."""
        _, _, item = heapq.heappop(self.heap)
        return item

    def isEmpty(self):
        """Return True when the queue holds no items."""
        return not self.heap
|
||||
|
||||
class PriorityQueueWithFunction(PriorityQueue):
    """
    PriorityQueue whose push() takes only the item: a caller-supplied
    function computes each item's priority.  Designed as a drop-in
    replacement for Stack and Queue, which share this push/pop shape.
    """

    def __init__(self, priorityFunction):
        """priorityFunction(item) -> priority"""
        self.priorityFunction = priorityFunction  # consulted on every push
        PriorityQueue.__init__(self)

    def push(self, item):
        """Insert 'item' with priority priorityFunction(item)."""
        PriorityQueue.push(self, item, self.priorityFunction(item))
|
||||
|
||||
|
||||
def manhattanDistance(xy1, xy2):
    """Return the Manhattan (L1) distance between points xy1 and xy2."""
    dx = abs(xy1[0] - xy2[0])
    dy = abs(xy1[1] - xy2[1])
    return dx + dy
|
||||
|
||||
"""
|
||||
Data structures and functions useful for various course projects
|
||||
|
||||
The search project should not need anything below this line.
|
||||
"""
|
||||
|
||||
class Counter(dict):
    """
    A counter keeps track of counts for a set of keys.

    The counter class is an extension of the standard python
    dictionary type.  It is specialized to have number values
    (integers or floats), and includes a handful of additional
    functions to ease the task of counting data.  In particular,
    all keys are defaulted to have value 0.  Using a dictionary:

    a = {}
    print a['test']

    would give an error, while the Counter class analogue:

    >>> a = Counter()
    >>> print a['test']
    0

    returns the default 0 value. Note that to reference a key
    that you know is contained in the counter,
    you can still use the dictionary syntax:

    >>> a = Counter()
    >>> a['test'] = 2
    >>> print a['test']
    2

    This is very useful for counting things without initializing their counts,
    see for example:

    >>> a['blah'] += 1
    >>> print a['blah']
    1

    The counter also includes additional functionality useful in implementing
    the classifiers for this assignment.  Two counters can be added,
    subtracted or multiplied together.  See below for details.  They can
    also be normalized and their total count and arg max can be extracted.
    """
    def __getitem__(self, idx):
        # Materialise a 0 entry for missing keys so "c[k] += 1" works
        # without prior initialisation (this also inserts the key).
        self.setdefault(idx, 0)
        return dict.__getitem__(self, idx)

    def incrementAll(self, keys, count):
        """
        Increments all elements of keys by the same count.

        >>> a = Counter()
        >>> a.incrementAll(['one','two', 'three'], 1)
        >>> a['one']
        1
        >>> a['two']
        1
        """
        for key in keys:
            self[key] += count

    def argMax(self):
        """
        Returns the key with the highest value.
        Returns None for an empty counter; ties go to whichever
        maximal entry appears first in iteration order.
        """
        if len(self.keys()) == 0: return None
        all = self.items()
        values = [x[1] for x in all]
        maxIndex = values.index(max(values))
        return all[maxIndex][0]

    def sortedKeys(self):
        """
        Returns a list of keys sorted by their values.  Keys
        with the highest values will appear first.

        >>> a = Counter()
        >>> a['first'] = -2
        >>> a['second'] = 4
        >>> a['third'] = 1
        >>> a.sortedKeys()
        ['second', 'third', 'first']
        """
        sortedItems = self.items()
        # NOTE(review): list.sort(cmp=...) is Python 2 only; under Python 3
        # this would need key=lambda pair: -pair[1] instead.
        compare = lambda x, y:  sign(y[1] - x[1])
        sortedItems.sort(cmp=compare)
        return [x[0] for x in sortedItems]

    def totalCount(self):
        """
        Returns the sum of counts for all keys.
        """
        return sum(self.values())

    def normalize(self):
        """
        Edits the counter such that the total count of all
        keys sums to 1.  The ratio of counts for all keys
        will remain the same. Note that normalizing an empty
        Counter will result in an error.
        """
        total = float(self.totalCount())
        # A zero total would divide by zero; leave the counter untouched.
        if total == 0: return
        for key in self.keys():
            self[key] = self[key] / total

    def divideAll(self, divisor):
        """
        Divides all counts by divisor
        """
        divisor = float(divisor)
        for key in self:
            self[key] /= divisor

    def copy(self):
        """
        Returns a copy of the counter
        """
        return Counter(dict.copy(self))

    def __mul__(self, y ):
        """
        Multiplying two counters gives the dot product of their vectors where
        each unique label is a vector element.

        >>> a = Counter()
        >>> b = Counter()
        >>> a['first'] = -2
        >>> a['second'] = 4
        >>> b['first'] = 3
        >>> b['second'] = 5
        >>> a['third'] = 1.5
        >>> a['fourth'] = 2.5
        >>> a * b
        14
        """
        sum = 0
        x = self
        # Iterate over the smaller counter; keys missing from either
        # operand contribute nothing to the dot product.
        if len(x) > len(y):
            x,y = y,x
        for key in x:
            if key not in y:
                continue
            sum += x[key] * y[key]
        return sum

    def __radd__(self, y):
        """
        Adding another counter to a counter increments the current counter
        by the values stored in the second counter.
        Mutates self in place and (implicitly) returns None.

        >>> a = Counter()
        >>> b = Counter()
        >>> a['first'] = -2
        >>> a['second'] = 4
        >>> b['first'] = 3
        >>> b['third'] = 1
        >>> a += b
        >>> a['first']
        1
        """
        for key, value in y.items():
            self[key] += value

    def __add__( self, y ):
        """
        Adding two counters gives a counter with the union of all keys and
        counts of the second added to counts of the first.

        >>> a = Counter()
        >>> b = Counter()
        >>> a['first'] = -2
        >>> a['second'] = 4
        >>> b['first'] = 3
        >>> b['third'] = 1
        >>> (a + b)['first']
        1
        """
        addend = Counter()
        for key in self:
            if key in y:
                addend[key] = self[key] + y[key]
            else:
                addend[key] = self[key]
        for key in y:
            if key in self:
                continue
            addend[key] = y[key]
        return addend

    def __sub__( self, y ):
        """
        Subtracting a counter from another gives a counter with the union of all keys and
        counts of the second subtracted from counts of the first.

        >>> a = Counter()
        >>> b = Counter()
        >>> a['first'] = -2
        >>> a['second'] = 4
        >>> b['first'] = 3
        >>> b['third'] = 1
        >>> (a - b)['first']
        -5
        """
        addend = Counter()
        for key in self:
            if key in y:
                addend[key] = self[key] - y[key]
            else:
                addend[key] = self[key]
        for key in y:
            if key in self:
                continue
            addend[key] = -1 * y[key]
        return addend
|
||||
|
||||
def raiseNotDefined():
    # Report which unimplemented method was reached (caller's frame) and
    # abort the program with a non-zero exit status.
    fileName = inspect.stack()[1][1]
    line = inspect.stack()[1][2]
    method = inspect.stack()[1][3]

    print "*** Method not implemented: %s at line %s of %s" % (method, line, fileName)
    sys.exit(1)
|
||||
|
||||
def normalize(vectorOrCounter):
    """
    Normalize a vector or counter by dividing each value by the sum of
    all values.  A Counter argument yields a new Counter; a sequence
    yields a new list.  Inputs whose total is zero are returned as-is.
    """
    result = Counter()
    if type(vectorOrCounter) == type(result):
        # Counter case: divide every count by the grand total.
        total = float(vectorOrCounter.totalCount())
        if total == 0:
            return vectorOrCounter
        for key in vectorOrCounter.keys():
            result[key] = vectorOrCounter[key] / total
        return result
    # Plain sequence case.
    total = float(sum(vectorOrCounter))
    if total == 0:
        return vectorOrCounter
    return [entry / total for entry in vectorOrCounter]
|
||||
|
||||
def nSample(distribution, values, n):
    """
    Draw n independent samples from the discrete distribution given by
    the parallel lists (distribution, values): one sorted batch of
    uniform draws is swept against the running CDF.
    """
    if sum(distribution) != 1:
        distribution = normalize(distribution)
    draws = sorted(random.random() for _ in range(n))
    samples = []
    drawIndex = 0
    valueIndex, cdf = 0, distribution[0]
    while drawIndex < n:
        if draws[drawIndex] < cdf:
            # This draw falls in the current CDF bucket.
            samples.append(values[valueIndex])
            drawIndex += 1
        else:
            # Advance to the next bucket of the CDF.
            valueIndex += 1
            cdf += distribution[valueIndex]
    return samples
|
||||
|
||||
def sample(distribution, values=None):
    """
    Draw one sample.  'distribution' is either a Counter (its keys become
    the values) or a list of probabilities parallel to 'values'.
    """
    if type(distribution) == Counter:
        # Sort so the outcome is deterministic under a fixed random seed.
        items = sorted(distribution.items())
        distribution = [pair[1] for pair in items]
        values = [pair[0] for pair in items]
    if sum(distribution) != 1:
        distribution = normalize(distribution)
    roll = random.random()
    index, cumulative = 0, distribution[0]
    # Walk the CDF until it passes the uniform draw.
    while roll > cumulative:
        index += 1
        cumulative += distribution[index]
    return values[index]
|
||||
|
||||
def sampleFromCounter(ctr):
    """Draw a single key from Counter 'ctr' in proportion to its counts."""
    items = sorted(ctr.items())
    weights = [count for _, count in items]
    keys = [key for key, _ in items]
    return sample(weights, keys)
|
||||
|
||||
def getProbability(value, distribution, values):
    """
    Gives the probability of a value under a discrete distribution
    defined by the parallel lists (distribution, values).  Repeated
    occurrences of 'value' have their probabilities summed.
    """
    # Start from 0.0 so the result is always a float.
    return sum((prob for prob, outcome in zip(distribution, values)
                if outcome == value), 0.0)
|
||||
|
||||
def flipCoin(p):
    """Return True with probability p, using a single uniform draw."""
    return random.random() < p
|
||||
|
||||
def chooseFromDistribution(distribution):
    """Sample from either a Counter/dict or a list of (prob, key) pairs."""
    if type(distribution) == dict or type(distribution) == Counter:
        return sample(distribution)
    roll = random.random()
    cumulative = 0.0
    # Walk the (prob, key) pairs until the running total covers the roll.
    for prob, element in distribution:
        cumulative += prob
        if roll <= cumulative:
            return element
|
||||
|
||||
def nearestPoint(pos):
    """Round a continuous (row, col) position to the nearest grid point."""
    row, col = pos
    # Adding 0.5 before truncation rounds each coordinate to the
    # nearest integer (for the non-negative positions used here).
    return (int(row + 0.5), int(col + 0.5))
|
||||
|
||||
def sign(x):
    """Return 1 if x is non-negative, otherwise -1."""
    return 1 if x >= 0 else -1
|
||||
|
||||
def arrayInvert(array):
    """
    Transpose a matrix stored as a list of lists: entry [r][c] of the
    input becomes entry [c][r] of the result.
    """
    # One output row per input row; ragged inputs leave trailing rows
    # short or empty, matching the element-by-element append below.
    transposed = [[] for _ in array]
    for row in array:
        for columnIndex, entry in enumerate(row):
            transposed[columnIndex].append(entry)
    return transposed
|
||||
|
||||
def matrixAsList(matrix, value=True):
    """
    Turn a matrix (list of lists) into the list of (row, col)
    coordinates whose entry equals 'value'.  Column count is taken
    from the first row.
    """
    rowCount, colCount = len(matrix), len(matrix[0])
    return [(row, col)
            for row in range(rowCount)
            for col in range(colCount)
            if matrix[row][col] == value]
|
||||
|
||||
def lookup(name, namespace):
    """
    Get a method or class from any imported module from its name.
    Usage: lookup(functionName, globals())
    """
    dots = name.count('.')
    if dots > 0:
        # Dotted name: import the module prefix and fetch the attribute.
        moduleName, objName = '.'.join(name.split('.')[:-1]), name.split('.')[-1]
        module = __import__(moduleName)
        return getattr(module, objName)
    else:
        # Bare name: search every imported module in the namespace, plus
        # the namespace's own bindings.
        modules = [obj for obj in namespace.values() if str(type(obj)) == "<type 'module'>"]
        options = [getattr(module, name) for module in modules if name in dir(module)]
        options += [obj[1] for obj in namespace.items() if obj[0] == name ]
        if len(options) == 1: return options[0]
        # NOTE(review): the '%s' below is never filled in, so the message
        # prints literally; looks like it should be
        # 'Name conflict for %s' % name -- verify and fix.
        if len(options) > 1: raise Exception, 'Name conflict for %s'
        raise Exception, '%s not found as a method or class' % name
|
||||
|
||||
def pause():
    """
    Pauses the output stream awaiting user feedback.
    Blocks until the user presses enter/return.
    """
    print "<Press enter/return to continue>"
    raw_input()
|
||||
|
||||
|
||||
# code to handle timeouts
|
||||
#
|
||||
# FIXME
|
||||
# NOTE: TimeoutFuncton is NOT reentrant. Later timeouts will silently
|
||||
# disable earlier timeouts. Could be solved by maintaining a global list
|
||||
# of active time outs. Currently, questions which have test cases calling
|
||||
# this have all student code so wrapped.
|
||||
#
|
||||
import signal
|
||||
import time
|
||||
class TimeoutFunctionException(Exception):
    """Raised when a TimeoutFunction-wrapped call exceeds its time budget."""
|
||||
|
||||
|
||||
class TimeoutFunction:
    """
    Wrap a callable so that invoking it raises TimeoutFunctionException
    when it runs longer than 'timeout' seconds.  Uses SIGALRM to abort
    mid-call where available; otherwise the elapsed time is checked only
    after the call returns.
    """

    def __init__(self, function, timeout):
        self.timeout = timeout
        self.function = function

    def handle_timeout(self, signum, frame):
        # Installed as the SIGALRM handler; also called directly on the
        # fallback path.
        raise TimeoutFunctionException()

    def __call__(self, *args, **keyArgs):
        if not hasattr(signal, 'SIGALRM'):
            # No alarm support (e.g. Windows): run, then complain if the
            # call took too long -- it cannot be interrupted mid-flight.
            started = time.time()
            result = self.function(*args, **keyArgs)
            if time.time() - started >= self.timeout:
                self.handle_timeout(None, None)
            return result
        # Arm the alarm, always restore the previous handler afterwards.
        previous = signal.signal(signal.SIGALRM, self.handle_timeout)
        signal.alarm(self.timeout)
        try:
            result = self.function(*args, **keyArgs)
        finally:
            signal.signal(signal.SIGALRM, previous)
        signal.alarm(0)
        return result
|
||||
|
||||
|
||||
|
||||
# Module-level state for the stdout muting helpers.
_ORIGINAL_STDOUT = None  # sys.stdout saved while muted
_ORIGINAL_STDERR = None  # reserved; stderr muting is currently disabled
_MUTED = False           # True while sys.stdout is replaced by a null sink
|
||||
|
||||
class WritableNull:
    """File-like sink that silently discards everything written to it."""

    def write(self, string):
        # Swallow the output; nothing is stored or forwarded.
        pass
|
||||
|
||||
def mutePrint():
    """Silence stdout (until unmutePrint) by swapping in a null writer."""
    global _ORIGINAL_STDOUT, _ORIGINAL_STDERR, _MUTED
    if _MUTED:
        return  # already muted; keep the originally saved stream intact
    _MUTED = True

    _ORIGINAL_STDOUT = sys.stdout
    # stderr is deliberately left untouched so errors stay visible.
    sys.stdout = WritableNull()
|
||||
def unmutePrint():
    """Restore the stdout saved by mutePrint(); no-op when not muted."""
    global _ORIGINAL_STDOUT, _ORIGINAL_STDERR, _MUTED
    if not _MUTED:
        return
    _MUTED = False

    sys.stdout = _ORIGINAL_STDOUT
    # stderr was never muted, so there is nothing to restore.
|
||||
|
123
reinforcement/valueIterationAgents.py
Normal file
123
reinforcement/valueIterationAgents.py
Normal file
|
@ -0,0 +1,123 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# valueIterationAgents.py
|
||||
# -----------------------
|
||||
# Licensing Information: You are free to use or extend these projects for
|
||||
# educational purposes provided that (1) you do not distribute or publish
|
||||
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||
#
|
||||
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||
# The core projects and autograders were primarily created by John DeNero
|
||||
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||
|
||||
|
||||
import mdp, util
|
||||
|
||||
from learningAgents import ValueEstimationAgent
|
||||
|
||||
|
||||
class ValueIterationAgent(ValueEstimationAgent):
    """
    * Please read learningAgents.py before reading this.*

    A ValueIterationAgent takes a Markov decision process
    (see mdp.py) on initialization and runs batch value iteration
    for a given number of iterations using the supplied
    discount factor, then acts greedily with respect to the
    resulting values.
    """

    def __init__(self, mdp, discount=0.9, iterations=100):
        """
        Run `iterations` rounds of batch value iteration on `mdp`.

        Some useful mdp methods used here:
          mdp.getStates()
          mdp.getPossibleActions(state)
          mdp.getTransitionStatesAndProbs(state, action)
          mdp.getReward(state, action, nextState)
          mdp.isTerminal(state)
        """
        self.mdp = mdp
        self.discount = discount
        self.iterations = iterations
        self.values = util.Counter()  # A Counter is a dict with default 0

        states = self.mdp.getStates()
        for _ in range(iterations):
            # Batch update: every new value is computed from the previous
            # iteration's self.values, so work on a copy and swap at the end.
            newValues = self.values.copy()
            for state in states:
                bestQ = None
                for action in self.mdp.getPossibleActions(state):
                    q = self.computeQValueFromValues(state, action)
                    # Keep the largest Q-value over all legal actions.
                    if bestQ is None or q > bestQ:
                        bestQ = q
                # States with no legal actions (e.g. terminal) keep value 0.
                newValues[state] = 0 if bestQ is None else bestQ
            self.values = newValues

    def getValue(self, state):
        """
        Return the value of the state (computed in __init__).
        """
        return self.values[state]

    def computeQValueFromValues(self, state, action):
        """
        Compute the Q-value of `action` in `state` from the value
        function stored in self.values: the expectation over successor
        states of reward plus discounted successor value.
        """
        qValue = 0.0
        for nextState, prob in self.mdp.getTransitionStatesAndProbs(state, action):
            reward = self.mdp.getReward(state, action, nextState)
            qValue += prob * (reward + self.discount * self.values[nextState])
        return qValue

    def computeActionFromValues(self, state):
        """
        The policy is the best action in the given state
        according to the values currently stored in self.values.

        Ties are broken by the first maximal action in the order
        returned by getPossibleActions.  Returns None when there are
        no legal actions (e.g. at the terminal state).
        """
        possibleActions = self.mdp.getPossibleActions(state)
        if len(possibleActions) == 0:
            return None

        qValues = [self.computeQValueFromValues(state, action) for action in possibleActions]
        return possibleActions[qValues.index(max(qValues))]

    def getPolicy(self, state):
        """Return the greedy action for `state`."""
        return self.computeActionFromValues(state)

    def getAction(self, state):
        """Returns the policy at the state (no exploration)."""
        return self.computeActionFromValues(state)

    def getQValue(self, state, action):
        """Return Q(state, action) under the computed value function."""
        return self.computeQValueFromValues(state, action)
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue