Add the files for TP 2, with question 1 answered

This commit is contained in:
François Pelletier 2019-04-10 23:58:15 -04:00
parent e4cca7e1fa
commit c3b9ff7050
101 changed files with 16680 additions and 1 deletion

1
reinforcement/VERSION Normal file

@@ -0,0 +1 @@
v1.001

73
reinforcement/analysis.py Normal file

@@ -0,0 +1,73 @@
# analysis.py
# -----------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).

######################
# ANALYSIS QUESTIONS #
######################

# Set the given parameters to obtain the specified policies through
# value iteration.

def question2():
    answerDiscount = 0.9
    answerNoise = 0.2
    return answerDiscount, answerNoise

def question3a():
    answerDiscount = None
    answerNoise = None
    answerLivingReward = None
    return answerDiscount, answerNoise, answerLivingReward
    # If not possible, return 'NOT POSSIBLE'

def question3b():
    answerDiscount = None
    answerNoise = None
    answerLivingReward = None
    return answerDiscount, answerNoise, answerLivingReward
    # If not possible, return 'NOT POSSIBLE'

def question3c():
    answerDiscount = None
    answerNoise = None
    answerLivingReward = None
    return answerDiscount, answerNoise, answerLivingReward
    # If not possible, return 'NOT POSSIBLE'

def question3d():
    answerDiscount = None
    answerNoise = None
    answerLivingReward = None
    return answerDiscount, answerNoise, answerLivingReward
    # If not possible, return 'NOT POSSIBLE'

def question3e():
    answerDiscount = None
    answerNoise = None
    answerLivingReward = None
    return answerDiscount, answerNoise, answerLivingReward
    # If not possible, return 'NOT POSSIBLE'

def question6():
    answerEpsilon = None
    answerLearningRate = None
    return answerEpsilon, answerLearningRate
    # If not possible, return 'NOT POSSIBLE'

if __name__ == '__main__':
    print 'Answers to analysis questions:'
    import analysis
    for q in [q for q in dir(analysis) if q.startswith('question')]:
        response = getattr(analysis, q)()
        print ' Question %s:\t%s' % (q, str(response))
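The trailing comments above note that each of question3a through question3e (and question6) may instead return the string 'NOT POSSIBLE' when no parameter setting produces the requested policy. A minimal sketch of the two accepted return shapes, with hypothetical placeholder values that are not answers to the assignment:

# Sketch only: hypothetical values, not the assignment's answers.
def question3x_parameter_example():
    answerDiscount = 0.9       # hypothetical
    answerNoise = 0.0          # hypothetical
    answerLivingReward = -1.0  # hypothetical
    return answerDiscount, answerNoise, answerLivingReward

def question3x_not_possible_example():
    # Used when no (discount, noise, livingReward) setting yields the policy.
    return 'NOT POSSIBLE'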

351
reinforcement/autograder.py Normal file

@@ -0,0 +1,351 @@
# autograder.py
# -------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
# imports from python standard library
import grading
import imp
import optparse
import os
import re
import sys
import projectParams
import random
random.seed(0)
try:
from pacman import GameState
except:
pass
# register arguments and set default values
def readCommand(argv):
parser = optparse.OptionParser(description = 'Run public tests on student code')
parser.set_defaults(generateSolutions=False, edxOutput=False, muteOutput=False, printTestCase=False, noGraphics=False)
parser.add_option('--test-directory',
dest = 'testRoot',
default = 'test_cases',
help = 'Root test directory which contains subdirectories corresponding to each question')
parser.add_option('--student-code',
dest = 'studentCode',
default = projectParams.STUDENT_CODE_DEFAULT,
help = 'comma separated list of student code files')
parser.add_option('--code-directory',
dest = 'codeRoot',
default = "",
help = 'Root directory containing the student and testClass code')
parser.add_option('--test-case-code',
dest = 'testCaseCode',
default = projectParams.PROJECT_TEST_CLASSES,
help = 'class containing testClass classes for this project')
parser.add_option('--generate-solutions',
dest = 'generateSolutions',
action = 'store_true',
help = 'Write solutions generated to .solution file')
parser.add_option('--edx-output',
dest = 'edxOutput',
action = 'store_true',
help = 'Generate edX output files')
parser.add_option('--mute',
dest = 'muteOutput',
action = 'store_true',
help = 'Mute output from executing tests')
parser.add_option('--print-tests', '-p',
dest = 'printTestCase',
action = 'store_true',
help = 'Print each test case before running them.')
parser.add_option('--test', '-t',
dest = 'runTest',
default = None,
help = 'Run one particular test. Relative to test root.')
parser.add_option('--question', '-q',
dest = 'gradeQuestion',
default = None,
help = 'Grade one particular question.')
parser.add_option('--no-graphics',
dest = 'noGraphics',
action = 'store_true',
help = 'No graphics display for pacman games.')
(options, args) = parser.parse_args(argv)
return options
# confirm we should author solution files
def confirmGenerate():
print 'WARNING: this action will overwrite any solution files.'
print 'Are you sure you want to proceed? (yes/no)'
while True:
ans = sys.stdin.readline().strip()
if ans == 'yes':
break
elif ans == 'no':
sys.exit(0)
else:
print 'please answer either "yes" or "no"'
# TODO: Fix this so that tracebacks work correctly
# Looking at source of the traceback module, presuming it works
# the same as the intepreters, it uses co_filename. This is,
# however, a readonly attribute.
def setModuleName(module, filename):
functionType = type(confirmGenerate)
classType = type(optparse.Option)
for i in dir(module):
o = getattr(module, i)
if hasattr(o, '__file__'): continue
if type(o) == functionType:
setattr(o, '__file__', filename)
elif type(o) == classType:
setattr(o, '__file__', filename)
# TODO: assign member __file__'s?
#print i, type(o)
#from cStringIO import StringIO
def loadModuleString(moduleSource):
# Below broken, imp doesn't believe its being passed a file:
# ValueError: load_module arg#2 should be a file or None
#
#f = StringIO(moduleCodeDict[k])
#tmp = imp.load_module(k, f, k, (".py", "r", imp.PY_SOURCE))
tmp = imp.new_module(k)
exec moduleCodeDict[k] in tmp.__dict__
setModuleName(tmp, k)
return tmp
import py_compile
def loadModuleFile(moduleName, filePath):
with open(filePath, 'r') as f:
return imp.load_module(moduleName, f, "%s.py" % moduleName, (".py", "r", imp.PY_SOURCE))
def readFile(path, root=""):
"Read file from disk at specified path and return as string"
with open(os.path.join(root, path), 'r') as handle:
return handle.read()
#######################################################################
# Error Hint Map
#######################################################################
# TODO: use these
ERROR_HINT_MAP = {
'q1': {
"<type 'exceptions.IndexError'>": """
We noticed that your project threw an IndexError on q1.
While many things may cause this, it may have been from
assuming a certain number of successors from a state space
or assuming a certain number of actions available from a given
state. Try making your code more general (no hardcoded indices)
and submit again!
"""
},
'q3': {
"<type 'exceptions.AttributeError'>": """
We noticed that your project threw an AttributeError on q3.
While many things may cause this, it may have been from assuming
a certain size or structure to the state space. For example, if you have
a line of code assuming that the state is (x, y) and we run your code
on a state space with (x, y, z), this error could be thrown. Try
making your code more general and submit again!
"""
}
}
import pprint
def splitStrings(d):
d2 = dict(d)
for k in d:
if k[0:2] == "__":
del d2[k]
continue
if d2[k].find("\n") >= 0:
d2[k] = d2[k].split("\n")
return d2
def printTest(testDict, solutionDict):
pp = pprint.PrettyPrinter(indent=4)
print "Test case:"
for line in testDict["__raw_lines__"]:
print " |", line
print "Solution:"
for line in solutionDict["__raw_lines__"]:
print " |", line
def runTest(testName, moduleDict, printTestCase=False, display=None):
import testParser
import testClasses
for module in moduleDict:
setattr(sys.modules[__name__], module, moduleDict[module])
testDict = testParser.TestParser(testName + ".test").parse()
solutionDict = testParser.TestParser(testName + ".solution").parse()
test_out_file = os.path.join('%s.test_output' % testName)
testDict['test_out_file'] = test_out_file
testClass = getattr(projectTestClasses, testDict['class'])
questionClass = getattr(testClasses, 'Question')
question = questionClass({'max_points': 0}, display)
testCase = testClass(question, testDict)
if printTestCase:
printTest(testDict, solutionDict)
# This is a fragile hack to create a stub grades object
grades = grading.Grades(projectParams.PROJECT_NAME, [(None,0)])
testCase.execute(grades, moduleDict, solutionDict)
# returns all the tests you need to run in order to run question
def getDepends(testParser, testRoot, question):
allDeps = [question]
questionDict = testParser.TestParser(os.path.join(testRoot, question, 'CONFIG')).parse()
if 'depends' in questionDict:
depends = questionDict['depends'].split()
for d in depends:
# run dependencies first
allDeps = getDepends(testParser, testRoot, d) + allDeps
return allDeps
# get list of questions to grade
def getTestSubdirs(testParser, testRoot, questionToGrade):
problemDict = testParser.TestParser(os.path.join(testRoot, 'CONFIG')).parse()
if questionToGrade != None:
questions = getDepends(testParser, testRoot, questionToGrade)
if len(questions) > 1:
print 'Note: due to dependencies, the following tests will be run: %s' % ' '.join(questions)
return questions
if 'order' in problemDict:
return problemDict['order'].split()
return sorted(os.listdir(testRoot))
# evaluate student code
def evaluate(generateSolutions, testRoot, moduleDict, exceptionMap=ERROR_HINT_MAP, edxOutput=False, muteOutput=False,
printTestCase=False, questionToGrade=None, display=None):
# imports of testbench code. note that the testClasses import must follow
# the import of student code due to dependencies
import testParser
import testClasses
for module in moduleDict:
setattr(sys.modules[__name__], module, moduleDict[module])
questions = []
questionDicts = {}
test_subdirs = getTestSubdirs(testParser, testRoot, questionToGrade)
for q in test_subdirs:
subdir_path = os.path.join(testRoot, q)
if not os.path.isdir(subdir_path) or q[0] == '.':
continue
# create a question object
questionDict = testParser.TestParser(os.path.join(subdir_path, 'CONFIG')).parse()
questionClass = getattr(testClasses, questionDict['class'])
question = questionClass(questionDict, display)
questionDicts[q] = questionDict
# load test cases into question
tests = filter(lambda t: re.match('[^#~.].*\.test\Z', t), os.listdir(subdir_path))
tests = map(lambda t: re.match('(.*)\.test\Z', t).group(1), tests)
for t in sorted(tests):
test_file = os.path.join(subdir_path, '%s.test' % t)
solution_file = os.path.join(subdir_path, '%s.solution' % t)
test_out_file = os.path.join(subdir_path, '%s.test_output' % t)
testDict = testParser.TestParser(test_file).parse()
if testDict.get("disabled", "false").lower() == "true":
continue
testDict['test_out_file'] = test_out_file
testClass = getattr(projectTestClasses, testDict['class'])
testCase = testClass(question, testDict)
def makefun(testCase, solution_file):
if generateSolutions:
# write solution file to disk
return lambda grades: testCase.writeSolution(moduleDict, solution_file)
else:
# read in solution dictionary and pass as an argument
testDict = testParser.TestParser(test_file).parse()
solutionDict = testParser.TestParser(solution_file).parse()
if printTestCase:
return lambda grades: printTest(testDict, solutionDict) or testCase.execute(grades, moduleDict, solutionDict)
else:
return lambda grades: testCase.execute(grades, moduleDict, solutionDict)
question.addTestCase(testCase, makefun(testCase, solution_file))
# Note extra function is necessary for scoping reasons
def makefun(question):
return lambda grades: question.execute(grades)
setattr(sys.modules[__name__], q, makefun(question))
questions.append((q, question.getMaxPoints()))
grades = grading.Grades(projectParams.PROJECT_NAME, questions, edxOutput=edxOutput, muteOutput=muteOutput)
if questionToGrade == None:
for q in questionDicts:
for prereq in questionDicts[q].get('depends', '').split():
grades.addPrereq(q, prereq)
grades.grade(sys.modules[__name__], bonusPic = projectParams.BONUS_PIC)
return grades.points
def getDisplay(graphicsByDefault, options=None):
graphics = graphicsByDefault
if options is not None and options.noGraphics:
graphics = False
if graphics:
try:
import graphicsDisplay
return graphicsDisplay.PacmanGraphics(1, frameTime=.05)
except ImportError:
pass
import textDisplay
return textDisplay.NullGraphics()
if __name__ == '__main__':
options = readCommand(sys.argv)
if options.generateSolutions:
confirmGenerate()
codePaths = options.studentCode.split(',')
# moduleCodeDict = {}
# for cp in codePaths:
# moduleName = re.match('.*?([^/]*)\.py', cp).group(1)
# moduleCodeDict[moduleName] = readFile(cp, root=options.codeRoot)
# moduleCodeDict['projectTestClasses'] = readFile(options.testCaseCode, root=options.codeRoot)
# moduleDict = loadModuleDict(moduleCodeDict)
moduleDict = {}
for cp in codePaths:
moduleName = re.match('.*?([^/]*)\.py', cp).group(1)
moduleDict[moduleName] = loadModuleFile(moduleName, os.path.join(options.codeRoot, cp))
moduleName = re.match('.*?([^/]*)\.py', options.testCaseCode).group(1)
moduleDict['projectTestClasses'] = loadModuleFile(moduleName, os.path.join(options.codeRoot, options.testCaseCode))
if options.runTest != None:
runTest(options.runTest, moduleDict, printTestCase=options.printTestCase, display=getDisplay(True, options))
else:
evaluate(options.generateSolutions, options.testRoot, moduleDict,
edxOutput=options.edxOutput, muteOutput=options.muteOutput, printTestCase=options.printTestCase,
questionToGrade=options.gradeQuestion, display=getDisplay(options.gradeQuestion!=None, options))
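Because readCommand builds a standard optparse parser, the flags defined above can also be exercised programmatically with an argv-style list. A small sketch, assuming it is run from the reinforcement/ directory under the project's Python 2 interpreter:

# Sketch: drive the option parsing above without the shell.
import autograder

opts = autograder.readCommand(['-q', 'q1', '--no-graphics', '--mute'])
print(opts.gradeQuestion)   # 'q1'  -> passed to evaluate(questionToGrade=...)
print(opts.noGraphics)      # True  -> getDisplay() falls back to NullGraphics
print(opts.muteOutput)      # True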

384
reinforcement/crawler.py Normal file

@@ -0,0 +1,384 @@
# crawler.py
# ----------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
#!/usr/bin/python
import math
from math import pi as PI
import time
import environment
import random
class CrawlingRobotEnvironment(environment.Environment):
def __init__(self, crawlingRobot):
self.crawlingRobot = crawlingRobot
# The state is of the form (armAngle, handAngle)
# where the angles are bucket numbers, not actual
# degree measurements
self.state = None
self.nArmStates = 9
self.nHandStates = 13
# create a list of arm buckets and hand buckets to
# discretize the state space
minArmAngle,maxArmAngle = self.crawlingRobot.getMinAndMaxArmAngles()
minHandAngle,maxHandAngle = self.crawlingRobot.getMinAndMaxHandAngles()
armIncrement = (maxArmAngle - minArmAngle) / (self.nArmStates-1)
handIncrement = (maxHandAngle - minHandAngle) / (self.nHandStates-1)
self.armBuckets = [minArmAngle+(armIncrement*i) \
for i in range(self.nArmStates)]
self.handBuckets = [minHandAngle+(handIncrement*i) \
for i in range(self.nHandStates)]
# Reset
self.reset()
def getCurrentState(self):
"""
Return the current state
of the crawling robot
"""
return self.state
def getPossibleActions(self, state):
"""
Returns possible actions
for the states in the
current state
"""
actions = list()
currArmBucket,currHandBucket = state
if currArmBucket > 0: actions.append('arm-down')
if currArmBucket < self.nArmStates-1: actions.append('arm-up')
if currHandBucket > 0: actions.append('hand-down')
if currHandBucket < self.nHandStates-1: actions.append('hand-up')
return actions
def doAction(self, action):
"""
Perform the action and update
the current state of the Environment
and return the reward for the
current state, the next state
and the taken action.
Returns:
nextState, reward
"""
nextState, reward = None, None
oldX,oldY = self.crawlingRobot.getRobotPosition()
armBucket,handBucket = self.state
armAngle,handAngle = self.crawlingRobot.getAngles()
if action == 'arm-up':
newArmAngle = self.armBuckets[armBucket+1]
self.crawlingRobot.moveArm(newArmAngle)
nextState = (armBucket+1,handBucket)
if action == 'arm-down':
newArmAngle = self.armBuckets[armBucket-1]
self.crawlingRobot.moveArm(newArmAngle)
nextState = (armBucket-1,handBucket)
if action == 'hand-up':
newHandAngle = self.handBuckets[handBucket+1]
self.crawlingRobot.moveHand(newHandAngle)
nextState = (armBucket,handBucket+1)
if action == 'hand-down':
newHandAngle = self.handBuckets[handBucket-1]
self.crawlingRobot.moveHand(newHandAngle)
nextState = (armBucket,handBucket-1)
newX,newY = self.crawlingRobot.getRobotPosition()
# a simple reward function
reward = newX - oldX
self.state = nextState
return nextState, reward
def reset(self):
"""
Resets the Environment to the initial state
"""
## Initialize the state to be the middle
## value for each parameter e.g. if there are 13 and 19
## buckets for the arm and hand parameters, then the intial
## state should be (6,9)
##
## Also call self.crawlingRobot.setAngles()
## to the initial arm and hand angle
armState = self.nArmStates/2
handState = self.nHandStates/2
self.state = armState,handState
self.crawlingRobot.setAngles(self.armBuckets[armState],self.handBuckets[handState])
self.crawlingRobot.positions = [20,self.crawlingRobot.getRobotPosition()[0]]
class CrawlingRobot:
def setAngles(self, armAngle, handAngle):
"""
set the robot's arm and hand angles
to the passed in values
"""
self.armAngle = armAngle
self.handAngle = handAngle
def getAngles(self):
"""
returns the pair of (armAngle, handAngle)
"""
return self.armAngle, self.handAngle
def getRobotPosition(self):
"""
returns the (x,y) coordinates
of the lower-left point of the
robot
"""
return self.robotPos
def moveArm(self, newArmAngle):
"""
move the robot arm to 'newArmAngle'
"""
oldArmAngle = self.armAngle
if newArmAngle > self.maxArmAngle:
raise 'Crawling Robot: Arm Raised too high. Careful!'
if newArmAngle < self.minArmAngle:
raise 'Crawling Robot: Arm Raised too low. Careful!'
disp = self.displacement(self.armAngle, self.handAngle,
newArmAngle, self.handAngle)
curXPos = self.robotPos[0]
self.robotPos = (curXPos+disp, self.robotPos[1])
self.armAngle = newArmAngle
# Position and Velocity Sign Post
self.positions.append(self.getRobotPosition()[0])
# self.angleSums.append(abs(math.degrees(oldArmAngle)-math.degrees(newArmAngle)))
if len(self.positions) > 100:
self.positions.pop(0)
# self.angleSums.pop(0)
def moveHand(self, newHandAngle):
"""
move the robot hand to 'newHandAngle'
"""
oldHandAngle = self.handAngle
if newHandAngle > self.maxHandAngle:
raise 'Crawling Robot: Hand Raised too high. Careful!'
if newHandAngle < self.minHandAngle:
raise 'Crawling Robot: Hand Raised too low. Careful!'
disp = self.displacement(self.armAngle, self.handAngle, self.armAngle, newHandAngle)
curXPos = self.robotPos[0]
self.robotPos = (curXPos+disp, self.robotPos[1])
self.handAngle = newHandAngle
# Position and Velocity Sign Post
self.positions.append(self.getRobotPosition()[0])
# self.angleSums.append(abs(math.degrees(oldHandAngle)-math.degrees(newHandAngle)))
if len(self.positions) > 100:
self.positions.pop(0)
# self.angleSums.pop(0)
def getMinAndMaxArmAngles(self):
"""
get the lower- and upper- bound
for the arm angles returns (min,max) pair
"""
return self.minArmAngle, self.maxArmAngle
def getMinAndMaxHandAngles(self):
"""
get the lower- and upper- bound
for the hand angles returns (min,max) pair
"""
return self.minHandAngle, self.maxHandAngle
def getRotationAngle(self):
"""
get the current angle the
robot body is rotated off the ground
"""
armCos, armSin = self.__getCosAndSin(self.armAngle)
handCos, handSin = self.__getCosAndSin(self.handAngle)
x = self.armLength * armCos + self.handLength * handCos + self.robotWidth
y = self.armLength * armSin + self.handLength * handSin + self.robotHeight
if y < 0:
return math.atan(-y/x)
return 0.0
## You shouldn't need methods below here
def __getCosAndSin(self, angle):
return math.cos(angle), math.sin(angle)
def displacement(self, oldArmDegree, oldHandDegree, armDegree, handDegree):
oldArmCos, oldArmSin = self.__getCosAndSin(oldArmDegree)
armCos, armSin = self.__getCosAndSin(armDegree)
oldHandCos, oldHandSin = self.__getCosAndSin(oldHandDegree)
handCos, handSin = self.__getCosAndSin(handDegree)
xOld = self.armLength * oldArmCos + self.handLength * oldHandCos + self.robotWidth
yOld = self.armLength * oldArmSin + self.handLength * oldHandSin + self.robotHeight
x = self.armLength * armCos + self.handLength * handCos + self.robotWidth
y = self.armLength * armSin + self.handLength * handSin + self.robotHeight
if y < 0:
if yOld <= 0:
return math.sqrt(xOld*xOld + yOld*yOld) - math.sqrt(x*x + y*y)
return (xOld - yOld*(x-xOld) / (y - yOld)) - math.sqrt(x*x + y*y)
else:
if yOld >= 0:
return 0.0
return -(x - y * (xOld-x)/(yOld-y)) + math.sqrt(xOld*xOld + yOld*yOld)
raise 'Never Should See This!'
def draw(self, stepCount, stepDelay):
x1, y1 = self.getRobotPosition()
x1 = x1 % self.totWidth
## Check Lower Still on the ground
if y1 != self.groundY:
raise 'Flying Robot!!'
rotationAngle = self.getRotationAngle()
cosRot, sinRot = self.__getCosAndSin(rotationAngle)
x2 = x1 + self.robotWidth * cosRot
y2 = y1 - self.robotWidth * sinRot
x3 = x1 - self.robotHeight * sinRot
y3 = y1 - self.robotHeight * cosRot
x4 = x3 + cosRot*self.robotWidth
y4 = y3 - sinRot*self.robotWidth
self.canvas.coords(self.robotBody,x1,y1,x2,y2,x4,y4,x3,y3)
armCos, armSin = self.__getCosAndSin(rotationAngle+self.armAngle)
xArm = x4 + self.armLength * armCos
yArm = y4 - self.armLength * armSin
self.canvas.coords(self.robotArm,x4,y4,xArm,yArm)
handCos, handSin = self.__getCosAndSin(self.handAngle+rotationAngle)
xHand = xArm + self.handLength * handCos
yHand = yArm - self.handLength * handSin
self.canvas.coords(self.robotHand,xArm,yArm,xHand,yHand)
# Position and Velocity Sign Post
# time = len(self.positions) + 0.5 * sum(self.angleSums)
# velocity = (self.positions[-1]-self.positions[0]) / time
# if len(self.positions) == 1: return
steps = (stepCount - self.lastStep)
if steps==0:return
# pos = self.positions[-1]
# velocity = (pos - self.lastPos) / steps
# g = .9 ** (10 * stepDelay)
# g = .99 ** steps
# self.velAvg = g * self.velAvg + (1 - g) * velocity
# g = .999 ** steps
# self.velAvg2 = g * self.velAvg2 + (1 - g) * velocity
pos = self.positions[-1]
velocity = pos - self.positions[-2]
vel2 = (pos - self.positions[0]) / len(self.positions)
self.velAvg = .9 * self.velAvg + .1 * vel2
velMsg = '100-step Avg Velocity: %.2f' % self.velAvg
# velMsg2 = '1000-step Avg Velocity: %.2f' % self.velAvg2
velocityMsg = 'Velocity: %.2f' % velocity
positionMsg = 'Position: %2.f' % pos
stepMsg = 'Step: %d' % stepCount
if 'vel_msg' in dir(self):
self.canvas.delete(self.vel_msg)
self.canvas.delete(self.pos_msg)
self.canvas.delete(self.step_msg)
self.canvas.delete(self.velavg_msg)
# self.canvas.delete(self.velavg2_msg)
# self.velavg2_msg = self.canvas.create_text(850,190,text=velMsg2)
self.velavg_msg = self.canvas.create_text(650,190,text=velMsg)
self.vel_msg = self.canvas.create_text(450,190,text=velocityMsg)
self.pos_msg = self.canvas.create_text(250,190,text=positionMsg)
self.step_msg = self.canvas.create_text(50,190,text=stepMsg)
# self.lastPos = pos
self.lastStep = stepCount
# self.lastVel = velocity
def __init__(self, canvas):
## Canvas ##
self.canvas = canvas
self.velAvg = 0
# self.velAvg2 = 0
# self.lastPos = 0
self.lastStep = 0
# self.lastVel = 0
## Arm and Hand Degrees ##
self.armAngle = self.oldArmDegree = 0.0
self.handAngle = self.oldHandDegree = -PI/6
self.maxArmAngle = PI/6
self.minArmAngle = -PI/6
self.maxHandAngle = 0
self.minHandAngle = -(5.0/6.0) * PI
## Draw Ground ##
self.totWidth = canvas.winfo_reqwidth()
self.totHeight = canvas.winfo_reqheight()
self.groundHeight = 40
self.groundY = self.totHeight - self.groundHeight
self.ground = canvas.create_rectangle(0,
self.groundY,self.totWidth,self.totHeight, fill='blue')
## Robot Body ##
self.robotWidth = 80
self.robotHeight = 40
self.robotPos = (20, self.groundY)
self.robotBody = canvas.create_polygon(0,0,0,0,0,0,0,0, fill='green')
## Robot Arm ##
self.armLength = 60
self.robotArm = canvas.create_line(0,0,0,0,fill='orange',width=5)
## Robot Hand ##
self.handLength = 40
self.robotHand = canvas.create_line(0,0,0,0,fill='red',width=3)
self.positions = [0,0]
# self.angleSums = [0,0]
if __name__ == '__main__':
from graphicsCrawlerDisplay import *
run()
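CrawlingRobotEnvironment.__init__ above discretizes the continuous arm and hand angles into evenly spaced buckets, and a state is an (armBucket, handBucket) index pair. A standalone sketch of that discretization, reusing the default arm limits from CrawlingRobot.__init__ (-PI/6 to PI/6):

# Standalone sketch of the bucket discretization in CrawlingRobotEnvironment.
from math import pi as PI

nArmStates = 9
minArmAngle, maxArmAngle = -PI / 6, PI / 6   # defaults from CrawlingRobot.__init__
armIncrement = (maxArmAngle - minArmAngle) / (nArmStates - 1)
armBuckets = [minArmAngle + armIncrement * i for i in range(nArmStates)]

# Bucket 0 is the minimum angle, bucket nArmStates - 1 the maximum; reset()
# starts the robot in the middle bucket (index 4 under Python 2 integer division).
print(armBuckets[0])    # about -0.5236
print(armBuckets[-1])   # about  0.5236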

56
reinforcement/environment.py Normal file

@@ -0,0 +1,56 @@
# environment.py
# --------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).

#!/usr/bin/python

class Environment:

    def getCurrentState(self):
        """
        Returns the current state of the environment
        """
        abstract

    def getPossibleActions(self, state):
        """
        Returns possible actions the agent
        can take in the given state. Can
        return the empty list if we are in
        a terminal state.
        """
        abstract

    def doAction(self, action):
        """
        Performs the given action in the current
        environment state and updates the environment.
        Returns a (reward, nextState) pair
        """
        abstract

    def reset(self):
        """
        Resets the current state to the start state
        """
        abstract

    def isTerminal(self):
        """
        Has the environment entered a terminal
        state? This means there are no successors
        """
        state = self.getCurrentState()
        actions = self.getPossibleActions(state)
        return len(actions) == 0
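Environment above is an abstract interface: each method body is the bare name `abstract` (a NameError if ever called), so subclasses must override everything except isTerminal. A minimal toy implementation, purely illustrative and not part of the project, following the (nextState, reward) return convention that crawler.py's environment uses:

# Illustrative only: a three-state chain implementing the interface above.
import environment

class ChainEnvironment(environment.Environment):
    """States 0, 1, 2 in a row; reaching state 2 is terminal and pays 1.0."""

    def __init__(self):
        self.reset()

    def getCurrentState(self):
        return self.state

    def getPossibleActions(self, state):
        if state == 2:
            return []                    # terminal: isTerminal() becomes True
        return ['left', 'right']

    def doAction(self, action):
        if action == 'right':
            self.state = min(self.state + 1, 2)
        else:
            self.state = max(self.state - 1, 0)
        reward = 1.0 if self.state == 2 else 0.0
        return self.state, reward        # (nextState, reward), as in crawler.py

    def reset(self):
        self.state = 0

env = ChainEnvironment()
print(env.doAction('right'))             # (1, 0.0)
print(env.doAction('right'))             # (2, 1.0)
print(env.isTerminal())                  # True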

103
reinforcement/featureExtractors.py Normal file

@@ -0,0 +1,103 @@
# featureExtractors.py
# --------------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
"Feature extractors for Pacman game states"
from game import Directions, Actions
import util
class FeatureExtractor:
def getFeatures(self, state, action):
"""
Returns a dict from features to counts
Usually, the count will just be 1.0 for
indicator functions.
"""
util.raiseNotDefined()
class IdentityExtractor(FeatureExtractor):
def getFeatures(self, state, action):
feats = util.Counter()
feats[(state,action)] = 1.0
return feats
class CoordinateExtractor(FeatureExtractor):
def getFeatures(self, state, action):
feats = util.Counter()
feats[state] = 1.0
feats['x=%d' % state[0]] = 1.0
feats['y=%d' % state[0]] = 1.0
feats['action=%s' % action] = 1.0
return feats
def closestFood(pos, food, walls):
"""
closestFood -- this is similar to the function that we have
worked on in the search project; here its all in one place
"""
fringe = [(pos[0], pos[1], 0)]
expanded = set()
while fringe:
pos_x, pos_y, dist = fringe.pop(0)
if (pos_x, pos_y) in expanded:
continue
expanded.add((pos_x, pos_y))
# if we find a food at this location then exit
if food[pos_x][pos_y]:
return dist
# otherwise spread out from the location to its neighbours
nbrs = Actions.getLegalNeighbors((pos_x, pos_y), walls)
for nbr_x, nbr_y in nbrs:
fringe.append((nbr_x, nbr_y, dist+1))
# no food found
return None
class SimpleExtractor(FeatureExtractor):
"""
Returns simple features for a basic reflex Pacman:
- whether food will be eaten
- how far away the next food is
- whether a ghost collision is imminent
- whether a ghost is one step away
"""
def getFeatures(self, state, action):
# extract the grid of food and wall locations and get the ghost locations
food = state.getFood()
walls = state.getWalls()
ghosts = state.getGhostPositions()
features = util.Counter()
features["bias"] = 1.0
# compute the location of pacman after he takes the action
x, y = state.getPacmanPosition()
dx, dy = Actions.directionToVector(action)
next_x, next_y = int(x + dx), int(y + dy)
# count the number of ghosts 1-step away
features["#-of-ghosts-1-step-away"] = sum((next_x, next_y) in Actions.getLegalNeighbors(g, walls) for g in ghosts)
# if there is no danger of ghosts then add the food feature
if not features["#-of-ghosts-1-step-away"] and food[next_x][next_y]:
features["eats-food"] = 1.0
dist = closestFood((next_x, next_y), food, walls)
if dist is not None:
# make the distance a number less than one otherwise the update
# will diverge wildly
features["closest-food"] = float(dist) / (walls.width * walls.height)
features.divideAll(10.0)
return features
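Each extractor above returns a util.Counter mapping feature names to values (SimpleExtractor scales everything with divideAll(10.0) to keep learning updates stable). The usual consumer of such a Counter is a linear function approximator that scores a (state, action) pair as the dot product of features and weights; a sketch of that computation with made-up numbers, not the project's agent code:

# Sketch: combining a feature Counter with a weight Counter (values made up).
import util

features = util.Counter()
features['bias'] = 0.1                      # was 1.0 before divideAll(10.0)
features['closest-food'] = 0.04
features['#-of-ghosts-1-step-away'] = 0.0

weights = util.Counter()
weights['bias'] = 1.5
weights['closest-food'] = -2.0              # nearer food should score higher

# Linear score: sum over features of feature value * weight
# (missing keys in a util.Counter read as 0).
score = sum(features[f] * weights[f] for f in features)
print(score)                                # 0.1*1.5 + 0.04*-2.0 = 0.07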

729
reinforcement/game.py Normal file

@@ -0,0 +1,729 @@
# game.py
# -------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
# game.py
# -------
# Licensing Information: Please do not distribute or publish solutions to this
# project. You are free to use and extend these projects for educational
# purposes. The Pacman AI projects were developed at UC Berkeley, primarily by
# John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# For more info, see http://inst.eecs.berkeley.edu/~cs188/sp09/pacman.html
from util import *
import time, os
import traceback
import sys
#######################
# Parts worth reading #
#######################
class Agent:
"""
An agent must define a getAction method, but may also define the
following methods which will be called if they exist:
def registerInitialState(self, state): # inspects the starting state
"""
def __init__(self, index=0):
self.index = index
def getAction(self, state):
"""
The Agent will receive a GameState (from either {pacman, capture, sonar}.py) and
must return an action from Directions.{North, South, East, West, Stop}
"""
raiseNotDefined()
class Directions:
NORTH = 'North'
SOUTH = 'South'
EAST = 'East'
WEST = 'West'
STOP = 'Stop'
LEFT = {NORTH: WEST,
SOUTH: EAST,
EAST: NORTH,
WEST: SOUTH,
STOP: STOP}
RIGHT = dict([(y,x) for x, y in LEFT.items()])
REVERSE = {NORTH: SOUTH,
SOUTH: NORTH,
EAST: WEST,
WEST: EAST,
STOP: STOP}
class Configuration:
"""
A Configuration holds the (x,y) coordinate of a character, along with its
traveling direction.
The convention for positions, like a graph, is that (0,0) is the lower left corner, x increases
horizontally and y increases vertically. Therefore, north is the direction of increasing y, or (0,1).
"""
def __init__(self, pos, direction):
self.pos = pos
self.direction = direction
def getPosition(self):
return (self.pos)
def getDirection(self):
return self.direction
def isInteger(self):
x,y = self.pos
return x == int(x) and y == int(y)
def __eq__(self, other):
if other == None: return False
return (self.pos == other.pos and self.direction == other.direction)
def __hash__(self):
x = hash(self.pos)
y = hash(self.direction)
return hash(x + 13 * y)
def __str__(self):
return "(x,y)="+str(self.pos)+", "+str(self.direction)
def generateSuccessor(self, vector):
"""
Generates a new configuration reached by translating the current
configuration by the action vector. This is a low-level call and does
not attempt to respect the legality of the movement.
Actions are movement vectors.
"""
x, y= self.pos
dx, dy = vector
direction = Actions.vectorToDirection(vector)
if direction == Directions.STOP:
direction = self.direction # There is no stop direction
return Configuration((x + dx, y+dy), direction)
class AgentState:
"""
AgentStates hold the state of an agent (configuration, speed, scared, etc).
"""
def __init__( self, startConfiguration, isPacman ):
self.start = startConfiguration
self.configuration = startConfiguration
self.isPacman = isPacman
self.scaredTimer = 0
self.numCarrying = 0
self.numReturned = 0
def __str__( self ):
if self.isPacman:
return "Pacman: " + str( self.configuration )
else:
return "Ghost: " + str( self.configuration )
def __eq__( self, other ):
if other == None:
return False
return self.configuration == other.configuration and self.scaredTimer == other.scaredTimer
def __hash__(self):
return hash(hash(self.configuration) + 13 * hash(self.scaredTimer))
def copy( self ):
state = AgentState( self.start, self.isPacman )
state.configuration = self.configuration
state.scaredTimer = self.scaredTimer
state.numCarrying = self.numCarrying
state.numReturned = self.numReturned
return state
def getPosition(self):
if self.configuration == None: return None
return self.configuration.getPosition()
def getDirection(self):
return self.configuration.getDirection()
class Grid:
"""
A 2-dimensional array of objects backed by a list of lists. Data is accessed
via grid[x][y] where (x,y) are positions on a Pacman map with x horizontal,
y vertical and the origin (0,0) in the bottom left corner.
The __str__ method constructs an output that is oriented like a pacman board.
"""
def __init__(self, width, height, initialValue=False, bitRepresentation=None):
if initialValue not in [False, True]: raise Exception('Grids can only contain booleans')
self.CELLS_PER_INT = 30
self.width = width
self.height = height
self.data = [[initialValue for y in range(height)] for x in range(width)]
if bitRepresentation:
self._unpackBits(bitRepresentation)
def __getitem__(self, i):
return self.data[i]
def __setitem__(self, key, item):
self.data[key] = item
def __str__(self):
out = [[str(self.data[x][y])[0] for x in range(self.width)] for y in range(self.height)]
out.reverse()
return '\n'.join([''.join(x) for x in out])
def __eq__(self, other):
if other == None: return False
return self.data == other.data
def __hash__(self):
# return hash(str(self))
base = 1
h = 0
for l in self.data:
for i in l:
if i:
h += base
base *= 2
return hash(h)
def copy(self):
g = Grid(self.width, self.height)
g.data = [x[:] for x in self.data]
return g
def deepCopy(self):
return self.copy()
def shallowCopy(self):
g = Grid(self.width, self.height)
g.data = self.data
return g
def count(self, item =True ):
return sum([x.count(item) for x in self.data])
def asList(self, key = True):
list = []
for x in range(self.width):
for y in range(self.height):
if self[x][y] == key: list.append( (x,y) )
return list
def packBits(self):
"""
Returns an efficient int list representation
(width, height, bitPackedInts...)
"""
bits = [self.width, self.height]
currentInt = 0
for i in range(self.height * self.width):
bit = self.CELLS_PER_INT - (i % self.CELLS_PER_INT) - 1
x, y = self._cellIndexToPosition(i)
if self[x][y]:
currentInt += 2 ** bit
if (i + 1) % self.CELLS_PER_INT == 0:
bits.append(currentInt)
currentInt = 0
bits.append(currentInt)
return tuple(bits)
def _cellIndexToPosition(self, index):
x = index / self.height
y = index % self.height
return x, y
def _unpackBits(self, bits):
"""
Fills in data from a bit-level representation
"""
cell = 0
for packed in bits:
for bit in self._unpackInt(packed, self.CELLS_PER_INT):
if cell == self.width * self.height: break
x, y = self._cellIndexToPosition(cell)
self[x][y] = bit
cell += 1
def _unpackInt(self, packed, size):
bools = []
if packed < 0: raise ValueError, "must be a positive integer"
for i in range(size):
n = 2 ** (self.CELLS_PER_INT - i - 1)
if packed >= n:
bools.append(True)
packed -= n
else:
bools.append(False)
return bools
def reconstituteGrid(bitRep):
if type(bitRep) is not type((1,2)):
return bitRep
width, height = bitRep[:2]
return Grid(width, height, bitRepresentation= bitRep[2:])
####################################
# Parts you shouldn't have to read #
####################################
class Actions:
"""
A collection of static methods for manipulating move actions.
"""
# Directions
_directions = {Directions.NORTH: (0, 1),
Directions.SOUTH: (0, -1),
Directions.EAST: (1, 0),
Directions.WEST: (-1, 0),
Directions.STOP: (0, 0)}
_directionsAsList = _directions.items()
TOLERANCE = .001
def reverseDirection(action):
if action == Directions.NORTH:
return Directions.SOUTH
if action == Directions.SOUTH:
return Directions.NORTH
if action == Directions.EAST:
return Directions.WEST
if action == Directions.WEST:
return Directions.EAST
return action
reverseDirection = staticmethod(reverseDirection)
def vectorToDirection(vector):
dx, dy = vector
if dy > 0:
return Directions.NORTH
if dy < 0:
return Directions.SOUTH
if dx < 0:
return Directions.WEST
if dx > 0:
return Directions.EAST
return Directions.STOP
vectorToDirection = staticmethod(vectorToDirection)
def directionToVector(direction, speed = 1.0):
dx, dy = Actions._directions[direction]
return (dx * speed, dy * speed)
directionToVector = staticmethod(directionToVector)
def getPossibleActions(config, walls):
possible = []
x, y = config.pos
x_int, y_int = int(x + 0.5), int(y + 0.5)
# In between grid points, all agents must continue straight
if (abs(x - x_int) + abs(y - y_int) > Actions.TOLERANCE):
return [config.getDirection()]
for dir, vec in Actions._directionsAsList:
dx, dy = vec
next_y = y_int + dy
next_x = x_int + dx
if not walls[next_x][next_y]: possible.append(dir)
return possible
getPossibleActions = staticmethod(getPossibleActions)
def getLegalNeighbors(position, walls):
x,y = position
x_int, y_int = int(x + 0.5), int(y + 0.5)
neighbors = []
for dir, vec in Actions._directionsAsList:
dx, dy = vec
next_x = x_int + dx
if next_x < 0 or next_x == walls.width: continue
next_y = y_int + dy
if next_y < 0 or next_y == walls.height: continue
if not walls[next_x][next_y]: neighbors.append((next_x, next_y))
return neighbors
getLegalNeighbors = staticmethod(getLegalNeighbors)
def getSuccessor(position, action):
dx, dy = Actions.directionToVector(action)
x, y = position
return (x + dx, y + dy)
getSuccessor = staticmethod(getSuccessor)
class GameStateData:
"""
"""
def __init__( self, prevState = None ):
"""
Generates a new data packet by copying information from its predecessor.
"""
if prevState != None:
self.food = prevState.food.shallowCopy()
self.capsules = prevState.capsules[:]
self.agentStates = self.copyAgentStates( prevState.agentStates )
self.layout = prevState.layout
self._eaten = prevState._eaten
self.score = prevState.score
self._foodEaten = None
self._foodAdded = None
self._capsuleEaten = None
self._agentMoved = None
self._lose = False
self._win = False
self.scoreChange = 0
def deepCopy( self ):
state = GameStateData( self )
state.food = self.food.deepCopy()
state.layout = self.layout.deepCopy()
state._agentMoved = self._agentMoved
state._foodEaten = self._foodEaten
state._foodAdded = self._foodAdded
state._capsuleEaten = self._capsuleEaten
return state
def copyAgentStates( self, agentStates ):
copiedStates = []
for agentState in agentStates:
copiedStates.append( agentState.copy() )
return copiedStates
def __eq__( self, other ):
"""
Allows two states to be compared.
"""
if other == None: return False
# TODO Check for type of other
if not self.agentStates == other.agentStates: return False
if not self.food == other.food: return False
if not self.capsules == other.capsules: return False
if not self.score == other.score: return False
return True
def __hash__( self ):
"""
Allows states to be keys of dictionaries.
"""
for i, state in enumerate( self.agentStates ):
try:
int(hash(state))
except TypeError, e:
print e
#hash(state)
return int((hash(tuple(self.agentStates)) + 13*hash(self.food) + 113* hash(tuple(self.capsules)) + 7 * hash(self.score)) % 1048575 )
def __str__( self ):
width, height = self.layout.width, self.layout.height
map = Grid(width, height)
if type(self.food) == type((1,2)):
self.food = reconstituteGrid(self.food)
for x in range(width):
for y in range(height):
food, walls = self.food, self.layout.walls
map[x][y] = self._foodWallStr(food[x][y], walls[x][y])
for agentState in self.agentStates:
if agentState == None: continue
if agentState.configuration == None: continue
x,y = [int( i ) for i in nearestPoint( agentState.configuration.pos )]
agent_dir = agentState.configuration.direction
if agentState.isPacman:
map[x][y] = self._pacStr( agent_dir )
else:
map[x][y] = self._ghostStr( agent_dir )
for x, y in self.capsules:
map[x][y] = 'o'
return str(map) + ("\nScore: %d\n" % self.score)
def _foodWallStr( self, hasFood, hasWall ):
if hasFood:
return '.'
elif hasWall:
return '%'
else:
return ' '
def _pacStr( self, dir ):
if dir == Directions.NORTH:
return 'v'
if dir == Directions.SOUTH:
return '^'
if dir == Directions.WEST:
return '>'
return '<'
def _ghostStr( self, dir ):
return 'G'
if dir == Directions.NORTH:
return 'M'
if dir == Directions.SOUTH:
return 'W'
if dir == Directions.WEST:
return '3'
return 'E'
def initialize( self, layout, numGhostAgents ):
"""
Creates an initial game state from a layout array (see layout.py).
"""
self.food = layout.food.copy()
#self.capsules = []
self.capsules = layout.capsules[:]
self.layout = layout
self.score = 0
self.scoreChange = 0
self.agentStates = []
numGhosts = 0
for isPacman, pos in layout.agentPositions:
if not isPacman:
if numGhosts == numGhostAgents: continue # Max ghosts reached already
else: numGhosts += 1
self.agentStates.append( AgentState( Configuration( pos, Directions.STOP), isPacman) )
self._eaten = [False for a in self.agentStates]
try:
import boinc
_BOINC_ENABLED = True
except:
_BOINC_ENABLED = False
class Game:
"""
The Game manages the control flow, soliciting actions from agents.
"""
def __init__( self, agents, display, rules, startingIndex=0, muteAgents=False, catchExceptions=False ):
self.agentCrashed = False
self.agents = agents
self.display = display
self.rules = rules
self.startingIndex = startingIndex
self.gameOver = False
self.muteAgents = muteAgents
self.catchExceptions = catchExceptions
self.moveHistory = []
self.totalAgentTimes = [0 for agent in agents]
self.totalAgentTimeWarnings = [0 for agent in agents]
self.agentTimeout = False
import cStringIO
self.agentOutput = [cStringIO.StringIO() for agent in agents]
def getProgress(self):
if self.gameOver:
return 1.0
else:
return self.rules.getProgress(self)
def _agentCrash( self, agentIndex, quiet=False):
"Helper method for handling agent crashes"
if not quiet: traceback.print_exc()
self.gameOver = True
self.agentCrashed = True
self.rules.agentCrash(self, agentIndex)
OLD_STDOUT = None
OLD_STDERR = None
def mute(self, agentIndex):
if not self.muteAgents: return
global OLD_STDOUT, OLD_STDERR
import cStringIO
OLD_STDOUT = sys.stdout
OLD_STDERR = sys.stderr
sys.stdout = self.agentOutput[agentIndex]
sys.stderr = self.agentOutput[agentIndex]
def unmute(self):
if not self.muteAgents: return
global OLD_STDOUT, OLD_STDERR
# Revert stdout/stderr to originals
sys.stdout = OLD_STDOUT
sys.stderr = OLD_STDERR
def run( self ):
"""
Main control loop for game play.
"""
self.display.initialize(self.state.data)
self.numMoves = 0
###self.display.initialize(self.state.makeObservation(1).data)
# inform learning agents of the game start
for i in range(len(self.agents)):
agent = self.agents[i]
if not agent:
self.mute(i)
# this is a null agent, meaning it failed to load
# the other team wins
print >>sys.stderr, "Agent %d failed to load" % i
self.unmute()
self._agentCrash(i, quiet=True)
return
if ("registerInitialState" in dir(agent)):
self.mute(i)
if self.catchExceptions:
try:
timed_func = TimeoutFunction(agent.registerInitialState, int(self.rules.getMaxStartupTime(i)))
try:
start_time = time.time()
timed_func(self.state.deepCopy())
time_taken = time.time() - start_time
self.totalAgentTimes[i] += time_taken
except TimeoutFunctionException:
print >>sys.stderr, "Agent %d ran out of time on startup!" % i
self.unmute()
self.agentTimeout = True
self._agentCrash(i, quiet=True)
return
except Exception,data:
self._agentCrash(i, quiet=False)
self.unmute()
return
else:
agent.registerInitialState(self.state.deepCopy())
## TODO: could this exceed the total time
self.unmute()
agentIndex = self.startingIndex
numAgents = len( self.agents )
while not self.gameOver:
# Fetch the next agent
agent = self.agents[agentIndex]
move_time = 0
skip_action = False
# Generate an observation of the state
if 'observationFunction' in dir( agent ):
self.mute(agentIndex)
if self.catchExceptions:
try:
timed_func = TimeoutFunction(agent.observationFunction, int(self.rules.getMoveTimeout(agentIndex)))
try:
start_time = time.time()
observation = timed_func(self.state.deepCopy())
except TimeoutFunctionException:
skip_action = True
move_time += time.time() - start_time
self.unmute()
except Exception,data:
self._agentCrash(agentIndex, quiet=False)
self.unmute()
return
else:
observation = agent.observationFunction(self.state.deepCopy())
self.unmute()
else:
observation = self.state.deepCopy()
# Solicit an action
action = None
self.mute(agentIndex)
if self.catchExceptions:
try:
timed_func = TimeoutFunction(agent.getAction, int(self.rules.getMoveTimeout(agentIndex)) - int(move_time))
try:
start_time = time.time()
if skip_action:
raise TimeoutFunctionException()
action = timed_func( observation )
except TimeoutFunctionException:
print >>sys.stderr, "Agent %d timed out on a single move!" % agentIndex
self.agentTimeout = True
self._agentCrash(agentIndex, quiet=True)
self.unmute()
return
move_time += time.time() - start_time
if move_time > self.rules.getMoveWarningTime(agentIndex):
self.totalAgentTimeWarnings[agentIndex] += 1
print >>sys.stderr, "Agent %d took too long to make a move! This is warning %d" % (agentIndex, self.totalAgentTimeWarnings[agentIndex])
if self.totalAgentTimeWarnings[agentIndex] > self.rules.getMaxTimeWarnings(agentIndex):
print >>sys.stderr, "Agent %d exceeded the maximum number of warnings: %d" % (agentIndex, self.totalAgentTimeWarnings[agentIndex])
self.agentTimeout = True
self._agentCrash(agentIndex, quiet=True)
self.unmute()
return
self.totalAgentTimes[agentIndex] += move_time
#print "Agent: %d, time: %f, total: %f" % (agentIndex, move_time, self.totalAgentTimes[agentIndex])
if self.totalAgentTimes[agentIndex] > self.rules.getMaxTotalTime(agentIndex):
print >>sys.stderr, "Agent %d ran out of time! (time: %1.2f)" % (agentIndex, self.totalAgentTimes[agentIndex])
self.agentTimeout = True
self._agentCrash(agentIndex, quiet=True)
self.unmute()
return
self.unmute()
except Exception,data:
self._agentCrash(agentIndex)
self.unmute()
return
else:
action = agent.getAction(observation)
self.unmute()
# Execute the action
self.moveHistory.append( (agentIndex, action) )
if self.catchExceptions:
try:
self.state = self.state.generateSuccessor( agentIndex, action )
except Exception,data:
self.mute(agentIndex)
self._agentCrash(agentIndex)
self.unmute()
return
else:
self.state = self.state.generateSuccessor( agentIndex, action )
# Change the display
self.display.update( self.state.data )
###idx = agentIndex - agentIndex % 2 + 1
###self.display.update( self.state.makeObservation(idx).data )
# Allow for game specific conditions (winning, losing, etc.)
self.rules.process(self.state, self)
# Track progress
if agentIndex == numAgents + 1: self.numMoves += 1
# Next agent
agentIndex = ( agentIndex + 1 ) % numAgents
if _BOINC_ENABLED:
boinc.set_fraction_done(self.getProgress())
# inform a learning agent of the game result
for agentIndex, agent in enumerate(self.agents):
if "final" in dir( agent ) :
try:
self.mute(agentIndex)
agent.final( self.state )
self.unmute()
except Exception,data:
if not self.catchExceptions: raise
self._agentCrash(agentIndex)
self.unmute()
return
self.display.finish()
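Grid above is indexed grid[x][y] with (0, 0) at the bottom left, and packBits()/reconstituteGrid() round-trip a grid through a compact tuple of integers (CELLS_PER_INT booleans per int). A small sketch exercising that round trip together with the Actions helpers, assuming it runs next to game.py:

# Sketch: Grid bit-packing round trip and two Actions helpers.
from game import Grid, Actions, Directions, reconstituteGrid

g = Grid(4, 3)                  # 4 wide, 3 tall, all cells False
g[1][2] = True
g[3][0] = True

packed = g.packBits()           # (width, height, packed ints...)
g2 = reconstituteGrid(packed)   # rebuilds an equivalent Grid from the tuple
print(g2[1][2])                 # True
print(g2.count())               # 2

print(Actions.directionToVector(Directions.NORTH))   # (0.0, 1.0)
print(Actions.reverseDirection(Directions.NORTH))    # 'South'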

81
reinforcement/ghostAgents.py Normal file

@@ -0,0 +1,81 @@
# ghostAgents.py
# --------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
from game import Agent
from game import Actions
from game import Directions
import random
from util import manhattanDistance
import util
class GhostAgent( Agent ):
def __init__( self, index ):
self.index = index
def getAction( self, state ):
dist = self.getDistribution(state)
if len(dist) == 0:
return Directions.STOP
else:
return util.chooseFromDistribution( dist )
def getDistribution(self, state):
"Returns a Counter encoding a distribution over actions from the provided state."
util.raiseNotDefined()
class RandomGhost( GhostAgent ):
"A ghost that chooses a legal action uniformly at random."
def getDistribution( self, state ):
dist = util.Counter()
for a in state.getLegalActions( self.index ): dist[a] = 1.0
dist.normalize()
return dist
class DirectionalGhost( GhostAgent ):
"A ghost that prefers to rush Pacman, or flee when scared."
def __init__( self, index, prob_attack=0.8, prob_scaredFlee=0.8 ):
self.index = index
self.prob_attack = prob_attack
self.prob_scaredFlee = prob_scaredFlee
def getDistribution( self, state ):
# Read variables from state
ghostState = state.getGhostState( self.index )
legalActions = state.getLegalActions( self.index )
pos = state.getGhostPosition( self.index )
isScared = ghostState.scaredTimer > 0
speed = 1
if isScared: speed = 0.5
actionVectors = [Actions.directionToVector( a, speed ) for a in legalActions]
newPositions = [( pos[0]+a[0], pos[1]+a[1] ) for a in actionVectors]
pacmanPosition = state.getPacmanPosition()
# Select best actions given the state
distancesToPacman = [manhattanDistance( pos, pacmanPosition ) for pos in newPositions]
if isScared:
bestScore = max( distancesToPacman )
bestProb = self.prob_scaredFlee
else:
bestScore = min( distancesToPacman )
bestProb = self.prob_attack
bestActions = [action for action, distance in zip( legalActions, distancesToPacman ) if distance == bestScore]
# Construct distribution
dist = util.Counter()
for a in bestActions: dist[a] = bestProb / len(bestActions)
for a in legalActions: dist[a] += ( 1-bestProb ) / len(legalActions)
dist.normalize()
return dist
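DirectionalGhost above splits its probability mass: bestProb is shared among the "best" actions and the remaining 1 - bestProb is spread uniformly over all legal actions, after which GhostAgent.getAction samples from the normalized Counter with util.chooseFromDistribution. A sketch of the same mixing with a made-up action set and the default prob_attack of 0.8:

# Sketch of the DirectionalGhost probability mixing (made-up action set).
import util

legalActions = ['North', 'South', 'East']
bestActions = ['East']            # e.g. the action that closes in on Pacman
bestProb = 0.8                    # default prob_attack above

dist = util.Counter()
for a in bestActions:
    dist[a] = bestProb / len(bestActions)
for a in legalActions:
    dist[a] += (1 - bestProb) / len(legalActions)
dist.normalize()

print(dict(dist))                            # East ~0.867, North/South ~0.067
print(util.chooseFromDistribution(dist))     # samples an action, usually 'East'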

282
reinforcement/grading.py Normal file

@@ -0,0 +1,282 @@
# grading.py
# ----------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
"Common code for autograders"
import cgi
import time
import sys
import traceback
import pdb
from collections import defaultdict
import util
class Grades:
"A data structure for project grades, along with formatting code to display them"
def __init__(self, projectName, questionsAndMaxesList, edxOutput=False, muteOutput=False):
"""
Defines the grading scheme for a project
projectName: project name
questionsAndMaxesDict: a list of (question name, max points per question)
"""
self.questions = [el[0] for el in questionsAndMaxesList]
self.maxes = dict(questionsAndMaxesList)
self.points = Counter()
self.messages = dict([(q, []) for q in self.questions])
self.project = projectName
self.start = time.localtime()[1:6]
self.sane = True # Sanity checks
self.currentQuestion = None # Which question we're grading
self.edxOutput = edxOutput
self.mute = muteOutput
self.prereqs = defaultdict(set)
#print 'Autograder transcript for %s' % self.project
print 'Starting on %d-%d at %d:%02d:%02d' % self.start
def addPrereq(self, question, prereq):
self.prereqs[question].add(prereq)
def grade(self, gradingModule, exceptionMap = {}, bonusPic = False):
"""
Grades each question
gradingModule: the module with all the grading functions (pass in with sys.modules[__name__])
"""
completedQuestions = set([])
for q in self.questions:
print '\nQuestion %s' % q
print '=' * (9 + len(q))
print
self.currentQuestion = q
incompleted = self.prereqs[q].difference(completedQuestions)
if len(incompleted) > 0:
prereq = incompleted.pop()
print \
"""*** NOTE: Make sure to complete Question %s before working on Question %s,
*** because Question %s builds upon your answer for Question %s.
""" % (prereq, q, q, prereq)
continue
if self.mute: util.mutePrint()
try:
util.TimeoutFunction(getattr(gradingModule, q),300)(self) # Call the question's function
#TimeoutFunction(getattr(gradingModule, q),1200)(self) # Call the question's function
except Exception, inst:
self.addExceptionMessage(q, inst, traceback)
self.addErrorHints(exceptionMap, inst, q[1])
except:
self.fail('FAIL: Terminated with a string exception.')
finally:
if self.mute: util.unmutePrint()
if self.points[q] >= self.maxes[q]:
completedQuestions.add(q)
print '\n### Question %s: %d/%d ###\n' % (q, self.points[q], self.maxes[q])
print '\nFinished at %d:%02d:%02d' % time.localtime()[3:6]
print "\nProvisional grades\n=================="
for q in self.questions:
print 'Question %s: %d/%d' % (q, self.points[q], self.maxes[q])
print '------------------'
print 'Total: %d/%d' % (self.points.totalCount(), sum(self.maxes.values()))
if bonusPic and self.points.totalCount() == 25:
print """
ALL HAIL GRANDPAC.
LONG LIVE THE GHOSTBUSTING KING.
--- ---- ---
| \ / + \ / |
| + \--/ \--/ + |
| + + |
| + + + |
@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
\ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
\ / @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
V \ @@@@@@@@@@@@@@@@@@@@@@@@@@@@
\ / @@@@@@@@@@@@@@@@@@@@@@@@@@
V @@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@
/\ @@@@@@@@@@@@@@@@@@@@@@
/ \ @@@@@@@@@@@@@@@@@@@@@@@@@
/\ / @@@@@@@@@@@@@@@@@@@@@@@@@@@
/ \ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
/ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@
"""
print """
Your grades are NOT yet registered. To register your grades, make sure
to follow your instructor's guidelines to receive credit on your project.
"""
if self.edxOutput:
self.produceOutput()
def addExceptionMessage(self, q, inst, traceback):
"""
Method to format the exception message, this is more complicated because
we need to cgi.escape the traceback but wrap the exception in a <pre> tag
"""
self.fail('FAIL: Exception raised: %s' % inst)
self.addMessage('')
for line in traceback.format_exc().split('\n'):
self.addMessage(line)
def addErrorHints(self, exceptionMap, errorInstance, questionNum):
typeOf = str(type(errorInstance))
questionName = 'q' + questionNum
errorHint = ''
# question specific error hints
if exceptionMap.get(questionName):
questionMap = exceptionMap.get(questionName)
if (questionMap.get(typeOf)):
errorHint = questionMap.get(typeOf)
# fall back to general error messages if a question specific
# one does not exist
if (exceptionMap.get(typeOf)):
errorHint = exceptionMap.get(typeOf)
        # don't include the HTML if we have no error hint
if not errorHint:
return ''
for line in errorHint.split('\n'):
self.addMessage(line)
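    # Illustrative sketch (an editorial assumption, not from the original
    # project): the exceptionMap passed to grade() is expected to look roughly
    # like
    #   {
    #     'q1': {"<type 'exceptions.IndexError'>": 'question-specific hint'},
    #     "<type 'exceptions.AttributeError'>": 'general fallback hint',
    #   }
    # i.e. keys are either question names mapping to {str(type(exc)): hint}
    # dicts, or str(type(exc)) strings mapping directly to a fallback hint.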
def produceOutput(self):
edxOutput = open('edx_response.html', 'w')
edxOutput.write("<div>")
# first sum
total_possible = sum(self.maxes.values())
total_score = sum(self.points.values())
checkOrX = '<span class="incorrect"/>'
if (total_score >= total_possible):
checkOrX = '<span class="correct"/>'
header = """
<h3>
Total score ({total_score} / {total_possible})
</h3>
""".format(total_score = total_score,
total_possible = total_possible,
checkOrX = checkOrX
)
edxOutput.write(header)
for q in self.questions:
if len(q) == 2:
name = q[1]
else:
name = q
checkOrX = '<span class="incorrect"/>'
if (self.points[q] == self.maxes[q]):
checkOrX = '<span class="correct"/>'
#messages = '\n<br/>\n'.join(self.messages[q])
messages = "<pre>%s</pre>" % '\n'.join(self.messages[q])
output = """
<div class="test">
<section>
<div class="shortform">
Question {q} ({points}/{max}) {checkOrX}
</div>
<div class="longform">
{messages}
</div>
</section>
</div>
""".format(q = name,
max = self.maxes[q],
messages = messages,
checkOrX = checkOrX,
points = self.points[q]
)
# print "*** output for Question %s " % q[1]
# print output
edxOutput.write(output)
edxOutput.write("</div>")
edxOutput.close()
edxOutput = open('edx_grade', 'w')
edxOutput.write(str(self.points.totalCount()))
edxOutput.close()
def fail(self, message, raw=False):
"Sets sanity check bit to false and outputs a message"
self.sane = False
self.assignZeroCredit()
self.addMessage(message, raw)
def assignZeroCredit(self):
self.points[self.currentQuestion] = 0
def addPoints(self, amt):
self.points[self.currentQuestion] += amt
def deductPoints(self, amt):
self.points[self.currentQuestion] -= amt
def assignFullCredit(self, message="", raw=False):
self.points[self.currentQuestion] = self.maxes[self.currentQuestion]
if message != "":
self.addMessage(message, raw)
def addMessage(self, message, raw=False):
if not raw:
# We assume raw messages, formatted for HTML, are printed separately
if self.mute: util.unmutePrint()
print '*** ' + message
if self.mute: util.mutePrint()
message = cgi.escape(message)
self.messages[self.currentQuestion].append(message)
def addMessageToEmail(self, message):
print "WARNING**** addMessageToEmail is deprecated %s" % message
for line in message.split('\n'):
pass
#print '%%% ' + line + ' %%%'
#self.messages[self.currentQuestion].append(line)
class Counter(dict):
"""
Dict with default 0
"""
def __getitem__(self, idx):
try:
return dict.__getitem__(self, idx)
except KeyError:
return 0
def totalCount(self):
"""
Returns the sum of counts for all keys.
"""
return sum(self.values())
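# Illustrative sketch (not part of the original autograder): Counter lets the
# grading code accumulate points without pre-registering every question key,
# and totalCount() then sums whatever was earned. The helper name below is
# hypothetical.
def _counterSketch():
    points = Counter()
    points['q1'] += 3            # missing keys default to 0, so += works at once
    points['q2'] += 2
    return points.totalCount()   # -> 5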

333
reinforcement/graphicsCrawlerDisplay.py Normal file
View file

@ -0,0 +1,333 @@
# graphicsCrawlerDisplay.py
# -------------------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
import Tkinter
import qlearningAgents
import time
import threading
import sys
import crawler
#import pendulum
import math
from math import pi as PI
robotType = 'crawler'
class Application:
def sigmoid(self, x):
return 1.0 / (1.0 + 2.0 ** (-x))
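    # Illustrative values (a rough check, not from the original source):
    # sigmoid(0) = 0.5, sigmoid(2) = 0.8, sigmoid(-2) = 0.2. The +/- buttons
    # below nudge self.ep, self.ga and self.al by +/-0.5 and pass the result
    # through this base-2 sigmoid, so epsilon, the discount and the learning
    # rate always stay strictly between 0 and 1.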
def incrementSpeed(self, inc):
self.tickTime *= inc
# self.epsilon = min(1.0, self.epsilon)
# self.epsilon = max(0.0,self.epsilon)
# self.learner.setSpeed(self.epsilon)
self.speed_label['text'] = 'Step Delay: %.5f' % (self.tickTime)
def incrementEpsilon(self, inc):
self.ep += inc
self.epsilon = self.sigmoid(self.ep)
self.learner.setEpsilon(self.epsilon)
self.epsilon_label['text'] = 'Epsilon: %.3f' % (self.epsilon)
def incrementGamma(self, inc):
self.ga += inc
self.gamma = self.sigmoid(self.ga)
self.learner.setDiscount(self.gamma)
self.gamma_label['text'] = 'Discount: %.3f' % (self.gamma)
def incrementAlpha(self, inc):
self.al += inc
self.alpha = self.sigmoid(self.al)
self.learner.setLearningRate(self.alpha)
self.alpha_label['text'] = 'Learning Rate: %.3f' % (self.alpha)
def __initGUI(self, win):
## Window ##
self.win = win
## Initialize Frame ##
win.grid()
self.dec = -.5
self.inc = .5
self.tickTime = 0.1
## Epsilon Button + Label ##
self.setupSpeedButtonAndLabel(win)
self.setupEpsilonButtonAndLabel(win)
## Gamma Button + Label ##
self.setUpGammaButtonAndLabel(win)
## Alpha Button + Label ##
self.setupAlphaButtonAndLabel(win)
## Exit Button ##
#self.exit_button = Tkinter.Button(win,text='Quit', command=self.exit)
#self.exit_button.grid(row=0, column=9)
## Simulation Buttons ##
# self.setupSimulationButtons(win)
## Canvas ##
self.canvas = Tkinter.Canvas(root, height=200, width=1000)
self.canvas.grid(row=2,columnspan=10)
def setupAlphaButtonAndLabel(self, win):
self.alpha_minus = Tkinter.Button(win,
text="-",command=(lambda: self.incrementAlpha(self.dec)))
self.alpha_minus.grid(row=1, column=3, padx=10)
self.alpha = self.sigmoid(self.al)
self.alpha_label = Tkinter.Label(win, text='Learning Rate: %.3f' % (self.alpha))
self.alpha_label.grid(row=1, column=4)
self.alpha_plus = Tkinter.Button(win,
text="+",command=(lambda: self.incrementAlpha(self.inc)))
self.alpha_plus.grid(row=1, column=5, padx=10)
def setUpGammaButtonAndLabel(self, win):
self.gamma_minus = Tkinter.Button(win,
text="-",command=(lambda: self.incrementGamma(self.dec)))
self.gamma_minus.grid(row=1, column=0, padx=10)
self.gamma = self.sigmoid(self.ga)
self.gamma_label = Tkinter.Label(win, text='Discount: %.3f' % (self.gamma))
self.gamma_label.grid(row=1, column=1)
self.gamma_plus = Tkinter.Button(win,
text="+",command=(lambda: self.incrementGamma(self.inc)))
self.gamma_plus.grid(row=1, column=2, padx=10)
def setupEpsilonButtonAndLabel(self, win):
self.epsilon_minus = Tkinter.Button(win,
text="-",command=(lambda: self.incrementEpsilon(self.dec)))
self.epsilon_minus.grid(row=0, column=3)
self.epsilon = self.sigmoid(self.ep)
self.epsilon_label = Tkinter.Label(win, text='Epsilon: %.3f' % (self.epsilon))
self.epsilon_label.grid(row=0, column=4)
self.epsilon_plus = Tkinter.Button(win,
text="+",command=(lambda: self.incrementEpsilon(self.inc)))
self.epsilon_plus.grid(row=0, column=5)
def setupSpeedButtonAndLabel(self, win):
self.speed_minus = Tkinter.Button(win,
text="-",command=(lambda: self.incrementSpeed(.5)))
self.speed_minus.grid(row=0, column=0)
self.speed_label = Tkinter.Label(win, text='Step Delay: %.5f' % (self.tickTime))
self.speed_label.grid(row=0, column=1)
self.speed_plus = Tkinter.Button(win,
text="+",command=(lambda: self.incrementSpeed(2)))
self.speed_plus.grid(row=0, column=2)
def skip5kSteps(self):
self.stepsToSkip = 5000
def __init__(self, win):
self.ep = 0
self.ga = 2
self.al = 2
self.stepCount = 0
## Init Gui
self.__initGUI(win)
# Init environment
if robotType == 'crawler':
self.robot = crawler.CrawlingRobot(self.canvas)
self.robotEnvironment = crawler.CrawlingRobotEnvironment(self.robot)
elif robotType == 'pendulum':
self.robot = pendulum.PendulumRobot(self.canvas)
self.robotEnvironment = \
pendulum.PendulumRobotEnvironment(self.robot)
else:
raise "Unknown RobotType"
# Init Agent
simulationFn = lambda agent: \
simulation.SimulationEnvironment(self.robotEnvironment,agent)
actionFn = lambda state: \
self.robotEnvironment.getPossibleActions(state)
self.learner = qlearningAgents.QLearningAgent(actionFn=actionFn)
self.learner.setEpsilon(self.epsilon)
self.learner.setLearningRate(self.alpha)
self.learner.setDiscount(self.gamma)
# Start GUI
self.running = True
self.stopped = False
self.stepsToSkip = 0
self.thread = threading.Thread(target=self.run)
self.thread.start()
def exit(self):
self.running = False
for i in range(5):
if not self.stopped:
time.sleep(0.1)
try:
self.win.destroy()
except:
pass
sys.exit(0)
def step(self):
self.stepCount += 1
state = self.robotEnvironment.getCurrentState()
actions = self.robotEnvironment.getPossibleActions(state)
if len(actions) == 0.0:
self.robotEnvironment.reset()
state = self.robotEnvironment.getCurrentState()
actions = self.robotEnvironment.getPossibleActions(state)
print 'Reset!'
action = self.learner.getAction(state)
        if action is None:
            raise Exception('None action returned: Code Not Complete')
nextState, reward = self.robotEnvironment.doAction(action)
self.learner.observeTransition(state, action, nextState, reward)
def animatePolicy(self):
if robotType != 'pendulum':
            raise Exception('Only pendulum can animatePolicy')
totWidth = self.canvas.winfo_reqwidth()
totHeight = self.canvas.winfo_reqheight()
length = 0.48 * min(totWidth, totHeight)
x,y = totWidth-length-30, length+10
angleMin, angleMax = self.robot.getMinAndMaxAngle()
velMin, velMax = self.robot.getMinAndMaxAngleVelocity()
if not 'animatePolicyBox' in dir(self):
self.canvas.create_line(x,y,x+length,y)
self.canvas.create_line(x+length,y,x+length,y-length)
self.canvas.create_line(x+length,y-length,x,y-length)
self.canvas.create_line(x,y-length,x,y)
self.animatePolicyBox = 1
self.canvas.create_text(x+length/2,y+10,text='angle')
self.canvas.create_text(x-30,y-length/2,text='velocity')
self.canvas.create_text(x-60,y-length/4,text='Blue = kickLeft')
self.canvas.create_text(x-60,y-length/4+20,text='Red = kickRight')
self.canvas.create_text(x-60,y-length/4+40,text='White = doNothing')
angleDelta = (angleMax-angleMin) / 100
velDelta = (velMax-velMin) / 100
for i in range(100):
angle = angleMin + i * angleDelta
for j in range(100):
vel = velMin + j * velDelta
state = self.robotEnvironment.getState(angle,vel)
max, argMax = None, None
if not self.learner.seenState(state):
argMax = 'unseen'
else:
for action in ('kickLeft','kickRight','doNothing'):
qVal = self.learner.getQValue(state, action)
if max == None or qVal > max:
max, argMax = qVal, action
if argMax != 'unseen':
if argMax == 'kickLeft':
color = 'blue'
elif argMax == 'kickRight':
color = 'red'
elif argMax == 'doNothing':
color = 'white'
dx = length / 100.0
dy = length / 100.0
x0, y0 = x+i*dx, y-j*dy
self.canvas.create_rectangle(x0,y0,x0+dx,y0+dy,fill=color)
def run(self):
self.stepCount = 0
self.learner.startEpisode()
while True:
minSleep = .01
tm = max(minSleep, self.tickTime)
time.sleep(tm)
self.stepsToSkip = int(tm / self.tickTime) - 1
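            # Editorial note (an assumption, not from the original source):
            # when tickTime drops below minSleep the loop still sleeps for
            # about 10 ms, but int(tm / self.tickTime) - 1 extra calls to
            # step() are made below, so learning keeps pace with the
            # requested step delay.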
if not self.running:
self.stopped = True
return
for i in range(self.stepsToSkip):
self.step()
self.stepsToSkip = 0
self.step()
# self.robot.draw()
self.learner.stopEpisode()
def start(self):
self.win.mainloop()
def run():
global root
root = Tkinter.Tk()
root.title( 'Crawler GUI' )
root.resizable( 0, 0 )
# root.mainloop()
app = Application(root)
def update_gui():
app.robot.draw(app.stepCount, app.tickTime)
root.after(10, update_gui)
update_gui()
root.protocol( 'WM_DELETE_WINDOW', app.exit)
try:
app.start()
except:
app.exit()

679
reinforcement/graphicsDisplay.py Normal file
View file

@ -0,0 +1,679 @@
# graphicsDisplay.py
# ------------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
from graphicsUtils import *
import math, time
from game import Directions
###########################
# GRAPHICS DISPLAY CODE #
###########################
# Most code by Dan Klein and John DeNero, written or rewritten for cs188 at UC Berkeley.
# Some code is from a Pacman implementation by LiveWires, used / modified with permission.
DEFAULT_GRID_SIZE = 30.0
INFO_PANE_HEIGHT = 35
BACKGROUND_COLOR = formatColor(0,0,0)
WALL_COLOR = formatColor(0.0/255.0, 51.0/255.0, 255.0/255.0)
INFO_PANE_COLOR = formatColor(.4,.4,0)
SCORE_COLOR = formatColor(.9, .9, .9)
PACMAN_OUTLINE_WIDTH = 2
PACMAN_CAPTURE_OUTLINE_WIDTH = 4
GHOST_COLORS = []
GHOST_COLORS.append(formatColor(.9,0,0)) # Red
GHOST_COLORS.append(formatColor(0,.3,.9)) # Blue
GHOST_COLORS.append(formatColor(.98,.41,.07)) # Orange
GHOST_COLORS.append(formatColor(.1,.75,.7)) # Green
GHOST_COLORS.append(formatColor(1.0,0.6,0.0)) # Yellow
GHOST_COLORS.append(formatColor(.4,0.13,0.91)) # Purple
TEAM_COLORS = GHOST_COLORS[:2]
GHOST_SHAPE = [
( 0, 0.3 ),
( 0.25, 0.75 ),
( 0.5, 0.3 ),
( 0.75, 0.75 ),
( 0.75, -0.5 ),
( 0.5, -0.75 ),
(-0.5, -0.75 ),
(-0.75, -0.5 ),
(-0.75, 0.75 ),
(-0.5, 0.3 ),
(-0.25, 0.75 )
]
GHOST_SIZE = 0.65
SCARED_COLOR = formatColor(1,1,1)
GHOST_VEC_COLORS = map(colorToVector, GHOST_COLORS)
PACMAN_COLOR = formatColor(255.0/255.0,255.0/255.0,61.0/255)
PACMAN_SCALE = 0.5
#pacman_speed = 0.25
# Food
FOOD_COLOR = formatColor(1,1,1)
FOOD_SIZE = 0.1
# Laser
LASER_COLOR = formatColor(1,0,0)
LASER_SIZE = 0.02
# Capsule graphics
CAPSULE_COLOR = formatColor(1,1,1)
CAPSULE_SIZE = 0.25
# Drawing walls
WALL_RADIUS = 0.15
class InfoPane:
def __init__(self, layout, gridSize):
self.gridSize = gridSize
self.width = (layout.width) * gridSize
self.base = (layout.height + 1) * gridSize
self.height = INFO_PANE_HEIGHT
self.fontSize = 24
self.textColor = PACMAN_COLOR
self.drawPane()
def toScreen(self, pos, y = None):
"""
Translates a point relative from the bottom left of the info pane.
"""
if y == None:
x,y = pos
else:
x = pos
x = self.gridSize + x # Margin
y = self.base + y
return x,y
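        # For example, toScreen(0, 0) returns (self.gridSize, self.base): one
        # grid cell of left margin, measured down from just below the maze.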
def drawPane(self):
self.scoreText = text( self.toScreen(0, 0 ), self.textColor, "SCORE: 0", "Times", self.fontSize, "bold")
def initializeGhostDistances(self, distances):
self.ghostDistanceText = []
size = 20
if self.width < 240:
size = 12
if self.width < 160:
size = 10
for i, d in enumerate(distances):
t = text( self.toScreen(self.width/2 + self.width/8 * i, 0), GHOST_COLORS[i+1], d, "Times", size, "bold")
self.ghostDistanceText.append(t)
def updateScore(self, score):
changeText(self.scoreText, "SCORE: % 4d" % score)
def setTeam(self, isBlue):
text = "RED TEAM"
if isBlue: text = "BLUE TEAM"
self.teamText = text( self.toScreen(300, 0 ), self.textColor, text, "Times", self.fontSize, "bold")
def updateGhostDistances(self, distances):
if len(distances) == 0: return
if 'ghostDistanceText' not in dir(self): self.initializeGhostDistances(distances)
else:
for i, d in enumerate(distances):
changeText(self.ghostDistanceText[i], d)
def drawGhost(self):
pass
def drawPacman(self):
pass
def drawWarning(self):
pass
def clearIcon(self):
pass
def updateMessage(self, message):
pass
def clearMessage(self):
pass
class PacmanGraphics:
def __init__(self, zoom=1.0, frameTime=0.0, capture=False):
self.have_window = 0
self.currentGhostImages = {}
self.pacmanImage = None
self.zoom = zoom
self.gridSize = DEFAULT_GRID_SIZE * zoom
self.capture = capture
self.frameTime = frameTime
def checkNullDisplay(self):
return False
def initialize(self, state, isBlue = False):
self.isBlue = isBlue
self.startGraphics(state)
# self.drawDistributions(state)
self.distributionImages = None # Initialized lazily
self.drawStaticObjects(state)
self.drawAgentObjects(state)
# Information
self.previousState = state
def startGraphics(self, state):
self.layout = state.layout
layout = self.layout
self.width = layout.width
self.height = layout.height
self.make_window(self.width, self.height)
self.infoPane = InfoPane(layout, self.gridSize)
self.currentState = layout
def drawDistributions(self, state):
walls = state.layout.walls
dist = []
for x in range(walls.width):
distx = []
dist.append(distx)
for y in range(walls.height):
( screen_x, screen_y ) = self.to_screen( (x, y) )
block = square( (screen_x, screen_y),
0.5 * self.gridSize,
color = BACKGROUND_COLOR,
filled = 1, behind=2)
distx.append(block)
self.distributionImages = dist
def drawStaticObjects(self, state):
layout = self.layout
self.drawWalls(layout.walls)
self.food = self.drawFood(layout.food)
self.capsules = self.drawCapsules(layout.capsules)
refresh()
def drawAgentObjects(self, state):
self.agentImages = [] # (agentState, image)
for index, agent in enumerate(state.agentStates):
if agent.isPacman:
image = self.drawPacman(agent, index)
self.agentImages.append( (agent, image) )
else:
image = self.drawGhost(agent, index)
self.agentImages.append( (agent, image) )
refresh()
def swapImages(self, agentIndex, newState):
"""
        Changes an image from a ghost to a pacman or vice versa (for capture)
"""
prevState, prevImage = self.agentImages[agentIndex]
for item in prevImage: remove_from_screen(item)
if newState.isPacman:
image = self.drawPacman(newState, agentIndex)
self.agentImages[agentIndex] = (newState, image )
else:
image = self.drawGhost(newState, agentIndex)
self.agentImages[agentIndex] = (newState, image )
refresh()
def update(self, newState):
agentIndex = newState._agentMoved
agentState = newState.agentStates[agentIndex]
if self.agentImages[agentIndex][0].isPacman != agentState.isPacman: self.swapImages(agentIndex, agentState)
prevState, prevImage = self.agentImages[agentIndex]
if agentState.isPacman:
self.animatePacman(agentState, prevState, prevImage)
else:
self.moveGhost(agentState, agentIndex, prevState, prevImage)
self.agentImages[agentIndex] = (agentState, prevImage)
if newState._foodEaten != None:
self.removeFood(newState._foodEaten, self.food)
if newState._capsuleEaten != None:
self.removeCapsule(newState._capsuleEaten, self.capsules)
self.infoPane.updateScore(newState.score)
if 'ghostDistances' in dir(newState):
self.infoPane.updateGhostDistances(newState.ghostDistances)
def make_window(self, width, height):
grid_width = (width-1) * self.gridSize
grid_height = (height-1) * self.gridSize
screen_width = 2*self.gridSize + grid_width
screen_height = 2*self.gridSize + grid_height + INFO_PANE_HEIGHT
begin_graphics(screen_width,
screen_height,
BACKGROUND_COLOR,
"CS188 Pacman")
def drawPacman(self, pacman, index):
position = self.getPosition(pacman)
screen_point = self.to_screen(position)
endpoints = self.getEndpoints(self.getDirection(pacman))
width = PACMAN_OUTLINE_WIDTH
outlineColor = PACMAN_COLOR
fillColor = PACMAN_COLOR
if self.capture:
outlineColor = TEAM_COLORS[index % 2]
fillColor = GHOST_COLORS[index]
width = PACMAN_CAPTURE_OUTLINE_WIDTH
return [circle(screen_point, PACMAN_SCALE * self.gridSize,
fillColor = fillColor, outlineColor = outlineColor,
endpoints = endpoints,
width = width)]
def getEndpoints(self, direction, position=(0,0)):
x, y = position
pos = x - int(x) + y - int(y)
width = 30 + 80 * math.sin(math.pi* pos)
delta = width / 2
if (direction == 'West'):
endpoints = (180+delta, 180-delta)
elif (direction == 'North'):
endpoints = (90+delta, 90-delta)
elif (direction == 'South'):
endpoints = (270+delta, 270-delta)
else:
endpoints = (0+delta, 0-delta)
return endpoints
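    # Worked example (a rough check, not from the original source): at a grid
    # point, pos = 0, so width = 30 and delta = 15; facing East this returns
    # (15, -15), i.e. a 30-degree mouth. Halfway between cells pos = 0.5, so
    # width = 110 and the mouth opens to its widest, which is what animates
    # the chomping as Pacman moves.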
def movePacman(self, position, direction, image):
screenPosition = self.to_screen(position)
endpoints = self.getEndpoints( direction, position )
r = PACMAN_SCALE * self.gridSize
moveCircle(image[0], screenPosition, r, endpoints)
refresh()
def animatePacman(self, pacman, prevPacman, image):
if self.frameTime < 0:
print 'Press any key to step forward, "q" to play'
keys = wait_for_keys()
if 'q' in keys:
self.frameTime = 0.1
if self.frameTime > 0.01 or self.frameTime < 0:
start = time.time()
fx, fy = self.getPosition(prevPacman)
px, py = self.getPosition(pacman)
frames = 4.0
for i in range(1,int(frames) + 1):
pos = px*i/frames + fx*(frames-i)/frames, py*i/frames + fy*(frames-i)/frames
self.movePacman(pos, self.getDirection(pacman), image)
refresh()
sleep(abs(self.frameTime) / frames)
else:
self.movePacman(self.getPosition(pacman), self.getDirection(pacman), image)
refresh()
def getGhostColor(self, ghost, ghostIndex):
if ghost.scaredTimer > 0:
return SCARED_COLOR
else:
return GHOST_COLORS[ghostIndex]
def drawGhost(self, ghost, agentIndex):
pos = self.getPosition(ghost)
dir = self.getDirection(ghost)
(screen_x, screen_y) = (self.to_screen(pos) )
coords = []
for (x, y) in GHOST_SHAPE:
coords.append((x*self.gridSize*GHOST_SIZE + screen_x, y*self.gridSize*GHOST_SIZE + screen_y))
colour = self.getGhostColor(ghost, agentIndex)
body = polygon(coords, colour, filled = 1)
WHITE = formatColor(1.0, 1.0, 1.0)
BLACK = formatColor(0.0, 0.0, 0.0)
dx = 0
dy = 0
if dir == 'North':
dy = -0.2
if dir == 'South':
dy = 0.2
if dir == 'East':
dx = 0.2
if dir == 'West':
dx = -0.2
leftEye = circle((screen_x+self.gridSize*GHOST_SIZE*(-0.3+dx/1.5), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy/1.5)), self.gridSize*GHOST_SIZE*0.2, WHITE, WHITE)
rightEye = circle((screen_x+self.gridSize*GHOST_SIZE*(0.3+dx/1.5), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy/1.5)), self.gridSize*GHOST_SIZE*0.2, WHITE, WHITE)
leftPupil = circle((screen_x+self.gridSize*GHOST_SIZE*(-0.3+dx), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy)), self.gridSize*GHOST_SIZE*0.08, BLACK, BLACK)
rightPupil = circle((screen_x+self.gridSize*GHOST_SIZE*(0.3+dx), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy)), self.gridSize*GHOST_SIZE*0.08, BLACK, BLACK)
ghostImageParts = []
ghostImageParts.append(body)
ghostImageParts.append(leftEye)
ghostImageParts.append(rightEye)
ghostImageParts.append(leftPupil)
ghostImageParts.append(rightPupil)
return ghostImageParts
def moveEyes(self, pos, dir, eyes):
(screen_x, screen_y) = (self.to_screen(pos) )
dx = 0
dy = 0
if dir == 'North':
dy = -0.2
if dir == 'South':
dy = 0.2
if dir == 'East':
dx = 0.2
if dir == 'West':
dx = -0.2
moveCircle(eyes[0],(screen_x+self.gridSize*GHOST_SIZE*(-0.3+dx/1.5), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy/1.5)), self.gridSize*GHOST_SIZE*0.2)
moveCircle(eyes[1],(screen_x+self.gridSize*GHOST_SIZE*(0.3+dx/1.5), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy/1.5)), self.gridSize*GHOST_SIZE*0.2)
moveCircle(eyes[2],(screen_x+self.gridSize*GHOST_SIZE*(-0.3+dx), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy)), self.gridSize*GHOST_SIZE*0.08)
moveCircle(eyes[3],(screen_x+self.gridSize*GHOST_SIZE*(0.3+dx), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy)), self.gridSize*GHOST_SIZE*0.08)
def moveGhost(self, ghost, ghostIndex, prevGhost, ghostImageParts):
old_x, old_y = self.to_screen(self.getPosition(prevGhost))
new_x, new_y = self.to_screen(self.getPosition(ghost))
delta = new_x - old_x, new_y - old_y
for ghostImagePart in ghostImageParts:
move_by(ghostImagePart, delta)
refresh()
if ghost.scaredTimer > 0:
color = SCARED_COLOR
else:
color = GHOST_COLORS[ghostIndex]
edit(ghostImageParts[0], ('fill', color), ('outline', color))
self.moveEyes(self.getPosition(ghost), self.getDirection(ghost), ghostImageParts[-4:])
refresh()
def getPosition(self, agentState):
if agentState.configuration == None: return (-1000, -1000)
return agentState.getPosition()
def getDirection(self, agentState):
if agentState.configuration == None: return Directions.STOP
return agentState.configuration.getDirection()
def finish(self):
end_graphics()
def to_screen(self, point):
( x, y ) = point
#y = self.height - y
x = (x + 1)*self.gridSize
y = (self.height - y)*self.gridSize
return ( x, y )
# Fixes some TK issue with off-center circles
def to_screen2(self, point):
( x, y ) = point
#y = self.height - y
x = (x + 1)*self.gridSize
y = (self.height - y)*self.gridSize
return ( x, y )
def drawWalls(self, wallMatrix):
wallColor = WALL_COLOR
for xNum, x in enumerate(wallMatrix):
if self.capture and (xNum * 2) < wallMatrix.width: wallColor = TEAM_COLORS[0]
if self.capture and (xNum * 2) >= wallMatrix.width: wallColor = TEAM_COLORS[1]
for yNum, cell in enumerate(x):
if cell: # There's a wall here
pos = (xNum, yNum)
screen = self.to_screen(pos)
screen2 = self.to_screen2(pos)
# draw each quadrant of the square based on adjacent walls
wIsWall = self.isWall(xNum-1, yNum, wallMatrix)
eIsWall = self.isWall(xNum+1, yNum, wallMatrix)
nIsWall = self.isWall(xNum, yNum+1, wallMatrix)
sIsWall = self.isWall(xNum, yNum-1, wallMatrix)
nwIsWall = self.isWall(xNum-1, yNum+1, wallMatrix)
swIsWall = self.isWall(xNum-1, yNum-1, wallMatrix)
neIsWall = self.isWall(xNum+1, yNum+1, wallMatrix)
seIsWall = self.isWall(xNum+1, yNum-1, wallMatrix)
# NE quadrant
if (not nIsWall) and (not eIsWall):
# inner circle
circle(screen2, WALL_RADIUS * self.gridSize, wallColor, wallColor, (0,91), 'arc')
if (nIsWall) and (not eIsWall):
# vertical line
line(add(screen, (self.gridSize*WALL_RADIUS, 0)), add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(-0.5)-1)), wallColor)
if (not nIsWall) and (eIsWall):
# horizontal line
line(add(screen, (0, self.gridSize*(-1)*WALL_RADIUS)), add(screen, (self.gridSize*0.5+1, self.gridSize*(-1)*WALL_RADIUS)), wallColor)
if (nIsWall) and (eIsWall) and (not neIsWall):
# outer circle
circle(add(screen2, (self.gridSize*2*WALL_RADIUS, self.gridSize*(-2)*WALL_RADIUS)), WALL_RADIUS * self.gridSize-1, wallColor, wallColor, (180,271), 'arc')
line(add(screen, (self.gridSize*2*WALL_RADIUS-1, self.gridSize*(-1)*WALL_RADIUS)), add(screen, (self.gridSize*0.5+1, self.gridSize*(-1)*WALL_RADIUS)), wallColor)
line(add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(-2)*WALL_RADIUS+1)), add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(-0.5))), wallColor)
# NW quadrant
if (not nIsWall) and (not wIsWall):
# inner circle
circle(screen2, WALL_RADIUS * self.gridSize, wallColor, wallColor, (90,181), 'arc')
if (nIsWall) and (not wIsWall):
# vertical line
line(add(screen, (self.gridSize*(-1)*WALL_RADIUS, 0)), add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(-0.5)-1)), wallColor)
if (not nIsWall) and (wIsWall):
# horizontal line
line(add(screen, (0, self.gridSize*(-1)*WALL_RADIUS)), add(screen, (self.gridSize*(-0.5)-1, self.gridSize*(-1)*WALL_RADIUS)), wallColor)
if (nIsWall) and (wIsWall) and (not nwIsWall):
# outer circle
circle(add(screen2, (self.gridSize*(-2)*WALL_RADIUS, self.gridSize*(-2)*WALL_RADIUS)), WALL_RADIUS * self.gridSize-1, wallColor, wallColor, (270,361), 'arc')
line(add(screen, (self.gridSize*(-2)*WALL_RADIUS+1, self.gridSize*(-1)*WALL_RADIUS)), add(screen, (self.gridSize*(-0.5), self.gridSize*(-1)*WALL_RADIUS)), wallColor)
line(add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(-2)*WALL_RADIUS+1)), add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(-0.5))), wallColor)
# SE quadrant
if (not sIsWall) and (not eIsWall):
# inner circle
circle(screen2, WALL_RADIUS * self.gridSize, wallColor, wallColor, (270,361), 'arc')
if (sIsWall) and (not eIsWall):
# vertical line
line(add(screen, (self.gridSize*WALL_RADIUS, 0)), add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(0.5)+1)), wallColor)
if (not sIsWall) and (eIsWall):
# horizontal line
line(add(screen, (0, self.gridSize*(1)*WALL_RADIUS)), add(screen, (self.gridSize*0.5+1, self.gridSize*(1)*WALL_RADIUS)), wallColor)
if (sIsWall) and (eIsWall) and (not seIsWall):
# outer circle
circle(add(screen2, (self.gridSize*2*WALL_RADIUS, self.gridSize*(2)*WALL_RADIUS)), WALL_RADIUS * self.gridSize-1, wallColor, wallColor, (90,181), 'arc')
line(add(screen, (self.gridSize*2*WALL_RADIUS-1, self.gridSize*(1)*WALL_RADIUS)), add(screen, (self.gridSize*0.5, self.gridSize*(1)*WALL_RADIUS)), wallColor)
line(add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(2)*WALL_RADIUS-1)), add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(0.5))), wallColor)
# SW quadrant
if (not sIsWall) and (not wIsWall):
# inner circle
circle(screen2, WALL_RADIUS * self.gridSize, wallColor, wallColor, (180,271), 'arc')
if (sIsWall) and (not wIsWall):
# vertical line
line(add(screen, (self.gridSize*(-1)*WALL_RADIUS, 0)), add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(0.5)+1)), wallColor)
if (not sIsWall) and (wIsWall):
# horizontal line
line(add(screen, (0, self.gridSize*(1)*WALL_RADIUS)), add(screen, (self.gridSize*(-0.5)-1, self.gridSize*(1)*WALL_RADIUS)), wallColor)
if (sIsWall) and (wIsWall) and (not swIsWall):
# outer circle
circle(add(screen2, (self.gridSize*(-2)*WALL_RADIUS, self.gridSize*(2)*WALL_RADIUS)), WALL_RADIUS * self.gridSize-1, wallColor, wallColor, (0,91), 'arc')
line(add(screen, (self.gridSize*(-2)*WALL_RADIUS+1, self.gridSize*(1)*WALL_RADIUS)), add(screen, (self.gridSize*(-0.5), self.gridSize*(1)*WALL_RADIUS)), wallColor)
line(add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(2)*WALL_RADIUS-1)), add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(0.5))), wallColor)
def isWall(self, x, y, walls):
if x < 0 or y < 0:
return False
if x >= walls.width or y >= walls.height:
return False
return walls[x][y]
def drawFood(self, foodMatrix ):
foodImages = []
color = FOOD_COLOR
for xNum, x in enumerate(foodMatrix):
if self.capture and (xNum * 2) <= foodMatrix.width: color = TEAM_COLORS[0]
if self.capture and (xNum * 2) > foodMatrix.width: color = TEAM_COLORS[1]
imageRow = []
foodImages.append(imageRow)
for yNum, cell in enumerate(x):
if cell: # There's food here
screen = self.to_screen((xNum, yNum ))
dot = circle( screen,
FOOD_SIZE * self.gridSize,
outlineColor = color, fillColor = color,
width = 1)
imageRow.append(dot)
else:
imageRow.append(None)
return foodImages
def drawCapsules(self, capsules ):
capsuleImages = {}
for capsule in capsules:
( screen_x, screen_y ) = self.to_screen(capsule)
dot = circle( (screen_x, screen_y),
CAPSULE_SIZE * self.gridSize,
outlineColor = CAPSULE_COLOR,
fillColor = CAPSULE_COLOR,
width = 1)
capsuleImages[capsule] = dot
return capsuleImages
def removeFood(self, cell, foodImages ):
x, y = cell
remove_from_screen(foodImages[x][y])
def removeCapsule(self, cell, capsuleImages ):
x, y = cell
remove_from_screen(capsuleImages[(x, y)])
def drawExpandedCells(self, cells):
"""
Draws an overlay of expanded grid positions for search agents
"""
n = float(len(cells))
baseColor = [1.0, 0.0, 0.0]
self.clearExpandedCells()
self.expandedCells = []
for k, cell in enumerate(cells):
screenPos = self.to_screen( cell)
cellColor = formatColor(*[(n-k) * c * .5 / n + .25 for c in baseColor])
block = square(screenPos,
0.5 * self.gridSize,
color = cellColor,
filled = 1, behind=2)
self.expandedCells.append(block)
if self.frameTime < 0:
refresh()
def clearExpandedCells(self):
if 'expandedCells' in dir(self) and len(self.expandedCells) > 0:
for cell in self.expandedCells:
remove_from_screen(cell)
def updateDistributions(self, distributions):
"Draws an agent's belief distributions"
# copy all distributions so we don't change their state
distributions = map(lambda x: x.copy(), distributions)
if self.distributionImages == None:
self.drawDistributions(self.previousState)
for x in range(len(self.distributionImages)):
for y in range(len(self.distributionImages[0])):
image = self.distributionImages[x][y]
weights = [dist[ (x,y) ] for dist in distributions]
if sum(weights) != 0:
pass
# Fog of war
color = [0.0,0.0,0.0]
colors = GHOST_VEC_COLORS[1:] # With Pacman
if self.capture: colors = GHOST_VEC_COLORS
for weight, gcolor in zip(weights, colors):
color = [min(1.0, c + 0.95 * g * weight ** .3) for c,g in zip(color, gcolor)]
changeColor(image, formatColor(*color))
refresh()
class FirstPersonPacmanGraphics(PacmanGraphics):
def __init__(self, zoom = 1.0, showGhosts = True, capture = False, frameTime=0):
PacmanGraphics.__init__(self, zoom, frameTime=frameTime)
self.showGhosts = showGhosts
self.capture = capture
def initialize(self, state, isBlue = False):
self.isBlue = isBlue
PacmanGraphics.startGraphics(self, state)
# Initialize distribution images
walls = state.layout.walls
dist = []
self.layout = state.layout
# Draw the rest
self.distributionImages = None # initialize lazily
self.drawStaticObjects(state)
self.drawAgentObjects(state)
# Information
self.previousState = state
def lookAhead(self, config, state):
if config.getDirection() == 'Stop':
return
else:
pass
# Draw relevant ghosts
allGhosts = state.getGhostStates()
visibleGhosts = state.getVisibleGhosts()
for i, ghost in enumerate(allGhosts):
if ghost in visibleGhosts:
self.drawGhost(ghost, i)
else:
self.currentGhostImages[i] = None
def getGhostColor(self, ghost, ghostIndex):
return GHOST_COLORS[ghostIndex]
def getPosition(self, ghostState):
if not self.showGhosts and not ghostState.isPacman and ghostState.getPosition()[1] > 1:
return (-1000, -1000)
else:
return PacmanGraphics.getPosition(self, ghostState)
def add(x, y):
return (x[0] + y[0], x[1] + y[1])
# Saving graphical output
# -----------------------
# Note: to make an animated gif from this postscript output, try the command:
# convert -delay 7 -loop 1 -compress lzw -layers optimize frame* out.gif
# convert is part of imagemagick (freeware)
SAVE_POSTSCRIPT = False
POSTSCRIPT_OUTPUT_DIR = 'frames'
FRAME_NUMBER = 0
import os
def saveFrame():
"Saves the current graphical output as a postscript file"
global SAVE_POSTSCRIPT, FRAME_NUMBER, POSTSCRIPT_OUTPUT_DIR
if not SAVE_POSTSCRIPT: return
if not os.path.exists(POSTSCRIPT_OUTPUT_DIR): os.mkdir(POSTSCRIPT_OUTPUT_DIR)
name = os.path.join(POSTSCRIPT_OUTPUT_DIR, 'frame_%08d.ps' % FRAME_NUMBER)
FRAME_NUMBER += 1
writePostscript(name) # writes the current canvas

348
reinforcement/graphicsGridworldDisplay.py Normal file
View file

@ -0,0 +1,348 @@
# graphicsGridworldDisplay.py
# ---------------------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
import util
from graphicsUtils import *
class GraphicsGridworldDisplay:
def __init__(self, gridworld, size=120, speed=1.0):
self.gridworld = gridworld
self.size = size
self.speed = speed
def start(self):
setup(self.gridworld, size=self.size)
def pause(self):
wait_for_keys()
def displayValues(self, agent, currentState = None, message = 'Agent Values'):
values = util.Counter()
policy = {}
states = self.gridworld.getStates()
for state in states:
values[state] = agent.getValue(state)
policy[state] = agent.getPolicy(state)
drawValues(self.gridworld, values, policy, currentState, message)
sleep(0.05 / self.speed)
def displayNullValues(self, currentState = None, message = ''):
values = util.Counter()
#policy = {}
states = self.gridworld.getStates()
for state in states:
values[state] = 0.0
#policy[state] = agent.getPolicy(state)
drawNullValues(self.gridworld, currentState,'')
# drawValues(self.gridworld, values, policy, currentState, message)
sleep(0.05 / self.speed)
def displayQValues(self, agent, currentState = None, message = 'Agent Q-Values'):
qValues = util.Counter()
states = self.gridworld.getStates()
for state in states:
for action in self.gridworld.getPossibleActions(state):
qValues[(state, action)] = agent.getQValue(state, action)
drawQValues(self.gridworld, qValues, currentState, message)
sleep(0.05 / self.speed)
BACKGROUND_COLOR = formatColor(0,0,0)
EDGE_COLOR = formatColor(1,1,1)
OBSTACLE_COLOR = formatColor(0.5,0.5,0.5)
TEXT_COLOR = formatColor(1,1,1)
MUTED_TEXT_COLOR = formatColor(0.7,0.7,0.7)
LOCATION_COLOR = formatColor(0,0,1)
WINDOW_SIZE = -1
GRID_SIZE = -1
GRID_HEIGHT = -1
MARGIN = -1
def setup(gridworld, title = "Gridworld Display", size = 120):
global GRID_SIZE, MARGIN, SCREEN_WIDTH, SCREEN_HEIGHT, GRID_HEIGHT
grid = gridworld.grid
WINDOW_SIZE = size
GRID_SIZE = size
GRID_HEIGHT = grid.height
MARGIN = GRID_SIZE * 0.75
screen_width = (grid.width - 1) * GRID_SIZE + MARGIN * 2
screen_height = (grid.height - 0.5) * GRID_SIZE + MARGIN * 2
begin_graphics(screen_width,
screen_height,
BACKGROUND_COLOR, title=title)
def drawNullValues(gridworld, currentState = None, message = ''):
grid = gridworld.grid
blank()
for x in range(grid.width):
for y in range(grid.height):
state = (x, y)
gridType = grid[x][y]
isExit = (str(gridType) != gridType)
isCurrent = (currentState == state)
if gridType == '#':
drawSquare(x, y, 0, 0, 0, None, None, True, False, isCurrent)
else:
drawNullSquare(gridworld.grid, x, y, False, isExit, isCurrent)
pos = to_screen(((grid.width - 1.0) / 2.0, - 0.8))
text( pos, TEXT_COLOR, message, "Courier", -32, "bold", "c")
def drawValues(gridworld, values, policy, currentState = None, message = 'State Values'):
grid = gridworld.grid
blank()
valueList = [values[state] for state in gridworld.getStates()] + [0.0]
minValue = min(valueList)
maxValue = max(valueList)
for x in range(grid.width):
for y in range(grid.height):
state = (x, y)
gridType = grid[x][y]
isExit = (str(gridType) != gridType)
isCurrent = (currentState == state)
if gridType == '#':
drawSquare(x, y, 0, 0, 0, None, None, True, False, isCurrent)
else:
value = values[state]
action = None
if policy != None and state in policy:
action = policy[state]
actions = gridworld.getPossibleActions(state)
if action not in actions and 'exit' in actions:
action = 'exit'
valString = '%.2f' % value
drawSquare(x, y, value, minValue, maxValue, valString, action, False, isExit, isCurrent)
pos = to_screen(((grid.width - 1.0) / 2.0, - 0.8))
text( pos, TEXT_COLOR, message, "Courier", -32, "bold", "c")
def drawQValues(gridworld, qValues, currentState = None, message = 'State-Action Q-Values'):
grid = gridworld.grid
blank()
stateCrossActions = [[(state, action) for action in gridworld.getPossibleActions(state)] for state in gridworld.getStates()]
qStates = reduce(lambda x,y: x+y, stateCrossActions, [])
qValueList = [qValues[(state, action)] for state, action in qStates] + [0.0]
minValue = min(qValueList)
maxValue = max(qValueList)
for x in range(grid.width):
for y in range(grid.height):
state = (x, y)
gridType = grid[x][y]
isExit = (str(gridType) != gridType)
isCurrent = (currentState == state)
actions = gridworld.getPossibleActions(state)
if actions == None or len(actions) == 0:
actions = [None]
bestQ = max([qValues[(state, action)] for action in actions])
bestActions = [action for action in actions if qValues[(state, action)] == bestQ]
q = util.Counter()
valStrings = {}
for action in actions:
v = qValues[(state, action)]
q[action] += v
valStrings[action] = '%.2f' % v
if gridType == '#':
drawSquare(x, y, 0, 0, 0, None, None, True, False, isCurrent)
elif isExit:
action = 'exit'
value = q[action]
valString = '%.2f' % value
drawSquare(x, y, value, minValue, maxValue, valString, action, False, isExit, isCurrent)
else:
drawSquareQ(x, y, q, minValue, maxValue, valStrings, actions, isCurrent)
pos = to_screen(((grid.width - 1.0) / 2.0, - 0.8))
text( pos, TEXT_COLOR, message, "Courier", -32, "bold", "c")
def blank():
clear_screen()
def drawNullSquare(grid,x, y, isObstacle, isTerminal, isCurrent):
square_color = getColor(0, -1, 1)
if isObstacle:
square_color = OBSTACLE_COLOR
(screen_x, screen_y) = to_screen((x, y))
square( (screen_x, screen_y),
0.5* GRID_SIZE,
color = square_color,
filled = 1,
width = 1)
square( (screen_x, screen_y),
0.5* GRID_SIZE,
color = EDGE_COLOR,
filled = 0,
width = 3)
if isTerminal and not isObstacle:
square( (screen_x, screen_y),
0.4* GRID_SIZE,
color = EDGE_COLOR,
filled = 0,
width = 2)
text( (screen_x, screen_y),
TEXT_COLOR,
str(grid[x][y]),
"Courier", -24, "bold", "c")
text_color = TEXT_COLOR
if not isObstacle and isCurrent:
circle( (screen_x, screen_y), 0.1*GRID_SIZE, LOCATION_COLOR, fillColor=LOCATION_COLOR )
# if not isObstacle:
# text( (screen_x, screen_y), text_color, valStr, "Courier", 24, "bold", "c")
def drawSquare(x, y, val, min, max, valStr, action, isObstacle, isTerminal, isCurrent):
square_color = getColor(val, min, max)
if isObstacle:
square_color = OBSTACLE_COLOR
(screen_x, screen_y) = to_screen((x, y))
square( (screen_x, screen_y),
0.5* GRID_SIZE,
color = square_color,
filled = 1,
width = 1)
square( (screen_x, screen_y),
0.5* GRID_SIZE,
color = EDGE_COLOR,
filled = 0,
width = 3)
if isTerminal and not isObstacle:
square( (screen_x, screen_y),
0.4* GRID_SIZE,
color = EDGE_COLOR,
filled = 0,
width = 2)
if action == 'north':
polygon( [(screen_x, screen_y - 0.45*GRID_SIZE), (screen_x+0.05*GRID_SIZE, screen_y-0.40*GRID_SIZE), (screen_x-0.05*GRID_SIZE, screen_y-0.40*GRID_SIZE)], EDGE_COLOR, filled = 1, smoothed = False)
if action == 'south':
polygon( [(screen_x, screen_y + 0.45*GRID_SIZE), (screen_x+0.05*GRID_SIZE, screen_y+0.40*GRID_SIZE), (screen_x-0.05*GRID_SIZE, screen_y+0.40*GRID_SIZE)], EDGE_COLOR, filled = 1, smoothed = False)
if action == 'west':
polygon( [(screen_x-0.45*GRID_SIZE, screen_y), (screen_x-0.4*GRID_SIZE, screen_y+0.05*GRID_SIZE), (screen_x-0.4*GRID_SIZE, screen_y-0.05*GRID_SIZE)], EDGE_COLOR, filled = 1, smoothed = False)
if action == 'east':
polygon( [(screen_x+0.45*GRID_SIZE, screen_y), (screen_x+0.4*GRID_SIZE, screen_y+0.05*GRID_SIZE), (screen_x+0.4*GRID_SIZE, screen_y-0.05*GRID_SIZE)], EDGE_COLOR, filled = 1, smoothed = False)
text_color = TEXT_COLOR
if not isObstacle and isCurrent:
circle( (screen_x, screen_y), 0.1*GRID_SIZE, outlineColor=LOCATION_COLOR, fillColor=LOCATION_COLOR )
if not isObstacle:
text( (screen_x, screen_y), text_color, valStr, "Courier", -30, "bold", "c")
def drawSquareQ(x, y, qVals, minVal, maxVal, valStrs, bestActions, isCurrent):
(screen_x, screen_y) = to_screen((x, y))
center = (screen_x, screen_y)
nw = (screen_x-0.5*GRID_SIZE, screen_y-0.5*GRID_SIZE)
ne = (screen_x+0.5*GRID_SIZE, screen_y-0.5*GRID_SIZE)
se = (screen_x+0.5*GRID_SIZE, screen_y+0.5*GRID_SIZE)
sw = (screen_x-0.5*GRID_SIZE, screen_y+0.5*GRID_SIZE)
n = (screen_x, screen_y-0.5*GRID_SIZE+5)
s = (screen_x, screen_y+0.5*GRID_SIZE-5)
w = (screen_x-0.5*GRID_SIZE+5, screen_y)
e = (screen_x+0.5*GRID_SIZE-5, screen_y)
actions = qVals.keys()
for action in actions:
wedge_color = getColor(qVals[action], minVal, maxVal)
if action == 'north':
polygon( (center, nw, ne), wedge_color, filled = 1, smoothed = False)
#text(n, text_color, valStr, "Courier", 8, "bold", "n")
if action == 'south':
polygon( (center, sw, se), wedge_color, filled = 1, smoothed = False)
#text(s, text_color, valStr, "Courier", 8, "bold", "s")
if action == 'east':
polygon( (center, ne, se), wedge_color, filled = 1, smoothed = False)
#text(e, text_color, valStr, "Courier", 8, "bold", "e")
if action == 'west':
polygon( (center, nw, sw), wedge_color, filled = 1, smoothed = False)
#text(w, text_color, valStr, "Courier", 8, "bold", "w")
square( (screen_x, screen_y),
0.5* GRID_SIZE,
color = EDGE_COLOR,
filled = 0,
width = 3)
line(ne, sw, color = EDGE_COLOR)
line(nw, se, color = EDGE_COLOR)
if isCurrent:
circle( (screen_x, screen_y), 0.1*GRID_SIZE, LOCATION_COLOR, fillColor=LOCATION_COLOR )
for action in actions:
text_color = TEXT_COLOR
if qVals[action] < max(qVals.values()): text_color = MUTED_TEXT_COLOR
valStr = ""
if action in valStrs:
valStr = valStrs[action]
h = -20
if action == 'north':
#polygon( (center, nw, ne), wedge_color, filled = 1, smooth = 0)
text(n, text_color, valStr, "Courier", h, "bold", "n")
if action == 'south':
#polygon( (center, sw, se), wedge_color, filled = 1, smooth = 0)
text(s, text_color, valStr, "Courier", h, "bold", "s")
if action == 'east':
#polygon( (center, ne, se), wedge_color, filled = 1, smooth = 0)
text(e, text_color, valStr, "Courier", h, "bold", "e")
if action == 'west':
#polygon( (center, nw, sw), wedge_color, filled = 1, smooth = 0)
text(w, text_color, valStr, "Courier", h, "bold", "w")
def getColor(val, minVal, max):
r, g = 0.0, 0.0
if val < 0 and minVal < 0:
r = val * 0.65 / minVal
if val > 0 and max > 0:
g = val * 0.65 / max
return formatColor(r,g,0.0)
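# Illustrative values (a rough check, not from the original source):
#   getColor(-1.0, -2.0, 2.0) -> formatColor(0.325, 0.0, 0.0)   (dim red)
#   getColor( 2.0, -2.0, 2.0) -> formatColor(0.0, 0.65, 0.0)    (green)
#   getColor( 0.0, -2.0, 2.0) -> formatColor(0.0, 0.0, 0.0)     (black)
# Negative values shade toward red and positive values toward green, scaled
# by the most extreme values in the display.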
def square(pos, size, color, filled, width):
x, y = pos
dx, dy = size, size
return polygon([(x - dx, y - dy), (x - dx, y + dy), (x + dx, y + dy), (x + dx, y - dy)], outlineColor=color, fillColor=color, filled=filled, width=width, smoothed=False)
def to_screen(point):
( gamex, gamey ) = point
x = gamex*GRID_SIZE + MARGIN
y = (GRID_HEIGHT - gamey - 1)*GRID_SIZE + MARGIN
return ( x, y )
def to_grid(point):
    (x, y) = point
    # Keep the screen coordinates separate so the second conversion does not
    # read an already-overwritten value (the original reused x before computing y).
    grid_x = int((y - MARGIN + GRID_SIZE * 0.5) / GRID_SIZE)
    grid_y = int((x - MARGIN + GRID_SIZE * 0.5) / GRID_SIZE)
    print point, "-->", (grid_x, grid_y)
    return (grid_x, grid_y)

398
reinforcement/graphicsUtils.py Normal file
View file

@ -0,0 +1,398 @@
# graphicsUtils.py
# ----------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
import sys
import math
import random
import string
import time
import types
import Tkinter
_Windows = sys.platform == 'win32' # True if on Win95/98/NT
_root_window = None # The root window for graphics output
_canvas = None # The canvas which holds graphics
_canvas_xs = None # Size of canvas object
_canvas_ys = None
_canvas_x = None # Current position on canvas
_canvas_y = None
_canvas_col = None # Current colour (set to black below)
_canvas_tsize = 12
_canvas_tserifs = 0
def formatColor(r, g, b):
return '#%02x%02x%02x' % (int(r * 255), int(g * 255), int(b * 255))
def colorToVector(color):
return map(lambda x: int(x, 16) / 256.0, [color[1:3], color[3:5], color[5:7]])
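# Illustrative values (a rough check, not from the original source):
#   formatColor(1, 0, 0)     -> '#ff0000'
#   formatColor(.4, .4, 0)   -> '#666600'
#   colorToVector('#ff0000') -> [0.99609375, 0.0, 0.0]   (each hex byte / 256.0)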
if _Windows:
_canvas_tfonts = ['times new roman', 'lucida console']
else:
_canvas_tfonts = ['times', 'lucidasans-24']
pass # XXX need defaults here
def sleep(secs):
global _root_window
if _root_window == None:
time.sleep(secs)
else:
_root_window.update_idletasks()
_root_window.after(int(1000 * secs), _root_window.quit)
_root_window.mainloop()
def begin_graphics(width=640, height=480, color=formatColor(0, 0, 0), title=None):
global _root_window, _canvas, _canvas_x, _canvas_y, _canvas_xs, _canvas_ys, _bg_color
# Check for duplicate call
if _root_window is not None:
# Lose the window.
_root_window.destroy()
# Save the canvas size parameters
_canvas_xs, _canvas_ys = width - 1, height - 1
_canvas_x, _canvas_y = 0, _canvas_ys
_bg_color = color
# Create the root window
_root_window = Tkinter.Tk()
_root_window.protocol('WM_DELETE_WINDOW', _destroy_window)
_root_window.title(title or 'Graphics Window')
_root_window.resizable(0, 0)
# Create the canvas object
try:
_canvas = Tkinter.Canvas(_root_window, width=width, height=height)
_canvas.pack()
draw_background()
_canvas.update()
except:
_root_window = None
raise
# Bind to key-down and key-up events
_root_window.bind( "<KeyPress>", _keypress )
_root_window.bind( "<KeyRelease>", _keyrelease )
_root_window.bind( "<FocusIn>", _clear_keys )
_root_window.bind( "<FocusOut>", _clear_keys )
_root_window.bind( "<Button-1>", _leftclick )
_root_window.bind( "<Button-2>", _rightclick )
_root_window.bind( "<Button-3>", _rightclick )
_root_window.bind( "<Control-Button-1>", _ctrl_leftclick)
_clear_keys()
_leftclick_loc = None
_rightclick_loc = None
_ctrl_leftclick_loc = None
def _leftclick(event):
global _leftclick_loc
_leftclick_loc = (event.x, event.y)
def _rightclick(event):
global _rightclick_loc
_rightclick_loc = (event.x, event.y)
def _ctrl_leftclick(event):
global _ctrl_leftclick_loc
_ctrl_leftclick_loc = (event.x, event.y)
def wait_for_click():
while True:
global _leftclick_loc
global _rightclick_loc
global _ctrl_leftclick_loc
if _leftclick_loc != None:
val = _leftclick_loc
_leftclick_loc = None
return val, 'left'
if _rightclick_loc != None:
val = _rightclick_loc
_rightclick_loc = None
return val, 'right'
if _ctrl_leftclick_loc != None:
val = _ctrl_leftclick_loc
_ctrl_leftclick_loc = None
return val, 'ctrl_left'
sleep(0.05)
def draw_background():
corners = [(0,0), (0, _canvas_ys), (_canvas_xs, _canvas_ys), (_canvas_xs, 0)]
polygon(corners, _bg_color, fillColor=_bg_color, filled=True, smoothed=False)
def _destroy_window(event=None):
sys.exit(0)
# global _root_window
# _root_window.destroy()
# _root_window = None
#print "DESTROY"
def end_graphics():
global _root_window, _canvas, _mouse_enabled
try:
try:
sleep(1)
if _root_window != None:
_root_window.destroy()
except SystemExit, e:
print 'Ending graphics raised an exception:', e
finally:
_root_window = None
_canvas = None
_mouse_enabled = 0
_clear_keys()
def clear_screen(background=None):
global _canvas_x, _canvas_y
_canvas.delete('all')
draw_background()
_canvas_x, _canvas_y = 0, _canvas_ys
def polygon(coords, outlineColor, fillColor=None, filled=1, smoothed=1, behind=0, width=1):
c = []
for coord in coords:
c.append(coord[0])
c.append(coord[1])
if fillColor == None: fillColor = outlineColor
if filled == 0: fillColor = ""
poly = _canvas.create_polygon(c, outline=outlineColor, fill=fillColor, smooth=smoothed, width=width)
if behind > 0:
_canvas.tag_lower(poly, behind) # Higher should be more visible
return poly
def square(pos, r, color, filled=1, behind=0):
x, y = pos
coords = [(x - r, y - r), (x + r, y - r), (x + r, y + r), (x - r, y + r)]
return polygon(coords, color, color, filled, 0, behind=behind)
def circle(pos, r, outlineColor, fillColor, endpoints=None, style='pieslice', width=2):
x, y = pos
x0, x1 = x - r - 1, x + r
y0, y1 = y - r - 1, y + r
if endpoints == None:
e = [0, 359]
else:
e = list(endpoints)
while e[0] > e[1]: e[1] = e[1] + 360
return _canvas.create_arc(x0, y0, x1, y1, outline=outlineColor, fill=fillColor,
extent=e[1] - e[0], start=e[0], style=style, width=width)
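# Worked example (a rough check, not from the original source): passing
# endpoints=(15, -15) to circle() gives e = [15, -15]; the while loop above
# normalizes it to [15, 345], so the arc starts at 15 degrees with a
# 330-degree extent -- a Pacman-style pieslice with a 30-degree wedge missing.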
def image(pos, file="../../blueghost.gif"):
x, y = pos
# img = PhotoImage(file=file)
return _canvas.create_image(x, y, image = Tkinter.PhotoImage(file=file), anchor = Tkinter.NW)
def refresh():
_canvas.update_idletasks()
def moveCircle(id, pos, r, endpoints=None):
global _canvas_x, _canvas_y
x, y = pos
# x0, x1 = x - r, x + r + 1
# y0, y1 = y - r, y + r + 1
x0, x1 = x - r - 1, x + r
y0, y1 = y - r - 1, y + r
if endpoints == None:
e = [0, 359]
else:
e = list(endpoints)
while e[0] > e[1]: e[1] = e[1] + 360
edit(id, ('start', e[0]), ('extent', e[1] - e[0]))
move_to(id, x0, y0)
def edit(id, *args):
_canvas.itemconfigure(id, **dict(args))
def text(pos, color, contents, font='Helvetica', size=12, style='normal', anchor="nw"):
global _canvas_x, _canvas_y
x, y = pos
font = (font, str(size), style)
return _canvas.create_text(x, y, fill=color, text=contents, font=font, anchor=anchor)
def changeText(id, newText, font=None, size=12, style='normal'):
_canvas.itemconfigure(id, text=newText)
if font != None:
_canvas.itemconfigure(id, font=(font, '-%d' % size, style))
def changeColor(id, newColor):
_canvas.itemconfigure(id, fill=newColor)
def line(here, there, color=formatColor(0, 0, 0), width=2):
x0, y0 = here[0], here[1]
x1, y1 = there[0], there[1]
return _canvas.create_line(x0, y0, x1, y1, fill=color, width=width)
##############################################################################
### Keypress handling ########################################################
##############################################################################
# We bind to key-down and key-up events.
_keysdown = {}
_keyswaiting = {}
# This holds an unprocessed key release. We delay key releases by up to
# one call to keys_pressed() to get round a problem with auto repeat.
_got_release = None
def _keypress(event):
global _got_release
#remap_arrows(event)
_keysdown[event.keysym] = 1
_keyswaiting[event.keysym] = 1
# print event.char, event.keycode
_got_release = None
def _keyrelease(event):
global _got_release
#remap_arrows(event)
try:
del _keysdown[event.keysym]
except:
pass
_got_release = 1
def remap_arrows(event):
# TURN ARROW PRESSES INTO LETTERS (SHOULD BE IN KEYBOARD AGENT)
if event.char in ['a', 's', 'd', 'w']:
return
if event.keycode in [37, 101]: # LEFT ARROW (win / x)
event.char = 'a'
if event.keycode in [38, 99]: # UP ARROW
event.char = 'w'
if event.keycode in [39, 102]: # RIGHT ARROW
event.char = 'd'
if event.keycode in [40, 104]: # DOWN ARROW
event.char = 's'
def _clear_keys(event=None):
global _keysdown, _got_release, _keyswaiting
_keysdown = {}
_keyswaiting = {}
_got_release = None
def keys_pressed(d_o_e=Tkinter.tkinter.dooneevent,
d_w=Tkinter.tkinter.DONT_WAIT):
d_o_e(d_w)
if _got_release:
d_o_e(d_w)
return _keysdown.keys()
def keys_waiting():
global _keyswaiting
keys = _keyswaiting.keys()
_keyswaiting = {}
return keys
# Block for a list of keys...
def wait_for_keys():
keys = []
while keys == []:
keys = keys_pressed()
sleep(0.05)
return keys
def remove_from_screen(x,
d_o_e=Tkinter.tkinter.dooneevent,
d_w=Tkinter.tkinter.DONT_WAIT):
_canvas.delete(x)
d_o_e(d_w)
def _adjust_coords(coord_list, x, y):
for i in range(0, len(coord_list), 2):
coord_list[i] = coord_list[i] + x
coord_list[i + 1] = coord_list[i + 1] + y
return coord_list
def move_to(object, x, y=None,
d_o_e=Tkinter.tkinter.dooneevent,
d_w=Tkinter.tkinter.DONT_WAIT):
if y is None:
try: x, y = x
        except: raise Exception('incomprehensible coordinates')
horiz = True
newCoords = []
current_x, current_y = _canvas.coords(object)[0:2] # first point
for coord in _canvas.coords(object):
if horiz:
inc = x - current_x
else:
inc = y - current_y
horiz = not horiz
newCoords.append(coord + inc)
_canvas.coords(object, *newCoords)
d_o_e(d_w)
def move_by(object, x, y=None,
d_o_e=Tkinter.tkinter.dooneevent,
d_w=Tkinter.tkinter.DONT_WAIT, lift=False):
if y is None:
try: x, y = x
except: raise Exception, 'incomprehensible coordinates'
horiz = True
newCoords = []
for coord in _canvas.coords(object):
if horiz:
inc = x
else:
inc = y
horiz = not horiz
newCoords.append(coord + inc)
_canvas.coords(object, *newCoords)
d_o_e(d_w)
if lift:
_canvas.tag_raise(object)
def writePostscript(filename):
"Writes the current canvas to a postscript file."
psfile = file(filename, 'w')
psfile.write(_canvas.postscript(pageanchor='sw',
y='0.c',
x='0.c'))
psfile.close()
ghost_shape = [
(0, - 0.5),
(0.25, - 0.75),
(0.5, - 0.5),
(0.75, - 0.75),
(0.75, 0.5),
(0.5, 0.75),
(- 0.5, 0.75),
(- 0.75, 0.5),
(- 0.75, - 0.75),
(- 0.5, - 0.5),
(- 0.25, - 0.75)
]
if __name__ == '__main__':
begin_graphics()
clear_screen()
ghost_shape = [(x * 10 + 20, y * 10 + 20) for x, y in ghost_shape]
g = polygon(ghost_shape, formatColor(1, 1, 1))
move_to(g, (50, 50))
circle((150, 150), 20, formatColor(0.7, 0.3, 0.0), endpoints=[15, - 15])
sleep(2)

585
reinforcement/gridworld.py Normal file
View file

@ -0,0 +1,585 @@
# gridworld.py
# ------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
import random
import sys
import mdp
import environment
import util
import optparse
class Gridworld(mdp.MarkovDecisionProcess):
"""
Gridworld
"""
def __init__(self, grid):
# layout
if type(grid) == type([]): grid = makeGrid(grid)
self.grid = grid
# parameters
self.livingReward = 0.0
self.noise = 0.2
def setLivingReward(self, reward):
"""
The (negative) reward for exiting "normal" states.
Note that in the R+N text, this reward is on entering
a state and therefore is not clearly part of the state's
future rewards.
"""
self.livingReward = reward
def setNoise(self, noise):
"""
The probability of moving in an unintended direction.
"""
self.noise = noise
def getPossibleActions(self, state):
"""
Returns list of valid actions for 'state'.
Note that you can request moves into walls and
that "exit" states transition to the terminal
state under the special action "done".
"""
if state == self.grid.terminalState:
return ()
x,y = state
if type(self.grid[x][y]) == int:
return ('exit',)
return ('north','west','south','east')
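    # For example (consistent with the checks above): a cell holding a number
    # such as +1 or -1 is an "exit" square whose only action is 'exit';
    # '#' cells are walls and never appear as states; every other cell allows
    # the four compass moves, even ones that would bump into a wall.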
def getStates(self):
"""
Return list of all states.
"""
# The true terminal state.
states = [self.grid.terminalState]
for x in range(self.grid.width):
for y in range(self.grid.height):
if self.grid[x][y] != '#':
state = (x,y)
states.append(state)
return states
def getReward(self, state, action, nextState):
"""
Get reward for state, action, nextState transition.
Note that the reward depends only on the state being
departed (as in the R+N book examples, which more or
less use this convention).
"""
if state == self.grid.terminalState:
return 0.0
x, y = state
cell = self.grid[x][y]
if type(cell) == int or type(cell) == float:
return cell
return self.livingReward
def getStartState(self):
for x in range(self.grid.width):
for y in range(self.grid.height):
if self.grid[x][y] == 'S':
return (x, y)
        raise Exception('Grid has no start state')
def isTerminal(self, state):
"""
Only the TERMINAL_STATE state is *actually* a terminal state.
The other "exit" states are technically non-terminals with
a single action "exit" which leads to the true terminal state.
This convention is to make the grids line up with the examples
in the R+N textbook.
"""
return state == self.grid.terminalState
def getTransitionStatesAndProbs(self, state, action):
"""
Returns list of (nextState, prob) pairs
representing the states reachable
from 'state' by taking 'action' along
with their transition probabilities.
"""
if action not in self.getPossibleActions(state):
raise "Illegal action!"
if self.isTerminal(state):
return []
x, y = state
if type(self.grid[x][y]) == int or type(self.grid[x][y]) == float:
termState = self.grid.terminalState
return [(termState, 1.0)]
successors = []
northState = (self.__isAllowed(y+1,x) and (x,y+1)) or state
westState = (self.__isAllowed(y,x-1) and (x-1,y)) or state
southState = (self.__isAllowed(y-1,x) and (x,y-1)) or state
eastState = (self.__isAllowed(y,x+1) and (x+1,y)) or state
if action == 'north' or action == 'south':
if action == 'north':
successors.append((northState,1-self.noise))
else:
successors.append((southState,1-self.noise))
massLeft = self.noise
successors.append((westState,massLeft/2.0))
successors.append((eastState,massLeft/2.0))
if action == 'west' or action == 'east':
if action == 'west':
successors.append((westState,1-self.noise))
else:
successors.append((eastState,1-self.noise))
massLeft = self.noise
successors.append((northState,massLeft/2.0))
successors.append((southState,massLeft/2.0))
successors = self.__aggregate(successors)
return successors
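    # Worked example (illustrative): with the default noise of 0.2, taking 'north'
    # from an open interior cell returns
    #     [(northState, 0.8), (westState, 0.1), (eastState, 0.1)]
    # and if, say, the west neighbour is a wall, westState falls back to the current
    # state and __aggregate folds that 0.1 onto it.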
def __aggregate(self, statesAndProbs):
counter = util.Counter()
for state, prob in statesAndProbs:
counter[state] += prob
newStatesAndProbs = []
for state, prob in counter.items():
newStatesAndProbs.append((state, prob))
return newStatesAndProbs
def __isAllowed(self, y, x):
if y < 0 or y >= self.grid.height: return False
if x < 0 or x >= self.grid.width: return False
return self.grid[x][y] != '#'
class GridworldEnvironment(environment.Environment):
def __init__(self, gridWorld):
self.gridWorld = gridWorld
self.reset()
def getCurrentState(self):
return self.state
def getPossibleActions(self, state):
return self.gridWorld.getPossibleActions(state)
def doAction(self, action):
state = self.getCurrentState()
(nextState, reward) = self.getRandomNextState(state, action)
self.state = nextState
return (nextState, reward)
def getRandomNextState(self, state, action, randObj=None):
rand = -1.0
if randObj is None:
rand = random.random()
else:
rand = randObj.random()
sum = 0.0
successors = self.gridWorld.getTransitionStatesAndProbs(state, action)
for nextState, prob in successors:
sum += prob
if sum > 1.0:
                raise Exception('Total transition probability more than one; sample failure.')
if rand < sum:
reward = self.gridWorld.getReward(state, action, nextState)
return (nextState, reward)
        raise Exception('Total transition probability less than one; sample failure.')
def reset(self):
self.state = self.gridWorld.getStartState()
class Grid:
"""
A 2-dimensional array of immutables backed by a list of lists. Data is accessed
via grid[x][y] where (x,y) are cartesian coordinates with x horizontal,
y vertical and the origin (0,0) in the bottom left corner.
The __str__ method constructs an output that is oriented appropriately.
"""
def __init__(self, width, height, initialValue=' '):
self.width = width
self.height = height
self.data = [[initialValue for y in range(height)] for x in range(width)]
self.terminalState = 'TERMINAL_STATE'
def __getitem__(self, i):
return self.data[i]
def __setitem__(self, key, item):
self.data[key] = item
def __eq__(self, other):
if other == None: return False
return self.data == other.data
def __hash__(self):
        # self.data is a list of lists (unhashable), so hash an immutable view of it
        return hash(tuple(tuple(row) for row in self.data))
def copy(self):
g = Grid(self.width, self.height)
g.data = [x[:] for x in self.data]
return g
def deepCopy(self):
return self.copy()
def shallowCopy(self):
g = Grid(self.width, self.height)
g.data = self.data
return g
def _getLegacyText(self):
t = [[self.data[x][y] for x in range(self.width)] for y in range(self.height)]
t.reverse()
return t
def __str__(self):
return str(self._getLegacyText())
def makeGrid(gridString):
width, height = len(gridString[0]), len(gridString)
grid = Grid(width, height)
for ybar, line in enumerate(gridString):
y = height - ybar - 1
for x, el in enumerate(line):
grid[x][y] = el
return grid
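# Worked example (illustrative): makeGrid flips the row-major text into the (x, y)
# convention documented on Grid, with (0, 0) at the bottom-left:
#     >>> g = makeGrid([['S', ' '], [' ', 10]])
#     >>> g[0][1], g[1][0]
#     ('S', 10)     # 'S' is the top-left character, 10 the bottom-right one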
def getCliffGrid():
grid = [[' ',' ',' ',' ',' '],
['S',' ',' ',' ',10],
[-100,-100, -100, -100, -100]]
return Gridworld(makeGrid(grid))
def getCliffGrid2():
grid = [[' ',' ',' ',' ',' '],
[8,'S',' ',' ',10],
[-100,-100, -100, -100, -100]]
return Gridworld(grid)
def getDiscountGrid():
grid = [[' ',' ',' ',' ',' '],
[' ','#',' ',' ',' '],
[' ','#', 1,'#', 10],
['S',' ',' ',' ',' '],
[-10,-10, -10, -10, -10]]
return Gridworld(grid)
def getBridgeGrid():
grid = [[ '#',-100, -100, -100, -100, -100, '#'],
[ 1, 'S', ' ', ' ', ' ', ' ', 10],
[ '#',-100, -100, -100, -100, -100, '#']]
return Gridworld(grid)
def getBookGrid():
grid = [[' ',' ',' ',+1],
[' ','#',' ',-1],
['S',' ',' ',' ']]
return Gridworld(grid)
def getMazeGrid():
grid = [[' ',' ',' ',+1],
['#','#',' ','#'],
[' ','#',' ',' '],
[' ','#','#',' '],
['S',' ',' ',' ']]
return Gridworld(grid)
def getUserAction(state, actionFunction):
"""
Get an action from the user (rather than the agent).
Used for debugging and lecture demos.
"""
import graphicsUtils
action = None
while True:
keys = graphicsUtils.wait_for_keys()
if 'Up' in keys: action = 'north'
if 'Down' in keys: action = 'south'
if 'Left' in keys: action = 'west'
if 'Right' in keys: action = 'east'
if 'q' in keys: sys.exit(0)
if action == None: continue
break
actions = actionFunction(state)
if action not in actions:
action = actions[0]
return action
def printString(x): print x
def runEpisode(agent, environment, discount, decision, display, message, pause, episode):
returns = 0
totalDiscount = 1.0
environment.reset()
if 'startEpisode' in dir(agent): agent.startEpisode()
message("BEGINNING EPISODE: "+str(episode)+"\n")
while True:
# DISPLAY CURRENT STATE
state = environment.getCurrentState()
display(state)
pause()
# END IF IN A TERMINAL STATE
actions = environment.getPossibleActions(state)
if len(actions) == 0:
message("EPISODE "+str(episode)+" COMPLETE: RETURN WAS "+str(returns)+"\n")
return returns
# GET ACTION (USUALLY FROM AGENT)
action = decision(state)
if action == None:
            raise Exception('Error: Agent returned None action')
# EXECUTE ACTION
nextState, reward = environment.doAction(action)
message("Started in state: "+str(state)+
"\nTook action: "+str(action)+
"\nEnded in state: "+str(nextState)+
"\nGot reward: "+str(reward)+"\n")
# UPDATE LEARNER
if 'observeTransition' in dir(agent):
agent.observeTransition(state, action, nextState, reward)
returns += reward * totalDiscount
totalDiscount *= discount
if 'stopEpisode' in dir(agent):
agent.stopEpisode()
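# Worked example (illustrative): with discount 0.9 and per-step rewards 0, 0, 1,
# the loop above accumulates 0*1.0 + 0*0.9 + 1*0.81 = 0.81 as the discounted return.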
def parseOptions():
optParser = optparse.OptionParser()
optParser.add_option('-d', '--discount',action='store',
type='float',dest='discount',default=0.9,
help='Discount on future (default %default)')
optParser.add_option('-r', '--livingReward',action='store',
type='float',dest='livingReward',default=0.0,
metavar="R", help='Reward for living for a time step (default %default)')
optParser.add_option('-n', '--noise',action='store',
type='float',dest='noise',default=0.2,
metavar="P", help='How often action results in ' +
'unintended direction (default %default)' )
optParser.add_option('-e', '--epsilon',action='store',
type='float',dest='epsilon',default=0.3,
metavar="E", help='Chance of taking a random action in q-learning (default %default)')
optParser.add_option('-l', '--learningRate',action='store',
type='float',dest='learningRate',default=0.5,
metavar="P", help='TD learning rate (default %default)' )
optParser.add_option('-i', '--iterations',action='store',
type='int',dest='iters',default=10,
metavar="K", help='Number of rounds of value iteration (default %default)')
optParser.add_option('-k', '--episodes',action='store',
type='int',dest='episodes',default=1,
metavar="K", help='Number of epsiodes of the MDP to run (default %default)')
optParser.add_option('-g', '--grid',action='store',
metavar="G", type='string',dest='grid',default="BookGrid",
help='Grid to use (case sensitive; options are BookGrid, BridgeGrid, CliffGrid, MazeGrid, default %default)' )
optParser.add_option('-w', '--windowSize', metavar="X", type='int',dest='gridSize',default=150,
help='Request a window width of X pixels *per grid cell* (default %default)')
optParser.add_option('-a', '--agent',action='store', metavar="A",
type='string',dest='agent',default="random",
help='Agent type (options are \'random\', \'value\' and \'q\', default %default)')
optParser.add_option('-t', '--text',action='store_true',
dest='textDisplay',default=False,
help='Use text-only ASCII display')
optParser.add_option('-p', '--pause',action='store_true',
dest='pause',default=False,
help='Pause GUI after each time step when running the MDP')
optParser.add_option('-q', '--quiet',action='store_true',
dest='quiet',default=False,
help='Skip display of any learning episodes')
optParser.add_option('-s', '--speed',action='store', metavar="S", type=float,
dest='speed',default=1.0,
help='Speed of animation, S > 1.0 is faster, 0.0 < S < 1.0 is slower (default %default)')
optParser.add_option('-m', '--manual',action='store_true',
dest='manual',default=False,
help='Manually control agent')
optParser.add_option('-v', '--valueSteps',action='store_true' ,default=False,
help='Display each step of value iteration')
opts, args = optParser.parse_args()
if opts.manual and opts.agent != 'q':
print '## Disabling Agents in Manual Mode (-m) ##'
opts.agent = None
# MANAGE CONFLICTS
if opts.textDisplay or opts.quiet:
# if opts.quiet:
opts.pause = False
# opts.manual = False
if opts.manual:
opts.pause = True
return opts
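# Example invocations (illustrative):
#   python gridworld.py -a value -i 100 -k 10        value iteration on the default BookGrid
#   python gridworld.py -a q -k 50 -n 0.0 -e 0.3     Q-learning with no noise
#   python gridworld.py -m -g MazeGrid               manual control of the MazeGrid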
if __name__ == '__main__':
opts = parseOptions()
###########################
# GET THE GRIDWORLD
###########################
import gridworld
mdpFunction = getattr(gridworld, "get"+opts.grid)
mdp = mdpFunction()
mdp.setLivingReward(opts.livingReward)
mdp.setNoise(opts.noise)
env = gridworld.GridworldEnvironment(mdp)
###########################
# GET THE DISPLAY ADAPTER
###########################
import textGridworldDisplay
display = textGridworldDisplay.TextGridworldDisplay(mdp)
if not opts.textDisplay:
import graphicsGridworldDisplay
display = graphicsGridworldDisplay.GraphicsGridworldDisplay(mdp, opts.gridSize, opts.speed)
try:
display.start()
except KeyboardInterrupt:
sys.exit(0)
###########################
# GET THE AGENT
###########################
import valueIterationAgents, qlearningAgents
a = None
if opts.agent == 'value':
a = valueIterationAgents.ValueIterationAgent(mdp, opts.discount, opts.iters)
elif opts.agent == 'q':
#env.getPossibleActions, opts.discount, opts.learningRate, opts.epsilon
#simulationFn = lambda agent, state: simulation.GridworldSimulation(agent,state,mdp)
gridWorldEnv = GridworldEnvironment(mdp)
actionFn = lambda state: mdp.getPossibleActions(state)
qLearnOpts = {'gamma': opts.discount,
'alpha': opts.learningRate,
'epsilon': opts.epsilon,
'actionFn': actionFn}
a = qlearningAgents.QLearningAgent(**qLearnOpts)
elif opts.agent == 'random':
# # No reason to use the random agent without episodes
if opts.episodes == 0:
opts.episodes = 10
class RandomAgent:
def getAction(self, state):
return random.choice(mdp.getPossibleActions(state))
def getValue(self, state):
return 0.0
def getQValue(self, state, action):
return 0.0
def getPolicy(self, state):
"NOTE: 'random' is a special policy value; don't use it in your code."
return 'random'
def update(self, state, action, nextState, reward):
pass
a = RandomAgent()
else:
        if not opts.manual: raise Exception('Unknown agent type: ' + opts.agent)
###########################
# RUN EPISODES
###########################
# DISPLAY Q/V VALUES BEFORE SIMULATION OF EPISODES
try:
if not opts.manual and opts.agent == 'value':
if opts.valueSteps:
for i in range(opts.iters):
tempAgent = valueIterationAgents.ValueIterationAgent(mdp, opts.discount, i)
display.displayValues(tempAgent, message = "VALUES AFTER "+str(i)+" ITERATIONS")
display.pause()
display.displayValues(a, message = "VALUES AFTER "+str(opts.iters)+" ITERATIONS")
display.pause()
display.displayQValues(a, message = "Q-VALUES AFTER "+str(opts.iters)+" ITERATIONS")
display.pause()
except KeyboardInterrupt:
sys.exit(0)
# FIGURE OUT WHAT TO DISPLAY EACH TIME STEP (IF ANYTHING)
displayCallback = lambda x: None
if not opts.quiet:
if opts.manual and opts.agent == None:
displayCallback = lambda state: display.displayNullValues(state)
else:
if opts.agent == 'random': displayCallback = lambda state: display.displayValues(a, state, "CURRENT VALUES")
if opts.agent == 'value': displayCallback = lambda state: display.displayValues(a, state, "CURRENT VALUES")
if opts.agent == 'q': displayCallback = lambda state: display.displayQValues(a, state, "CURRENT Q-VALUES")
messageCallback = lambda x: printString(x)
if opts.quiet:
messageCallback = lambda x: None
# FIGURE OUT WHETHER TO WAIT FOR A KEY PRESS AFTER EACH TIME STEP
pauseCallback = lambda : None
if opts.pause:
pauseCallback = lambda : display.pause()
# FIGURE OUT WHETHER THE USER WANTS MANUAL CONTROL (FOR DEBUGGING AND DEMOS)
if opts.manual:
decisionCallback = lambda state : getUserAction(state, mdp.getPossibleActions)
else:
decisionCallback = a.getAction
# RUN EPISODES
if opts.episodes > 0:
print
print "RUNNING", opts.episodes, "EPISODES"
print
returns = 0
for episode in range(1, opts.episodes+1):
returns += runEpisode(a, env, opts.discount, decisionCallback, displayCallback, messageCallback, pauseCallback, episode)
if opts.episodes > 0:
print
print "AVERAGE RETURNS FROM START STATE: "+str((returns+0.0) / opts.episodes)
print
print
# DISPLAY POST-LEARNING VALUES / Q-VALUES
if opts.agent == 'q' and not opts.manual:
try:
display.displayQValues(a, message = "Q-VALUES AFTER "+str(opts.episodes)+" EPISODES")
display.pause()
display.displayValues(a, message = "VALUES AFTER "+str(opts.episodes)+" EPISODES")
display.pause()
except KeyboardInterrupt:
sys.exit(0)

84
reinforcement/keyboardAgents.py Normal file
View file

@ -0,0 +1,84 @@
# keyboardAgents.py
# -----------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
from game import Agent
from game import Directions
import random
class KeyboardAgent(Agent):
"""
An agent controlled by the keyboard.
"""
# NOTE: Arrow keys also work.
WEST_KEY = 'a'
EAST_KEY = 'd'
NORTH_KEY = 'w'
SOUTH_KEY = 's'
STOP_KEY = 'q'
def __init__( self, index = 0 ):
self.lastMove = Directions.STOP
self.index = index
self.keys = []
def getAction( self, state):
from graphicsUtils import keys_waiting
from graphicsUtils import keys_pressed
keys = keys_waiting() + keys_pressed()
if keys != []:
self.keys = keys
legal = state.getLegalActions(self.index)
move = self.getMove(legal)
if move == Directions.STOP:
# Try to move in the same direction as before
if self.lastMove in legal:
move = self.lastMove
if (self.STOP_KEY in self.keys) and Directions.STOP in legal: move = Directions.STOP
if move not in legal:
move = random.choice(legal)
self.lastMove = move
return move
def getMove(self, legal):
move = Directions.STOP
if (self.WEST_KEY in self.keys or 'Left' in self.keys) and Directions.WEST in legal: move = Directions.WEST
if (self.EAST_KEY in self.keys or 'Right' in self.keys) and Directions.EAST in legal: move = Directions.EAST
if (self.NORTH_KEY in self.keys or 'Up' in self.keys) and Directions.NORTH in legal: move = Directions.NORTH
if (self.SOUTH_KEY in self.keys or 'Down' in self.keys) and Directions.SOUTH in legal: move = Directions.SOUTH
return move
class KeyboardAgent2(KeyboardAgent):
"""
A second agent controlled by the keyboard.
"""
# NOTE: Arrow keys also work.
WEST_KEY = 'j'
EAST_KEY = "l"
NORTH_KEY = 'i'
SOUTH_KEY = 'k'
STOP_KEY = 'u'
def getMove(self, legal):
move = Directions.STOP
if (self.WEST_KEY in self.keys) and Directions.WEST in legal: move = Directions.WEST
if (self.EAST_KEY in self.keys) and Directions.EAST in legal: move = Directions.EAST
if (self.NORTH_KEY in self.keys) and Directions.NORTH in legal: move = Directions.NORTH
if (self.SOUTH_KEY in self.keys) and Directions.SOUTH in legal: move = Directions.SOUTH
return move

149
reinforcement/layout.py Normal file
View file

@ -0,0 +1,149 @@
# layout.py
# ---------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
from util import manhattanDistance
from game import Grid
import os
import random
VISIBILITY_MATRIX_CACHE = {}
class Layout:
"""
A Layout manages the static information about the game board.
"""
def __init__(self, layoutText):
self.width = len(layoutText[0])
self.height= len(layoutText)
self.walls = Grid(self.width, self.height, False)
self.food = Grid(self.width, self.height, False)
self.capsules = []
self.agentPositions = []
self.numGhosts = 0
self.processLayoutText(layoutText)
self.layoutText = layoutText
self.totalFood = len(self.food.asList())
# self.initializeVisibilityMatrix()
def getNumGhosts(self):
return self.numGhosts
def initializeVisibilityMatrix(self):
global VISIBILITY_MATRIX_CACHE
if reduce(str.__add__, self.layoutText) not in VISIBILITY_MATRIX_CACHE:
from game import Directions
vecs = [(-0.5,0), (0.5,0),(0,-0.5),(0,0.5)]
dirs = [Directions.NORTH, Directions.SOUTH, Directions.WEST, Directions.EAST]
vis = Grid(self.width, self.height, {Directions.NORTH:set(), Directions.SOUTH:set(), Directions.EAST:set(), Directions.WEST:set(), Directions.STOP:set()})
for x in range(self.width):
for y in range(self.height):
if self.walls[x][y] == False:
for vec, direction in zip(vecs, dirs):
dx, dy = vec
nextx, nexty = x + dx, y + dy
while (nextx + nexty) != int(nextx) + int(nexty) or not self.walls[int(nextx)][int(nexty)] :
vis[x][y][direction].add((nextx, nexty))
nextx, nexty = x + dx, y + dy
self.visibility = vis
VISIBILITY_MATRIX_CACHE[reduce(str.__add__, self.layoutText)] = vis
else:
self.visibility = VISIBILITY_MATRIX_CACHE[reduce(str.__add__, self.layoutText)]
def isWall(self, pos):
x, col = pos
return self.walls[x][col]
def getRandomLegalPosition(self):
x = random.choice(range(self.width))
y = random.choice(range(self.height))
while self.isWall( (x, y) ):
x = random.choice(range(self.width))
y = random.choice(range(self.height))
return (x,y)
def getRandomCorner(self):
poses = [(1,1), (1, self.height - 2), (self.width - 2, 1), (self.width - 2, self.height - 2)]
return random.choice(poses)
def getFurthestCorner(self, pacPos):
poses = [(1,1), (1, self.height - 2), (self.width - 2, 1), (self.width - 2, self.height - 2)]
dist, pos = max([(manhattanDistance(p, pacPos), p) for p in poses])
return pos
def isVisibleFrom(self, ghostPos, pacPos, pacDirection):
row, col = [int(x) for x in pacPos]
return ghostPos in self.visibility[row][col][pacDirection]
def __str__(self):
return "\n".join(self.layoutText)
def deepCopy(self):
return Layout(self.layoutText[:])
def processLayoutText(self, layoutText):
"""
Coordinates are flipped from the input format to the (x,y) convention here
The shape of the maze. Each character
represents a different type of object.
% - Wall
. - Food
o - Capsule
G - Ghost
P - Pacman
Other characters are ignored.
"""
maxY = self.height - 1
for y in range(self.height):
for x in range(self.width):
layoutChar = layoutText[maxY - y][x]
self.processLayoutChar(x, y, layoutChar)
self.agentPositions.sort()
self.agentPositions = [ ( i == 0, pos) for i, pos in self.agentPositions]
def processLayoutChar(self, x, y, layoutChar):
if layoutChar == '%':
self.walls[x][y] = True
elif layoutChar == '.':
self.food[x][y] = True
elif layoutChar == 'o':
self.capsules.append((x, y))
elif layoutChar == 'P':
self.agentPositions.append( (0, (x, y) ) )
elif layoutChar in ['G']:
self.agentPositions.append( (1, (x, y) ) )
self.numGhosts += 1
elif layoutChar in ['1', '2', '3', '4']:
self.agentPositions.append( (int(layoutChar), (x,y)))
self.numGhosts += 1
def getLayout(name, back = 2):
if name.endswith('.lay'):
layout = tryToLoad('layouts/' + name)
if layout == None: layout = tryToLoad(name)
else:
layout = tryToLoad('layouts/' + name + '.lay')
if layout == None: layout = tryToLoad(name + '.lay')
if layout == None and back >= 0:
curdir = os.path.abspath('.')
os.chdir('..')
layout = getLayout(name, back -1)
os.chdir(curdir)
return layout
def tryToLoad(fullname):
if(not os.path.exists(fullname)): return None
f = open(fullname)
try: return Layout([line.strip() for line in f])
finally: f.close()

View file

@ -0,0 +1,7 @@
%%%%%%%%%%%%%%%%%%%
%G. G ....%
%.% % %%%%%% %.%%.%
%.%o% % o% %.o%.%
%.%%%.% %%% %..%.%
%..... P %..%G%
%%%%%%%%%%%%%%%%%%%%

View file

@ -0,0 +1,9 @@
%%%%%%%%%%%%%%%%%%%%
%o...%........%...o%
%.%%.%.%%..%%.%.%%.%
%...... G GG%......%
%.%.%%.%% %%%.%%.%.%
%.%....% ooo%.%..%.%
%.%.%%.% %% %.%.%%.%
%o%......P....%....%
%%%%%%%%%%%%%%%%%%%%

View file

@ -0,0 +1,11 @@
%%%%%%%%%%%%%%%%%%%%
%o...%........%....%
%.%%.%.%%%%%%.%.%%.%
%.%..............%.%
%.%.%%.%% %%.%%.%.%
%......%G G%......%
%.%.%%.%%%%%%.%%.%.%
%.%..............%.%
%.%%.%.%%%%%%.%.%%.%
%....%...P....%...o%
%%%%%%%%%%%%%%%%%%%%

View file

@ -0,0 +1,7 @@
%%%%%%%%
%P %
% .% . %
% % %
% .% . %
% G%
%%%%%%%%

View file

@ -0,0 +1,5 @@
%%%%%%%%%
%.P G%
% %.%G%%%
%G %%%
%%%%%%%%%

View file

@ -0,0 +1,9 @@
%%%%%%%%%%%%%%%%%%%%%%%%%
%.. P .... .... %
%.. ... ... ... ... %
%.. ... ... ... ... %
%.. .... .... G %
%.. ... ... ... ... %
%.. ... ... ... ... %
%.. .... .... o%
%%%%%%%%%%%%%%%%%%%%%%%%%

View file

@ -0,0 +1,27 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%............%%............%
%.%%%%.%%%%%.%%.%%%%%.%%%%.%
%o%%%%.%%%%%.%%.%%%%%.%%%%o%
%.%%%%.%%%%%.%%.%%%%%.%%%%.%
%..........................%
%.%%%%.%%.%%%%%%%%.%%.%%%%.%
%.%%%%.%%.%%%%%%%%.%%.%%%%.%
%......%%....%%....%%......%
%%%%%%.%%%%% %% %%%%%.%%%%%%
%%%%%%.%%%%% %% %%%%%.%%%%%%
%%%%%%.% %.%%%%%%
%%%%%%.% %%%% %%%% %.%%%%%%
% . %G GG G% . %
%%%%%%.% %%%%%%%%%% %.%%%%%%
%%%%%%.% %.%%%%%%
%%%%%%.% %%%%%%%%%% %.%%%%%%
%............%%............%
%.%%%%.%%%%%.%%.%%%%%.%%%%.%
%.%%%%.%%%%%.%%.%%%%%.%%%%.%
%o..%%....... .......%%..o%
%%%.%%.%%.%%%%%%%%.%%.%%.%%%
%%%.%%.%%.%%%%%%%%.%%.%%.%%%
%......%%....%%....%%......%
%.%%%%%%%%%%.%%.%%%%%%%%%%.%
%.............P............%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%

View file

@ -0,0 +1,7 @@
%%%%%%%%%%%%%%%%%%%%
%......%G G%......%
%.%%...%% %%...%%.%
%.%o.%........%.o%.%
%.%%.%.%%%%%%.%.%%.%
%........P.........%
%%%%%%%%%%%%%%%%%%%%

View file

@ -0,0 +1,7 @@
%%%%%%%
% P %
% %%% %
% %. %
% %%% %
%. G %
%%%%%%%

View file

@ -0,0 +1,10 @@
%%%%%
% . %
%.G.%
% . %
%. .%
% %
% .%
% %
%P .%
%%%%%

View file

@ -0,0 +1,5 @@
%%%%%%%%
% P G%
%G%%%%%%
%.... %
%%%%%%%%

View file

@ -0,0 +1,13 @@
%%%%%%%%%%%%%%%%%%%%
%o...%........%...o%
%.%%.%.%%..%%.%.%%.%
%.%.....%..%.....%.%
%.%.%%.%% %%.%%.%.%
%...... GGGG%.%....%
%.%....%%%%%%.%..%.%
%.%....% oo%.%..%.%
%.%....% %%%%.%..%.%
%.%...........%..%.%
%.%%.%.%%%%%%.%.%%.%
%o...%...P....%...o%
%%%%%%%%%%%%%%%%%%%%

258
reinforcement/learningAgents.py Normal file
View file

@ -0,0 +1,258 @@
# learningAgents.py
# -----------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
from game import Directions, Agent, Actions
import random,util,time
class ValueEstimationAgent(Agent):
"""
Abstract agent which assigns values to (state,action)
Q-Values for an environment. As well as a value to a
state and a policy given respectively by,
V(s) = max_{a in actions} Q(s,a)
policy(s) = arg_max_{a in actions} Q(s,a)
Both ValueIterationAgent and QLearningAgent inherit
from this agent. While a ValueIterationAgent has
a model of the environment via a MarkovDecisionProcess
(see mdp.py) that is used to estimate Q-Values before
ever actually acting, the QLearningAgent estimates
Q-Values while acting in the environment.
"""
def __init__(self, alpha=1.0, epsilon=0.05, gamma=0.8, numTraining = 10):
"""
Sets options, which can be passed in via the Pacman command line using -a alpha=0.5,...
alpha - learning rate
epsilon - exploration rate
gamma - discount factor
        numTraining - number of training episodes, i.e. no learning after this many episodes
"""
self.alpha = float(alpha)
self.epsilon = float(epsilon)
self.discount = float(gamma)
self.numTraining = int(numTraining)
####################################
# Override These Functions #
####################################
def getQValue(self, state, action):
"""
Should return Q(state,action)
"""
util.raiseNotDefined()
def getValue(self, state):
"""
What is the value of this state under the best action?
Concretely, this is given by
V(s) = max_{a in actions} Q(s,a)
"""
util.raiseNotDefined()
def getPolicy(self, state):
"""
        What is the best action to take in the state? Note that because
we might want to explore, this might not coincide with getAction
Concretely, this is given by
policy(s) = arg_max_{a in actions} Q(s,a)
If many actions achieve the maximal Q-value,
it doesn't matter which is selected.
"""
util.raiseNotDefined()
def getAction(self, state):
"""
state: can call state.getLegalActions()
Choose an action and return it.
"""
util.raiseNotDefined()
class ReinforcementAgent(ValueEstimationAgent):
"""
      Abstract Reinforcement Agent: A ValueEstimationAgent
which estimates Q-Values (as well as policies) from experience
rather than a model
What you need to know:
- The environment will call
observeTransition(state,action,nextState,deltaReward),
which will call update(state, action, nextState, deltaReward)
which you should override.
- Use self.getLegalActions(state) to know which actions
are available in a state
"""
####################################
# Override These Functions #
####################################
def update(self, state, action, nextState, reward):
"""
This class will call this function, which you write, after
observing a transition and reward
"""
util.raiseNotDefined()
####################################
# Read These Functions #
####################################
def getLegalActions(self,state):
"""
Get the actions available for a given
state. This is what you should use to
obtain legal actions for a state
"""
return self.actionFn(state)
def observeTransition(self, state,action,nextState,deltaReward):
"""
Called by environment to inform agent that a transition has
been observed. This will result in a call to self.update
on the same arguments
NOTE: Do *not* override or call this function
"""
self.episodeRewards += deltaReward
self.update(state,action,nextState,deltaReward)
def startEpisode(self):
"""
Called by environment when new episode is starting
"""
self.lastState = None
self.lastAction = None
self.episodeRewards = 0.0
def stopEpisode(self):
"""
Called by environment when episode is done
"""
if self.episodesSoFar < self.numTraining:
self.accumTrainRewards += self.episodeRewards
else:
self.accumTestRewards += self.episodeRewards
self.episodesSoFar += 1
if self.episodesSoFar >= self.numTraining:
# Take off the training wheels
self.epsilon = 0.0 # no exploration
self.alpha = 0.0 # no learning
def isInTraining(self):
return self.episodesSoFar < self.numTraining
def isInTesting(self):
return not self.isInTraining()
def __init__(self, actionFn = None, numTraining=100, epsilon=0.5, alpha=0.5, gamma=1):
"""
actionFn: Function which takes a state and returns the list of legal actions
alpha - learning rate
epsilon - exploration rate
gamma - discount factor
        numTraining - number of training episodes, i.e. no learning after this many episodes
"""
if actionFn == None:
actionFn = lambda state: state.getLegalActions()
self.actionFn = actionFn
self.episodesSoFar = 0
self.accumTrainRewards = 0.0
self.accumTestRewards = 0.0
self.numTraining = int(numTraining)
self.epsilon = float(epsilon)
self.alpha = float(alpha)
self.discount = float(gamma)
################################
# Controls needed for Crawler #
################################
def setEpsilon(self, epsilon):
self.epsilon = epsilon
def setLearningRate(self, alpha):
self.alpha = alpha
def setDiscount(self, discount):
self.discount = discount
def doAction(self,state,action):
"""
Called by inherited class when
an action is taken in a state
"""
self.lastState = state
self.lastAction = action
###################
# Pacman Specific #
###################
def observationFunction(self, state):
"""
This is where we ended up after our last action.
The simulation should somehow ensure this is called
"""
        if self.lastState is not None:
reward = state.getScore() - self.lastState.getScore()
self.observeTransition(self.lastState, self.lastAction, state, reward)
return state
def registerInitialState(self, state):
self.startEpisode()
if self.episodesSoFar == 0:
print 'Beginning %d episodes of Training' % (self.numTraining)
def final(self, state):
"""
Called by Pacman game at the terminal state
"""
deltaReward = state.getScore() - self.lastState.getScore()
self.observeTransition(self.lastState, self.lastAction, state, deltaReward)
self.stopEpisode()
# Make sure we have this var
if not 'episodeStartTime' in self.__dict__:
self.episodeStartTime = time.time()
if not 'lastWindowAccumRewards' in self.__dict__:
self.lastWindowAccumRewards = 0.0
self.lastWindowAccumRewards += state.getScore()
NUM_EPS_UPDATE = 100
if self.episodesSoFar % NUM_EPS_UPDATE == 0:
print 'Reinforcement Learning Status:'
windowAvg = self.lastWindowAccumRewards / float(NUM_EPS_UPDATE)
if self.episodesSoFar <= self.numTraining:
trainAvg = self.accumTrainRewards / float(self.episodesSoFar)
print '\tCompleted %d out of %d training episodes' % (
self.episodesSoFar,self.numTraining)
print '\tAverage Rewards over all training: %.2f' % (
trainAvg)
else:
testAvg = float(self.accumTestRewards) / (self.episodesSoFar - self.numTraining)
print '\tCompleted %d test episodes' % (self.episodesSoFar - self.numTraining)
print '\tAverage Rewards over testing: %.2f' % testAvg
print '\tAverage Rewards for last %d episodes: %.2f' % (
NUM_EPS_UPDATE,windowAvg)
print '\tEpisode took %.2f seconds' % (time.time() - self.episodeStartTime)
self.lastWindowAccumRewards = 0.0
self.episodeStartTime = time.time()
if self.episodesSoFar == self.numTraining:
msg = 'Training Done (turning off epsilon and alpha)'
print '%s\n%s' % (msg,'-' * len(msg))

67
reinforcement/mdp.py Normal file
View file

@ -0,0 +1,67 @@
# mdp.py
# ------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
import random
class MarkovDecisionProcess:
def getStates(self):
"""
Return a list of all states in the MDP.
Not generally possible for large MDPs.
"""
abstract
def getStartState(self):
"""
Return the start state of the MDP.
"""
abstract
def getPossibleActions(self, state):
"""
Return list of possible actions from 'state'.
"""
abstract
def getTransitionStatesAndProbs(self, state, action):
"""
Returns list of (nextState, prob) pairs
representing the states reachable
from 'state' by taking 'action' along
with their transition probabilities.
        Note that in Q-Learning and reinforcement
learning in general, we do not know these
probabilities nor do we directly model them.
"""
abstract
def getReward(self, state, action, nextState):
"""
Get the reward for the state, action, nextState transition.
Not available in reinforcement learning.
"""
abstract
def isTerminal(self, state):
"""
Returns true if the current state is a terminal state. By convention,
a terminal state has zero future rewards. Sometimes the terminal state(s)
may have no possible actions. It is also common to think of the terminal
state as having a self-loop action 'pass' with zero reward; the formulations
are equivalent.
"""
abstract

684
reinforcement/pacman.py Normal file
View file

@ -0,0 +1,684 @@
# pacman.py
# ---------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
"""
Pacman.py holds the logic for the classic pacman game along with the main
code to run a game. This file is divided into three sections:
(i) Your interface to the pacman world:
Pacman is a complex environment. You probably don't want to
          read through all of the code we wrote to make the game run
correctly. This section contains the parts of the code
that you will need to understand in order to complete the
project. There is also some code in game.py that you should
understand.
(ii) The hidden secrets of pacman:
This section contains all of the logic code that the pacman
environment uses to decide who can move where, who dies when
things collide, etc. You shouldn't need to read this section
of code, but you can if you want.
(iii) Framework to start a game:
The final section contains the code for reading the command
you use to set up the game, then starting up a new game, along with
linking in all the external parts (agent functions, graphics).
Check this section out to see all the options available to you.
To play your first game, type 'python pacman.py' from the command line.
The keys are 'a', 's', 'd', and 'w' to move (or arrow keys). Have fun!
"""
from game import GameStateData
from game import Game
from game import Directions
from game import Actions
from util import nearestPoint
from util import manhattanDistance
import util, layout
import sys, types, time, random, os
###################################################
# YOUR INTERFACE TO THE PACMAN WORLD: A GameState #
###################################################
class GameState:
"""
A GameState specifies the full game state, including the food, capsules,
agent configurations and score changes.
GameStates are used by the Game object to capture the actual state of the game and
can be used by agents to reason about the game.
Much of the information in a GameState is stored in a GameStateData object. We
strongly suggest that you access that data via the accessor methods below rather
than referring to the GameStateData object directly.
Note that in classic Pacman, Pacman is always agent 0.
"""
####################################################
# Accessor methods: use these to access state data #
####################################################
# static variable keeps track of which states have had getLegalActions called
explored = set()
def getAndResetExplored():
tmp = GameState.explored.copy()
GameState.explored = set()
return tmp
getAndResetExplored = staticmethod(getAndResetExplored)
def getLegalActions( self, agentIndex=0 ):
"""
Returns the legal actions for the agent specified.
"""
# GameState.explored.add(self)
if self.isWin() or self.isLose(): return []
if agentIndex == 0: # Pacman is moving
return PacmanRules.getLegalActions( self )
else:
return GhostRules.getLegalActions( self, agentIndex )
def generateSuccessor( self, agentIndex, action):
"""
Returns the successor state after the specified agent takes the action.
"""
# Check that successors exist
if self.isWin() or self.isLose(): raise Exception('Can\'t generate a successor of a terminal state.')
# Copy current state
state = GameState(self)
# Let agent's logic deal with its action's effects on the board
if agentIndex == 0: # Pacman is moving
state.data._eaten = [False for i in range(state.getNumAgents())]
PacmanRules.applyAction( state, action )
else: # A ghost is moving
GhostRules.applyAction( state, action, agentIndex )
# Time passes
if agentIndex == 0:
state.data.scoreChange += -TIME_PENALTY # Penalty for waiting around
else:
GhostRules.decrementTimer( state.data.agentStates[agentIndex] )
# Resolve multi-agent effects
GhostRules.checkDeath( state, agentIndex )
# Book keeping
state.data._agentMoved = agentIndex
state.data.score += state.data.scoreChange
GameState.explored.add(self)
GameState.explored.add(state)
return state
def getLegalPacmanActions( self ):
return self.getLegalActions( 0 )
def generatePacmanSuccessor( self, action ):
"""
Generates the successor state after the specified pacman move
"""
return self.generateSuccessor( 0, action )
def getPacmanState( self ):
"""
Returns an AgentState object for pacman (in game.py)
state.pos gives the current position
state.direction gives the travel vector
"""
return self.data.agentStates[0].copy()
def getPacmanPosition( self ):
return self.data.agentStates[0].getPosition()
def getGhostStates( self ):
return self.data.agentStates[1:]
def getGhostState( self, agentIndex ):
if agentIndex == 0 or agentIndex >= self.getNumAgents():
raise Exception("Invalid index passed to getGhostState")
return self.data.agentStates[agentIndex]
def getGhostPosition( self, agentIndex ):
if agentIndex == 0:
raise Exception("Pacman's index passed to getGhostPosition")
return self.data.agentStates[agentIndex].getPosition()
def getGhostPositions(self):
return [s.getPosition() for s in self.getGhostStates()]
def getNumAgents( self ):
return len( self.data.agentStates )
def getScore( self ):
return float(self.data.score)
def getCapsules(self):
"""
Returns a list of positions (x,y) of the remaining capsules.
"""
return self.data.capsules
def getNumFood( self ):
return self.data.food.count()
def getFood(self):
"""
Returns a Grid of boolean food indicator variables.
Grids can be accessed via list notation, so to check
if there is food at (x,y), just call
currentFood = state.getFood()
if currentFood[x][y] == True: ...
"""
return self.data.food
def getWalls(self):
"""
Returns a Grid of boolean wall indicator variables.
Grids can be accessed via list notation, so to check
if there is a wall at (x,y), just call
walls = state.getWalls()
if walls[x][y] == True: ...
"""
return self.data.layout.walls
def hasFood(self, x, y):
return self.data.food[x][y]
def hasWall(self, x, y):
return self.data.layout.walls[x][y]
def isLose( self ):
return self.data._lose
def isWin( self ):
return self.data._win
#############################################
# Helper methods: #
# You shouldn't need to call these directly #
#############################################
def __init__( self, prevState = None ):
"""
Generates a new state by copying information from its predecessor.
"""
        if prevState != None: # Copy the predecessor's state data
self.data = GameStateData(prevState.data)
else:
self.data = GameStateData()
def deepCopy( self ):
state = GameState( self )
state.data = self.data.deepCopy()
return state
def __eq__( self, other ):
"""
Allows two states to be compared.
"""
return hasattr(other, 'data') and self.data == other.data
def __hash__( self ):
"""
Allows states to be keys of dictionaries.
"""
return hash( self.data )
def __str__( self ):
return str(self.data)
def initialize( self, layout, numGhostAgents=1000 ):
"""
Creates an initial game state from a layout array (see layout.py).
"""
self.data.initialize(layout, numGhostAgents)
############################################################################
# THE HIDDEN SECRETS OF PACMAN #
# #
# You shouldn't need to look through the code in this section of the file. #
############################################################################
SCARED_TIME = 40 # Moves ghosts are scared
COLLISION_TOLERANCE = 0.7 # How close ghosts must be to Pacman to kill
TIME_PENALTY = 1 # Number of points lost each round
class ClassicGameRules:
"""
These game rules manage the control flow of a game, deciding when
and how the game starts and ends.
"""
def __init__(self, timeout=30):
self.timeout = timeout
def newGame( self, layout, pacmanAgent, ghostAgents, display, quiet = False, catchExceptions=False):
agents = [pacmanAgent] + ghostAgents[:layout.getNumGhosts()]
initState = GameState()
initState.initialize( layout, len(ghostAgents) )
game = Game(agents, display, self, catchExceptions=catchExceptions)
game.state = initState
self.initialState = initState.deepCopy()
self.quiet = quiet
return game
def process(self, state, game):
"""
Checks to see whether it is time to end the game.
"""
if state.isWin(): self.win(state, game)
if state.isLose(): self.lose(state, game)
def win( self, state, game ):
if not self.quiet: print "Pacman emerges victorious! Score: %d" % state.data.score
game.gameOver = True
def lose( self, state, game ):
if not self.quiet: print "Pacman died! Score: %d" % state.data.score
game.gameOver = True
def getProgress(self, game):
return float(game.state.getNumFood()) / self.initialState.getNumFood()
def agentCrash(self, game, agentIndex):
if agentIndex == 0:
print "Pacman crashed"
else:
print "A ghost crashed"
def getMaxTotalTime(self, agentIndex):
return self.timeout
def getMaxStartupTime(self, agentIndex):
return self.timeout
def getMoveWarningTime(self, agentIndex):
return self.timeout
def getMoveTimeout(self, agentIndex):
return self.timeout
def getMaxTimeWarnings(self, agentIndex):
return 0
class PacmanRules:
"""
These functions govern how pacman interacts with his environment under
the classic game rules.
"""
PACMAN_SPEED=1
def getLegalActions( state ):
"""
Returns a list of possible actions.
"""
return Actions.getPossibleActions( state.getPacmanState().configuration, state.data.layout.walls )
getLegalActions = staticmethod( getLegalActions )
def applyAction( state, action ):
"""
Edits the state to reflect the results of the action.
"""
legal = PacmanRules.getLegalActions( state )
if action not in legal:
raise Exception("Illegal action " + str(action))
pacmanState = state.data.agentStates[0]
# Update Configuration
vector = Actions.directionToVector( action, PacmanRules.PACMAN_SPEED )
pacmanState.configuration = pacmanState.configuration.generateSuccessor( vector )
# Eat
next = pacmanState.configuration.getPosition()
nearest = nearestPoint( next )
if manhattanDistance( nearest, next ) <= 0.5 :
# Remove food
PacmanRules.consume( nearest, state )
applyAction = staticmethod( applyAction )
def consume( position, state ):
x,y = position
# Eat food
if state.data.food[x][y]:
state.data.scoreChange += 10
state.data.food = state.data.food.copy()
state.data.food[x][y] = False
state.data._foodEaten = position
# TODO: cache numFood?
numFood = state.getNumFood()
if numFood == 0 and not state.data._lose:
state.data.scoreChange += 500
state.data._win = True
# Eat capsule
if( position in state.getCapsules() ):
state.data.capsules.remove( position )
state.data._capsuleEaten = position
# Reset all ghosts' scared timers
for index in range( 1, len( state.data.agentStates ) ):
state.data.agentStates[index].scaredTimer = SCARED_TIME
consume = staticmethod( consume )
class GhostRules:
"""
These functions dictate how ghosts interact with their environment.
"""
GHOST_SPEED=1.0
def getLegalActions( state, ghostIndex ):
"""
Ghosts cannot stop, and cannot turn around unless they
reach a dead end, but can turn 90 degrees at intersections.
"""
conf = state.getGhostState( ghostIndex ).configuration
possibleActions = Actions.getPossibleActions( conf, state.data.layout.walls )
reverse = Actions.reverseDirection( conf.direction )
if Directions.STOP in possibleActions:
possibleActions.remove( Directions.STOP )
if reverse in possibleActions and len( possibleActions ) > 1:
possibleActions.remove( reverse )
return possibleActions
getLegalActions = staticmethod( getLegalActions )
def applyAction( state, action, ghostIndex):
legal = GhostRules.getLegalActions( state, ghostIndex )
if action not in legal:
raise Exception("Illegal ghost action " + str(action))
ghostState = state.data.agentStates[ghostIndex]
speed = GhostRules.GHOST_SPEED
if ghostState.scaredTimer > 0: speed /= 2.0
vector = Actions.directionToVector( action, speed )
ghostState.configuration = ghostState.configuration.generateSuccessor( vector )
applyAction = staticmethod( applyAction )
def decrementTimer( ghostState):
timer = ghostState.scaredTimer
if timer == 1:
ghostState.configuration.pos = nearestPoint( ghostState.configuration.pos )
ghostState.scaredTimer = max( 0, timer - 1 )
decrementTimer = staticmethod( decrementTimer )
def checkDeath( state, agentIndex):
pacmanPosition = state.getPacmanPosition()
if agentIndex == 0: # Pacman just moved; Anyone can kill him
for index in range( 1, len( state.data.agentStates ) ):
ghostState = state.data.agentStates[index]
ghostPosition = ghostState.configuration.getPosition()
if GhostRules.canKill( pacmanPosition, ghostPosition ):
GhostRules.collide( state, ghostState, index )
else:
ghostState = state.data.agentStates[agentIndex]
ghostPosition = ghostState.configuration.getPosition()
if GhostRules.canKill( pacmanPosition, ghostPosition ):
GhostRules.collide( state, ghostState, agentIndex )
checkDeath = staticmethod( checkDeath )
def collide( state, ghostState, agentIndex):
if ghostState.scaredTimer > 0:
state.data.scoreChange += 200
GhostRules.placeGhost(state, ghostState)
ghostState.scaredTimer = 0
# Added for first-person
state.data._eaten[agentIndex] = True
else:
if not state.data._win:
state.data.scoreChange -= 500
state.data._lose = True
collide = staticmethod( collide )
def canKill( pacmanPosition, ghostPosition ):
return manhattanDistance( ghostPosition, pacmanPosition ) <= COLLISION_TOLERANCE
canKill = staticmethod( canKill )
def placeGhost(state, ghostState):
ghostState.configuration = ghostState.start
placeGhost = staticmethod( placeGhost )
#############################
# FRAMEWORK TO START A GAME #
#############################
def default(str):
return str + ' [Default: %default]'
def parseAgentArgs(str):
if str == None: return {}
pieces = str.split(',')
opts = {}
for p in pieces:
if '=' in p:
key, val = p.split('=')
else:
key,val = p, 1
opts[key] = val
return opts
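# Worked example (illustrative):
#     parseAgentArgs("epsilon=0.1,alpha=0.5,extrapolate")
#     returns {'epsilon': '0.1', 'alpha': '0.5', 'extrapolate': 1}
# (option names here are made up; values given with '=' stay strings)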
def readCommand( argv ):
"""
Processes the command used to run pacman from the command line.
"""
from optparse import OptionParser
usageStr = """
USAGE: python pacman.py <options>
EXAMPLES: (1) python pacman.py
- starts an interactive game
(2) python pacman.py --layout smallClassic --zoom 2
OR python pacman.py -l smallClassic -z 2
- starts an interactive game on a smaller board, zoomed in
"""
parser = OptionParser(usageStr)
parser.add_option('-n', '--numGames', dest='numGames', type='int',
help=default('the number of GAMES to play'), metavar='GAMES', default=1)
parser.add_option('-l', '--layout', dest='layout',
help=default('the LAYOUT_FILE from which to load the map layout'),
metavar='LAYOUT_FILE', default='mediumClassic')
parser.add_option('-p', '--pacman', dest='pacman',
help=default('the agent TYPE in the pacmanAgents module to use'),
metavar='TYPE', default='KeyboardAgent')
parser.add_option('-t', '--textGraphics', action='store_true', dest='textGraphics',
help='Display output as text only', default=False)
parser.add_option('-q', '--quietTextGraphics', action='store_true', dest='quietGraphics',
help='Generate minimal output and no graphics', default=False)
parser.add_option('-g', '--ghosts', dest='ghost',
help=default('the ghost agent TYPE in the ghostAgents module to use'),
metavar = 'TYPE', default='RandomGhost')
parser.add_option('-k', '--numghosts', type='int', dest='numGhosts',
help=default('The maximum number of ghosts to use'), default=4)
parser.add_option('-z', '--zoom', type='float', dest='zoom',
help=default('Zoom the size of the graphics window'), default=1.0)
parser.add_option('-f', '--fixRandomSeed', action='store_true', dest='fixRandomSeed',
help='Fixes the random seed to always play the same game', default=False)
parser.add_option('-r', '--recordActions', action='store_true', dest='record',
help='Writes game histories to a file (named by the time they were played)', default=False)
parser.add_option('--replay', dest='gameToReplay',
help='A recorded game file (pickle) to replay', default=None)
parser.add_option('-a','--agentArgs',dest='agentArgs',
help='Comma separated values sent to agent. e.g. "opt1=val1,opt2,opt3=val3"')
parser.add_option('-x', '--numTraining', dest='numTraining', type='int',
help=default('How many episodes are training (suppresses output)'), default=0)
parser.add_option('--frameTime', dest='frameTime', type='float',
help=default('Time to delay between frames; <0 means keyboard'), default=0.1)
parser.add_option('-c', '--catchExceptions', action='store_true', dest='catchExceptions',
help='Turns on exception handling and timeouts during games', default=False)
parser.add_option('--timeout', dest='timeout', type='int',
help=default('Maximum length of time an agent can spend computing in a single game'), default=30)
options, otherjunk = parser.parse_args(argv)
if len(otherjunk) != 0:
raise Exception('Command line input not understood: ' + str(otherjunk))
args = dict()
# Fix the random seed
if options.fixRandomSeed: random.seed('cs188')
# Choose a layout
args['layout'] = layout.getLayout( options.layout )
if args['layout'] == None: raise Exception("The layout " + options.layout + " cannot be found")
# Choose a Pacman agent
noKeyboard = options.gameToReplay == None and (options.textGraphics or options.quietGraphics)
pacmanType = loadAgent(options.pacman, noKeyboard)
agentOpts = parseAgentArgs(options.agentArgs)
if options.numTraining > 0:
args['numTraining'] = options.numTraining
if 'numTraining' not in agentOpts: agentOpts['numTraining'] = options.numTraining
pacman = pacmanType(**agentOpts) # Instantiate Pacman with agentArgs
args['pacman'] = pacman
# Don't display training games
if 'numTrain' in agentOpts:
options.numQuiet = int(agentOpts['numTrain'])
options.numIgnore = int(agentOpts['numTrain'])
# Choose a ghost agent
ghostType = loadAgent(options.ghost, noKeyboard)
args['ghosts'] = [ghostType( i+1 ) for i in range( options.numGhosts )]
# Choose a display format
if options.quietGraphics:
import textDisplay
args['display'] = textDisplay.NullGraphics()
elif options.textGraphics:
import textDisplay
textDisplay.SLEEP_TIME = options.frameTime
args['display'] = textDisplay.PacmanGraphics()
else:
import graphicsDisplay
args['display'] = graphicsDisplay.PacmanGraphics(options.zoom, frameTime = options.frameTime)
args['numGames'] = options.numGames
args['record'] = options.record
args['catchExceptions'] = options.catchExceptions
args['timeout'] = options.timeout
# Special case: recorded games don't use the runGames method or args structure
if options.gameToReplay != None:
print 'Replaying recorded game %s.' % options.gameToReplay
import cPickle
f = open(options.gameToReplay)
try: recorded = cPickle.load(f)
finally: f.close()
recorded['display'] = args['display']
replayGame(**recorded)
sys.exit(0)
return args
def loadAgent(pacman, nographics):
# Looks through all pythonPath Directories for the right module,
pythonPathStr = os.path.expandvars("$PYTHONPATH")
if pythonPathStr.find(';') == -1:
pythonPathDirs = pythonPathStr.split(':')
else:
pythonPathDirs = pythonPathStr.split(';')
pythonPathDirs.append('.')
for moduleDir in pythonPathDirs:
if not os.path.isdir(moduleDir): continue
moduleNames = [f for f in os.listdir(moduleDir) if f.endswith('gents.py')]
for modulename in moduleNames:
try:
module = __import__(modulename[:-3])
except ImportError:
continue
if pacman in dir(module):
if nographics and modulename == 'keyboardAgents.py':
raise Exception('Using the keyboard requires graphics (not text display)')
return getattr(module, pacman)
raise Exception('The agent ' + pacman + ' is not specified in any *Agents.py.')
def replayGame( layout, actions, display ):
import pacmanAgents, ghostAgents
rules = ClassicGameRules()
agents = [pacmanAgents.GreedyAgent()] + [ghostAgents.RandomGhost(i+1) for i in range(layout.getNumGhosts())]
game = rules.newGame( layout, agents[0], agents[1:], display )
state = game.state
display.initialize(state.data)
for action in actions:
# Execute the action
state = state.generateSuccessor( *action )
# Change the display
display.update( state.data )
# Allow for game specific conditions (winning, losing, etc.)
rules.process(state, game)
display.finish()
def runGames( layout, pacman, ghosts, display, numGames, record, numTraining = 0, catchExceptions=False, timeout=30 ):
import __main__
__main__.__dict__['_display'] = display
rules = ClassicGameRules(timeout)
games = []
for i in range( numGames ):
beQuiet = i < numTraining
if beQuiet:
# Suppress output and graphics
import textDisplay
gameDisplay = textDisplay.NullGraphics()
rules.quiet = True
else:
gameDisplay = display
rules.quiet = False
game = rules.newGame( layout, pacman, ghosts, gameDisplay, beQuiet, catchExceptions)
game.run()
if not beQuiet: games.append(game)
if record:
import time, cPickle
fname = ('recorded-game-%d' % (i + 1)) + '-'.join([str(t) for t in time.localtime()[1:6]])
f = file(fname, 'w')
components = {'layout': layout, 'actions': game.moveHistory}
cPickle.dump(components, f)
f.close()
if (numGames-numTraining) > 0:
scores = [game.state.getScore() for game in games]
wins = [game.state.isWin() for game in games]
winRate = wins.count(True)/ float(len(wins))
print 'Average Score:', sum(scores) / float(len(scores))
print 'Scores: ', ', '.join([str(score) for score in scores])
print 'Win Rate: %d/%d (%.2f)' % (wins.count(True), len(wins), winRate)
print 'Record: ', ', '.join([ ['Loss', 'Win'][int(w)] for w in wins])
return games
if __name__ == '__main__':
"""
The main function called when pacman.py is run
from the command line:
> python pacman.py
See the usage string for more details.
> python pacman.py --help
"""
args = readCommand( sys.argv[1:] ) # Get game components based on input
runGames( **args )
# import cProfile
# cProfile.run("runGames( **args )")
pass

52
reinforcement/pacmanAgents.py Normal file
View file

@ -0,0 +1,52 @@
# pacmanAgents.py
# ---------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
from pacman import Directions
from game import Agent
import random
import game
import util
class LeftTurnAgent(game.Agent):
"An agent that turns left at every opportunity"
def getAction(self, state):
legal = state.getLegalPacmanActions()
current = state.getPacmanState().configuration.direction
if current == Directions.STOP: current = Directions.NORTH
left = Directions.LEFT[current]
if left in legal: return left
if current in legal: return current
if Directions.RIGHT[current] in legal: return Directions.RIGHT[current]
if Directions.LEFT[left] in legal: return Directions.LEFT[left]
return Directions.STOP
class GreedyAgent(Agent):
def __init__(self, evalFn="scoreEvaluation"):
self.evaluationFunction = util.lookup(evalFn, globals())
assert self.evaluationFunction != None
def getAction(self, state):
# Generate candidate actions
legal = state.getLegalPacmanActions()
if Directions.STOP in legal: legal.remove(Directions.STOP)
successors = [(state.generateSuccessor(0, action), action) for action in legal]
scored = [(self.evaluationFunction(state), action) for state, action in successors]
bestScore = max(scored)[0]
bestActions = [pair[1] for pair in scored if pair[0] == bestScore]
return random.choice(bestActions)
def scoreEvaluation(state):
return state.getScore()

View file

@ -0,0 +1,18 @@
# projectParams.py
# ----------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
STUDENT_CODE_DEFAULT = 'analysis.py,qlearningAgents.py,valueIterationAgents.py'
PROJECT_TEST_CLASSES = 'reinforcementTestClasses.py'
PROJECT_NAME = 'Project 3: Reinforcement learning'
BONUS_PIC = False

View file

@ -0,0 +1,186 @@
# qlearningAgents.py
# ------------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
from game import *
from learningAgents import ReinforcementAgent
from featureExtractors import *
import random,util,math
class QLearningAgent(ReinforcementAgent):
"""
Q-Learning Agent
Functions you should fill in:
- computeValueFromQValues
- computeActionFromQValues
- getQValue
- getAction
- update
Instance variables you have access to
- self.epsilon (exploration prob)
- self.alpha (learning rate)
- self.discount (discount rate)
Functions you should use
- self.getLegalActions(state)
which returns legal actions for a state
"""
def __init__(self, **args):
"You can initialize Q-values here..."
ReinforcementAgent.__init__(self, **args)
"*** YOUR CODE HERE ***"
def getQValue(self, state, action):
"""
Returns Q(state,action)
Should return 0.0 if we have never seen a state,
or the Q node value otherwise.
"""
"*** YOUR CODE HERE ***"
util.raiseNotDefined()
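# Continuing the illustrative self.qvalues sketch from __init__ above:
#     return self.qvalues[(state, action)]
# util.Counter returns 0 for unseen keys, which matches the 0.0 requirement.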
def computeValueFromQValues(self, state):
"""
Returns max_action Q(state,action)
where the max is over legal actions. Note that if
there are no legal actions, which is the case at the
terminal state, you should return a value of 0.0.
"""
"*** YOUR CODE HERE ***"
util.raiseNotDefined()
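# One possible shape for this method (illustrative, not the graded solution):
#     legalActions = self.getLegalActions(state)
#     if not legalActions:
#         return 0.0
#     return max(self.getQValue(state, action) for action in legalActions)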
def computeActionFromQValues(self, state):
"""
Compute the best action to take in a state. Note that if there
are no legal actions, which is the case at the terminal state,
you should return None.
"""
"*** YOUR CODE HERE ***"
util.raiseNotDefined()
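# One possible shape; ties between equally good actions may be broken arbitrarily:
#     legalActions = self.getLegalActions(state)
#     if not legalActions:
#         return None
#     return max(legalActions, key=lambda action: self.getQValue(state, action))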
def getAction(self, state):
"""
Compute the action to take in the current state. With
probability self.epsilon, we should take a random action and
take the best policy action otherwise. Note that if there are
no legal actions, which is the case at the terminal state, you
should choose None as the action.
HINT: You might want to use util.flipCoin(prob)
HINT: To pick randomly from a list, use random.choice(list)
"""
# Pick Action
legalActions = self.getLegalActions(state)
action = None
"*** YOUR CODE HERE ***"
util.raiseNotDefined()
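# A hedged sketch of epsilon-greedy selection, using the hints above:
#     if not legalActions:
#         return None
#     if util.flipCoin(self.epsilon):
#         action = random.choice(legalActions)
#     else:
#         action = self.computeActionFromQValues(state)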
return action
def update(self, state, action, nextState, reward):
"""
The parent class calls this to observe a
state => action => nextState and reward transition.
You should do your Q-Value update here
NOTE: You should never call this function,
it will be called on your behalf
"""
"*** YOUR CODE HERE ***"
util.raiseNotDefined()
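# A sketch of the standard tabular update
# Q(s,a) <- (1 - alpha) * Q(s,a) + alpha * (r + gamma * max_a' Q(s',a')),
# again using the illustrative self.qvalues Counter:
#     sample = reward + self.discount * self.computeValueFromQValues(nextState)
#     self.qvalues[(state, action)] = (1 - self.alpha) * self.qvalues[(state, action)] + self.alpha * sample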
def getPolicy(self, state):
return self.computeActionFromQValues(state)
def getValue(self, state):
return self.computeValueFromQValues(state)
class PacmanQAgent(QLearningAgent):
"Exactly the same as QLearningAgent, but with different default parameters"
def __init__(self, epsilon=0.05,gamma=0.8,alpha=0.2, numTraining=0, **args):
"""
These default parameters can be changed from the pacman.py command line.
For example, to change the exploration rate, try:
python pacman.py -p PacmanQLearningAgent -a epsilon=0.1
alpha - learning rate
epsilon - exploration rate
gamma - discount factor
numTraining - number of training episodes, i.e., no learning after this many episodes
"""
args['epsilon'] = epsilon
args['gamma'] = gamma
args['alpha'] = alpha
args['numTraining'] = numTraining
self.index = 0 # This is always Pacman
QLearningAgent.__init__(self, **args)
def getAction(self, state):
"""
Simply calls the getAction method of QLearningAgent and then
informs parent of action for Pacman. Do not change or remove this
method.
"""
action = QLearningAgent.getAction(self,state)
self.doAction(state,action)
return action
class ApproximateQAgent(PacmanQAgent):
"""
ApproximateQLearningAgent
You should only have to override getQValue
and update. All other QLearningAgent functions
should work as is.
"""
def __init__(self, extractor='IdentityExtractor', **args):
self.featExtractor = util.lookup(extractor, globals())()
PacmanQAgent.__init__(self, **args)
self.weights = util.Counter()
def getWeights(self):
return self.weights
def getQValue(self, state, action):
"""
Should return Q(state,action) = w * featureVector
where * is the dotProduct operator
"""
"*** YOUR CODE HERE ***"
util.raiseNotDefined()
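# A sketch assuming the extractor returns a util.Counter of features and that
# util.Counter's * operator is the dot product the docstring refers to:
#     features = self.featExtractor.getFeatures(state, action)
#     return self.getWeights() * features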
def update(self, state, action, nextState, reward):
"""
Should update your weights based on transition
"""
"*** YOUR CODE HERE ***"
util.raiseNotDefined()
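# A sketch of the approximate Q-learning weight update
# w_i <- w_i + alpha * difference * f_i(s,a), where
# difference = (r + gamma * max_a' Q(s',a')) - Q(s,a):
#     difference = (reward + self.discount * self.computeValueFromQValues(nextState)) - self.getQValue(state, action)
#     features = self.featExtractor.getFeatures(state, action)
#     for feature, value in features.items():
#         self.weights[feature] += self.alpha * difference * value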
def final(self, state):
"Called at the end of each game."
# call the super-class final method
PacmanQAgent.final(self, state)
# did we finish training?
if self.episodesSoFar == self.numTraining:
# you might want to print your weights here for debugging
"*** YOUR CODE HERE ***"
pass

View file

@ -0,0 +1,924 @@
# reinforcementTestClasses.py
# ---------------------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
import testClasses
import random, math, traceback, sys, os
import layout, textDisplay, pacman, gridworld
import time
from util import Counter, TimeoutFunction, FixedRandom
from collections import defaultdict
from pprint import PrettyPrinter
from hashlib import sha1
pp = PrettyPrinter()
VERBOSE = False
import gridworld
LIVINGREWARD = -0.1
NOISE = 0.2
class ValueIterationTest(testClasses.TestCase):
def __init__(self, question, testDict):
super(ValueIterationTest, self).__init__(question, testDict)
self.discount = float(testDict['discount'])
self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
iterations = int(testDict['valueIterations'])
if 'noise' in testDict: self.grid.setNoise(float(testDict['noise']))
if 'livingReward' in testDict: self.grid.setLivingReward(float(testDict['livingReward']))
maxPreIterations = 10
self.numsIterationsForDisplay = range(min(iterations, maxPreIterations))
self.testOutFile = testDict['test_out_file']
if maxPreIterations < iterations:
self.numsIterationsForDisplay.append(iterations)
def writeFailureFile(self, string):
with open(self.testOutFile, 'w') as handle:
handle.write(string)
def removeFailureFileIfExists(self):
if os.path.exists(self.testOutFile):
os.remove(self.testOutFile)
def execute(self, grades, moduleDict, solutionDict):
failureOutputFileString = ''
failureOutputStdString = ''
for n in self.numsIterationsForDisplay:
checkPolicy = (n == self.numsIterationsForDisplay[-1])
testPass, stdOutString, fileOutString = self.executeNIterations(grades, moduleDict, solutionDict, n, checkPolicy)
failureOutputStdString += stdOutString
failureOutputFileString += fileOutString
if not testPass:
self.addMessage(failureOutputStdString)
self.addMessage('For more details to help you debug, see test output file %s\n\n' % self.testOutFile)
self.writeFailureFile(failureOutputFileString)
return self.testFail(grades)
self.removeFailureFileIfExists()
return self.testPass(grades)
def executeNIterations(self, grades, moduleDict, solutionDict, n, checkPolicy):
testPass = True
valuesPretty, qValuesPretty, actions, policyPretty = self.runAgent(moduleDict, n)
stdOutString = ''
fileOutString = ''
valuesKey = "values_k_%d" % n
if self.comparePrettyValues(valuesPretty, solutionDict[valuesKey]):
fileOutString += "Values at iteration %d are correct.\n" % n
fileOutString += " Student/correct solution:\n %s\n" % self.prettyValueSolutionString(valuesKey, valuesPretty)
else:
testPass = False
outString = "Values at iteration %d are NOT correct.\n" % n
outString += " Student solution:\n %s\n" % self.prettyValueSolutionString(valuesKey, valuesPretty)
outString += " Correct solution:\n %s\n" % self.prettyValueSolutionString(valuesKey, solutionDict[valuesKey])
stdOutString += outString
fileOutString += outString
for action in actions:
qValuesKey = 'q_values_k_%d_action_%s' % (n, action)
qValues = qValuesPretty[action]
if self.comparePrettyValues(qValues, solutionDict[qValuesKey]):
fileOutString += "Q-Values at iteration %d for action %s are correct.\n" % (n, action)
fileOutString += " Student/correct solution:\n %s\n" % self.prettyValueSolutionString(qValuesKey, qValues)
else:
testPass = False
outString = "Q-Values at iteration %d for action %s are NOT correct.\n" % (n, action)
outString += " Student solution:\n %s\n" % self.prettyValueSolutionString(qValuesKey, qValues)
outString += " Correct solution:\n %s\n" % self.prettyValueSolutionString(qValuesKey, solutionDict[qValuesKey])
stdOutString += outString
fileOutString += outString
if checkPolicy:
if not self.comparePrettyValues(policyPretty, solutionDict['policy']):
testPass = False
outString = "Policy is NOT correct.\n"
outString += " Student solution:\n %s\n" % self.prettyValueSolutionString('policy', policyPretty)
outString += " Correct solution:\n %s\n" % self.prettyValueSolutionString('policy', solutionDict['policy'])
stdOutString += outString
fileOutString += outString
return testPass, stdOutString, fileOutString
def writeSolution(self, moduleDict, filePath):
with open(filePath, 'w') as handle:
policyPretty = ''
actions = []
for n in self.numsIterationsForDisplay:
valuesPretty, qValuesPretty, actions, policyPretty = self.runAgent(moduleDict, n)
handle.write(self.prettyValueSolutionString('values_k_%d' % n, valuesPretty))
for action in actions:
handle.write(self.prettyValueSolutionString('q_values_k_%d_action_%s' % (n, action), qValuesPretty[action]))
handle.write(self.prettyValueSolutionString('policy', policyPretty))
handle.write(self.prettyValueSolutionString('actions', '\n'.join(actions) + '\n'))
return True
def runAgent(self, moduleDict, numIterations):
agent = moduleDict['valueIterationAgents'].ValueIterationAgent(self.grid, discount=self.discount, iterations=numIterations)
states = self.grid.getStates()
actions = list(reduce(lambda a, b: set(a).union(b), [self.grid.getPossibleActions(state) for state in states]))
values = {}
qValues = {}
policy = {}
for state in states:
values[state] = agent.getValue(state)
policy[state] = agent.computeActionFromValues(state)
possibleActions = self.grid.getPossibleActions(state)
for action in actions:
if not qValues.has_key(action):
qValues[action] = {}
if action in possibleActions:
qValues[action][state] = agent.computeQValueFromValues(state, action)
else:
qValues[action][state] = None
valuesPretty = self.prettyValues(values)
policyPretty = self.prettyPolicy(policy)
qValuesPretty = {}
for action in actions:
qValuesPretty[action] = self.prettyValues(qValues[action])
return (valuesPretty, qValuesPretty, actions, policyPretty)
def prettyPrint(self, elements, formatString):
pretty = ''
states = self.grid.getStates()
for ybar in range(self.grid.grid.height):
y = self.grid.grid.height-1-ybar
row = []
for x in range(self.grid.grid.width):
if (x, y) in states:
value = elements[(x, y)]
if value is None:
row.append(' illegal')
else:
row.append(formatString.format(elements[(x,y)]))
else:
row.append('_' * 10)
pretty += ' %s\n' % (" ".join(row), )
pretty += '\n'
return pretty
def prettyValues(self, values):
return self.prettyPrint(values, '{0:10.4f}')
def prettyPolicy(self, policy):
return self.prettyPrint(policy, '{0:10s}')
def prettyValueSolutionString(self, name, pretty):
return '%s: """\n%s\n"""\n\n' % (name, pretty.rstrip())
def comparePrettyValues(self, aPretty, bPretty, tolerance=0.01):
aList = self.parsePrettyValues(aPretty)
bList = self.parsePrettyValues(bPretty)
if len(aList) != len(bList):
return False
for a, b in zip(aList, bList):
try:
aNum = float(a)
bNum = float(b)
# error = abs((aNum - bNum) / ((aNum + bNum) / 2.0))
error = abs(aNum - bNum)
if error > tolerance:
return False
except ValueError:
if a.strip() != b.strip():
return False
return True
def parsePrettyValues(self, pretty):
values = pretty.split()
return values
class ApproximateQLearningTest(testClasses.TestCase):
def __init__(self, question, testDict):
super(ApproximateQLearningTest, self).__init__(question, testDict)
self.discount = float(testDict['discount'])
self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
if 'noise' in testDict: self.grid.setNoise(float(testDict['noise']))
if 'livingReward' in testDict: self.grid.setLivingReward(float(testDict['livingReward']))
self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
self.env = gridworld.GridworldEnvironment(self.grid)
self.epsilon = float(testDict['epsilon'])
self.learningRate = float(testDict['learningRate'])
self.extractor = 'IdentityExtractor'
if 'extractor' in testDict:
self.extractor = testDict['extractor']
self.opts = {'actionFn': self.env.getPossibleActions, 'epsilon': self.epsilon, 'gamma': self.discount, 'alpha': self.learningRate}
numExperiences = int(testDict['numExperiences'])
maxPreExperiences = 10
self.numsExperiencesForDisplay = range(min(numExperiences, maxPreExperiences))
self.testOutFile = testDict['test_out_file']
if maxPreExperiences < numExperiences:
self.numsExperiencesForDisplay.append(numExperiences)
def writeFailureFile(self, string):
with open(self.testOutFile, 'w') as handle:
handle.write(string)
def removeFailureFileIfExists(self):
if os.path.exists(self.testOutFile):
os.remove(self.testOutFile)
def execute(self, grades, moduleDict, solutionDict):
failureOutputFileString = ''
failureOutputStdString = ''
for n in self.numsExperiencesForDisplay:
testPass, stdOutString, fileOutString = self.executeNExperiences(grades, moduleDict, solutionDict, n)
failureOutputStdString += stdOutString
failureOutputFileString += fileOutString
if not testPass:
self.addMessage(failureOutputStdString)
self.addMessage('For more details to help you debug, see test output file %s\n\n' % self.testOutFile)
self.writeFailureFile(failureOutputFileString)
return self.testFail(grades)
self.removeFailureFileIfExists()
return self.testPass(grades)
def executeNExperiences(self, grades, moduleDict, solutionDict, n):
testPass = True
qValuesPretty, weights, actions, lastExperience = self.runAgent(moduleDict, n)
stdOutString = ''
fileOutString = "==================== Iteration %d ====================\n" % n
if lastExperience is not None:
fileOutString += "Agent observed the transition (startState = %s, action = %s, endState = %s, reward = %f)\n\n" % lastExperience
weightsKey = 'weights_k_%d' % n
if weights == eval(solutionDict[weightsKey]):
fileOutString += "Weights at iteration %d are correct." % n
fileOutString += " Student/correct solution:\n\n%s\n\n" % pp.pformat(weights)
for action in actions:
qValuesKey = 'q_values_k_%d_action_%s' % (n, action)
qValues = qValuesPretty[action]
if self.comparePrettyValues(qValues, solutionDict[qValuesKey]):
fileOutString += "Q-Values at iteration %d for action '%s' are correct." % (n, action)
fileOutString += " Student/correct solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, qValues)
else:
testPass = False
outString = "Q-Values at iteration %d for action '%s' are NOT correct." % (n, action)
outString += " Student solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, qValues)
outString += " Correct solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, solutionDict[qValuesKey])
stdOutString += outString
fileOutString += outString
return testPass, stdOutString, fileOutString
def writeSolution(self, moduleDict, filePath):
with open(filePath, 'w') as handle:
for n in self.numsExperiencesForDisplay:
qValuesPretty, weights, actions, _ = self.runAgent(moduleDict, n)
handle.write(self.prettyValueSolutionString('weights_k_%d' % n, pp.pformat(weights)))
for action in actions:
handle.write(self.prettyValueSolutionString('q_values_k_%d_action_%s' % (n, action), qValuesPretty[action]))
return True
def runAgent(self, moduleDict, numExperiences):
agent = moduleDict['qlearningAgents'].ApproximateQAgent(extractor=self.extractor, **self.opts)
states = filter(lambda state : len(self.grid.getPossibleActions(state)) > 0, self.grid.getStates())
states.sort()
randObj = FixedRandom().random
# choose a random start state and a random possible action from that state
# get the next state and reward from the transition function
lastExperience = None
for i in range(numExperiences):
startState = randObj.choice(states)
action = randObj.choice(self.grid.getPossibleActions(startState))
(endState, reward) = self.env.getRandomNextState(startState, action, randObj=randObj)
lastExperience = (startState, action, endState, reward)
agent.update(*lastExperience)
actions = list(reduce(lambda a, b: set(a).union(b), [self.grid.getPossibleActions(state) for state in states]))
qValues = {}
weights = agent.getWeights()
for state in states:
possibleActions = self.grid.getPossibleActions(state)
for action in actions:
if not qValues.has_key(action):
qValues[action] = {}
if action in possibleActions:
qValues[action][state] = agent.getQValue(state, action)
else:
qValues[action][state] = None
qValuesPretty = {}
for action in actions:
qValuesPretty[action] = self.prettyValues(qValues[action])
return (qValuesPretty, weights, actions, lastExperience)
def prettyPrint(self, elements, formatString):
pretty = ''
states = self.grid.getStates()
for ybar in range(self.grid.grid.height):
y = self.grid.grid.height-1-ybar
row = []
for x in range(self.grid.grid.width):
if (x, y) in states:
value = elements[(x, y)]
if value is None:
row.append(' illegal')
else:
row.append(formatString.format(elements[(x,y)]))
else:
row.append('_' * 10)
pretty += ' %s\n' % (" ".join(row), )
pretty += '\n'
return pretty
def prettyValues(self, values):
return self.prettyPrint(values, '{0:10.4f}')
def prettyPolicy(self, policy):
return self.prettyPrint(policy, '{0:10s}')
def prettyValueSolutionString(self, name, pretty):
return '%s: """\n%s\n"""\n\n' % (name, pretty.rstrip())
def comparePrettyValues(self, aPretty, bPretty, tolerance=0.01):
aList = self.parsePrettyValues(aPretty)
bList = self.parsePrettyValues(bPretty)
if len(aList) != len(bList):
return False
for a, b in zip(aList, bList):
try:
aNum = float(a)
bNum = float(b)
# error = abs((aNum - bNum) / ((aNum + bNum) / 2.0))
error = abs(aNum - bNum)
if error > tolerance:
return False
except ValueError:
if a.strip() != b.strip():
return False
return True
def parsePrettyValues(self, pretty):
values = pretty.split()
return values
class QLearningTest(testClasses.TestCase):
def __init__(self, question, testDict):
super(QLearningTest, self).__init__(question, testDict)
self.discount = float(testDict['discount'])
self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
if 'noise' in testDict: self.grid.setNoise(float(testDict['noise']))
if 'livingReward' in testDict: self.grid.setLivingReward(float(testDict['livingReward']))
self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
self.env = gridworld.GridworldEnvironment(self.grid)
self.epsilon = float(testDict['epsilon'])
self.learningRate = float(testDict['learningRate'])
self.opts = {'actionFn': self.env.getPossibleActions, 'epsilon': self.epsilon, 'gamma': self.discount, 'alpha': self.learningRate}
numExperiences = int(testDict['numExperiences'])
maxPreExperiences = 10
self.numsExperiencesForDisplay = range(min(numExperiences, maxPreExperiences))
self.testOutFile = testDict['test_out_file']
if maxPreExperiences < numExperiences:
self.numsExperiencesForDisplay.append(numExperiences)
def writeFailureFile(self, string):
with open(self.testOutFile, 'w') as handle:
handle.write(string)
def removeFailureFileIfExists(self):
if os.path.exists(self.testOutFile):
os.remove(self.testOutFile)
def execute(self, grades, moduleDict, solutionDict):
failureOutputFileString = ''
failureOutputStdString = ''
for n in self.numsExperiencesForDisplay:
checkValuesAndPolicy = (n == self.numsExperiencesForDisplay[-1])
testPass, stdOutString, fileOutString = self.executeNExperiences(grades, moduleDict, solutionDict, n, checkValuesAndPolicy)
failureOutputStdString += stdOutString
failureOutputFileString += fileOutString
if not testPass:
self.addMessage(failureOutputStdString)
self.addMessage('For more details to help you debug, see test output file %s\n\n' % self.testOutFile)
self.writeFailureFile(failureOutputFileString)
return self.testFail(grades)
self.removeFailureFileIfExists()
return self.testPass(grades)
def executeNExperiences(self, grades, moduleDict, solutionDict, n, checkValuesAndPolicy):
testPass = True
valuesPretty, qValuesPretty, actions, policyPretty, lastExperience = self.runAgent(moduleDict, n)
stdOutString = ''
fileOutString = "==================== Iteration %d ====================\n" % n
if lastExperience is not None:
fileOutString += "Agent observed the transition (startState = %s, action = %s, endState = %s, reward = %f)\n\n\n" % lastExperience
for action in actions:
qValuesKey = 'q_values_k_%d_action_%s' % (n, action)
qValues = qValuesPretty[action]
if self.comparePrettyValues(qValues, solutionDict[qValuesKey]):
fileOutString += "Q-Values at iteration %d for action '%s' are correct." % (n, action)
fileOutString += " Student/correct solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, qValues)
else:
testPass = False
outString = "Q-Values at iteration %d for action '%s' are NOT correct." % (n, action)
outString += " Student solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, qValues)
outString += " Correct solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, solutionDict[qValuesKey])
stdOutString += outString
fileOutString += outString
if checkValuesAndPolicy:
if not self.comparePrettyValues(valuesPretty, solutionDict['values']):
testPass = False
outString = "Values are NOT correct."
outString += " Student solution:\n\t%s" % self.prettyValueSolutionString('values', valuesPretty)
outString += " Correct solution:\n\t%s" % self.prettyValueSolutionString('values', solutionDict['values'])
stdOutString += outString
fileOutString += outString
if not self.comparePrettyValues(policyPretty, solutionDict['policy']):
testPass = False
outString = "Policy is NOT correct."
outString += " Student solution:\n\t%s" % self.prettyValueSolutionString('policy', policyPretty)
outString += " Correct solution:\n\t%s" % self.prettyValueSolutionString('policy', solutionDict['policy'])
stdOutString += outString
fileOutString += outString
return testPass, stdOutString, fileOutString
def writeSolution(self, moduleDict, filePath):
with open(filePath, 'w') as handle:
valuesPretty = ''
policyPretty = ''
for n in self.numsExperiencesForDisplay:
valuesPretty, qValuesPretty, actions, policyPretty, _ = self.runAgent(moduleDict, n)
for action in actions:
handle.write(self.prettyValueSolutionString('q_values_k_%d_action_%s' % (n, action), qValuesPretty[action]))
handle.write(self.prettyValueSolutionString('values', valuesPretty))
handle.write(self.prettyValueSolutionString('policy', policyPretty))
return True
def runAgent(self, moduleDict, numExperiences):
agent = moduleDict['qlearningAgents'].QLearningAgent(**self.opts)
states = filter(lambda state : len(self.grid.getPossibleActions(state)) > 0, self.grid.getStates())
states.sort()
randObj = FixedRandom().random
# choose a random start state and a random possible action from that state
# get the next state and reward from the transition function
lastExperience = None
for i in range(numExperiences):
startState = randObj.choice(states)
action = randObj.choice(self.grid.getPossibleActions(startState))
(endState, reward) = self.env.getRandomNextState(startState, action, randObj=randObj)
lastExperience = (startState, action, endState, reward)
agent.update(*lastExperience)
actions = list(reduce(lambda a, b: set(a).union(b), [self.grid.getPossibleActions(state) for state in states]))
values = {}
qValues = {}
policy = {}
for state in states:
values[state] = agent.computeValueFromQValues(state)
policy[state] = agent.computeActionFromQValues(state)
possibleActions = self.grid.getPossibleActions(state)
for action in actions:
if not qValues.has_key(action):
qValues[action] = {}
if action in possibleActions:
qValues[action][state] = agent.getQValue(state, action)
else:
qValues[action][state] = None
valuesPretty = self.prettyValues(values)
policyPretty = self.prettyPolicy(policy)
qValuesPretty = {}
for action in actions:
qValuesPretty[action] = self.prettyValues(qValues[action])
return (valuesPretty, qValuesPretty, actions, policyPretty, lastExperience)
def prettyPrint(self, elements, formatString):
pretty = ''
states = self.grid.getStates()
for ybar in range(self.grid.grid.height):
y = self.grid.grid.height-1-ybar
row = []
for x in range(self.grid.grid.width):
if (x, y) in states:
value = elements[(x, y)]
if value is None:
row.append(' illegal')
else:
row.append(formatString.format(elements[(x,y)]))
else:
row.append('_' * 10)
pretty += ' %s\n' % (" ".join(row), )
pretty += '\n'
return pretty
def prettyValues(self, values):
return self.prettyPrint(values, '{0:10.4f}')
def prettyPolicy(self, policy):
return self.prettyPrint(policy, '{0:10s}')
def prettyValueSolutionString(self, name, pretty):
return '%s: """\n%s\n"""\n\n' % (name, pretty.rstrip())
def comparePrettyValues(self, aPretty, bPretty, tolerance=0.01):
aList = self.parsePrettyValues(aPretty)
bList = self.parsePrettyValues(bPretty)
if len(aList) != len(bList):
return False
for a, b in zip(aList, bList):
try:
aNum = float(a)
bNum = float(b)
# error = abs((aNum - bNum) / ((aNum + bNum) / 2.0))
error = abs(aNum - bNum)
if error > tolerance:
return False
except ValueError:
if a.strip() != b.strip():
return False
return True
def parsePrettyValues(self, pretty):
values = pretty.split()
return values
class EpsilonGreedyTest(testClasses.TestCase):
def __init__(self, question, testDict):
super(EpsilonGreedyTest, self).__init__(question, testDict)
self.discount = float(testDict['discount'])
self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
if 'noise' in testDict: self.grid.setNoise(float(testDict['noise']))
if 'livingReward' in testDict: self.grid.setLivingReward(float(testDict['livingReward']))
self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
self.env = gridworld.GridworldEnvironment(self.grid)
self.epsilon = float(testDict['epsilon'])
self.learningRate = float(testDict['learningRate'])
self.numExperiences = int(testDict['numExperiences'])
self.numIterations = int(testDict['iterations'])
self.opts = {'actionFn': self.env.getPossibleActions, 'epsilon': self.epsilon, 'gamma': self.discount, 'alpha': self.learningRate}
def execute(self, grades, moduleDict, solutionDict):
if self.testEpsilonGreedy(moduleDict):
return self.testPass(grades)
else:
return self.testFail(grades)
def writeSolution(self, moduleDict, filePath):
with open(filePath, 'w') as handle:
handle.write('# This is the solution file for %s.\n' % self.path)
handle.write('# File intentionally blank.\n')
return True
def runAgent(self, moduleDict):
agent = moduleDict['qlearningAgents'].QLearningAgent(**self.opts)
states = filter(lambda state : len(self.grid.getPossibleActions(state)) > 0, self.grid.getStates())
states.sort()
randObj = FixedRandom().random
# choose a random start state and a random possible action from that state
# get the next state and reward from the transition function
for i in range(self.numExperiences):
startState = randObj.choice(states)
action = randObj.choice(self.grid.getPossibleActions(startState))
(endState, reward) = self.env.getRandomNextState(startState, action, randObj=randObj)
agent.update(startState, action, endState, reward)
return agent
def testEpsilonGreedy(self, moduleDict, tolerance=0.025):
agent = self.runAgent(moduleDict)
for state in self.grid.getStates():
numLegalActions = len(agent.getLegalActions(state))
if numLegalActions <= 1:
continue
numGreedyChoices = 0
optimalAction = agent.computeActionFromQValues(state)
for iteration in range(self.numIterations):
# assume that their computeActionFromQValues implementation is correct (q4 tests this)
if agent.getAction(state) == optimalAction:
numGreedyChoices += 1
# e = epsilon, g = # greedy actions, n = numIterations, k = numLegalActions
# g = n * [(1-e) + e/k] -> e = (n - g) / (n - n/k)
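# sanity check of this algebra: with e = 0.5, k = 2, n = 100, we expect
# g ~= 100 * (0.5 + 0.25) = 75, and (100 - 75) / (100 - 50) = 0.5 recovers e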
empiricalEpsilonNumerator = self.numIterations - numGreedyChoices
empiricalEpsilonDenominator = self.numIterations - self.numIterations / float(numLegalActions)
empiricalEpsilon = empiricalEpsilonNumerator / empiricalEpsilonDenominator
error = abs(empiricalEpsilon - self.epsilon)
if error > tolerance:
self.addMessage("Epsilon-greedy action selection is not correct.")
self.addMessage("Actual epsilon = %f; student empirical epsilon = %f; error = %f > tolerance = %f" % (self.epsilon, empiricalEpsilon, error, tolerance))
return False
return True
### q6
class Question6Test(testClasses.TestCase):
def __init__(self, question, testDict):
super(Question6Test, self).__init__(question, testDict)
def execute(self, grades, moduleDict, solutionDict):
studentSolution = moduleDict['analysis'].question6()
studentSolution = str(studentSolution).strip().lower()
hashedSolution = sha1(studentSolution).hexdigest()
if hashedSolution == '46729c96bb1e4081fdc81a8ff74b3e5db8fba415':
return self.testPass(grades)
else:
self.addMessage("Solution is not correct.")
self.addMessage(" Student solution: %s" % (studentSolution,))
return self.testFail(grades)
def writeSolution(self, moduleDict, filePath):
handle = open(filePath, 'w')
handle.write('# This is the solution file for %s.\n' % self.path)
handle.write('# File intentionally blank.\n')
handle.close()
return True
### q7/q8
### =====
## Average wins of a pacman agent
class EvalAgentTest(testClasses.TestCase):
def __init__(self, question, testDict):
super(EvalAgentTest, self).__init__(question, testDict)
self.pacmanParams = testDict['pacmanParams']
self.scoreMinimum = int(testDict['scoreMinimum']) if 'scoreMinimum' in testDict else None
self.nonTimeoutMinimum = int(testDict['nonTimeoutMinimum']) if 'nonTimeoutMinimum' in testDict else None
self.winsMinimum = int(testDict['winsMinimum']) if 'winsMinimum' in testDict else None
self.scoreThresholds = [int(s) for s in testDict.get('scoreThresholds','').split()]
self.nonTimeoutThresholds = [int(s) for s in testDict.get('nonTimeoutThresholds','').split()]
self.winsThresholds = [int(s) for s in testDict.get('winsThresholds','').split()]
self.maxPoints = sum([len(t) for t in [self.scoreThresholds, self.nonTimeoutThresholds, self.winsThresholds]])
def execute(self, grades, moduleDict, solutionDict):
self.addMessage('Grading agent using command: python pacman.py %s'% (self.pacmanParams,))
startTime = time.time()
games = pacman.runGames(** pacman.readCommand(self.pacmanParams.split(' ')))
totalTime = time.time() - startTime
numGames = len(games)
stats = {'time': totalTime, 'wins': [g.state.isWin() for g in games].count(True),
'games': games, 'scores': [g.state.getScore() for g in games],
'timeouts': [g.agentTimeout for g in games].count(True), 'crashes': [g.agentCrashed for g in games].count(True)}
averageScore = sum(stats['scores']) / float(len(stats['scores']))
nonTimeouts = numGames - stats['timeouts']
wins = stats['wins']
def gradeThreshold(value, minimum, thresholds, name):
points = 0
passed = (minimum == None) or (value >= minimum)
if passed:
for t in thresholds:
if value >= t:
points += 1
return (passed, points, value, minimum, thresholds, name)
results = [gradeThreshold(averageScore, self.scoreMinimum, self.scoreThresholds, "average score"),
gradeThreshold(nonTimeouts, self.nonTimeoutMinimum, self.nonTimeoutThresholds, "games not timed out"),
gradeThreshold(wins, self.winsMinimum, self.winsThresholds, "wins")]
totalPoints = 0
for passed, points, value, minimum, thresholds, name in results:
if minimum == None and len(thresholds)==0:
continue
# print passed, points, value, minimum, thresholds, name
totalPoints += points
if not passed:
assert points == 0
self.addMessage("%s %s (fail: below minimum value %s)" % (value, name, minimum))
else:
self.addMessage("%s %s (%s of %s points)" % (value, name, points, len(thresholds)))
if minimum != None:
self.addMessage(" Grading scheme:")
self.addMessage(" < %s: fail" % (minimum,))
if len(thresholds)==0 or minimum != thresholds[0]:
self.addMessage(" >= %s: 0 points" % (minimum,))
for idx, threshold in enumerate(thresholds):
self.addMessage(" >= %s: %s points" % (threshold, idx+1))
elif len(thresholds) > 0:
self.addMessage(" Grading scheme:")
self.addMessage(" < %s: 0 points" % (thresholds[0],))
for idx, threshold in enumerate(thresholds):
self.addMessage(" >= %s: %s points" % (threshold, idx+1))
if any([not passed for passed, _, _, _, _, _ in results]):
totalPoints = 0
return self.testPartial(grades, totalPoints, self.maxPoints)
def writeSolution(self, moduleDict, filePath):
with open(filePath, 'w') as handle:
handle.write('# This is the solution file for %s.\n' % self.path)
handle.write('# File intentionally blank.\n')
return True
### q2/q3
### =====
## For each parameter setting, compute the optimal policy and see if it satisfies some properties
def followPath(policy, start, numSteps=100):
state = start
path = []
for i in range(numSteps):
if state not in policy:
break
action = policy[state]
path.append("(%s,%s)" % state)
if action == 'north': nextState = state[0],state[1]+1
if action == 'south': nextState = state[0],state[1]-1
if action == 'east': nextState = state[0]+1,state[1]
if action == 'west': nextState = state[0]-1,state[1]
if action == 'exit' or action == None:
path.append('TERMINAL_STATE')
break
state = nextState
return path
def parseGrid(string):
grid = [[entry.strip() for entry in line.split()] for line in string.split('\n')]
for row in grid:
for x, col in enumerate(row):
try:
col = int(col)
except:
pass
if col == "_":
col = ' '
row[x] = col
return gridworld.makeGrid(grid)
def computePolicy(moduleDict, grid, discount):
valueIterator = moduleDict['valueIterationAgents'].ValueIterationAgent(grid, discount=discount)
policy = {}
for state in grid.getStates():
policy[state] = valueIterator.computeActionFromValues(state)
return policy
class GridPolicyTest(testClasses.TestCase):
def __init__(self, question, testDict):
super(GridPolicyTest, self).__init__(question, testDict)
# Function in module in analysis that returns (discount, noise)
self.parameterFn = testDict['parameterFn']
self.question2 = testDict.get('question2', 'false').lower() == 'true'
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
self.gridText = testDict['grid']
self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
self.gridName = testDict['gridName']
# Policy specification
# _ policy choice not checked
# N, E, S, W policy action must be north, east, south, west
#
self.policy = parseGrid(testDict['policy'])
# State the most probable path must visit
# (x,y) for a particular location; (0,0) is bottom left
# terminal for the terminal state
self.pathVisits = testDict.get('pathVisits', None)
# State the most probable path must not visit
# (x,y) for a particular location; (0,0) is bottom left
# terminal for the terminal state
self.pathNotVisits = testDict.get('pathNotVisits', None)
def execute(self, grades, moduleDict, solutionDict):
if not hasattr(moduleDict['analysis'], self.parameterFn):
self.addMessage('Method not implemented: analysis.%s' % (self.parameterFn,))
return self.testFail(grades)
result = getattr(moduleDict['analysis'], self.parameterFn)()
if type(result) == str and result.lower()[0:3] == "not":
self.addMessage('Actually, it is possible!')
return self.testFail(grades)
if self.question2:
livingReward = None
try:
discount, noise = result
discount = float(discount)
noise = float(noise)
except:
self.addMessage('Did not return a (discount, noise) pair; instead analysis.%s returned: %s' % (self.parameterFn, result))
return self.testFail(grades)
if discount != 0.9 and noise != 0.2:
self.addMessage('Must change either the discount or the noise, not both. Returned (discount, noise) = %s' % (result,))
return self.testFail(grades)
else:
try:
discount, noise, livingReward = result
discount = float(discount)
noise = float(noise)
livingReward = float(livingReward)
except:
self.addMessage('Did not return a (discount, noise, living reward) triple; instead analysis.%s returned: %s' % (self.parameterFn, result))
return self.testFail(grades)
self.grid.setNoise(noise)
if livingReward != None:
self.grid.setLivingReward(livingReward)
start = self.grid.getStartState()
policy = computePolicy(moduleDict, self.grid, discount)
## check policy
actionMap = {'N': 'north', 'E': 'east', 'S': 'south', 'W': 'west', 'X': 'exit'}
width, height = self.policy.width, self.policy.height
policyPassed = True
for x in range(width):
for y in range(height):
if self.policy[x][y] in actionMap and policy[(x,y)] != actionMap[self.policy[x][y]]:
differPoint = (x,y)
policyPassed = False
if not policyPassed:
self.addMessage('Policy not correct.')
self.addMessage(' Student policy at %s: %s' % (differPoint, policy[differPoint]))
self.addMessage(' Correct policy at %s: %s' % (differPoint, actionMap[self.policy[differPoint[0]][differPoint[1]]]))
self.addMessage(' Student policy:')
self.printPolicy(policy, False)
self.addMessage(" Legend: N,S,E,W at states which move north etc, X at states which exit,")
self.addMessage(" . at states where the policy is not defined (e.g. walls)")
self.addMessage(' Correct policy specification:')
self.printPolicy(self.policy, True)
self.addMessage(" Legend: N,S,E,W for states in which the student policy must move north etc,")
self.addMessage(" _ for states where it doesn't matter what the student policy does.")
self.printGridworld()
return self.testFail(grades)
## check path
path = followPath(policy, self.grid.getStartState())
if self.pathVisits != None and self.pathVisits not in path:
self.addMessage('Policy does not visit state %s when moving without noise.' % (self.pathVisits,))
self.addMessage(' States visited: %s' % (path,))
self.addMessage(' Student policy:')
self.printPolicy(policy, False)
self.addMessage(" Legend: N,S,E,W at states which move north etc, X at states which exit,")
self.addMessage(" . at states where policy not defined")
self.printGridworld()
return self.testFail(grades)
if self.pathNotVisits != None and self.pathNotVisits in path:
self.addMessage('Policy visits state %s when moving without noise.' % (self.pathNotVisits,))
self.addMessage(' States visited: %s' % (path,))
self.addMessage(' Student policy:')
self.printPolicy(policy, False)
self.addMessage(" Legend: N,S,E,W at states which move north etc, X at states which exit,")
self.addMessage(" . at states where policy not defined")
self.printGridworld()
return self.testFail(grades)
return self.testPass(grades)
def printGridworld(self):
self.addMessage(' Gridworld:')
for line in self.gridText.split('\n'):
self.addMessage(' ' + line)
self.addMessage(' Legend: # wall, _ empty, S start, numbers terminal states with that reward.')
def printPolicy(self, policy, policyTypeIsGrid):
if policyTypeIsGrid:
legend = {'N': 'N', 'E': 'E', 'S': 'S', 'W': 'W', ' ': '_'}
else:
legend = {'north': 'N', 'east': 'E', 'south': 'S', 'west': 'W', 'exit': 'X', '.': '.', ' ': '_'}
for ybar in range(self.grid.grid.height):
y = self.grid.grid.height-1-ybar
if policyTypeIsGrid:
self.addMessage(" %s" % (" ".join([legend[policy[x][y]] for x in range(self.grid.grid.width)]),))
else:
self.addMessage(" %s" % (" ".join([legend[policy.get((x,y), '.')] for x in range(self.grid.grid.width)]),))
# for state in sorted(self.grid.getStates()):
# if state != 'TERMINAL_STATE':
# self.addMessage(' (%s,%s) %s' % (state[0], state[1], policy[state]))
def writeSolution(self, moduleDict, filePath):
with open(filePath, 'w') as handle:
handle.write('# This is the solution file for %s.\n' % self.path)
handle.write('# File intentionally blank.\n')
return True

View file

@ -0,0 +1,189 @@
# testClasses.py
# --------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
# import modules from python standard library
import inspect
import re
import sys
# Class which models a question in a project. Note that questions have a
# maximum number of points they are worth, and are composed of a series of
# test cases
class Question(object):
def raiseNotDefined(self):
print 'Method not implemented: %s' % inspect.stack()[1][3]
sys.exit(1)
def __init__(self, questionDict, display):
self.maxPoints = int(questionDict['max_points'])
self.testCases = []
self.display = display
def getDisplay(self):
return self.display
def getMaxPoints(self):
return self.maxPoints
# Note that 'thunk' must be a function which accepts a single argument,
# namely a 'grading' object
def addTestCase(self, testCase, thunk):
self.testCases.append((testCase, thunk))
def execute(self, grades):
self.raiseNotDefined()
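# A small illustrative sketch of how a concrete Question is driven (the variable
# names below are hypothetical, not taken from the autograder itself):
#     question = PassAllTestsQuestion(questionDict, display)
#     question.addTestCase(testCase, lambda grades: testCase.execute(grades, moduleDict, solutionDict))
#     question.execute(grades)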
# Question in which all test cases must be passed in order to receive credit
class PassAllTestsQuestion(Question):
def execute(self, grades):
# TODO: is this the right way to use grades? The autograder doesn't seem to use it.
testsFailed = False
grades.assignZeroCredit()
for _, f in self.testCases:
if not f(grades):
testsFailed = True
if testsFailed:
grades.fail("Tests failed.")
else:
grades.assignFullCredit()
# Question in which partial credit is given for test cases with a ``points'' property.
# All other tests are mandatory and must be passed.
class HackedPartialCreditQuestion(Question):
def execute(self, grades):
# TODO: is this the right way to use grades? The autograder doesn't seem to use it.
grades.assignZeroCredit()
points = 0
passed = True
for testCase, f in self.testCases:
testResult = f(grades)
if "points" in testCase.testDict:
if testResult: points += float(testCase.testDict["points"])
else:
passed = passed and testResult
## FIXME: Below terrible hack to match q3's logic
if int(points) == self.maxPoints and not passed:
grades.assignZeroCredit()
else:
grades.addPoints(int(points))
class Q6PartialCreditQuestion(Question):
"""Fails any test which returns False, otherwise doesn't effect the grades object.
Partial credit tests will add the required points."""
def execute(self, grades):
grades.assignZeroCredit()
results = []
for _, f in self.testCases:
results.append(f(grades))
if False in results:
grades.assignZeroCredit()
class PartialCreditQuestion(Question):
"""Fails any test which returns False, otherwise doesn't effect the grades object.
Partial credit tests will add the required points."""
def execute(self, grades):
grades.assignZeroCredit()
for _, f in self.testCases:
if not f(grades):
grades.assignZeroCredit()
grades.fail("Tests failed.")
return False
class NumberPassedQuestion(Question):
"""Grade is the number of test cases passed."""
def execute(self, grades):
grades.addPoints([f(grades) for _, f in self.testCases].count(True))
# Template modeling a generic test case
class TestCase(object):
def raiseNotDefined(self):
print 'Method not implemented: %s' % inspect.stack()[1][3]
sys.exit(1)
def getPath(self):
return self.path
def __init__(self, question, testDict):
self.question = question
self.testDict = testDict
self.path = testDict['path']
self.messages = []
def __str__(self):
self.raiseNotDefined()
def execute(self, grades, moduleDict, solutionDict):
self.raiseNotDefined()
def writeSolution(self, moduleDict, filePath):
self.raiseNotDefined()
return True
# Tests should call the following methods for grading
# to ensure a uniform format for test output.
#
# TODO: this is hairy, but we need to fix grading.py's interface
# to get a nice hierarchical project - question - test structure,
# then these should be moved into Question proper.
def testPass(self, grades):
grades.addMessage('PASS: %s' % (self.path,))
for line in self.messages:
grades.addMessage(' %s' % (line,))
return True
def testFail(self, grades):
grades.addMessage('FAIL: %s' % (self.path,))
for line in self.messages:
grades.addMessage(' %s' % (line,))
return False
# This should really be question level?
#
def testPartial(self, grades, points, maxPoints):
grades.addPoints(points)
extraCredit = max(0, points - maxPoints)
regularCredit = points - extraCredit
grades.addMessage('%s: %s (%s of %s points)' % ("PASS" if points >= maxPoints else "FAIL", self.path, regularCredit, maxPoints))
if extraCredit > 0:
grades.addMessage('EXTRA CREDIT: %s points' % (extraCredit,))
for line in self.messages:
grades.addMessage(' %s' % (line,))
return True
def addMessage(self, message):
self.messages.extend(message.split('\n'))

View file

@ -0,0 +1,85 @@
# testParser.py
# -------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
import re
import sys
class TestParser(object):
def __init__(self, path):
# save the path to the test file
self.path = path
def removeComments(self, rawlines):
# remove any portion of a line following a '#' symbol
fixed_lines = []
for l in rawlines:
idx = l.find('#')
if idx == -1:
fixed_lines.append(l)
else:
fixed_lines.append(l[0:idx])
return '\n'.join(fixed_lines)
def parse(self):
# read in the test case and remove comments
test = {}
with open(self.path) as handle:
raw_lines = handle.read().split('\n')
test_text = self.removeComments(raw_lines)
test['__raw_lines__'] = raw_lines
test['path'] = self.path
test['__emit__'] = []
lines = test_text.split('\n')
i = 0
# read a property in each loop cycle
while(i < len(lines)):
# skip blank lines
if re.match('\A\s*\Z', lines[i]):
test['__emit__'].append(("raw", raw_lines[i]))
i += 1
continue
m = re.match('\A([^"]*?):\s*"([^"]*)"\s*\Z', lines[i])
if m:
test[m.group(1)] = m.group(2)
test['__emit__'].append(("oneline", m.group(1)))
i += 1
continue
m = re.match('\A([^"]*?):\s*"""\s*\Z', lines[i])
if m:
msg = []
i += 1
while(not re.match('\A\s*"""\s*\Z', lines[i])):
msg.append(raw_lines[i])
i += 1
test[m.group(1)] = '\n'.join(msg)
test['__emit__'].append(("multiline", m.group(1)))
i += 1
continue
print 'error parsing test file: %s' % self.path
sys.exit(1)
return test
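# A small illustrative sketch of the expected usage (the path is hypothetical):
#     testDict = TestParser('test_cases/q1/1-tinygrid.test').parse()
#     testDict['discount']   # -> the quoted value from the "discount: ..." line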
def emitTestDict(testDict, handle):
for kind, data in testDict['__emit__']:
if kind == "raw":
handle.write(data + "\n")
elif kind == "oneline":
handle.write('%s: "%s"\n' % (data, testDict[data]))
elif kind == "multiline":
handle.write('%s: """\n%s\n"""\n' % (data, testDict[data]))
else:
raise Exception("Bad __emit__")

View file

View file

@ -0,0 +1,410 @@
values_k_0: """
0.0000
0.0000
0.0000
"""
q_values_k_0_action_north: """
illegal
0.0000
illegal
"""
q_values_k_0_action_east: """
illegal
0.0000
illegal
"""
q_values_k_0_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_0_action_south: """
illegal
0.0000
illegal
"""
q_values_k_0_action_west: """
illegal
0.0000
illegal
"""
values_k_1: """
-10.0000
0.0000
10.0000
"""
q_values_k_1_action_north: """
illegal
-5.0000
illegal
"""
q_values_k_1_action_east: """
illegal
0.0000
illegal
"""
q_values_k_1_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_1_action_south: """
illegal
5.0000
illegal
"""
q_values_k_1_action_west: """
illegal
0.0000
illegal
"""
values_k_2: """
-10.0000
5.0000
10.0000
"""
q_values_k_2_action_north: """
illegal
-5.0000
illegal
"""
q_values_k_2_action_east: """
illegal
2.5000
illegal
"""
q_values_k_2_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_2_action_south: """
illegal
5.0000
illegal
"""
q_values_k_2_action_west: """
illegal
2.5000
illegal
"""
values_k_3: """
-10.0000
5.0000
10.0000
"""
q_values_k_3_action_north: """
illegal
-5.0000
illegal
"""
q_values_k_3_action_east: """
illegal
2.5000
illegal
"""
q_values_k_3_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_3_action_south: """
illegal
5.0000
illegal
"""
q_values_k_3_action_west: """
illegal
2.5000
illegal
"""
values_k_4: """
-10.0000
5.0000
10.0000
"""
q_values_k_4_action_north: """
illegal
-5.0000
illegal
"""
q_values_k_4_action_east: """
illegal
2.5000
illegal
"""
q_values_k_4_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_4_action_south: """
illegal
5.0000
illegal
"""
q_values_k_4_action_west: """
illegal
2.5000
illegal
"""
values_k_5: """
-10.0000
5.0000
10.0000
"""
q_values_k_5_action_north: """
illegal
-5.0000
illegal
"""
q_values_k_5_action_east: """
illegal
2.5000
illegal
"""
q_values_k_5_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_5_action_south: """
illegal
5.0000
illegal
"""
q_values_k_5_action_west: """
illegal
2.5000
illegal
"""
values_k_6: """
-10.0000
5.0000
10.0000
"""
q_values_k_6_action_north: """
illegal
-5.0000
illegal
"""
q_values_k_6_action_east: """
illegal
2.5000
illegal
"""
q_values_k_6_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_6_action_south: """
illegal
5.0000
illegal
"""
q_values_k_6_action_west: """
illegal
2.5000
illegal
"""
values_k_7: """
-10.0000
5.0000
10.0000
"""
q_values_k_7_action_north: """
illegal
-5.0000
illegal
"""
q_values_k_7_action_east: """
illegal
2.5000
illegal
"""
q_values_k_7_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_7_action_south: """
illegal
5.0000
illegal
"""
q_values_k_7_action_west: """
illegal
2.5000
illegal
"""
values_k_8: """
-10.0000
5.0000
10.0000
"""
q_values_k_8_action_north: """
illegal
-5.0000
illegal
"""
q_values_k_8_action_east: """
illegal
2.5000
illegal
"""
q_values_k_8_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_8_action_south: """
illegal
5.0000
illegal
"""
q_values_k_8_action_west: """
illegal
2.5000
illegal
"""
values_k_9: """
-10.0000
5.0000
10.0000
"""
q_values_k_9_action_north: """
illegal
-5.0000
illegal
"""
q_values_k_9_action_east: """
illegal
2.5000
illegal
"""
q_values_k_9_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_9_action_south: """
illegal
5.0000
illegal
"""
q_values_k_9_action_west: """
illegal
2.5000
illegal
"""
values_k_100: """
-10.0000
5.0000
10.0000
"""
q_values_k_100_action_north: """
illegal
-5.0000
illegal
"""
q_values_k_100_action_east: """
illegal
2.5000
illegal
"""
q_values_k_100_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_100_action_south: """
illegal
5.0000
illegal
"""
q_values_k_100_action_west: """
illegal
2.5000
illegal
"""
policy: """
exit
south
exit
"""
actions: """
north
east
exit
south
west
"""

View file

@ -0,0 +1,22 @@
class: "ValueIterationTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
-10
S
10
"""
discount: "0.5"
noise: "0.0"
livingReward: "0.0"
epsilon: "0.5"
learningRate: "0.1"
numExperiences: "100"
valueIterations: "100"
iterations: "10000"

View file

@ -0,0 +1,410 @@
values_k_0: """
0.0000
0.0000
0.0000
"""
q_values_k_0_action_north: """
illegal
0.0000
illegal
"""
q_values_k_0_action_east: """
illegal
0.0000
illegal
"""
q_values_k_0_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_0_action_south: """
illegal
0.0000
illegal
"""
q_values_k_0_action_west: """
illegal
0.0000
illegal
"""
values_k_1: """
-10.0000
0.0000
10.0000
"""
q_values_k_1_action_north: """
illegal
-5.6250
illegal
"""
q_values_k_1_action_east: """
illegal
0.0000
illegal
"""
q_values_k_1_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_1_action_south: """
illegal
5.6250
illegal
"""
q_values_k_1_action_west: """
illegal
0.0000
illegal
"""
values_k_2: """
-10.0000
5.6250
10.0000
"""
q_values_k_2_action_north: """
illegal
-4.5703
illegal
"""
q_values_k_2_action_east: """
illegal
3.1641
illegal
"""
q_values_k_2_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_2_action_south: """
illegal
6.6797
illegal
"""
q_values_k_2_action_west: """
illegal
3.1641
illegal
"""
values_k_3: """
-10.0000
6.6797
10.0000
"""
q_values_k_3_action_north: """
illegal
-4.3726
illegal
"""
q_values_k_3_action_east: """
illegal
3.7573
illegal
"""
q_values_k_3_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_3_action_south: """
illegal
6.8774
illegal
"""
q_values_k_3_action_west: """
illegal
3.7573
illegal
"""
values_k_4: """
-10.0000
6.8774
10.0000
"""
q_values_k_4_action_north: """
illegal
-4.3355
illegal
"""
q_values_k_4_action_east: """
illegal
3.8686
illegal
"""
q_values_k_4_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_4_action_south: """
illegal
6.9145
illegal
"""
q_values_k_4_action_west: """
illegal
3.8686
illegal
"""
values_k_5: """
-10.0000
6.9145
10.0000
"""
q_values_k_5_action_north: """
illegal
-4.3285
illegal
"""
q_values_k_5_action_east: """
illegal
3.8894
illegal
"""
q_values_k_5_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_5_action_south: """
illegal
6.9215
illegal
"""
q_values_k_5_action_west: """
illegal
3.8894
illegal
"""
values_k_6: """
-10.0000
6.9215
10.0000
"""
q_values_k_6_action_north: """
illegal
-4.3272
illegal
"""
q_values_k_6_action_east: """
illegal
3.8933
illegal
"""
q_values_k_6_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_6_action_south: """
illegal
6.9228
illegal
"""
q_values_k_6_action_west: """
illegal
3.8933
illegal
"""
values_k_7: """
-10.0000
6.9228
10.0000
"""
q_values_k_7_action_north: """
illegal
-4.3270
illegal
"""
q_values_k_7_action_east: """
illegal
3.8941
illegal
"""
q_values_k_7_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_7_action_south: """
illegal
6.9230
illegal
"""
q_values_k_7_action_west: """
illegal
3.8941
illegal
"""
values_k_8: """
-10.0000
6.9230
10.0000
"""
q_values_k_8_action_north: """
illegal
-4.3269
illegal
"""
q_values_k_8_action_east: """
illegal
3.8942
illegal
"""
q_values_k_8_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_8_action_south: """
illegal
6.9231
illegal
"""
q_values_k_8_action_west: """
illegal
3.8942
illegal
"""
values_k_9: """
-10.0000
6.9231
10.0000
"""
q_values_k_9_action_north: """
illegal
-4.3269
illegal
"""
q_values_k_9_action_east: """
illegal
3.8942
illegal
"""
q_values_k_9_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_9_action_south: """
illegal
6.9231
illegal
"""
q_values_k_9_action_west: """
illegal
3.8942
illegal
"""
values_k_100: """
-10.0000
6.9231
10.0000
"""
q_values_k_100_action_north: """
illegal
-4.3269
illegal
"""
q_values_k_100_action_east: """
illegal
3.8942
illegal
"""
q_values_k_100_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_100_action_south: """
illegal
6.9231
illegal
"""
q_values_k_100_action_west: """
illegal
3.8942
illegal
"""
policy: """
exit
south
exit
"""
actions: """
north
east
exit
south
west
"""

View file

@ -0,0 +1,22 @@
class: "ValueIterationTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
-10
S
10
"""
discount: "0.75"
noise: "0.25"
livingReward: "0.0"
epsilon: "0.5"
learningRate: "0.1"
numExperiences: "100"
valueIterations: "100"
iterations: "10000"

View file

@ -0,0 +1,678 @@
values_k_0: """
__________ 0.0000 __________
0.0000 0.0000 0.0000
0.0000 0.0000 0.0000
0.0000 0.0000 0.0000
0.0000 0.0000 0.0000
0.0000 0.0000 0.0000
__________ 0.0000 __________
"""
q_values_k_0_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_0_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_0_action_exit: """
__________ 10.0000 __________
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
__________ 1.0000 __________
"""
q_values_k_0_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_0_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
values_k_1: """
__________ 10.0000 __________
-100.0000 0.0000 -100.0000
-100.0000 0.0000 -100.0000
-100.0000 0.0000 -100.0000
-100.0000 0.0000 -100.0000
-100.0000 0.0000 -100.0000
__________ 1.0000 __________
"""
q_values_k_1_action_north: """
__________ illegal __________
illegal -0.8500 illegal
illegal -8.5000 illegal
illegal -8.5000 illegal
illegal -8.5000 illegal
illegal -8.5000 illegal
__________ illegal __________
"""
q_values_k_1_action_east: """
__________ illegal __________
illegal -76.0750 illegal
illegal -76.5000 illegal
illegal -76.5000 illegal
illegal -76.5000 illegal
illegal -76.4575 illegal
__________ illegal __________
"""
q_values_k_1_action_exit: """
__________ 10.0000 __________
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
__________ 1.0000 __________
"""
q_values_k_1_action_south: """
__________ illegal __________
illegal -8.5000 illegal
illegal -8.5000 illegal
illegal -8.5000 illegal
illegal -8.5000 illegal
illegal -7.7350 illegal
__________ illegal __________
"""
q_values_k_1_action_west: """
__________ illegal __________
illegal -76.0750 illegal
illegal -76.5000 illegal
illegal -76.5000 illegal
illegal -76.5000 illegal
illegal -76.4575 illegal
__________ illegal __________
"""
values_k_2: """
__________ 10.0000 __________
-100.0000 -0.8500 -100.0000
-100.0000 -8.5000 -100.0000
-100.0000 -8.5000 -100.0000
-100.0000 -8.5000 -100.0000
-100.0000 -7.7350 -100.0000
__________ 1.0000 __________
"""
q_values_k_2_action_north: """
__________ illegal __________
illegal -0.8500 illegal
illegal -9.1502 illegal
illegal -15.0025 illegal
illegal -15.0025 illegal
illegal -15.0025 illegal
__________ illegal __________
"""
q_values_k_2_action_east: """
__________ illegal __________
illegal -76.4363 illegal
illegal -76.8974 illegal
illegal -77.2225 illegal
illegal -77.1900 illegal
illegal -76.8187 illegal
__________ illegal __________
"""
q_values_k_2_action_exit: """
__________ 10.0000 __________
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
__________ 1.0000 __________
"""
q_values_k_2_action_south: """
__________ illegal __________
illegal -15.0025 illegal
illegal -15.0025 illegal
illegal -15.0025 illegal
illegal -14.4173 illegal
illegal -7.7350 illegal
__________ illegal __________
"""
q_values_k_2_action_west: """
__________ illegal __________
illegal -76.4363 illegal
illegal -76.8974 illegal
illegal -77.2225 illegal
illegal -77.1900 illegal
illegal -76.8187 illegal
__________ illegal __________
"""
values_k_3: """
__________ 10.0000 __________
-100.0000 -0.8500 -100.0000
-100.0000 -9.1502 -100.0000
-100.0000 -15.0025 -100.0000
-100.0000 -14.4173 -100.0000
-100.0000 -7.7350 -100.0000
__________ 1.0000 __________
"""
q_values_k_3_action_north: """
__________ illegal __________
illegal -0.8500 illegal
illegal -9.1502 illegal
illegal -15.4999 illegal
illegal -19.9769 illegal
illegal -19.5292 illegal
__________ illegal __________
"""
q_values_k_3_action_east: """
__________ illegal __________
illegal -76.4639 illegal
illegal -77.1737 illegal
illegal -77.5016 illegal
illegal -77.4663 illegal
illegal -77.0702 illegal
__________ illegal __________
"""
q_values_k_3_action_exit: """
__________ 10.0000 __________
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
__________ 1.0000 __________
"""
q_values_k_3_action_south: """
__________ illegal __________
illegal -15.4999 illegal
illegal -19.9769 illegal
illegal -19.5292 illegal
illegal -14.4173 illegal
illegal -7.7350 illegal
__________ illegal __________
"""
q_values_k_3_action_west: """
__________ illegal __________
illegal -76.4639 illegal
illegal -77.1737 illegal
illegal -77.5016 illegal
illegal -77.4663 illegal
illegal -77.0702 illegal
__________ illegal __________
"""
values_k_4: """
__________ 10.0000 __________
-100.0000 -0.8500 -100.0000
-100.0000 -9.1502 -100.0000
-100.0000 -15.4999 -100.0000
-100.0000 -14.4173 -100.0000
-100.0000 -7.7350 -100.0000
__________ 1.0000 __________
"""
q_values_k_4_action_north: """
__________ illegal __________
illegal -0.8500 illegal
illegal -9.1502 illegal
illegal -15.4999 illegal
illegal -20.3575 illegal
illegal -19.5292 illegal
__________ illegal __________
"""
q_values_k_4_action_east: """
__________ illegal __________
illegal -76.4639 illegal
illegal -77.1949 illegal
illegal -77.5016 illegal
illegal -77.4875 illegal
illegal -77.0702 illegal
__________ illegal __________
"""
q_values_k_4_action_exit: """
__________ 10.0000 __________
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
__________ 1.0000 __________
"""
q_values_k_4_action_south: """
__________ illegal __________
illegal -15.4999 illegal
illegal -20.3575 illegal
illegal -19.5292 illegal
illegal -14.4173 illegal
illegal -7.7350 illegal
__________ illegal __________
"""
q_values_k_4_action_west: """
__________ illegal __________
illegal -76.4639 illegal
illegal -77.1949 illegal
illegal -77.5016 illegal
illegal -77.4875 illegal
illegal -77.0702 illegal
__________ illegal __________
"""
values_k_5: """
__________ 10.0000 __________
-100.0000 -0.8500 -100.0000
-100.0000 -9.1502 -100.0000
-100.0000 -15.4999 -100.0000
-100.0000 -14.4173 -100.0000
-100.0000 -7.7350 -100.0000
__________ 1.0000 __________
"""
q_values_k_5_action_north: """
__________ illegal __________
illegal -0.8500 illegal
illegal -9.1502 illegal
illegal -15.4999 illegal
illegal -20.3575 illegal
illegal -19.5292 illegal
__________ illegal __________
"""
q_values_k_5_action_east: """
__________ illegal __________
illegal -76.4639 illegal
illegal -77.1949 illegal
illegal -77.5016 illegal
illegal -77.4875 illegal
illegal -77.0702 illegal
__________ illegal __________
"""
q_values_k_5_action_exit: """
__________ 10.0000 __________
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
__________ 1.0000 __________
"""
q_values_k_5_action_south: """
__________ illegal __________
illegal -15.4999 illegal
illegal -20.3575 illegal
illegal -19.5292 illegal
illegal -14.4173 illegal
illegal -7.7350 illegal
__________ illegal __________
"""
q_values_k_5_action_west: """
__________ illegal __________
illegal -76.4639 illegal
illegal -77.1949 illegal
illegal -77.5016 illegal
illegal -77.4875 illegal
illegal -77.0702 illegal
__________ illegal __________
"""
values_k_6: """
__________ 10.0000 __________
-100.0000 -0.8500 -100.0000
-100.0000 -9.1502 -100.0000
-100.0000 -15.4999 -100.0000
-100.0000 -14.4173 -100.0000
-100.0000 -7.7350 -100.0000
__________ 1.0000 __________
"""
q_values_k_6_action_north: """
__________ illegal __________
illegal -0.8500 illegal
illegal -9.1502 illegal
illegal -15.4999 illegal
illegal -20.3575 illegal
illegal -19.5292 illegal
__________ illegal __________
"""
q_values_k_6_action_east: """
__________ illegal __________
illegal -76.4639 illegal
illegal -77.1949 illegal
illegal -77.5016 illegal
illegal -77.4875 illegal
illegal -77.0702 illegal
__________ illegal __________
"""
q_values_k_6_action_exit: """
__________ 10.0000 __________
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
__________ 1.0000 __________
"""
q_values_k_6_action_south: """
__________ illegal __________
illegal -15.4999 illegal
illegal -20.3575 illegal
illegal -19.5292 illegal
illegal -14.4173 illegal
illegal -7.7350 illegal
__________ illegal __________
"""
q_values_k_6_action_west: """
__________ illegal __________
illegal -76.4639 illegal
illegal -77.1949 illegal
illegal -77.5016 illegal
illegal -77.4875 illegal
illegal -77.0702 illegal
__________ illegal __________
"""
values_k_7: """
__________ 10.0000 __________
-100.0000 -0.8500 -100.0000
-100.0000 -9.1502 -100.0000
-100.0000 -15.4999 -100.0000
-100.0000 -14.4173 -100.0000
-100.0000 -7.7350 -100.0000
__________ 1.0000 __________
"""
q_values_k_7_action_north: """
__________ illegal __________
illegal -0.8500 illegal
illegal -9.1502 illegal
illegal -15.4999 illegal
illegal -20.3575 illegal
illegal -19.5292 illegal
__________ illegal __________
"""
q_values_k_7_action_east: """
__________ illegal __________
illegal -76.4639 illegal
illegal -77.1949 illegal
illegal -77.5016 illegal
illegal -77.4875 illegal
illegal -77.0702 illegal
__________ illegal __________
"""
q_values_k_7_action_exit: """
__________ 10.0000 __________
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
__________ 1.0000 __________
"""
q_values_k_7_action_south: """
__________ illegal __________
illegal -15.4999 illegal
illegal -20.3575 illegal
illegal -19.5292 illegal
illegal -14.4173 illegal
illegal -7.7350 illegal
__________ illegal __________
"""
q_values_k_7_action_west: """
__________ illegal __________
illegal -76.4639 illegal
illegal -77.1949 illegal
illegal -77.5016 illegal
illegal -77.4875 illegal
illegal -77.0702 illegal
__________ illegal __________
"""
values_k_8: """
__________ 10.0000 __________
-100.0000 -0.8500 -100.0000
-100.0000 -9.1502 -100.0000
-100.0000 -15.4999 -100.0000
-100.0000 -14.4173 -100.0000
-100.0000 -7.7350 -100.0000
__________ 1.0000 __________
"""
q_values_k_8_action_north: """
__________ illegal __________
illegal -0.8500 illegal
illegal -9.1502 illegal
illegal -15.4999 illegal
illegal -20.3575 illegal
illegal -19.5292 illegal
__________ illegal __________
"""
q_values_k_8_action_east: """
__________ illegal __________
illegal -76.4639 illegal
illegal -77.1949 illegal
illegal -77.5016 illegal
illegal -77.4875 illegal
illegal -77.0702 illegal
__________ illegal __________
"""
q_values_k_8_action_exit: """
__________ 10.0000 __________
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
__________ 1.0000 __________
"""
q_values_k_8_action_south: """
__________ illegal __________
illegal -15.4999 illegal
illegal -20.3575 illegal
illegal -19.5292 illegal
illegal -14.4173 illegal
illegal -7.7350 illegal
__________ illegal __________
"""
q_values_k_8_action_west: """
__________ illegal __________
illegal -76.4639 illegal
illegal -77.1949 illegal
illegal -77.5016 illegal
illegal -77.4875 illegal
illegal -77.0702 illegal
__________ illegal __________
"""
values_k_9: """
__________ 10.0000 __________
-100.0000 -0.8500 -100.0000
-100.0000 -9.1502 -100.0000
-100.0000 -15.4999 -100.0000
-100.0000 -14.4173 -100.0000
-100.0000 -7.7350 -100.0000
__________ 1.0000 __________
"""
q_values_k_9_action_north: """
__________ illegal __________
illegal -0.8500 illegal
illegal -9.1502 illegal
illegal -15.4999 illegal
illegal -20.3575 illegal
illegal -19.5292 illegal
__________ illegal __________
"""
q_values_k_9_action_east: """
__________ illegal __________
illegal -76.4639 illegal
illegal -77.1949 illegal
illegal -77.5016 illegal
illegal -77.4875 illegal
illegal -77.0702 illegal
__________ illegal __________
"""
q_values_k_9_action_exit: """
__________ 10.0000 __________
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
__________ 1.0000 __________
"""
q_values_k_9_action_south: """
__________ illegal __________
illegal -15.4999 illegal
illegal -20.3575 illegal
illegal -19.5292 illegal
illegal -14.4173 illegal
illegal -7.7350 illegal
__________ illegal __________
"""
q_values_k_9_action_west: """
__________ illegal __________
illegal -76.4639 illegal
illegal -77.1949 illegal
illegal -77.5016 illegal
illegal -77.4875 illegal
illegal -77.0702 illegal
__________ illegal __________
"""
values_k_100: """
__________ 10.0000 __________
-100.0000 -0.8500 -100.0000
-100.0000 -9.1502 -100.0000
-100.0000 -15.4999 -100.0000
-100.0000 -14.4173 -100.0000
-100.0000 -7.7350 -100.0000
__________ 1.0000 __________
"""
q_values_k_100_action_north: """
__________ illegal __________
illegal -0.8500 illegal
illegal -9.1502 illegal
illegal -15.4999 illegal
illegal -20.3575 illegal
illegal -19.5292 illegal
__________ illegal __________
"""
q_values_k_100_action_east: """
__________ illegal __________
illegal -76.4639 illegal
illegal -77.1949 illegal
illegal -77.5016 illegal
illegal -77.4875 illegal
illegal -77.0702 illegal
__________ illegal __________
"""
q_values_k_100_action_exit: """
__________ 10.0000 __________
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
__________ 1.0000 __________
"""
q_values_k_100_action_south: """
__________ illegal __________
illegal -15.4999 illegal
illegal -20.3575 illegal
illegal -19.5292 illegal
illegal -14.4173 illegal
illegal -7.7350 illegal
__________ illegal __________
"""
q_values_k_100_action_west: """
__________ illegal __________
illegal -76.4639 illegal
illegal -77.1949 illegal
illegal -77.5016 illegal
illegal -77.4875 illegal
illegal -77.0702 illegal
__________ illegal __________
"""
policy: """
__________ exit __________
exit north exit
exit north exit
exit north exit
exit south exit
exit south exit
__________ exit __________
"""
actions: """
north
east
exit
south
west
"""

View file

@ -0,0 +1,27 @@
class: "ValueIterationTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
# 10 #
-100 _ -100
-100 _ -100
-100 _ -100
-100 _ -100
-100 S -100
# 1 #
"""
gridName: "bridgeGrid"
discount: "0.85"
noise: "0.1"
livingReward: "0.0"
epsilon: "0.5"
learningRate: "0.1"
numExperiences: "500"
valueIterations: "100"
iterations: "10000"

View file

@ -0,0 +1,544 @@
values_k_0: """
0.0000 0.0000 0.0000 0.0000 0.0000
0.0000 0.0000 __________ 0.0000 0.0000
0.0000 0.0000 0.0000 0.0000 0.0000
0.0000 0.0000 __________ __________ 0.0000
0.0000 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_0_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_0_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_0_action_exit: """
-10.0000 illegal 10.0000 illegal illegal
-10.0000 illegal __________ illegal illegal
-10.0000 illegal 1.0000 illegal illegal
-10.0000 illegal __________ __________ illegal
-10.0000 illegal illegal illegal illegal
"""
q_values_k_0_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_0_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
values_k_1: """
-10.0000 0.0000 10.0000 0.0000 0.0000
-10.0000 0.0000 __________ 0.0000 0.0000
-10.0000 0.0000 1.0000 0.0000 0.0000
-10.0000 0.0000 __________ __________ 0.0000
-10.0000 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_1_action_north: """
illegal 0.0000 illegal 0.9000 0.0000
illegal -0.9000 __________ 0.0000 0.0000
illegal -0.8100 illegal 0.0900 0.0000
illegal -0.9000 __________ __________ 0.0000
illegal -0.9000 0.0000 0.0000 0.0000
"""
q_values_k_1_action_east: """
illegal 7.2000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.7200 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_1_action_exit: """
-10.0000 illegal 10.0000 illegal illegal
-10.0000 illegal __________ illegal illegal
-10.0000 illegal 1.0000 illegal illegal
-10.0000 illegal __________ __________ illegal
-10.0000 illegal illegal illegal illegal
"""
q_values_k_1_action_south: """
illegal 0.0000 illegal 0.9000 0.0000
illegal -0.9000 __________ 0.0000 0.0000
illegal -0.8100 illegal 0.0900 0.0000
illegal -0.9000 __________ __________ 0.0000
illegal -0.9000 0.0000 0.0000 0.0000
"""
q_values_k_1_action_west: """
illegal -7.2000 illegal 7.2000 0.0000
illegal -7.2000 __________ 0.0000 0.0000
illegal -7.2000 illegal 0.7200 0.0000
illegal -7.2000 __________ __________ 0.0000
illegal -7.2000 0.0000 0.0000 0.0000
"""
values_k_2: """
-10.0000 7.2000 10.0000 7.2000 0.0000
-10.0000 0.0000 __________ 0.0000 0.0000
-10.0000 0.7200 1.0000 0.7200 0.0000
-10.0000 0.0000 __________ __________ 0.0000
-10.0000 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_2_action_north: """
illegal 5.1840 illegal 6.0840 0.6480
illegal 4.2840 __________ 5.1840 0.0000
illegal -0.8100 illegal 0.0900 0.0648
illegal -0.3816 __________ __________ 0.0000
illegal -0.9000 0.0000 0.0000 0.0000
"""
q_values_k_2_action_east: """
illegal 7.8480 illegal 0.6480 0.0000
illegal 0.7128 __________ 0.7128 0.0000
illegal 0.7200 illegal 0.0648 0.0000
illegal 0.0648 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_2_action_exit: """
-10.0000 illegal 10.0000 illegal illegal
-10.0000 illegal __________ illegal illegal
-10.0000 illegal 1.0000 illegal illegal
-10.0000 illegal __________ __________ illegal
-10.0000 illegal illegal illegal illegal
"""
q_values_k_2_action_south: """
illegal 0.0000 illegal 0.9000 0.6480
illegal -0.3816 __________ 0.5184 0.0000
illegal -0.8100 illegal 0.6084 0.0648
illegal -0.9000 __________ __________ 0.0000
illegal -0.9000 0.0000 0.0000 0.0000
"""
q_values_k_2_action_west: """
illegal -6.5520 illegal 7.8480 5.1840
illegal -6.4872 __________ 0.7128 0.0000
illegal -7.2000 illegal 0.7848 0.5184
illegal -7.1352 __________ __________ 0.0000
illegal -7.2000 0.0000 0.0000 0.0000
"""
values_k_3: """
-10.0000 7.8480 10.0000 7.8480 5.1840
-10.0000 4.2840 __________ 5.1840 0.0000
-10.0000 0.7200 1.0000 0.7848 0.5184
-10.0000 0.0648 __________ __________ 0.0000
-10.0000 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_3_action_north: """
illegal 5.6506 illegal 7.0171 4.9054
illegal 5.1361 __________ 6.1171 4.1990
illegal 2.2745 illegal 3.8691 0.1173
illegal -0.3758 __________ __________ 0.3732
illegal -0.8533 0.0000 0.0000 0.0000
"""
q_values_k_3_action_east: """
illegal 8.2919 illegal 4.9054 4.1990
illegal 3.8556 __________ 0.7770 0.5132
illegal 1.1114 illegal 0.9104 0.3732
illegal 0.1115 __________ __________ 0.0467
illegal 0.0058 0.0000 0.0000 0.0000
"""
q_values_k_3_action_exit: """
-10.0000 illegal 10.0000 illegal illegal
-10.0000 illegal __________ illegal illegal
-10.0000 illegal 1.0000 illegal illegal
-10.0000 illegal __________ __________ illegal
-10.0000 illegal illegal illegal illegal
"""
q_values_k_3_action_south: """
illegal 3.0845 illegal 5.0990 1.1729
illegal 0.0040 __________ 1.0316 0.8398
illegal -0.7633 illegal 0.7017 0.1173
illegal -0.8942 __________ __________ 0.0000
illegal -0.9000 0.0000 0.0000 0.0000
"""
q_values_k_3_action_west: """
illegal -6.1081 illegal 8.3729 6.1171
illegal -6.4289 __________ 4.5094 4.2457
illegal -6.8086 illegal 1.2572 0.5651
illegal -7.1352 __________ __________ 0.0467
illegal -7.1942 0.0000 0.0000 0.0000
"""
values_k_4: """
-10.0000 8.2919 10.0000 8.3729 6.1171
-10.0000 5.1361 __________ 6.1171 4.2457
-10.0000 2.2745 1.0000 3.8691 0.5651
-10.0000 0.1115 __________ __________ 0.3732
-10.0000 0.0058 0.0000 0.0000 0.0000
"""
q_values_k_4_action_north: """
illegal 5.9702 illegal 7.4790 5.7084
illegal 5.5324 __________ 6.9611 5.3370
illegal 2.8880 illegal 4.5452 3.4560
illegal 0.7477 __________ __________ 0.4740
illegal -0.8198 0.0005 0.0000 0.2687
"""
q_values_k_4_action_east: """
illegal 8.4085 illegal 5.7084 5.3370
illegal 4.6490 __________ 4.1587 3.6583
illegal 1.1923 illegal 1.3056 0.8225
illegal 0.2855 __________ __________ 0.3196
illegal 0.0106 0.0000 0.0000 0.0336
"""
q_values_k_4_action_exit: """
-10.0000 illegal 10.0000 illegal illegal
-10.0000 illegal __________ illegal illegal
-10.0000 illegal 1.0000 illegal illegal
-10.0000 illegal __________ __________ illegal
-10.0000 illegal illegal illegal illegal
"""
q_values_k_4_action_south: """
illegal 3.6980 illegal 5.8549 4.3610
illegal 1.1999 __________ 3.7184 1.3395
illegal -0.7298 illegal 2.9266 0.6678
illegal -0.8858 __________ __________ 0.0672
illegal -0.8958 0.0005 0.0000 0.0000
"""
q_values_k_4_action_west: """
illegal -5.9915 illegal 8.5041 6.9611
illegal -6.2490 __________ 5.5061 5.0057
illegal -6.7277 illegal 1.6188 3.2015
illegal -6.9948 __________ __________ 0.3196
illegal -7.1894 0.0042 0.0000 0.0336
"""
values_k_5: """
-10.0000 8.4085 10.0000 8.5041 6.9611
-10.0000 5.5324 __________ 6.9611 5.3370
-10.0000 2.8880 1.0000 4.5452 3.4560
-10.0000 0.7477 __________ __________ 0.4740
-10.0000 0.0106 0.0042 0.0000 0.2687
"""
q_values_k_5_action_north: """
illegal 6.0541 illegal 7.6495 6.4039
illegal 5.6521 __________ 7.2298 6.1188
illegal 3.1733 illegal 5.4130 4.5627
illegal 1.2467 __________ __________ 2.5736
illegal -0.3613 0.0040 0.0246 0.3655
"""
q_values_k_5_action_east: """
illegal 8.4547 illegal 6.4039 6.1188
illegal 5.0000 __________ 5.0171 4.7802
illegal 1.2852 illegal 3.5239 3.0113
illegal 0.7992 __________ __________ 0.6765
illegal 0.0713 0.0008 0.1935 0.2603
"""
q_values_k_5_action_exit: """
-10.0000 illegal 10.0000 illegal illegal
-10.0000 illegal __________ illegal illegal
-10.0000 illegal 1.0000 illegal illegal
-10.0000 illegal __________ __________ illegal
-10.0000 illegal illegal illegal illegal
"""
q_values_k_5_action_south: """
illegal 3.9833 illegal 6.5385 5.2345
illegal 1.6773 __________ 4.3794 3.5951
illegal -0.2717 illegal 3.6736 1.0614
illegal -0.8251 __________ __________ 0.2788
illegal -0.8920 0.0040 0.0246 0.2177
"""
q_values_k_5_action_west: """
illegal -5.9453 illegal 8.5919 7.2298
illegal -6.1833 __________ 6.1864 5.9496
illegal -6.6348 illegal 1.7556 3.7955
illegal -6.9391 __________ __________ 0.6765
illegal -7.1318 0.0084 0.0030 0.0668
"""
values_k_6: """
-10.0000 8.4547 10.0000 8.5919 7.2298
-10.0000 5.6521 __________ 7.2298 6.1188
-10.0000 3.1733 1.0000 5.4130 4.5627
-10.0000 1.2467 __________ __________ 2.5736
-10.0000 0.0713 0.0084 0.1935 0.3655
"""
q_values_k_6_action_north: """
illegal 6.0874 illegal 7.7368 6.6294
illegal 5.6961 __________ 7.3875 6.4068
illegal 3.2595 illegal 5.7061 5.3034
illegal 1.4970 __________ __________ 3.7484
illegal -0.0017 0.0298 0.1730 1.9033
"""
q_values_k_6_action_east: """
illegal 8.4696 illegal 6.6294 6.4068
illegal 5.1160 __________ 5.6660 5.4669
illegal 1.3409 illegal 4.4230 4.0675
illegal 1.1896 __________ __________ 2.2966
illegal 0.1246 0.1408 0.2980 0.5277
"""
q_values_k_6_action_exit: """
-10.0000 illegal 10.0000 illegal illegal
-10.0000 illegal __________ illegal illegal
-10.0000 illegal 1.0000 illegal illegal
-10.0000 illegal __________ __________ illegal
-10.0000 illegal illegal illegal illegal
"""
q_values_k_6_action_south: """
illegal 4.0695 illegal 6.7561 5.8295
illegal 1.8935 __________ 5.0988 4.4865
illegal 0.0876 illegal 4.3980 2.7508
illegal -0.7365 __________ __________ 0.7264
illegal -0.8479 0.0298 0.1730 0.3135
"""
q_values_k_6_action_west: """
illegal -5.9304 illegal 8.6239 7.3875
illegal -6.1535 __________ 6.4659 6.2668
illegal -6.5791 illegal 1.8579 4.6797
illegal -6.9080 __________ __________ 2.2966
illegal -7.0814 0.0528 0.0408 0.4038
"""
values_k_7: """
-10.0000 8.4696 10.0000 8.6239 7.3875
-10.0000 5.6961 __________ 7.3875 6.4068
-10.0000 3.2595 1.0000 5.7061 5.3034
-10.0000 1.4970 __________ __________ 3.7484
-10.0000 0.1246 0.1408 0.2980 1.9033
"""
q_values_k_7_action_north: """
illegal 6.0981 illegal 7.7741 6.7600
illegal 5.7108 __________ 7.4507 6.5605
illegal 3.2912 illegal 5.8863 5.6038
illegal 1.5816 __________ __________ 4.4932
illegal 0.1905 0.1394 0.3985 2.8970
"""
q_values_k_7_action_east: """
illegal 8.4749 illegal 6.7600 6.5605
illegal 5.1568 __________ 5.9026 5.7551
illegal 1.3674 illegal 4.9969 4.7324
illegal 1.3824 __________ __________ 3.3475
illegal 0.2473 0.2399 1.4240 1.8790
"""
q_values_k_7_action_exit: """
-10.0000 illegal 10.0000 illegal illegal
-10.0000 illegal __________ illegal illegal
-10.0000 illegal 1.0000 illegal illegal
-10.0000 illegal __________ __________ illegal
-10.0000 illegal illegal illegal illegal
"""
q_values_k_7_action_south: """
illegal 4.1012 illegal 6.8839 6.0539
illegal 1.9595 __________ 5.3499 5.0599
illegal 0.2678 illegal 4.6757 3.6897
illegal -0.6755 __________ __________ 2.0451
illegal -0.7976 0.1394 0.3985 1.5685
"""
q_values_k_7_action_west: """
illegal -5.9251 illegal 8.6410 7.4507
illegal -6.1444 __________ 6.6087 6.4612
illegal -6.5526 illegal 1.8984 5.0224
illegal -6.8954 __________ __________ 3.3475
illegal -7.0541 0.1151 0.1550 0.7232
"""
values_k_8: """
-10.0000 8.4749 10.0000 8.6410 7.4507
-10.0000 5.7108 __________ 7.4507 6.5605
-10.0000 3.2912 1.0000 5.8863 5.6038
-10.0000 1.5816 __________ __________ 4.4932
-10.0000 0.2473 0.2399 1.4240 2.8970
"""
q_values_k_8_action_north: """
illegal 6.1019 illegal 7.7921 6.8128
illegal 5.7159 __________ 7.4826 6.6255
illegal 3.3017 illegal 5.9589 5.7577
illegal 1.6120 __________ __________ 4.8435
illegal 0.2603 0.3231 1.3076 3.6240
"""
q_values_k_8_action_east: """
illegal 8.4767 illegal 6.8128 6.6255
illegal 5.1707 __________ 6.0310 5.8985
illegal 1.3763 illegal 5.2350 5.0295
illegal 1.4572 __________ __________ 4.0001
illegal 0.3373 1.0685 2.3421 2.7509
"""
q_values_k_8_action_exit: """
-10.0000 illegal 10.0000 illegal illegal
-10.0000 illegal __________ illegal illegal
-10.0000 illegal 1.0000 illegal illegal
-10.0000 illegal __________ __________ illegal
-10.0000 illegal illegal illegal illegal
"""
q_values_k_8_action_south: """
illegal 4.1117 illegal 6.9351 6.1718
illegal 1.9836 __________ 5.4992 5.2957
illegal 0.3287 illegal 4.8325 4.2692
illegal -0.5796 __________ __________ 2.8946
illegal -0.7003 0.3231 1.3076 2.4747
"""
q_values_k_8_action_west: """
illegal -5.9233 illegal 8.6483 7.4826
illegal -6.1411 __________ 6.6720 6.5394
illegal -6.5437 illegal 1.9203 5.2330
illegal -6.8815 __________ __________ 4.0001
illegal -7.0354 0.2213 0.4290 1.6904
"""
values_k_9: """
-10.0000 8.4767 10.0000 8.6483 7.4826
-10.0000 5.7159 __________ 7.4826 6.6255
-10.0000 3.3017 1.0000 5.9589 5.7577
-10.0000 1.6120 __________ __________ 4.8435
-10.0000 0.3373 1.0685 2.3421 3.6240
"""
q_values_k_9_action_north: """
illegal 6.1032 illegal 7.8002 6.8392
illegal 5.7177 __________ 7.4965 6.6572
illegal 3.3055 illegal 5.9956 5.8249
illegal 1.6223 __________ __________ 5.0174
illegal 0.3568 1.0105 2.1087 4.0243
"""
q_values_k_9_action_east: """
illegal 8.4773 illegal 6.8392 6.6572
illegal 5.1755 __________ 6.0850 5.9620
illegal 1.3795 illegal 5.3553 5.1777
illegal 1.4881 __________ __________ 4.3316
illegal 0.9447 1.8787 3.0308 3.3713
"""
q_values_k_9_action_exit: """
-10.0000 illegal 10.0000 illegal illegal
-10.0000 illegal __________ illegal illegal
-10.0000 illegal 1.0000 illegal illegal
-10.0000 illegal __________ __________ illegal
-10.0000 illegal illegal illegal illegal
"""
q_values_k_9_action_south: """
illegal 4.1155 illegal 6.9609 6.2222
illegal 1.9917 __________ 5.5601 5.4153
illegal 0.3506 illegal 4.8986 4.5418
illegal -0.5121 __________ __________ 3.4811
illegal -0.5610 1.0105 2.1087 3.1462
"""
q_values_k_9_action_west: """
illegal -5.9227 illegal 8.6518 7.4965
illegal -6.1399 __________ 6.7021 6.5791
illegal -6.5405 illegal 1.9297 5.3226
illegal -6.8725 __________ __________ 4.3316
illegal -7.0246 0.4352 1.1909 2.4484
"""
values_k_100: """
-10.0000 8.4777 10.0000 8.6547 7.5087
-10.0000 5.7186 __________ 7.5087 6.6836
-10.0000 3.3074 1.0000 6.0258 5.8841
-10.0000 2.0045 __________ __________ 5.1665
-10.0000 2.9289 3.4513 3.9306 4.4765
"""
q_values_k_100_action_north: """
illegal 6.1039 illegal 7.8072 6.8610
illegal 5.7186 __________ 7.5087 6.6836
illegal 3.3074 illegal 6.0258 5.8841
illegal 1.6617 __________ __________ 5.1665
illegal 0.8539 3.1023 3.5435 4.4765
"""
q_values_k_100_action_east: """
illegal 8.4777 illegal 6.8610 6.6836
illegal 5.1780 __________ 6.1334 6.0175
illegal 1.4151 illegal 5.4546 5.3030
illegal 2.0045 __________ __________ 4.6523
illegal 2.9289 3.4513 3.9306 4.0910
"""
q_values_k_100_action_exit: """
-10.0000 illegal 10.0000 illegal illegal
-10.0000 illegal __________ illegal illegal
-10.0000 illegal 1.0000 illegal illegal
-10.0000 illegal __________ __________ illegal
-10.0000 illegal illegal illegal illegal
"""
q_values_k_100_action_south: """
illegal 4.1174 illegal 6.9820 6.2669
illegal 1.9960 __________ 5.6159 5.5138
illegal 0.6333 illegal 4.9582 4.7918
illegal 1.3892 __________ __________ 4.1531
illegal 1.5194 3.1023 3.5435 3.9797
"""
q_values_k_100_action_west: """
illegal -5.9223 illegal 8.6547 7.5087
illegal -6.1393 __________ 6.7275 6.6116
illegal -6.5049 illegal 1.9381 5.4051
illegal -6.6387 __________ __________ 4.6523
illegal -6.7560 2.7300 3.1924 3.6979
"""
policy: """
exit east exit west west
exit north __________ north north
exit north exit north north
exit east __________ __________ north
exit east east east north
"""
actions: """
north
east
exit
south
west
"""

View file

@ -0,0 +1,24 @@
class: "ValueIterationTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
-10 _ 10 _ _
-10 _ # _ _
-10 _ 1 _ _
-10 _ # # _
-10 S _ _ _
"""
discount: "0.9"
noise: "0.2"
livingReward: "0.0"
epsilon: "0.2"
learningRate: "0.1"
numExperiences: "3000"
valueIterations: "100"
iterations: "10000"

View file

@ -0,0 +1,2 @@
max_points: "6"
class: "PassAllTestsQuestion"

View file

@ -0,0 +1,2 @@
# This is the solution file for test_cases/q2/1-bridge-grid.test.
# File intentionally blank.

View file

@ -0,0 +1,29 @@
class: "GridPolicyTest"
# Function in module in analysis that returns (discount, noise)
parameterFn: "question2"
question2: "true"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
# -100 -100 -100 -100 -100 #
1 S _ _ _ _ 10
# -100 -100 -100 -100 -100 #
"""
gridName: "bridgeGrid"
# Policy specification
# _ policy choice not checked
# N, E, S, W policy action must be north, east, south, west
#
policy: """
_ _ _ _ _ _ _
_ E _ _ _ _ _
_ _ _ _ _ _ _
"""

View file

@ -0,0 +1,2 @@
max_points: "1"
class: "PassAllTestsQuestion"

View file

@ -0,0 +1,2 @@
# This is the solution file for test_cases/q3/1-question-3.1.test.
# File intentionally blank.

View file

@ -0,0 +1,31 @@
class: "GridPolicyTest"
# Function in module in analysis that returns (discount, noise)
parameterFn: "question3a"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
_ _ _ _ _
_ # _ _ _
_ # 1 # 10
S _ _ _ _
-10 -10 -10 -10 -10
"""
gridName: "discountGrid"
# Policy specification
# _ policy choice not checked
# N, E, S, W policy action must be north, east, south, west
#
policy: """
_ _ _ _ _
_ _ _ _ _
_ _ _ _ _
E E N _ _
_ _ _ _ _
"""

View file

@ -0,0 +1,2 @@
# This is the solution file for test_cases/q3/2-question-3.2.test.
# File intentionally blank.

View file

@ -0,0 +1,31 @@
class: "GridPolicyTest"
# Function in module in analysis that returns (discount, noise)
parameterFn: "question3b"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
_ _ _ _ _
_ # _ _ _
_ # 1 # 10
S _ _ _ _
-10 -10 -10 -10 -10
"""
gridName: "discountGrid"
# Policy specification
# _ policy choice not checked
# N, E, S, W policy action must be north, east, south, west
#
policy: """
E E S _ _
N _ S _ _
N _ _ _ _
N _ _ _ _
_ _ _ _ _
"""

View file

@ -0,0 +1,2 @@
# This is the solution file for test_cases/q3/3-question-3.3.test.
# File intentionally blank.

View file

@ -0,0 +1,31 @@
class: "GridPolicyTest"
# Function in module in analysis that returns (discount, noise)
parameterFn: "question3c"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
_ _ _ _ _
_ # _ _ _
_ # 1 # 10
S _ _ _ _
-10 -10 -10 -10 -10
"""
gridName: "discountGrid"
# Policy specification
# _ policy choice not checked
# N, E, S, W policy action must be north, east, south, west
#
policy: """
_ _ _ _ _
_ _ _ _ _
_ _ _ _ _
E E E E N
_ _ _ _ _
"""

View file

@ -0,0 +1,2 @@
# This is the solution file for test_cases/q3/4-question-3.4.test.
# File intentionally blank.

View file

@ -0,0 +1,36 @@
class: "GridPolicyTest"
# Function in module in analysis that returns (discount, noise)
parameterFn: "question3d"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
_ _ _ _ _
_ # _ _ _
_ # 1 # 10
S _ _ _ _
-10 -10 -10 -10 -10
"""
gridName: "discountGrid"
# Policy specification
# _ policy choice not checked
# N, E, S, W policy action must be north, east, south, west
#
policy: """
_ _ _ _ _
_ _ _ _ _
_ _ _ _ _
N _ _ _ _
_ _ _ _ _
"""
# State the most probable path must visit
# (x,y) for a particular location; (0,0) is bottom left
# TERMINAL_STATE for the terminal state
pathVisits: "(4,2)"

View file

@ -0,0 +1,2 @@
# This is the solution file for test_cases/q3/5-question-3.5.test.
# File intentionally blank.

View file

@ -0,0 +1,36 @@
class: "GridPolicyTest"
# Function in module in analysis that returns (discount, noise)
parameterFn: "question3e"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
_ _ _ _ _
_ # _ _ _
_ # 1 # 10
S _ _ _ _
-10 -10 -10 -10 -10
"""
gridName: "discountGrid"
# Policy specification
# _ policy choice not checked
# N, E, S, W policy action must be north, east, south, west
#
policy: """
_ _ _ _ _
_ _ _ _ _
_ _ _ _ _
_ _ _ _ _
_ _ _ _ _
"""
# State the most probable path must not visit
# (x,y) for a particular location; (0,0) is bottom left
# TERMINAL_STATE for the terminal state
pathNotVisits: "TERMINAL_STATE"

View file

@ -0,0 +1,2 @@
max_points: "5"
class: "NumberPassedQuestion"

View file

@ -0,0 +1,342 @@
q_values_k_0_action_north: """
illegal
0.0000
illegal
"""
q_values_k_0_action_east: """
illegal
0.0000
illegal
"""
q_values_k_0_action_exit: """
0.0000
illegal
0.0000
"""
q_values_k_0_action_south: """
illegal
0.0000
illegal
"""
q_values_k_0_action_west: """
illegal
0.0000
illegal
"""
q_values_k_1_action_north: """
illegal
0.0000
illegal
"""
q_values_k_1_action_east: """
illegal
0.0000
illegal
"""
q_values_k_1_action_exit: """
0.0000
illegal
1.0000
"""
q_values_k_1_action_south: """
illegal
0.0000
illegal
"""
q_values_k_1_action_west: """
illegal
0.0000
illegal
"""
q_values_k_2_action_north: """
illegal
0.0000
illegal
"""
q_values_k_2_action_east: """
illegal
0.0000
illegal
"""
q_values_k_2_action_exit: """
0.0000
illegal
1.0000
"""
q_values_k_2_action_south: """
illegal
0.0000
illegal
"""
q_values_k_2_action_west: """
illegal
0.0000
illegal
"""
q_values_k_3_action_north: """
illegal
0.0000
illegal
"""
q_values_k_3_action_east: """
illegal
0.0000
illegal
"""
q_values_k_3_action_exit: """
0.0000
illegal
1.9000
"""
q_values_k_3_action_south: """
illegal
0.0000
illegal
"""
q_values_k_3_action_west: """
illegal
0.0000
illegal
"""
q_values_k_4_action_north: """
illegal
0.0000
illegal
"""
q_values_k_4_action_east: """
illegal
0.0000
illegal
"""
q_values_k_4_action_exit: """
0.0000
illegal
2.7100
"""
q_values_k_4_action_south: """
illegal
0.0000
illegal
"""
q_values_k_4_action_west: """
illegal
0.0000
illegal
"""
q_values_k_5_action_north: """
illegal
0.0000
illegal
"""
q_values_k_5_action_east: """
illegal
0.0000
illegal
"""
q_values_k_5_action_exit: """
-1.0000
illegal
2.7100
"""
q_values_k_5_action_south: """
illegal
0.0000
illegal
"""
q_values_k_5_action_west: """
illegal
0.0000
illegal
"""
q_values_k_6_action_north: """
illegal
0.0000
illegal
"""
q_values_k_6_action_east: """
illegal
0.0000
illegal
"""
q_values_k_6_action_exit: """
-1.0000
illegal
3.4390
"""
q_values_k_6_action_south: """
illegal
0.0000
illegal
"""
q_values_k_6_action_west: """
illegal
0.0000
illegal
"""
q_values_k_7_action_north: """
illegal
0.0000
illegal
"""
q_values_k_7_action_east: """
illegal
0.0000
illegal
"""
q_values_k_7_action_exit: """
-1.0000
illegal
3.4390
"""
q_values_k_7_action_south: """
illegal
0.1720
illegal
"""
q_values_k_7_action_west: """
illegal
0.0000
illegal
"""
q_values_k_8_action_north: """
illegal
0.0000
illegal
"""
q_values_k_8_action_east: """
illegal
0.0000
illegal
"""
q_values_k_8_action_exit: """
-1.0000
illegal
4.0951
"""
q_values_k_8_action_south: """
illegal
0.1720
illegal
"""
q_values_k_8_action_west: """
illegal
0.0000
illegal
"""
q_values_k_9_action_north: """
illegal
0.0000
illegal
"""
q_values_k_9_action_east: """
illegal
0.0000
illegal
"""
q_values_k_9_action_exit: """
-1.0000
illegal
4.6856
"""
q_values_k_9_action_south: """
illegal
0.1720
illegal
"""
q_values_k_9_action_west: """
illegal
0.0000
illegal
"""
q_values_k_100_action_north: """
illegal
-0.4534
illegal
"""
q_values_k_100_action_east: """
illegal
0.4063
illegal
"""
q_values_k_100_action_exit: """
-9.4767
illegal
9.8175
"""
q_values_k_100_action_south: """
illegal
2.1267
illegal
"""
q_values_k_100_action_west: """
illegal
0.3919
illegal
"""
values: """
-9.4767
2.1267
9.8175
"""
policy: """
exit
south
exit
"""

View file

@ -0,0 +1,22 @@
class: "QLearningTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
-10
S
10
"""
discount: "0.5"
noise: "0.0"
livingReward: "0.0"
epsilon: "0.5"
learningRate: "0.1"
numExperiences: "100"
valueIterations: "100"
iterations: "10000"

View file

@ -0,0 +1,342 @@
q_values_k_0_action_north: """
illegal
0.0000
illegal
"""
q_values_k_0_action_east: """
illegal
0.0000
illegal
"""
q_values_k_0_action_exit: """
0.0000
illegal
0.0000
"""
q_values_k_0_action_south: """
illegal
0.0000
illegal
"""
q_values_k_0_action_west: """
illegal
0.0000
illegal
"""
q_values_k_1_action_north: """
illegal
0.0000
illegal
"""
q_values_k_1_action_east: """
illegal
0.0000
illegal
"""
q_values_k_1_action_exit: """
0.0000
illegal
1.0000
"""
q_values_k_1_action_south: """
illegal
0.0000
illegal
"""
q_values_k_1_action_west: """
illegal
0.0000
illegal
"""
q_values_k_2_action_north: """
illegal
0.0000
illegal
"""
q_values_k_2_action_east: """
illegal
0.0000
illegal
"""
q_values_k_2_action_exit: """
0.0000
illegal
1.0000
"""
q_values_k_2_action_south: """
illegal
0.0000
illegal
"""
q_values_k_2_action_west: """
illegal
0.0000
illegal
"""
q_values_k_3_action_north: """
illegal
0.0000
illegal
"""
q_values_k_3_action_east: """
illegal
0.0000
illegal
"""
q_values_k_3_action_exit: """
0.0000
illegal
1.9000
"""
q_values_k_3_action_south: """
illegal
0.0000
illegal
"""
q_values_k_3_action_west: """
illegal
0.0000
illegal
"""
q_values_k_4_action_north: """
illegal
0.0000
illegal
"""
q_values_k_4_action_east: """
illegal
0.0000
illegal
"""
q_values_k_4_action_exit: """
0.0000
illegal
2.7100
"""
q_values_k_4_action_south: """
illegal
0.0000
illegal
"""
q_values_k_4_action_west: """
illegal
0.0000
illegal
"""
q_values_k_5_action_north: """
illegal
0.0000
illegal
"""
q_values_k_5_action_east: """
illegal
0.0000
illegal
"""
q_values_k_5_action_exit: """
-1.0000
illegal
2.7100
"""
q_values_k_5_action_south: """
illegal
0.0000
illegal
"""
q_values_k_5_action_west: """
illegal
0.0000
illegal
"""
q_values_k_6_action_north: """
illegal
0.0000
illegal
"""
q_values_k_6_action_east: """
illegal
0.0000
illegal
"""
q_values_k_6_action_exit: """
-1.0000
illegal
3.4390
"""
q_values_k_6_action_south: """
illegal
0.0000
illegal
"""
q_values_k_6_action_west: """
illegal
0.0000
illegal
"""
q_values_k_7_action_north: """
illegal
0.0000
illegal
"""
q_values_k_7_action_east: """
illegal
0.0000
illegal
"""
q_values_k_7_action_exit: """
-1.0000
illegal
3.4390
"""
q_values_k_7_action_south: """
illegal
0.2579
illegal
"""
q_values_k_7_action_west: """
illegal
0.0000
illegal
"""
q_values_k_8_action_north: """
illegal
0.0000
illegal
"""
q_values_k_8_action_east: """
illegal
0.0000
illegal
"""
q_values_k_8_action_exit: """
-1.0000
illegal
4.0951
"""
q_values_k_8_action_south: """
illegal
0.2579
illegal
"""
q_values_k_8_action_west: """
illegal
0.0000
illegal
"""
q_values_k_9_action_north: """
illegal
0.0000
illegal
"""
q_values_k_9_action_east: """
illegal
0.0000
illegal
"""
q_values_k_9_action_exit: """
-1.0000
illegal
4.6856
"""
q_values_k_9_action_south: """
illegal
0.2579
illegal
"""
q_values_k_9_action_west: """
illegal
0.0000
illegal
"""
q_values_k_100_action_north: """
illegal
-0.6670
illegal
"""
q_values_k_100_action_east: """
illegal
0.9499
illegal
"""
q_values_k_100_action_exit: """
-9.4767
illegal
9.8175
"""
q_values_k_100_action_south: """
illegal
3.2562
illegal
"""
q_values_k_100_action_west: """
illegal
0.8236
illegal
"""
values: """
-9.4767
3.2562
9.8175
"""
policy: """
exit
south
exit
"""

View file

@ -0,0 +1,22 @@
class: "QLearningTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
-10
S
10
"""
discount: "0.75"
noise: "0.25"
livingReward: "0.0"
epsilon: "0.5"
learningRate: "0.1"
numExperiences: "100"
valueIterations: "100"
iterations: "10000"

View file

@ -0,0 +1,570 @@
q_values_k_0_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_0_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_0_action_exit: """
__________ 0.0000 __________
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_0_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_0_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_1_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_1_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_1_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_1_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_1_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_2_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_2_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_2_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_2_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_2_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_3_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_3_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_3_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
-10.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_3_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_3_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_4_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_4_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_4_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
-10.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_4_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_4_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_5_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_5_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_5_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
0.0000 illegal -10.0000
0.0000 illegal 0.0000
-10.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_5_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_5_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_6_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_6_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_6_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
-10.0000 illegal -10.0000
0.0000 illegal 0.0000
-10.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_6_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_6_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_7_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_7_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_7_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
-10.0000 illegal -10.0000
0.0000 illegal 0.0000
-10.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_7_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_7_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_8_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_8_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_8_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
-10.0000 illegal -10.0000
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_8_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_8_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_9_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_9_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_9_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
-10.0000 illegal -10.0000
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
__________ 0.1000 __________
"""
q_values_k_9_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_9_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_500_action_north: """
__________ illegal __________
illegal -5.8648 illegal
illegal -0.7995 illegal
illegal -0.1671 illegal
illegal -1.2642 illegal
illegal -0.5871 illegal
__________ illegal __________
"""
q_values_k_500_action_east: """
__________ illegal __________
illegal -17.0676 illegal
illegal -26.5534 illegal
illegal -3.6957 illegal
illegal -43.5952 illegal
illegal -31.6884 illegal
__________ illegal __________
"""
q_values_k_500_action_exit: """
__________ 9.3539 __________
-96.5663 illegal -96.9097
-97.7472 illegal -94.1850
-89.0581 illegal -96.9097
-97.2187 illegal -87.8423
-92.8210 illegal -97.2187
__________ 0.9576 __________
"""
q_values_k_500_action_south: """
__________ illegal __________
illegal -6.8377 illegal
illegal -6.7277 illegal
illegal -3.4723 illegal
illegal -8.4015 illegal
illegal -5.5718 illegal
__________ illegal __________
"""
q_values_k_500_action_west: """
__________ illegal __________
illegal -27.0626 illegal
illegal -39.0610 illegal
illegal -40.5887 illegal
illegal -16.2839 illegal
illegal -20.7770 illegal
__________ illegal __________
"""
values: """
__________ 9.3539 __________
-96.5663 -5.8648 -96.9097
-97.7472 -0.7995 -94.1850
-89.0581 -0.1671 -96.9097
-97.2187 -1.2642 -87.8423
-92.8210 -0.5871 -97.2187
__________ 0.9576 __________
"""
policy: """
__________ exit __________
exit north exit
exit north exit
exit north exit
exit north exit
exit north exit
__________ exit __________
"""

View file

@ -0,0 +1,27 @@
class: "QLearningTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
# 10 #
-100 _ -100
-100 _ -100
-100 _ -100
-100 _ -100
-100 S -100
# 1 #
"""
gridName: "bridgeGrid"
discount: "0.85"
noise: "0.1"
livingReward: "0.0"
epsilon: "0.5"
learningRate: "0.1"
numExperiences: "500"
valueIterations: "100"
iterations: "10000"

View file

@ -0,0 +1,456 @@
q_values_k_0_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_0_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_0_action_exit: """
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
0.0000 illegal illegal illegal illegal
"""
q_values_k_0_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_0_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_1_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_1_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_1_action_exit: """
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
0.0000 illegal illegal illegal illegal
"""
q_values_k_1_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_1_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_2_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_2_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_2_action_exit: """
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
0.0000 illegal illegal illegal illegal
"""
q_values_k_2_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_2_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_3_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_3_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_3_action_exit: """
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
-1.0000 illegal illegal illegal illegal
"""
q_values_k_3_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_3_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_4_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_4_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_4_action_exit: """
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
-1.0000 illegal illegal illegal illegal
"""
q_values_k_4_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_4_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_5_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_5_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_5_action_exit: """
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
-1.0000 illegal illegal illegal illegal
"""
q_values_k_5_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_5_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_6_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_6_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_6_action_exit: """
0.0000 illegal 0.0000 illegal illegal
-1.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
-1.0000 illegal illegal illegal illegal
"""
q_values_k_6_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_6_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_7_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_7_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_7_action_exit: """
0.0000 illegal 0.0000 illegal illegal
-1.0000 illegal __________ illegal illegal
0.0000 illegal 0.1000 illegal illegal
0.0000 illegal __________ __________ illegal
-1.0000 illegal illegal illegal illegal
"""
q_values_k_7_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_7_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_8_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_8_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_8_action_exit: """
0.0000 illegal 0.0000 illegal illegal
-1.0000 illegal __________ illegal illegal
0.0000 illegal 0.1000 illegal illegal
-1.0000 illegal __________ __________ illegal
-1.0000 illegal illegal illegal illegal
"""
q_values_k_8_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_8_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_9_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal -0.0900 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_9_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_9_action_exit: """
0.0000 illegal 0.0000 illegal illegal
-1.0000 illegal __________ illegal illegal
0.0000 illegal 0.1000 illegal illegal
-1.0000 illegal __________ __________ illegal
-1.0000 illegal illegal illegal illegal
"""
q_values_k_9_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_9_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_3000_action_north: """
illegal 4.3205 illegal 6.1517 3.8095
illegal 4.4238 __________ 5.2284 3.5129
illegal 1.0694 illegal 3.6867 2.0418
illegal 0.3423 __________ __________ 1.0655
illegal 0.0073 0.0079 0.0484 0.3768
"""
q_values_k_3000_action_east: """
illegal 8.0584 illegal 3.7245 3.3947
illegal 2.0499 __________ 3.2373 2.1742
illegal 0.8687 illegal 1.7398 1.2671
illegal 0.2927 __________ __________ 0.6669
illegal 0.0239 0.0097 0.1611 0.2051
"""
q_values_k_3000_action_exit: """
-10.0000 illegal 10.0000 illegal illegal
-10.0000 illegal __________ illegal illegal
-10.0000 illegal 1.0000 illegal illegal
-10.0000 illegal __________ __________ illegal
-9.9999 illegal illegal illegal illegal
"""
q_values_k_3000_action_south: """
illegal -0.3521 illegal 3.6948 2.9139
illegal -0.5605 __________ 2.1346 1.5674
illegal 0.2093 illegal 1.5389 0.5521
illegal -0.5505 __________ __________ 0.1006
illegal -1.8501 0.0060 0.0514 0.1223
"""
q_values_k_3000_action_west: """
illegal -6.2001 illegal 7.5146 4.9014
illegal -5.4013 __________ 4.0484 3.4126
illegal -8.0399 illegal 0.9653 1.6081
illegal -7.4767 __________ __________ 0.3934
illegal -6.3432 0.0179 0.0188 0.1028
"""
values: """
-10.0000 8.0584 10.0000 7.5146 4.9014
-10.0000 4.4238 __________ 5.2284 3.5129
-10.0000 1.0694 1.0000 3.6867 2.0418
-10.0000 0.3423 __________ __________ 1.0655
-9.9999 0.0239 0.0179 0.1611 0.3768
"""
policy: """
exit east exit west west
exit north __________ north north
exit north exit north north
exit north __________ __________ north
exit east west east north
"""


@ -0,0 +1,24 @@
class: "QLearningTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
-10 _ 10 _ _
-10 _ # _ _
-10 _ 1 _ _
-10 _ # # _
-10 S _ _ _
"""
discount: "0.9"
noise: "0.2"
livingReward: "0.0"
epsilon: "0.2"
learningRate: "0.1"
numExperiences: "3000"
valueIterations: "100"
iterations: "10000"


@ -0,0 +1,2 @@
max_points: "5"
class: "PassAllTestsQuestion"


@ -0,0 +1,2 @@
# This is the solution file for test_cases/q5/1-tinygrid.test.
# File intentionally blank.


@ -0,0 +1,22 @@
class: "EpsilonGreedyTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
-10
S
10
"""
discount: "0.5"
noise: "0.0"
livingReward: "0.0"
epsilon: "0.5"
learningRate: "0.1"
numExperiences: "100"
valueIterations: "100"
iterations: "10000"
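EpsilonGreedyTest exercises action selection rather than the update itself: with probability epsilon (0.5 in this .test file) the agent is expected to pick a uniformly random legal action, and otherwise the greedy one. A sketch under that assumption (function name illustrative):

import random

def epsilon_greedy_action(Q, state, legal_actions, epsilon=0.5):
    # Explore with probability epsilon, otherwise exploit the current Q-table.
    if not legal_actions:
        return None
    if random.random() < epsilon:
        return random.choice(legal_actions)
    return max(legal_actions, key=lambda a: Q.get((state, a), 0.0))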


@ -0,0 +1,2 @@
# This is the solution file for test_cases/q5/2-tinygrid-noisy.test.
# File intentionally blank.


@ -0,0 +1,22 @@
class: "EpsilonGreedyTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
-10
S
10
"""
discount: "0.75"
noise: "0.25"
livingReward: "0.0"
epsilon: "0.5"
learningRate: "0.1"
numExperiences: "100"
valueIterations: "100"
iterations: "10000"


@ -0,0 +1,2 @@
# This is the solution file for test_cases/q5/3-bridge.test.
# File intentionally blank.


@ -0,0 +1,27 @@
class: "EpsilonGreedyTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
# 10 #
-100 _ -100
-100 _ -100
-100 _ -100
-100 _ -100
-100 S -100
# 1 #
"""
gridName: "bridgeGrid"
discount: "0.85"
noise: "0.1"
livingReward: "0.0"
epsilon: "0.5"
learningRate: "0.1"
numExperiences: "500"
valueIterations: "100"
iterations: "10000"


@ -0,0 +1,2 @@
# This is the solution file for test_cases/q5/4-discountgrid.test.
# File intentionally blank.


@ -0,0 +1,24 @@
class: "EpsilonGreedyTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
-10 _ 10 _ _
-10 _ # _ _
-10 _ 1 _ _
-10 _ # # _
-10 S _ _ _
"""
discount: "0.9"
noise: "0.2"
livingReward: "0.0"
epsilon: "0.2"
learningRate: "0.1"
numExperiences: "3000"
valueIterations: "100"
iterations: "10000"


@ -0,0 +1,2 @@
max_points: "3"
class: "PassAllTestsQuestion"


@ -0,0 +1,2 @@
max_points: "1"
class: "PassAllTestsQuestion"


@ -0,0 +1,2 @@
# This is the solution file for test_cases/q6/grade-agent.test.
# File intentionally blank.


@ -0,0 +1,2 @@
class: "Question6Test"


@ -0,0 +1,2 @@
max_points: "1"
class: "PartialCreditQuestion"


@ -0,0 +1,2 @@
# This is the solution file for test_cases/q7/grade-agent.test.
# File intentionally blank.


@ -0,0 +1,6 @@
class: "EvalAgentTest"
# 100 test games after 2000 training games
pacmanParams: "-p PacmanQAgent -x 2000 -n 2100 -l smallGrid -q -f --fixRandomSeed"
winsThresholds: "70"
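Per the comment above, the agent is trained for 2000 games and then judged on the remaining 100 of the 2100; winsThresholds is the minimum number of wins required among those evaluation games. A toy sketch of that acceptance check (game_results here is a hypothetical list of booleans, one per evaluation game, not the autograder's actual data structure):

def passes_win_threshold(game_results, threshold=70):
    # game_results: one True/False per evaluation game (hypothetical input format).
    return sum(1 for won in game_results if won) >= threshold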


@ -0,0 +1,429 @@
weights_k_0: """
{((0, 0), 'exit'): 0,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0,
((0, 1), 'west'): 0,
((0, 2), 'exit'): 0}
"""
q_values_k_0_action_north: """
illegal
0.0000
illegal
"""
q_values_k_0_action_east: """
illegal
0.0000
illegal
"""
q_values_k_0_action_exit: """
0.0000
illegal
0.0000
"""
q_values_k_0_action_south: """
illegal
0.0000
illegal
"""
q_values_k_0_action_west: """
illegal
0.0000
illegal
"""
weights_k_1: """
{((0, 0), 'exit'): 1.0,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0,
((0, 1), 'west'): 0,
((0, 2), 'exit'): 0}
"""
q_values_k_1_action_north: """
illegal
0.0000
illegal
"""
q_values_k_1_action_east: """
illegal
0.0000
illegal
"""
q_values_k_1_action_exit: """
0.0000
illegal
1.0000
"""
q_values_k_1_action_south: """
illegal
0.0000
illegal
"""
q_values_k_1_action_west: """
illegal
0.0000
illegal
"""
weights_k_2: """
{((0, 0), 'exit'): 1.0,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0.0,
((0, 1), 'west'): 0,
((0, 2), 'exit'): 0}
"""
q_values_k_2_action_north: """
illegal
0.0000
illegal
"""
q_values_k_2_action_east: """
illegal
0.0000
illegal
"""
q_values_k_2_action_exit: """
0.0000
illegal
1.0000
"""
q_values_k_2_action_south: """
illegal
0.0000
illegal
"""
q_values_k_2_action_west: """
illegal
0.0000
illegal
"""
weights_k_3: """
{((0, 0), 'exit'): 1.9,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0.0,
((0, 1), 'west'): 0,
((0, 2), 'exit'): 0}
"""
q_values_k_3_action_north: """
illegal
0.0000
illegal
"""
q_values_k_3_action_east: """
illegal
0.0000
illegal
"""
q_values_k_3_action_exit: """
0.0000
illegal
1.9000
"""
q_values_k_3_action_south: """
illegal
0.0000
illegal
"""
q_values_k_3_action_west: """
illegal
0.0000
illegal
"""
weights_k_4: """
{((0, 0), 'exit'): 2.71,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0.0,
((0, 1), 'west'): 0,
((0, 2), 'exit'): 0}
"""
q_values_k_4_action_north: """
illegal
0.0000
illegal
"""
q_values_k_4_action_east: """
illegal
0.0000
illegal
"""
q_values_k_4_action_exit: """
0.0000
illegal
2.7100
"""
q_values_k_4_action_south: """
illegal
0.0000
illegal
"""
q_values_k_4_action_west: """
illegal
0.0000
illegal
"""
weights_k_5: """
{((0, 0), 'exit'): 2.71,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0.0,
((0, 1), 'west'): 0,
((0, 2), 'exit'): -1.0}
"""
q_values_k_5_action_north: """
illegal
0.0000
illegal
"""
q_values_k_5_action_east: """
illegal
0.0000
illegal
"""
q_values_k_5_action_exit: """
-1.0000
illegal
2.7100
"""
q_values_k_5_action_south: """
illegal
0.0000
illegal
"""
q_values_k_5_action_west: """
illegal
0.0000
illegal
"""
weights_k_6: """
{((0, 0), 'exit'): 3.439,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0.0,
((0, 1), 'west'): 0,
((0, 2), 'exit'): -1.0}
"""
q_values_k_6_action_north: """
illegal
0.0000
illegal
"""
q_values_k_6_action_east: """
illegal
0.0000
illegal
"""
q_values_k_6_action_exit: """
-1.0000
illegal
3.4390
"""
q_values_k_6_action_south: """
illegal
0.0000
illegal
"""
q_values_k_6_action_west: """
illegal
0.0000
illegal
"""
weights_k_7: """
{((0, 0), 'exit'): 3.439,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0.17195000000000002,
((0, 1), 'west'): 0,
((0, 2), 'exit'): -1.0}
"""
q_values_k_7_action_north: """
illegal
0.0000
illegal
"""
q_values_k_7_action_east: """
illegal
0.0000
illegal
"""
q_values_k_7_action_exit: """
-1.0000
illegal
3.4390
"""
q_values_k_7_action_south: """
illegal
0.1720
illegal
"""
q_values_k_7_action_west: """
illegal
0.0000
illegal
"""
weights_k_8: """
{((0, 0), 'exit'): 4.0951,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0.17195000000000002,
((0, 1), 'west'): 0,
((0, 2), 'exit'): -1.0}
"""
q_values_k_8_action_north: """
illegal
0.0000
illegal
"""
q_values_k_8_action_east: """
illegal
0.0000
illegal
"""
q_values_k_8_action_exit: """
-1.0000
illegal
4.0951
"""
q_values_k_8_action_south: """
illegal
0.1720
illegal
"""
q_values_k_8_action_west: """
illegal
0.0000
illegal
"""
weights_k_9: """
{((0, 0), 'exit'): 4.68559,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0.17195000000000002,
((0, 1), 'west'): 0,
((0, 2), 'exit'): -1.0}
"""
q_values_k_9_action_north: """
illegal
0.0000
illegal
"""
q_values_k_9_action_east: """
illegal
0.0000
illegal
"""
q_values_k_9_action_exit: """
-1.0000
illegal
4.6856
"""
q_values_k_9_action_south: """
illegal
0.1720
illegal
"""
q_values_k_9_action_west: """
illegal
0.0000
illegal
"""
weights_k_100: """
{((0, 0), 'exit'): 9.817519963685992,
((0, 1), 'east'): 0.40629236674335106,
((0, 1), 'north'): -0.4534185789984799,
((0, 1), 'south'): 2.126721095524319,
((0, 1), 'west'): 0.39193283364906867,
((0, 2), 'exit'): -9.476652366972639}
"""
q_values_k_100_action_north: """
illegal
-0.4534
illegal
"""
q_values_k_100_action_east: """
illegal
0.4063
illegal
"""
q_values_k_100_action_exit: """
-9.4767
illegal
9.8175
"""
q_values_k_100_action_south: """
illegal
2.1267
illegal
"""
q_values_k_100_action_west: """
illegal
0.3919
illegal
"""


@ -0,0 +1,22 @@
class: "ApproximateQLearningTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
-10
S
10
"""
discount: "0.5"
noise: "0.0"
livingReward: "0.0"
epsilon: "0.5"
learningRate: "0.1"
numExperiences: "100"
valueIterations: "100"
iterations: "10000"
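With these parameters (learningRate 0.1, discount 0.5) and one weight per (state, action), the approximate update behaves like the tabular one, which appears to be why the ((0, 0), 'exit') weight in the solution above walks 1.0, 1.9, 2.71, 3.439, ... toward the terminal reward of 10: exiting is terminal, so each experienced exit updates w to w + 0.1 * (10 - w). A sketch of the general weight update, assuming 'features' maps feature keys to values (names illustrative, not the project's ApproximateQAgent):

alpha, gamma = 0.1, 0.5           # learningRate, discount from the .test file above

def update_weights(weights, features, reward, next_state_value):
    # difference = (r + gamma * V(s')) - Q(s, a), with Q(s, a) = sum_i w_i * f_i(s, a)
    q_sa = sum(weights.get(f, 0.0) * v for f, v in features.items())
    difference = (reward + gamma * next_state_value) - q_sa
    for f, v in features.items():
        weights[f] = weights.get(f, 0.0) + alpha * difference * v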


@ -0,0 +1,429 @@
weights_k_0: """
{((0, 0), 'exit'): 0,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0,
((0, 1), 'west'): 0,
((0, 2), 'exit'): 0}
"""
q_values_k_0_action_north: """
illegal
0.0000
illegal
"""
q_values_k_0_action_east: """
illegal
0.0000
illegal
"""
q_values_k_0_action_exit: """
0.0000
illegal
0.0000
"""
q_values_k_0_action_south: """
illegal
0.0000
illegal
"""
q_values_k_0_action_west: """
illegal
0.0000
illegal
"""
weights_k_1: """
{((0, 0), 'exit'): 1.0,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0,
((0, 1), 'west'): 0,
((0, 2), 'exit'): 0}
"""
q_values_k_1_action_north: """
illegal
0.0000
illegal
"""
q_values_k_1_action_east: """
illegal
0.0000
illegal
"""
q_values_k_1_action_exit: """
0.0000
illegal
1.0000
"""
q_values_k_1_action_south: """
illegal
0.0000
illegal
"""
q_values_k_1_action_west: """
illegal
0.0000
illegal
"""
weights_k_2: """
{((0, 0), 'exit'): 1.0,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0.0,
((0, 1), 'west'): 0,
((0, 2), 'exit'): 0}
"""
q_values_k_2_action_north: """
illegal
0.0000
illegal
"""
q_values_k_2_action_east: """
illegal
0.0000
illegal
"""
q_values_k_2_action_exit: """
0.0000
illegal
1.0000
"""
q_values_k_2_action_south: """
illegal
0.0000
illegal
"""
q_values_k_2_action_west: """
illegal
0.0000
illegal
"""
weights_k_3: """
{((0, 0), 'exit'): 1.9,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0.0,
((0, 1), 'west'): 0,
((0, 2), 'exit'): 0}
"""
q_values_k_3_action_north: """
illegal
0.0000
illegal
"""
q_values_k_3_action_east: """
illegal
0.0000
illegal
"""
q_values_k_3_action_exit: """
0.0000
illegal
1.9000
"""
q_values_k_3_action_south: """
illegal
0.0000
illegal
"""
q_values_k_3_action_west: """
illegal
0.0000
illegal
"""
weights_k_4: """
{((0, 0), 'exit'): 2.71,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0.0,
((0, 1), 'west'): 0,
((0, 2), 'exit'): 0}
"""
q_values_k_4_action_north: """
illegal
0.0000
illegal
"""
q_values_k_4_action_east: """
illegal
0.0000
illegal
"""
q_values_k_4_action_exit: """
0.0000
illegal
2.7100
"""
q_values_k_4_action_south: """
illegal
0.0000
illegal
"""
q_values_k_4_action_west: """
illegal
0.0000
illegal
"""
weights_k_5: """
{((0, 0), 'exit'): 2.71,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0.0,
((0, 1), 'west'): 0,
((0, 2), 'exit'): -1.0}
"""
q_values_k_5_action_north: """
illegal
0.0000
illegal
"""
q_values_k_5_action_east: """
illegal
0.0000
illegal
"""
q_values_k_5_action_exit: """
-1.0000
illegal
2.7100
"""
q_values_k_5_action_south: """
illegal
0.0000
illegal
"""
q_values_k_5_action_west: """
illegal
0.0000
illegal
"""
weights_k_6: """
{((0, 0), 'exit'): 3.439,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0.0,
((0, 1), 'west'): 0,
((0, 2), 'exit'): -1.0}
"""
q_values_k_6_action_north: """
illegal
0.0000
illegal
"""
q_values_k_6_action_east: """
illegal
0.0000
illegal
"""
q_values_k_6_action_exit: """
-1.0000
illegal
3.4390
"""
q_values_k_6_action_south: """
illegal
0.0000
illegal
"""
q_values_k_6_action_west: """
illegal
0.0000
illegal
"""
weights_k_7: """
{((0, 0), 'exit'): 3.439,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0.257925,
((0, 1), 'west'): 0,
((0, 2), 'exit'): -1.0}
"""
q_values_k_7_action_north: """
illegal
0.0000
illegal
"""
q_values_k_7_action_east: """
illegal
0.0000
illegal
"""
q_values_k_7_action_exit: """
-1.0000
illegal
3.4390
"""
q_values_k_7_action_south: """
illegal
0.2579
illegal
"""
q_values_k_7_action_west: """
illegal
0.0000
illegal
"""
weights_k_8: """
{((0, 0), 'exit'): 4.0951,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0.257925,
((0, 1), 'west'): 0,
((0, 2), 'exit'): -1.0}
"""
q_values_k_8_action_north: """
illegal
0.0000
illegal
"""
q_values_k_8_action_east: """
illegal
0.0000
illegal
"""
q_values_k_8_action_exit: """
-1.0000
illegal
4.0951
"""
q_values_k_8_action_south: """
illegal
0.2579
illegal
"""
q_values_k_8_action_west: """
illegal
0.0000
illegal
"""
weights_k_9: """
{((0, 0), 'exit'): 4.68559,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0.257925,
((0, 1), 'west'): 0,
((0, 2), 'exit'): -1.0}
"""
q_values_k_9_action_north: """
illegal
0.0000
illegal
"""
q_values_k_9_action_east: """
illegal
0.0000
illegal
"""
q_values_k_9_action_exit: """
-1.0000
illegal
4.6856
"""
q_values_k_9_action_south: """
illegal
0.2579
illegal
"""
q_values_k_9_action_west: """
illegal
0.0000
illegal
"""
weights_k_100: """
{((0, 0), 'exit'): 9.817519963685992,
((0, 1), 'east'): 0.9498968104823575,
((0, 1), 'north'): -0.66699795412272,
((0, 1), 'south'): 3.256207905310105,
((0, 1), 'west'): 0.8236280735014627,
((0, 2), 'exit'): -9.476652366972639}
"""
q_values_k_100_action_north: """
illegal
-0.6670
illegal
"""
q_values_k_100_action_east: """
illegal
0.9499
illegal
"""
q_values_k_100_action_exit: """
-9.4767
illegal
9.8175
"""
q_values_k_100_action_south: """
illegal
3.2562
illegal
"""
q_values_k_100_action_west: """
illegal
0.8236
illegal
"""


@ -0,0 +1,22 @@
class: "ApproximateQLearningTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
-10
S
10
"""
discount: "0.75"
noise: "0.25"
livingReward: "0.0"
epsilon: "0.5"
learningRate: "0.1"
numExperiences: "100"
valueIterations: "100"
iterations: "10000"


@ -0,0 +1,935 @@
weights_k_0: """
{((0, 1), 'exit'): 0,
((0, 2), 'exit'): 0,
((0, 3), 'exit'): 0,
((0, 4), 'exit'): 0,
((0, 5), 'exit'): 0,
((1, 0), 'exit'): 0,
((1, 1), 'east'): 0,
((1, 1), 'north'): 0,
((1, 1), 'south'): 0,
((1, 1), 'west'): 0,
((1, 2), 'east'): 0,
((1, 2), 'north'): 0,
((1, 2), 'south'): 0,
((1, 2), 'west'): 0,
((1, 3), 'east'): 0,
((1, 3), 'north'): 0,
((1, 3), 'south'): 0,
((1, 3), 'west'): 0,
((1, 4), 'east'): 0,
((1, 4), 'north'): 0,
((1, 4), 'south'): 0,
((1, 4), 'west'): 0,
((1, 5), 'east'): 0,
((1, 5), 'north'): 0,
((1, 5), 'south'): 0,
((1, 5), 'west'): 0,
((1, 6), 'exit'): 0,
((2, 1), 'exit'): 0,
((2, 2), 'exit'): 0,
((2, 3), 'exit'): 0,
((2, 4), 'exit'): 0,
((2, 5), 'exit'): 0}
"""
q_values_k_0_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_0_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_0_action_exit: """
__________ 0.0000 __________
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_0_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_0_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
weights_k_1: """
{((0, 1), 'exit'): 0,
((0, 2), 'exit'): 0,
((0, 3), 'exit'): 0,
((0, 4), 'exit'): 0,
((0, 5), 'exit'): -10.0,
((1, 0), 'exit'): 0,
((1, 1), 'east'): 0,
((1, 1), 'north'): 0,
((1, 1), 'south'): 0,
((1, 1), 'west'): 0,
((1, 2), 'east'): 0,
((1, 2), 'north'): 0,
((1, 2), 'south'): 0,
((1, 2), 'west'): 0,
((1, 3), 'east'): 0,
((1, 3), 'north'): 0,
((1, 3), 'south'): 0,
((1, 3), 'west'): 0,
((1, 4), 'east'): 0,
((1, 4), 'north'): 0,
((1, 4), 'south'): 0,
((1, 4), 'west'): 0,
((1, 5), 'east'): 0,
((1, 5), 'north'): 0,
((1, 5), 'south'): 0,
((1, 5), 'west'): 0,
((1, 6), 'exit'): 0,
((2, 1), 'exit'): 0,
((2, 2), 'exit'): 0,
((2, 3), 'exit'): 0,
((2, 4), 'exit'): 0,
((2, 5), 'exit'): 0}
"""
q_values_k_1_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_1_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_1_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_1_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_1_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
weights_k_2: """
{((0, 1), 'exit'): 0,
((0, 2), 'exit'): 0,
((0, 3), 'exit'): 0,
((0, 4), 'exit'): 0,
((0, 5), 'exit'): -10.0,
((1, 0), 'exit'): 0,
((1, 1), 'east'): 0,
((1, 1), 'north'): 0,
((1, 1), 'south'): 0,
((1, 1), 'west'): 0,
((1, 2), 'east'): 0,
((1, 2), 'north'): 0,
((1, 2), 'south'): 0,
((1, 2), 'west'): 0,
((1, 3), 'east'): 0,
((1, 3), 'north'): 0,
((1, 3), 'south'): 0,
((1, 3), 'west'): 0,
((1, 4), 'east'): 0,
((1, 4), 'north'): 0,
((1, 4), 'south'): 0,
((1, 4), 'west'): 0,
((1, 5), 'east'): 0,
((1, 5), 'north'): 0,
((1, 5), 'south'): 0.0,
((1, 5), 'west'): 0,
((1, 6), 'exit'): 0,
((2, 1), 'exit'): 0,
((2, 2), 'exit'): 0,
((2, 3), 'exit'): 0,
((2, 4), 'exit'): 0,
((2, 5), 'exit'): 0}
"""
q_values_k_2_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_2_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_2_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_2_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_2_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
weights_k_3: """
{((0, 1), 'exit'): -10.0,
((0, 2), 'exit'): 0,
((0, 3), 'exit'): 0,
((0, 4), 'exit'): 0,
((0, 5), 'exit'): -10.0,
((1, 0), 'exit'): 0,
((1, 1), 'east'): 0,
((1, 1), 'north'): 0,
((1, 1), 'south'): 0,
((1, 1), 'west'): 0,
((1, 2), 'east'): 0,
((1, 2), 'north'): 0,
((1, 2), 'south'): 0,
((1, 2), 'west'): 0,
((1, 3), 'east'): 0,
((1, 3), 'north'): 0,
((1, 3), 'south'): 0,
((1, 3), 'west'): 0,
((1, 4), 'east'): 0,
((1, 4), 'north'): 0,
((1, 4), 'south'): 0,
((1, 4), 'west'): 0,
((1, 5), 'east'): 0,
((1, 5), 'north'): 0,
((1, 5), 'south'): 0.0,
((1, 5), 'west'): 0,
((1, 6), 'exit'): 0,
((2, 1), 'exit'): 0,
((2, 2), 'exit'): 0,
((2, 3), 'exit'): 0,
((2, 4), 'exit'): 0,
((2, 5), 'exit'): 0}
"""
q_values_k_3_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_3_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_3_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
-10.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_3_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_3_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
weights_k_4: """
{((0, 1), 'exit'): -10.0,
((0, 2), 'exit'): 0,
((0, 3), 'exit'): 0,
((0, 4), 'exit'): -10.0,
((0, 5), 'exit'): -10.0,
((1, 0), 'exit'): 0,
((1, 1), 'east'): 0,
((1, 1), 'north'): 0,
((1, 1), 'south'): 0,
((1, 1), 'west'): 0,
((1, 2), 'east'): 0,
((1, 2), 'north'): 0,
((1, 2), 'south'): 0,
((1, 2), 'west'): 0,
((1, 3), 'east'): 0,
((1, 3), 'north'): 0,
((1, 3), 'south'): 0,
((1, 3), 'west'): 0,
((1, 4), 'east'): 0,
((1, 4), 'north'): 0,
((1, 4), 'south'): 0,
((1, 4), 'west'): 0,
((1, 5), 'east'): 0,
((1, 5), 'north'): 0,
((1, 5), 'south'): 0.0,
((1, 5), 'west'): 0,
((1, 6), 'exit'): 0,
((2, 1), 'exit'): 0,
((2, 2), 'exit'): 0,
((2, 3), 'exit'): 0,
((2, 4), 'exit'): 0,
((2, 5), 'exit'): 0}
"""
q_values_k_4_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_4_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_4_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
-10.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_4_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_4_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
weights_k_5: """
{((0, 1), 'exit'): -10.0,
((0, 2), 'exit'): 0,
((0, 3), 'exit'): 0,
((0, 4), 'exit'): -10.0,
((0, 5), 'exit'): -10.0,
((1, 0), 'exit'): 0,
((1, 1), 'east'): 0,
((1, 1), 'north'): 0,
((1, 1), 'south'): 0,
((1, 1), 'west'): 0,
((1, 2), 'east'): 0,
((1, 2), 'north'): 0,
((1, 2), 'south'): 0,
((1, 2), 'west'): 0,
((1, 3), 'east'): 0,
((1, 3), 'north'): 0,
((1, 3), 'south'): 0,
((1, 3), 'west'): 0,
((1, 4), 'east'): 0,
((1, 4), 'north'): 0,
((1, 4), 'south'): 0,
((1, 4), 'west'): 0,
((1, 5), 'east'): 0,
((1, 5), 'north'): 0,
((1, 5), 'south'): 0.0,
((1, 5), 'west'): 0,
((1, 6), 'exit'): 0,
((2, 1), 'exit'): 0,
((2, 2), 'exit'): 0,
((2, 3), 'exit'): -10.0,
((2, 4), 'exit'): 0,
((2, 5), 'exit'): 0}
"""
q_values_k_5_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_5_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_5_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
0.0000 illegal -10.0000
0.0000 illegal 0.0000
-10.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_5_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_5_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
weights_k_6: """
{((0, 1), 'exit'): -10.0,
((0, 2), 'exit'): 0,
((0, 3), 'exit'): -10.0,
((0, 4), 'exit'): -10.0,
((0, 5), 'exit'): -10.0,
((1, 0), 'exit'): 0,
((1, 1), 'east'): 0,
((1, 1), 'north'): 0,
((1, 1), 'south'): 0,
((1, 1), 'west'): 0,
((1, 2), 'east'): 0,
((1, 2), 'north'): 0,
((1, 2), 'south'): 0,
((1, 2), 'west'): 0,
((1, 3), 'east'): 0,
((1, 3), 'north'): 0,
((1, 3), 'south'): 0,
((1, 3), 'west'): 0,
((1, 4), 'east'): 0,
((1, 4), 'north'): 0,
((1, 4), 'south'): 0,
((1, 4), 'west'): 0,
((1, 5), 'east'): 0,
((1, 5), 'north'): 0,
((1, 5), 'south'): 0.0,
((1, 5), 'west'): 0,
((1, 6), 'exit'): 0,
((2, 1), 'exit'): 0,
((2, 2), 'exit'): 0,
((2, 3), 'exit'): -10.0,
((2, 4), 'exit'): 0,
((2, 5), 'exit'): 0}
"""
q_values_k_6_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_6_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_6_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
-10.0000 illegal -10.0000
0.0000 illegal 0.0000
-10.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_6_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_6_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
weights_k_7: """
{((0, 1), 'exit'): -10.0,
((0, 2), 'exit'): 0,
((0, 3), 'exit'): -10.0,
((0, 4), 'exit'): -10.0,
((0, 5), 'exit'): -10.0,
((1, 0), 'exit'): 0,
((1, 1), 'east'): 0,
((1, 1), 'north'): 0,
((1, 1), 'south'): 0,
((1, 1), 'west'): 0,
((1, 2), 'east'): 0,
((1, 2), 'north'): 0,
((1, 2), 'south'): 0,
((1, 2), 'west'): 0,
((1, 3), 'east'): 0,
((1, 3), 'north'): 0,
((1, 3), 'south'): 0.0,
((1, 3), 'west'): 0,
((1, 4), 'east'): 0,
((1, 4), 'north'): 0,
((1, 4), 'south'): 0,
((1, 4), 'west'): 0,
((1, 5), 'east'): 0,
((1, 5), 'north'): 0,
((1, 5), 'south'): 0.0,
((1, 5), 'west'): 0,
((1, 6), 'exit'): 0,
((2, 1), 'exit'): 0,
((2, 2), 'exit'): 0,
((2, 3), 'exit'): -10.0,
((2, 4), 'exit'): 0,
((2, 5), 'exit'): 0}
"""
q_values_k_7_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_7_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_7_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
-10.0000 illegal -10.0000
0.0000 illegal 0.0000
-10.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_7_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_7_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
weights_k_8: """
{((0, 1), 'exit'): -10.0,
((0, 2), 'exit'): -10.0,
((0, 3), 'exit'): -10.0,
((0, 4), 'exit'): -10.0,
((0, 5), 'exit'): -10.0,
((1, 0), 'exit'): 0,
((1, 1), 'east'): 0,
((1, 1), 'north'): 0,
((1, 1), 'south'): 0,
((1, 1), 'west'): 0,
((1, 2), 'east'): 0,
((1, 2), 'north'): 0,
((1, 2), 'south'): 0,
((1, 2), 'west'): 0,
((1, 3), 'east'): 0,
((1, 3), 'north'): 0,
((1, 3), 'south'): 0.0,
((1, 3), 'west'): 0,
((1, 4), 'east'): 0,
((1, 4), 'north'): 0,
((1, 4), 'south'): 0,
((1, 4), 'west'): 0,
((1, 5), 'east'): 0,
((1, 5), 'north'): 0,
((1, 5), 'south'): 0.0,
((1, 5), 'west'): 0,
((1, 6), 'exit'): 0,
((2, 1), 'exit'): 0,
((2, 2), 'exit'): 0,
((2, 3), 'exit'): -10.0,
((2, 4), 'exit'): 0,
((2, 5), 'exit'): 0}
"""
q_values_k_8_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_8_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_8_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
-10.0000 illegal -10.0000
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_8_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_8_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
weights_k_9: """
{((0, 1), 'exit'): -10.0,
((0, 2), 'exit'): -10.0,
((0, 3), 'exit'): -10.0,
((0, 4), 'exit'): -10.0,
((0, 5), 'exit'): -10.0,
((1, 0), 'exit'): 0.1,
((1, 1), 'east'): 0,
((1, 1), 'north'): 0,
((1, 1), 'south'): 0,
((1, 1), 'west'): 0,
((1, 2), 'east'): 0,
((1, 2), 'north'): 0,
((1, 2), 'south'): 0,
((1, 2), 'west'): 0,
((1, 3), 'east'): 0,
((1, 3), 'north'): 0,
((1, 3), 'south'): 0.0,
((1, 3), 'west'): 0,
((1, 4), 'east'): 0,
((1, 4), 'north'): 0,
((1, 4), 'south'): 0,
((1, 4), 'west'): 0,
((1, 5), 'east'): 0,
((1, 5), 'north'): 0,
((1, 5), 'south'): 0.0,
((1, 5), 'west'): 0,
((1, 6), 'exit'): 0,
((2, 1), 'exit'): 0,
((2, 2), 'exit'): 0,
((2, 3), 'exit'): -10.0,
((2, 4), 'exit'): 0,
((2, 5), 'exit'): 0}
"""
q_values_k_9_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_9_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_9_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
-10.0000 illegal -10.0000
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
__________ 0.1000 __________
"""
q_values_k_9_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_9_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
weights_k_500: """
{((0, 1), 'exit'): -92.82102012308148,
((0, 2), 'exit'): -97.21871610556306,
((0, 3), 'exit'): -89.05810108684878,
((0, 4), 'exit'): -97.74716004550608,
((0, 5), 'exit'): -96.56631617970748,
((1, 0), 'exit'): 0.9576088417247839,
((1, 1), 'east'): -31.68839649871871,
((1, 1), 'north'): -0.5871409700255297,
((1, 1), 'south'): -5.571799344704395,
((1, 1), 'west'): -20.777007017445538,
((1, 2), 'east'): -43.595242197319,
((1, 2), 'north'): -1.264202431807023,
((1, 2), 'south'): -8.401530599975509,
((1, 2), 'west'): -16.283916171605192,
((1, 3), 'east'): -3.6956691,
((1, 3), 'north'): -0.16712710492783758,
((1, 3), 'south'): -3.4722840178579073,
((1, 3), 'west'): -40.58867937480968,
((1, 4), 'east'): -26.553386621338632,
((1, 4), 'north'): -0.799493322153628,
((1, 4), 'south'): -6.727671187497919,
((1, 4), 'west'): -39.06095135014759,
((1, 5), 'east'): -17.067638934181446,
((1, 5), 'north'): -5.864753060887024,
((1, 5), 'south'): -6.83769420759525,
((1, 5), 'west'): -27.062643066307515,
((1, 6), 'exit'): 9.353891811077332,
((2, 1), 'exit'): -97.21871610556306,
((2, 2), 'exit'): -87.84233454094309,
((2, 3), 'exit'): -96.90968456173674,
((2, 4), 'exit'): -94.185026299696,
((2, 5), 'exit'): -96.90968456173674}
"""
q_values_k_500_action_north: """
__________ illegal __________
illegal -5.8648 illegal
illegal -0.7995 illegal
illegal -0.1671 illegal
illegal -1.2642 illegal
illegal -0.5871 illegal
__________ illegal __________
"""
q_values_k_500_action_east: """
__________ illegal __________
illegal -17.0676 illegal
illegal -26.5534 illegal
illegal -3.6957 illegal
illegal -43.5952 illegal
illegal -31.6884 illegal
__________ illegal __________
"""
q_values_k_500_action_exit: """
__________ 9.3539 __________
-96.5663 illegal -96.9097
-97.7472 illegal -94.1850
-89.0581 illegal -96.9097
-97.2187 illegal -87.8423
-92.8210 illegal -97.2187
__________ 0.9576 __________
"""
q_values_k_500_action_south: """
__________ illegal __________
illegal -6.8377 illegal
illegal -6.7277 illegal
illegal -3.4723 illegal
illegal -8.4015 illegal
illegal -5.5718 illegal
__________ illegal __________
"""
q_values_k_500_action_west: """
__________ illegal __________
illegal -27.0626 illegal
illegal -39.0610 illegal
illegal -40.5887 illegal
illegal -16.2839 illegal
illegal -20.7770 illegal
__________ illegal __________
"""


@ -0,0 +1,27 @@
class: "ApproximateQLearningTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
# 10 #
-100 _ -100
-100 _ -100
-100 _ -100
-100 _ -100
-100 S -100
# 1 #
"""
gridName: "bridgeGrid"
discount: "0.85"
noise: "0.1"
livingReward: "0.0"
epsilon: "0.5"
learningRate: "0.1"
numExperiences: "500"
valueIterations: "100"
iterations: "10000"

File diff suppressed because it is too large.


@ -0,0 +1,24 @@
class: "ApproximateQLearningTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
-10 _ 10 _ _
-10 _ # _ _
-10 _ 1 _ _
-10 _ # # _
-10 S _ _ _
"""
discount: "0.9"
noise: "0.2"
livingReward: "0.0"
epsilon: "0.2"
learningRate: "0.1"
numExperiences: "3000"
valueIterations: "100"
iterations: "10000"
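Unlike the per-(state, action) weights above, the solution file that follows keys its weights by generic features ('action=...', 'x=...', 'y=...', and raw coordinates), so a single Q-value is the dot product of the weight vector with whatever features are active for that (state, action). A sketch of that evaluation, assuming a feature extractor that returns such a dict:

def q_value(weights, features):
    # Q(s, a) = sum_i w_i * f_i(s, a), over the features the extractor activates.
    return sum(weights.get(f, 0.0) * value for f, value in features.items())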


@ -0,0 +1,880 @@
weights_k_0: """
{'action=east': 0,
'action=exit': 0,
'action=north': 0,
'action=south': 0,
'action=west': 0,
'x=0': 0,
'x=1': 0,
'x=2': 0,
'x=3': 0,
'x=4': 0,
'y=0': 0,
'y=1': 0,
'y=2': 0,
'y=3': 0,
'y=4': 0,
(0, 0): 0,
(0, 1): 0,
(0, 2): 0,
(0, 3): 0,
(0, 4): 0,
(1, 0): 0,
(1, 1): 0,
(1, 2): 0,
(1, 3): 0,
(1, 4): 0,
(2, 0): 0,
(2, 2): 0,
(2, 4): 0,
(3, 0): 0,
(3, 2): 0,
(3, 3): 0,
(3, 4): 0,
(4, 0): 0,
(4, 1): 0,
(4, 2): 0,
(4, 3): 0,
(4, 4): 0}
"""
q_values_k_0_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_0_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_0_action_exit: """
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
0.0000 illegal illegal illegal illegal
"""
q_values_k_0_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_0_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
weights_k_1: """
{'action=east': 0,
'action=exit': 0,
'action=north': 0,
'action=south': 0.0,
'action=west': 0,
'x=0': 0,
'x=1': 0.0,
'x=2': 0,
'x=3': 0,
'x=4': 0,
'y=0': 0,
'y=1': 0.0,
'y=2': 0,
'y=3': 0,
'y=4': 0,
(0, 0): 0,
(0, 1): 0,
(0, 2): 0,
(0, 3): 0,
(0, 4): 0,
(1, 0): 0.0,
(1, 1): 0,
(1, 2): 0,
(1, 3): 0,
(1, 4): 0,
(2, 0): 0,
(2, 2): 0,
(2, 4): 0,
(3, 0): 0,
(3, 2): 0,
(3, 3): 0,
(3, 4): 0,
(4, 0): 0,
(4, 1): 0,
(4, 2): 0,
(4, 3): 0,
(4, 4): 0}
"""
q_values_k_1_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_1_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_1_action_exit: """
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
0.0000 illegal illegal illegal illegal
"""
q_values_k_1_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_1_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
weights_k_2: """
{'action=east': 0,
'action=exit': 0,
'action=north': 0,
'action=south': 0.0,
'action=west': 0,
'x=0': 0,
'x=1': 0.0,
'x=2': 0,
'x=3': 0.0,
'x=4': 0,
'y=0': 0,
'y=1': 0.0,
'y=2': 0,
'y=3': 0.0,
'y=4': 0,
(0, 0): 0,
(0, 1): 0,
(0, 2): 0,
(0, 3): 0,
(0, 4): 0,
(1, 0): 0.0,
(1, 1): 0,
(1, 2): 0,
(1, 3): 0,
(1, 4): 0,
(2, 0): 0,
(2, 2): 0,
(2, 4): 0,
(3, 0): 0.0,
(3, 2): 0,
(3, 3): 0,
(3, 4): 0,
(4, 0): 0,
(4, 1): 0,
(4, 2): 0,
(4, 3): 0,
(4, 4): 0}
"""
q_values_k_2_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_2_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_2_action_exit: """
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
0.0000 illegal illegal illegal illegal
"""
q_values_k_2_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_2_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
weights_k_3: """
{'action=east': 0,
'action=exit': -1.0,
'action=north': 0,
'action=south': 0.0,
'action=west': 0,
'x=0': -1.0,
'x=1': 0.0,
'x=2': 0,
'x=3': 0.0,
'x=4': 0,
'y=0': -1.0,
'y=1': 0.0,
'y=2': 0,
'y=3': 0.0,
'y=4': 0,
(0, 0): -1.0,
(0, 1): 0,
(0, 2): 0,
(0, 3): 0,
(0, 4): 0,
(1, 0): 0.0,
(1, 1): 0,
(1, 2): 0,
(1, 3): 0,
(1, 4): 0,
(2, 0): 0,
(2, 2): 0,
(2, 4): 0,
(3, 0): 0.0,
(3, 2): 0,
(3, 3): 0,
(3, 4): 0,
(4, 0): 0,
(4, 1): 0,
(4, 2): 0,
(4, 3): 0,
(4, 4): 0}
"""
q_values_k_3_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_3_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_3_action_exit: """
-3.0000 illegal -1.0000 illegal illegal
-3.0000 illegal __________ illegal illegal
-3.0000 illegal -1.0000 illegal illegal
-3.0000 illegal __________ __________ illegal
-4.0000 illegal illegal illegal illegal
"""
q_values_k_3_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_3_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
weights_k_4: """
{'action=east': 0.0,
'action=exit': -1.0,
'action=north': 0,
'action=south': 0.0,
'action=west': 0,
'x=0': -1.0,
'x=1': 0.0,
'x=2': 0,
'x=3': 0.0,
'x=4': 0,
'y=0': -1.0,
'y=1': 0.0,
'y=2': 0,
'y=3': 0.0,
'y=4': 0,
(0, 0): -1.0,
(0, 1): 0,
(0, 2): 0,
(0, 3): 0,
(0, 4): 0,
(1, 0): 0.0,
(1, 1): 0,
(1, 2): 0,
(1, 3): 0,
(1, 4): 0,
(2, 0): 0,
(2, 2): 0,
(2, 4): 0,
(3, 0): 0.0,
(3, 2): 0,
(3, 3): 0,
(3, 4): 0,
(4, 0): 0,
(4, 1): 0,
(4, 2): 0,
(4, 3): 0,
(4, 4): 0}
"""
q_values_k_4_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_4_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_4_action_exit: """
-3.0000 illegal -1.0000 illegal illegal
-3.0000 illegal __________ illegal illegal
-3.0000 illegal -1.0000 illegal illegal
-3.0000 illegal __________ __________ illegal
-4.0000 illegal illegal illegal illegal
"""
q_values_k_4_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_4_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
weights_k_5: """
{'action=east': 0.0,
'action=exit': -1.0,
'action=north': 0,
'action=south': 0.0,
'action=west': 0.0,
'x=0': -1.0,
'x=1': 0.0,
'x=2': 0,
'x=3': 0.0,
'x=4': 0.0,
'y=0': -1.0,
'y=1': 0.0,
'y=2': 0,
'y=3': 0.0,
'y=4': 0.0,
(0, 0): -1.0,
(0, 1): 0,
(0, 2): 0,
(0, 3): 0,
(0, 4): 0,
(1, 0): 0.0,
(1, 1): 0,
(1, 2): 0,
(1, 3): 0,
(1, 4): 0,
(2, 0): 0,
(2, 2): 0,
(2, 4): 0,
(3, 0): 0.0,
(3, 2): 0,
(3, 3): 0,
(3, 4): 0,
(4, 0): 0,
(4, 1): 0.0,
(4, 2): 0,
(4, 3): 0,
(4, 4): 0}
"""
q_values_k_5_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_5_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_5_action_exit: """
-3.0000 illegal -1.0000 illegal illegal
-3.0000 illegal __________ illegal illegal
-3.0000 illegal -1.0000 illegal illegal
-3.0000 illegal __________ __________ illegal
-4.0000 illegal illegal illegal illegal
"""
q_values_k_5_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_5_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
weights_k_6: """
{'action=east': 0.0,
'action=exit': -1.7000000000000002,
'action=north': 0,
'action=south': 0.0,
'action=west': 0.0,
'x=0': -1.7000000000000002,
'x=1': 0.0,
'x=2': 0,
'x=3': 0.0,
'x=4': 0.0,
'y=0': -1.7000000000000002,
'y=1': 0.0,
'y=2': 0,
'y=3': 0.0,
'y=4': 0.0,
(0, 0): -1.0,
(0, 1): 0,
(0, 2): 0,
(0, 3): -0.7000000000000001,
(0, 4): 0,
(1, 0): 0.0,
(1, 1): 0,
(1, 2): 0,
(1, 3): 0,
(1, 4): 0,
(2, 0): 0,
(2, 2): 0,
(2, 4): 0,
(3, 0): 0.0,
(3, 2): 0,
(3, 3): 0,
(3, 4): 0,
(4, 0): 0,
(4, 1): 0.0,
(4, 2): 0,
(4, 3): 0,
(4, 4): 0}
"""
q_values_k_6_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_6_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_6_action_exit: """
-5.1000 illegal -1.7000 illegal illegal
-5.8000 illegal __________ illegal illegal
-5.1000 illegal -1.7000 illegal illegal
-5.1000 illegal __________ __________ illegal
-6.1000 illegal illegal illegal illegal
"""
q_values_k_6_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_6_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
weights_k_7: """
{'action=east': 0.0,
'action=exit': -1.4300000000000002,
'action=north': 0,
'action=south': 0.0,
'action=west': 0.0,
'x=0': -1.7000000000000002,
'x=1': 0.0,
'x=2': 0.27,
'x=3': 0.0,
'x=4': 0.0,
'y=0': -1.7000000000000002,
'y=1': 0.0,
'y=2': 0.27,
'y=3': 0.0,
'y=4': 0.0,
(0, 0): -1.0,
(0, 1): 0,
(0, 2): 0,
(0, 3): -0.7000000000000001,
(0, 4): 0,
(1, 0): 0.0,
(1, 1): 0,
(1, 2): 0,
(1, 3): 0,
(1, 4): 0,
(2, 0): 0,
(2, 2): 0.27,
(2, 4): 0,
(3, 0): 0.0,
(3, 2): 0,
(3, 3): 0,
(3, 4): 0,
(4, 0): 0,
(4, 1): 0.0,
(4, 2): 0,
(4, 3): 0,
(4, 4): 0}
"""
q_values_k_7_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.5400 0.0000 0.0000
"""
q_values_k_7_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.5400 0.0000 0.0000
"""
q_values_k_7_action_exit: """
-4.8300 illegal -0.8900 illegal illegal
-5.5300 illegal __________ illegal illegal
-4.8300 illegal -0.6200 illegal illegal
-4.8300 illegal __________ __________ illegal
-5.8300 illegal illegal illegal illegal
"""
q_values_k_7_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.5400 0.0000 0.0000
"""
q_values_k_7_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.5400 0.0000 0.0000
"""
weights_k_8: """
{'action=east': 0.0,
'action=exit': -1.947,
'action=north': 0,
'action=south': 0.0,
'action=west': 0.0,
'x=0': -2.217,
'x=1': 0.0,
'x=2': 0.27,
'x=3': 0.0,
'x=4': 0.0,
'y=0': -2.217,
'y=1': 0.0,
'y=2': 0.27,
'y=3': 0.0,
'y=4': 0.0,
(0, 0): -1.0,
(0, 1): -0.517,
(0, 2): 0,
(0, 3): -0.7000000000000001,
(0, 4): 0,
(1, 0): 0.0,
(1, 1): 0,
(1, 2): 0,
(1, 3): 0,
(1, 4): 0,
(2, 0): 0,
(2, 2): 0.27,
(2, 4): 0,
(3, 0): 0.0,
(3, 2): 0,
(3, 3): 0,
(3, 4): 0,
(4, 0): 0,
(4, 1): 0.0,
(4, 2): 0,
(4, 3): 0,
(4, 4): 0}
"""
q_values_k_8_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.5400 0.0000 0.0000
"""
q_values_k_8_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.5400 0.0000 0.0000
"""
q_values_k_8_action_exit: """
-6.3810 illegal -1.4070 illegal illegal
-7.0810 illegal __________ illegal illegal
-6.3810 illegal -1.1370 illegal illegal
-6.8980 illegal __________ __________ illegal
-7.3810 illegal illegal illegal illegal
"""
q_values_k_8_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.5400 0.0000 0.0000
"""
q_values_k_8_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.5400 0.0000 0.0000
"""
weights_k_9: """
{'action=east': 0.0,
'action=exit': -1.947,
'action=north': -0.62082,
'action=south': 0.0,
'action=west': 0.0,
'x=0': -2.217,
'x=1': -0.62082,
'x=2': 0.27,
'x=3': 0.0,
'x=4': 0.0,
'y=0': -2.217,
'y=1': -0.62082,
'y=2': 0.27,
'y=3': 0.0,
'y=4': 0.0,
(0, 0): -1.0,
(0, 1): -0.517,
(0, 2): 0,
(0, 3): -0.7000000000000001,
(0, 4): 0,
(1, 0): 0.0,
(1, 1): -0.62082,
(1, 2): 0,
(1, 3): 0,
(1, 4): 0,
(2, 0): 0,
(2, 2): 0.27,
(2, 4): 0,
(3, 0): 0.0,
(3, 2): 0,
(3, 3): 0,
(3, 4): 0,
(4, 0): 0,
(4, 1): 0.0,
(4, 2): 0,
(4, 3): 0,
(4, 4): 0}
"""
q_values_k_9_action_north: """
illegal -1.8625 illegal -0.6208 -0.6208
illegal -1.8625 __________ -0.6208 -0.6208
illegal -1.8625 illegal -0.6208 -0.6208
illegal -2.4833 __________ __________ -0.6208
illegal -1.8625 -0.0808 -0.6208 -0.6208
"""
q_values_k_9_action_east: """
illegal -1.2416 illegal 0.0000 0.0000
illegal -1.2416 __________ 0.0000 0.0000
illegal -1.2416 illegal 0.0000 0.0000
illegal -1.8625 __________ __________ 0.0000
illegal -1.2416 0.5400 0.0000 0.0000
"""
q_values_k_9_action_exit: """
-6.3810 illegal -1.4070 illegal illegal
-7.0810 illegal __________ illegal illegal
-6.3810 illegal -1.1370 illegal illegal
-6.8980 illegal __________ __________ illegal
-7.3810 illegal illegal illegal illegal
"""
q_values_k_9_action_south: """
illegal -1.2416 illegal 0.0000 0.0000
illegal -1.2416 __________ 0.0000 0.0000
illegal -1.2416 illegal 0.0000 0.0000
illegal -1.8625 __________ __________ 0.0000
illegal -1.2416 0.5400 0.0000 0.0000
"""
q_values_k_9_action_west: """
illegal -1.2416 illegal 0.0000 0.0000
illegal -1.2416 __________ 0.0000 0.0000
illegal -1.2416 illegal 0.0000 0.0000
illegal -1.8625 __________ __________ 0.0000
illegal -1.2416 0.5400 0.0000 0.0000
"""
weights_k_3000: """
{'action=east': 6.719916513522846,
'action=exit': -2.2444981376861555,
'action=north': 4.568574519923728,
'action=south': 3.761510351874819,
'action=west': 1.2828606322891556,
'x=0': -3.604063955849794,
'x=1': 0.6731476152061693,
'x=2': 4.000208353074704,
'x=3': 5.988311380073477,
'x=4': 7.0307604874198235,
'y=0': -3.604063955849794,
'y=1': 0.6731476152061693,
'y=2': 4.000208353074704,
'y=3': 5.988311380073477,
'y=4': 7.0307604874198235,
(0, 0): -0.7073688447583666,
(0, 1): -0.7542862401704076,
(0, 2): -0.7043014501203066,
(0, 3): -0.7433344649617668,
(0, 4): -0.6947729558389527,
(1, 0): 2.364273811399719,
(1, 1): -0.2695405704605499,
(1, 2): -0.7105979212702271,
(1, 3): -1.4866826750327933,
(1, 4): 0.7756949705700219,
(2, 0): 2.64064253491107,
(2, 2): -3.7381118310263166,
(2, 4): 5.097677649189953,
(3, 0): 2.505262939441149,
(3, 2): 0.27218788923837256,
(3, 3): 2.2611084206093195,
(3, 4): 0.9497521307846304,
(4, 0): 1.7330586015291545,
(4, 1): 0.980194046153168,
(4, 2): 0.78786289128181,
(4, 3): 1.493343270762865,
(4, 4): 2.0363016776928333}
"""
q_values_k_3000_action_north: """
illegal 6.6906 illegal 17.4949 20.6664
illegal 4.4282 __________ 18.8063 20.1234
illegal 5.2043 illegal 16.8174 19.4180
illegal 5.6453 __________ __________ 19.6103
illegal 8.2791 15.2096 19.0505 20.3632
"""
q_values_k_3000_action_east: """
illegal 8.8419 illegal 19.6463 22.8177
illegal 6.5795 __________ 20.9576 22.2748
illegal 7.3556 illegal 18.9687 21.5693
illegal 7.7967 __________ __________ 21.7616
illegal 10.4305 17.3610 21.2018 22.5145
"""
q_values_k_3000_action_exit: """
-10.1474 illegal 10.8536 illegal illegal
-10.1960 illegal __________ illegal illegal
-10.1569 illegal 2.0178 illegal illegal
-10.2069 illegal __________ __________ illegal
-10.1600 illegal illegal illegal illegal
"""
q_values_k_3000_action_south: """
illegal 5.8835 illegal 16.6879 19.8593
illegal 3.6211 __________ 17.9992 19.3164
illegal 4.3972 illegal 16.0103 18.6109
illegal 4.8383 __________ __________ 18.8032
illegal 7.4721 14.4026 18.2434 19.5561
"""
q_values_k_3000_action_west: """
illegal 3.4049 illegal 14.2092 17.3807
illegal 1.1425 __________ 15.5206 16.8377
illegal 1.9186 illegal 13.5317 16.1322
illegal 2.3596 __________ __________ 16.3246
illegal 4.9934 11.9239 15.7647 17.0774
"""

View file

@ -0,0 +1,25 @@
class: "ApproximateQLearningTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
-10 _ 10 _ _
-10 _ # _ _
-10 _ 1 _ _
-10 _ # # _
-10 S _ _ _
"""
discount: "0.9"
noise: "0.2"
livingReward: "0.0"
epsilon: "0.2"
learningRate: "0.1"
numExperiences: "3000"
valueIterations: "100"
iterations: "10000"
extractor: "CoordinateExtractor"

View file

@ -0,0 +1,2 @@
max_points: "3"
class: "PassAllTestsQuestion"

View file

@ -0,0 +1,81 @@
# textDisplay.py
# --------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
import time
try:
import pacman
except:
pass
DRAW_EVERY = 1
SLEEP_TIME = 0 # This can be overwritten by __init__
DISPLAY_MOVES = False
QUIET = False # Suppresses output
class NullGraphics:
def initialize(self, state, isBlue = False):
pass
def update(self, state):
pass
def checkNullDisplay(self):
return True
def pause(self):
time.sleep(SLEEP_TIME)
def draw(self, state):
print state
def updateDistributions(self, dist):
pass
def finish(self):
pass
class PacmanGraphics:
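# Minimal text display: update() prints the ASCII game state every DRAW_EVERY
# turns (and on win/lose) and sleeps SLEEP_TIME seconds between frames.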
def __init__(self, speed=None):
if speed != None:
global SLEEP_TIME
SLEEP_TIME = speed
def initialize(self, state, isBlue = False):
self.draw(state)
self.pause()
self.turn = 0
self.agentCounter = 0
def update(self, state):
numAgents = len(state.agentStates)
self.agentCounter = (self.agentCounter + 1) % numAgents
if self.agentCounter == 0:
self.turn += 1
if DISPLAY_MOVES:
ghosts = [pacman.nearestPoint(state.getGhostPosition(i)) for i in range(1, numAgents)]
print "%4d) P: %-8s" % (self.turn, str(pacman.nearestPoint(state.getPacmanPosition()))),'| Score: %-5d' % state.score,'| Ghosts:', ghosts
if self.turn % DRAW_EVERY == 0:
self.draw(state)
self.pause()
if state._win or state._lose:
self.draw(state)
def pause(self):
time.sleep(SLEEP_TIME)
def draw(self, state):
print state
def finish(self):
pass

View file

@ -0,0 +1,324 @@
# textGridworldDisplay.py
# -----------------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
import util
class TextGridworldDisplay:
def __init__(self, gridworld):
self.gridworld = gridworld
def start(self):
pass
def pause(self):
pass
def displayValues(self, agent, currentState = None, message = None):
if message != None:
print message
values = util.Counter()
policy = {}
states = self.gridworld.getStates()
for state in states:
values[state] = agent.getValue(state)
policy[state] = agent.getPolicy(state)
prettyPrintValues(self.gridworld, values, policy, currentState)
def displayNullValues(self, agent, currentState = None, message = None):
if message != None: print message
prettyPrintNullValues(self.gridworld, currentState)
def displayQValues(self, agent, currentState = None, message = None):
if message != None: print message
qValues = util.Counter()
states = self.gridworld.getStates()
for state in states:
for action in self.gridworld.getPossibleActions(state):
qValues[(state, action)] = agent.getQValue(state, action)
prettyPrintQValues(self.gridworld, qValues, currentState)
def prettyPrintValues(gridWorld, values, policy=None, currentState = None):
grid = gridWorld.grid
maxLen = 11
newRows = []
for y in range(grid.height):
newRow = []
for x in range(grid.width):
state = (x, y)
value = values[state]
action = None
if policy != None and state in policy:
action = policy[state]
actions = gridWorld.getPossibleActions(state)
if action not in actions and 'exit' in actions:
action = 'exit'
valString = None
if action == 'exit':
valString = border('%.2f' % value)
else:
valString = '\n\n%.2f\n\n' % value
valString += ' '*maxLen
if grid[x][y] == 'S':
valString = '\n\nS: %.2f\n\n' % value
valString += ' '*maxLen
if grid[x][y] == '#':
valString = '\n#####\n#####\n#####\n'
valString += ' '*maxLen
pieces = [valString]
text = ("\n".join(pieces)).split('\n')
if currentState == state:
l = len(text[1])
if l == 0:
text[1] = '*'
else:
text[1] = "|" + ' ' * int((l-1)/2-1) + '*' + ' ' * int((l)/2-1) + "|"
if action == 'east':
text[2] = ' ' + text[2] + ' >'
elif action == 'west':
text[2] = '< ' + text[2] + ' '
elif action == 'north':
text[0] = ' ' * int(maxLen/2) + '^' +' ' * int(maxLen/2)
elif action == 'south':
text[4] = ' ' * int(maxLen/2) + 'v' +' ' * int(maxLen/2)
newCell = "\n".join(text)
newRow.append(newCell)
newRows.append(newRow)
numCols = grid.width
for rowNum, row in enumerate(newRows):
row.insert(0,"\n\n"+str(rowNum))
newRows.reverse()
colLabels = [str(colNum) for colNum in range(numCols)]
colLabels.insert(0,' ')
finalRows = [colLabels] + newRows
print indent(finalRows,separateRows=True,delim='|', prefix='|',postfix='|', justify='center',hasHeader=True)
def prettyPrintNullValues(gridWorld, currentState = None):
grid = gridWorld.grid
maxLen = 11
newRows = []
for y in range(grid.height):
newRow = []
for x in range(grid.width):
state = (x, y)
# value = values[state]
action = None
# if policy != None and state in policy:
# action = policy[state]
#
actions = gridWorld.getPossibleActions(state)
if action not in actions and 'exit' in actions:
action = 'exit'
valString = None
# if action == 'exit':
# valString = border('%.2f' % value)
# else:
# valString = '\n\n%.2f\n\n' % value
# valString += ' '*maxLen
if grid[x][y] == 'S':
valString = '\n\nS\n\n'
valString += ' '*maxLen
elif grid[x][y] == '#':
valString = '\n#####\n#####\n#####\n'
valString += ' '*maxLen
elif type(grid[x][y]) == float or type(grid[x][y]) == int:
valString = border('%.2f' % float(grid[x][y]))
else: valString = border(' ')
pieces = [valString]
text = ("\n".join(pieces)).split('\n')
if currentState == state:
l = len(text[1])
if l == 0:
text[1] = '*'
else:
text[1] = "|" + ' ' * int((l-1)/2-1) + '*' + ' ' * int((l)/2-1) + "|"
if action == 'east':
text[2] = ' ' + text[2] + ' >'
elif action == 'west':
text[2] = '< ' + text[2] + ' '
elif action == 'north':
text[0] = ' ' * int(maxLen/2) + '^' +' ' * int(maxLen/2)
elif action == 'south':
text[4] = ' ' * int(maxLen/2) + 'v' +' ' * int(maxLen/2)
newCell = "\n".join(text)
newRow.append(newCell)
newRows.append(newRow)
numCols = grid.width
for rowNum, row in enumerate(newRows):
row.insert(0,"\n\n"+str(rowNum))
newRows.reverse()
colLabels = [str(colNum) for colNum in range(numCols)]
colLabels.insert(0,' ')
finalRows = [colLabels] + newRows
print indent(finalRows,separateRows=True,delim='|', prefix='|',postfix='|', justify='center',hasHeader=True)
def prettyPrintQValues(gridWorld, qValues, currentState=None):
grid = gridWorld.grid
maxLen = 11
newRows = []
for y in range(grid.height):
newRow = []
for x in range(grid.width):
state = (x, y)
actions = gridWorld.getPossibleActions(state)
if actions == None or len(actions) == 0:
actions = [None]
bestQ = max([qValues[(state, action)] for action in actions])
bestActions = [action for action in actions if qValues[(state, action)] == bestQ]
# display cell
qStrings = dict([(action, "%.2f" % qValues[(state, action)]) for action in actions])
northString = ('north' in qStrings and qStrings['north']) or ' '
southString = ('south' in qStrings and qStrings['south']) or ' '
eastString = ('east' in qStrings and qStrings['east']) or ' '
westString = ('west' in qStrings and qStrings['west']) or ' '
exitString = ('exit' in qStrings and qStrings['exit']) or ' '
eastLen = len(eastString)
westLen = len(westString)
if eastLen < westLen:
eastString = ' '*(westLen-eastLen)+eastString
if westLen < eastLen:
westString = westString+' '*(eastLen-westLen)
if 'north' in bestActions:
northString = '/'+northString+'\\'
if 'south' in bestActions:
southString = '\\'+southString+'/'
if 'east' in bestActions:
eastString = ''+eastString+'>'
else:
eastString = ''+eastString+' '
if 'west' in bestActions:
westString = '<'+westString+''
else:
westString = ' '+westString+''
if 'exit' in bestActions:
exitString = '[ '+exitString+' ]'
ewString = westString + " " + eastString
if state == currentState:
ewString = westString + " * " + eastString
if state == gridWorld.getStartState():
ewString = westString + " S " + eastString
if state == currentState and state == gridWorld.getStartState():
ewString = westString + " S:* " + eastString
text = [northString, "\n"+exitString, ewString, ' '*maxLen+"\n", southString]
if grid[x][y] == '#':
text = ['', '\n#####\n#####\n#####', '']
newCell = "\n".join(text)
newRow.append(newCell)
newRows.append(newRow)
numCols = grid.width
for rowNum, row in enumerate(newRows):
row.insert(0,"\n\n\n"+str(rowNum))
newRows.reverse()
colLabels = [str(colNum) for colNum in range(numCols)]
colLabels.insert(0,' ')
finalRows = [colLabels] + newRows
print indent(finalRows,separateRows=True,delim='|',prefix='|',postfix='|', justify='center',hasHeader=True)
def border(text):
length = len(text)
pieces = ['-' * (length+2), '|'+' ' * (length+2)+'|', ' | '+text+' | ', '|'+' ' * (length+2)+'|','-' * (length+2)]
return '\n'.join(pieces)
# INDENTING CODE
# Indenting code based on a post from George Sakkis
# (http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/267662)
import cStringIO,operator
def indent(rows, hasHeader=False, headerChar='-', delim=' | ', justify='left',
separateRows=False, prefix='', postfix='', wrapfunc=lambda x:x):
"""Indents a table by column.
- rows: A sequence of sequences of items, one sequence per row.
- hasHeader: True if the first row consists of the columns' names.
- headerChar: Character to be used for the row separator line
(if hasHeader==True or separateRows==True).
- delim: The column delimiter.
- justify: Determines how data are justified in their column.
Valid values are 'left','right' and 'center'.
- separateRows: True if rows are to be separated by a line
of 'headerChar's.
- prefix: A string prepended to each printed row.
- postfix: A string appended to each printed row.
- wrapfunc: A function f(text) for wrapping text; each element in
the table is first wrapped by this function."""
# closure for breaking logical rows to physical, using wrapfunc
def rowWrapper(row):
newRows = [wrapfunc(item).split('\n') for item in row]
return [[substr or '' for substr in item] for item in map(None,*newRows)]
# break each logical row into one or more physical ones
logicalRows = [rowWrapper(row) for row in rows]
# columns of physical rows
columns = map(None,*reduce(operator.add,logicalRows))
# get the maximum of each column by the string length of its items
maxWidths = [max([len(str(item)) for item in column]) for column in columns]
rowSeparator = headerChar * (len(prefix) + len(postfix) + sum(maxWidths) + \
len(delim)*(len(maxWidths)-1))
# select the appropriate justify method
justify = {'center':str.center, 'right':str.rjust, 'left':str.ljust}[justify.lower()]
output=cStringIO.StringIO()
if separateRows: print >> output, rowSeparator
for physicalRows in logicalRows:
for row in physicalRows:
print >> output, \
prefix \
+ delim.join([justify(str(item),width) for (item,width) in zip(row,maxWidths)]) \
+ postfix
if separateRows or hasHeader: print >> output, rowSeparator; hasHeader=False
return output.getvalue()
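# Example (illustrative): indent() renders a list of string rows as a bordered
# text table, e.g.
#   print indent([['state', 'value'], ['(0, 0)', '1.00']], hasHeader=True,
#                separateRows=True, delim='|', prefix='|', postfix='|',
#                justify='center')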
import math
def wrap_always(text, width):
"""A simple word-wrap function that wraps text on exactly width characters.
It doesn't split the text into words."""
return '\n'.join([ text[width*i:width*(i+1)] \
for i in xrange(int(math.ceil(1.*len(text)/width))) ])
# TEST OF DISPLAY CODE
if __name__ == '__main__':
import gridworld, util
grid = gridworld.getCliffGrid3()
print grid.getStates()
policy = dict([(state,'east') for state in grid.getStates()])
values = util.Counter(dict([(state,1000.23) for state in grid.getStates()]))
prettyPrintValues(grid, values, policy, currentState = (0,0))
stateCrossActions = [[(state, action) for action in grid.getPossibleActions(state)] for state in grid.getStates()]
qStates = reduce(lambda x,y: x+y, stateCrossActions, [])
qValues = util.Counter(dict([((state, action), 10.5) for state, action in qStates]))
qValues = util.Counter(dict([((state, action), 10.5) for state, action in reduce(lambda x,y: x+y, stateCrossActions, [])]))
prettyPrintQValues(grid, qValues, currentState = (0,0))

653
reinforcement/util.py Normal file
View file

@ -0,0 +1,653 @@
# util.py
# -------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
import sys
import inspect
import heapq, random
import cStringIO
class FixedRandom:
def __init__(self):
fixedState = (3, (2147483648L, 507801126L, 683453281L, 310439348L, 2597246090L, \
2209084787L, 2267831527L, 979920060L, 3098657677L, 37650879L, 807947081L, 3974896263L, \
881243242L, 3100634921L, 1334775171L, 3965168385L, 746264660L, 4074750168L, 500078808L, \
776561771L, 702988163L, 1636311725L, 2559226045L, 157578202L, 2498342920L, 2794591496L, \
4130598723L, 496985844L, 2944563015L, 3731321600L, 3514814613L, 3362575829L, 3038768745L, \
2206497038L, 1108748846L, 1317460727L, 3134077628L, 988312410L, 1674063516L, 746456451L, \
3958482413L, 1857117812L, 708750586L, 1583423339L, 3466495450L, 1536929345L, 1137240525L, \
3875025632L, 2466137587L, 1235845595L, 4214575620L, 3792516855L, 657994358L, 1241843248L, \
1695651859L, 3678946666L, 1929922113L, 2351044952L, 2317810202L, 2039319015L, 460787996L, \
3654096216L, 4068721415L, 1814163703L, 2904112444L, 1386111013L, 574629867L, 2654529343L, \
3833135042L, 2725328455L, 552431551L, 4006991378L, 1331562057L, 3710134542L, 303171486L, \
1203231078L, 2670768975L, 54570816L, 2679609001L, 578983064L, 1271454725L, 3230871056L, \
2496832891L, 2944938195L, 1608828728L, 367886575L, 2544708204L, 103775539L, 1912402393L, \
1098482180L, 2738577070L, 3091646463L, 1505274463L, 2079416566L, 659100352L, 839995305L, \
1696257633L, 274389836L, 3973303017L, 671127655L, 1061109122L, 517486945L, 1379749962L, \
3421383928L, 3116950429L, 2165882425L, 2346928266L, 2892678711L, 2936066049L, 1316407868L, \
2873411858L, 4279682888L, 2744351923L, 3290373816L, 1014377279L, 955200944L, 4220990860L, \
2386098930L, 1772997650L, 3757346974L, 1621616438L, 2877097197L, 442116595L, 2010480266L, \
2867861469L, 2955352695L, 605335967L, 2222936009L, 2067554933L, 4129906358L, 1519608541L, \
1195006590L, 1942991038L, 2736562236L, 279162408L, 1415982909L, 4099901426L, 1732201505L, \
2934657937L, 860563237L, 2479235483L, 3081651097L, 2244720867L, 3112631622L, 1636991639L, \
3860393305L, 2312061927L, 48780114L, 1149090394L, 2643246550L, 1764050647L, 3836789087L, \
3474859076L, 4237194338L, 1735191073L, 2150369208L, 92164394L, 756974036L, 2314453957L, \
323969533L, 4267621035L, 283649842L, 810004843L, 727855536L, 1757827251L, 3334960421L, \
3261035106L, 38417393L, 2660980472L, 1256633965L, 2184045390L, 811213141L, 2857482069L, \
2237770878L, 3891003138L, 2787806886L, 2435192790L, 2249324662L, 3507764896L, 995388363L, \
856944153L, 619213904L, 3233967826L, 3703465555L, 3286531781L, 3863193356L, 2992340714L, \
413696855L, 3865185632L, 1704163171L, 3043634452L, 2225424707L, 2199018022L, 3506117517L, \
3311559776L, 3374443561L, 1207829628L, 668793165L, 1822020716L, 2082656160L, 1160606415L, \
3034757648L, 741703672L, 3094328738L, 459332691L, 2702383376L, 1610239915L, 4162939394L, \
557861574L, 3805706338L, 3832520705L, 1248934879L, 3250424034L, 892335058L, 74323433L, \
3209751608L, 3213220797L, 3444035873L, 3743886725L, 1783837251L, 610968664L, 580745246L, \
4041979504L, 201684874L, 2673219253L, 1377283008L, 3497299167L, 2344209394L, 2304982920L, \
3081403782L, 2599256854L, 3184475235L, 3373055826L, 695186388L, 2423332338L, 222864327L, \
1258227992L, 3627871647L, 3487724980L, 4027953808L, 3053320360L, 533627073L, 3026232514L, \
2340271949L, 867277230L, 868513116L, 2158535651L, 2487822909L, 3428235761L, 3067196046L, \
3435119657L, 1908441839L, 788668797L, 3367703138L, 3317763187L, 908264443L, 2252100381L, \
764223334L, 4127108988L, 384641349L, 3377374722L, 1263833251L, 1958694944L, 3847832657L, \
1253909612L, 1096494446L, 555725445L, 2277045895L, 3340096504L, 1383318686L, 4234428127L, \
1072582179L, 94169494L, 1064509968L, 2681151917L, 2681864920L, 734708852L, 1338914021L, \
1270409500L, 1789469116L, 4191988204L, 1716329784L, 2213764829L, 3712538840L, 919910444L, \
1318414447L, 3383806712L, 3054941722L, 3378649942L, 1205735655L, 1268136494L, 2214009444L, \
2532395133L, 3232230447L, 230294038L, 342599089L, 772808141L, 4096882234L, 3146662953L, \
2784264306L, 1860954704L, 2675279609L, 2984212876L, 2466966981L, 2627986059L, 2985545332L, \
2578042598L, 1458940786L, 2944243755L, 3959506256L, 1509151382L, 325761900L, 942251521L, \
4184289782L, 2756231555L, 3297811774L, 1169708099L, 3280524138L, 3805245319L, 3227360276L, \
3199632491L, 2235795585L, 2865407118L, 36763651L, 2441503575L, 3314890374L, 1755526087L, \
17915536L, 1196948233L, 949343045L, 3815841867L, 489007833L, 2654997597L, 2834744136L, \
417688687L, 2843220846L, 85621843L, 747339336L, 2043645709L, 3520444394L, 1825470818L, \
647778910L, 275904777L, 1249389189L, 3640887431L, 4200779599L, 323384601L, 3446088641L, \
4049835786L, 1718989062L, 3563787136L, 44099190L, 3281263107L, 22910812L, 1826109246L, \
745118154L, 3392171319L, 1571490704L, 354891067L, 815955642L, 1453450421L, 940015623L, \
796817754L, 1260148619L, 3898237757L, 176670141L, 1870249326L, 3317738680L, 448918002L, \
4059166594L, 2003827551L, 987091377L, 224855998L, 3520570137L, 789522610L, 2604445123L, \
454472869L, 475688926L, 2990723466L, 523362238L, 3897608102L, 806637149L, 2642229586L, \
2928614432L, 1564415411L, 1691381054L, 3816907227L, 4082581003L, 1895544448L, 3728217394L, \
3214813157L, 4054301607L, 1882632454L, 2873728645L, 3694943071L, 1297991732L, 2101682438L, \
3952579552L, 678650400L, 1391722293L, 478833748L, 2976468591L, 158586606L, 2576499787L, \
662690848L, 3799889765L, 3328894692L, 2474578497L, 2383901391L, 1718193504L, 3003184595L, \
3630561213L, 1929441113L, 3848238627L, 1594310094L, 3040359840L, 3051803867L, 2462788790L, \
954409915L, 802581771L, 681703307L, 545982392L, 2738993819L, 8025358L, 2827719383L, \
770471093L, 3484895980L, 3111306320L, 3900000891L, 2116916652L, 397746721L, 2087689510L, \
721433935L, 1396088885L, 2751612384L, 1998988613L, 2135074843L, 2521131298L, 707009172L, \
2398321482L, 688041159L, 2264560137L, 482388305L, 207864885L, 3735036991L, 3490348331L, \
1963642811L, 3260224305L, 3493564223L, 1939428454L, 1128799656L, 1366012432L, 2858822447L, \
1428147157L, 2261125391L, 1611208390L, 1134826333L, 2374102525L, 3833625209L, 2266397263L, \
3189115077L, 770080230L, 2674657172L, 4280146640L, 3604531615L, 4235071805L, 3436987249L, \
509704467L, 2582695198L, 4256268040L, 3391197562L, 1460642842L, 1617931012L, 457825497L, \
1031452907L, 1330422862L, 4125947620L, 2280712485L, 431892090L, 2387410588L, 2061126784L, \
896457479L, 3480499461L, 2488196663L, 4021103792L, 1877063114L, 2744470201L, 1046140599L, \
2129952955L, 3583049218L, 4217723693L, 2720341743L, 820661843L, 1079873609L, 3360954200L, \
3652304997L, 3335838575L, 2178810636L, 1908053374L, 4026721976L, 1793145418L, 476541615L, \
973420250L, 515553040L, 919292001L, 2601786155L, 1685119450L, 3030170809L, 1590676150L, \
1665099167L, 651151584L, 2077190587L, 957892642L, 646336572L, 2743719258L, 866169074L, \
851118829L, 4225766285L, 963748226L, 799549420L, 1955032629L, 799460000L, 2425744063L, \
2441291571L, 1928963772L, 528930629L, 2591962884L, 3495142819L, 1896021824L, 901320159L, \
3181820243L, 843061941L, 3338628510L, 3782438992L, 9515330L, 1705797226L, 953535929L, \
764833876L, 3202464965L, 2970244591L, 519154982L, 3390617541L, 566616744L, 3438031503L, \
1853838297L, 170608755L, 1393728434L, 676900116L, 3184965776L, 1843100290L, 78995357L, \
2227939888L, 3460264600L, 1745705055L, 1474086965L, 572796246L, 4081303004L, 882828851L, \
1295445825L, 137639900L, 3304579600L, 2722437017L, 4093422709L, 273203373L, 2666507854L, \
3998836510L, 493829981L, 1623949669L, 3482036755L, 3390023939L, 833233937L, 1639668730L, \
1499455075L, 249728260L, 1210694006L, 3836497489L, 1551488720L, 3253074267L, 3388238003L, \
2372035079L, 3945715164L, 2029501215L, 3362012634L, 2007375355L, 4074709820L, 631485888L, \
3135015769L, 4273087084L, 3648076204L, 2739943601L, 1374020358L, 1760722448L, 3773939706L, \
1313027823L, 1895251226L, 4224465911L, 421382535L, 1141067370L, 3660034846L, 3393185650L, \
1850995280L, 1451917312L, 3841455409L, 3926840308L, 1397397252L, 2572864479L, 2500171350L, \
3119920613L, 531400869L, 1626487579L, 1099320497L, 407414753L, 2438623324L, 99073255L, \
3175491512L, 656431560L, 1153671785L, 236307875L, 2824738046L, 2320621382L, 892174056L, \
230984053L, 719791226L, 2718891946L, 624L), None)
self.random = random.Random()
self.random.setstate(fixedState)
"""
Data structures useful for implementing SearchAgents
"""
class Stack:
"A container with a last-in-first-out (LIFO) queuing policy."
def __init__(self):
self.list = []
def push(self,item):
"Push 'item' onto the stack"
self.list.append(item)
def pop(self):
"Pop the most recently pushed item from the stack"
return self.list.pop()
def isEmpty(self):
"Returns true if the stack is empty"
return len(self.list) == 0
class Queue:
"A container with a first-in-first-out (FIFO) queuing policy."
def __init__(self):
self.list = []
def push(self,item):
"Enqueue the 'item' into the queue"
self.list.insert(0,item)
def pop(self):
"""
Dequeue the earliest enqueued item still in the queue. This
operation removes the item from the queue.
"""
return self.list.pop()
def isEmpty(self):
"Returns true if the queue is empty"
return len(self.list) == 0
class PriorityQueue:
"""
Implements a priority queue data structure. Each inserted item
has a priority associated with it and the client is usually interested
in quick retrieval of the lowest-priority item in the queue. This
data structure allows O(1) access to the lowest-priority item.
Note that this PriorityQueue does not allow you to change the priority
of an item. However, you may insert the same item multiple times with
different priorities.
"""
def __init__(self):
self.heap = []
self.count = 0
def push(self, item, priority):
# Note: entries keep the original (priority, count, item) format so that ties
# in priority are broken by insertion order.
entry = (priority, self.count, item)
# entry = (priority, item)
heapq.heappush(self.heap, entry)
self.count += 1
def pop(self):
(_, _, item) = heapq.heappop(self.heap)
# (_, item) = heapq.heappop(self.heap)
return item
def isEmpty(self):
return len(self.heap) == 0
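# Example (illustrative): the item pushed with the smallest priority value is
# popped first, and ties are broken by insertion order through self.count.
#   pq = PriorityQueue(); pq.push('b', 2); pq.push('a', 1); pq.pop()  # -> 'a'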
class PriorityQueueWithFunction(PriorityQueue):
"""
Implements a priority queue with the same push/pop signature of the
Queue and the Stack classes. This is designed for drop-in replacement for
those two classes. The caller has to provide a priority function, which
extracts each item's priority.
"""
def __init__(self, priorityFunction):
"priorityFunction (item) -> priority"
self.priorityFunction = priorityFunction # store the priority function
PriorityQueue.__init__(self) # super-class initializer
def push(self, item):
"Adds an item to the queue with priority from the priority function"
PriorityQueue.push(self, item, self.priorityFunction(item))
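# Example (illustrative): PriorityQueueWithFunction(len) pops the shortest item
# first, since len(item) is used as its priority.
#   pq = PriorityQueueWithFunction(len); pq.push('abc'); pq.push('a'); pq.pop()  # -> 'a'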
def manhattanDistance( xy1, xy2 ):
"Returns the Manhattan distance between points xy1 and xy2"
return abs( xy1[0] - xy2[0] ) + abs( xy1[1] - xy2[1] )
"""
Data structures and functions useful for various course projects
The search project should not need anything below this line.
"""
class Counter(dict):
"""
A counter keeps track of counts for a set of keys.
The counter class is an extension of the standard python
dictionary type. It is specialized to have number values
(integers or floats), and includes a handful of additional
functions to ease the task of counting data. In particular,
all keys are defaulted to have value 0. Using a dictionary:
a = {}
print a['test']
would give an error, while the Counter class analogue:
>>> a = Counter()
>>> print a['test']
0
returns the default 0 value. Note that to reference a key
that you know is contained in the counter,
you can still use the dictionary syntax:
>>> a = Counter()
>>> a['test'] = 2
>>> print a['test']
2
This is very useful for counting things without initializing their counts,
see for example:
>>> a['blah'] += 1
>>> print a['blah']
1
The counter also includes additional functionality useful in implementing
the classifiers for this assignment. Two counters can be added,
subtracted or multiplied together. See below for details. They can
also be normalized and their total count and arg max can be extracted.
"""
def __getitem__(self, idx):
self.setdefault(idx, 0)
return dict.__getitem__(self, idx)
def incrementAll(self, keys, count):
"""
Increments all elements of keys by the same count.
>>> a = Counter()
>>> a.incrementAll(['one','two', 'three'], 1)
>>> a['one']
1
>>> a['two']
1
"""
for key in keys:
self[key] += count
def argMax(self):
"""
Returns the key with the highest value.
"""
if len(self.keys()) == 0: return None
all = self.items()
values = [x[1] for x in all]
maxIndex = values.index(max(values))
return all[maxIndex][0]
def sortedKeys(self):
"""
Returns a list of keys sorted by their values. Keys
with the highest values will appear first.
>>> a = Counter()
>>> a['first'] = -2
>>> a['second'] = 4
>>> a['third'] = 1
>>> a.sortedKeys()
['second', 'third', 'first']
"""
sortedItems = self.items()
compare = lambda x, y: sign(y[1] - x[1])
sortedItems.sort(cmp=compare)
return [x[0] for x in sortedItems]
def totalCount(self):
"""
Returns the sum of counts for all keys.
"""
return sum(self.values())
def normalize(self):
"""
Edits the counter such that the total count of all
keys sums to 1. The ratio of counts for all keys
will remain the same. Note that normalizing an empty
Counter (zero total) is a no-op here and leaves it unchanged.
"""
total = float(self.totalCount())
if total == 0: return
for key in self.keys():
self[key] = self[key] / total
def divideAll(self, divisor):
"""
Divides all counts by divisor
"""
divisor = float(divisor)
for key in self:
self[key] /= divisor
def copy(self):
"""
Returns a copy of the counter
"""
return Counter(dict.copy(self))
def __mul__(self, y ):
"""
Multiplying two counters gives the dot product of their vectors where
each unique label is a vector element.
>>> a = Counter()
>>> b = Counter()
>>> a['first'] = -2
>>> a['second'] = 4
>>> b['first'] = 3
>>> b['second'] = 5
>>> a['third'] = 1.5
>>> a['fourth'] = 2.5
>>> a * b
14
"""
sum = 0
x = self
if len(x) > len(y):
x,y = y,x
for key in x:
if key not in y:
continue
sum += x[key] * y[key]
return sum
def __radd__(self, y):
"""
Adding another counter to a counter increments the current counter
by the values stored in the second counter.
>>> a = Counter()
>>> b = Counter()
>>> a['first'] = -2
>>> a['second'] = 4
>>> b['first'] = 3
>>> b['third'] = 1
>>> a += b
>>> a['first']
1
"""
for key, value in y.items():
self[key] += value
def __add__( self, y ):
"""
Adding two counters gives a counter with the union of all keys and
counts of the second added to counts of the first.
>>> a = Counter()
>>> b = Counter()
>>> a['first'] = -2
>>> a['second'] = 4
>>> b['first'] = 3
>>> b['third'] = 1
>>> (a + b)['first']
1
"""
addend = Counter()
for key in self:
if key in y:
addend[key] = self[key] + y[key]
else:
addend[key] = self[key]
for key in y:
if key in self:
continue
addend[key] = y[key]
return addend
def __sub__( self, y ):
"""
Subtracting a counter from another gives a counter with the union of all keys and
counts of the second subtracted from counts of the first.
>>> a = Counter()
>>> b = Counter()
>>> a['first'] = -2
>>> a['second'] = 4
>>> b['first'] = 3
>>> b['third'] = 1
>>> (a - b)['first']
-5
"""
addend = Counter()
for key in self:
if key in y:
addend[key] = self[key] - y[key]
else:
addend[key] = self[key]
for key in y:
if key in self:
continue
addend[key] = -1 * y[key]
return addend
def raiseNotDefined():
fileName = inspect.stack()[1][1]
line = inspect.stack()[1][2]
method = inspect.stack()[1][3]
print "*** Method not implemented: %s at line %s of %s" % (method, line, fileName)
sys.exit(1)
def normalize(vectorOrCounter):
"""
normalize a vector or counter by dividing each value by the sum of all values
"""
normalizedCounter = Counter()
if type(vectorOrCounter) == type(normalizedCounter):
counter = vectorOrCounter
total = float(counter.totalCount())
if total == 0: return counter
for key in counter.keys():
value = counter[key]
normalizedCounter[key] = value / total
return normalizedCounter
else:
vector = vectorOrCounter
s = float(sum(vector))
if s == 0: return vector
return [el / s for el in vector]
def nSample(distribution, values, n):
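# Draws n samples by inverse-CDF sampling: n uniform draws are sorted, then the
# cumulative distribution is walked once, so a value can be returned repeatedly.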
if sum(distribution) != 1:
distribution = normalize(distribution)
rand = [random.random() for i in range(n)]
rand.sort()
samples = []
samplePos, distPos, cdf = 0,0, distribution[0]
while samplePos < n:
if rand[samplePos] < cdf:
samplePos += 1
samples.append(values[distPos])
else:
distPos += 1
cdf += distribution[distPos]
return samples
def sample(distribution, values = None):
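# Draws a single value; a Counter argument is first unpacked into parallel
# (distribution, values) lists and normalized when it does not sum to 1.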
if type(distribution) == Counter:
items = sorted(distribution.items())
distribution = [i[1] for i in items]
values = [i[0] for i in items]
if sum(distribution) != 1:
distribution = normalize(distribution)
choice = random.random()
i, total= 0, distribution[0]
while choice > total:
i += 1
total += distribution[i]
return values[i]
def sampleFromCounter(ctr):
items = sorted(ctr.items())
return sample([v for k,v in items], [k for k,v in items])
def getProbability(value, distribution, values):
"""
Gives the probability of a value under a discrete distribution
defined by (distributions, values).
"""
total = 0.0
for prob, val in zip(distribution, values):
if val == value:
total += prob
return total
def flipCoin( p ):
r = random.random()
return r < p
def chooseFromDistribution( distribution ):
"Takes either a counter or a list of (prob, key) pairs and samples"
if type(distribution) == dict or type(distribution) == Counter:
return sample(distribution)
r = random.random()
base = 0.0
for prob, element in distribution:
base += prob
if r <= base: return element
def nearestPoint( pos ):
"""
Finds the nearest grid point to a position (discretizes).
"""
( current_row, current_col ) = pos
grid_row = int( current_row + 0.5 )
grid_col = int( current_col + 0.5 )
return ( grid_row, grid_col )
def sign( x ):
"""
Returns 1 or -1 depending on the sign of x
"""
if( x >= 0 ):
return 1
else:
return -1
def arrayInvert(array):
"""
Inverts a matrix stored as a list of lists.
"""
result = [[] for i in array]
for outer in array:
for inner in range(len(outer)):
result[inner].append(outer[inner])
return result
def matrixAsList( matrix, value = True ):
"""
Turns a matrix into a list of coordinates matching the specified value
"""
rows, cols = len( matrix ), len( matrix[0] )
cells = []
for row in range( rows ):
for col in range( cols ):
if matrix[row][col] == value:
cells.append( ( row, col ) )
return cells
def lookup(name, namespace):
"""
Get a method or class from any imported module from its name.
Usage: lookup(functionName, globals())
"""
dots = name.count('.')
if dots > 0:
moduleName, objName = '.'.join(name.split('.')[:-1]), name.split('.')[-1]
module = __import__(moduleName)
return getattr(module, objName)
else:
modules = [obj for obj in namespace.values() if str(type(obj)) == "<type 'module'>"]
options = [getattr(module, name) for module in modules if name in dir(module)]
options += [obj[1] for obj in namespace.items() if obj[0] == name ]
if len(options) == 1: return options[0]
if len(options) > 1: raise Exception, 'Name conflict for %s' % name
raise Exception, '%s not found as a method or class' % name
def pause():
"""
Pauses the output stream awaiting user feedback.
"""
print "<Press enter/return to continue>"
raw_input()
# code to handle timeouts
#
# FIXME
# NOTE: TimeoutFuncton is NOT reentrant. Later timeouts will silently
# disable earlier timeouts. Could be solved by maintaining a global list
# of active time outs. Currently, questions which have test cases calling
# this have all student code so wrapped.
#
import signal
import time
class TimeoutFunctionException(Exception):
"""Exception to raise on a timeout"""
pass
class TimeoutFunction:
def __init__(self, function, timeout):
self.timeout = timeout
self.function = function
def handle_timeout(self, signum, frame):
raise TimeoutFunctionException()
def __call__(self, *args, **keyArgs):
# If we have SIGALRM signal, use it to cause an exception if and
# when this function runs too long. Otherwise check the time taken
# after the method has returned, and throw an exception then.
if hasattr(signal, 'SIGALRM'):
old = signal.signal(signal.SIGALRM, self.handle_timeout)
signal.alarm(self.timeout)
try:
result = self.function(*args, **keyArgs)
finally:
signal.signal(signal.SIGALRM, old)
signal.alarm(0)
else:
startTime = time.time()
result = self.function(*args, **keyArgs)
timeElapsed = time.time() - startTime
if timeElapsed >= self.timeout:
self.handle_timeout(None, None)
return result
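# Example (illustrative): TimeoutFunction(slowFunction, 10)(arg) raises
# TimeoutFunctionException if slowFunction(arg) runs for more than 10 seconds.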
_ORIGINAL_STDOUT = None
_ORIGINAL_STDERR = None
_MUTED = False
class WritableNull:
def write(self, string):
pass
def mutePrint():
global _ORIGINAL_STDOUT, _ORIGINAL_STDERR, _MUTED
if _MUTED:
return
_MUTED = True
_ORIGINAL_STDOUT = sys.stdout
#_ORIGINAL_STDERR = sys.stderr
sys.stdout = WritableNull()
#sys.stderr = WritableNull()
def unmutePrint():
global _ORIGINAL_STDOUT, _ORIGINAL_STDERR, _MUTED
if not _MUTED:
return
_MUTED = False
sys.stdout = _ORIGINAL_STDOUT
#sys.stderr = _ORIGINAL_STDERR

View file

@ -0,0 +1,123 @@
# -*- coding: utf-8 -*-
# valueIterationAgents.py
# -----------------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
import mdp, util
from learningAgents import ValueEstimationAgent
class ValueIterationAgent(ValueEstimationAgent):
"""
* Please read learningAgents.py before reading this.*
A ValueIterationAgent takes a Markov decision process
(see mdp.py) on initialization and runs value iteration
for a given number of iterations using the supplied
discount factor.
"""
def __init__(self, mdp, discount=0.9, iterations=100):
"""
Your value iteration agent should take an mdp on
construction, run the indicated number of iterations
and then act according to the resulting policy.
Some useful mdp methods you will use:
mdp.getStates()
mdp.getPossibleActions(state)
mdp.getTransitionStatesAndProbs(state, action)
mdp.getReward(state, action, nextState)
mdp.isTerminal(state)
"""
self.mdp = mdp
self.discount = discount
self.iterations = iterations
self.values = util.Counter() # A Counter is a dict with default 0
# Write value iteration code here
"*** YOUR CODE HERE ***"
states = self.mdp.getStates()
print "__init__ ... states: " + str(states)
for i in range(iterations):
# Use the values from the previous iteration as the reference
# Copy for the batch update
q_copy = self.values.copy()
for state in states:
q_new = None
for action in self.mdp.getPossibleActions(state):
q = self.computeQValueFromValues(state, action)
# Keep the best Q-value
if q_new is None or q_new < q:
q_new = q
# Handle the case with no successors (no legal actions)
if q_new is None:
q_copy[state] = 0
else:
q_copy[state] = q_new
# Update for the next iterations
self.values = q_copy
def getValue(self, state):
"""
Return the value of the state (computed in __init__).
"""
return self.values[state]
def computeQValueFromValues(self, state, action):
"""
Compute the Q-value of action in state from the
value function stored in self.values.
"""
"*** YOUR CODE HERE ***"
values = []
for nextState, prob in self.mdp.getTransitionStatesAndProbs(state,action):
reward = self.mdp.getReward(state, action, nextState)
discount = self.discount
next_state_value = self.values[nextState]
values.append(prob*(reward+discount*next_state_value))
return sum(values)
def computeActionFromValues(self, state):
"""
The policy is the best action in the given state
according to the values currently stored in self.values.
You may break ties any way you see fit. Note that if
there are no legal actions, which is the case at the
terminal state, you should return None.
"""
"*** YOUR CODE HERE ***"
possibleActions = self.mdp.getPossibleActions(state)
if len(possibleActions) == 0:
return None
q_values = [self.computeQValueFromValues(state, action) for action in possibleActions]
print "computeActionFromValues ... q_values: "+str(q_values)
print "index:"+str(q_values.index(max(q_values)))
print "action:"+str(possibleActions[q_values.index(max(q_values))])
return possibleActions[q_values.index(max(q_values))]
def getPolicy(self, state):
return self.computeActionFromValues(state)
def getAction(self, state):
"Returns the policy at the state (no exploration)."
return self.computeActionFromValues(state)
def getQValue(self, state, action):
return self.computeQValueFromValues(state, action)

Some files were not shown because too many files have changed in this diff.