Add the files for TP 2, with question 1 answered

This commit is contained in:
François Pelletier 2019-04-10 23:58:15 -04:00
parent e4cca7e1fa
commit c3b9ff7050
101 changed files with 16680 additions and 1 deletion

1
reinforcement/VERSION Normal file

@@ -0,0 +1 @@
v1.001

73
reinforcement/analysis.py Normal file

@@ -0,0 +1,73 @@
# analysis.py
# -----------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).

######################
# ANALYSIS QUESTIONS #
######################

# Set the given parameters to obtain the specified policies through
# value iteration.

def question2():
    answerDiscount = 0.9
    answerNoise = 0.2
    return answerDiscount, answerNoise

def question3a():
    answerDiscount = None
    answerNoise = None
    answerLivingReward = None
    return answerDiscount, answerNoise, answerLivingReward
    # If not possible, return 'NOT POSSIBLE'

def question3b():
    answerDiscount = None
    answerNoise = None
    answerLivingReward = None
    return answerDiscount, answerNoise, answerLivingReward
    # If not possible, return 'NOT POSSIBLE'

def question3c():
    answerDiscount = None
    answerNoise = None
    answerLivingReward = None
    return answerDiscount, answerNoise, answerLivingReward
    # If not possible, return 'NOT POSSIBLE'

def question3d():
    answerDiscount = None
    answerNoise = None
    answerLivingReward = None
    return answerDiscount, answerNoise, answerLivingReward
    # If not possible, return 'NOT POSSIBLE'

def question3e():
    answerDiscount = None
    answerNoise = None
    answerLivingReward = None
    return answerDiscount, answerNoise, answerLivingReward
    # If not possible, return 'NOT POSSIBLE'

def question6():
    answerEpsilon = None
    answerLearningRate = None
    return answerEpsilon, answerLearningRate
    # If not possible, return 'NOT POSSIBLE'

if __name__ == '__main__':
    print 'Answers to analysis questions:'
    import analysis
    for q in [q for q in dir(analysis) if q.startswith('question')]:
        response = getattr(analysis, q)()
        print ' Question %s:\t%s' % (q, str(response))
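The trailing comments above note that each of question3a through question3e (and question6) may instead return the string 'NOT POSSIBLE' when no parameter setting produces the requested policy. A minimal sketch of the two accepted return shapes, with hypothetical placeholder values that are not answers to the assignment:

# Sketch only: hypothetical values, not the assignment's answers.
def question3x_parameter_example():
    answerDiscount = 0.9       # hypothetical
    answerNoise = 0.0          # hypothetical
    answerLivingReward = -1.0  # hypothetical
    return answerDiscount, answerNoise, answerLivingReward

def question3x_not_possible_example():
    # Used when no (discount, noise, livingReward) setting yields the policy.
    return 'NOT POSSIBLE'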

351
reinforcement/autograder.py Normal file

@@ -0,0 +1,351 @@
# autograder.py
# -------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
# imports from python standard library
import grading
import imp
import optparse
import os
import re
import sys
import projectParams
import random
random.seed(0)
try:
from pacman import GameState
except:
pass
# register arguments and set default values
def readCommand(argv):
parser = optparse.OptionParser(description = 'Run public tests on student code')
parser.set_defaults(generateSolutions=False, edxOutput=False, muteOutput=False, printTestCase=False, noGraphics=False)
parser.add_option('--test-directory',
dest = 'testRoot',
default = 'test_cases',
help = 'Root test directory which contains subdirectories corresponding to each question')
parser.add_option('--student-code',
dest = 'studentCode',
default = projectParams.STUDENT_CODE_DEFAULT,
help = 'comma separated list of student code files')
parser.add_option('--code-directory',
dest = 'codeRoot',
default = "",
help = 'Root directory containing the student and testClass code')
parser.add_option('--test-case-code',
dest = 'testCaseCode',
default = projectParams.PROJECT_TEST_CLASSES,
help = 'class containing testClass classes for this project')
parser.add_option('--generate-solutions',
dest = 'generateSolutions',
action = 'store_true',
help = 'Write solutions generated to .solution file')
parser.add_option('--edx-output',
dest = 'edxOutput',
action = 'store_true',
help = 'Generate edX output files')
parser.add_option('--mute',
dest = 'muteOutput',
action = 'store_true',
help = 'Mute output from executing tests')
parser.add_option('--print-tests', '-p',
dest = 'printTestCase',
action = 'store_true',
help = 'Print each test case before running them.')
parser.add_option('--test', '-t',
dest = 'runTest',
default = None,
help = 'Run one particular test. Relative to test root.')
parser.add_option('--question', '-q',
dest = 'gradeQuestion',
default = None,
help = 'Grade one particular question.')
parser.add_option('--no-graphics',
dest = 'noGraphics',
action = 'store_true',
help = 'No graphics display for pacman games.')
(options, args) = parser.parse_args(argv)
return options
# confirm we should author solution files
def confirmGenerate():
print 'WARNING: this action will overwrite any solution files.'
print 'Are you sure you want to proceed? (yes/no)'
while True:
ans = sys.stdin.readline().strip()
if ans == 'yes':
break
elif ans == 'no':
sys.exit(0)
else:
print 'please answer either "yes" or "no"'
# TODO: Fix this so that tracebacks work correctly
# Looking at source of the traceback module, presuming it works
# the same as the intepreters, it uses co_filename. This is,
# however, a readonly attribute.
def setModuleName(module, filename):
functionType = type(confirmGenerate)
classType = type(optparse.Option)
for i in dir(module):
o = getattr(module, i)
if hasattr(o, '__file__'): continue
if type(o) == functionType:
setattr(o, '__file__', filename)
elif type(o) == classType:
setattr(o, '__file__', filename)
# TODO: assign member __file__'s?
#print i, type(o)
#from cStringIO import StringIO
def loadModuleString(moduleSource):
# Below broken, imp doesn't believe its being passed a file:
# ValueError: load_module arg#2 should be a file or None
#
#f = StringIO(moduleCodeDict[k])
#tmp = imp.load_module(k, f, k, (".py", "r", imp.PY_SOURCE))
tmp = imp.new_module(k)
exec moduleCodeDict[k] in tmp.__dict__
setModuleName(tmp, k)
return tmp
import py_compile
def loadModuleFile(moduleName, filePath):
with open(filePath, 'r') as f:
return imp.load_module(moduleName, f, "%s.py" % moduleName, (".py", "r", imp.PY_SOURCE))
def readFile(path, root=""):
"Read file from disk at specified path and return as string"
with open(os.path.join(root, path), 'r') as handle:
return handle.read()
#######################################################################
# Error Hint Map
#######################################################################
# TODO: use these
ERROR_HINT_MAP = {
'q1': {
"<type 'exceptions.IndexError'>": """
We noticed that your project threw an IndexError on q1.
While many things may cause this, it may have been from
assuming a certain number of successors from a state space
or assuming a certain number of actions available from a given
state. Try making your code more general (no hardcoded indices)
and submit again!
"""
},
'q3': {
"<type 'exceptions.AttributeError'>": """
We noticed that your project threw an AttributeError on q3.
While many things may cause this, it may have been from assuming
a certain size or structure to the state space. For example, if you have
a line of code assuming that the state is (x, y) and we run your code
on a state space with (x, y, z), this error could be thrown. Try
making your code more general and submit again!
"""
}
}
import pprint
def splitStrings(d):
d2 = dict(d)
for k in d:
if k[0:2] == "__":
del d2[k]
continue
if d2[k].find("\n") >= 0:
d2[k] = d2[k].split("\n")
return d2
def printTest(testDict, solutionDict):
pp = pprint.PrettyPrinter(indent=4)
print "Test case:"
for line in testDict["__raw_lines__"]:
print " |", line
print "Solution:"
for line in solutionDict["__raw_lines__"]:
print " |", line
def runTest(testName, moduleDict, printTestCase=False, display=None):
import testParser
import testClasses
for module in moduleDict:
setattr(sys.modules[__name__], module, moduleDict[module])
testDict = testParser.TestParser(testName + ".test").parse()
solutionDict = testParser.TestParser(testName + ".solution").parse()
test_out_file = os.path.join('%s.test_output' % testName)
testDict['test_out_file'] = test_out_file
testClass = getattr(projectTestClasses, testDict['class'])
questionClass = getattr(testClasses, 'Question')
question = questionClass({'max_points': 0}, display)
testCase = testClass(question, testDict)
if printTestCase:
printTest(testDict, solutionDict)
# This is a fragile hack to create a stub grades object
grades = grading.Grades(projectParams.PROJECT_NAME, [(None,0)])
testCase.execute(grades, moduleDict, solutionDict)
# returns all the tests you need to run in order to run question
def getDepends(testParser, testRoot, question):
allDeps = [question]
questionDict = testParser.TestParser(os.path.join(testRoot, question, 'CONFIG')).parse()
if 'depends' in questionDict:
depends = questionDict['depends'].split()
for d in depends:
# run dependencies first
allDeps = getDepends(testParser, testRoot, d) + allDeps
return allDeps
# get list of questions to grade
def getTestSubdirs(testParser, testRoot, questionToGrade):
problemDict = testParser.TestParser(os.path.join(testRoot, 'CONFIG')).parse()
if questionToGrade != None:
questions = getDepends(testParser, testRoot, questionToGrade)
if len(questions) > 1:
print 'Note: due to dependencies, the following tests will be run: %s' % ' '.join(questions)
return questions
if 'order' in problemDict:
return problemDict['order'].split()
return sorted(os.listdir(testRoot))
# evaluate student code
def evaluate(generateSolutions, testRoot, moduleDict, exceptionMap=ERROR_HINT_MAP, edxOutput=False, muteOutput=False,
printTestCase=False, questionToGrade=None, display=None):
# imports of testbench code. note that the testClasses import must follow
# the import of student code due to dependencies
import testParser
import testClasses
for module in moduleDict:
setattr(sys.modules[__name__], module, moduleDict[module])
questions = []
questionDicts = {}
test_subdirs = getTestSubdirs(testParser, testRoot, questionToGrade)
for q in test_subdirs:
subdir_path = os.path.join(testRoot, q)
if not os.path.isdir(subdir_path) or q[0] == '.':
continue
# create a question object
questionDict = testParser.TestParser(os.path.join(subdir_path, 'CONFIG')).parse()
questionClass = getattr(testClasses, questionDict['class'])
question = questionClass(questionDict, display)
questionDicts[q] = questionDict
# load test cases into question
tests = filter(lambda t: re.match('[^#~.].*\.test\Z', t), os.listdir(subdir_path))
tests = map(lambda t: re.match('(.*)\.test\Z', t).group(1), tests)
for t in sorted(tests):
test_file = os.path.join(subdir_path, '%s.test' % t)
solution_file = os.path.join(subdir_path, '%s.solution' % t)
test_out_file = os.path.join(subdir_path, '%s.test_output' % t)
testDict = testParser.TestParser(test_file).parse()
if testDict.get("disabled", "false").lower() == "true":
continue
testDict['test_out_file'] = test_out_file
testClass = getattr(projectTestClasses, testDict['class'])
testCase = testClass(question, testDict)
def makefun(testCase, solution_file):
if generateSolutions:
# write solution file to disk
return lambda grades: testCase.writeSolution(moduleDict, solution_file)
else:
# read in solution dictionary and pass as an argument
testDict = testParser.TestParser(test_file).parse()
solutionDict = testParser.TestParser(solution_file).parse()
if printTestCase:
return lambda grades: printTest(testDict, solutionDict) or testCase.execute(grades, moduleDict, solutionDict)
else:
return lambda grades: testCase.execute(grades, moduleDict, solutionDict)
question.addTestCase(testCase, makefun(testCase, solution_file))
# Note extra function is necessary for scoping reasons
def makefun(question):
return lambda grades: question.execute(grades)
setattr(sys.modules[__name__], q, makefun(question))
questions.append((q, question.getMaxPoints()))
grades = grading.Grades(projectParams.PROJECT_NAME, questions, edxOutput=edxOutput, muteOutput=muteOutput)
if questionToGrade == None:
for q in questionDicts:
for prereq in questionDicts[q].get('depends', '').split():
grades.addPrereq(q, prereq)
grades.grade(sys.modules[__name__], bonusPic = projectParams.BONUS_PIC)
return grades.points
def getDisplay(graphicsByDefault, options=None):
graphics = graphicsByDefault
if options is not None and options.noGraphics:
graphics = False
if graphics:
try:
import graphicsDisplay
return graphicsDisplay.PacmanGraphics(1, frameTime=.05)
except ImportError:
pass
import textDisplay
return textDisplay.NullGraphics()
if __name__ == '__main__':
options = readCommand(sys.argv)
if options.generateSolutions:
confirmGenerate()
codePaths = options.studentCode.split(',')
# moduleCodeDict = {}
# for cp in codePaths:
# moduleName = re.match('.*?([^/]*)\.py', cp).group(1)
# moduleCodeDict[moduleName] = readFile(cp, root=options.codeRoot)
# moduleCodeDict['projectTestClasses'] = readFile(options.testCaseCode, root=options.codeRoot)
# moduleDict = loadModuleDict(moduleCodeDict)
moduleDict = {}
for cp in codePaths:
moduleName = re.match('.*?([^/]*)\.py', cp).group(1)
moduleDict[moduleName] = loadModuleFile(moduleName, os.path.join(options.codeRoot, cp))
moduleName = re.match('.*?([^/]*)\.py', options.testCaseCode).group(1)
moduleDict['projectTestClasses'] = loadModuleFile(moduleName, os.path.join(options.codeRoot, options.testCaseCode))
if options.runTest != None:
runTest(options.runTest, moduleDict, printTestCase=options.printTestCase, display=getDisplay(True, options))
else:
evaluate(options.generateSolutions, options.testRoot, moduleDict,
edxOutput=options.edxOutput, muteOutput=options.muteOutput, printTestCase=options.printTestCase,
questionToGrade=options.gradeQuestion, display=getDisplay(options.gradeQuestion!=None, options))
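Because readCommand builds a standard optparse parser, the flags defined above can also be exercised programmatically with an argv-style list. A small sketch, assuming it is run from the reinforcement/ directory under the project's Python 2 interpreter:

# Sketch: drive the option parsing above without the shell.
import autograder

opts = autograder.readCommand(['-q', 'q1', '--no-graphics', '--mute'])
print(opts.gradeQuestion)   # 'q1'  -> passed to evaluate(questionToGrade=...)
print(opts.noGraphics)      # True  -> getDisplay() falls back to NullGraphics
print(opts.muteOutput)      # True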

384
reinforcement/crawler.py Normal file

@@ -0,0 +1,384 @@
# crawler.py
# ----------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
#!/usr/bin/python
import math
from math import pi as PI
import time
import environment
import random
class CrawlingRobotEnvironment(environment.Environment):
def __init__(self, crawlingRobot):
self.crawlingRobot = crawlingRobot
# The state is of the form (armAngle, handAngle)
# where the angles are bucket numbers, not actual
# degree measurements
self.state = None
self.nArmStates = 9
self.nHandStates = 13
# create a list of arm buckets and hand buckets to
# discretize the state space
minArmAngle,maxArmAngle = self.crawlingRobot.getMinAndMaxArmAngles()
minHandAngle,maxHandAngle = self.crawlingRobot.getMinAndMaxHandAngles()
armIncrement = (maxArmAngle - minArmAngle) / (self.nArmStates-1)
handIncrement = (maxHandAngle - minHandAngle) / (self.nHandStates-1)
self.armBuckets = [minArmAngle+(armIncrement*i) \
for i in range(self.nArmStates)]
self.handBuckets = [minHandAngle+(handIncrement*i) \
for i in range(self.nHandStates)]
# Reset
self.reset()
def getCurrentState(self):
"""
Return the current state
of the crawling robot
"""
return self.state
def getPossibleActions(self, state):
"""
Returns possible actions
for the states in the
current state
"""
actions = list()
currArmBucket,currHandBucket = state
if currArmBucket > 0: actions.append('arm-down')
if currArmBucket < self.nArmStates-1: actions.append('arm-up')
if currHandBucket > 0: actions.append('hand-down')
if currHandBucket < self.nHandStates-1: actions.append('hand-up')
return actions
def doAction(self, action):
"""
Perform the action and update
the current state of the Environment
and return the reward for the
current state, the next state
and the taken action.
Returns:
nextState, reward
"""
nextState, reward = None, None
oldX,oldY = self.crawlingRobot.getRobotPosition()
armBucket,handBucket = self.state
armAngle,handAngle = self.crawlingRobot.getAngles()
if action == 'arm-up':
newArmAngle = self.armBuckets[armBucket+1]
self.crawlingRobot.moveArm(newArmAngle)
nextState = (armBucket+1,handBucket)
if action == 'arm-down':
newArmAngle = self.armBuckets[armBucket-1]
self.crawlingRobot.moveArm(newArmAngle)
nextState = (armBucket-1,handBucket)
if action == 'hand-up':
newHandAngle = self.handBuckets[handBucket+1]
self.crawlingRobot.moveHand(newHandAngle)
nextState = (armBucket,handBucket+1)
if action == 'hand-down':
newHandAngle = self.handBuckets[handBucket-1]
self.crawlingRobot.moveHand(newHandAngle)
nextState = (armBucket,handBucket-1)
newX,newY = self.crawlingRobot.getRobotPosition()
# a simple reward function
reward = newX - oldX
self.state = nextState
return nextState, reward
def reset(self):
"""
Resets the Environment to the initial state
"""
## Initialize the state to be the middle
## value for each parameter e.g. if there are 13 and 19
## buckets for the arm and hand parameters, then the intial
## state should be (6,9)
##
## Also call self.crawlingRobot.setAngles()
## to the initial arm and hand angle
armState = self.nArmStates/2
handState = self.nHandStates/2
self.state = armState,handState
self.crawlingRobot.setAngles(self.armBuckets[armState],self.handBuckets[handState])
self.crawlingRobot.positions = [20,self.crawlingRobot.getRobotPosition()[0]]
class CrawlingRobot:
def setAngles(self, armAngle, handAngle):
"""
set the robot's arm and hand angles
to the passed in values
"""
self.armAngle = armAngle
self.handAngle = handAngle
def getAngles(self):
"""
returns the pair of (armAngle, handAngle)
"""
return self.armAngle, self.handAngle
def getRobotPosition(self):
"""
returns the (x,y) coordinates
of the lower-left point of the
robot
"""
return self.robotPos
def moveArm(self, newArmAngle):
"""
move the robot arm to 'newArmAngle'
"""
oldArmAngle = self.armAngle
if newArmAngle > self.maxArmAngle:
raise 'Crawling Robot: Arm Raised too high. Careful!'
if newArmAngle < self.minArmAngle:
raise 'Crawling Robot: Arm Raised too low. Careful!'
disp = self.displacement(self.armAngle, self.handAngle,
newArmAngle, self.handAngle)
curXPos = self.robotPos[0]
self.robotPos = (curXPos+disp, self.robotPos[1])
self.armAngle = newArmAngle
# Position and Velocity Sign Post
self.positions.append(self.getRobotPosition()[0])
# self.angleSums.append(abs(math.degrees(oldArmAngle)-math.degrees(newArmAngle)))
if len(self.positions) > 100:
self.positions.pop(0)
# self.angleSums.pop(0)
def moveHand(self, newHandAngle):
"""
move the robot hand to 'newHandAngle'
"""
oldHandAngle = self.handAngle
if newHandAngle > self.maxHandAngle:
raise 'Crawling Robot: Hand Raised too high. Careful!'
if newHandAngle < self.minHandAngle:
raise 'Crawling Robot: Hand Raised too low. Careful!'
disp = self.displacement(self.armAngle, self.handAngle, self.armAngle, newHandAngle)
curXPos = self.robotPos[0]
self.robotPos = (curXPos+disp, self.robotPos[1])
self.handAngle = newHandAngle
# Position and Velocity Sign Post
self.positions.append(self.getRobotPosition()[0])
# self.angleSums.append(abs(math.degrees(oldHandAngle)-math.degrees(newHandAngle)))
if len(self.positions) > 100:
self.positions.pop(0)
# self.angleSums.pop(0)
def getMinAndMaxArmAngles(self):
"""
get the lower- and upper- bound
for the arm angles returns (min,max) pair
"""
return self.minArmAngle, self.maxArmAngle
def getMinAndMaxHandAngles(self):
"""
get the lower- and upper- bound
for the hand angles returns (min,max) pair
"""
return self.minHandAngle, self.maxHandAngle
def getRotationAngle(self):
"""
get the current angle the
robot body is rotated off the ground
"""
armCos, armSin = self.__getCosAndSin(self.armAngle)
handCos, handSin = self.__getCosAndSin(self.handAngle)
x = self.armLength * armCos + self.handLength * handCos + self.robotWidth
y = self.armLength * armSin + self.handLength * handSin + self.robotHeight
if y < 0:
return math.atan(-y/x)
return 0.0
## You shouldn't need methods below here
def __getCosAndSin(self, angle):
return math.cos(angle), math.sin(angle)
def displacement(self, oldArmDegree, oldHandDegree, armDegree, handDegree):
oldArmCos, oldArmSin = self.__getCosAndSin(oldArmDegree)
armCos, armSin = self.__getCosAndSin(armDegree)
oldHandCos, oldHandSin = self.__getCosAndSin(oldHandDegree)
handCos, handSin = self.__getCosAndSin(handDegree)
xOld = self.armLength * oldArmCos + self.handLength * oldHandCos + self.robotWidth
yOld = self.armLength * oldArmSin + self.handLength * oldHandSin + self.robotHeight
x = self.armLength * armCos + self.handLength * handCos + self.robotWidth
y = self.armLength * armSin + self.handLength * handSin + self.robotHeight
if y < 0:
if yOld <= 0:
return math.sqrt(xOld*xOld + yOld*yOld) - math.sqrt(x*x + y*y)
return (xOld - yOld*(x-xOld) / (y - yOld)) - math.sqrt(x*x + y*y)
else:
if yOld >= 0:
return 0.0
return -(x - y * (xOld-x)/(yOld-y)) + math.sqrt(xOld*xOld + yOld*yOld)
raise 'Never Should See This!'
def draw(self, stepCount, stepDelay):
x1, y1 = self.getRobotPosition()
x1 = x1 % self.totWidth
## Check Lower Still on the ground
if y1 != self.groundY:
raise 'Flying Robot!!'
rotationAngle = self.getRotationAngle()
cosRot, sinRot = self.__getCosAndSin(rotationAngle)
x2 = x1 + self.robotWidth * cosRot
y2 = y1 - self.robotWidth * sinRot
x3 = x1 - self.robotHeight * sinRot
y3 = y1 - self.robotHeight * cosRot
x4 = x3 + cosRot*self.robotWidth
y4 = y3 - sinRot*self.robotWidth
self.canvas.coords(self.robotBody,x1,y1,x2,y2,x4,y4,x3,y3)
armCos, armSin = self.__getCosAndSin(rotationAngle+self.armAngle)
xArm = x4 + self.armLength * armCos
yArm = y4 - self.armLength * armSin
self.canvas.coords(self.robotArm,x4,y4,xArm,yArm)
handCos, handSin = self.__getCosAndSin(self.handAngle+rotationAngle)
xHand = xArm + self.handLength * handCos
yHand = yArm - self.handLength * handSin
self.canvas.coords(self.robotHand,xArm,yArm,xHand,yHand)
# Position and Velocity Sign Post
# time = len(self.positions) + 0.5 * sum(self.angleSums)
# velocity = (self.positions[-1]-self.positions[0]) / time
# if len(self.positions) == 1: return
steps = (stepCount - self.lastStep)
if steps==0:return
# pos = self.positions[-1]
# velocity = (pos - self.lastPos) / steps
# g = .9 ** (10 * stepDelay)
# g = .99 ** steps
# self.velAvg = g * self.velAvg + (1 - g) * velocity
# g = .999 ** steps
# self.velAvg2 = g * self.velAvg2 + (1 - g) * velocity
pos = self.positions[-1]
velocity = pos - self.positions[-2]
vel2 = (pos - self.positions[0]) / len(self.positions)
self.velAvg = .9 * self.velAvg + .1 * vel2
velMsg = '100-step Avg Velocity: %.2f' % self.velAvg
# velMsg2 = '1000-step Avg Velocity: %.2f' % self.velAvg2
velocityMsg = 'Velocity: %.2f' % velocity
positionMsg = 'Position: %2.f' % pos
stepMsg = 'Step: %d' % stepCount
if 'vel_msg' in dir(self):
self.canvas.delete(self.vel_msg)
self.canvas.delete(self.pos_msg)
self.canvas.delete(self.step_msg)
self.canvas.delete(self.velavg_msg)
# self.canvas.delete(self.velavg2_msg)
# self.velavg2_msg = self.canvas.create_text(850,190,text=velMsg2)
self.velavg_msg = self.canvas.create_text(650,190,text=velMsg)
self.vel_msg = self.canvas.create_text(450,190,text=velocityMsg)
self.pos_msg = self.canvas.create_text(250,190,text=positionMsg)
self.step_msg = self.canvas.create_text(50,190,text=stepMsg)
# self.lastPos = pos
self.lastStep = stepCount
# self.lastVel = velocity
def __init__(self, canvas):
## Canvas ##
self.canvas = canvas
self.velAvg = 0
# self.velAvg2 = 0
# self.lastPos = 0
self.lastStep = 0
# self.lastVel = 0
## Arm and Hand Degrees ##
self.armAngle = self.oldArmDegree = 0.0
self.handAngle = self.oldHandDegree = -PI/6
self.maxArmAngle = PI/6
self.minArmAngle = -PI/6
self.maxHandAngle = 0
self.minHandAngle = -(5.0/6.0) * PI
## Draw Ground ##
self.totWidth = canvas.winfo_reqwidth()
self.totHeight = canvas.winfo_reqheight()
self.groundHeight = 40
self.groundY = self.totHeight - self.groundHeight
self.ground = canvas.create_rectangle(0,
self.groundY,self.totWidth,self.totHeight, fill='blue')
## Robot Body ##
self.robotWidth = 80
self.robotHeight = 40
self.robotPos = (20, self.groundY)
self.robotBody = canvas.create_polygon(0,0,0,0,0,0,0,0, fill='green')
## Robot Arm ##
self.armLength = 60
self.robotArm = canvas.create_line(0,0,0,0,fill='orange',width=5)
## Robot Hand ##
self.handLength = 40
self.robotHand = canvas.create_line(0,0,0,0,fill='red',width=3)
self.positions = [0,0]
# self.angleSums = [0,0]
if __name__ == '__main__':
from graphicsCrawlerDisplay import *
run()
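CrawlingRobotEnvironment.__init__ above discretizes the continuous arm and hand angles into evenly spaced buckets, and a state is an (armBucket, handBucket) index pair. A standalone sketch of that discretization, reusing the default arm limits from CrawlingRobot.__init__ (-PI/6 to PI/6):

# Standalone sketch of the bucket discretization in CrawlingRobotEnvironment.
from math import pi as PI

nArmStates = 9
minArmAngle, maxArmAngle = -PI / 6, PI / 6   # defaults from CrawlingRobot.__init__
armIncrement = (maxArmAngle - minArmAngle) / (nArmStates - 1)
armBuckets = [minArmAngle + armIncrement * i for i in range(nArmStates)]

# Bucket 0 is the minimum angle, bucket nArmStates - 1 the maximum; reset()
# starts the robot in the middle bucket (index 4 under Python 2 integer division).
print(armBuckets[0])    # about -0.5236
print(armBuckets[-1])   # about  0.5236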

56
reinforcement/environment.py Normal file

@@ -0,0 +1,56 @@
# environment.py
# --------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).

#!/usr/bin/python

class Environment:

    def getCurrentState(self):
        """
        Returns the current state of the environment
        """
        abstract

    def getPossibleActions(self, state):
        """
        Returns possible actions the agent
        can take in the given state. Can
        return the empty list if we are in
        a terminal state.
        """
        abstract

    def doAction(self, action):
        """
        Performs the given action in the current
        environment state and updates the environment.
        Returns a (reward, nextState) pair
        """
        abstract

    def reset(self):
        """
        Resets the current state to the start state
        """
        abstract

    def isTerminal(self):
        """
        Has the environment entered a terminal
        state? This means there are no successors
        """
        state = self.getCurrentState()
        actions = self.getPossibleActions(state)
        return len(actions) == 0
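Environment above is an abstract interface: each method body is the bare name `abstract` (a NameError if ever called), so subclasses must override everything except isTerminal. A minimal toy implementation, purely illustrative and not part of the project, following the (nextState, reward) return convention that crawler.py's environment uses:

# Illustrative only: a three-state chain implementing the interface above.
import environment

class ChainEnvironment(environment.Environment):
    """States 0, 1, 2 in a row; reaching state 2 is terminal and pays 1.0."""

    def __init__(self):
        self.reset()

    def getCurrentState(self):
        return self.state

    def getPossibleActions(self, state):
        if state == 2:
            return []                    # terminal: isTerminal() becomes True
        return ['left', 'right']

    def doAction(self, action):
        if action == 'right':
            self.state = min(self.state + 1, 2)
        else:
            self.state = max(self.state - 1, 0)
        reward = 1.0 if self.state == 2 else 0.0
        return self.state, reward        # (nextState, reward), as in crawler.py

    def reset(self):
        self.state = 0

env = ChainEnvironment()
print(env.doAction('right'))             # (1, 0.0)
print(env.doAction('right'))             # (2, 1.0)
print(env.isTerminal())                  # True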

103
reinforcement/featureExtractors.py Normal file

@@ -0,0 +1,103 @@
# featureExtractors.py
# --------------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
"Feature extractors for Pacman game states"
from game import Directions, Actions
import util
class FeatureExtractor:
def getFeatures(self, state, action):
"""
Returns a dict from features to counts
Usually, the count will just be 1.0 for
indicator functions.
"""
util.raiseNotDefined()
class IdentityExtractor(FeatureExtractor):
def getFeatures(self, state, action):
feats = util.Counter()
feats[(state,action)] = 1.0
return feats
class CoordinateExtractor(FeatureExtractor):
def getFeatures(self, state, action):
feats = util.Counter()
feats[state] = 1.0
feats['x=%d' % state[0]] = 1.0
feats['y=%d' % state[0]] = 1.0
feats['action=%s' % action] = 1.0
return feats
def closestFood(pos, food, walls):
"""
closestFood -- this is similar to the function that we have
worked on in the search project; here its all in one place
"""
fringe = [(pos[0], pos[1], 0)]
expanded = set()
while fringe:
pos_x, pos_y, dist = fringe.pop(0)
if (pos_x, pos_y) in expanded:
continue
expanded.add((pos_x, pos_y))
# if we find a food at this location then exit
if food[pos_x][pos_y]:
return dist
# otherwise spread out from the location to its neighbours
nbrs = Actions.getLegalNeighbors((pos_x, pos_y), walls)
for nbr_x, nbr_y in nbrs:
fringe.append((nbr_x, nbr_y, dist+1))
# no food found
return None
class SimpleExtractor(FeatureExtractor):
"""
Returns simple features for a basic reflex Pacman:
- whether food will be eaten
- how far away the next food is
- whether a ghost collision is imminent
- whether a ghost is one step away
"""
def getFeatures(self, state, action):
# extract the grid of food and wall locations and get the ghost locations
food = state.getFood()
walls = state.getWalls()
ghosts = state.getGhostPositions()
features = util.Counter()
features["bias"] = 1.0
# compute the location of pacman after he takes the action
x, y = state.getPacmanPosition()
dx, dy = Actions.directionToVector(action)
next_x, next_y = int(x + dx), int(y + dy)
# count the number of ghosts 1-step away
features["#-of-ghosts-1-step-away"] = sum((next_x, next_y) in Actions.getLegalNeighbors(g, walls) for g in ghosts)
# if there is no danger of ghosts then add the food feature
if not features["#-of-ghosts-1-step-away"] and food[next_x][next_y]:
features["eats-food"] = 1.0
dist = closestFood((next_x, next_y), food, walls)
if dist is not None:
# make the distance a number less than one otherwise the update
# will diverge wildly
features["closest-food"] = float(dist) / (walls.width * walls.height)
features.divideAll(10.0)
return features
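Each extractor above returns a util.Counter mapping feature names to values (SimpleExtractor scales everything with divideAll(10.0) to keep learning updates stable). The usual consumer of such a Counter is a linear function approximator that scores a (state, action) pair as the dot product of features and weights; a sketch of that computation with made-up numbers, not the project's agent code:

# Sketch: combining a feature Counter with a weight Counter (values made up).
import util

features = util.Counter()
features['bias'] = 0.1                      # was 1.0 before divideAll(10.0)
features['closest-food'] = 0.04
features['#-of-ghosts-1-step-away'] = 0.0

weights = util.Counter()
weights['bias'] = 1.5
weights['closest-food'] = -2.0              # nearer food should score higher

# Linear score: sum over features of feature value * weight
# (missing keys in a util.Counter read as 0).
score = sum(features[f] * weights[f] for f in features)
print(score)                                # 0.1*1.5 + 0.04*-2.0 = 0.07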

729
reinforcement/game.py Normal file

@@ -0,0 +1,729 @@
# game.py
# -------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
# game.py
# -------
# Licensing Information: Please do not distribute or publish solutions to this
# project. You are free to use and extend these projects for educational
# purposes. The Pacman AI projects were developed at UC Berkeley, primarily by
# John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# For more info, see http://inst.eecs.berkeley.edu/~cs188/sp09/pacman.html
from util import *
import time, os
import traceback
import sys
#######################
# Parts worth reading #
#######################
class Agent:
"""
An agent must define a getAction method, but may also define the
following methods which will be called if they exist:
def registerInitialState(self, state): # inspects the starting state
"""
def __init__(self, index=0):
self.index = index
def getAction(self, state):
"""
The Agent will receive a GameState (from either {pacman, capture, sonar}.py) and
must return an action from Directions.{North, South, East, West, Stop}
"""
raiseNotDefined()
class Directions:
NORTH = 'North'
SOUTH = 'South'
EAST = 'East'
WEST = 'West'
STOP = 'Stop'
LEFT = {NORTH: WEST,
SOUTH: EAST,
EAST: NORTH,
WEST: SOUTH,
STOP: STOP}
RIGHT = dict([(y,x) for x, y in LEFT.items()])
REVERSE = {NORTH: SOUTH,
SOUTH: NORTH,
EAST: WEST,
WEST: EAST,
STOP: STOP}
class Configuration:
"""
A Configuration holds the (x,y) coordinate of a character, along with its
traveling direction.
The convention for positions, like a graph, is that (0,0) is the lower left corner, x increases
horizontally and y increases vertically. Therefore, north is the direction of increasing y, or (0,1).
"""
def __init__(self, pos, direction):
self.pos = pos
self.direction = direction
def getPosition(self):
return (self.pos)
def getDirection(self):
return self.direction
def isInteger(self):
x,y = self.pos
return x == int(x) and y == int(y)
def __eq__(self, other):
if other == None: return False
return (self.pos == other.pos and self.direction == other.direction)
def __hash__(self):
x = hash(self.pos)
y = hash(self.direction)
return hash(x + 13 * y)
def __str__(self):
return "(x,y)="+str(self.pos)+", "+str(self.direction)
def generateSuccessor(self, vector):
"""
Generates a new configuration reached by translating the current
configuration by the action vector. This is a low-level call and does
not attempt to respect the legality of the movement.
Actions are movement vectors.
"""
x, y= self.pos
dx, dy = vector
direction = Actions.vectorToDirection(vector)
if direction == Directions.STOP:
direction = self.direction # There is no stop direction
return Configuration((x + dx, y+dy), direction)
class AgentState:
"""
AgentStates hold the state of an agent (configuration, speed, scared, etc).
"""
def __init__( self, startConfiguration, isPacman ):
self.start = startConfiguration
self.configuration = startConfiguration
self.isPacman = isPacman
self.scaredTimer = 0
self.numCarrying = 0
self.numReturned = 0
def __str__( self ):
if self.isPacman:
return "Pacman: " + str( self.configuration )
else:
return "Ghost: " + str( self.configuration )
def __eq__( self, other ):
if other == None:
return False
return self.configuration == other.configuration and self.scaredTimer == other.scaredTimer
def __hash__(self):
return hash(hash(self.configuration) + 13 * hash(self.scaredTimer))
def copy( self ):
state = AgentState( self.start, self.isPacman )
state.configuration = self.configuration
state.scaredTimer = self.scaredTimer
state.numCarrying = self.numCarrying
state.numReturned = self.numReturned
return state
def getPosition(self):
if self.configuration == None: return None
return self.configuration.getPosition()
def getDirection(self):
return self.configuration.getDirection()
class Grid:
"""
A 2-dimensional array of objects backed by a list of lists. Data is accessed
via grid[x][y] where (x,y) are positions on a Pacman map with x horizontal,
y vertical and the origin (0,0) in the bottom left corner.
The __str__ method constructs an output that is oriented like a pacman board.
"""
def __init__(self, width, height, initialValue=False, bitRepresentation=None):
if initialValue not in [False, True]: raise Exception('Grids can only contain booleans')
self.CELLS_PER_INT = 30
self.width = width
self.height = height
self.data = [[initialValue for y in range(height)] for x in range(width)]
if bitRepresentation:
self._unpackBits(bitRepresentation)
def __getitem__(self, i):
return self.data[i]
def __setitem__(self, key, item):
self.data[key] = item
def __str__(self):
out = [[str(self.data[x][y])[0] for x in range(self.width)] for y in range(self.height)]
out.reverse()
return '\n'.join([''.join(x) for x in out])
def __eq__(self, other):
if other == None: return False
return self.data == other.data
def __hash__(self):
# return hash(str(self))
base = 1
h = 0
for l in self.data:
for i in l:
if i:
h += base
base *= 2
return hash(h)
def copy(self):
g = Grid(self.width, self.height)
g.data = [x[:] for x in self.data]
return g
def deepCopy(self):
return self.copy()
def shallowCopy(self):
g = Grid(self.width, self.height)
g.data = self.data
return g
def count(self, item =True ):
return sum([x.count(item) for x in self.data])
def asList(self, key = True):
list = []
for x in range(self.width):
for y in range(self.height):
if self[x][y] == key: list.append( (x,y) )
return list
def packBits(self):
"""
Returns an efficient int list representation
(width, height, bitPackedInts...)
"""
bits = [self.width, self.height]
currentInt = 0
for i in range(self.height * self.width):
bit = self.CELLS_PER_INT - (i % self.CELLS_PER_INT) - 1
x, y = self._cellIndexToPosition(i)
if self[x][y]:
currentInt += 2 ** bit
if (i + 1) % self.CELLS_PER_INT == 0:
bits.append(currentInt)
currentInt = 0
bits.append(currentInt)
return tuple(bits)
def _cellIndexToPosition(self, index):
x = index / self.height
y = index % self.height
return x, y
def _unpackBits(self, bits):
"""
Fills in data from a bit-level representation
"""
cell = 0
for packed in bits:
for bit in self._unpackInt(packed, self.CELLS_PER_INT):
if cell == self.width * self.height: break
x, y = self._cellIndexToPosition(cell)
self[x][y] = bit
cell += 1
def _unpackInt(self, packed, size):
bools = []
if packed < 0: raise ValueError, "must be a positive integer"
for i in range(size):
n = 2 ** (self.CELLS_PER_INT - i - 1)
if packed >= n:
bools.append(True)
packed -= n
else:
bools.append(False)
return bools
def reconstituteGrid(bitRep):
if type(bitRep) is not type((1,2)):
return bitRep
width, height = bitRep[:2]
return Grid(width, height, bitRepresentation= bitRep[2:])
####################################
# Parts you shouldn't have to read #
####################################
class Actions:
"""
A collection of static methods for manipulating move actions.
"""
# Directions
_directions = {Directions.NORTH: (0, 1),
Directions.SOUTH: (0, -1),
Directions.EAST: (1, 0),
Directions.WEST: (-1, 0),
Directions.STOP: (0, 0)}
_directionsAsList = _directions.items()
TOLERANCE = .001
def reverseDirection(action):
if action == Directions.NORTH:
return Directions.SOUTH
if action == Directions.SOUTH:
return Directions.NORTH
if action == Directions.EAST:
return Directions.WEST
if action == Directions.WEST:
return Directions.EAST
return action
reverseDirection = staticmethod(reverseDirection)
def vectorToDirection(vector):
dx, dy = vector
if dy > 0:
return Directions.NORTH
if dy < 0:
return Directions.SOUTH
if dx < 0:
return Directions.WEST
if dx > 0:
return Directions.EAST
return Directions.STOP
vectorToDirection = staticmethod(vectorToDirection)
def directionToVector(direction, speed = 1.0):
dx, dy = Actions._directions[direction]
return (dx * speed, dy * speed)
directionToVector = staticmethod(directionToVector)
def getPossibleActions(config, walls):
possible = []
x, y = config.pos
x_int, y_int = int(x + 0.5), int(y + 0.5)
# In between grid points, all agents must continue straight
if (abs(x - x_int) + abs(y - y_int) > Actions.TOLERANCE):
return [config.getDirection()]
for dir, vec in Actions._directionsAsList:
dx, dy = vec
next_y = y_int + dy
next_x = x_int + dx
if not walls[next_x][next_y]: possible.append(dir)
return possible
getPossibleActions = staticmethod(getPossibleActions)
def getLegalNeighbors(position, walls):
x,y = position
x_int, y_int = int(x + 0.5), int(y + 0.5)
neighbors = []
for dir, vec in Actions._directionsAsList:
dx, dy = vec
next_x = x_int + dx
if next_x < 0 or next_x == walls.width: continue
next_y = y_int + dy
if next_y < 0 or next_y == walls.height: continue
if not walls[next_x][next_y]: neighbors.append((next_x, next_y))
return neighbors
getLegalNeighbors = staticmethod(getLegalNeighbors)
def getSuccessor(position, action):
dx, dy = Actions.directionToVector(action)
x, y = position
return (x + dx, y + dy)
getSuccessor = staticmethod(getSuccessor)
class GameStateData:
"""
"""
def __init__( self, prevState = None ):
"""
Generates a new data packet by copying information from its predecessor.
"""
if prevState != None:
self.food = prevState.food.shallowCopy()
self.capsules = prevState.capsules[:]
self.agentStates = self.copyAgentStates( prevState.agentStates )
self.layout = prevState.layout
self._eaten = prevState._eaten
self.score = prevState.score
self._foodEaten = None
self._foodAdded = None
self._capsuleEaten = None
self._agentMoved = None
self._lose = False
self._win = False
self.scoreChange = 0
def deepCopy( self ):
state = GameStateData( self )
state.food = self.food.deepCopy()
state.layout = self.layout.deepCopy()
state._agentMoved = self._agentMoved
state._foodEaten = self._foodEaten
state._foodAdded = self._foodAdded
state._capsuleEaten = self._capsuleEaten
return state
def copyAgentStates( self, agentStates ):
copiedStates = []
for agentState in agentStates:
copiedStates.append( agentState.copy() )
return copiedStates
def __eq__( self, other ):
"""
Allows two states to be compared.
"""
if other == None: return False
# TODO Check for type of other
if not self.agentStates == other.agentStates: return False
if not self.food == other.food: return False
if not self.capsules == other.capsules: return False
if not self.score == other.score: return False
return True
def __hash__( self ):
"""
Allows states to be keys of dictionaries.
"""
for i, state in enumerate( self.agentStates ):
try:
int(hash(state))
except TypeError, e:
print e
#hash(state)
return int((hash(tuple(self.agentStates)) + 13*hash(self.food) + 113* hash(tuple(self.capsules)) + 7 * hash(self.score)) % 1048575 )
def __str__( self ):
width, height = self.layout.width, self.layout.height
map = Grid(width, height)
if type(self.food) == type((1,2)):
self.food = reconstituteGrid(self.food)
for x in range(width):
for y in range(height):
food, walls = self.food, self.layout.walls
map[x][y] = self._foodWallStr(food[x][y], walls[x][y])
for agentState in self.agentStates:
if agentState == None: continue
if agentState.configuration == None: continue
x,y = [int( i ) for i in nearestPoint( agentState.configuration.pos )]
agent_dir = agentState.configuration.direction
if agentState.isPacman:
map[x][y] = self._pacStr( agent_dir )
else:
map[x][y] = self._ghostStr( agent_dir )
for x, y in self.capsules:
map[x][y] = 'o'
return str(map) + ("\nScore: %d\n" % self.score)
def _foodWallStr( self, hasFood, hasWall ):
if hasFood:
return '.'
elif hasWall:
return '%'
else:
return ' '
def _pacStr( self, dir ):
if dir == Directions.NORTH:
return 'v'
if dir == Directions.SOUTH:
return '^'
if dir == Directions.WEST:
return '>'
return '<'
def _ghostStr( self, dir ):
return 'G'
if dir == Directions.NORTH:
return 'M'
if dir == Directions.SOUTH:
return 'W'
if dir == Directions.WEST:
return '3'
return 'E'
def initialize( self, layout, numGhostAgents ):
"""
Creates an initial game state from a layout array (see layout.py).
"""
self.food = layout.food.copy()
#self.capsules = []
self.capsules = layout.capsules[:]
self.layout = layout
self.score = 0
self.scoreChange = 0
self.agentStates = []
numGhosts = 0
for isPacman, pos in layout.agentPositions:
if not isPacman:
if numGhosts == numGhostAgents: continue # Max ghosts reached already
else: numGhosts += 1
self.agentStates.append( AgentState( Configuration( pos, Directions.STOP), isPacman) )
self._eaten = [False for a in self.agentStates]
try:
import boinc
_BOINC_ENABLED = True
except:
_BOINC_ENABLED = False
class Game:
"""
The Game manages the control flow, soliciting actions from agents.
"""
def __init__( self, agents, display, rules, startingIndex=0, muteAgents=False, catchExceptions=False ):
self.agentCrashed = False
self.agents = agents
self.display = display
self.rules = rules
self.startingIndex = startingIndex
self.gameOver = False
self.muteAgents = muteAgents
self.catchExceptions = catchExceptions
self.moveHistory = []
self.totalAgentTimes = [0 for agent in agents]
self.totalAgentTimeWarnings = [0 for agent in agents]
self.agentTimeout = False
import cStringIO
self.agentOutput = [cStringIO.StringIO() for agent in agents]
def getProgress(self):
if self.gameOver:
return 1.0
else:
return self.rules.getProgress(self)
def _agentCrash( self, agentIndex, quiet=False):
"Helper method for handling agent crashes"
if not quiet: traceback.print_exc()
self.gameOver = True
self.agentCrashed = True
self.rules.agentCrash(self, agentIndex)
OLD_STDOUT = None
OLD_STDERR = None
def mute(self, agentIndex):
if not self.muteAgents: return
global OLD_STDOUT, OLD_STDERR
import cStringIO
OLD_STDOUT = sys.stdout
OLD_STDERR = sys.stderr
sys.stdout = self.agentOutput[agentIndex]
sys.stderr = self.agentOutput[agentIndex]
def unmute(self):
if not self.muteAgents: return
global OLD_STDOUT, OLD_STDERR
# Revert stdout/stderr to originals
sys.stdout = OLD_STDOUT
sys.stderr = OLD_STDERR
def run( self ):
"""
Main control loop for game play.
"""
self.display.initialize(self.state.data)
self.numMoves = 0
###self.display.initialize(self.state.makeObservation(1).data)
# inform learning agents of the game start
for i in range(len(self.agents)):
agent = self.agents[i]
if not agent:
self.mute(i)
# this is a null agent, meaning it failed to load
# the other team wins
print >>sys.stderr, "Agent %d failed to load" % i
self.unmute()
self._agentCrash(i, quiet=True)
return
if ("registerInitialState" in dir(agent)):
self.mute(i)
if self.catchExceptions:
try:
timed_func = TimeoutFunction(agent.registerInitialState, int(self.rules.getMaxStartupTime(i)))
try:
start_time = time.time()
timed_func(self.state.deepCopy())
time_taken = time.time() - start_time
self.totalAgentTimes[i] += time_taken
except TimeoutFunctionException:
print >>sys.stderr, "Agent %d ran out of time on startup!" % i
self.unmute()
self.agentTimeout = True
self._agentCrash(i, quiet=True)
return
except Exception,data:
self._agentCrash(i, quiet=False)
self.unmute()
return
else:
agent.registerInitialState(self.state.deepCopy())
## TODO: could this exceed the total time
self.unmute()
agentIndex = self.startingIndex
numAgents = len( self.agents )
while not self.gameOver:
# Fetch the next agent
agent = self.agents[agentIndex]
move_time = 0
skip_action = False
# Generate an observation of the state
if 'observationFunction' in dir( agent ):
self.mute(agentIndex)
if self.catchExceptions:
try:
timed_func = TimeoutFunction(agent.observationFunction, int(self.rules.getMoveTimeout(agentIndex)))
try:
start_time = time.time()
observation = timed_func(self.state.deepCopy())
except TimeoutFunctionException:
skip_action = True
move_time += time.time() - start_time
self.unmute()
except Exception,data:
self._agentCrash(agentIndex, quiet=False)
self.unmute()
return
else:
observation = agent.observationFunction(self.state.deepCopy())
self.unmute()
else:
observation = self.state.deepCopy()
# Solicit an action
action = None
self.mute(agentIndex)
if self.catchExceptions:
try:
timed_func = TimeoutFunction(agent.getAction, int(self.rules.getMoveTimeout(agentIndex)) - int(move_time))
try:
start_time = time.time()
if skip_action:
raise TimeoutFunctionException()
action = timed_func( observation )
except TimeoutFunctionException:
print >>sys.stderr, "Agent %d timed out on a single move!" % agentIndex
self.agentTimeout = True
self._agentCrash(agentIndex, quiet=True)
self.unmute()
return
move_time += time.time() - start_time
if move_time > self.rules.getMoveWarningTime(agentIndex):
self.totalAgentTimeWarnings[agentIndex] += 1
print >>sys.stderr, "Agent %d took too long to make a move! This is warning %d" % (agentIndex, self.totalAgentTimeWarnings[agentIndex])
if self.totalAgentTimeWarnings[agentIndex] > self.rules.getMaxTimeWarnings(agentIndex):
print >>sys.stderr, "Agent %d exceeded the maximum number of warnings: %d" % (agentIndex, self.totalAgentTimeWarnings[agentIndex])
self.agentTimeout = True
self._agentCrash(agentIndex, quiet=True)
self.unmute()
return
self.totalAgentTimes[agentIndex] += move_time
#print "Agent: %d, time: %f, total: %f" % (agentIndex, move_time, self.totalAgentTimes[agentIndex])
if self.totalAgentTimes[agentIndex] > self.rules.getMaxTotalTime(agentIndex):
print >>sys.stderr, "Agent %d ran out of time! (time: %1.2f)" % (agentIndex, self.totalAgentTimes[agentIndex])
self.agentTimeout = True
self._agentCrash(agentIndex, quiet=True)
self.unmute()
return
self.unmute()
except Exception,data:
self._agentCrash(agentIndex)
self.unmute()
return
else:
action = agent.getAction(observation)
self.unmute()
# Execute the action
self.moveHistory.append( (agentIndex, action) )
if self.catchExceptions:
try:
self.state = self.state.generateSuccessor( agentIndex, action )
except Exception,data:
self.mute(agentIndex)
self._agentCrash(agentIndex)
self.unmute()
return
else:
self.state = self.state.generateSuccessor( agentIndex, action )
# Change the display
self.display.update( self.state.data )
###idx = agentIndex - agentIndex % 2 + 1
###self.display.update( self.state.makeObservation(idx).data )
# Allow for game specific conditions (winning, losing, etc.)
self.rules.process(self.state, self)
# Track progress
if agentIndex == numAgents + 1: self.numMoves += 1
# Next agent
agentIndex = ( agentIndex + 1 ) % numAgents
if _BOINC_ENABLED:
boinc.set_fraction_done(self.getProgress())
# inform a learning agent of the game result
for agentIndex, agent in enumerate(self.agents):
if "final" in dir( agent ) :
try:
self.mute(agentIndex)
agent.final( self.state )
self.unmute()
except Exception,data:
if not self.catchExceptions: raise
self._agentCrash(agentIndex)
self.unmute()
return
self.display.finish()
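Grid above is indexed grid[x][y] with (0, 0) at the bottom left, and packBits()/reconstituteGrid() round-trip a grid through a compact tuple of integers (CELLS_PER_INT booleans per int). A small sketch exercising that round trip together with the Actions helpers, assuming it runs next to game.py:

# Sketch: Grid bit-packing round trip and two Actions helpers.
from game import Grid, Actions, Directions, reconstituteGrid

g = Grid(4, 3)                  # 4 wide, 3 tall, all cells False
g[1][2] = True
g[3][0] = True

packed = g.packBits()           # (width, height, packed ints...)
g2 = reconstituteGrid(packed)   # rebuilds an equivalent Grid from the tuple
print(g2[1][2])                 # True
print(g2.count())               # 2

print(Actions.directionToVector(Directions.NORTH))   # (0.0, 1.0)
print(Actions.reverseDirection(Directions.NORTH))    # 'South'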

81
reinforcement/ghostAgents.py Normal file

@@ -0,0 +1,81 @@
# ghostAgents.py
# --------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
from game import Agent
from game import Actions
from game import Directions
import random
from util import manhattanDistance
import util
class GhostAgent( Agent ):
def __init__( self, index ):
self.index = index
def getAction( self, state ):
dist = self.getDistribution(state)
if len(dist) == 0:
return Directions.STOP
else:
return util.chooseFromDistribution( dist )
def getDistribution(self, state):
"Returns a Counter encoding a distribution over actions from the provided state."
util.raiseNotDefined()
class RandomGhost( GhostAgent ):
"A ghost that chooses a legal action uniformly at random."
def getDistribution( self, state ):
dist = util.Counter()
for a in state.getLegalActions( self.index ): dist[a] = 1.0
dist.normalize()
return dist
class DirectionalGhost( GhostAgent ):
"A ghost that prefers to rush Pacman, or flee when scared."
def __init__( self, index, prob_attack=0.8, prob_scaredFlee=0.8 ):
self.index = index
self.prob_attack = prob_attack
self.prob_scaredFlee = prob_scaredFlee
def getDistribution( self, state ):
# Read variables from state
ghostState = state.getGhostState( self.index )
legalActions = state.getLegalActions( self.index )
pos = state.getGhostPosition( self.index )
isScared = ghostState.scaredTimer > 0
speed = 1
if isScared: speed = 0.5
actionVectors = [Actions.directionToVector( a, speed ) for a in legalActions]
newPositions = [( pos[0]+a[0], pos[1]+a[1] ) for a in actionVectors]
pacmanPosition = state.getPacmanPosition()
# Select best actions given the state
distancesToPacman = [manhattanDistance( pos, pacmanPosition ) for pos in newPositions]
if isScared:
bestScore = max( distancesToPacman )
bestProb = self.prob_scaredFlee
else:
bestScore = min( distancesToPacman )
bestProb = self.prob_attack
bestActions = [action for action, distance in zip( legalActions, distancesToPacman ) if distance == bestScore]
# Construct distribution
dist = util.Counter()
for a in bestActions: dist[a] = bestProb / len(bestActions)
for a in legalActions: dist[a] += ( 1-bestProb ) / len(legalActions)
dist.normalize()
return dist
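DirectionalGhost above splits its probability mass: bestProb is shared among the "best" actions and the remaining 1 - bestProb is spread uniformly over all legal actions, after which GhostAgent.getAction samples from the normalized Counter with util.chooseFromDistribution. A sketch of the same mixing with a made-up action set and the default prob_attack of 0.8:

# Sketch of the DirectionalGhost probability mixing (made-up action set).
import util

legalActions = ['North', 'South', 'East']
bestActions = ['East']            # e.g. the action that closes in on Pacman
bestProb = 0.8                    # default prob_attack above

dist = util.Counter()
for a in bestActions:
    dist[a] = bestProb / len(bestActions)
for a in legalActions:
    dist[a] += (1 - bestProb) / len(legalActions)
dist.normalize()

print(dict(dist))                            # East ~0.867, North/South ~0.067
print(util.chooseFromDistribution(dist))     # samples an action, usually 'East'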

282
reinforcement/grading.py Normal file

@@ -0,0 +1,282 @@
# grading.py
# ----------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
"Common code for autograders"
import cgi
import time
import sys
import traceback
import pdb
from collections import defaultdict
import util
class Grades:
"A data structure for project grades, along with formatting code to display them"
def __init__(self, projectName, questionsAndMaxesList, edxOutput=False, muteOutput=False):
"""
Defines the grading scheme for a project
projectName: project name
questionsAndMaxesDict: a list of (question name, max points per question)
"""
self.questions = [el[0] for el in questionsAndMaxesList]
self.maxes = dict(questionsAndMaxesList)
self.points = Counter()
self.messages = dict([(q, []) for q in self.questions])
self.project = projectName
self.start = time.localtime()[1:6]
self.sane = True # Sanity checks
self.currentQuestion = None # Which question we're grading
self.edxOutput = edxOutput
self.mute = muteOutput
self.prereqs = defaultdict(set)
#print 'Autograder transcript for %s' % self.project
print 'Starting on %d-%d at %d:%02d:%02d' % self.start
def addPrereq(self, question, prereq):
self.prereqs[question].add(prereq)
def grade(self, gradingModule, exceptionMap = {}, bonusPic = False):
"""
Grades each question
gradingModule: the module with all the grading functions (pass in with sys.modules[__name__])
"""
completedQuestions = set([])
for q in self.questions:
print '\nQuestion %s' % q
print '=' * (9 + len(q))
print
self.currentQuestion = q
incompleted = self.prereqs[q].difference(completedQuestions)
if len(incompleted) > 0:
prereq = incompleted.pop()
print \
"""*** NOTE: Make sure to complete Question %s before working on Question %s,
*** because Question %s builds upon your answer for Question %s.
""" % (prereq, q, q, prereq)
continue
if self.mute: util.mutePrint()
try:
util.TimeoutFunction(getattr(gradingModule, q),300)(self) # Call the question's function
#TimeoutFunction(getattr(gradingModule, q),1200)(self) # Call the question's function
except Exception, inst:
self.addExceptionMessage(q, inst, traceback)
self.addErrorHints(exceptionMap, inst, q[1])
except:
self.fail('FAIL: Terminated with a string exception.')
finally:
if self.mute: util.unmutePrint()
if self.points[q] >= self.maxes[q]:
completedQuestions.add(q)
print '\n### Question %s: %d/%d ###\n' % (q, self.points[q], self.maxes[q])
print '\nFinished at %d:%02d:%02d' % time.localtime()[3:6]
print "\nProvisional grades\n=================="
for q in self.questions:
print 'Question %s: %d/%d' % (q, self.points[q], self.maxes[q])
print '------------------'
print 'Total: %d/%d' % (self.points.totalCount(), sum(self.maxes.values()))
if bonusPic and self.points.totalCount() == 25:
print """
ALL HAIL GRANDPAC.
LONG LIVE THE GHOSTBUSTING KING.
--- ---- ---
| \ / + \ / |
| + \--/ \--/ + |
| + + |
| + + + |
@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
\ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
\ / @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
V \ @@@@@@@@@@@@@@@@@@@@@@@@@@@@
\ / @@@@@@@@@@@@@@@@@@@@@@@@@@
V @@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@
/\ @@@@@@@@@@@@@@@@@@@@@@
/ \ @@@@@@@@@@@@@@@@@@@@@@@@@
/\ / @@@@@@@@@@@@@@@@@@@@@@@@@@@
/ \ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
/ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@
"""
print """
Your grades are NOT yet registered. To register your grades, make sure
to follow your instructor's guidelines to receive credit on your project.
"""
if self.edxOutput:
self.produceOutput()
def addExceptionMessage(self, q, inst, traceback):
"""
Method to format the exception message, this is more complicated because
we need to cgi.escape the traceback but wrap the exception in a <pre> tag
"""
self.fail('FAIL: Exception raised: %s' % inst)
self.addMessage('')
for line in traceback.format_exc().split('\n'):
self.addMessage(line)
def addErrorHints(self, exceptionMap, errorInstance, questionNum):
typeOf = str(type(errorInstance))
questionName = 'q' + questionNum
errorHint = ''
# question specific error hints
if exceptionMap.get(questionName):
questionMap = exceptionMap.get(questionName)
if (questionMap.get(typeOf)):
errorHint = questionMap.get(typeOf)
# fall back to general error messages if a question specific
# one does not exist
if (exceptionMap.get(typeOf)):
errorHint = exceptionMap.get(typeOf)
        # don't include the HTML if we have no error hint
if not errorHint:
return ''
for line in errorHint.split('\n'):
self.addMessage(line)
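    # Illustrative sketch (an editorial assumption, not from the original
    # project): the exceptionMap passed to grade() is expected to look roughly
    # like
    #   {
    #     'q1': {"<type 'exceptions.IndexError'>": 'question-specific hint'},
    #     "<type 'exceptions.AttributeError'>": 'general fallback hint',
    #   }
    # i.e. keys are either question names mapping to {str(type(exc)): hint}
    # dicts, or str(type(exc)) strings mapping directly to a fallback hint.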
def produceOutput(self):
edxOutput = open('edx_response.html', 'w')
edxOutput.write("<div>")
# first sum
total_possible = sum(self.maxes.values())
total_score = sum(self.points.values())
checkOrX = '<span class="incorrect"/>'
if (total_score >= total_possible):
checkOrX = '<span class="correct"/>'
header = """
<h3>
Total score ({total_score} / {total_possible})
</h3>
""".format(total_score = total_score,
total_possible = total_possible,
checkOrX = checkOrX
)
edxOutput.write(header)
for q in self.questions:
if len(q) == 2:
name = q[1]
else:
name = q
checkOrX = '<span class="incorrect"/>'
if (self.points[q] == self.maxes[q]):
checkOrX = '<span class="correct"/>'
#messages = '\n<br/>\n'.join(self.messages[q])
messages = "<pre>%s</pre>" % '\n'.join(self.messages[q])
output = """
<div class="test">
<section>
<div class="shortform">
Question {q} ({points}/{max}) {checkOrX}
</div>
<div class="longform">
{messages}
</div>
</section>
</div>
""".format(q = name,
max = self.maxes[q],
messages = messages,
checkOrX = checkOrX,
points = self.points[q]
)
# print "*** output for Question %s " % q[1]
# print output
edxOutput.write(output)
edxOutput.write("</div>")
edxOutput.close()
edxOutput = open('edx_grade', 'w')
edxOutput.write(str(self.points.totalCount()))
edxOutput.close()
def fail(self, message, raw=False):
"Sets sanity check bit to false and outputs a message"
self.sane = False
self.assignZeroCredit()
self.addMessage(message, raw)
def assignZeroCredit(self):
self.points[self.currentQuestion] = 0
def addPoints(self, amt):
self.points[self.currentQuestion] += amt
def deductPoints(self, amt):
self.points[self.currentQuestion] -= amt
def assignFullCredit(self, message="", raw=False):
self.points[self.currentQuestion] = self.maxes[self.currentQuestion]
if message != "":
self.addMessage(message, raw)
def addMessage(self, message, raw=False):
if not raw:
# We assume raw messages, formatted for HTML, are printed separately
if self.mute: util.unmutePrint()
print '*** ' + message
if self.mute: util.mutePrint()
message = cgi.escape(message)
self.messages[self.currentQuestion].append(message)
def addMessageToEmail(self, message):
print "WARNING**** addMessageToEmail is deprecated %s" % message
for line in message.split('\n'):
pass
#print '%%% ' + line + ' %%%'
#self.messages[self.currentQuestion].append(line)
class Counter(dict):
"""
Dict with default 0
"""
def __getitem__(self, idx):
try:
return dict.__getitem__(self, idx)
except KeyError:
return 0
def totalCount(self):
"""
Returns the sum of counts for all keys.
"""
return sum(self.values())
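# Illustrative sketch (not part of the original autograder): Counter lets the
# grading code accumulate points without pre-registering every question key,
# and totalCount() then sums whatever was earned. The helper name below is
# hypothetical.
def _counterSketch():
    points = Counter()
    points['q1'] += 3            # missing keys default to 0, so += works at once
    points['q2'] += 2
    return points.totalCount()   # -> 5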

333
reinforcement/graphicsCrawlerDisplay.py Normal file
View file

@ -0,0 +1,333 @@
# graphicsCrawlerDisplay.py
# -------------------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
import Tkinter
import qlearningAgents
import time
import threading
import sys
import crawler
#import pendulum
import math
from math import pi as PI
robotType = 'crawler'
class Application:
def sigmoid(self, x):
return 1.0 / (1.0 + 2.0 ** (-x))
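    # Illustrative values (a rough check, not from the original source):
    # sigmoid(0) = 0.5, sigmoid(2) = 0.8, sigmoid(-2) = 0.2. The +/- buttons
    # below nudge self.ep, self.ga and self.al by +/-0.5 and pass the result
    # through this base-2 sigmoid, so epsilon, the discount and the learning
    # rate always stay strictly between 0 and 1.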
def incrementSpeed(self, inc):
self.tickTime *= inc
# self.epsilon = min(1.0, self.epsilon)
# self.epsilon = max(0.0,self.epsilon)
# self.learner.setSpeed(self.epsilon)
self.speed_label['text'] = 'Step Delay: %.5f' % (self.tickTime)
def incrementEpsilon(self, inc):
self.ep += inc
self.epsilon = self.sigmoid(self.ep)
self.learner.setEpsilon(self.epsilon)
self.epsilon_label['text'] = 'Epsilon: %.3f' % (self.epsilon)
def incrementGamma(self, inc):
self.ga += inc
self.gamma = self.sigmoid(self.ga)
self.learner.setDiscount(self.gamma)
self.gamma_label['text'] = 'Discount: %.3f' % (self.gamma)
def incrementAlpha(self, inc):
self.al += inc
self.alpha = self.sigmoid(self.al)
self.learner.setLearningRate(self.alpha)
self.alpha_label['text'] = 'Learning Rate: %.3f' % (self.alpha)
def __initGUI(self, win):
## Window ##
self.win = win
## Initialize Frame ##
win.grid()
self.dec = -.5
self.inc = .5
self.tickTime = 0.1
## Epsilon Button + Label ##
self.setupSpeedButtonAndLabel(win)
self.setupEpsilonButtonAndLabel(win)
## Gamma Button + Label ##
self.setUpGammaButtonAndLabel(win)
## Alpha Button + Label ##
self.setupAlphaButtonAndLabel(win)
## Exit Button ##
#self.exit_button = Tkinter.Button(win,text='Quit', command=self.exit)
#self.exit_button.grid(row=0, column=9)
## Simulation Buttons ##
# self.setupSimulationButtons(win)
## Canvas ##
self.canvas = Tkinter.Canvas(root, height=200, width=1000)
self.canvas.grid(row=2,columnspan=10)
def setupAlphaButtonAndLabel(self, win):
self.alpha_minus = Tkinter.Button(win,
text="-",command=(lambda: self.incrementAlpha(self.dec)))
self.alpha_minus.grid(row=1, column=3, padx=10)
self.alpha = self.sigmoid(self.al)
self.alpha_label = Tkinter.Label(win, text='Learning Rate: %.3f' % (self.alpha))
self.alpha_label.grid(row=1, column=4)
self.alpha_plus = Tkinter.Button(win,
text="+",command=(lambda: self.incrementAlpha(self.inc)))
self.alpha_plus.grid(row=1, column=5, padx=10)
def setUpGammaButtonAndLabel(self, win):
self.gamma_minus = Tkinter.Button(win,
text="-",command=(lambda: self.incrementGamma(self.dec)))
self.gamma_minus.grid(row=1, column=0, padx=10)
self.gamma = self.sigmoid(self.ga)
self.gamma_label = Tkinter.Label(win, text='Discount: %.3f' % (self.gamma))
self.gamma_label.grid(row=1, column=1)
self.gamma_plus = Tkinter.Button(win,
text="+",command=(lambda: self.incrementGamma(self.inc)))
self.gamma_plus.grid(row=1, column=2, padx=10)
def setupEpsilonButtonAndLabel(self, win):
self.epsilon_minus = Tkinter.Button(win,
text="-",command=(lambda: self.incrementEpsilon(self.dec)))
self.epsilon_minus.grid(row=0, column=3)
self.epsilon = self.sigmoid(self.ep)
self.epsilon_label = Tkinter.Label(win, text='Epsilon: %.3f' % (self.epsilon))
self.epsilon_label.grid(row=0, column=4)
self.epsilon_plus = Tkinter.Button(win,
text="+",command=(lambda: self.incrementEpsilon(self.inc)))
self.epsilon_plus.grid(row=0, column=5)
def setupSpeedButtonAndLabel(self, win):
self.speed_minus = Tkinter.Button(win,
text="-",command=(lambda: self.incrementSpeed(.5)))
self.speed_minus.grid(row=0, column=0)
self.speed_label = Tkinter.Label(win, text='Step Delay: %.5f' % (self.tickTime))
self.speed_label.grid(row=0, column=1)
self.speed_plus = Tkinter.Button(win,
text="+",command=(lambda: self.incrementSpeed(2)))
self.speed_plus.grid(row=0, column=2)
def skip5kSteps(self):
self.stepsToSkip = 5000
def __init__(self, win):
self.ep = 0
self.ga = 2
self.al = 2
self.stepCount = 0
## Init Gui
self.__initGUI(win)
# Init environment
if robotType == 'crawler':
self.robot = crawler.CrawlingRobot(self.canvas)
self.robotEnvironment = crawler.CrawlingRobotEnvironment(self.robot)
elif robotType == 'pendulum':
self.robot = pendulum.PendulumRobot(self.canvas)
self.robotEnvironment = \
pendulum.PendulumRobotEnvironment(self.robot)
else:
raise "Unknown RobotType"
# Init Agent
simulationFn = lambda agent: \
simulation.SimulationEnvironment(self.robotEnvironment,agent)
actionFn = lambda state: \
self.robotEnvironment.getPossibleActions(state)
self.learner = qlearningAgents.QLearningAgent(actionFn=actionFn)
self.learner.setEpsilon(self.epsilon)
self.learner.setLearningRate(self.alpha)
self.learner.setDiscount(self.gamma)
# Start GUI
self.running = True
self.stopped = False
self.stepsToSkip = 0
self.thread = threading.Thread(target=self.run)
self.thread.start()
def exit(self):
self.running = False
for i in range(5):
if not self.stopped:
time.sleep(0.1)
try:
self.win.destroy()
except:
pass
sys.exit(0)
def step(self):
self.stepCount += 1
state = self.robotEnvironment.getCurrentState()
actions = self.robotEnvironment.getPossibleActions(state)
if len(actions) == 0.0:
self.robotEnvironment.reset()
state = self.robotEnvironment.getCurrentState()
actions = self.robotEnvironment.getPossibleActions(state)
print 'Reset!'
action = self.learner.getAction(state)
        if action is None:
            raise Exception('None action returned: Code Not Complete')
nextState, reward = self.robotEnvironment.doAction(action)
self.learner.observeTransition(state, action, nextState, reward)
def animatePolicy(self):
if robotType != 'pendulum':
            raise Exception('Only pendulum can animatePolicy')
totWidth = self.canvas.winfo_reqwidth()
totHeight = self.canvas.winfo_reqheight()
length = 0.48 * min(totWidth, totHeight)
x,y = totWidth-length-30, length+10
angleMin, angleMax = self.robot.getMinAndMaxAngle()
velMin, velMax = self.robot.getMinAndMaxAngleVelocity()
if not 'animatePolicyBox' in dir(self):
self.canvas.create_line(x,y,x+length,y)
self.canvas.create_line(x+length,y,x+length,y-length)
self.canvas.create_line(x+length,y-length,x,y-length)
self.canvas.create_line(x,y-length,x,y)
self.animatePolicyBox = 1
self.canvas.create_text(x+length/2,y+10,text='angle')
self.canvas.create_text(x-30,y-length/2,text='velocity')
self.canvas.create_text(x-60,y-length/4,text='Blue = kickLeft')
self.canvas.create_text(x-60,y-length/4+20,text='Red = kickRight')
self.canvas.create_text(x-60,y-length/4+40,text='White = doNothing')
angleDelta = (angleMax-angleMin) / 100
velDelta = (velMax-velMin) / 100
for i in range(100):
angle = angleMin + i * angleDelta
for j in range(100):
vel = velMin + j * velDelta
state = self.robotEnvironment.getState(angle,vel)
max, argMax = None, None
if not self.learner.seenState(state):
argMax = 'unseen'
else:
for action in ('kickLeft','kickRight','doNothing'):
qVal = self.learner.getQValue(state, action)
if max == None or qVal > max:
max, argMax = qVal, action
if argMax != 'unseen':
if argMax == 'kickLeft':
color = 'blue'
elif argMax == 'kickRight':
color = 'red'
elif argMax == 'doNothing':
color = 'white'
dx = length / 100.0
dy = length / 100.0
x0, y0 = x+i*dx, y-j*dy
self.canvas.create_rectangle(x0,y0,x0+dx,y0+dy,fill=color)
def run(self):
self.stepCount = 0
self.learner.startEpisode()
while True:
minSleep = .01
tm = max(minSleep, self.tickTime)
time.sleep(tm)
self.stepsToSkip = int(tm / self.tickTime) - 1
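            # Editorial note (an assumption, not from the original source):
            # when tickTime drops below minSleep the loop still sleeps for
            # about 10 ms, but int(tm / self.tickTime) - 1 extra calls to
            # step() are made below, so learning keeps pace with the
            # requested step delay.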
if not self.running:
self.stopped = True
return
for i in range(self.stepsToSkip):
self.step()
self.stepsToSkip = 0
self.step()
# self.robot.draw()
self.learner.stopEpisode()
def start(self):
self.win.mainloop()
def run():
global root
root = Tkinter.Tk()
root.title( 'Crawler GUI' )
root.resizable( 0, 0 )
# root.mainloop()
app = Application(root)
def update_gui():
app.robot.draw(app.stepCount, app.tickTime)
root.after(10, update_gui)
update_gui()
root.protocol( 'WM_DELETE_WINDOW', app.exit)
try:
app.start()
except:
app.exit()

679
reinforcement/graphicsDisplay.py Normal file
View file

@ -0,0 +1,679 @@
# graphicsDisplay.py
# ------------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
from graphicsUtils import *
import math, time
from game import Directions
###########################
# GRAPHICS DISPLAY CODE #
###########################
# Most code by Dan Klein and John DeNero, written or rewritten for cs188 at UC Berkeley.
# Some code is from a Pacman implementation by LiveWires, used / modified with permission.
DEFAULT_GRID_SIZE = 30.0
INFO_PANE_HEIGHT = 35
BACKGROUND_COLOR = formatColor(0,0,0)
WALL_COLOR = formatColor(0.0/255.0, 51.0/255.0, 255.0/255.0)
INFO_PANE_COLOR = formatColor(.4,.4,0)
SCORE_COLOR = formatColor(.9, .9, .9)
PACMAN_OUTLINE_WIDTH = 2
PACMAN_CAPTURE_OUTLINE_WIDTH = 4
GHOST_COLORS = []
GHOST_COLORS.append(formatColor(.9,0,0)) # Red
GHOST_COLORS.append(formatColor(0,.3,.9)) # Blue
GHOST_COLORS.append(formatColor(.98,.41,.07)) # Orange
GHOST_COLORS.append(formatColor(.1,.75,.7)) # Green
GHOST_COLORS.append(formatColor(1.0,0.6,0.0)) # Yellow
GHOST_COLORS.append(formatColor(.4,0.13,0.91)) # Purple
TEAM_COLORS = GHOST_COLORS[:2]
GHOST_SHAPE = [
( 0, 0.3 ),
( 0.25, 0.75 ),
( 0.5, 0.3 ),
( 0.75, 0.75 ),
( 0.75, -0.5 ),
( 0.5, -0.75 ),
(-0.5, -0.75 ),
(-0.75, -0.5 ),
(-0.75, 0.75 ),
(-0.5, 0.3 ),
(-0.25, 0.75 )
]
GHOST_SIZE = 0.65
SCARED_COLOR = formatColor(1,1,1)
GHOST_VEC_COLORS = map(colorToVector, GHOST_COLORS)
PACMAN_COLOR = formatColor(255.0/255.0,255.0/255.0,61.0/255)
PACMAN_SCALE = 0.5
#pacman_speed = 0.25
# Food
FOOD_COLOR = formatColor(1,1,1)
FOOD_SIZE = 0.1
# Laser
LASER_COLOR = formatColor(1,0,0)
LASER_SIZE = 0.02
# Capsule graphics
CAPSULE_COLOR = formatColor(1,1,1)
CAPSULE_SIZE = 0.25
# Drawing walls
WALL_RADIUS = 0.15
class InfoPane:
def __init__(self, layout, gridSize):
self.gridSize = gridSize
self.width = (layout.width) * gridSize
self.base = (layout.height + 1) * gridSize
self.height = INFO_PANE_HEIGHT
self.fontSize = 24
self.textColor = PACMAN_COLOR
self.drawPane()
def toScreen(self, pos, y = None):
"""
Translates a point relative from the bottom left of the info pane.
"""
if y == None:
x,y = pos
else:
x = pos
x = self.gridSize + x # Margin
y = self.base + y
return x,y
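        # For example, toScreen(0, 0) returns (self.gridSize, self.base): one
        # grid cell of left margin, measured down from just below the maze.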
def drawPane(self):
self.scoreText = text( self.toScreen(0, 0 ), self.textColor, "SCORE: 0", "Times", self.fontSize, "bold")
def initializeGhostDistances(self, distances):
self.ghostDistanceText = []
size = 20
if self.width < 240:
size = 12
if self.width < 160:
size = 10
for i, d in enumerate(distances):
t = text( self.toScreen(self.width/2 + self.width/8 * i, 0), GHOST_COLORS[i+1], d, "Times", size, "bold")
self.ghostDistanceText.append(t)
def updateScore(self, score):
changeText(self.scoreText, "SCORE: % 4d" % score)
def setTeam(self, isBlue):
text = "RED TEAM"
if isBlue: text = "BLUE TEAM"
self.teamText = text( self.toScreen(300, 0 ), self.textColor, text, "Times", self.fontSize, "bold")
def updateGhostDistances(self, distances):
if len(distances) == 0: return
if 'ghostDistanceText' not in dir(self): self.initializeGhostDistances(distances)
else:
for i, d in enumerate(distances):
changeText(self.ghostDistanceText[i], d)
def drawGhost(self):
pass
def drawPacman(self):
pass
def drawWarning(self):
pass
def clearIcon(self):
pass
def updateMessage(self, message):
pass
def clearMessage(self):
pass
class PacmanGraphics:
def __init__(self, zoom=1.0, frameTime=0.0, capture=False):
self.have_window = 0
self.currentGhostImages = {}
self.pacmanImage = None
self.zoom = zoom
self.gridSize = DEFAULT_GRID_SIZE * zoom
self.capture = capture
self.frameTime = frameTime
def checkNullDisplay(self):
return False
def initialize(self, state, isBlue = False):
self.isBlue = isBlue
self.startGraphics(state)
# self.drawDistributions(state)
self.distributionImages = None # Initialized lazily
self.drawStaticObjects(state)
self.drawAgentObjects(state)
# Information
self.previousState = state
def startGraphics(self, state):
self.layout = state.layout
layout = self.layout
self.width = layout.width
self.height = layout.height
self.make_window(self.width, self.height)
self.infoPane = InfoPane(layout, self.gridSize)
self.currentState = layout
def drawDistributions(self, state):
walls = state.layout.walls
dist = []
for x in range(walls.width):
distx = []
dist.append(distx)
for y in range(walls.height):
( screen_x, screen_y ) = self.to_screen( (x, y) )
block = square( (screen_x, screen_y),
0.5 * self.gridSize,
color = BACKGROUND_COLOR,
filled = 1, behind=2)
distx.append(block)
self.distributionImages = dist
def drawStaticObjects(self, state):
layout = self.layout
self.drawWalls(layout.walls)
self.food = self.drawFood(layout.food)
self.capsules = self.drawCapsules(layout.capsules)
refresh()
def drawAgentObjects(self, state):
self.agentImages = [] # (agentState, image)
for index, agent in enumerate(state.agentStates):
if agent.isPacman:
image = self.drawPacman(agent, index)
self.agentImages.append( (agent, image) )
else:
image = self.drawGhost(agent, index)
self.agentImages.append( (agent, image) )
refresh()
def swapImages(self, agentIndex, newState):
"""
        Changes an image from a ghost to a pacman or vice versa (for capture)
"""
prevState, prevImage = self.agentImages[agentIndex]
for item in prevImage: remove_from_screen(item)
if newState.isPacman:
image = self.drawPacman(newState, agentIndex)
self.agentImages[agentIndex] = (newState, image )
else:
image = self.drawGhost(newState, agentIndex)
self.agentImages[agentIndex] = (newState, image )
refresh()
def update(self, newState):
agentIndex = newState._agentMoved
agentState = newState.agentStates[agentIndex]
if self.agentImages[agentIndex][0].isPacman != agentState.isPacman: self.swapImages(agentIndex, agentState)
prevState, prevImage = self.agentImages[agentIndex]
if agentState.isPacman:
self.animatePacman(agentState, prevState, prevImage)
else:
self.moveGhost(agentState, agentIndex, prevState, prevImage)
self.agentImages[agentIndex] = (agentState, prevImage)
if newState._foodEaten != None:
self.removeFood(newState._foodEaten, self.food)
if newState._capsuleEaten != None:
self.removeCapsule(newState._capsuleEaten, self.capsules)
self.infoPane.updateScore(newState.score)
if 'ghostDistances' in dir(newState):
self.infoPane.updateGhostDistances(newState.ghostDistances)
def make_window(self, width, height):
grid_width = (width-1) * self.gridSize
grid_height = (height-1) * self.gridSize
screen_width = 2*self.gridSize + grid_width
screen_height = 2*self.gridSize + grid_height + INFO_PANE_HEIGHT
begin_graphics(screen_width,
screen_height,
BACKGROUND_COLOR,
"CS188 Pacman")
def drawPacman(self, pacman, index):
position = self.getPosition(pacman)
screen_point = self.to_screen(position)
endpoints = self.getEndpoints(self.getDirection(pacman))
width = PACMAN_OUTLINE_WIDTH
outlineColor = PACMAN_COLOR
fillColor = PACMAN_COLOR
if self.capture:
outlineColor = TEAM_COLORS[index % 2]
fillColor = GHOST_COLORS[index]
width = PACMAN_CAPTURE_OUTLINE_WIDTH
return [circle(screen_point, PACMAN_SCALE * self.gridSize,
fillColor = fillColor, outlineColor = outlineColor,
endpoints = endpoints,
width = width)]
def getEndpoints(self, direction, position=(0,0)):
x, y = position
pos = x - int(x) + y - int(y)
width = 30 + 80 * math.sin(math.pi* pos)
delta = width / 2
if (direction == 'West'):
endpoints = (180+delta, 180-delta)
elif (direction == 'North'):
endpoints = (90+delta, 90-delta)
elif (direction == 'South'):
endpoints = (270+delta, 270-delta)
else:
endpoints = (0+delta, 0-delta)
return endpoints
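    # Worked example (a rough check, not from the original source): at a grid
    # point, pos = 0, so width = 30 and delta = 15; facing East this returns
    # (15, -15), i.e. a 30-degree mouth. Halfway between cells pos = 0.5, so
    # width = 110 and the mouth opens to its widest, which is what animates
    # the chomping as Pacman moves.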
def movePacman(self, position, direction, image):
screenPosition = self.to_screen(position)
endpoints = self.getEndpoints( direction, position )
r = PACMAN_SCALE * self.gridSize
moveCircle(image[0], screenPosition, r, endpoints)
refresh()
def animatePacman(self, pacman, prevPacman, image):
if self.frameTime < 0:
print 'Press any key to step forward, "q" to play'
keys = wait_for_keys()
if 'q' in keys:
self.frameTime = 0.1
if self.frameTime > 0.01 or self.frameTime < 0:
start = time.time()
fx, fy = self.getPosition(prevPacman)
px, py = self.getPosition(pacman)
frames = 4.0
for i in range(1,int(frames) + 1):
pos = px*i/frames + fx*(frames-i)/frames, py*i/frames + fy*(frames-i)/frames
self.movePacman(pos, self.getDirection(pacman), image)
refresh()
sleep(abs(self.frameTime) / frames)
else:
self.movePacman(self.getPosition(pacman), self.getDirection(pacman), image)
refresh()
def getGhostColor(self, ghost, ghostIndex):
if ghost.scaredTimer > 0:
return SCARED_COLOR
else:
return GHOST_COLORS[ghostIndex]
def drawGhost(self, ghost, agentIndex):
pos = self.getPosition(ghost)
dir = self.getDirection(ghost)
(screen_x, screen_y) = (self.to_screen(pos) )
coords = []
for (x, y) in GHOST_SHAPE:
coords.append((x*self.gridSize*GHOST_SIZE + screen_x, y*self.gridSize*GHOST_SIZE + screen_y))
colour = self.getGhostColor(ghost, agentIndex)
body = polygon(coords, colour, filled = 1)
WHITE = formatColor(1.0, 1.0, 1.0)
BLACK = formatColor(0.0, 0.0, 0.0)
dx = 0
dy = 0
if dir == 'North':
dy = -0.2
if dir == 'South':
dy = 0.2
if dir == 'East':
dx = 0.2
if dir == 'West':
dx = -0.2
leftEye = circle((screen_x+self.gridSize*GHOST_SIZE*(-0.3+dx/1.5), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy/1.5)), self.gridSize*GHOST_SIZE*0.2, WHITE, WHITE)
rightEye = circle((screen_x+self.gridSize*GHOST_SIZE*(0.3+dx/1.5), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy/1.5)), self.gridSize*GHOST_SIZE*0.2, WHITE, WHITE)
leftPupil = circle((screen_x+self.gridSize*GHOST_SIZE*(-0.3+dx), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy)), self.gridSize*GHOST_SIZE*0.08, BLACK, BLACK)
rightPupil = circle((screen_x+self.gridSize*GHOST_SIZE*(0.3+dx), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy)), self.gridSize*GHOST_SIZE*0.08, BLACK, BLACK)
ghostImageParts = []
ghostImageParts.append(body)
ghostImageParts.append(leftEye)
ghostImageParts.append(rightEye)
ghostImageParts.append(leftPupil)
ghostImageParts.append(rightPupil)
return ghostImageParts
def moveEyes(self, pos, dir, eyes):
(screen_x, screen_y) = (self.to_screen(pos) )
dx = 0
dy = 0
if dir == 'North':
dy = -0.2
if dir == 'South':
dy = 0.2
if dir == 'East':
dx = 0.2
if dir == 'West':
dx = -0.2
moveCircle(eyes[0],(screen_x+self.gridSize*GHOST_SIZE*(-0.3+dx/1.5), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy/1.5)), self.gridSize*GHOST_SIZE*0.2)
moveCircle(eyes[1],(screen_x+self.gridSize*GHOST_SIZE*(0.3+dx/1.5), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy/1.5)), self.gridSize*GHOST_SIZE*0.2)
moveCircle(eyes[2],(screen_x+self.gridSize*GHOST_SIZE*(-0.3+dx), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy)), self.gridSize*GHOST_SIZE*0.08)
moveCircle(eyes[3],(screen_x+self.gridSize*GHOST_SIZE*(0.3+dx), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy)), self.gridSize*GHOST_SIZE*0.08)
def moveGhost(self, ghost, ghostIndex, prevGhost, ghostImageParts):
old_x, old_y = self.to_screen(self.getPosition(prevGhost))
new_x, new_y = self.to_screen(self.getPosition(ghost))
delta = new_x - old_x, new_y - old_y
for ghostImagePart in ghostImageParts:
move_by(ghostImagePart, delta)
refresh()
if ghost.scaredTimer > 0:
color = SCARED_COLOR
else:
color = GHOST_COLORS[ghostIndex]
edit(ghostImageParts[0], ('fill', color), ('outline', color))
self.moveEyes(self.getPosition(ghost), self.getDirection(ghost), ghostImageParts[-4:])
refresh()
def getPosition(self, agentState):
if agentState.configuration == None: return (-1000, -1000)
return agentState.getPosition()
def getDirection(self, agentState):
if agentState.configuration == None: return Directions.STOP
return agentState.configuration.getDirection()
def finish(self):
end_graphics()
def to_screen(self, point):
( x, y ) = point
#y = self.height - y
x = (x + 1)*self.gridSize
y = (self.height - y)*self.gridSize
return ( x, y )
# Fixes some TK issue with off-center circles
def to_screen2(self, point):
( x, y ) = point
#y = self.height - y
x = (x + 1)*self.gridSize
y = (self.height - y)*self.gridSize
return ( x, y )
def drawWalls(self, wallMatrix):
wallColor = WALL_COLOR
for xNum, x in enumerate(wallMatrix):
if self.capture and (xNum * 2) < wallMatrix.width: wallColor = TEAM_COLORS[0]
if self.capture and (xNum * 2) >= wallMatrix.width: wallColor = TEAM_COLORS[1]
for yNum, cell in enumerate(x):
if cell: # There's a wall here
pos = (xNum, yNum)
screen = self.to_screen(pos)
screen2 = self.to_screen2(pos)
# draw each quadrant of the square based on adjacent walls
wIsWall = self.isWall(xNum-1, yNum, wallMatrix)
eIsWall = self.isWall(xNum+1, yNum, wallMatrix)
nIsWall = self.isWall(xNum, yNum+1, wallMatrix)
sIsWall = self.isWall(xNum, yNum-1, wallMatrix)
nwIsWall = self.isWall(xNum-1, yNum+1, wallMatrix)
swIsWall = self.isWall(xNum-1, yNum-1, wallMatrix)
neIsWall = self.isWall(xNum+1, yNum+1, wallMatrix)
seIsWall = self.isWall(xNum+1, yNum-1, wallMatrix)
# NE quadrant
if (not nIsWall) and (not eIsWall):
# inner circle
circle(screen2, WALL_RADIUS * self.gridSize, wallColor, wallColor, (0,91), 'arc')
if (nIsWall) and (not eIsWall):
# vertical line
line(add(screen, (self.gridSize*WALL_RADIUS, 0)), add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(-0.5)-1)), wallColor)
if (not nIsWall) and (eIsWall):
# horizontal line
line(add(screen, (0, self.gridSize*(-1)*WALL_RADIUS)), add(screen, (self.gridSize*0.5+1, self.gridSize*(-1)*WALL_RADIUS)), wallColor)
if (nIsWall) and (eIsWall) and (not neIsWall):
# outer circle
circle(add(screen2, (self.gridSize*2*WALL_RADIUS, self.gridSize*(-2)*WALL_RADIUS)), WALL_RADIUS * self.gridSize-1, wallColor, wallColor, (180,271), 'arc')
line(add(screen, (self.gridSize*2*WALL_RADIUS-1, self.gridSize*(-1)*WALL_RADIUS)), add(screen, (self.gridSize*0.5+1, self.gridSize*(-1)*WALL_RADIUS)), wallColor)
line(add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(-2)*WALL_RADIUS+1)), add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(-0.5))), wallColor)
# NW quadrant
if (not nIsWall) and (not wIsWall):
# inner circle
circle(screen2, WALL_RADIUS * self.gridSize, wallColor, wallColor, (90,181), 'arc')
if (nIsWall) and (not wIsWall):
# vertical line
line(add(screen, (self.gridSize*(-1)*WALL_RADIUS, 0)), add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(-0.5)-1)), wallColor)
if (not nIsWall) and (wIsWall):
# horizontal line
line(add(screen, (0, self.gridSize*(-1)*WALL_RADIUS)), add(screen, (self.gridSize*(-0.5)-1, self.gridSize*(-1)*WALL_RADIUS)), wallColor)
if (nIsWall) and (wIsWall) and (not nwIsWall):
# outer circle
circle(add(screen2, (self.gridSize*(-2)*WALL_RADIUS, self.gridSize*(-2)*WALL_RADIUS)), WALL_RADIUS * self.gridSize-1, wallColor, wallColor, (270,361), 'arc')
line(add(screen, (self.gridSize*(-2)*WALL_RADIUS+1, self.gridSize*(-1)*WALL_RADIUS)), add(screen, (self.gridSize*(-0.5), self.gridSize*(-1)*WALL_RADIUS)), wallColor)
line(add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(-2)*WALL_RADIUS+1)), add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(-0.5))), wallColor)
# SE quadrant
if (not sIsWall) and (not eIsWall):
# inner circle
circle(screen2, WALL_RADIUS * self.gridSize, wallColor, wallColor, (270,361), 'arc')
if (sIsWall) and (not eIsWall):
# vertical line
line(add(screen, (self.gridSize*WALL_RADIUS, 0)), add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(0.5)+1)), wallColor)
if (not sIsWall) and (eIsWall):
# horizontal line
line(add(screen, (0, self.gridSize*(1)*WALL_RADIUS)), add(screen, (self.gridSize*0.5+1, self.gridSize*(1)*WALL_RADIUS)), wallColor)
if (sIsWall) and (eIsWall) and (not seIsWall):
# outer circle
circle(add(screen2, (self.gridSize*2*WALL_RADIUS, self.gridSize*(2)*WALL_RADIUS)), WALL_RADIUS * self.gridSize-1, wallColor, wallColor, (90,181), 'arc')
line(add(screen, (self.gridSize*2*WALL_RADIUS-1, self.gridSize*(1)*WALL_RADIUS)), add(screen, (self.gridSize*0.5, self.gridSize*(1)*WALL_RADIUS)), wallColor)
line(add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(2)*WALL_RADIUS-1)), add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(0.5))), wallColor)
# SW quadrant
if (not sIsWall) and (not wIsWall):
# inner circle
circle(screen2, WALL_RADIUS * self.gridSize, wallColor, wallColor, (180,271), 'arc')
if (sIsWall) and (not wIsWall):
# vertical line
line(add(screen, (self.gridSize*(-1)*WALL_RADIUS, 0)), add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(0.5)+1)), wallColor)
if (not sIsWall) and (wIsWall):
# horizontal line
line(add(screen, (0, self.gridSize*(1)*WALL_RADIUS)), add(screen, (self.gridSize*(-0.5)-1, self.gridSize*(1)*WALL_RADIUS)), wallColor)
if (sIsWall) and (wIsWall) and (not swIsWall):
# outer circle
circle(add(screen2, (self.gridSize*(-2)*WALL_RADIUS, self.gridSize*(2)*WALL_RADIUS)), WALL_RADIUS * self.gridSize-1, wallColor, wallColor, (0,91), 'arc')
line(add(screen, (self.gridSize*(-2)*WALL_RADIUS+1, self.gridSize*(1)*WALL_RADIUS)), add(screen, (self.gridSize*(-0.5), self.gridSize*(1)*WALL_RADIUS)), wallColor)
line(add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(2)*WALL_RADIUS-1)), add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(0.5))), wallColor)
def isWall(self, x, y, walls):
if x < 0 or y < 0:
return False
if x >= walls.width or y >= walls.height:
return False
return walls[x][y]
def drawFood(self, foodMatrix ):
foodImages = []
color = FOOD_COLOR
for xNum, x in enumerate(foodMatrix):
if self.capture and (xNum * 2) <= foodMatrix.width: color = TEAM_COLORS[0]
if self.capture and (xNum * 2) > foodMatrix.width: color = TEAM_COLORS[1]
imageRow = []
foodImages.append(imageRow)
for yNum, cell in enumerate(x):
if cell: # There's food here
screen = self.to_screen((xNum, yNum ))
dot = circle( screen,
FOOD_SIZE * self.gridSize,
outlineColor = color, fillColor = color,
width = 1)
imageRow.append(dot)
else:
imageRow.append(None)
return foodImages
def drawCapsules(self, capsules ):
capsuleImages = {}
for capsule in capsules:
( screen_x, screen_y ) = self.to_screen(capsule)
dot = circle( (screen_x, screen_y),
CAPSULE_SIZE * self.gridSize,
outlineColor = CAPSULE_COLOR,
fillColor = CAPSULE_COLOR,
width = 1)
capsuleImages[capsule] = dot
return capsuleImages
def removeFood(self, cell, foodImages ):
x, y = cell
remove_from_screen(foodImages[x][y])
def removeCapsule(self, cell, capsuleImages ):
x, y = cell
remove_from_screen(capsuleImages[(x, y)])
def drawExpandedCells(self, cells):
"""
Draws an overlay of expanded grid positions for search agents
"""
n = float(len(cells))
baseColor = [1.0, 0.0, 0.0]
self.clearExpandedCells()
self.expandedCells = []
for k, cell in enumerate(cells):
screenPos = self.to_screen( cell)
cellColor = formatColor(*[(n-k) * c * .5 / n + .25 for c in baseColor])
block = square(screenPos,
0.5 * self.gridSize,
color = cellColor,
filled = 1, behind=2)
self.expandedCells.append(block)
if self.frameTime < 0:
refresh()
def clearExpandedCells(self):
if 'expandedCells' in dir(self) and len(self.expandedCells) > 0:
for cell in self.expandedCells:
remove_from_screen(cell)
def updateDistributions(self, distributions):
"Draws an agent's belief distributions"
# copy all distributions so we don't change their state
distributions = map(lambda x: x.copy(), distributions)
if self.distributionImages == None:
self.drawDistributions(self.previousState)
for x in range(len(self.distributionImages)):
for y in range(len(self.distributionImages[0])):
image = self.distributionImages[x][y]
weights = [dist[ (x,y) ] for dist in distributions]
if sum(weights) != 0:
pass
# Fog of war
color = [0.0,0.0,0.0]
colors = GHOST_VEC_COLORS[1:] # With Pacman
if self.capture: colors = GHOST_VEC_COLORS
for weight, gcolor in zip(weights, colors):
color = [min(1.0, c + 0.95 * g * weight ** .3) for c,g in zip(color, gcolor)]
changeColor(image, formatColor(*color))
refresh()
class FirstPersonPacmanGraphics(PacmanGraphics):
def __init__(self, zoom = 1.0, showGhosts = True, capture = False, frameTime=0):
PacmanGraphics.__init__(self, zoom, frameTime=frameTime)
self.showGhosts = showGhosts
self.capture = capture
def initialize(self, state, isBlue = False):
self.isBlue = isBlue
PacmanGraphics.startGraphics(self, state)
# Initialize distribution images
walls = state.layout.walls
dist = []
self.layout = state.layout
# Draw the rest
self.distributionImages = None # initialize lazily
self.drawStaticObjects(state)
self.drawAgentObjects(state)
# Information
self.previousState = state
def lookAhead(self, config, state):
if config.getDirection() == 'Stop':
return
else:
pass
# Draw relevant ghosts
allGhosts = state.getGhostStates()
visibleGhosts = state.getVisibleGhosts()
for i, ghost in enumerate(allGhosts):
if ghost in visibleGhosts:
self.drawGhost(ghost, i)
else:
self.currentGhostImages[i] = None
def getGhostColor(self, ghost, ghostIndex):
return GHOST_COLORS[ghostIndex]
def getPosition(self, ghostState):
if not self.showGhosts and not ghostState.isPacman and ghostState.getPosition()[1] > 1:
return (-1000, -1000)
else:
return PacmanGraphics.getPosition(self, ghostState)
def add(x, y):
return (x[0] + y[0], x[1] + y[1])
# Saving graphical output
# -----------------------
# Note: to make an animated gif from this postscript output, try the command:
# convert -delay 7 -loop 1 -compress lzw -layers optimize frame* out.gif
# convert is part of imagemagick (freeware)
SAVE_POSTSCRIPT = False
POSTSCRIPT_OUTPUT_DIR = 'frames'
FRAME_NUMBER = 0
import os
def saveFrame():
"Saves the current graphical output as a postscript file"
global SAVE_POSTSCRIPT, FRAME_NUMBER, POSTSCRIPT_OUTPUT_DIR
if not SAVE_POSTSCRIPT: return
if not os.path.exists(POSTSCRIPT_OUTPUT_DIR): os.mkdir(POSTSCRIPT_OUTPUT_DIR)
name = os.path.join(POSTSCRIPT_OUTPUT_DIR, 'frame_%08d.ps' % FRAME_NUMBER)
FRAME_NUMBER += 1
writePostscript(name) # writes the current canvas

348
reinforcement/graphicsGridworldDisplay.py Normal file
View file

@ -0,0 +1,348 @@
# graphicsGridworldDisplay.py
# ---------------------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
import util
from graphicsUtils import *
class GraphicsGridworldDisplay:
def __init__(self, gridworld, size=120, speed=1.0):
self.gridworld = gridworld
self.size = size
self.speed = speed
def start(self):
setup(self.gridworld, size=self.size)
def pause(self):
wait_for_keys()
def displayValues(self, agent, currentState = None, message = 'Agent Values'):
values = util.Counter()
policy = {}
states = self.gridworld.getStates()
for state in states:
values[state] = agent.getValue(state)
policy[state] = agent.getPolicy(state)
drawValues(self.gridworld, values, policy, currentState, message)
sleep(0.05 / self.speed)
def displayNullValues(self, currentState = None, message = ''):
values = util.Counter()
#policy = {}
states = self.gridworld.getStates()
for state in states:
values[state] = 0.0
#policy[state] = agent.getPolicy(state)
drawNullValues(self.gridworld, currentState,'')
# drawValues(self.gridworld, values, policy, currentState, message)
sleep(0.05 / self.speed)
def displayQValues(self, agent, currentState = None, message = 'Agent Q-Values'):
qValues = util.Counter()
states = self.gridworld.getStates()
for state in states:
for action in self.gridworld.getPossibleActions(state):
qValues[(state, action)] = agent.getQValue(state, action)
drawQValues(self.gridworld, qValues, currentState, message)
sleep(0.05 / self.speed)
BACKGROUND_COLOR = formatColor(0,0,0)
EDGE_COLOR = formatColor(1,1,1)
OBSTACLE_COLOR = formatColor(0.5,0.5,0.5)
TEXT_COLOR = formatColor(1,1,1)
MUTED_TEXT_COLOR = formatColor(0.7,0.7,0.7)
LOCATION_COLOR = formatColor(0,0,1)
WINDOW_SIZE = -1
GRID_SIZE = -1
GRID_HEIGHT = -1
MARGIN = -1
def setup(gridworld, title = "Gridworld Display", size = 120):
global GRID_SIZE, MARGIN, SCREEN_WIDTH, SCREEN_HEIGHT, GRID_HEIGHT
grid = gridworld.grid
WINDOW_SIZE = size
GRID_SIZE = size
GRID_HEIGHT = grid.height
MARGIN = GRID_SIZE * 0.75
screen_width = (grid.width - 1) * GRID_SIZE + MARGIN * 2
screen_height = (grid.height - 0.5) * GRID_SIZE + MARGIN * 2
begin_graphics(screen_width,
screen_height,
BACKGROUND_COLOR, title=title)
def drawNullValues(gridworld, currentState = None, message = ''):
grid = gridworld.grid
blank()
for x in range(grid.width):
for y in range(grid.height):
state = (x, y)
gridType = grid[x][y]
isExit = (str(gridType) != gridType)
isCurrent = (currentState == state)
if gridType == '#':
drawSquare(x, y, 0, 0, 0, None, None, True, False, isCurrent)
else:
drawNullSquare(gridworld.grid, x, y, False, isExit, isCurrent)
pos = to_screen(((grid.width - 1.0) / 2.0, - 0.8))
text( pos, TEXT_COLOR, message, "Courier", -32, "bold", "c")
def drawValues(gridworld, values, policy, currentState = None, message = 'State Values'):
grid = gridworld.grid
blank()
valueList = [values[state] for state in gridworld.getStates()] + [0.0]
minValue = min(valueList)
maxValue = max(valueList)
for x in range(grid.width):
for y in range(grid.height):
state = (x, y)
gridType = grid[x][y]
isExit = (str(gridType) != gridType)
isCurrent = (currentState == state)
if gridType == '#':
drawSquare(x, y, 0, 0, 0, None, None, True, False, isCurrent)
else:
value = values[state]
action = None
if policy != None and state in policy:
action = policy[state]
actions = gridworld.getPossibleActions(state)
if action not in actions and 'exit' in actions:
action = 'exit'
valString = '%.2f' % value
drawSquare(x, y, value, minValue, maxValue, valString, action, False, isExit, isCurrent)
pos = to_screen(((grid.width - 1.0) / 2.0, - 0.8))
text( pos, TEXT_COLOR, message, "Courier", -32, "bold", "c")
def drawQValues(gridworld, qValues, currentState = None, message = 'State-Action Q-Values'):
grid = gridworld.grid
blank()
stateCrossActions = [[(state, action) for action in gridworld.getPossibleActions(state)] for state in gridworld.getStates()]
qStates = reduce(lambda x,y: x+y, stateCrossActions, [])
qValueList = [qValues[(state, action)] for state, action in qStates] + [0.0]
minValue = min(qValueList)
maxValue = max(qValueList)
for x in range(grid.width):
for y in range(grid.height):
state = (x, y)
gridType = grid[x][y]
isExit = (str(gridType) != gridType)
isCurrent = (currentState == state)
actions = gridworld.getPossibleActions(state)
if actions == None or len(actions) == 0:
actions = [None]
bestQ = max([qValues[(state, action)] for action in actions])
bestActions = [action for action in actions if qValues[(state, action)] == bestQ]
q = util.Counter()
valStrings = {}
for action in actions:
v = qValues[(state, action)]
q[action] += v
valStrings[action] = '%.2f' % v
if gridType == '#':
drawSquare(x, y, 0, 0, 0, None, None, True, False, isCurrent)
elif isExit:
action = 'exit'
value = q[action]
valString = '%.2f' % value
drawSquare(x, y, value, minValue, maxValue, valString, action, False, isExit, isCurrent)
else:
drawSquareQ(x, y, q, minValue, maxValue, valStrings, actions, isCurrent)
pos = to_screen(((grid.width - 1.0) / 2.0, - 0.8))
text( pos, TEXT_COLOR, message, "Courier", -32, "bold", "c")
def blank():
clear_screen()
def drawNullSquare(grid,x, y, isObstacle, isTerminal, isCurrent):
square_color = getColor(0, -1, 1)
if isObstacle:
square_color = OBSTACLE_COLOR
(screen_x, screen_y) = to_screen((x, y))
square( (screen_x, screen_y),
0.5* GRID_SIZE,
color = square_color,
filled = 1,
width = 1)
square( (screen_x, screen_y),
0.5* GRID_SIZE,
color = EDGE_COLOR,
filled = 0,
width = 3)
if isTerminal and not isObstacle:
square( (screen_x, screen_y),
0.4* GRID_SIZE,
color = EDGE_COLOR,
filled = 0,
width = 2)
text( (screen_x, screen_y),
TEXT_COLOR,
str(grid[x][y]),
"Courier", -24, "bold", "c")
text_color = TEXT_COLOR
if not isObstacle and isCurrent:
circle( (screen_x, screen_y), 0.1*GRID_SIZE, LOCATION_COLOR, fillColor=LOCATION_COLOR )
# if not isObstacle:
# text( (screen_x, screen_y), text_color, valStr, "Courier", 24, "bold", "c")
def drawSquare(x, y, val, min, max, valStr, action, isObstacle, isTerminal, isCurrent):
square_color = getColor(val, min, max)
if isObstacle:
square_color = OBSTACLE_COLOR
(screen_x, screen_y) = to_screen((x, y))
square( (screen_x, screen_y),
0.5* GRID_SIZE,
color = square_color,
filled = 1,
width = 1)
square( (screen_x, screen_y),
0.5* GRID_SIZE,
color = EDGE_COLOR,
filled = 0,
width = 3)
if isTerminal and not isObstacle:
square( (screen_x, screen_y),
0.4* GRID_SIZE,
color = EDGE_COLOR,
filled = 0,
width = 2)
if action == 'north':
polygon( [(screen_x, screen_y - 0.45*GRID_SIZE), (screen_x+0.05*GRID_SIZE, screen_y-0.40*GRID_SIZE), (screen_x-0.05*GRID_SIZE, screen_y-0.40*GRID_SIZE)], EDGE_COLOR, filled = 1, smoothed = False)
if action == 'south':
polygon( [(screen_x, screen_y + 0.45*GRID_SIZE), (screen_x+0.05*GRID_SIZE, screen_y+0.40*GRID_SIZE), (screen_x-0.05*GRID_SIZE, screen_y+0.40*GRID_SIZE)], EDGE_COLOR, filled = 1, smoothed = False)
if action == 'west':
polygon( [(screen_x-0.45*GRID_SIZE, screen_y), (screen_x-0.4*GRID_SIZE, screen_y+0.05*GRID_SIZE), (screen_x-0.4*GRID_SIZE, screen_y-0.05*GRID_SIZE)], EDGE_COLOR, filled = 1, smoothed = False)
if action == 'east':
polygon( [(screen_x+0.45*GRID_SIZE, screen_y), (screen_x+0.4*GRID_SIZE, screen_y+0.05*GRID_SIZE), (screen_x+0.4*GRID_SIZE, screen_y-0.05*GRID_SIZE)], EDGE_COLOR, filled = 1, smoothed = False)
text_color = TEXT_COLOR
if not isObstacle and isCurrent:
circle( (screen_x, screen_y), 0.1*GRID_SIZE, outlineColor=LOCATION_COLOR, fillColor=LOCATION_COLOR )
if not isObstacle:
text( (screen_x, screen_y), text_color, valStr, "Courier", -30, "bold", "c")
def drawSquareQ(x, y, qVals, minVal, maxVal, valStrs, bestActions, isCurrent):
(screen_x, screen_y) = to_screen((x, y))
center = (screen_x, screen_y)
nw = (screen_x-0.5*GRID_SIZE, screen_y-0.5*GRID_SIZE)
ne = (screen_x+0.5*GRID_SIZE, screen_y-0.5*GRID_SIZE)
se = (screen_x+0.5*GRID_SIZE, screen_y+0.5*GRID_SIZE)
sw = (screen_x-0.5*GRID_SIZE, screen_y+0.5*GRID_SIZE)
n = (screen_x, screen_y-0.5*GRID_SIZE+5)
s = (screen_x, screen_y+0.5*GRID_SIZE-5)
w = (screen_x-0.5*GRID_SIZE+5, screen_y)
e = (screen_x+0.5*GRID_SIZE-5, screen_y)
actions = qVals.keys()
for action in actions:
wedge_color = getColor(qVals[action], minVal, maxVal)
if action == 'north':
polygon( (center, nw, ne), wedge_color, filled = 1, smoothed = False)
#text(n, text_color, valStr, "Courier", 8, "bold", "n")
if action == 'south':
polygon( (center, sw, se), wedge_color, filled = 1, smoothed = False)
#text(s, text_color, valStr, "Courier", 8, "bold", "s")
if action == 'east':
polygon( (center, ne, se), wedge_color, filled = 1, smoothed = False)
#text(e, text_color, valStr, "Courier", 8, "bold", "e")
if action == 'west':
polygon( (center, nw, sw), wedge_color, filled = 1, smoothed = False)
#text(w, text_color, valStr, "Courier", 8, "bold", "w")
square( (screen_x, screen_y),
0.5* GRID_SIZE,
color = EDGE_COLOR,
filled = 0,
width = 3)
line(ne, sw, color = EDGE_COLOR)
line(nw, se, color = EDGE_COLOR)
if isCurrent:
circle( (screen_x, screen_y), 0.1*GRID_SIZE, LOCATION_COLOR, fillColor=LOCATION_COLOR )
for action in actions:
text_color = TEXT_COLOR
if qVals[action] < max(qVals.values()): text_color = MUTED_TEXT_COLOR
valStr = ""
if action in valStrs:
valStr = valStrs[action]
h = -20
if action == 'north':
#polygon( (center, nw, ne), wedge_color, filled = 1, smooth = 0)
text(n, text_color, valStr, "Courier", h, "bold", "n")
if action == 'south':
#polygon( (center, sw, se), wedge_color, filled = 1, smooth = 0)
text(s, text_color, valStr, "Courier", h, "bold", "s")
if action == 'east':
#polygon( (center, ne, se), wedge_color, filled = 1, smooth = 0)
text(e, text_color, valStr, "Courier", h, "bold", "e")
if action == 'west':
#polygon( (center, nw, sw), wedge_color, filled = 1, smooth = 0)
text(w, text_color, valStr, "Courier", h, "bold", "w")
def getColor(val, minVal, max):
r, g = 0.0, 0.0
if val < 0 and minVal < 0:
r = val * 0.65 / minVal
if val > 0 and max > 0:
g = val * 0.65 / max
return formatColor(r,g,0.0)
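# Illustrative values (a rough check, not from the original source):
#   getColor(-1.0, -2.0, 2.0) -> formatColor(0.325, 0.0, 0.0)   (dim red)
#   getColor( 2.0, -2.0, 2.0) -> formatColor(0.0, 0.65, 0.0)    (green)
#   getColor( 0.0, -2.0, 2.0) -> formatColor(0.0, 0.0, 0.0)     (black)
# Negative values shade toward red and positive values toward green, scaled
# by the most extreme values in the display.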
def square(pos, size, color, filled, width):
x, y = pos
dx, dy = size, size
return polygon([(x - dx, y - dy), (x - dx, y + dy), (x + dx, y + dy), (x + dx, y - dy)], outlineColor=color, fillColor=color, filled=filled, width=width, smoothed=False)
def to_screen(point):
( gamex, gamey ) = point
x = gamex*GRID_SIZE + MARGIN
y = (GRID_HEIGHT - gamey - 1)*GRID_SIZE + MARGIN
return ( x, y )
def to_grid(point):
    (x, y) = point
    # Keep the screen coordinates separate so the second conversion does not
    # read an already-overwritten value (the original reused x before computing y).
    grid_x = int((y - MARGIN + GRID_SIZE * 0.5) / GRID_SIZE)
    grid_y = int((x - MARGIN + GRID_SIZE * 0.5) / GRID_SIZE)
    print point, "-->", (grid_x, grid_y)
    return (grid_x, grid_y)

398
reinforcement/graphicsUtils.py Normal file
View file

@ -0,0 +1,398 @@
# graphicsUtils.py
# ----------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
import sys
import math
import random
import string
import time
import types
import Tkinter
_Windows = sys.platform == 'win32' # True if on Win95/98/NT
_root_window = None # The root window for graphics output
_canvas = None # The canvas which holds graphics
_canvas_xs = None # Size of canvas object
_canvas_ys = None
_canvas_x = None # Current position on canvas
_canvas_y = None
_canvas_col = None # Current colour (set to black below)
_canvas_tsize = 12
_canvas_tserifs = 0
def formatColor(r, g, b):
return '#%02x%02x%02x' % (int(r * 255), int(g * 255), int(b * 255))
def colorToVector(color):
return map(lambda x: int(x, 16) / 256.0, [color[1:3], color[3:5], color[5:7]])
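# Illustrative values (a rough check, not from the original source):
#   formatColor(1, 0, 0)     -> '#ff0000'
#   formatColor(.4, .4, 0)   -> '#666600'
#   colorToVector('#ff0000') -> [0.99609375, 0.0, 0.0]   (each hex byte / 256.0)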
if _Windows:
_canvas_tfonts = ['times new roman', 'lucida console']
else:
_canvas_tfonts = ['times', 'lucidasans-24']
pass # XXX need defaults here
def sleep(secs):
global _root_window
if _root_window == None:
time.sleep(secs)
else:
_root_window.update_idletasks()
_root_window.after(int(1000 * secs), _root_window.quit)
_root_window.mainloop()
def begin_graphics(width=640, height=480, color=formatColor(0, 0, 0), title=None):
global _root_window, _canvas, _canvas_x, _canvas_y, _canvas_xs, _canvas_ys, _bg_color
# Check for duplicate call
if _root_window is not None:
# Lose the window.
_root_window.destroy()
# Save the canvas size parameters
_canvas_xs, _canvas_ys = width - 1, height - 1
_canvas_x, _canvas_y = 0, _canvas_ys
_bg_color = color
# Create the root window
_root_window = Tkinter.Tk()
_root_window.protocol('WM_DELETE_WINDOW', _destroy_window)
_root_window.title(title or 'Graphics Window')
_root_window.resizable(0, 0)
# Create the canvas object
try:
_canvas = Tkinter.Canvas(_root_window, width=width, height=height)
_canvas.pack()
draw_background()
_canvas.update()
except:
_root_window = None
raise
# Bind to key-down and key-up events
_root_window.bind( "<KeyPress>", _keypress )
_root_window.bind( "<KeyRelease>", _keyrelease )
_root_window.bind( "<FocusIn>", _clear_keys )
_root_window.bind( "<FocusOut>", _clear_keys )
_root_window.bind( "<Button-1>", _leftclick )
_root_window.bind( "<Button-2>", _rightclick )
_root_window.bind( "<Button-3>", _rightclick )
_root_window.bind( "<Control-Button-1>", _ctrl_leftclick)
_clear_keys()
_leftclick_loc = None
_rightclick_loc = None
_ctrl_leftclick_loc = None
def _leftclick(event):
global _leftclick_loc
_leftclick_loc = (event.x, event.y)
def _rightclick(event):
global _rightclick_loc
_rightclick_loc = (event.x, event.y)
def _ctrl_leftclick(event):
global _ctrl_leftclick_loc
_ctrl_leftclick_loc = (event.x, event.y)
def wait_for_click():
while True:
global _leftclick_loc
global _rightclick_loc
global _ctrl_leftclick_loc
if _leftclick_loc != None:
val = _leftclick_loc
_leftclick_loc = None
return val, 'left'
if _rightclick_loc != None:
val = _rightclick_loc
_rightclick_loc = None
return val, 'right'
if _ctrl_leftclick_loc != None:
val = _ctrl_leftclick_loc
_ctrl_leftclick_loc = None
return val, 'ctrl_left'
sleep(0.05)
def draw_background():
corners = [(0,0), (0, _canvas_ys), (_canvas_xs, _canvas_ys), (_canvas_xs, 0)]
polygon(corners, _bg_color, fillColor=_bg_color, filled=True, smoothed=False)
def _destroy_window(event=None):
sys.exit(0)
# global _root_window
# _root_window.destroy()
# _root_window = None
#print "DESTROY"
def end_graphics():
global _root_window, _canvas, _mouse_enabled
try:
try:
sleep(1)
if _root_window != None:
_root_window.destroy()
except SystemExit, e:
print 'Ending graphics raised an exception:', e
finally:
_root_window = None
_canvas = None
_mouse_enabled = 0
_clear_keys()
def clear_screen(background=None):
global _canvas_x, _canvas_y
_canvas.delete('all')
draw_background()
_canvas_x, _canvas_y = 0, _canvas_ys
def polygon(coords, outlineColor, fillColor=None, filled=1, smoothed=1, behind=0, width=1):
c = []
for coord in coords:
c.append(coord[0])
c.append(coord[1])
if fillColor == None: fillColor = outlineColor
if filled == 0: fillColor = ""
poly = _canvas.create_polygon(c, outline=outlineColor, fill=fillColor, smooth=smoothed, width=width)
if behind > 0:
_canvas.tag_lower(poly, behind) # Higher should be more visible
return poly
def square(pos, r, color, filled=1, behind=0):
x, y = pos
coords = [(x - r, y - r), (x + r, y - r), (x + r, y + r), (x - r, y + r)]
return polygon(coords, color, color, filled, 0, behind=behind)
def circle(pos, r, outlineColor, fillColor, endpoints=None, style='pieslice', width=2):
x, y = pos
x0, x1 = x - r - 1, x + r
y0, y1 = y - r - 1, y + r
if endpoints == None:
e = [0, 359]
else:
e = list(endpoints)
while e[0] > e[1]: e[1] = e[1] + 360
return _canvas.create_arc(x0, y0, x1, y1, outline=outlineColor, fill=fillColor,
extent=e[1] - e[0], start=e[0], style=style, width=width)
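# Worked example (a rough check, not from the original source): passing
# endpoints=(15, -15) to circle() gives e = [15, -15]; the while loop above
# normalizes it to [15, 345], so the arc starts at 15 degrees with a
# 330-degree extent -- a Pacman-style pieslice with a 30-degree wedge missing.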
def image(pos, file="../../blueghost.gif"):
x, y = pos
# img = PhotoImage(file=file)
return _canvas.create_image(x, y, image = Tkinter.PhotoImage(file=file), anchor = Tkinter.NW)
def refresh():
_canvas.update_idletasks()
def moveCircle(id, pos, r, endpoints=None):
global _canvas_x, _canvas_y
x, y = pos
# x0, x1 = x - r, x + r + 1
# y0, y1 = y - r, y + r + 1
x0, x1 = x - r - 1, x + r
y0, y1 = y - r - 1, y + r
if endpoints == None:
e = [0, 359]
else:
e = list(endpoints)
while e[0] > e[1]: e[1] = e[1] + 360
edit(id, ('start', e[0]), ('extent', e[1] - e[0]))
move_to(id, x0, y0)
def edit(id, *args):
_canvas.itemconfigure(id, **dict(args))
def text(pos, color, contents, font='Helvetica', size=12, style='normal', anchor="nw"):
global _canvas_x, _canvas_y
x, y = pos
font = (font, str(size), style)
return _canvas.create_text(x, y, fill=color, text=contents, font=font, anchor=anchor)
def changeText(id, newText, font=None, size=12, style='normal'):
_canvas.itemconfigure(id, text=newText)
if font != None:
_canvas.itemconfigure(id, font=(font, '-%d' % size, style))
def changeColor(id, newColor):
_canvas.itemconfigure(id, fill=newColor)
def line(here, there, color=formatColor(0, 0, 0), width=2):
x0, y0 = here[0], here[1]
x1, y1 = there[0], there[1]
return _canvas.create_line(x0, y0, x1, y1, fill=color, width=width)
##############################################################################
### Keypress handling ########################################################
##############################################################################
# We bind to key-down and key-up events.
_keysdown = {}
_keyswaiting = {}
# This holds an unprocessed key release. We delay key releases by up to
# one call to keys_pressed() to get round a problem with auto repeat.
_got_release = None
def _keypress(event):
global _got_release
#remap_arrows(event)
_keysdown[event.keysym] = 1
_keyswaiting[event.keysym] = 1
# print event.char, event.keycode
_got_release = None
def _keyrelease(event):
global _got_release
#remap_arrows(event)
try:
del _keysdown[event.keysym]
except:
pass
_got_release = 1
def remap_arrows(event):
# TURN ARROW PRESSES INTO LETTERS (SHOULD BE IN KEYBOARD AGENT)
if event.char in ['a', 's', 'd', 'w']:
return
if event.keycode in [37, 101]: # LEFT ARROW (win / x)
event.char = 'a'
if event.keycode in [38, 99]: # UP ARROW
event.char = 'w'
if event.keycode in [39, 102]: # RIGHT ARROW
event.char = 'd'
if event.keycode in [40, 104]: # DOWN ARROW
event.char = 's'
def _clear_keys(event=None):
global _keysdown, _got_release, _keyswaiting
_keysdown = {}
_keyswaiting = {}
_got_release = None
def keys_pressed(d_o_e=Tkinter.tkinter.dooneevent,
d_w=Tkinter.tkinter.DONT_WAIT):
d_o_e(d_w)
if _got_release:
d_o_e(d_w)
return _keysdown.keys()
def keys_waiting():
global _keyswaiting
keys = _keyswaiting.keys()
_keyswaiting = {}
return keys
# Block for a list of keys...
def wait_for_keys():
keys = []
while keys == []:
keys = keys_pressed()
sleep(0.05)
return keys
def remove_from_screen(x,
d_o_e=Tkinter.tkinter.dooneevent,
d_w=Tkinter.tkinter.DONT_WAIT):
_canvas.delete(x)
d_o_e(d_w)
def _adjust_coords(coord_list, x, y):
for i in range(0, len(coord_list), 2):
coord_list[i] = coord_list[i] + x
coord_list[i + 1] = coord_list[i + 1] + y
return coord_list
def move_to(object, x, y=None,
d_o_e=Tkinter.tkinter.dooneevent,
d_w=Tkinter.tkinter.DONT_WAIT):
if y is None:
try: x, y = x
        except: raise Exception('incomprehensible coordinates')
horiz = True
newCoords = []
current_x, current_y = _canvas.coords(object)[0:2] # first point
for coord in _canvas.coords(object):
if horiz:
inc = x - current_x
else:
inc = y - current_y
horiz = not horiz
newCoords.append(coord + inc)
_canvas.coords(object, *newCoords)
d_o_e(d_w)
def move_by(object, x, y=None,
d_o_e=Tkinter.tkinter.dooneevent,
d_w=Tkinter.tkinter.DONT_WAIT, lift=False):
if y is None:
try: x, y = x
except: raise Exception, 'incomprehensible coordinates'
horiz = True
newCoords = []
for coord in _canvas.coords(object):
if horiz:
inc = x
else:
inc = y
horiz = not horiz
newCoords.append(coord + inc)
_canvas.coords(object, *newCoords)
d_o_e(d_w)
if lift:
_canvas.tag_raise(object)
def writePostscript(filename):
"Writes the current canvas to a postscript file."
psfile = file(filename, 'w')
psfile.write(_canvas.postscript(pageanchor='sw',
y='0.c',
x='0.c'))
psfile.close()
ghost_shape = [
(0, - 0.5),
(0.25, - 0.75),
(0.5, - 0.5),
(0.75, - 0.75),
(0.75, 0.5),
(0.5, 0.75),
(- 0.5, 0.75),
(- 0.75, 0.5),
(- 0.75, - 0.75),
(- 0.5, - 0.5),
(- 0.25, - 0.75)
]
if __name__ == '__main__':
begin_graphics()
clear_screen()
ghost_shape = [(x * 10 + 20, y * 10 + 20) for x, y in ghost_shape]
g = polygon(ghost_shape, formatColor(1, 1, 1))
move_to(g, (50, 50))
circle((150, 150), 20, formatColor(0.7, 0.3, 0.0), endpoints=[15, - 15])
sleep(2)

585
reinforcement/gridworld.py Normal file
View file

@ -0,0 +1,585 @@
# gridworld.py
# ------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
import random
import sys
import mdp
import environment
import util
import optparse
class Gridworld(mdp.MarkovDecisionProcess):
"""
Gridworld
"""
def __init__(self, grid):
# layout
if type(grid) == type([]): grid = makeGrid(grid)
self.grid = grid
# parameters
self.livingReward = 0.0
self.noise = 0.2
def setLivingReward(self, reward):
"""
The (negative) reward for exiting "normal" states.
Note that in the R+N text, this reward is on entering
a state and therefore is not clearly part of the state's
future rewards.
"""
self.livingReward = reward
def setNoise(self, noise):
"""
The probability of moving in an unintended direction.
"""
self.noise = noise
def getPossibleActions(self, state):
"""
Returns list of valid actions for 'state'.
Note that you can request moves into walls and
that "exit" states transition to the terminal
state under the special action "done".
"""
if state == self.grid.terminalState:
return ()
x,y = state
if type(self.grid[x][y]) == int:
return ('exit',)
return ('north','west','south','east')
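    # For example (consistent with the checks above): a cell holding a number
    # such as +1 or -1 is an "exit" square whose only action is 'exit';
    # '#' cells are walls and never appear as states; every other cell allows
    # the four compass moves, even ones that would bump into a wall.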
def getStates(self):
"""
Return list of all states.
"""
# The true terminal state.
states = [self.grid.terminalState]
for x in range(self.grid.width):
for y in range(self.grid.height):
if self.grid[x][y] != '#':
state = (x,y)
states.append(state)
return states
def getReward(self, state, action, nextState):
"""
Get reward for state, action, nextState transition.
Note that the reward depends only on the state being
departed (as in the R+N book examples, which more or
less use this convention).
"""
if state == self.grid.terminalState:
return 0.0
x, y = state
cell = self.grid[x][y]
if type(cell) == int or type(cell) == float:
return cell
return self.livingReward
def getStartState(self):
for x in range(self.grid.width):
for y in range(self.grid.height):
if self.grid[x][y] == 'S':
return (x, y)
        raise Exception('Grid has no start state')
def isTerminal(self, state):
"""
Only the TERMINAL_STATE state is *actually* a terminal state.
The other "exit" states are technically non-terminals with
a single action "exit" which leads to the true terminal state.
This convention is to make the grids line up with the examples
in the R+N textbook.
"""
return state == self.grid.terminalState
def getTransitionStatesAndProbs(self, state, action):
"""
Returns list of (nextState, prob) pairs
representing the states reachable
from 'state' by taking 'action' along
with their transition probabilities.
"""
if action not in self.getPossibleActions(state):
raise "Illegal action!"
if self.isTerminal(state):
return []
x, y = state
if type(self.grid[x][y]) == int or type(self.grid[x][y]) == float:
termState = self.grid.terminalState
return [(termState, 1.0)]
successors = []
northState = (self.__isAllowed(y+1,x) and (x,y+1)) or state
westState = (self.__isAllowed(y,x-1) and (x-1,y)) or state
southState = (self.__isAllowed(y-1,x) and (x,y-1)) or state
eastState = (self.__isAllowed(y,x+1) and (x+1,y)) or state
if action == 'north' or action == 'south':
if action == 'north':
successors.append((northState,1-self.noise))
else:
successors.append((southState,1-self.noise))
massLeft = self.noise
successors.append((westState,massLeft/2.0))
successors.append((eastState,massLeft/2.0))
if action == 'west' or action == 'east':
if action == 'west':
successors.append((westState,1-self.noise))
else:
successors.append((eastState,1-self.noise))
massLeft = self.noise
successors.append((northState,massLeft/2.0))
successors.append((southState,massLeft/2.0))
successors = self.__aggregate(successors)
return successors
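    # Worked example (illustrative): with the default noise of 0.2, taking 'north'
    # from an open interior cell returns
    #     [(northState, 0.8), (westState, 0.1), (eastState, 0.1)]
    # and if, say, the west neighbour is a wall, westState falls back to the current
    # state and __aggregate folds that 0.1 onto it.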
def __aggregate(self, statesAndProbs):
counter = util.Counter()
for state, prob in statesAndProbs:
counter[state] += prob
newStatesAndProbs = []
for state, prob in counter.items():
newStatesAndProbs.append((state, prob))
return newStatesAndProbs
def __isAllowed(self, y, x):
if y < 0 or y >= self.grid.height: return False
if x < 0 or x >= self.grid.width: return False
return self.grid[x][y] != '#'
class GridworldEnvironment(environment.Environment):
def __init__(self, gridWorld):
self.gridWorld = gridWorld
self.reset()
def getCurrentState(self):
return self.state
def getPossibleActions(self, state):
return self.gridWorld.getPossibleActions(state)
def doAction(self, action):
state = self.getCurrentState()
(nextState, reward) = self.getRandomNextState(state, action)
self.state = nextState
return (nextState, reward)
def getRandomNextState(self, state, action, randObj=None):
rand = -1.0
if randObj is None:
rand = random.random()
else:
rand = randObj.random()
sum = 0.0
successors = self.gridWorld.getTransitionStatesAndProbs(state, action)
for nextState, prob in successors:
sum += prob
if sum > 1.0:
                raise Exception('Total transition probability more than one; sample failure.')
if rand < sum:
reward = self.gridWorld.getReward(state, action, nextState)
return (nextState, reward)
        raise Exception('Total transition probability less than one; sample failure.')
def reset(self):
self.state = self.gridWorld.getStartState()
class Grid:
"""
A 2-dimensional array of immutables backed by a list of lists. Data is accessed
via grid[x][y] where (x,y) are cartesian coordinates with x horizontal,
y vertical and the origin (0,0) in the bottom left corner.
The __str__ method constructs an output that is oriented appropriately.
"""
def __init__(self, width, height, initialValue=' '):
self.width = width
self.height = height
self.data = [[initialValue for y in range(height)] for x in range(width)]
self.terminalState = 'TERMINAL_STATE'
def __getitem__(self, i):
return self.data[i]
def __setitem__(self, key, item):
self.data[key] = item
def __eq__(self, other):
if other == None: return False
return self.data == other.data
def __hash__(self):
        # self.data is a list of lists (unhashable), so hash an immutable view of it
        return hash(tuple(tuple(row) for row in self.data))
def copy(self):
g = Grid(self.width, self.height)
g.data = [x[:] for x in self.data]
return g
def deepCopy(self):
return self.copy()
def shallowCopy(self):
g = Grid(self.width, self.height)
g.data = self.data
return g
def _getLegacyText(self):
t = [[self.data[x][y] for x in range(self.width)] for y in range(self.height)]
t.reverse()
return t
def __str__(self):
return str(self._getLegacyText())
def makeGrid(gridString):
width, height = len(gridString[0]), len(gridString)
grid = Grid(width, height)
for ybar, line in enumerate(gridString):
y = height - ybar - 1
for x, el in enumerate(line):
grid[x][y] = el
return grid
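# Worked example (illustrative): makeGrid flips the row-major text into the (x, y)
# convention documented on Grid, with (0, 0) at the bottom-left:
#     >>> g = makeGrid([['S', ' '], [' ', 10]])
#     >>> g[0][1], g[1][0]
#     ('S', 10)     # 'S' is the top-left character, 10 the bottom-right one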
def getCliffGrid():
grid = [[' ',' ',' ',' ',' '],
['S',' ',' ',' ',10],
[-100,-100, -100, -100, -100]]
return Gridworld(makeGrid(grid))
def getCliffGrid2():
grid = [[' ',' ',' ',' ',' '],
[8,'S',' ',' ',10],
[-100,-100, -100, -100, -100]]
return Gridworld(grid)
def getDiscountGrid():
grid = [[' ',' ',' ',' ',' '],
[' ','#',' ',' ',' '],
[' ','#', 1,'#', 10],
['S',' ',' ',' ',' '],
[-10,-10, -10, -10, -10]]
return Gridworld(grid)
def getBridgeGrid():
grid = [[ '#',-100, -100, -100, -100, -100, '#'],
[ 1, 'S', ' ', ' ', ' ', ' ', 10],
[ '#',-100, -100, -100, -100, -100, '#']]
return Gridworld(grid)
def getBookGrid():
grid = [[' ',' ',' ',+1],
[' ','#',' ',-1],
['S',' ',' ',' ']]
return Gridworld(grid)
def getMazeGrid():
grid = [[' ',' ',' ',+1],
['#','#',' ','#'],
[' ','#',' ',' '],
[' ','#','#',' '],
['S',' ',' ',' ']]
return Gridworld(grid)
def getUserAction(state, actionFunction):
"""
Get an action from the user (rather than the agent).
Used for debugging and lecture demos.
"""
import graphicsUtils
action = None
while True:
keys = graphicsUtils.wait_for_keys()
if 'Up' in keys: action = 'north'
if 'Down' in keys: action = 'south'
if 'Left' in keys: action = 'west'
if 'Right' in keys: action = 'east'
if 'q' in keys: sys.exit(0)
if action == None: continue
break
actions = actionFunction(state)
if action not in actions:
action = actions[0]
return action
def printString(x): print x
def runEpisode(agent, environment, discount, decision, display, message, pause, episode):
returns = 0
totalDiscount = 1.0
environment.reset()
if 'startEpisode' in dir(agent): agent.startEpisode()
message("BEGINNING EPISODE: "+str(episode)+"\n")
while True:
# DISPLAY CURRENT STATE
state = environment.getCurrentState()
display(state)
pause()
# END IF IN A TERMINAL STATE
actions = environment.getPossibleActions(state)
if len(actions) == 0:
message("EPISODE "+str(episode)+" COMPLETE: RETURN WAS "+str(returns)+"\n")
return returns
# GET ACTION (USUALLY FROM AGENT)
action = decision(state)
if action == None:
            raise Exception('Error: Agent returned None action')
# EXECUTE ACTION
nextState, reward = environment.doAction(action)
message("Started in state: "+str(state)+
"\nTook action: "+str(action)+
"\nEnded in state: "+str(nextState)+
"\nGot reward: "+str(reward)+"\n")
# UPDATE LEARNER
if 'observeTransition' in dir(agent):
agent.observeTransition(state, action, nextState, reward)
returns += reward * totalDiscount
totalDiscount *= discount
if 'stopEpisode' in dir(agent):
agent.stopEpisode()
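# Worked example (illustrative): with discount 0.9 and per-step rewards 0, 0, 1,
# the loop above accumulates 0*1.0 + 0*0.9 + 1*0.81 = 0.81 as the discounted return.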
def parseOptions():
optParser = optparse.OptionParser()
optParser.add_option('-d', '--discount',action='store',
type='float',dest='discount',default=0.9,
help='Discount on future (default %default)')
optParser.add_option('-r', '--livingReward',action='store',
type='float',dest='livingReward',default=0.0,
metavar="R", help='Reward for living for a time step (default %default)')
optParser.add_option('-n', '--noise',action='store',
type='float',dest='noise',default=0.2,
metavar="P", help='How often action results in ' +
'unintended direction (default %default)' )
optParser.add_option('-e', '--epsilon',action='store',
type='float',dest='epsilon',default=0.3,
metavar="E", help='Chance of taking a random action in q-learning (default %default)')
optParser.add_option('-l', '--learningRate',action='store',
type='float',dest='learningRate',default=0.5,
metavar="P", help='TD learning rate (default %default)' )
optParser.add_option('-i', '--iterations',action='store',
type='int',dest='iters',default=10,
metavar="K", help='Number of rounds of value iteration (default %default)')
optParser.add_option('-k', '--episodes',action='store',
type='int',dest='episodes',default=1,
metavar="K", help='Number of epsiodes of the MDP to run (default %default)')
optParser.add_option('-g', '--grid',action='store',
metavar="G", type='string',dest='grid',default="BookGrid",
help='Grid to use (case sensitive; options are BookGrid, BridgeGrid, CliffGrid, MazeGrid, default %default)' )
optParser.add_option('-w', '--windowSize', metavar="X", type='int',dest='gridSize',default=150,
help='Request a window width of X pixels *per grid cell* (default %default)')
optParser.add_option('-a', '--agent',action='store', metavar="A",
type='string',dest='agent',default="random",
help='Agent type (options are \'random\', \'value\' and \'q\', default %default)')
optParser.add_option('-t', '--text',action='store_true',
dest='textDisplay',default=False,
help='Use text-only ASCII display')
optParser.add_option('-p', '--pause',action='store_true',
dest='pause',default=False,
help='Pause GUI after each time step when running the MDP')
optParser.add_option('-q', '--quiet',action='store_true',
dest='quiet',default=False,
help='Skip display of any learning episodes')
optParser.add_option('-s', '--speed',action='store', metavar="S", type=float,
dest='speed',default=1.0,
help='Speed of animation, S > 1.0 is faster, 0.0 < S < 1.0 is slower (default %default)')
optParser.add_option('-m', '--manual',action='store_true',
dest='manual',default=False,
help='Manually control agent')
optParser.add_option('-v', '--valueSteps',action='store_true' ,default=False,
help='Display each step of value iteration')
opts, args = optParser.parse_args()
if opts.manual and opts.agent != 'q':
print '## Disabling Agents in Manual Mode (-m) ##'
opts.agent = None
# MANAGE CONFLICTS
if opts.textDisplay or opts.quiet:
# if opts.quiet:
opts.pause = False
# opts.manual = False
if opts.manual:
opts.pause = True
return opts
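# Example invocations (illustrative):
#   python gridworld.py -a value -i 100 -k 10        value iteration on the default BookGrid
#   python gridworld.py -a q -k 50 -n 0.0 -e 0.3     Q-learning with no noise
#   python gridworld.py -m -g MazeGrid               manual control of the MazeGrid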
if __name__ == '__main__':
opts = parseOptions()
###########################
# GET THE GRIDWORLD
###########################
import gridworld
mdpFunction = getattr(gridworld, "get"+opts.grid)
mdp = mdpFunction()
mdp.setLivingReward(opts.livingReward)
mdp.setNoise(opts.noise)
env = gridworld.GridworldEnvironment(mdp)
###########################
# GET THE DISPLAY ADAPTER
###########################
import textGridworldDisplay
display = textGridworldDisplay.TextGridworldDisplay(mdp)
if not opts.textDisplay:
import graphicsGridworldDisplay
display = graphicsGridworldDisplay.GraphicsGridworldDisplay(mdp, opts.gridSize, opts.speed)
try:
display.start()
except KeyboardInterrupt:
sys.exit(0)
###########################
# GET THE AGENT
###########################
import valueIterationAgents, qlearningAgents
a = None
if opts.agent == 'value':
a = valueIterationAgents.ValueIterationAgent(mdp, opts.discount, opts.iters)
elif opts.agent == 'q':
#env.getPossibleActions, opts.discount, opts.learningRate, opts.epsilon
#simulationFn = lambda agent, state: simulation.GridworldSimulation(agent,state,mdp)
gridWorldEnv = GridworldEnvironment(mdp)
actionFn = lambda state: mdp.getPossibleActions(state)
qLearnOpts = {'gamma': opts.discount,
'alpha': opts.learningRate,
'epsilon': opts.epsilon,
'actionFn': actionFn}
a = qlearningAgents.QLearningAgent(**qLearnOpts)
elif opts.agent == 'random':
# # No reason to use the random agent without episodes
if opts.episodes == 0:
opts.episodes = 10
class RandomAgent:
def getAction(self, state):
return random.choice(mdp.getPossibleActions(state))
def getValue(self, state):
return 0.0
def getQValue(self, state, action):
return 0.0
def getPolicy(self, state):
"NOTE: 'random' is a special policy value; don't use it in your code."
return 'random'
def update(self, state, action, nextState, reward):
pass
a = RandomAgent()
else:
        if not opts.manual: raise Exception('Unknown agent type: ' + opts.agent)
###########################
# RUN EPISODES
###########################
# DISPLAY Q/V VALUES BEFORE SIMULATION OF EPISODES
try:
if not opts.manual and opts.agent == 'value':
if opts.valueSteps:
for i in range(opts.iters):
tempAgent = valueIterationAgents.ValueIterationAgent(mdp, opts.discount, i)
display.displayValues(tempAgent, message = "VALUES AFTER "+str(i)+" ITERATIONS")
display.pause()
display.displayValues(a, message = "VALUES AFTER "+str(opts.iters)+" ITERATIONS")
display.pause()
display.displayQValues(a, message = "Q-VALUES AFTER "+str(opts.iters)+" ITERATIONS")
display.pause()
except KeyboardInterrupt:
sys.exit(0)
# FIGURE OUT WHAT TO DISPLAY EACH TIME STEP (IF ANYTHING)
displayCallback = lambda x: None
if not opts.quiet:
if opts.manual and opts.agent == None:
displayCallback = lambda state: display.displayNullValues(state)
else:
if opts.agent == 'random': displayCallback = lambda state: display.displayValues(a, state, "CURRENT VALUES")
if opts.agent == 'value': displayCallback = lambda state: display.displayValues(a, state, "CURRENT VALUES")
if opts.agent == 'q': displayCallback = lambda state: display.displayQValues(a, state, "CURRENT Q-VALUES")
messageCallback = lambda x: printString(x)
if opts.quiet:
messageCallback = lambda x: None
# FIGURE OUT WHETHER TO WAIT FOR A KEY PRESS AFTER EACH TIME STEP
pauseCallback = lambda : None
if opts.pause:
pauseCallback = lambda : display.pause()
# FIGURE OUT WHETHER THE USER WANTS MANUAL CONTROL (FOR DEBUGGING AND DEMOS)
if opts.manual:
decisionCallback = lambda state : getUserAction(state, mdp.getPossibleActions)
else:
decisionCallback = a.getAction
# RUN EPISODES
if opts.episodes > 0:
print
print "RUNNING", opts.episodes, "EPISODES"
print
returns = 0
for episode in range(1, opts.episodes+1):
returns += runEpisode(a, env, opts.discount, decisionCallback, displayCallback, messageCallback, pauseCallback, episode)
if opts.episodes > 0:
print
print "AVERAGE RETURNS FROM START STATE: "+str((returns+0.0) / opts.episodes)
print
print
# DISPLAY POST-LEARNING VALUES / Q-VALUES
if opts.agent == 'q' and not opts.manual:
try:
display.displayQValues(a, message = "Q-VALUES AFTER "+str(opts.episodes)+" EPISODES")
display.pause()
display.displayValues(a, message = "VALUES AFTER "+str(opts.episodes)+" EPISODES")
display.pause()
except KeyboardInterrupt:
sys.exit(0)

84
reinforcement/keyboardAgents.py Normal file
View file

@ -0,0 +1,84 @@
# keyboardAgents.py
# -----------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
from game import Agent
from game import Directions
import random
class KeyboardAgent(Agent):
"""
An agent controlled by the keyboard.
"""
# NOTE: Arrow keys also work.
WEST_KEY = 'a'
EAST_KEY = 'd'
NORTH_KEY = 'w'
SOUTH_KEY = 's'
STOP_KEY = 'q'
def __init__( self, index = 0 ):
self.lastMove = Directions.STOP
self.index = index
self.keys = []
def getAction( self, state):
from graphicsUtils import keys_waiting
from graphicsUtils import keys_pressed
keys = keys_waiting() + keys_pressed()
if keys != []:
self.keys = keys
legal = state.getLegalActions(self.index)
move = self.getMove(legal)
if move == Directions.STOP:
# Try to move in the same direction as before
if self.lastMove in legal:
move = self.lastMove
if (self.STOP_KEY in self.keys) and Directions.STOP in legal: move = Directions.STOP
if move not in legal:
move = random.choice(legal)
self.lastMove = move
return move
def getMove(self, legal):
move = Directions.STOP
if (self.WEST_KEY in self.keys or 'Left' in self.keys) and Directions.WEST in legal: move = Directions.WEST
if (self.EAST_KEY in self.keys or 'Right' in self.keys) and Directions.EAST in legal: move = Directions.EAST
if (self.NORTH_KEY in self.keys or 'Up' in self.keys) and Directions.NORTH in legal: move = Directions.NORTH
if (self.SOUTH_KEY in self.keys or 'Down' in self.keys) and Directions.SOUTH in legal: move = Directions.SOUTH
return move
class KeyboardAgent2(KeyboardAgent):
"""
A second agent controlled by the keyboard.
"""
# NOTE: Arrow keys also work.
WEST_KEY = 'j'
EAST_KEY = "l"
NORTH_KEY = 'i'
SOUTH_KEY = 'k'
STOP_KEY = 'u'
def getMove(self, legal):
move = Directions.STOP
if (self.WEST_KEY in self.keys) and Directions.WEST in legal: move = Directions.WEST
if (self.EAST_KEY in self.keys) and Directions.EAST in legal: move = Directions.EAST
if (self.NORTH_KEY in self.keys) and Directions.NORTH in legal: move = Directions.NORTH
if (self.SOUTH_KEY in self.keys) and Directions.SOUTH in legal: move = Directions.SOUTH
return move

149
reinforcement/layout.py Normal file
View file

@ -0,0 +1,149 @@
# layout.py
# ---------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
from util import manhattanDistance
from game import Grid
import os
import random
VISIBILITY_MATRIX_CACHE = {}
class Layout:
"""
A Layout manages the static information about the game board.
"""
def __init__(self, layoutText):
self.width = len(layoutText[0])
self.height= len(layoutText)
self.walls = Grid(self.width, self.height, False)
self.food = Grid(self.width, self.height, False)
self.capsules = []
self.agentPositions = []
self.numGhosts = 0
self.processLayoutText(layoutText)
self.layoutText = layoutText
self.totalFood = len(self.food.asList())
# self.initializeVisibilityMatrix()
def getNumGhosts(self):
return self.numGhosts
def initializeVisibilityMatrix(self):
global VISIBILITY_MATRIX_CACHE
if reduce(str.__add__, self.layoutText) not in VISIBILITY_MATRIX_CACHE:
from game import Directions
vecs = [(-0.5,0), (0.5,0),(0,-0.5),(0,0.5)]
dirs = [Directions.NORTH, Directions.SOUTH, Directions.WEST, Directions.EAST]
vis = Grid(self.width, self.height, {Directions.NORTH:set(), Directions.SOUTH:set(), Directions.EAST:set(), Directions.WEST:set(), Directions.STOP:set()})
for x in range(self.width):
for y in range(self.height):
if self.walls[x][y] == False:
for vec, direction in zip(vecs, dirs):
dx, dy = vec
nextx, nexty = x + dx, y + dy
while (nextx + nexty) != int(nextx) + int(nexty) or not self.walls[int(nextx)][int(nexty)] :
vis[x][y][direction].add((nextx, nexty))
nextx, nexty = x + dx, y + dy
self.visibility = vis
VISIBILITY_MATRIX_CACHE[reduce(str.__add__, self.layoutText)] = vis
else:
self.visibility = VISIBILITY_MATRIX_CACHE[reduce(str.__add__, self.layoutText)]
def isWall(self, pos):
x, col = pos
return self.walls[x][col]
def getRandomLegalPosition(self):
x = random.choice(range(self.width))
y = random.choice(range(self.height))
while self.isWall( (x, y) ):
x = random.choice(range(self.width))
y = random.choice(range(self.height))
return (x,y)
def getRandomCorner(self):
poses = [(1,1), (1, self.height - 2), (self.width - 2, 1), (self.width - 2, self.height - 2)]
return random.choice(poses)
def getFurthestCorner(self, pacPos):
poses = [(1,1), (1, self.height - 2), (self.width - 2, 1), (self.width - 2, self.height - 2)]
dist, pos = max([(manhattanDistance(p, pacPos), p) for p in poses])
return pos
def isVisibleFrom(self, ghostPos, pacPos, pacDirection):
row, col = [int(x) for x in pacPos]
return ghostPos in self.visibility[row][col][pacDirection]
def __str__(self):
return "\n".join(self.layoutText)
def deepCopy(self):
return Layout(self.layoutText[:])
def processLayoutText(self, layoutText):
"""
Coordinates are flipped from the input format to the (x,y) convention here
The shape of the maze. Each character
represents a different type of object.
% - Wall
. - Food
o - Capsule
G - Ghost
P - Pacman
Other characters are ignored.
"""
maxY = self.height - 1
for y in range(self.height):
for x in range(self.width):
layoutChar = layoutText[maxY - y][x]
self.processLayoutChar(x, y, layoutChar)
self.agentPositions.sort()
self.agentPositions = [ ( i == 0, pos) for i, pos in self.agentPositions]
def processLayoutChar(self, x, y, layoutChar):
if layoutChar == '%':
self.walls[x][y] = True
elif layoutChar == '.':
self.food[x][y] = True
elif layoutChar == 'o':
self.capsules.append((x, y))
elif layoutChar == 'P':
self.agentPositions.append( (0, (x, y) ) )
elif layoutChar in ['G']:
self.agentPositions.append( (1, (x, y) ) )
self.numGhosts += 1
elif layoutChar in ['1', '2', '3', '4']:
self.agentPositions.append( (int(layoutChar), (x,y)))
self.numGhosts += 1
def getLayout(name, back = 2):
if name.endswith('.lay'):
layout = tryToLoad('layouts/' + name)
if layout == None: layout = tryToLoad(name)
else:
layout = tryToLoad('layouts/' + name + '.lay')
if layout == None: layout = tryToLoad(name + '.lay')
if layout == None and back >= 0:
curdir = os.path.abspath('.')
os.chdir('..')
layout = getLayout(name, back -1)
os.chdir(curdir)
return layout
def tryToLoad(fullname):
if(not os.path.exists(fullname)): return None
f = open(fullname)
try: return Layout([line.strip() for line in f])
finally: f.close()

View file

@ -0,0 +1,7 @@
%%%%%%%%%%%%%%%%%%%
%G. G ....%
%.% % %%%%%% %.%%.%
%.%o% % o% %.o%.%
%.%%%.% %%% %..%.%
%..... P %..%G%
%%%%%%%%%%%%%%%%%%%%

View file

@ -0,0 +1,9 @@
%%%%%%%%%%%%%%%%%%%%
%o...%........%...o%
%.%%.%.%%..%%.%.%%.%
%...... G GG%......%
%.%.%%.%% %%%.%%.%.%
%.%....% ooo%.%..%.%
%.%.%%.% %% %.%.%%.%
%o%......P....%....%
%%%%%%%%%%%%%%%%%%%%

View file

@ -0,0 +1,11 @@
%%%%%%%%%%%%%%%%%%%%
%o...%........%....%
%.%%.%.%%%%%%.%.%%.%
%.%..............%.%
%.%.%%.%% %%.%%.%.%
%......%G G%......%
%.%.%%.%%%%%%.%%.%.%
%.%..............%.%
%.%%.%.%%%%%%.%.%%.%
%....%...P....%...o%
%%%%%%%%%%%%%%%%%%%%

View file

@ -0,0 +1,7 @@
%%%%%%%%
%P %
% .% . %
% % %
% .% . %
% G%
%%%%%%%%

View file

@ -0,0 +1,5 @@
%%%%%%%%%
%.P G%
% %.%G%%%
%G %%%
%%%%%%%%%

View file

@ -0,0 +1,9 @@
%%%%%%%%%%%%%%%%%%%%%%%%%
%.. P .... .... %
%.. ... ... ... ... %
%.. ... ... ... ... %
%.. .... .... G %
%.. ... ... ... ... %
%.. ... ... ... ... %
%.. .... .... o%
%%%%%%%%%%%%%%%%%%%%%%%%%

View file

@ -0,0 +1,27 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%............%%............%
%.%%%%.%%%%%.%%.%%%%%.%%%%.%
%o%%%%.%%%%%.%%.%%%%%.%%%%o%
%.%%%%.%%%%%.%%.%%%%%.%%%%.%
%..........................%
%.%%%%.%%.%%%%%%%%.%%.%%%%.%
%.%%%%.%%.%%%%%%%%.%%.%%%%.%
%......%%....%%....%%......%
%%%%%%.%%%%% %% %%%%%.%%%%%%
%%%%%%.%%%%% %% %%%%%.%%%%%%
%%%%%%.% %.%%%%%%
%%%%%%.% %%%% %%%% %.%%%%%%
% . %G GG G% . %
%%%%%%.% %%%%%%%%%% %.%%%%%%
%%%%%%.% %.%%%%%%
%%%%%%.% %%%%%%%%%% %.%%%%%%
%............%%............%
%.%%%%.%%%%%.%%.%%%%%.%%%%.%
%.%%%%.%%%%%.%%.%%%%%.%%%%.%
%o..%%....... .......%%..o%
%%%.%%.%%.%%%%%%%%.%%.%%.%%%
%%%.%%.%%.%%%%%%%%.%%.%%.%%%
%......%%....%%....%%......%
%.%%%%%%%%%%.%%.%%%%%%%%%%.%
%.............P............%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%

View file

@ -0,0 +1,7 @@
%%%%%%%%%%%%%%%%%%%%
%......%G G%......%
%.%%...%% %%...%%.%
%.%o.%........%.o%.%
%.%%.%.%%%%%%.%.%%.%
%........P.........%
%%%%%%%%%%%%%%%%%%%%

View file

@ -0,0 +1,7 @@
%%%%%%%
% P %
% %%% %
% %. %
% %%% %
%. G %
%%%%%%%

View file

@ -0,0 +1,10 @@
%%%%%
% . %
%.G.%
% . %
%. .%
% %
% .%
% %
%P .%
%%%%%

View file

@ -0,0 +1,5 @@
%%%%%%%%
% P G%
%G%%%%%%
%.... %
%%%%%%%%

View file

@ -0,0 +1,13 @@
%%%%%%%%%%%%%%%%%%%%
%o...%........%...o%
%.%%.%.%%..%%.%.%%.%
%.%.....%..%.....%.%
%.%.%%.%% %%.%%.%.%
%...... GGGG%.%....%
%.%....%%%%%%.%..%.%
%.%....% oo%.%..%.%
%.%....% %%%%.%..%.%
%.%...........%..%.%
%.%%.%.%%%%%%.%.%%.%
%o...%...P....%...o%
%%%%%%%%%%%%%%%%%%%%

258
reinforcement/learningAgents.py Normal file
View file

@ -0,0 +1,258 @@
# learningAgents.py
# -----------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
from game import Directions, Agent, Actions
import random,util,time
class ValueEstimationAgent(Agent):
"""
Abstract agent which assigns values to (state,action)
Q-Values for an environment. As well as a value to a
state and a policy given respectively by,
V(s) = max_{a in actions} Q(s,a)
policy(s) = arg_max_{a in actions} Q(s,a)
Both ValueIterationAgent and QLearningAgent inherit
from this agent. While a ValueIterationAgent has
a model of the environment via a MarkovDecisionProcess
(see mdp.py) that is used to estimate Q-Values before
ever actually acting, the QLearningAgent estimates
Q-Values while acting in the environment.
"""
def __init__(self, alpha=1.0, epsilon=0.05, gamma=0.8, numTraining = 10):
"""
Sets options, which can be passed in via the Pacman command line using -a alpha=0.5,...
alpha - learning rate
epsilon - exploration rate
gamma - discount factor
        numTraining - number of training episodes, i.e. no learning after this many episodes
"""
self.alpha = float(alpha)
self.epsilon = float(epsilon)
self.discount = float(gamma)
self.numTraining = int(numTraining)
####################################
# Override These Functions #
####################################
def getQValue(self, state, action):
"""
Should return Q(state,action)
"""
util.raiseNotDefined()
def getValue(self, state):
"""
What is the value of this state under the best action?
Concretely, this is given by
V(s) = max_{a in actions} Q(s,a)
"""
util.raiseNotDefined()
def getPolicy(self, state):
"""
        What is the best action to take in the state? Note that because
we might want to explore, this might not coincide with getAction
Concretely, this is given by
policy(s) = arg_max_{a in actions} Q(s,a)
If many actions achieve the maximal Q-value,
it doesn't matter which is selected.
"""
util.raiseNotDefined()
def getAction(self, state):
"""
state: can call state.getLegalActions()
Choose an action and return it.
"""
util.raiseNotDefined()
class ReinforcementAgent(ValueEstimationAgent):
"""
      Abstract Reinforcement Agent: A ValueEstimationAgent
which estimates Q-Values (as well as policies) from experience
rather than a model
What you need to know:
- The environment will call
observeTransition(state,action,nextState,deltaReward),
which will call update(state, action, nextState, deltaReward)
which you should override.
- Use self.getLegalActions(state) to know which actions
are available in a state
"""
####################################
# Override These Functions #
####################################
def update(self, state, action, nextState, reward):
"""
This class will call this function, which you write, after
observing a transition and reward
"""
util.raiseNotDefined()
####################################
# Read These Functions #
####################################
def getLegalActions(self,state):
"""
Get the actions available for a given
state. This is what you should use to
obtain legal actions for a state
"""
return self.actionFn(state)
def observeTransition(self, state,action,nextState,deltaReward):
"""
Called by environment to inform agent that a transition has
been observed. This will result in a call to self.update
on the same arguments
NOTE: Do *not* override or call this function
"""
self.episodeRewards += deltaReward
self.update(state,action,nextState,deltaReward)
def startEpisode(self):
"""
Called by environment when new episode is starting
"""
self.lastState = None
self.lastAction = None
self.episodeRewards = 0.0
def stopEpisode(self):
"""
Called by environment when episode is done
"""
if self.episodesSoFar < self.numTraining:
self.accumTrainRewards += self.episodeRewards
else:
self.accumTestRewards += self.episodeRewards
self.episodesSoFar += 1
if self.episodesSoFar >= self.numTraining:
# Take off the training wheels
self.epsilon = 0.0 # no exploration
self.alpha = 0.0 # no learning
def isInTraining(self):
return self.episodesSoFar < self.numTraining
def isInTesting(self):
return not self.isInTraining()
def __init__(self, actionFn = None, numTraining=100, epsilon=0.5, alpha=0.5, gamma=1):
"""
actionFn: Function which takes a state and returns the list of legal actions
alpha - learning rate
epsilon - exploration rate
gamma - discount factor
        numTraining - number of training episodes, i.e. no learning after this many episodes
"""
if actionFn == None:
actionFn = lambda state: state.getLegalActions()
self.actionFn = actionFn
self.episodesSoFar = 0
self.accumTrainRewards = 0.0
self.accumTestRewards = 0.0
self.numTraining = int(numTraining)
self.epsilon = float(epsilon)
self.alpha = float(alpha)
self.discount = float(gamma)
################################
# Controls needed for Crawler #
################################
def setEpsilon(self, epsilon):
self.epsilon = epsilon
def setLearningRate(self, alpha):
self.alpha = alpha
def setDiscount(self, discount):
self.discount = discount
def doAction(self,state,action):
"""
Called by inherited class when
an action is taken in a state
"""
self.lastState = state
self.lastAction = action
###################
# Pacman Specific #
###################
def observationFunction(self, state):
"""
This is where we ended up after our last action.
The simulation should somehow ensure this is called
"""
        if self.lastState is not None:
reward = state.getScore() - self.lastState.getScore()
self.observeTransition(self.lastState, self.lastAction, state, reward)
return state
def registerInitialState(self, state):
self.startEpisode()
if self.episodesSoFar == 0:
print 'Beginning %d episodes of Training' % (self.numTraining)
def final(self, state):
"""
Called by Pacman game at the terminal state
"""
deltaReward = state.getScore() - self.lastState.getScore()
self.observeTransition(self.lastState, self.lastAction, state, deltaReward)
self.stopEpisode()
# Make sure we have this var
if not 'episodeStartTime' in self.__dict__:
self.episodeStartTime = time.time()
if not 'lastWindowAccumRewards' in self.__dict__:
self.lastWindowAccumRewards = 0.0
self.lastWindowAccumRewards += state.getScore()
NUM_EPS_UPDATE = 100
if self.episodesSoFar % NUM_EPS_UPDATE == 0:
print 'Reinforcement Learning Status:'
windowAvg = self.lastWindowAccumRewards / float(NUM_EPS_UPDATE)
if self.episodesSoFar <= self.numTraining:
trainAvg = self.accumTrainRewards / float(self.episodesSoFar)
print '\tCompleted %d out of %d training episodes' % (
self.episodesSoFar,self.numTraining)
print '\tAverage Rewards over all training: %.2f' % (
trainAvg)
else:
testAvg = float(self.accumTestRewards) / (self.episodesSoFar - self.numTraining)
print '\tCompleted %d test episodes' % (self.episodesSoFar - self.numTraining)
print '\tAverage Rewards over testing: %.2f' % testAvg
print '\tAverage Rewards for last %d episodes: %.2f' % (
NUM_EPS_UPDATE,windowAvg)
print '\tEpisode took %.2f seconds' % (time.time() - self.episodeStartTime)
self.lastWindowAccumRewards = 0.0
self.episodeStartTime = time.time()
if self.episodesSoFar == self.numTraining:
msg = 'Training Done (turning off epsilon and alpha)'
print '%s\n%s' % (msg,'-' * len(msg))

67
reinforcement/mdp.py Normal file
View file

@ -0,0 +1,67 @@
# mdp.py
# ------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
import random
class MarkovDecisionProcess:
def getStates(self):
"""
Return a list of all states in the MDP.
Not generally possible for large MDPs.
"""
abstract
def getStartState(self):
"""
Return the start state of the MDP.
"""
abstract
def getPossibleActions(self, state):
"""
Return list of possible actions from 'state'.
"""
abstract
def getTransitionStatesAndProbs(self, state, action):
"""
Returns list of (nextState, prob) pairs
representing the states reachable
from 'state' by taking 'action' along
with their transition probabilities.
        Note that in Q-Learning and reinforcement
learning in general, we do not know these
probabilities nor do we directly model them.
"""
abstract
def getReward(self, state, action, nextState):
"""
Get the reward for the state, action, nextState transition.
Not available in reinforcement learning.
"""
abstract
def isTerminal(self, state):
"""
Returns true if the current state is a terminal state. By convention,
a terminal state has zero future rewards. Sometimes the terminal state(s)
may have no possible actions. It is also common to think of the terminal
state as having a self-loop action 'pass' with zero reward; the formulations
are equivalent.
"""
abstract

684
reinforcement/pacman.py Normal file
View file

@ -0,0 +1,684 @@
# pacman.py
# ---------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
"""
Pacman.py holds the logic for the classic pacman game along with the main
code to run a game. This file is divided into three sections:
(i) Your interface to the pacman world:
Pacman is a complex environment. You probably don't want to
          read through all of the code we wrote to make the game run
correctly. This section contains the parts of the code
that you will need to understand in order to complete the
project. There is also some code in game.py that you should
understand.
(ii) The hidden secrets of pacman:
This section contains all of the logic code that the pacman
environment uses to decide who can move where, who dies when
things collide, etc. You shouldn't need to read this section
of code, but you can if you want.
(iii) Framework to start a game:
The final section contains the code for reading the command
you use to set up the game, then starting up a new game, along with
linking in all the external parts (agent functions, graphics).
Check this section out to see all the options available to you.
To play your first game, type 'python pacman.py' from the command line.
The keys are 'a', 's', 'd', and 'w' to move (or arrow keys). Have fun!
"""
from game import GameStateData
from game import Game
from game import Directions
from game import Actions
from util import nearestPoint
from util import manhattanDistance
import util, layout
import sys, types, time, random, os
###################################################
# YOUR INTERFACE TO THE PACMAN WORLD: A GameState #
###################################################
class GameState:
"""
A GameState specifies the full game state, including the food, capsules,
agent configurations and score changes.
GameStates are used by the Game object to capture the actual state of the game and
can be used by agents to reason about the game.
Much of the information in a GameState is stored in a GameStateData object. We
strongly suggest that you access that data via the accessor methods below rather
than referring to the GameStateData object directly.
Note that in classic Pacman, Pacman is always agent 0.
"""
####################################################
# Accessor methods: use these to access state data #
####################################################
# static variable keeps track of which states have had getLegalActions called
explored = set()
def getAndResetExplored():
tmp = GameState.explored.copy()
GameState.explored = set()
return tmp
getAndResetExplored = staticmethod(getAndResetExplored)
def getLegalActions( self, agentIndex=0 ):
"""
Returns the legal actions for the agent specified.
"""
# GameState.explored.add(self)
if self.isWin() or self.isLose(): return []
if agentIndex == 0: # Pacman is moving
return PacmanRules.getLegalActions( self )
else:
return GhostRules.getLegalActions( self, agentIndex )
def generateSuccessor( self, agentIndex, action):
"""
Returns the successor state after the specified agent takes the action.
"""
# Check that successors exist
if self.isWin() or self.isLose(): raise Exception('Can\'t generate a successor of a terminal state.')
# Copy current state
state = GameState(self)
# Let agent's logic deal with its action's effects on the board
if agentIndex == 0: # Pacman is moving
state.data._eaten = [False for i in range(state.getNumAgents())]
PacmanRules.applyAction( state, action )
else: # A ghost is moving
GhostRules.applyAction( state, action, agentIndex )
# Time passes
if agentIndex == 0:
state.data.scoreChange += -TIME_PENALTY # Penalty for waiting around
else:
GhostRules.decrementTimer( state.data.agentStates[agentIndex] )
# Resolve multi-agent effects
GhostRules.checkDeath( state, agentIndex )
# Book keeping
state.data._agentMoved = agentIndex
state.data.score += state.data.scoreChange
GameState.explored.add(self)
GameState.explored.add(state)
return state
def getLegalPacmanActions( self ):
return self.getLegalActions( 0 )
def generatePacmanSuccessor( self, action ):
"""
Generates the successor state after the specified pacman move
"""
return self.generateSuccessor( 0, action )
def getPacmanState( self ):
"""
Returns an AgentState object for pacman (in game.py)
state.pos gives the current position
state.direction gives the travel vector
"""
return self.data.agentStates[0].copy()
def getPacmanPosition( self ):
return self.data.agentStates[0].getPosition()
def getGhostStates( self ):
return self.data.agentStates[1:]
def getGhostState( self, agentIndex ):
if agentIndex == 0 or agentIndex >= self.getNumAgents():
raise Exception("Invalid index passed to getGhostState")
return self.data.agentStates[agentIndex]
def getGhostPosition( self, agentIndex ):
if agentIndex == 0:
raise Exception("Pacman's index passed to getGhostPosition")
return self.data.agentStates[agentIndex].getPosition()
def getGhostPositions(self):
return [s.getPosition() for s in self.getGhostStates()]
def getNumAgents( self ):
return len( self.data.agentStates )
def getScore( self ):
return float(self.data.score)
def getCapsules(self):
"""
Returns a list of positions (x,y) of the remaining capsules.
"""
return self.data.capsules
def getNumFood( self ):
return self.data.food.count()
def getFood(self):
"""
Returns a Grid of boolean food indicator variables.
Grids can be accessed via list notation, so to check
if there is food at (x,y), just call
currentFood = state.getFood()
if currentFood[x][y] == True: ...
"""
return self.data.food
def getWalls(self):
"""
Returns a Grid of boolean wall indicator variables.
Grids can be accessed via list notation, so to check
if there is a wall at (x,y), just call
walls = state.getWalls()
if walls[x][y] == True: ...
"""
return self.data.layout.walls
def hasFood(self, x, y):
return self.data.food[x][y]
def hasWall(self, x, y):
return self.data.layout.walls[x][y]
def isLose( self ):
return self.data._lose
def isWin( self ):
return self.data._win
#############################################
# Helper methods: #
# You shouldn't need to call these directly #
#############################################
def __init__( self, prevState = None ):
"""
Generates a new state by copying information from its predecessor.
"""
        if prevState != None: # Copy the predecessor's state data
self.data = GameStateData(prevState.data)
else:
self.data = GameStateData()
def deepCopy( self ):
state = GameState( self )
state.data = self.data.deepCopy()
return state
def __eq__( self, other ):
"""
Allows two states to be compared.
"""
return hasattr(other, 'data') and self.data == other.data
def __hash__( self ):
"""
Allows states to be keys of dictionaries.
"""
return hash( self.data )
def __str__( self ):
return str(self.data)
def initialize( self, layout, numGhostAgents=1000 ):
"""
Creates an initial game state from a layout array (see layout.py).
"""
self.data.initialize(layout, numGhostAgents)
############################################################################
# THE HIDDEN SECRETS OF PACMAN #
# #
# You shouldn't need to look through the code in this section of the file. #
############################################################################
SCARED_TIME = 40 # Moves ghosts are scared
COLLISION_TOLERANCE = 0.7 # How close ghosts must be to Pacman to kill
TIME_PENALTY = 1 # Number of points lost each round
class ClassicGameRules:
"""
These game rules manage the control flow of a game, deciding when
and how the game starts and ends.
"""
def __init__(self, timeout=30):
self.timeout = timeout
def newGame( self, layout, pacmanAgent, ghostAgents, display, quiet = False, catchExceptions=False):
agents = [pacmanAgent] + ghostAgents[:layout.getNumGhosts()]
initState = GameState()
initState.initialize( layout, len(ghostAgents) )
game = Game(agents, display, self, catchExceptions=catchExceptions)
game.state = initState
self.initialState = initState.deepCopy()
self.quiet = quiet
return game
def process(self, state, game):
"""
Checks to see whether it is time to end the game.
"""
if state.isWin(): self.win(state, game)
if state.isLose(): self.lose(state, game)
def win( self, state, game ):
if not self.quiet: print "Pacman emerges victorious! Score: %d" % state.data.score
game.gameOver = True
def lose( self, state, game ):
if not self.quiet: print "Pacman died! Score: %d" % state.data.score
game.gameOver = True
def getProgress(self, game):
return float(game.state.getNumFood()) / self.initialState.getNumFood()
def agentCrash(self, game, agentIndex):
if agentIndex == 0:
print "Pacman crashed"
else:
print "A ghost crashed"
def getMaxTotalTime(self, agentIndex):
return self.timeout
def getMaxStartupTime(self, agentIndex):
return self.timeout
def getMoveWarningTime(self, agentIndex):
return self.timeout
def getMoveTimeout(self, agentIndex):
return self.timeout
def getMaxTimeWarnings(self, agentIndex):
return 0
class PacmanRules:
"""
These functions govern how pacman interacts with his environment under
the classic game rules.
"""
PACMAN_SPEED=1
def getLegalActions( state ):
"""
Returns a list of possible actions.
"""
return Actions.getPossibleActions( state.getPacmanState().configuration, state.data.layout.walls )
getLegalActions = staticmethod( getLegalActions )
def applyAction( state, action ):
"""
Edits the state to reflect the results of the action.
"""
legal = PacmanRules.getLegalActions( state )
if action not in legal:
raise Exception("Illegal action " + str(action))
pacmanState = state.data.agentStates[0]
# Update Configuration
vector = Actions.directionToVector( action, PacmanRules.PACMAN_SPEED )
pacmanState.configuration = pacmanState.configuration.generateSuccessor( vector )
# Eat
next = pacmanState.configuration.getPosition()
nearest = nearestPoint( next )
if manhattanDistance( nearest, next ) <= 0.5 :
# Remove food
PacmanRules.consume( nearest, state )
applyAction = staticmethod( applyAction )
def consume( position, state ):
x,y = position
# Eat food
if state.data.food[x][y]:
state.data.scoreChange += 10
state.data.food = state.data.food.copy()
state.data.food[x][y] = False
state.data._foodEaten = position
# TODO: cache numFood?
numFood = state.getNumFood()
if numFood == 0 and not state.data._lose:
state.data.scoreChange += 500
state.data._win = True
# Eat capsule
if( position in state.getCapsules() ):
state.data.capsules.remove( position )
state.data._capsuleEaten = position
# Reset all ghosts' scared timers
for index in range( 1, len( state.data.agentStates ) ):
state.data.agentStates[index].scaredTimer = SCARED_TIME
consume = staticmethod( consume )
class GhostRules:
"""
These functions dictate how ghosts interact with their environment.
"""
GHOST_SPEED=1.0
def getLegalActions( state, ghostIndex ):
"""
Ghosts cannot stop, and cannot turn around unless they
reach a dead end, but can turn 90 degrees at intersections.
"""
conf = state.getGhostState( ghostIndex ).configuration
possibleActions = Actions.getPossibleActions( conf, state.data.layout.walls )
reverse = Actions.reverseDirection( conf.direction )
if Directions.STOP in possibleActions:
possibleActions.remove( Directions.STOP )
if reverse in possibleActions and len( possibleActions ) > 1:
possibleActions.remove( reverse )
return possibleActions
getLegalActions = staticmethod( getLegalActions )
def applyAction( state, action, ghostIndex):
legal = GhostRules.getLegalActions( state, ghostIndex )
if action not in legal:
raise Exception("Illegal ghost action " + str(action))
ghostState = state.data.agentStates[ghostIndex]
speed = GhostRules.GHOST_SPEED
if ghostState.scaredTimer > 0: speed /= 2.0
vector = Actions.directionToVector( action, speed )
ghostState.configuration = ghostState.configuration.generateSuccessor( vector )
applyAction = staticmethod( applyAction )
def decrementTimer( ghostState):
timer = ghostState.scaredTimer
if timer == 1:
ghostState.configuration.pos = nearestPoint( ghostState.configuration.pos )
ghostState.scaredTimer = max( 0, timer - 1 )
decrementTimer = staticmethod( decrementTimer )
def checkDeath( state, agentIndex):
pacmanPosition = state.getPacmanPosition()
if agentIndex == 0: # Pacman just moved; Anyone can kill him
for index in range( 1, len( state.data.agentStates ) ):
ghostState = state.data.agentStates[index]
ghostPosition = ghostState.configuration.getPosition()
if GhostRules.canKill( pacmanPosition, ghostPosition ):
GhostRules.collide( state, ghostState, index )
else:
ghostState = state.data.agentStates[agentIndex]
ghostPosition = ghostState.configuration.getPosition()
if GhostRules.canKill( pacmanPosition, ghostPosition ):
GhostRules.collide( state, ghostState, agentIndex )
checkDeath = staticmethod( checkDeath )
def collide( state, ghostState, agentIndex):
if ghostState.scaredTimer > 0:
state.data.scoreChange += 200
GhostRules.placeGhost(state, ghostState)
ghostState.scaredTimer = 0
# Added for first-person
state.data._eaten[agentIndex] = True
else:
if not state.data._win:
state.data.scoreChange -= 500
state.data._lose = True
collide = staticmethod( collide )
def canKill( pacmanPosition, ghostPosition ):
return manhattanDistance( ghostPosition, pacmanPosition ) <= COLLISION_TOLERANCE
canKill = staticmethod( canKill )
def placeGhost(state, ghostState):
ghostState.configuration = ghostState.start
placeGhost = staticmethod( placeGhost )
#############################
# FRAMEWORK TO START A GAME #
#############################
def default(str):
return str + ' [Default: %default]'
def parseAgentArgs(str):
if str == None: return {}
pieces = str.split(',')
opts = {}
for p in pieces:
if '=' in p:
key, val = p.split('=')
else:
key,val = p, 1
opts[key] = val
return opts
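# Worked example (illustrative):
#     parseAgentArgs("epsilon=0.1,alpha=0.5,extrapolate")
#     returns {'epsilon': '0.1', 'alpha': '0.5', 'extrapolate': 1}
# (option names here are made up; values given with '=' stay strings)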
def readCommand( argv ):
"""
Processes the command used to run pacman from the command line.
"""
from optparse import OptionParser
usageStr = """
USAGE: python pacman.py <options>
EXAMPLES: (1) python pacman.py
- starts an interactive game
(2) python pacman.py --layout smallClassic --zoom 2
OR python pacman.py -l smallClassic -z 2
- starts an interactive game on a smaller board, zoomed in
"""
parser = OptionParser(usageStr)
parser.add_option('-n', '--numGames', dest='numGames', type='int',
help=default('the number of GAMES to play'), metavar='GAMES', default=1)
parser.add_option('-l', '--layout', dest='layout',
help=default('the LAYOUT_FILE from which to load the map layout'),
metavar='LAYOUT_FILE', default='mediumClassic')
parser.add_option('-p', '--pacman', dest='pacman',
help=default('the agent TYPE in the pacmanAgents module to use'),
metavar='TYPE', default='KeyboardAgent')
parser.add_option('-t', '--textGraphics', action='store_true', dest='textGraphics',
help='Display output as text only', default=False)
parser.add_option('-q', '--quietTextGraphics', action='store_true', dest='quietGraphics',
help='Generate minimal output and no graphics', default=False)
parser.add_option('-g', '--ghosts', dest='ghost',
help=default('the ghost agent TYPE in the ghostAgents module to use'),
metavar = 'TYPE', default='RandomGhost')
parser.add_option('-k', '--numghosts', type='int', dest='numGhosts',
help=default('The maximum number of ghosts to use'), default=4)
parser.add_option('-z', '--zoom', type='float', dest='zoom',
help=default('Zoom the size of the graphics window'), default=1.0)
parser.add_option('-f', '--fixRandomSeed', action='store_true', dest='fixRandomSeed',
help='Fixes the random seed to always play the same game', default=False)
parser.add_option('-r', '--recordActions', action='store_true', dest='record',
help='Writes game histories to a file (named by the time they were played)', default=False)
parser.add_option('--replay', dest='gameToReplay',
help='A recorded game file (pickle) to replay', default=None)
parser.add_option('-a','--agentArgs',dest='agentArgs',
help='Comma separated values sent to agent. e.g. "opt1=val1,opt2,opt3=val3"')
parser.add_option('-x', '--numTraining', dest='numTraining', type='int',
help=default('How many episodes are training (suppresses output)'), default=0)
parser.add_option('--frameTime', dest='frameTime', type='float',
help=default('Time to delay between frames; <0 means keyboard'), default=0.1)
parser.add_option('-c', '--catchExceptions', action='store_true', dest='catchExceptions',
help='Turns on exception handling and timeouts during games', default=False)
parser.add_option('--timeout', dest='timeout', type='int',
help=default('Maximum length of time an agent can spend computing in a single game'), default=30)
options, otherjunk = parser.parse_args(argv)
if len(otherjunk) != 0:
raise Exception('Command line input not understood: ' + str(otherjunk))
args = dict()
# Fix the random seed
if options.fixRandomSeed: random.seed('cs188')
# Choose a layout
args['layout'] = layout.getLayout( options.layout )
if args['layout'] == None: raise Exception("The layout " + options.layout + " cannot be found")
# Choose a Pacman agent
noKeyboard = options.gameToReplay == None and (options.textGraphics or options.quietGraphics)
pacmanType = loadAgent(options.pacman, noKeyboard)
agentOpts = parseAgentArgs(options.agentArgs)
if options.numTraining > 0:
args['numTraining'] = options.numTraining
if 'numTraining' not in agentOpts: agentOpts['numTraining'] = options.numTraining
pacman = pacmanType(**agentOpts) # Instantiate Pacman with agentArgs
args['pacman'] = pacman
# Don't display training games
if 'numTrain' in agentOpts:
options.numQuiet = int(agentOpts['numTrain'])
options.numIgnore = int(agentOpts['numTrain'])
# Choose a ghost agent
ghostType = loadAgent(options.ghost, noKeyboard)
args['ghosts'] = [ghostType( i+1 ) for i in range( options.numGhosts )]
# Choose a display format
if options.quietGraphics:
import textDisplay
args['display'] = textDisplay.NullGraphics()
elif options.textGraphics:
import textDisplay
textDisplay.SLEEP_TIME = options.frameTime
args['display'] = textDisplay.PacmanGraphics()
else:
import graphicsDisplay
args['display'] = graphicsDisplay.PacmanGraphics(options.zoom, frameTime = options.frameTime)
args['numGames'] = options.numGames
args['record'] = options.record
args['catchExceptions'] = options.catchExceptions
args['timeout'] = options.timeout
# Special case: recorded games don't use the runGames method or args structure
if options.gameToReplay != None:
print 'Replaying recorded game %s.' % options.gameToReplay
import cPickle
f = open(options.gameToReplay)
try: recorded = cPickle.load(f)
finally: f.close()
recorded['display'] = args['display']
replayGame(**recorded)
sys.exit(0)
return args
def loadAgent(pacman, nographics):
# Looks through all pythonPath Directories for the right module,
pythonPathStr = os.path.expandvars("$PYTHONPATH")
if pythonPathStr.find(';') == -1:
pythonPathDirs = pythonPathStr.split(':')
else:
pythonPathDirs = pythonPathStr.split(';')
pythonPathDirs.append('.')
for moduleDir in pythonPathDirs:
if not os.path.isdir(moduleDir): continue
moduleNames = [f for f in os.listdir(moduleDir) if f.endswith('gents.py')]
for modulename in moduleNames:
try:
module = __import__(modulename[:-3])
except ImportError:
continue
if pacman in dir(module):
if nographics and modulename == 'keyboardAgents.py':
raise Exception('Using the keyboard requires graphics (not text display)')
return getattr(module, pacman)
raise Exception('The agent ' + pacman + ' is not specified in any *Agents.py.')
def replayGame( layout, actions, display ):
import pacmanAgents, ghostAgents
rules = ClassicGameRules()
agents = [pacmanAgents.GreedyAgent()] + [ghostAgents.RandomGhost(i+1) for i in range(layout.getNumGhosts())]
game = rules.newGame( layout, agents[0], agents[1:], display )
state = game.state
display.initialize(state.data)
for action in actions:
# Execute the action
state = state.generateSuccessor( *action )
# Change the display
display.update( state.data )
# Allow for game specific conditions (winning, losing, etc.)
rules.process(state, game)
display.finish()
def runGames( layout, pacman, ghosts, display, numGames, record, numTraining = 0, catchExceptions=False, timeout=30 ):
import __main__
__main__.__dict__['_display'] = display
rules = ClassicGameRules(timeout)
games = []
for i in range( numGames ):
beQuiet = i < numTraining
if beQuiet:
# Suppress output and graphics
import textDisplay
gameDisplay = textDisplay.NullGraphics()
rules.quiet = True
else:
gameDisplay = display
rules.quiet = False
game = rules.newGame( layout, pacman, ghosts, gameDisplay, beQuiet, catchExceptions)
game.run()
if not beQuiet: games.append(game)
if record:
import time, cPickle
fname = ('recorded-game-%d' % (i + 1)) + '-'.join([str(t) for t in time.localtime()[1:6]])
f = file(fname, 'w')
components = {'layout': layout, 'actions': game.moveHistory}
cPickle.dump(components, f)
f.close()
if (numGames-numTraining) > 0:
scores = [game.state.getScore() for game in games]
wins = [game.state.isWin() for game in games]
winRate = wins.count(True)/ float(len(wins))
print 'Average Score:', sum(scores) / float(len(scores))
print 'Scores: ', ', '.join([str(score) for score in scores])
print 'Win Rate: %d/%d (%.2f)' % (wins.count(True), len(wins), winRate)
print 'Record: ', ', '.join([ ['Loss', 'Win'][int(w)] for w in wins])
return games
if __name__ == '__main__':
"""
The main function called when pacman.py is run
from the command line:
> python pacman.py
See the usage string for more details.
> python pacman.py --help
"""
args = readCommand( sys.argv[1:] ) # Get game components based on input
runGames( **args )
# import cProfile
# cProfile.run("runGames( **args )")
pass

52
reinforcement/pacmanAgents.py Normal file
View file

@ -0,0 +1,52 @@
# pacmanAgents.py
# ---------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
from pacman import Directions
from game import Agent
import random
import game
import util
class LeftTurnAgent(game.Agent):
"An agent that turns left at every opportunity"
def getAction(self, state):
legal = state.getLegalPacmanActions()
current = state.getPacmanState().configuration.direction
if current == Directions.STOP: current = Directions.NORTH
left = Directions.LEFT[current]
if left in legal: return left
if current in legal: return current
if Directions.RIGHT[current] in legal: return Directions.RIGHT[current]
if Directions.LEFT[left] in legal: return Directions.LEFT[left]
return Directions.STOP
class GreedyAgent(Agent):
def __init__(self, evalFn="scoreEvaluation"):
self.evaluationFunction = util.lookup(evalFn, globals())
assert self.evaluationFunction != None
def getAction(self, state):
# Generate candidate actions
legal = state.getLegalPacmanActions()
if Directions.STOP in legal: legal.remove(Directions.STOP)
successors = [(state.generateSuccessor(0, action), action) for action in legal]
scored = [(self.evaluationFunction(state), action) for state, action in successors]
bestScore = max(scored)[0]
bestActions = [pair[1] for pair in scored if pair[0] == bestScore]
return random.choice(bestActions)
def scoreEvaluation(state):
return state.getScore()

View file

@ -0,0 +1,18 @@
# projectParams.py
# ----------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
STUDENT_CODE_DEFAULT = 'analysis.py,qlearningAgents.py,valueIterationAgents.py'
PROJECT_TEST_CLASSES = 'reinforcementTestClasses.py'
PROJECT_NAME = 'Project 3: Reinforcement learning'
BONUS_PIC = False

View file

@ -0,0 +1,186 @@
# qlearningAgents.py
# ------------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
from game import *
from learningAgents import ReinforcementAgent
from featureExtractors import *
import random,util,math
class QLearningAgent(ReinforcementAgent):
"""
Q-Learning Agent
Functions you should fill in:
- computeValueFromQValues
- computeActionFromQValues
- getQValue
- getAction
- update
Instance variables you have access to
- self.epsilon (exploration prob)
- self.alpha (learning rate)
- self.discount (discount rate)
Functions you should use
- self.getLegalActions(state)
which returns legal actions for a state
"""
def __init__(self, **args):
"You can initialize Q-values here..."
ReinforcementAgent.__init__(self, **args)
"*** YOUR CODE HERE ***"
def getQValue(self, state, action):
"""
Returns Q(state,action)
Should return 0.0 if we have never seen a state,
or the Q node value otherwise.
"""
"*** YOUR CODE HERE ***"
util.raiseNotDefined()
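# Continuing the illustrative self.qvalues sketch from __init__ above:
#     return self.qvalues[(state, action)]
# util.Counter returns 0 for unseen keys, which matches the 0.0 requirement.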
def computeValueFromQValues(self, state):
"""
Returns max_action Q(state,action)
where the max is over legal actions. Note that if
there are no legal actions, which is the case at the
terminal state, you should return a value of 0.0.
"""
"*** YOUR CODE HERE ***"
util.raiseNotDefined()
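# One possible shape for this method (illustrative, not the graded solution):
#     legalActions = self.getLegalActions(state)
#     if not legalActions:
#         return 0.0
#     return max(self.getQValue(state, action) for action in legalActions)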
def computeActionFromQValues(self, state):
"""
Compute the best action to take in a state. Note that if there
are no legal actions, which is the case at the terminal state,
you should return None.
"""
"*** YOUR CODE HERE ***"
util.raiseNotDefined()
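# One possible shape; ties between equally good actions may be broken arbitrarily:
#     legalActions = self.getLegalActions(state)
#     if not legalActions:
#         return None
#     return max(legalActions, key=lambda action: self.getQValue(state, action))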
def getAction(self, state):
"""
Compute the action to take in the current state. With
probability self.epsilon, we should take a random action and
take the best policy action otherwise. Note that if there are
no legal actions, which is the case at the terminal state, you
should choose None as the action.
HINT: You might want to use util.flipCoin(prob)
HINT: To pick randomly from a list, use random.choice(list)
"""
# Pick Action
legalActions = self.getLegalActions(state)
action = None
"*** YOUR CODE HERE ***"
util.raiseNotDefined()
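# A hedged sketch of epsilon-greedy selection, using the hints above:
#     if not legalActions:
#         return None
#     if util.flipCoin(self.epsilon):
#         action = random.choice(legalActions)
#     else:
#         action = self.computeActionFromQValues(state)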
return action
def update(self, state, action, nextState, reward):
"""
The parent class calls this to observe a
state => action => nextState and reward transition.
You should do your Q-Value update here
NOTE: You should never call this function,
it will be called on your behalf
"""
"*** YOUR CODE HERE ***"
util.raiseNotDefined()
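# A sketch of the standard tabular update
# Q(s,a) <- (1 - alpha) * Q(s,a) + alpha * (r + gamma * max_a' Q(s',a')),
# again using the illustrative self.qvalues Counter:
#     sample = reward + self.discount * self.computeValueFromQValues(nextState)
#     self.qvalues[(state, action)] = (1 - self.alpha) * self.qvalues[(state, action)] + self.alpha * sample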
def getPolicy(self, state):
return self.computeActionFromQValues(state)
def getValue(self, state):
return self.computeValueFromQValues(state)
class PacmanQAgent(QLearningAgent):
"Exactly the same as QLearningAgent, but with different default parameters"
def __init__(self, epsilon=0.05,gamma=0.8,alpha=0.2, numTraining=0, **args):
"""
These default parameters can be changed from the pacman.py command line.
For example, to change the exploration rate, try:
python pacman.py -p PacmanQLearningAgent -a epsilon=0.1
alpha - learning rate
epsilon - exploration rate
gamma - discount factor
numTraining - number of training episodes, i.e., no learning after this many episodes
"""
args['epsilon'] = epsilon
args['gamma'] = gamma
args['alpha'] = alpha
args['numTraining'] = numTraining
self.index = 0 # This is always Pacman
QLearningAgent.__init__(self, **args)
def getAction(self, state):
"""
Simply calls the getAction method of QLearningAgent and then
informs parent of action for Pacman. Do not change or remove this
method.
"""
action = QLearningAgent.getAction(self,state)
self.doAction(state,action)
return action
class ApproximateQAgent(PacmanQAgent):
"""
ApproximateQLearningAgent
You should only have to override getQValue
and update. All other QLearningAgent functions
should work as is.
"""
def __init__(self, extractor='IdentityExtractor', **args):
self.featExtractor = util.lookup(extractor, globals())()
PacmanQAgent.__init__(self, **args)
self.weights = util.Counter()
def getWeights(self):
return self.weights
def getQValue(self, state, action):
"""
Should return Q(state,action) = w * featureVector
where * is the dotProduct operator
"""
"*** YOUR CODE HERE ***"
util.raiseNotDefined()
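# A sketch assuming the extractor returns a util.Counter of features and that
# util.Counter's * operator is the dot product the docstring refers to:
#     features = self.featExtractor.getFeatures(state, action)
#     return self.getWeights() * features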
def update(self, state, action, nextState, reward):
"""
Should update your weights based on transition
"""
"*** YOUR CODE HERE ***"
util.raiseNotDefined()
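# A sketch of the approximate Q-learning weight update
# w_i <- w_i + alpha * difference * f_i(s,a), where
# difference = (r + gamma * max_a' Q(s',a')) - Q(s,a):
#     difference = (reward + self.discount * self.computeValueFromQValues(nextState)) - self.getQValue(state, action)
#     features = self.featExtractor.getFeatures(state, action)
#     for feature, value in features.items():
#         self.weights[feature] += self.alpha * difference * value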
def final(self, state):
"Called at the end of each game."
# call the super-class final method
PacmanQAgent.final(self, state)
# did we finish training?
if self.episodesSoFar == self.numTraining:
# you might want to print your weights here for debugging
"*** YOUR CODE HERE ***"
pass

View file

@ -0,0 +1,924 @@
# reinforcementTestClasses.py
# ---------------------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
import testClasses
import random, math, traceback, sys, os
import layout, textDisplay, pacman, gridworld
import time
from util import Counter, TimeoutFunction, FixedRandom
from collections import defaultdict
from pprint import PrettyPrinter
from hashlib import sha1
pp = PrettyPrinter()
VERBOSE = False
import gridworld
LIVINGREWARD = -0.1
NOISE = 0.2
class ValueIterationTest(testClasses.TestCase):
def __init__(self, question, testDict):
super(ValueIterationTest, self).__init__(question, testDict)
self.discount = float(testDict['discount'])
self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
iterations = int(testDict['valueIterations'])
if 'noise' in testDict: self.grid.setNoise(float(testDict['noise']))
if 'livingReward' in testDict: self.grid.setLivingReward(float(testDict['livingReward']))
maxPreIterations = 10
self.numsIterationsForDisplay = range(min(iterations, maxPreIterations))
self.testOutFile = testDict['test_out_file']
if maxPreIterations < iterations:
self.numsIterationsForDisplay.append(iterations)
def writeFailureFile(self, string):
with open(self.testOutFile, 'w') as handle:
handle.write(string)
def removeFailureFileIfExists(self):
if os.path.exists(self.testOutFile):
os.remove(self.testOutFile)
def execute(self, grades, moduleDict, solutionDict):
failureOutputFileString = ''
failureOutputStdString = ''
for n in self.numsIterationsForDisplay:
checkPolicy = (n == self.numsIterationsForDisplay[-1])
testPass, stdOutString, fileOutString = self.executeNIterations(grades, moduleDict, solutionDict, n, checkPolicy)
failureOutputStdString += stdOutString
failureOutputFileString += fileOutString
if not testPass:
self.addMessage(failureOutputStdString)
self.addMessage('For more details to help you debug, see test output file %s\n\n' % self.testOutFile)
self.writeFailureFile(failureOutputFileString)
return self.testFail(grades)
self.removeFailureFileIfExists()
return self.testPass(grades)
def executeNIterations(self, grades, moduleDict, solutionDict, n, checkPolicy):
testPass = True
valuesPretty, qValuesPretty, actions, policyPretty = self.runAgent(moduleDict, n)
stdOutString = ''
fileOutString = ''
valuesKey = "values_k_%d" % n
if self.comparePrettyValues(valuesPretty, solutionDict[valuesKey]):
fileOutString += "Values at iteration %d are correct.\n" % n
fileOutString += " Student/correct solution:\n %s\n" % self.prettyValueSolutionString(valuesKey, valuesPretty)
else:
testPass = False
outString = "Values at iteration %d are NOT correct.\n" % n
outString += " Student solution:\n %s\n" % self.prettyValueSolutionString(valuesKey, valuesPretty)
outString += " Correct solution:\n %s\n" % self.prettyValueSolutionString(valuesKey, solutionDict[valuesKey])
stdOutString += outString
fileOutString += outString
for action in actions:
qValuesKey = 'q_values_k_%d_action_%s' % (n, action)
qValues = qValuesPretty[action]
if self.comparePrettyValues(qValues, solutionDict[qValuesKey]):
fileOutString += "Q-Values at iteration %d for action %s are correct.\n" % (n, action)
fileOutString += " Student/correct solution:\n %s\n" % self.prettyValueSolutionString(qValuesKey, qValues)
else:
testPass = False
outString = "Q-Values at iteration %d for action %s are NOT correct.\n" % (n, action)
outString += " Student solution:\n %s\n" % self.prettyValueSolutionString(qValuesKey, qValues)
outString += " Correct solution:\n %s\n" % self.prettyValueSolutionString(qValuesKey, solutionDict[qValuesKey])
stdOutString += outString
fileOutString += outString
if checkPolicy:
if not self.comparePrettyValues(policyPretty, solutionDict['policy']):
testPass = False
outString = "Policy is NOT correct.\n"
outString += " Student solution:\n %s\n" % self.prettyValueSolutionString('policy', policyPretty)
outString += " Correct solution:\n %s\n" % self.prettyValueSolutionString('policy', solutionDict['policy'])
stdOutString += outString
fileOutString += outString
return testPass, stdOutString, fileOutString
def writeSolution(self, moduleDict, filePath):
with open(filePath, 'w') as handle:
policyPretty = ''
actions = []
for n in self.numsIterationsForDisplay:
valuesPretty, qValuesPretty, actions, policyPretty = self.runAgent(moduleDict, n)
handle.write(self.prettyValueSolutionString('values_k_%d' % n, valuesPretty))
for action in actions:
handle.write(self.prettyValueSolutionString('q_values_k_%d_action_%s' % (n, action), qValuesPretty[action]))
handle.write(self.prettyValueSolutionString('policy', policyPretty))
handle.write(self.prettyValueSolutionString('actions', '\n'.join(actions) + '\n'))
return True
def runAgent(self, moduleDict, numIterations):
agent = moduleDict['valueIterationAgents'].ValueIterationAgent(self.grid, discount=self.discount, iterations=numIterations)
states = self.grid.getStates()
actions = list(reduce(lambda a, b: set(a).union(b), [self.grid.getPossibleActions(state) for state in states]))
values = {}
qValues = {}
policy = {}
for state in states:
values[state] = agent.getValue(state)
policy[state] = agent.computeActionFromValues(state)
possibleActions = self.grid.getPossibleActions(state)
for action in actions:
if not qValues.has_key(action):
qValues[action] = {}
if action in possibleActions:
qValues[action][state] = agent.computeQValueFromValues(state, action)
else:
qValues[action][state] = None
valuesPretty = self.prettyValues(values)
policyPretty = self.prettyPolicy(policy)
qValuesPretty = {}
for action in actions:
qValuesPretty[action] = self.prettyValues(qValues[action])
return (valuesPretty, qValuesPretty, actions, policyPretty)
def prettyPrint(self, elements, formatString):
pretty = ''
states = self.grid.getStates()
for ybar in range(self.grid.grid.height):
y = self.grid.grid.height-1-ybar
row = []
for x in range(self.grid.grid.width):
if (x, y) in states:
value = elements[(x, y)]
if value is None:
row.append(' illegal')
else:
row.append(formatString.format(elements[(x,y)]))
else:
row.append('_' * 10)
pretty += ' %s\n' % (" ".join(row), )
pretty += '\n'
return pretty
def prettyValues(self, values):
return self.prettyPrint(values, '{0:10.4f}')
def prettyPolicy(self, policy):
return self.prettyPrint(policy, '{0:10s}')
def prettyValueSolutionString(self, name, pretty):
return '%s: """\n%s\n"""\n\n' % (name, pretty.rstrip())
def comparePrettyValues(self, aPretty, bPretty, tolerance=0.01):
aList = self.parsePrettyValues(aPretty)
bList = self.parsePrettyValues(bPretty)
if len(aList) != len(bList):
return False
for a, b in zip(aList, bList):
try:
aNum = float(a)
bNum = float(b)
# error = abs((aNum - bNum) / ((aNum + bNum) / 2.0))
error = abs(aNum - bNum)
if error > tolerance:
return False
except ValueError:
if a.strip() != b.strip():
return False
return True
def parsePrettyValues(self, pretty):
values = pretty.split()
return values
class ApproximateQLearningTest(testClasses.TestCase):
def __init__(self, question, testDict):
super(ApproximateQLearningTest, self).__init__(question, testDict)
self.discount = float(testDict['discount'])
self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
if 'noise' in testDict: self.grid.setNoise(float(testDict['noise']))
if 'livingReward' in testDict: self.grid.setLivingReward(float(testDict['livingReward']))
self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
self.env = gridworld.GridworldEnvironment(self.grid)
self.epsilon = float(testDict['epsilon'])
self.learningRate = float(testDict['learningRate'])
self.extractor = 'IdentityExtractor'
if 'extractor' in testDict:
self.extractor = testDict['extractor']
self.opts = {'actionFn': self.env.getPossibleActions, 'epsilon': self.epsilon, 'gamma': self.discount, 'alpha': self.learningRate}
numExperiences = int(testDict['numExperiences'])
maxPreExperiences = 10
self.numsExperiencesForDisplay = range(min(numExperiences, maxPreExperiences))
self.testOutFile = testDict['test_out_file']
if maxPreExperiences < numExperiences:
self.numsExperiencesForDisplay.append(numExperiences)
def writeFailureFile(self, string):
with open(self.testOutFile, 'w') as handle:
handle.write(string)
def removeFailureFileIfExists(self):
if os.path.exists(self.testOutFile):
os.remove(self.testOutFile)
def execute(self, grades, moduleDict, solutionDict):
failureOutputFileString = ''
failureOutputStdString = ''
for n in self.numsExperiencesForDisplay:
testPass, stdOutString, fileOutString = self.executeNExperiences(grades, moduleDict, solutionDict, n)
failureOutputStdString += stdOutString
failureOutputFileString += fileOutString
if not testPass:
self.addMessage(failureOutputStdString)
self.addMessage('For more details to help you debug, see test output file %s\n\n' % self.testOutFile)
self.writeFailureFile(failureOutputFileString)
return self.testFail(grades)
self.removeFailureFileIfExists()
return self.testPass(grades)
def executeNExperiences(self, grades, moduleDict, solutionDict, n):
testPass = True
qValuesPretty, weights, actions, lastExperience = self.runAgent(moduleDict, n)
stdOutString = ''
fileOutString = "==================== Iteration %d ====================\n" % n
if lastExperience is not None:
fileOutString += "Agent observed the transition (startState = %s, action = %s, endState = %s, reward = %f)\n\n" % lastExperience
weightsKey = 'weights_k_%d' % n
if weights == eval(solutionDict[weightsKey]):
fileOutString += "Weights at iteration %d are correct." % n
fileOutString += " Student/correct solution:\n\n%s\n\n" % pp.pformat(weights)
for action in actions:
qValuesKey = 'q_values_k_%d_action_%s' % (n, action)
qValues = qValuesPretty[action]
if self.comparePrettyValues(qValues, solutionDict[qValuesKey]):
fileOutString += "Q-Values at iteration %d for action '%s' are correct." % (n, action)
fileOutString += " Student/correct solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, qValues)
else:
testPass = False
outString = "Q-Values at iteration %d for action '%s' are NOT correct." % (n, action)
outString += " Student solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, qValues)
outString += " Correct solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, solutionDict[qValuesKey])
stdOutString += outString
fileOutString += outString
return testPass, stdOutString, fileOutString
def writeSolution(self, moduleDict, filePath):
with open(filePath, 'w') as handle:
for n in self.numsExperiencesForDisplay:
qValuesPretty, weights, actions, _ = self.runAgent(moduleDict, n)
handle.write(self.prettyValueSolutionString('weights_k_%d' % n, pp.pformat(weights)))
for action in actions:
handle.write(self.prettyValueSolutionString('q_values_k_%d_action_%s' % (n, action), qValuesPretty[action]))
return True
def runAgent(self, moduleDict, numExperiences):
agent = moduleDict['qlearningAgents'].ApproximateQAgent(extractor=self.extractor, **self.opts)
states = filter(lambda state : len(self.grid.getPossibleActions(state)) > 0, self.grid.getStates())
states.sort()
randObj = FixedRandom().random
# choose a random start state and a random possible action from that state
# get the next state and reward from the transition function
lastExperience = None
for i in range(numExperiences):
startState = randObj.choice(states)
action = randObj.choice(self.grid.getPossibleActions(startState))
(endState, reward) = self.env.getRandomNextState(startState, action, randObj=randObj)
lastExperience = (startState, action, endState, reward)
agent.update(*lastExperience)
actions = list(reduce(lambda a, b: set(a).union(b), [self.grid.getPossibleActions(state) for state in states]))
qValues = {}
weights = agent.getWeights()
for state in states:
possibleActions = self.grid.getPossibleActions(state)
for action in actions:
if not qValues.has_key(action):
qValues[action] = {}
if action in possibleActions:
qValues[action][state] = agent.getQValue(state, action)
else:
qValues[action][state] = None
qValuesPretty = {}
for action in actions:
qValuesPretty[action] = self.prettyValues(qValues[action])
return (qValuesPretty, weights, actions, lastExperience)
def prettyPrint(self, elements, formatString):
pretty = ''
states = self.grid.getStates()
for ybar in range(self.grid.grid.height):
y = self.grid.grid.height-1-ybar
row = []
for x in range(self.grid.grid.width):
if (x, y) in states:
value = elements[(x, y)]
if value is None:
row.append(' illegal')
else:
row.append(formatString.format(elements[(x,y)]))
else:
row.append('_' * 10)
pretty += ' %s\n' % (" ".join(row), )
pretty += '\n'
return pretty
def prettyValues(self, values):
return self.prettyPrint(values, '{0:10.4f}')
def prettyPolicy(self, policy):
return self.prettyPrint(policy, '{0:10s}')
def prettyValueSolutionString(self, name, pretty):
return '%s: """\n%s\n"""\n\n' % (name, pretty.rstrip())
def comparePrettyValues(self, aPretty, bPretty, tolerance=0.01):
aList = self.parsePrettyValues(aPretty)
bList = self.parsePrettyValues(bPretty)
if len(aList) != len(bList):
return False
for a, b in zip(aList, bList):
try:
aNum = float(a)
bNum = float(b)
# error = abs((aNum - bNum) / ((aNum + bNum) / 2.0))
error = abs(aNum - bNum)
if error > tolerance:
return False
except ValueError:
if a.strip() != b.strip():
return False
return True
def parsePrettyValues(self, pretty):
values = pretty.split()
return values
class QLearningTest(testClasses.TestCase):
def __init__(self, question, testDict):
super(QLearningTest, self).__init__(question, testDict)
self.discount = float(testDict['discount'])
self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
if 'noise' in testDict: self.grid.setNoise(float(testDict['noise']))
if 'livingReward' in testDict: self.grid.setLivingReward(float(testDict['livingReward']))
self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
self.env = gridworld.GridworldEnvironment(self.grid)
self.epsilon = float(testDict['epsilon'])
self.learningRate = float(testDict['learningRate'])
self.opts = {'actionFn': self.env.getPossibleActions, 'epsilon': self.epsilon, 'gamma': self.discount, 'alpha': self.learningRate}
numExperiences = int(testDict['numExperiences'])
maxPreExperiences = 10
self.numsExperiencesForDisplay = range(min(numExperiences, maxPreExperiences))
self.testOutFile = testDict['test_out_file']
if maxPreExperiences < numExperiences:
self.numsExperiencesForDisplay.append(numExperiences)
def writeFailureFile(self, string):
with open(self.testOutFile, 'w') as handle:
handle.write(string)
def removeFailureFileIfExists(self):
if os.path.exists(self.testOutFile):
os.remove(self.testOutFile)
def execute(self, grades, moduleDict, solutionDict):
failureOutputFileString = ''
failureOutputStdString = ''
for n in self.numsExperiencesForDisplay:
checkValuesAndPolicy = (n == self.numsExperiencesForDisplay[-1])
testPass, stdOutString, fileOutString = self.executeNExperiences(grades, moduleDict, solutionDict, n, checkValuesAndPolicy)
failureOutputStdString += stdOutString
failureOutputFileString += fileOutString
if not testPass:
self.addMessage(failureOutputStdString)
self.addMessage('For more details to help you debug, see test output file %s\n\n' % self.testOutFile)
self.writeFailureFile(failureOutputFileString)
return self.testFail(grades)
self.removeFailureFileIfExists()
return self.testPass(grades)
def executeNExperiences(self, grades, moduleDict, solutionDict, n, checkValuesAndPolicy):
testPass = True
valuesPretty, qValuesPretty, actions, policyPretty, lastExperience = self.runAgent(moduleDict, n)
stdOutString = ''
fileOutString = "==================== Iteration %d ====================\n" % n
if lastExperience is not None:
fileOutString += "Agent observed the transition (startState = %s, action = %s, endState = %s, reward = %f)\n\n\n" % lastExperience
for action in actions:
qValuesKey = 'q_values_k_%d_action_%s' % (n, action)
qValues = qValuesPretty[action]
if self.comparePrettyValues(qValues, solutionDict[qValuesKey]):
fileOutString += "Q-Values at iteration %d for action '%s' are correct." % (n, action)
fileOutString += " Student/correct solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, qValues)
else:
testPass = False
outString = "Q-Values at iteration %d for action '%s' are NOT correct." % (n, action)
outString += " Student solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, qValues)
outString += " Correct solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, solutionDict[qValuesKey])
stdOutString += outString
fileOutString += outString
if checkValuesAndPolicy:
if not self.comparePrettyValues(valuesPretty, solutionDict['values']):
testPass = False
outString = "Values are NOT correct."
outString += " Student solution:\n\t%s" % self.prettyValueSolutionString('values', valuesPretty)
outString += " Correct solution:\n\t%s" % self.prettyValueSolutionString('values', solutionDict['values'])
stdOutString += outString
fileOutString += outString
if not self.comparePrettyValues(policyPretty, solutionDict['policy']):
testPass = False
outString = "Policy is NOT correct."
outString += " Student solution:\n\t%s" % self.prettyValueSolutionString('policy', policyPretty)
outString += " Correct solution:\n\t%s" % self.prettyValueSolutionString('policy', solutionDict['policy'])
stdOutString += outString
fileOutString += outString
return testPass, stdOutString, fileOutString
def writeSolution(self, moduleDict, filePath):
with open(filePath, 'w') as handle:
valuesPretty = ''
policyPretty = ''
for n in self.numsExperiencesForDisplay:
valuesPretty, qValuesPretty, actions, policyPretty, _ = self.runAgent(moduleDict, n)
for action in actions:
handle.write(self.prettyValueSolutionString('q_values_k_%d_action_%s' % (n, action), qValuesPretty[action]))
handle.write(self.prettyValueSolutionString('values', valuesPretty))
handle.write(self.prettyValueSolutionString('policy', policyPretty))
return True
def runAgent(self, moduleDict, numExperiences):
agent = moduleDict['qlearningAgents'].QLearningAgent(**self.opts)
states = filter(lambda state : len(self.grid.getPossibleActions(state)) > 0, self.grid.getStates())
states.sort()
randObj = FixedRandom().random
# choose a random start state and a random possible action from that state
# get the next state and reward from the transition function
lastExperience = None
for i in range(numExperiences):
startState = randObj.choice(states)
action = randObj.choice(self.grid.getPossibleActions(startState))
(endState, reward) = self.env.getRandomNextState(startState, action, randObj=randObj)
lastExperience = (startState, action, endState, reward)
agent.update(*lastExperience)
actions = list(reduce(lambda a, b: set(a).union(b), [self.grid.getPossibleActions(state) for state in states]))
values = {}
qValues = {}
policy = {}
for state in states:
values[state] = agent.computeValueFromQValues(state)
policy[state] = agent.computeActionFromQValues(state)
possibleActions = self.grid.getPossibleActions(state)
for action in actions:
if not qValues.has_key(action):
qValues[action] = {}
if action in possibleActions:
qValues[action][state] = agent.getQValue(state, action)
else:
qValues[action][state] = None
valuesPretty = self.prettyValues(values)
policyPretty = self.prettyPolicy(policy)
qValuesPretty = {}
for action in actions:
qValuesPretty[action] = self.prettyValues(qValues[action])
return (valuesPretty, qValuesPretty, actions, policyPretty, lastExperience)
def prettyPrint(self, elements, formatString):
pretty = ''
states = self.grid.getStates()
for ybar in range(self.grid.grid.height):
y = self.grid.grid.height-1-ybar
row = []
for x in range(self.grid.grid.width):
if (x, y) in states:
value = elements[(x, y)]
if value is None:
row.append(' illegal')
else:
row.append(formatString.format(elements[(x,y)]))
else:
row.append('_' * 10)
pretty += ' %s\n' % (" ".join(row), )
pretty += '\n'
return pretty
def prettyValues(self, values):
return self.prettyPrint(values, '{0:10.4f}')
def prettyPolicy(self, policy):
return self.prettyPrint(policy, '{0:10s}')
def prettyValueSolutionString(self, name, pretty):
return '%s: """\n%s\n"""\n\n' % (name, pretty.rstrip())
def comparePrettyValues(self, aPretty, bPretty, tolerance=0.01):
aList = self.parsePrettyValues(aPretty)
bList = self.parsePrettyValues(bPretty)
if len(aList) != len(bList):
return False
for a, b in zip(aList, bList):
try:
aNum = float(a)
bNum = float(b)
# error = abs((aNum - bNum) / ((aNum + bNum) / 2.0))
error = abs(aNum - bNum)
if error > tolerance:
return False
except ValueError:
if a.strip() != b.strip():
return False
return True
def parsePrettyValues(self, pretty):
values = pretty.split()
return values
class EpsilonGreedyTest(testClasses.TestCase):
def __init__(self, question, testDict):
super(EpsilonGreedyTest, self).__init__(question, testDict)
self.discount = float(testDict['discount'])
self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
if 'noise' in testDict: self.grid.setNoise(float(testDict['noise']))
if 'livingReward' in testDict: self.grid.setLivingReward(float(testDict['livingReward']))
self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
self.env = gridworld.GridworldEnvironment(self.grid)
self.epsilon = float(testDict['epsilon'])
self.learningRate = float(testDict['learningRate'])
self.numExperiences = int(testDict['numExperiences'])
self.numIterations = int(testDict['iterations'])
self.opts = {'actionFn': self.env.getPossibleActions, 'epsilon': self.epsilon, 'gamma': self.discount, 'alpha': self.learningRate}
def execute(self, grades, moduleDict, solutionDict):
if self.testEpsilonGreedy(moduleDict):
return self.testPass(grades)
else:
return self.testFail(grades)
def writeSolution(self, moduleDict, filePath):
with open(filePath, 'w') as handle:
handle.write('# This is the solution file for %s.\n' % self.path)
handle.write('# File intentionally blank.\n')
return True
def runAgent(self, moduleDict):
agent = moduleDict['qlearningAgents'].QLearningAgent(**self.opts)
states = filter(lambda state : len(self.grid.getPossibleActions(state)) > 0, self.grid.getStates())
states.sort()
randObj = FixedRandom().random
# choose a random start state and a random possible action from that state
# get the next state and reward from the transition function
for i in range(self.numExperiences):
startState = randObj.choice(states)
action = randObj.choice(self.grid.getPossibleActions(startState))
(endState, reward) = self.env.getRandomNextState(startState, action, randObj=randObj)
agent.update(startState, action, endState, reward)
return agent
def testEpsilonGreedy(self, moduleDict, tolerance=0.025):
agent = self.runAgent(moduleDict)
for state in self.grid.getStates():
numLegalActions = len(agent.getLegalActions(state))
if numLegalActions <= 1:
continue
numGreedyChoices = 0
optimalAction = agent.computeActionFromQValues(state)
for iteration in range(self.numIterations):
# assume that their computeActionFromQValues implementation is correct (q4 tests this)
if agent.getAction(state) == optimalAction:
numGreedyChoices += 1
# e = epsilon, g = # greedy actions, n = numIterations, k = numLegalActions
# g = n * [(1-e) + e/k] -> e = (n - g) / (n - n/k)
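# sanity check of this algebra: with e = 0.5, k = 2, n = 100, we expect
# g ~= 100 * (0.5 + 0.25) = 75, and (100 - 75) / (100 - 50) = 0.5 recovers e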
empiricalEpsilonNumerator = self.numIterations - numGreedyChoices
empiricalEpsilonDenominator = self.numIterations - self.numIterations / float(numLegalActions)
empiricalEpsilon = empiricalEpsilonNumerator / empiricalEpsilonDenominator
error = abs(empiricalEpsilon - self.epsilon)
if error > tolerance:
self.addMessage("Epsilon-greedy action selection is not correct.")
self.addMessage("Actual epsilon = %f; student empirical epsilon = %f; error = %f > tolerance = %f" % (self.epsilon, empiricalEpsilon, error, tolerance))
return False
return True
### q6
class Question6Test(testClasses.TestCase):
def __init__(self, question, testDict):
super(Question6Test, self).__init__(question, testDict)
def execute(self, grades, moduleDict, solutionDict):
studentSolution = moduleDict['analysis'].question6()
studentSolution = str(studentSolution).strip().lower()
hashedSolution = sha1(studentSolution).hexdigest()
if hashedSolution == '46729c96bb1e4081fdc81a8ff74b3e5db8fba415':
return self.testPass(grades)
else:
self.addMessage("Solution is not correct.")
self.addMessage(" Student solution: %s" % (studentSolution,))
return self.testFail(grades)
def writeSolution(self, moduleDict, filePath):
handle = open(filePath, 'w')
handle.write('# This is the solution file for %s.\n' % self.path)
handle.write('# File intentionally blank.\n')
handle.close()
return True
### q7/q8
### =====
## Average wins of a pacman agent
class EvalAgentTest(testClasses.TestCase):
def __init__(self, question, testDict):
super(EvalAgentTest, self).__init__(question, testDict)
self.pacmanParams = testDict['pacmanParams']
self.scoreMinimum = int(testDict['scoreMinimum']) if 'scoreMinimum' in testDict else None
self.nonTimeoutMinimum = int(testDict['nonTimeoutMinimum']) if 'nonTimeoutMinimum' in testDict else None
self.winsMinimum = int(testDict['winsMinimum']) if 'winsMinimum' in testDict else None
self.scoreThresholds = [int(s) for s in testDict.get('scoreThresholds','').split()]
self.nonTimeoutThresholds = [int(s) for s in testDict.get('nonTimeoutThresholds','').split()]
self.winsThresholds = [int(s) for s in testDict.get('winsThresholds','').split()]
self.maxPoints = sum([len(t) for t in [self.scoreThresholds, self.nonTimeoutThresholds, self.winsThresholds]])
def execute(self, grades, moduleDict, solutionDict):
self.addMessage('Grading agent using command: python pacman.py %s'% (self.pacmanParams,))
startTime = time.time()
games = pacman.runGames(** pacman.readCommand(self.pacmanParams.split(' ')))
totalTime = time.time() - startTime
numGames = len(games)
stats = {'time': totalTime, 'wins': [g.state.isWin() for g in games].count(True),
'games': games, 'scores': [g.state.getScore() for g in games],
'timeouts': [g.agentTimeout for g in games].count(True), 'crashes': [g.agentCrashed for g in games].count(True)}
averageScore = sum(stats['scores']) / float(len(stats['scores']))
nonTimeouts = numGames - stats['timeouts']
wins = stats['wins']
def gradeThreshold(value, minimum, thresholds, name):
points = 0
passed = (minimum == None) or (value >= minimum)
if passed:
for t in thresholds:
if value >= t:
points += 1
return (passed, points, value, minimum, thresholds, name)
results = [gradeThreshold(averageScore, self.scoreMinimum, self.scoreThresholds, "average score"),
gradeThreshold(nonTimeouts, self.nonTimeoutMinimum, self.nonTimeoutThresholds, "games not timed out"),
gradeThreshold(wins, self.winsMinimum, self.winsThresholds, "wins")]
totalPoints = 0
for passed, points, value, minimum, thresholds, name in results:
if minimum == None and len(thresholds)==0:
continue
# print passed, points, value, minimum, thresholds, name
totalPoints += points
if not passed:
assert points == 0
self.addMessage("%s %s (fail: below minimum value %s)" % (value, name, minimum))
else:
self.addMessage("%s %s (%s of %s points)" % (value, name, points, len(thresholds)))
if minimum != None:
self.addMessage(" Grading scheme:")
self.addMessage(" < %s: fail" % (minimum,))
if len(thresholds)==0 or minimum != thresholds[0]:
self.addMessage(" >= %s: 0 points" % (minimum,))
for idx, threshold in enumerate(thresholds):
self.addMessage(" >= %s: %s points" % (threshold, idx+1))
elif len(thresholds) > 0:
self.addMessage(" Grading scheme:")
self.addMessage(" < %s: 0 points" % (thresholds[0],))
for idx, threshold in enumerate(thresholds):
self.addMessage(" >= %s: %s points" % (threshold, idx+1))
if any([not passed for passed, _, _, _, _, _ in results]):
totalPoints = 0
return self.testPartial(grades, totalPoints, self.maxPoints)
def writeSolution(self, moduleDict, filePath):
with open(filePath, 'w') as handle:
handle.write('# This is the solution file for %s.\n' % self.path)
handle.write('# File intentionally blank.\n')
return True
### q2/q3
### =====
## For each parameter setting, compute the optimal policy and see if it satisfies some properties
def followPath(policy, start, numSteps=100):
state = start
path = []
for i in range(numSteps):
if state not in policy:
break
action = policy[state]
path.append("(%s,%s)" % state)
if action == 'north': nextState = state[0],state[1]+1
if action == 'south': nextState = state[0],state[1]-1
if action == 'east': nextState = state[0]+1,state[1]
if action == 'west': nextState = state[0]-1,state[1]
if action == 'exit' or action == None:
path.append('TERMINAL_STATE')
break
state = nextState
return path
def parseGrid(string):
grid = [[entry.strip() for entry in line.split()] for line in string.split('\n')]
for row in grid:
for x, col in enumerate(row):
try:
col = int(col)
except:
pass
if col == "_":
col = ' '
row[x] = col
return gridworld.makeGrid(grid)
def computePolicy(moduleDict, grid, discount):
valueIterator = moduleDict['valueIterationAgents'].ValueIterationAgent(grid, discount=discount)
policy = {}
for state in grid.getStates():
policy[state] = valueIterator.computeActionFromValues(state)
return policy
class GridPolicyTest(testClasses.TestCase):
def __init__(self, question, testDict):
super(GridPolicyTest, self).__init__(question, testDict)
# Function in module in analysis that returns (discount, noise)
self.parameterFn = testDict['parameterFn']
self.question2 = testDict.get('question2', 'false').lower() == 'true'
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
self.gridText = testDict['grid']
self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
self.gridName = testDict['gridName']
# Policy specification
# _ policy choice not checked
# N, E, S, W policy action must be north, east, south, west
#
self.policy = parseGrid(testDict['policy'])
# State the most probable path must visit
# (x,y) for a particular location; (0,0) is bottom left
# terminal for the terminal state
self.pathVisits = testDict.get('pathVisits', None)
# State the most probable path must not visit
# (x,y) for a particular location; (0,0) is bottom left
# terminal for the terminal state
self.pathNotVisits = testDict.get('pathNotVisits', None)
def execute(self, grades, moduleDict, solutionDict):
if not hasattr(moduleDict['analysis'], self.parameterFn):
self.addMessage('Method not implemented: analysis.%s' % (self.parameterFn,))
return self.testFail(grades)
result = getattr(moduleDict['analysis'], self.parameterFn)()
if type(result) == str and result.lower()[0:3] == "not":
self.addMessage('Actually, it is possible!')
return self.testFail(grades)
if self.question2:
livingReward = None
try:
discount, noise = result
discount = float(discount)
noise = float(noise)
except:
self.addMessage('Did not return a (discount, noise) pair; instead analysis.%s returned: %s' % (self.parameterFn, result))
return self.testFail(grades)
if discount != 0.9 and noise != 0.2:
self.addMessage('Must change either the discount or the noise, not both. Returned (discount, noise) = %s' % (result,))
return self.testFail(grades)
else:
try:
discount, noise, livingReward = result
discount = float(discount)
noise = float(noise)
livingReward = float(livingReward)
except:
self.addMessage('Did not return a (discount, noise, living reward) triple; instead analysis.%s returned: %s' % (self.parameterFn, result))
return self.testFail(grades)
self.grid.setNoise(noise)
if livingReward != None:
self.grid.setLivingReward(livingReward)
start = self.grid.getStartState()
policy = computePolicy(moduleDict, self.grid, discount)
## check policy
actionMap = {'N': 'north', 'E': 'east', 'S': 'south', 'W': 'west', 'X': 'exit'}
width, height = self.policy.width, self.policy.height
policyPassed = True
for x in range(width):
for y in range(height):
if self.policy[x][y] in actionMap and policy[(x,y)] != actionMap[self.policy[x][y]]:
differPoint = (x,y)
policyPassed = False
if not policyPassed:
self.addMessage('Policy not correct.')
self.addMessage(' Student policy at %s: %s' % (differPoint, policy[differPoint]))
self.addMessage(' Correct policy at %s: %s' % (differPoint, actionMap[self.policy[differPoint[0]][differPoint[1]]]))
self.addMessage(' Student policy:')
self.printPolicy(policy, False)
self.addMessage(" Legend: N,S,E,W at states which move north etc, X at states which exit,")
self.addMessage(" . at states where the policy is not defined (e.g. walls)")
self.addMessage(' Correct policy specification:')
self.printPolicy(self.policy, True)
self.addMessage(" Legend: N,S,E,W for states in which the student policy must move north etc,")
self.addMessage(" _ for states where it doesn't matter what the student policy does.")
self.printGridworld()
return self.testFail(grades)
## check path
path = followPath(policy, self.grid.getStartState())
if self.pathVisits != None and self.pathVisits not in path:
self.addMessage('Policy does not visit state %s when moving without noise.' % (self.pathVisits,))
self.addMessage(' States visited: %s' % (path,))
self.addMessage(' Student policy:')
self.printPolicy(policy, False)
self.addMessage(" Legend: N,S,E,W at states which move north etc, X at states which exit,")
self.addMessage(" . at states where policy not defined")
self.printGridworld()
return self.testFail(grades)
if self.pathNotVisits != None and self.pathNotVisits in path:
self.addMessage('Policy visits state %s when moving without noise.' % (self.pathNotVisits,))
self.addMessage(' States visited: %s' % (path,))
self.addMessage(' Student policy:')
self.printPolicy(policy, False)
self.addMessage(" Legend: N,S,E,W at states which move north etc, X at states which exit,")
self.addMessage(" . at states where policy not defined")
self.printGridworld()
return self.testFail(grades)
return self.testPass(grades)
def printGridworld(self):
self.addMessage(' Gridworld:')
for line in self.gridText.split('\n'):
self.addMessage(' ' + line)
self.addMessage(' Legend: # wall, _ empty, S start, numbers terminal states with that reward.')
def printPolicy(self, policy, policyTypeIsGrid):
if policyTypeIsGrid:
legend = {'N': 'N', 'E': 'E', 'S': 'S', 'W': 'W', ' ': '_'}
else:
legend = {'north': 'N', 'east': 'E', 'south': 'S', 'west': 'W', 'exit': 'X', '.': '.', ' ': '_'}
for ybar in range(self.grid.grid.height):
y = self.grid.grid.height-1-ybar
if policyTypeIsGrid:
self.addMessage(" %s" % (" ".join([legend[policy[x][y]] for x in range(self.grid.grid.width)]),))
else:
self.addMessage(" %s" % (" ".join([legend[policy.get((x,y), '.')] for x in range(self.grid.grid.width)]),))
# for state in sorted(self.grid.getStates()):
# if state != 'TERMINAL_STATE':
# self.addMessage(' (%s,%s) %s' % (state[0], state[1], policy[state]))
def writeSolution(self, moduleDict, filePath):
with open(filePath, 'w') as handle:
handle.write('# This is the solution file for %s.\n' % self.path)
handle.write('# File intentionally blank.\n')
return True

View file

@ -0,0 +1,189 @@
# testClasses.py
# --------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
# import modules from python standard library
import inspect
import re
import sys
# Class which models a question in a project. Note that questions have a
# maximum number of points they are worth, and are composed of a series of
# test cases
class Question(object):
def raiseNotDefined(self):
print 'Method not implemented: %s' % inspect.stack()[1][3]
sys.exit(1)
def __init__(self, questionDict, display):
self.maxPoints = int(questionDict['max_points'])
self.testCases = []
self.display = display
def getDisplay(self):
return self.display
def getMaxPoints(self):
return self.maxPoints
# Note that 'thunk' must be a function which accepts a single argument,
# namely a 'grading' object
def addTestCase(self, testCase, thunk):
self.testCases.append((testCase, thunk))
def execute(self, grades):
self.raiseNotDefined()
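# A small illustrative sketch of how a concrete Question is driven (the variable
# names below are hypothetical, not taken from the autograder itself):
#     question = PassAllTestsQuestion(questionDict, display)
#     question.addTestCase(testCase, lambda grades: testCase.execute(grades, moduleDict, solutionDict))
#     question.execute(grades)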
# Question in which all test cases must be passed in order to receive credit
class PassAllTestsQuestion(Question):
def execute(self, grades):
# TODO: is this the right way to use grades? The autograder doesn't seem to use it.
testsFailed = False
grades.assignZeroCredit()
for _, f in self.testCases:
if not f(grades):
testsFailed = True
if testsFailed:
grades.fail("Tests failed.")
else:
grades.assignFullCredit()
# Question in which partial credit is given for test cases with a ``points'' property.
# All other tests are mandatory and must be passed.
class HackedPartialCreditQuestion(Question):
def execute(self, grades):
# TODO: is this the right way to use grades? The autograder doesn't seem to use it.
grades.assignZeroCredit()
points = 0
passed = True
for testCase, f in self.testCases:
testResult = f(grades)
if "points" in testCase.testDict:
if testResult: points += float(testCase.testDict["points"])
else:
passed = passed and testResult
## FIXME: Below terrible hack to match q3's logic
if int(points) == self.maxPoints and not passed:
grades.assignZeroCredit()
else:
grades.addPoints(int(points))
class Q6PartialCreditQuestion(Question):
"""Fails any test which returns False, otherwise doesn't effect the grades object.
Partial credit tests will add the required points."""
def execute(self, grades):
grades.assignZeroCredit()
results = []
for _, f in self.testCases:
results.append(f(grades))
if False in results:
grades.assignZeroCredit()
class PartialCreditQuestion(Question):
"""Fails any test which returns False, otherwise doesn't effect the grades object.
Partial credit tests will add the required points."""
def execute(self, grades):
grades.assignZeroCredit()
for _, f in self.testCases:
if not f(grades):
grades.assignZeroCredit()
grades.fail("Tests failed.")
return False
class NumberPassedQuestion(Question):
"""Grade is the number of test cases passed."""
def execute(self, grades):
grades.addPoints([f(grades) for _, f in self.testCases].count(True))
# Template modeling a generic test case
class TestCase(object):
def raiseNotDefined(self):
print 'Method not implemented: %s' % inspect.stack()[1][3]
sys.exit(1)
def getPath(self):
return self.path
def __init__(self, question, testDict):
self.question = question
self.testDict = testDict
self.path = testDict['path']
self.messages = []
def __str__(self):
self.raiseNotDefined()
def execute(self, grades, moduleDict, solutionDict):
self.raiseNotDefined()
def writeSolution(self, moduleDict, filePath):
self.raiseNotDefined()
return True
# Tests should call the following methods for grading
# to ensure a uniform format for test output.
#
# TODO: this is hairy, but we need to fix grading.py's interface
# to get a nice hierarchical project - question - test structure,
# then these should be moved into Question proper.
def testPass(self, grades):
grades.addMessage('PASS: %s' % (self.path,))
for line in self.messages:
grades.addMessage(' %s' % (line,))
return True
def testFail(self, grades):
grades.addMessage('FAIL: %s' % (self.path,))
for line in self.messages:
grades.addMessage(' %s' % (line,))
return False
# This should really be question level?
#
def testPartial(self, grades, points, maxPoints):
grades.addPoints(points)
extraCredit = max(0, points - maxPoints)
regularCredit = points - extraCredit
grades.addMessage('%s: %s (%s of %s points)' % ("PASS" if points >= maxPoints else "FAIL", self.path, regularCredit, maxPoints))
if extraCredit > 0:
grades.addMessage('EXTRA CREDIT: %s points' % (extraCredit,))
for line in self.messages:
grades.addMessage(' %s' % (line,))
return True
def addMessage(self, message):
self.messages.extend(message.split('\n'))

View file

@ -0,0 +1,85 @@
# testParser.py
# -------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
import re
import sys
class TestParser(object):
def __init__(self, path):
# save the path to the test file
self.path = path
def removeComments(self, rawlines):
# remove any portion of a line following a '#' symbol
fixed_lines = []
for l in rawlines:
idx = l.find('#')
if idx == -1:
fixed_lines.append(l)
else:
fixed_lines.append(l[0:idx])
return '\n'.join(fixed_lines)
def parse(self):
# read in the test case and remove comments
test = {}
with open(self.path) as handle:
raw_lines = handle.read().split('\n')
test_text = self.removeComments(raw_lines)
test['__raw_lines__'] = raw_lines
test['path'] = self.path
test['__emit__'] = []
lines = test_text.split('\n')
i = 0
# read a property in each loop cycle
while(i < len(lines)):
# skip blank lines
if re.match('\A\s*\Z', lines[i]):
test['__emit__'].append(("raw", raw_lines[i]))
i += 1
continue
m = re.match('\A([^"]*?):\s*"([^"]*)"\s*\Z', lines[i])
if m:
test[m.group(1)] = m.group(2)
test['__emit__'].append(("oneline", m.group(1)))
i += 1
continue
m = re.match('\A([^"]*?):\s*"""\s*\Z', lines[i])
if m:
msg = []
i += 1
while(not re.match('\A\s*"""\s*\Z', lines[i])):
msg.append(raw_lines[i])
i += 1
test[m.group(1)] = '\n'.join(msg)
test['__emit__'].append(("multiline", m.group(1)))
i += 1
continue
print 'error parsing test file: %s' % self.path
sys.exit(1)
return test
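# A small illustrative sketch of the expected usage (the path is hypothetical):
#     testDict = TestParser('test_cases/q1/1-tinygrid.test').parse()
#     testDict['discount']   # -> the quoted value from the "discount: ..." line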
def emitTestDict(testDict, handle):
for kind, data in testDict['__emit__']:
if kind == "raw":
handle.write(data + "\n")
elif kind == "oneline":
handle.write('%s: "%s"\n' % (data, testDict[data]))
elif kind == "multiline":
handle.write('%s: """\n%s\n"""\n' % (data, testDict[data]))
else:
raise Exception("Bad __emit__")

View file

View file

@ -0,0 +1,410 @@
values_k_0: """
0.0000
0.0000
0.0000
"""
q_values_k_0_action_north: """
illegal
0.0000
illegal
"""
q_values_k_0_action_east: """
illegal
0.0000
illegal
"""
q_values_k_0_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_0_action_south: """
illegal
0.0000
illegal
"""
q_values_k_0_action_west: """
illegal
0.0000
illegal
"""
values_k_1: """
-10.0000
0.0000
10.0000
"""
q_values_k_1_action_north: """
illegal
-5.0000
illegal
"""
q_values_k_1_action_east: """
illegal
0.0000
illegal
"""
q_values_k_1_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_1_action_south: """
illegal
5.0000
illegal
"""
q_values_k_1_action_west: """
illegal
0.0000
illegal
"""
values_k_2: """
-10.0000
5.0000
10.0000
"""
q_values_k_2_action_north: """
illegal
-5.0000
illegal
"""
q_values_k_2_action_east: """
illegal
2.5000
illegal
"""
q_values_k_2_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_2_action_south: """
illegal
5.0000
illegal
"""
q_values_k_2_action_west: """
illegal
2.5000
illegal
"""
values_k_3: """
-10.0000
5.0000
10.0000
"""
q_values_k_3_action_north: """
illegal
-5.0000
illegal
"""
q_values_k_3_action_east: """
illegal
2.5000
illegal
"""
q_values_k_3_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_3_action_south: """
illegal
5.0000
illegal
"""
q_values_k_3_action_west: """
illegal
2.5000
illegal
"""
values_k_4: """
-10.0000
5.0000
10.0000
"""
q_values_k_4_action_north: """
illegal
-5.0000
illegal
"""
q_values_k_4_action_east: """
illegal
2.5000
illegal
"""
q_values_k_4_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_4_action_south: """
illegal
5.0000
illegal
"""
q_values_k_4_action_west: """
illegal
2.5000
illegal
"""
values_k_5: """
-10.0000
5.0000
10.0000
"""
q_values_k_5_action_north: """
illegal
-5.0000
illegal
"""
q_values_k_5_action_east: """
illegal
2.5000
illegal
"""
q_values_k_5_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_5_action_south: """
illegal
5.0000
illegal
"""
q_values_k_5_action_west: """
illegal
2.5000
illegal
"""
values_k_6: """
-10.0000
5.0000
10.0000
"""
q_values_k_6_action_north: """
illegal
-5.0000
illegal
"""
q_values_k_6_action_east: """
illegal
2.5000
illegal
"""
q_values_k_6_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_6_action_south: """
illegal
5.0000
illegal
"""
q_values_k_6_action_west: """
illegal
2.5000
illegal
"""
values_k_7: """
-10.0000
5.0000
10.0000
"""
q_values_k_7_action_north: """
illegal
-5.0000
illegal
"""
q_values_k_7_action_east: """
illegal
2.5000
illegal
"""
q_values_k_7_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_7_action_south: """
illegal
5.0000
illegal
"""
q_values_k_7_action_west: """
illegal
2.5000
illegal
"""
values_k_8: """
-10.0000
5.0000
10.0000
"""
q_values_k_8_action_north: """
illegal
-5.0000
illegal
"""
q_values_k_8_action_east: """
illegal
2.5000
illegal
"""
q_values_k_8_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_8_action_south: """
illegal
5.0000
illegal
"""
q_values_k_8_action_west: """
illegal
2.5000
illegal
"""
values_k_9: """
-10.0000
5.0000
10.0000
"""
q_values_k_9_action_north: """
illegal
-5.0000
illegal
"""
q_values_k_9_action_east: """
illegal
2.5000
illegal
"""
q_values_k_9_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_9_action_south: """
illegal
5.0000
illegal
"""
q_values_k_9_action_west: """
illegal
2.5000
illegal
"""
values_k_100: """
-10.0000
5.0000
10.0000
"""
q_values_k_100_action_north: """
illegal
-5.0000
illegal
"""
q_values_k_100_action_east: """
illegal
2.5000
illegal
"""
q_values_k_100_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_100_action_south: """
illegal
5.0000
illegal
"""
q_values_k_100_action_west: """
illegal
2.5000
illegal
"""
policy: """
exit
south
exit
"""
actions: """
north
east
exit
south
west
"""

View file

@ -0,0 +1,22 @@
class: "ValueIterationTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
-10
S
10
"""
discount: "0.5"
noise: "0.0"
livingReward: "0.0"
epsilon: "0.5"
learningRate: "0.1"
numExperiences: "100"
valueIterations: "100"
iterations: "10000"

View file

@ -0,0 +1,410 @@
values_k_0: """
0.0000
0.0000
0.0000
"""
q_values_k_0_action_north: """
illegal
0.0000
illegal
"""
q_values_k_0_action_east: """
illegal
0.0000
illegal
"""
q_values_k_0_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_0_action_south: """
illegal
0.0000
illegal
"""
q_values_k_0_action_west: """
illegal
0.0000
illegal
"""
values_k_1: """
-10.0000
0.0000
10.0000
"""
q_values_k_1_action_north: """
illegal
-5.6250
illegal
"""
q_values_k_1_action_east: """
illegal
0.0000
illegal
"""
q_values_k_1_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_1_action_south: """
illegal
5.6250
illegal
"""
q_values_k_1_action_west: """
illegal
0.0000
illegal
"""
values_k_2: """
-10.0000
5.6250
10.0000
"""
q_values_k_2_action_north: """
illegal
-4.5703
illegal
"""
q_values_k_2_action_east: """
illegal
3.1641
illegal
"""
q_values_k_2_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_2_action_south: """
illegal
6.6797
illegal
"""
q_values_k_2_action_west: """
illegal
3.1641
illegal
"""
values_k_3: """
-10.0000
6.6797
10.0000
"""
q_values_k_3_action_north: """
illegal
-4.3726
illegal
"""
q_values_k_3_action_east: """
illegal
3.7573
illegal
"""
q_values_k_3_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_3_action_south: """
illegal
6.8774
illegal
"""
q_values_k_3_action_west: """
illegal
3.7573
illegal
"""
values_k_4: """
-10.0000
6.8774
10.0000
"""
q_values_k_4_action_north: """
illegal
-4.3355
illegal
"""
q_values_k_4_action_east: """
illegal
3.8686
illegal
"""
q_values_k_4_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_4_action_south: """
illegal
6.9145
illegal
"""
q_values_k_4_action_west: """
illegal
3.8686
illegal
"""
values_k_5: """
-10.0000
6.9145
10.0000
"""
q_values_k_5_action_north: """
illegal
-4.3285
illegal
"""
q_values_k_5_action_east: """
illegal
3.8894
illegal
"""
q_values_k_5_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_5_action_south: """
illegal
6.9215
illegal
"""
q_values_k_5_action_west: """
illegal
3.8894
illegal
"""
values_k_6: """
-10.0000
6.9215
10.0000
"""
q_values_k_6_action_north: """
illegal
-4.3272
illegal
"""
q_values_k_6_action_east: """
illegal
3.8933
illegal
"""
q_values_k_6_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_6_action_south: """
illegal
6.9228
illegal
"""
q_values_k_6_action_west: """
illegal
3.8933
illegal
"""
values_k_7: """
-10.0000
6.9228
10.0000
"""
q_values_k_7_action_north: """
illegal
-4.3270
illegal
"""
q_values_k_7_action_east: """
illegal
3.8941
illegal
"""
q_values_k_7_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_7_action_south: """
illegal
6.9230
illegal
"""
q_values_k_7_action_west: """
illegal
3.8941
illegal
"""
values_k_8: """
-10.0000
6.9230
10.0000
"""
q_values_k_8_action_north: """
illegal
-4.3269
illegal
"""
q_values_k_8_action_east: """
illegal
3.8942
illegal
"""
q_values_k_8_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_8_action_south: """
illegal
6.9231
illegal
"""
q_values_k_8_action_west: """
illegal
3.8942
illegal
"""
values_k_9: """
-10.0000
6.9231
10.0000
"""
q_values_k_9_action_north: """
illegal
-4.3269
illegal
"""
q_values_k_9_action_east: """
illegal
3.8942
illegal
"""
q_values_k_9_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_9_action_south: """
illegal
6.9231
illegal
"""
q_values_k_9_action_west: """
illegal
3.8942
illegal
"""
values_k_100: """
-10.0000
6.9231
10.0000
"""
q_values_k_100_action_north: """
illegal
-4.3269
illegal
"""
q_values_k_100_action_east: """
illegal
3.8942
illegal
"""
q_values_k_100_action_exit: """
-10.0000
illegal
10.0000
"""
q_values_k_100_action_south: """
illegal
6.9231
illegal
"""
q_values_k_100_action_west: """
illegal
3.8942
illegal
"""
policy: """
exit
south
exit
"""
actions: """
north
east
exit
south
west
"""

View file

@ -0,0 +1,22 @@
class: "ValueIterationTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
-10
S
10
"""
discount: "0.75"
noise: "0.25"
livingReward: "0.0"
epsilon: "0.5"
learningRate: "0.1"
numExperiences: "100"
valueIterations: "100"
iterations: "10000"

View file

@ -0,0 +1,678 @@
values_k_0: """
__________ 0.0000 __________
0.0000 0.0000 0.0000
0.0000 0.0000 0.0000
0.0000 0.0000 0.0000
0.0000 0.0000 0.0000
0.0000 0.0000 0.0000
__________ 0.0000 __________
"""
q_values_k_0_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_0_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_0_action_exit: """
__________ 10.0000 __________
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
__________ 1.0000 __________
"""
q_values_k_0_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_0_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
values_k_1: """
__________ 10.0000 __________
-100.0000 0.0000 -100.0000
-100.0000 0.0000 -100.0000
-100.0000 0.0000 -100.0000
-100.0000 0.0000 -100.0000
-100.0000 0.0000 -100.0000
__________ 1.0000 __________
"""
q_values_k_1_action_north: """
__________ illegal __________
illegal -0.8500 illegal
illegal -8.5000 illegal
illegal -8.5000 illegal
illegal -8.5000 illegal
illegal -8.5000 illegal
__________ illegal __________
"""
q_values_k_1_action_east: """
__________ illegal __________
illegal -76.0750 illegal
illegal -76.5000 illegal
illegal -76.5000 illegal
illegal -76.5000 illegal
illegal -76.4575 illegal
__________ illegal __________
"""
q_values_k_1_action_exit: """
__________ 10.0000 __________
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
__________ 1.0000 __________
"""
q_values_k_1_action_south: """
__________ illegal __________
illegal -8.5000 illegal
illegal -8.5000 illegal
illegal -8.5000 illegal
illegal -8.5000 illegal
illegal -7.7350 illegal
__________ illegal __________
"""
q_values_k_1_action_west: """
__________ illegal __________
illegal -76.0750 illegal
illegal -76.5000 illegal
illegal -76.5000 illegal
illegal -76.5000 illegal
illegal -76.4575 illegal
__________ illegal __________
"""
values_k_2: """
__________ 10.0000 __________
-100.0000 -0.8500 -100.0000
-100.0000 -8.5000 -100.0000
-100.0000 -8.5000 -100.0000
-100.0000 -8.5000 -100.0000
-100.0000 -7.7350 -100.0000
__________ 1.0000 __________
"""
q_values_k_2_action_north: """
__________ illegal __________
illegal -0.8500 illegal
illegal -9.1502 illegal
illegal -15.0025 illegal
illegal -15.0025 illegal
illegal -15.0025 illegal
__________ illegal __________
"""
q_values_k_2_action_east: """
__________ illegal __________
illegal -76.4363 illegal
illegal -76.8974 illegal
illegal -77.2225 illegal
illegal -77.1900 illegal
illegal -76.8187 illegal
__________ illegal __________
"""
q_values_k_2_action_exit: """
__________ 10.0000 __________
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
__________ 1.0000 __________
"""
q_values_k_2_action_south: """
__________ illegal __________
illegal -15.0025 illegal
illegal -15.0025 illegal
illegal -15.0025 illegal
illegal -14.4173 illegal
illegal -7.7350 illegal
__________ illegal __________
"""
q_values_k_2_action_west: """
__________ illegal __________
illegal -76.4363 illegal
illegal -76.8974 illegal
illegal -77.2225 illegal
illegal -77.1900 illegal
illegal -76.8187 illegal
__________ illegal __________
"""
values_k_3: """
__________ 10.0000 __________
-100.0000 -0.8500 -100.0000
-100.0000 -9.1502 -100.0000
-100.0000 -15.0025 -100.0000
-100.0000 -14.4173 -100.0000
-100.0000 -7.7350 -100.0000
__________ 1.0000 __________
"""
q_values_k_3_action_north: """
__________ illegal __________
illegal -0.8500 illegal
illegal -9.1502 illegal
illegal -15.4999 illegal
illegal -19.9769 illegal
illegal -19.5292 illegal
__________ illegal __________
"""
q_values_k_3_action_east: """
__________ illegal __________
illegal -76.4639 illegal
illegal -77.1737 illegal
illegal -77.5016 illegal
illegal -77.4663 illegal
illegal -77.0702 illegal
__________ illegal __________
"""
q_values_k_3_action_exit: """
__________ 10.0000 __________
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
__________ 1.0000 __________
"""
q_values_k_3_action_south: """
__________ illegal __________
illegal -15.4999 illegal
illegal -19.9769 illegal
illegal -19.5292 illegal
illegal -14.4173 illegal
illegal -7.7350 illegal
__________ illegal __________
"""
q_values_k_3_action_west: """
__________ illegal __________
illegal -76.4639 illegal
illegal -77.1737 illegal
illegal -77.5016 illegal
illegal -77.4663 illegal
illegal -77.0702 illegal
__________ illegal __________
"""
values_k_4: """
__________ 10.0000 __________
-100.0000 -0.8500 -100.0000
-100.0000 -9.1502 -100.0000
-100.0000 -15.4999 -100.0000
-100.0000 -14.4173 -100.0000
-100.0000 -7.7350 -100.0000
__________ 1.0000 __________
"""
q_values_k_4_action_north: """
__________ illegal __________
illegal -0.8500 illegal
illegal -9.1502 illegal
illegal -15.4999 illegal
illegal -20.3575 illegal
illegal -19.5292 illegal
__________ illegal __________
"""
q_values_k_4_action_east: """
__________ illegal __________
illegal -76.4639 illegal
illegal -77.1949 illegal
illegal -77.5016 illegal
illegal -77.4875 illegal
illegal -77.0702 illegal
__________ illegal __________
"""
q_values_k_4_action_exit: """
__________ 10.0000 __________
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
__________ 1.0000 __________
"""
q_values_k_4_action_south: """
__________ illegal __________
illegal -15.4999 illegal
illegal -20.3575 illegal
illegal -19.5292 illegal
illegal -14.4173 illegal
illegal -7.7350 illegal
__________ illegal __________
"""
q_values_k_4_action_west: """
__________ illegal __________
illegal -76.4639 illegal
illegal -77.1949 illegal
illegal -77.5016 illegal
illegal -77.4875 illegal
illegal -77.0702 illegal
__________ illegal __________
"""
values_k_5: """
__________ 10.0000 __________
-100.0000 -0.8500 -100.0000
-100.0000 -9.1502 -100.0000
-100.0000 -15.4999 -100.0000
-100.0000 -14.4173 -100.0000
-100.0000 -7.7350 -100.0000
__________ 1.0000 __________
"""
q_values_k_5_action_north: """
__________ illegal __________
illegal -0.8500 illegal
illegal -9.1502 illegal
illegal -15.4999 illegal
illegal -20.3575 illegal
illegal -19.5292 illegal
__________ illegal __________
"""
q_values_k_5_action_east: """
__________ illegal __________
illegal -76.4639 illegal
illegal -77.1949 illegal
illegal -77.5016 illegal
illegal -77.4875 illegal
illegal -77.0702 illegal
__________ illegal __________
"""
q_values_k_5_action_exit: """
__________ 10.0000 __________
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
__________ 1.0000 __________
"""
q_values_k_5_action_south: """
__________ illegal __________
illegal -15.4999 illegal
illegal -20.3575 illegal
illegal -19.5292 illegal
illegal -14.4173 illegal
illegal -7.7350 illegal
__________ illegal __________
"""
q_values_k_5_action_west: """
__________ illegal __________
illegal -76.4639 illegal
illegal -77.1949 illegal
illegal -77.5016 illegal
illegal -77.4875 illegal
illegal -77.0702 illegal
__________ illegal __________
"""
values_k_6: """
__________ 10.0000 __________
-100.0000 -0.8500 -100.0000
-100.0000 -9.1502 -100.0000
-100.0000 -15.4999 -100.0000
-100.0000 -14.4173 -100.0000
-100.0000 -7.7350 -100.0000
__________ 1.0000 __________
"""
q_values_k_6_action_north: """
__________ illegal __________
illegal -0.8500 illegal
illegal -9.1502 illegal
illegal -15.4999 illegal
illegal -20.3575 illegal
illegal -19.5292 illegal
__________ illegal __________
"""
q_values_k_6_action_east: """
__________ illegal __________
illegal -76.4639 illegal
illegal -77.1949 illegal
illegal -77.5016 illegal
illegal -77.4875 illegal
illegal -77.0702 illegal
__________ illegal __________
"""
q_values_k_6_action_exit: """
__________ 10.0000 __________
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
__________ 1.0000 __________
"""
q_values_k_6_action_south: """
__________ illegal __________
illegal -15.4999 illegal
illegal -20.3575 illegal
illegal -19.5292 illegal
illegal -14.4173 illegal
illegal -7.7350 illegal
__________ illegal __________
"""
q_values_k_6_action_west: """
__________ illegal __________
illegal -76.4639 illegal
illegal -77.1949 illegal
illegal -77.5016 illegal
illegal -77.4875 illegal
illegal -77.0702 illegal
__________ illegal __________
"""
values_k_7: """
__________ 10.0000 __________
-100.0000 -0.8500 -100.0000
-100.0000 -9.1502 -100.0000
-100.0000 -15.4999 -100.0000
-100.0000 -14.4173 -100.0000
-100.0000 -7.7350 -100.0000
__________ 1.0000 __________
"""
q_values_k_7_action_north: """
__________ illegal __________
illegal -0.8500 illegal
illegal -9.1502 illegal
illegal -15.4999 illegal
illegal -20.3575 illegal
illegal -19.5292 illegal
__________ illegal __________
"""
q_values_k_7_action_east: """
__________ illegal __________
illegal -76.4639 illegal
illegal -77.1949 illegal
illegal -77.5016 illegal
illegal -77.4875 illegal
illegal -77.0702 illegal
__________ illegal __________
"""
q_values_k_7_action_exit: """
__________ 10.0000 __________
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
__________ 1.0000 __________
"""
q_values_k_7_action_south: """
__________ illegal __________
illegal -15.4999 illegal
illegal -20.3575 illegal
illegal -19.5292 illegal
illegal -14.4173 illegal
illegal -7.7350 illegal
__________ illegal __________
"""
q_values_k_7_action_west: """
__________ illegal __________
illegal -76.4639 illegal
illegal -77.1949 illegal
illegal -77.5016 illegal
illegal -77.4875 illegal
illegal -77.0702 illegal
__________ illegal __________
"""
values_k_8: """
__________ 10.0000 __________
-100.0000 -0.8500 -100.0000
-100.0000 -9.1502 -100.0000
-100.0000 -15.4999 -100.0000
-100.0000 -14.4173 -100.0000
-100.0000 -7.7350 -100.0000
__________ 1.0000 __________
"""
q_values_k_8_action_north: """
__________ illegal __________
illegal -0.8500 illegal
illegal -9.1502 illegal
illegal -15.4999 illegal
illegal -20.3575 illegal
illegal -19.5292 illegal
__________ illegal __________
"""
q_values_k_8_action_east: """
__________ illegal __________
illegal -76.4639 illegal
illegal -77.1949 illegal
illegal -77.5016 illegal
illegal -77.4875 illegal
illegal -77.0702 illegal
__________ illegal __________
"""
q_values_k_8_action_exit: """
__________ 10.0000 __________
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
__________ 1.0000 __________
"""
q_values_k_8_action_south: """
__________ illegal __________
illegal -15.4999 illegal
illegal -20.3575 illegal
illegal -19.5292 illegal
illegal -14.4173 illegal
illegal -7.7350 illegal
__________ illegal __________
"""
q_values_k_8_action_west: """
__________ illegal __________
illegal -76.4639 illegal
illegal -77.1949 illegal
illegal -77.5016 illegal
illegal -77.4875 illegal
illegal -77.0702 illegal
__________ illegal __________
"""
values_k_9: """
__________ 10.0000 __________
-100.0000 -0.8500 -100.0000
-100.0000 -9.1502 -100.0000
-100.0000 -15.4999 -100.0000
-100.0000 -14.4173 -100.0000
-100.0000 -7.7350 -100.0000
__________ 1.0000 __________
"""
q_values_k_9_action_north: """
__________ illegal __________
illegal -0.8500 illegal
illegal -9.1502 illegal
illegal -15.4999 illegal
illegal -20.3575 illegal
illegal -19.5292 illegal
__________ illegal __________
"""
q_values_k_9_action_east: """
__________ illegal __________
illegal -76.4639 illegal
illegal -77.1949 illegal
illegal -77.5016 illegal
illegal -77.4875 illegal
illegal -77.0702 illegal
__________ illegal __________
"""
q_values_k_9_action_exit: """
__________ 10.0000 __________
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
__________ 1.0000 __________
"""
q_values_k_9_action_south: """
__________ illegal __________
illegal -15.4999 illegal
illegal -20.3575 illegal
illegal -19.5292 illegal
illegal -14.4173 illegal
illegal -7.7350 illegal
__________ illegal __________
"""
q_values_k_9_action_west: """
__________ illegal __________
illegal -76.4639 illegal
illegal -77.1949 illegal
illegal -77.5016 illegal
illegal -77.4875 illegal
illegal -77.0702 illegal
__________ illegal __________
"""
values_k_100: """
__________ 10.0000 __________
-100.0000 -0.8500 -100.0000
-100.0000 -9.1502 -100.0000
-100.0000 -15.4999 -100.0000
-100.0000 -14.4173 -100.0000
-100.0000 -7.7350 -100.0000
__________ 1.0000 __________
"""
q_values_k_100_action_north: """
__________ illegal __________
illegal -0.8500 illegal
illegal -9.1502 illegal
illegal -15.4999 illegal
illegal -20.3575 illegal
illegal -19.5292 illegal
__________ illegal __________
"""
q_values_k_100_action_east: """
__________ illegal __________
illegal -76.4639 illegal
illegal -77.1949 illegal
illegal -77.5016 illegal
illegal -77.4875 illegal
illegal -77.0702 illegal
__________ illegal __________
"""
q_values_k_100_action_exit: """
__________ 10.0000 __________
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
-100.0000 illegal -100.0000
__________ 1.0000 __________
"""
q_values_k_100_action_south: """
__________ illegal __________
illegal -15.4999 illegal
illegal -20.3575 illegal
illegal -19.5292 illegal
illegal -14.4173 illegal
illegal -7.7350 illegal
__________ illegal __________
"""
q_values_k_100_action_west: """
__________ illegal __________
illegal -76.4639 illegal
illegal -77.1949 illegal
illegal -77.5016 illegal
illegal -77.4875 illegal
illegal -77.0702 illegal
__________ illegal __________
"""
policy: """
__________ exit __________
exit north exit
exit north exit
exit north exit
exit south exit
exit south exit
__________ exit __________
"""
actions: """
north
east
exit
south
west
"""

View file

@ -0,0 +1,27 @@
class: "ValueIterationTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
# 10 #
-100 _ -100
-100 _ -100
-100 _ -100
-100 _ -100
-100 S -100
# 1 #
"""
gridName: "bridgeGrid"
discount: "0.85"
noise: "0.1"
livingReward: "0.0"
epsilon: "0.5"
learningRate: "0.1"
numExperiences: "500"
valueIterations: "100"
iterations: "10000"

View file

@ -0,0 +1,544 @@
values_k_0: """
0.0000 0.0000 0.0000 0.0000 0.0000
0.0000 0.0000 __________ 0.0000 0.0000
0.0000 0.0000 0.0000 0.0000 0.0000
0.0000 0.0000 __________ __________ 0.0000
0.0000 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_0_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_0_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_0_action_exit: """
-10.0000 illegal 10.0000 illegal illegal
-10.0000 illegal __________ illegal illegal
-10.0000 illegal 1.0000 illegal illegal
-10.0000 illegal __________ __________ illegal
-10.0000 illegal illegal illegal illegal
"""
q_values_k_0_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_0_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
values_k_1: """
-10.0000 0.0000 10.0000 0.0000 0.0000
-10.0000 0.0000 __________ 0.0000 0.0000
-10.0000 0.0000 1.0000 0.0000 0.0000
-10.0000 0.0000 __________ __________ 0.0000
-10.0000 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_1_action_north: """
illegal 0.0000 illegal 0.9000 0.0000
illegal -0.9000 __________ 0.0000 0.0000
illegal -0.8100 illegal 0.0900 0.0000
illegal -0.9000 __________ __________ 0.0000
illegal -0.9000 0.0000 0.0000 0.0000
"""
q_values_k_1_action_east: """
illegal 7.2000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.7200 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_1_action_exit: """
-10.0000 illegal 10.0000 illegal illegal
-10.0000 illegal __________ illegal illegal
-10.0000 illegal 1.0000 illegal illegal
-10.0000 illegal __________ __________ illegal
-10.0000 illegal illegal illegal illegal
"""
q_values_k_1_action_south: """
illegal 0.0000 illegal 0.9000 0.0000
illegal -0.9000 __________ 0.0000 0.0000
illegal -0.8100 illegal 0.0900 0.0000
illegal -0.9000 __________ __________ 0.0000
illegal -0.9000 0.0000 0.0000 0.0000
"""
q_values_k_1_action_west: """
illegal -7.2000 illegal 7.2000 0.0000
illegal -7.2000 __________ 0.0000 0.0000
illegal -7.2000 illegal 0.7200 0.0000
illegal -7.2000 __________ __________ 0.0000
illegal -7.2000 0.0000 0.0000 0.0000
"""
values_k_2: """
-10.0000 7.2000 10.0000 7.2000 0.0000
-10.0000 0.0000 __________ 0.0000 0.0000
-10.0000 0.7200 1.0000 0.7200 0.0000
-10.0000 0.0000 __________ __________ 0.0000
-10.0000 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_2_action_north: """
illegal 5.1840 illegal 6.0840 0.6480
illegal 4.2840 __________ 5.1840 0.0000
illegal -0.8100 illegal 0.0900 0.0648
illegal -0.3816 __________ __________ 0.0000
illegal -0.9000 0.0000 0.0000 0.0000
"""
q_values_k_2_action_east: """
illegal 7.8480 illegal 0.6480 0.0000
illegal 0.7128 __________ 0.7128 0.0000
illegal 0.7200 illegal 0.0648 0.0000
illegal 0.0648 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_2_action_exit: """
-10.0000 illegal 10.0000 illegal illegal
-10.0000 illegal __________ illegal illegal
-10.0000 illegal 1.0000 illegal illegal
-10.0000 illegal __________ __________ illegal
-10.0000 illegal illegal illegal illegal
"""
q_values_k_2_action_south: """
illegal 0.0000 illegal 0.9000 0.6480
illegal -0.3816 __________ 0.5184 0.0000
illegal -0.8100 illegal 0.6084 0.0648
illegal -0.9000 __________ __________ 0.0000
illegal -0.9000 0.0000 0.0000 0.0000
"""
q_values_k_2_action_west: """
illegal -6.5520 illegal 7.8480 5.1840
illegal -6.4872 __________ 0.7128 0.0000
illegal -7.2000 illegal 0.7848 0.5184
illegal -7.1352 __________ __________ 0.0000
illegal -7.2000 0.0000 0.0000 0.0000
"""
values_k_3: """
-10.0000 7.8480 10.0000 7.8480 5.1840
-10.0000 4.2840 __________ 5.1840 0.0000
-10.0000 0.7200 1.0000 0.7848 0.5184
-10.0000 0.0648 __________ __________ 0.0000
-10.0000 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_3_action_north: """
illegal 5.6506 illegal 7.0171 4.9054
illegal 5.1361 __________ 6.1171 4.1990
illegal 2.2745 illegal 3.8691 0.1173
illegal -0.3758 __________ __________ 0.3732
illegal -0.8533 0.0000 0.0000 0.0000
"""
q_values_k_3_action_east: """
illegal 8.2919 illegal 4.9054 4.1990
illegal 3.8556 __________ 0.7770 0.5132
illegal 1.1114 illegal 0.9104 0.3732
illegal 0.1115 __________ __________ 0.0467
illegal 0.0058 0.0000 0.0000 0.0000
"""
q_values_k_3_action_exit: """
-10.0000 illegal 10.0000 illegal illegal
-10.0000 illegal __________ illegal illegal
-10.0000 illegal 1.0000 illegal illegal
-10.0000 illegal __________ __________ illegal
-10.0000 illegal illegal illegal illegal
"""
q_values_k_3_action_south: """
illegal 3.0845 illegal 5.0990 1.1729
illegal 0.0040 __________ 1.0316 0.8398
illegal -0.7633 illegal 0.7017 0.1173
illegal -0.8942 __________ __________ 0.0000
illegal -0.9000 0.0000 0.0000 0.0000
"""
q_values_k_3_action_west: """
illegal -6.1081 illegal 8.3729 6.1171
illegal -6.4289 __________ 4.5094 4.2457
illegal -6.8086 illegal 1.2572 0.5651
illegal -7.1352 __________ __________ 0.0467
illegal -7.1942 0.0000 0.0000 0.0000
"""
values_k_4: """
-10.0000 8.2919 10.0000 8.3729 6.1171
-10.0000 5.1361 __________ 6.1171 4.2457
-10.0000 2.2745 1.0000 3.8691 0.5651
-10.0000 0.1115 __________ __________ 0.3732
-10.0000 0.0058 0.0000 0.0000 0.0000
"""
q_values_k_4_action_north: """
illegal 5.9702 illegal 7.4790 5.7084
illegal 5.5324 __________ 6.9611 5.3370
illegal 2.8880 illegal 4.5452 3.4560
illegal 0.7477 __________ __________ 0.4740
illegal -0.8198 0.0005 0.0000 0.2687
"""
q_values_k_4_action_east: """
illegal 8.4085 illegal 5.7084 5.3370
illegal 4.6490 __________ 4.1587 3.6583
illegal 1.1923 illegal 1.3056 0.8225
illegal 0.2855 __________ __________ 0.3196
illegal 0.0106 0.0000 0.0000 0.0336
"""
q_values_k_4_action_exit: """
-10.0000 illegal 10.0000 illegal illegal
-10.0000 illegal __________ illegal illegal
-10.0000 illegal 1.0000 illegal illegal
-10.0000 illegal __________ __________ illegal
-10.0000 illegal illegal illegal illegal
"""
q_values_k_4_action_south: """
illegal 3.6980 illegal 5.8549 4.3610
illegal 1.1999 __________ 3.7184 1.3395
illegal -0.7298 illegal 2.9266 0.6678
illegal -0.8858 __________ __________ 0.0672
illegal -0.8958 0.0005 0.0000 0.0000
"""
q_values_k_4_action_west: """
illegal -5.9915 illegal 8.5041 6.9611
illegal -6.2490 __________ 5.5061 5.0057
illegal -6.7277 illegal 1.6188 3.2015
illegal -6.9948 __________ __________ 0.3196
illegal -7.1894 0.0042 0.0000 0.0336
"""
values_k_5: """
-10.0000 8.4085 10.0000 8.5041 6.9611
-10.0000 5.5324 __________ 6.9611 5.3370
-10.0000 2.8880 1.0000 4.5452 3.4560
-10.0000 0.7477 __________ __________ 0.4740
-10.0000 0.0106 0.0042 0.0000 0.2687
"""
q_values_k_5_action_north: """
illegal 6.0541 illegal 7.6495 6.4039
illegal 5.6521 __________ 7.2298 6.1188
illegal 3.1733 illegal 5.4130 4.5627
illegal 1.2467 __________ __________ 2.5736
illegal -0.3613 0.0040 0.0246 0.3655
"""
q_values_k_5_action_east: """
illegal 8.4547 illegal 6.4039 6.1188
illegal 5.0000 __________ 5.0171 4.7802
illegal 1.2852 illegal 3.5239 3.0113
illegal 0.7992 __________ __________ 0.6765
illegal 0.0713 0.0008 0.1935 0.2603
"""
q_values_k_5_action_exit: """
-10.0000 illegal 10.0000 illegal illegal
-10.0000 illegal __________ illegal illegal
-10.0000 illegal 1.0000 illegal illegal
-10.0000 illegal __________ __________ illegal
-10.0000 illegal illegal illegal illegal
"""
q_values_k_5_action_south: """
illegal 3.9833 illegal 6.5385 5.2345
illegal 1.6773 __________ 4.3794 3.5951
illegal -0.2717 illegal 3.6736 1.0614
illegal -0.8251 __________ __________ 0.2788
illegal -0.8920 0.0040 0.0246 0.2177
"""
q_values_k_5_action_west: """
illegal -5.9453 illegal 8.5919 7.2298
illegal -6.1833 __________ 6.1864 5.9496
illegal -6.6348 illegal 1.7556 3.7955
illegal -6.9391 __________ __________ 0.6765
illegal -7.1318 0.0084 0.0030 0.0668
"""
values_k_6: """
-10.0000 8.4547 10.0000 8.5919 7.2298
-10.0000 5.6521 __________ 7.2298 6.1188
-10.0000 3.1733 1.0000 5.4130 4.5627
-10.0000 1.2467 __________ __________ 2.5736
-10.0000 0.0713 0.0084 0.1935 0.3655
"""
q_values_k_6_action_north: """
illegal 6.0874 illegal 7.7368 6.6294
illegal 5.6961 __________ 7.3875 6.4068
illegal 3.2595 illegal 5.7061 5.3034
illegal 1.4970 __________ __________ 3.7484
illegal -0.0017 0.0298 0.1730 1.9033
"""
q_values_k_6_action_east: """
illegal 8.4696 illegal 6.6294 6.4068
illegal 5.1160 __________ 5.6660 5.4669
illegal 1.3409 illegal 4.4230 4.0675
illegal 1.1896 __________ __________ 2.2966
illegal 0.1246 0.1408 0.2980 0.5277
"""
q_values_k_6_action_exit: """
-10.0000 illegal 10.0000 illegal illegal
-10.0000 illegal __________ illegal illegal
-10.0000 illegal 1.0000 illegal illegal
-10.0000 illegal __________ __________ illegal
-10.0000 illegal illegal illegal illegal
"""
q_values_k_6_action_south: """
illegal 4.0695 illegal 6.7561 5.8295
illegal 1.8935 __________ 5.0988 4.4865
illegal 0.0876 illegal 4.3980 2.7508
illegal -0.7365 __________ __________ 0.7264
illegal -0.8479 0.0298 0.1730 0.3135
"""
q_values_k_6_action_west: """
illegal -5.9304 illegal 8.6239 7.3875
illegal -6.1535 __________ 6.4659 6.2668
illegal -6.5791 illegal 1.8579 4.6797
illegal -6.9080 __________ __________ 2.2966
illegal -7.0814 0.0528 0.0408 0.4038
"""
values_k_7: """
-10.0000 8.4696 10.0000 8.6239 7.3875
-10.0000 5.6961 __________ 7.3875 6.4068
-10.0000 3.2595 1.0000 5.7061 5.3034
-10.0000 1.4970 __________ __________ 3.7484
-10.0000 0.1246 0.1408 0.2980 1.9033
"""
q_values_k_7_action_north: """
illegal 6.0981 illegal 7.7741 6.7600
illegal 5.7108 __________ 7.4507 6.5605
illegal 3.2912 illegal 5.8863 5.6038
illegal 1.5816 __________ __________ 4.4932
illegal 0.1905 0.1394 0.3985 2.8970
"""
q_values_k_7_action_east: """
illegal 8.4749 illegal 6.7600 6.5605
illegal 5.1568 __________ 5.9026 5.7551
illegal 1.3674 illegal 4.9969 4.7324
illegal 1.3824 __________ __________ 3.3475
illegal 0.2473 0.2399 1.4240 1.8790
"""
q_values_k_7_action_exit: """
-10.0000 illegal 10.0000 illegal illegal
-10.0000 illegal __________ illegal illegal
-10.0000 illegal 1.0000 illegal illegal
-10.0000 illegal __________ __________ illegal
-10.0000 illegal illegal illegal illegal
"""
q_values_k_7_action_south: """
illegal 4.1012 illegal 6.8839 6.0539
illegal 1.9595 __________ 5.3499 5.0599
illegal 0.2678 illegal 4.6757 3.6897
illegal -0.6755 __________ __________ 2.0451
illegal -0.7976 0.1394 0.3985 1.5685
"""
q_values_k_7_action_west: """
illegal -5.9251 illegal 8.6410 7.4507
illegal -6.1444 __________ 6.6087 6.4612
illegal -6.5526 illegal 1.8984 5.0224
illegal -6.8954 __________ __________ 3.3475
illegal -7.0541 0.1151 0.1550 0.7232
"""
values_k_8: """
-10.0000 8.4749 10.0000 8.6410 7.4507
-10.0000 5.7108 __________ 7.4507 6.5605
-10.0000 3.2912 1.0000 5.8863 5.6038
-10.0000 1.5816 __________ __________ 4.4932
-10.0000 0.2473 0.2399 1.4240 2.8970
"""
q_values_k_8_action_north: """
illegal 6.1019 illegal 7.7921 6.8128
illegal 5.7159 __________ 7.4826 6.6255
illegal 3.3017 illegal 5.9589 5.7577
illegal 1.6120 __________ __________ 4.8435
illegal 0.2603 0.3231 1.3076 3.6240
"""
q_values_k_8_action_east: """
illegal 8.4767 illegal 6.8128 6.6255
illegal 5.1707 __________ 6.0310 5.8985
illegal 1.3763 illegal 5.2350 5.0295
illegal 1.4572 __________ __________ 4.0001
illegal 0.3373 1.0685 2.3421 2.7509
"""
q_values_k_8_action_exit: """
-10.0000 illegal 10.0000 illegal illegal
-10.0000 illegal __________ illegal illegal
-10.0000 illegal 1.0000 illegal illegal
-10.0000 illegal __________ __________ illegal
-10.0000 illegal illegal illegal illegal
"""
q_values_k_8_action_south: """
illegal 4.1117 illegal 6.9351 6.1718
illegal 1.9836 __________ 5.4992 5.2957
illegal 0.3287 illegal 4.8325 4.2692
illegal -0.5796 __________ __________ 2.8946
illegal -0.7003 0.3231 1.3076 2.4747
"""
q_values_k_8_action_west: """
illegal -5.9233 illegal 8.6483 7.4826
illegal -6.1411 __________ 6.6720 6.5394
illegal -6.5437 illegal 1.9203 5.2330
illegal -6.8815 __________ __________ 4.0001
illegal -7.0354 0.2213 0.4290 1.6904
"""
values_k_9: """
-10.0000 8.4767 10.0000 8.6483 7.4826
-10.0000 5.7159 __________ 7.4826 6.6255
-10.0000 3.3017 1.0000 5.9589 5.7577
-10.0000 1.6120 __________ __________ 4.8435
-10.0000 0.3373 1.0685 2.3421 3.6240
"""
q_values_k_9_action_north: """
illegal 6.1032 illegal 7.8002 6.8392
illegal 5.7177 __________ 7.4965 6.6572
illegal 3.3055 illegal 5.9956 5.8249
illegal 1.6223 __________ __________ 5.0174
illegal 0.3568 1.0105 2.1087 4.0243
"""
q_values_k_9_action_east: """
illegal 8.4773 illegal 6.8392 6.6572
illegal 5.1755 __________ 6.0850 5.9620
illegal 1.3795 illegal 5.3553 5.1777
illegal 1.4881 __________ __________ 4.3316
illegal 0.9447 1.8787 3.0308 3.3713
"""
q_values_k_9_action_exit: """
-10.0000 illegal 10.0000 illegal illegal
-10.0000 illegal __________ illegal illegal
-10.0000 illegal 1.0000 illegal illegal
-10.0000 illegal __________ __________ illegal
-10.0000 illegal illegal illegal illegal
"""
q_values_k_9_action_south: """
illegal 4.1155 illegal 6.9609 6.2222
illegal 1.9917 __________ 5.5601 5.4153
illegal 0.3506 illegal 4.8986 4.5418
illegal -0.5121 __________ __________ 3.4811
illegal -0.5610 1.0105 2.1087 3.1462
"""
q_values_k_9_action_west: """
illegal -5.9227 illegal 8.6518 7.4965
illegal -6.1399 __________ 6.7021 6.5791
illegal -6.5405 illegal 1.9297 5.3226
illegal -6.8725 __________ __________ 4.3316
illegal -7.0246 0.4352 1.1909 2.4484
"""
values_k_100: """
-10.0000 8.4777 10.0000 8.6547 7.5087
-10.0000 5.7186 __________ 7.5087 6.6836
-10.0000 3.3074 1.0000 6.0258 5.8841
-10.0000 2.0045 __________ __________ 5.1665
-10.0000 2.9289 3.4513 3.9306 4.4765
"""
q_values_k_100_action_north: """
illegal 6.1039 illegal 7.8072 6.8610
illegal 5.7186 __________ 7.5087 6.6836
illegal 3.3074 illegal 6.0258 5.8841
illegal 1.6617 __________ __________ 5.1665
illegal 0.8539 3.1023 3.5435 4.4765
"""
q_values_k_100_action_east: """
illegal 8.4777 illegal 6.8610 6.6836
illegal 5.1780 __________ 6.1334 6.0175
illegal 1.4151 illegal 5.4546 5.3030
illegal 2.0045 __________ __________ 4.6523
illegal 2.9289 3.4513 3.9306 4.0910
"""
q_values_k_100_action_exit: """
-10.0000 illegal 10.0000 illegal illegal
-10.0000 illegal __________ illegal illegal
-10.0000 illegal 1.0000 illegal illegal
-10.0000 illegal __________ __________ illegal
-10.0000 illegal illegal illegal illegal
"""
q_values_k_100_action_south: """
illegal 4.1174 illegal 6.9820 6.2669
illegal 1.9960 __________ 5.6159 5.5138
illegal 0.6333 illegal 4.9582 4.7918
illegal 1.3892 __________ __________ 4.1531
illegal 1.5194 3.1023 3.5435 3.9797
"""
q_values_k_100_action_west: """
illegal -5.9223 illegal 8.6547 7.5087
illegal -6.1393 __________ 6.7275 6.6116
illegal -6.5049 illegal 1.9381 5.4051
illegal -6.6387 __________ __________ 4.6523
illegal -6.7560 2.7300 3.1924 3.6979
"""
policy: """
exit east exit west west
exit north __________ north north
exit north exit north north
exit east __________ __________ north
exit east east east north
"""
actions: """
north
east
exit
south
west
"""

View file

@ -0,0 +1,24 @@
class: "ValueIterationTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
-10 _ 10 _ _
-10 _ # _ _
-10 _ 1 _ _
-10 _ # # _
-10 S _ _ _
"""
discount: "0.9"
noise: "0.2"
livingReward: "0.0"
epsilon: "0.2"
learningRate: "0.1"
numExperiences: "3000"
valueIterations: "100"
iterations: "10000"

View file

@ -0,0 +1,2 @@
max_points: "6"
class: "PassAllTestsQuestion"

View file

@ -0,0 +1,2 @@
# This is the solution file for test_cases/q2/1-bridge-grid.test.
# File intentionally blank.

View file

@ -0,0 +1,29 @@
class: "GridPolicyTest"
# Function in module in analysis that returns (discount, noise)
parameterFn: "question2"
question2: "true"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
# -100 -100 -100 -100 -100 #
1 S _ _ _ _ 10
# -100 -100 -100 -100 -100 #
"""
gridName: "bridgeGrid"
# Policy specification
# _ policy choice not checked
# N, E, S, W policy action must be north, east, south, west
#
policy: """
_ _ _ _ _ _ _
_ E _ _ _ _ _
_ _ _ _ _ _ _
"""

View file

@ -0,0 +1,2 @@
max_points: "1"
class: "PassAllTestsQuestion"

View file

@ -0,0 +1,2 @@
# This is the solution file for test_cases/q3/1-question-3.1.test.
# File intentionally blank.

View file

@ -0,0 +1,31 @@
class: "GridPolicyTest"
# Function in module in analysis that returns (discount, noise)
parameterFn: "question3a"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
_ _ _ _ _
_ # _ _ _
_ # 1 # 10
S _ _ _ _
-10 -10 -10 -10 -10
"""
gridName: "discountGrid"
# Policy specification
# _ policy choice not checked
# N, E, S, W policy action must be north, east, south, west
#
policy: """
_ _ _ _ _
_ _ _ _ _
_ _ _ _ _
E E N _ _
_ _ _ _ _
"""

View file

@ -0,0 +1,2 @@
# This is the solution file for test_cases/q3/2-question-3.2.test.
# File intentionally blank.

View file

@ -0,0 +1,31 @@
class: "GridPolicyTest"
# Function in module in analysis that returns (discount, noise)
parameterFn: "question3b"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
_ _ _ _ _
_ # _ _ _
_ # 1 # 10
S _ _ _ _
-10 -10 -10 -10 -10
"""
gridName: "discountGrid"
# Policy specification
# _ policy choice not checked
# N, E, S, W policy action must be north, east, south, west
#
policy: """
E E S _ _
N _ S _ _
N _ _ _ _
N _ _ _ _
_ _ _ _ _
"""

View file

@ -0,0 +1,2 @@
# This is the solution file for test_cases/q3/3-question-3.3.test.
# File intentionally blank.

View file

@ -0,0 +1,31 @@
class: "GridPolicyTest"
# Function in module in analysis that returns (discount, noise)
parameterFn: "question3c"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
_ _ _ _ _
_ # _ _ _
_ # 1 # 10
S _ _ _ _
-10 -10 -10 -10 -10
"""
gridName: "discountGrid"
# Policy specification
# _ policy choice not checked
# N, E, S, W policy action must be north, east, south, west
#
policy: """
_ _ _ _ _
_ _ _ _ _
_ _ _ _ _
E E E E N
_ _ _ _ _
"""

View file

@ -0,0 +1,2 @@
# This is the solution file for test_cases/q3/4-question-3.4.test.
# File intentionally blank.

View file

@ -0,0 +1,36 @@
class: "GridPolicyTest"
# Function in module in analysis that returns (discount, noise)
parameterFn: "question3d"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
_ _ _ _ _
_ # _ _ _
_ # 1 # 10
S _ _ _ _
-10 -10 -10 -10 -10
"""
gridName: "discountGrid"
# Policy specification
# _ policy choice not checked
# N, E, S, W policy action must be north, east, south, west
#
policy: """
_ _ _ _ _
_ _ _ _ _
_ _ _ _ _
N _ _ _ _
_ _ _ _ _
"""
# State the most probable path must visit
# (x,y) for a particular location; (0,0) is bottom left
# TERMINAL_STATE for the terminal state
pathVisits: "(4,2)"

View file

@ -0,0 +1,2 @@
# This is the solution file for test_cases/q3/5-question-3.5.test.
# File intentionally blank.

View file

@ -0,0 +1,36 @@
class: "GridPolicyTest"
# Function in module in analysis that returns (discount, noise)
parameterFn: "question3e"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
_ _ _ _ _
_ # _ _ _
_ # 1 # 10
S _ _ _ _
-10 -10 -10 -10 -10
"""
gridName: "discountGrid"
# Policy specification
# _ policy choice not checked
# N, E, S, W policy action must be north, east, south, west
#
policy: """
_ _ _ _ _
_ _ _ _ _
_ _ _ _ _
_ _ _ _ _
_ _ _ _ _
"""
# State the most probable path must not visit
# (x,y) for a particular location; (0,0) is bottom left
# TERMINAL_STATE for the terminal state
pathNotVisits: "TERMINAL_STATE"

View file

@ -0,0 +1,2 @@
max_points: "5"
class: "NumberPassedQuestion"

View file

@ -0,0 +1,342 @@
q_values_k_0_action_north: """
illegal
0.0000
illegal
"""
q_values_k_0_action_east: """
illegal
0.0000
illegal
"""
q_values_k_0_action_exit: """
0.0000
illegal
0.0000
"""
q_values_k_0_action_south: """
illegal
0.0000
illegal
"""
q_values_k_0_action_west: """
illegal
0.0000
illegal
"""
q_values_k_1_action_north: """
illegal
0.0000
illegal
"""
q_values_k_1_action_east: """
illegal
0.0000
illegal
"""
q_values_k_1_action_exit: """
0.0000
illegal
1.0000
"""
q_values_k_1_action_south: """
illegal
0.0000
illegal
"""
q_values_k_1_action_west: """
illegal
0.0000
illegal
"""
q_values_k_2_action_north: """
illegal
0.0000
illegal
"""
q_values_k_2_action_east: """
illegal
0.0000
illegal
"""
q_values_k_2_action_exit: """
0.0000
illegal
1.0000
"""
q_values_k_2_action_south: """
illegal
0.0000
illegal
"""
q_values_k_2_action_west: """
illegal
0.0000
illegal
"""
q_values_k_3_action_north: """
illegal
0.0000
illegal
"""
q_values_k_3_action_east: """
illegal
0.0000
illegal
"""
q_values_k_3_action_exit: """
0.0000
illegal
1.9000
"""
q_values_k_3_action_south: """
illegal
0.0000
illegal
"""
q_values_k_3_action_west: """
illegal
0.0000
illegal
"""
q_values_k_4_action_north: """
illegal
0.0000
illegal
"""
q_values_k_4_action_east: """
illegal
0.0000
illegal
"""
q_values_k_4_action_exit: """
0.0000
illegal
2.7100
"""
q_values_k_4_action_south: """
illegal
0.0000
illegal
"""
q_values_k_4_action_west: """
illegal
0.0000
illegal
"""
q_values_k_5_action_north: """
illegal
0.0000
illegal
"""
q_values_k_5_action_east: """
illegal
0.0000
illegal
"""
q_values_k_5_action_exit: """
-1.0000
illegal
2.7100
"""
q_values_k_5_action_south: """
illegal
0.0000
illegal
"""
q_values_k_5_action_west: """
illegal
0.0000
illegal
"""
q_values_k_6_action_north: """
illegal
0.0000
illegal
"""
q_values_k_6_action_east: """
illegal
0.0000
illegal
"""
q_values_k_6_action_exit: """
-1.0000
illegal
3.4390
"""
q_values_k_6_action_south: """
illegal
0.0000
illegal
"""
q_values_k_6_action_west: """
illegal
0.0000
illegal
"""
q_values_k_7_action_north: """
illegal
0.0000
illegal
"""
q_values_k_7_action_east: """
illegal
0.0000
illegal
"""
q_values_k_7_action_exit: """
-1.0000
illegal
3.4390
"""
q_values_k_7_action_south: """
illegal
0.1720
illegal
"""
q_values_k_7_action_west: """
illegal
0.0000
illegal
"""
q_values_k_8_action_north: """
illegal
0.0000
illegal
"""
q_values_k_8_action_east: """
illegal
0.0000
illegal
"""
q_values_k_8_action_exit: """
-1.0000
illegal
4.0951
"""
q_values_k_8_action_south: """
illegal
0.1720
illegal
"""
q_values_k_8_action_west: """
illegal
0.0000
illegal
"""
q_values_k_9_action_north: """
illegal
0.0000
illegal
"""
q_values_k_9_action_east: """
illegal
0.0000
illegal
"""
q_values_k_9_action_exit: """
-1.0000
illegal
4.6856
"""
q_values_k_9_action_south: """
illegal
0.1720
illegal
"""
q_values_k_9_action_west: """
illegal
0.0000
illegal
"""
q_values_k_100_action_north: """
illegal
-0.4534
illegal
"""
q_values_k_100_action_east: """
illegal
0.4063
illegal
"""
q_values_k_100_action_exit: """
-9.4767
illegal
9.8175
"""
q_values_k_100_action_south: """
illegal
2.1267
illegal
"""
q_values_k_100_action_west: """
illegal
0.3919
illegal
"""
values: """
-9.4767
2.1267
9.8175
"""
policy: """
exit
south
exit
"""

View file

@ -0,0 +1,22 @@
class: "QLearningTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
-10
S
10
"""
discount: "0.5"
noise: "0.0"
livingReward: "0.0"
epsilon: "0.5"
learningRate: "0.1"
numExperiences: "100"
valueIterations: "100"
iterations: "10000"

View file

@ -0,0 +1,342 @@
q_values_k_0_action_north: """
illegal
0.0000
illegal
"""
q_values_k_0_action_east: """
illegal
0.0000
illegal
"""
q_values_k_0_action_exit: """
0.0000
illegal
0.0000
"""
q_values_k_0_action_south: """
illegal
0.0000
illegal
"""
q_values_k_0_action_west: """
illegal
0.0000
illegal
"""
q_values_k_1_action_north: """
illegal
0.0000
illegal
"""
q_values_k_1_action_east: """
illegal
0.0000
illegal
"""
q_values_k_1_action_exit: """
0.0000
illegal
1.0000
"""
q_values_k_1_action_south: """
illegal
0.0000
illegal
"""
q_values_k_1_action_west: """
illegal
0.0000
illegal
"""
q_values_k_2_action_north: """
illegal
0.0000
illegal
"""
q_values_k_2_action_east: """
illegal
0.0000
illegal
"""
q_values_k_2_action_exit: """
0.0000
illegal
1.0000
"""
q_values_k_2_action_south: """
illegal
0.0000
illegal
"""
q_values_k_2_action_west: """
illegal
0.0000
illegal
"""
q_values_k_3_action_north: """
illegal
0.0000
illegal
"""
q_values_k_3_action_east: """
illegal
0.0000
illegal
"""
q_values_k_3_action_exit: """
0.0000
illegal
1.9000
"""
q_values_k_3_action_south: """
illegal
0.0000
illegal
"""
q_values_k_3_action_west: """
illegal
0.0000
illegal
"""
q_values_k_4_action_north: """
illegal
0.0000
illegal
"""
q_values_k_4_action_east: """
illegal
0.0000
illegal
"""
q_values_k_4_action_exit: """
0.0000
illegal
2.7100
"""
q_values_k_4_action_south: """
illegal
0.0000
illegal
"""
q_values_k_4_action_west: """
illegal
0.0000
illegal
"""
q_values_k_5_action_north: """
illegal
0.0000
illegal
"""
q_values_k_5_action_east: """
illegal
0.0000
illegal
"""
q_values_k_5_action_exit: """
-1.0000
illegal
2.7100
"""
q_values_k_5_action_south: """
illegal
0.0000
illegal
"""
q_values_k_5_action_west: """
illegal
0.0000
illegal
"""
q_values_k_6_action_north: """
illegal
0.0000
illegal
"""
q_values_k_6_action_east: """
illegal
0.0000
illegal
"""
q_values_k_6_action_exit: """
-1.0000
illegal
3.4390
"""
q_values_k_6_action_south: """
illegal
0.0000
illegal
"""
q_values_k_6_action_west: """
illegal
0.0000
illegal
"""
q_values_k_7_action_north: """
illegal
0.0000
illegal
"""
q_values_k_7_action_east: """
illegal
0.0000
illegal
"""
q_values_k_7_action_exit: """
-1.0000
illegal
3.4390
"""
q_values_k_7_action_south: """
illegal
0.2579
illegal
"""
q_values_k_7_action_west: """
illegal
0.0000
illegal
"""
q_values_k_8_action_north: """
illegal
0.0000
illegal
"""
q_values_k_8_action_east: """
illegal
0.0000
illegal
"""
q_values_k_8_action_exit: """
-1.0000
illegal
4.0951
"""
q_values_k_8_action_south: """
illegal
0.2579
illegal
"""
q_values_k_8_action_west: """
illegal
0.0000
illegal
"""
q_values_k_9_action_north: """
illegal
0.0000
illegal
"""
q_values_k_9_action_east: """
illegal
0.0000
illegal
"""
q_values_k_9_action_exit: """
-1.0000
illegal
4.6856
"""
q_values_k_9_action_south: """
illegal
0.2579
illegal
"""
q_values_k_9_action_west: """
illegal
0.0000
illegal
"""
q_values_k_100_action_north: """
illegal
-0.6670
illegal
"""
q_values_k_100_action_east: """
illegal
0.9499
illegal
"""
q_values_k_100_action_exit: """
-9.4767
illegal
9.8175
"""
q_values_k_100_action_south: """
illegal
3.2562
illegal
"""
q_values_k_100_action_west: """
illegal
0.8236
illegal
"""
values: """
-9.4767
3.2562
9.8175
"""
policy: """
exit
south
exit
"""

View file

@ -0,0 +1,22 @@
class: "QLearningTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
-10
S
10
"""
discount: "0.75"
noise: "0.25"
livingReward: "0.0"
epsilon: "0.5"
learningRate: "0.1"
numExperiences: "100"
valueIterations: "100"
iterations: "10000"

View file

@ -0,0 +1,570 @@
q_values_k_0_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_0_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_0_action_exit: """
__________ 0.0000 __________
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_0_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_0_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_1_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_1_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_1_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_1_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_1_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_2_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_2_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_2_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_2_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_2_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_3_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_3_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_3_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
-10.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_3_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_3_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_4_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_4_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_4_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
-10.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_4_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_4_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_5_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_5_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_5_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
0.0000 illegal -10.0000
0.0000 illegal 0.0000
-10.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_5_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_5_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_6_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_6_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_6_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
-10.0000 illegal -10.0000
0.0000 illegal 0.0000
-10.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_6_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_6_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_7_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_7_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_7_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
-10.0000 illegal -10.0000
0.0000 illegal 0.0000
-10.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_7_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_7_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_8_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_8_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_8_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
-10.0000 illegal -10.0000
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_8_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_8_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_9_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_9_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_9_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
-10.0000 illegal -10.0000
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
__________ 0.1000 __________
"""
q_values_k_9_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_9_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_500_action_north: """
__________ illegal __________
illegal -5.8648 illegal
illegal -0.7995 illegal
illegal -0.1671 illegal
illegal -1.2642 illegal
illegal -0.5871 illegal
__________ illegal __________
"""
q_values_k_500_action_east: """
__________ illegal __________
illegal -17.0676 illegal
illegal -26.5534 illegal
illegal -3.6957 illegal
illegal -43.5952 illegal
illegal -31.6884 illegal
__________ illegal __________
"""
q_values_k_500_action_exit: """
__________ 9.3539 __________
-96.5663 illegal -96.9097
-97.7472 illegal -94.1850
-89.0581 illegal -96.9097
-97.2187 illegal -87.8423
-92.8210 illegal -97.2187
__________ 0.9576 __________
"""
q_values_k_500_action_south: """
__________ illegal __________
illegal -6.8377 illegal
illegal -6.7277 illegal
illegal -3.4723 illegal
illegal -8.4015 illegal
illegal -5.5718 illegal
__________ illegal __________
"""
q_values_k_500_action_west: """
__________ illegal __________
illegal -27.0626 illegal
illegal -39.0610 illegal
illegal -40.5887 illegal
illegal -16.2839 illegal
illegal -20.7770 illegal
__________ illegal __________
"""
values: """
__________ 9.3539 __________
-96.5663 -5.8648 -96.9097
-97.7472 -0.7995 -94.1850
-89.0581 -0.1671 -96.9097
-97.2187 -1.2642 -87.8423
-92.8210 -0.5871 -97.2187
__________ 0.9576 __________
"""
policy: """
__________ exit __________
exit north exit
exit north exit
exit north exit
exit north exit
exit north exit
__________ exit __________
"""

View file

@ -0,0 +1,27 @@
class: "QLearningTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
# 10 #
-100 _ -100
-100 _ -100
-100 _ -100
-100 _ -100
-100 S -100
# 1 #
"""
gridName: "bridgeGrid"
discount: "0.85"
noise: "0.1"
livingReward: "0.0"
epsilon: "0.5"
learningRate: "0.1"
numExperiences: "500"
valueIterations: "100"
iterations: "10000"

View file

@ -0,0 +1,456 @@
q_values_k_0_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_0_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_0_action_exit: """
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
0.0000 illegal illegal illegal illegal
"""
q_values_k_0_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_0_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_1_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_1_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_1_action_exit: """
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
0.0000 illegal illegal illegal illegal
"""
q_values_k_1_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_1_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_2_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_2_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_2_action_exit: """
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
0.0000 illegal illegal illegal illegal
"""
q_values_k_2_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_2_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_3_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_3_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_3_action_exit: """
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
-1.0000 illegal illegal illegal illegal
"""
q_values_k_3_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_3_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_4_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_4_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_4_action_exit: """
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
-1.0000 illegal illegal illegal illegal
"""
q_values_k_4_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_4_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_5_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_5_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_5_action_exit: """
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
-1.0000 illegal illegal illegal illegal
"""
q_values_k_5_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_5_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_6_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_6_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_6_action_exit: """
0.0000 illegal 0.0000 illegal illegal
-1.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
-1.0000 illegal illegal illegal illegal
"""
q_values_k_6_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_6_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_7_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_7_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_7_action_exit: """
0.0000 illegal 0.0000 illegal illegal
-1.0000 illegal __________ illegal illegal
0.0000 illegal 0.1000 illegal illegal
0.0000 illegal __________ __________ illegal
-1.0000 illegal illegal illegal illegal
"""
q_values_k_7_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_7_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_8_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_8_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_8_action_exit: """
0.0000 illegal 0.0000 illegal illegal
-1.0000 illegal __________ illegal illegal
0.0000 illegal 0.1000 illegal illegal
-1.0000 illegal __________ __________ illegal
-1.0000 illegal illegal illegal illegal
"""
q_values_k_8_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_8_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_9_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal -0.0900 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_9_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_9_action_exit: """
0.0000 illegal 0.0000 illegal illegal
-1.0000 illegal __________ illegal illegal
0.0000 illegal 0.1000 illegal illegal
-1.0000 illegal __________ __________ illegal
-1.0000 illegal illegal illegal illegal
"""
q_values_k_9_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_9_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_3000_action_north: """
illegal 4.3205 illegal 6.1517 3.8095
illegal 4.4238 __________ 5.2284 3.5129
illegal 1.0694 illegal 3.6867 2.0418
illegal 0.3423 __________ __________ 1.0655
illegal 0.0073 0.0079 0.0484 0.3768
"""
q_values_k_3000_action_east: """
illegal 8.0584 illegal 3.7245 3.3947
illegal 2.0499 __________ 3.2373 2.1742
illegal 0.8687 illegal 1.7398 1.2671
illegal 0.2927 __________ __________ 0.6669
illegal 0.0239 0.0097 0.1611 0.2051
"""
q_values_k_3000_action_exit: """
-10.0000 illegal 10.0000 illegal illegal
-10.0000 illegal __________ illegal illegal
-10.0000 illegal 1.0000 illegal illegal
-10.0000 illegal __________ __________ illegal
-9.9999 illegal illegal illegal illegal
"""
q_values_k_3000_action_south: """
illegal -0.3521 illegal 3.6948 2.9139
illegal -0.5605 __________ 2.1346 1.5674
illegal 0.2093 illegal 1.5389 0.5521
illegal -0.5505 __________ __________ 0.1006
illegal -1.8501 0.0060 0.0514 0.1223
"""
q_values_k_3000_action_west: """
illegal -6.2001 illegal 7.5146 4.9014
illegal -5.4013 __________ 4.0484 3.4126
illegal -8.0399 illegal 0.9653 1.6081
illegal -7.4767 __________ __________ 0.3934
illegal -6.3432 0.0179 0.0188 0.1028
"""
values: """
-10.0000 8.0584 10.0000 7.5146 4.9014
-10.0000 4.4238 __________ 5.2284 3.5129
-10.0000 1.0694 1.0000 3.6867 2.0418
-10.0000 0.3423 __________ __________ 1.0655
-9.9999 0.0239 0.0179 0.1611 0.3768
"""
policy: """
exit east exit west west
exit north __________ north north
exit north exit north north
exit north __________ __________ north
exit east west east north
"""


@ -0,0 +1,24 @@
class: "QLearningTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
-10 _ 10 _ _
-10 _ # _ _
-10 _ 1 _ _
-10 _ # # _
-10 S _ _ _
"""
discount: "0.9"
noise: "0.2"
livingReward: "0.0"
epsilon: "0.2"
learningRate: "0.1"
numExperiences: "3000"
valueIterations: "100"
iterations: "10000"


@ -0,0 +1,2 @@
max_points: "5"
class: "PassAllTestsQuestion"


@ -0,0 +1,2 @@
# This is the solution file for test_cases/q5/1-tinygrid.test.
# File intentionally blank.


@ -0,0 +1,22 @@
class: "EpsilonGreedyTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
-10
S
10
"""
discount: "0.5"
noise: "0.0"
livingReward: "0.0"
epsilon: "0.5"
learningRate: "0.1"
numExperiences: "100"
valueIterations: "100"
iterations: "10000"
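EpsilonGreedyTest exercises action selection rather than the update itself: with probability epsilon (0.5 in this .test file) the agent is expected to pick a uniformly random legal action, and otherwise the greedy one. A sketch under that assumption (function name illustrative):

import random

def epsilon_greedy_action(Q, state, legal_actions, epsilon=0.5):
    # Explore with probability epsilon, otherwise exploit the current Q-table.
    if not legal_actions:
        return None
    if random.random() < epsilon:
        return random.choice(legal_actions)
    return max(legal_actions, key=lambda a: Q.get((state, a), 0.0))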


@ -0,0 +1,2 @@
# This is the solution file for test_cases/q5/2-tinygrid-noisy.test.
# File intentionally blank.


@ -0,0 +1,22 @@
class: "EpsilonGreedyTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
-10
S
10
"""
discount: "0.75"
noise: "0.25"
livingReward: "0.0"
epsilon: "0.5"
learningRate: "0.1"
numExperiences: "100"
valueIterations: "100"
iterations: "10000"


@ -0,0 +1,2 @@
# This is the solution file for test_cases/q5/3-bridge.test.
# File intentionally blank.


@ -0,0 +1,27 @@
class: "EpsilonGreedyTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
# 10 #
-100 _ -100
-100 _ -100
-100 _ -100
-100 _ -100
-100 S -100
# 1 #
"""
gridName: "bridgeGrid"
discount: "0.85"
noise: "0.1"
livingReward: "0.0"
epsilon: "0.5"
learningRate: "0.1"
numExperiences: "500"
valueIterations: "100"
iterations: "10000"


@ -0,0 +1,2 @@
# This is the solution file for test_cases/q5/4-discountgrid.test.
# File intentionally blank.


@ -0,0 +1,24 @@
class: "EpsilonGreedyTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
-10 _ 10 _ _
-10 _ # _ _
-10 _ 1 _ _
-10 _ # # _
-10 S _ _ _
"""
discount: "0.9"
noise: "0.2"
livingReward: "0.0"
epsilon: "0.2"
learningRate: "0.1"
numExperiences: "3000"
valueIterations: "100"
iterations: "10000"


@ -0,0 +1,2 @@
max_points: "3"
class: "PassAllTestsQuestion"


@ -0,0 +1,2 @@
max_points: "1"
class: "PassAllTestsQuestion"


@ -0,0 +1,2 @@
# This is the solution file for test_cases/q6/grade-agent.test.
# File intentionally blank.


@ -0,0 +1,2 @@
class: "Question6Test"


@ -0,0 +1,2 @@
max_points: "1"
class: "PartialCreditQuestion"


@ -0,0 +1,2 @@
# This is the solution file for test_cases/q7/grade-agent.test.
# File intentionally blank.


@ -0,0 +1,6 @@
class: "EvalAgentTest"
# 100 test games after 2000 training games
pacmanParams: "-p PacmanQAgent -x 2000 -n 2100 -l smallGrid -q -f --fixRandomSeed"
winsThresholds: "70"
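Per the comment above, the agent is trained for 2000 games and then judged on the remaining 100 of the 2100; winsThresholds is the minimum number of wins required among those evaluation games. A toy sketch of that acceptance check (game_results here is a hypothetical list of booleans, one per evaluation game, not the autograder's actual data structure):

def passes_win_threshold(game_results, threshold=70):
    # game_results: one True/False per evaluation game (hypothetical input format).
    return sum(1 for won in game_results if won) >= threshold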


@ -0,0 +1,429 @@
weights_k_0: """
{((0, 0), 'exit'): 0,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0,
((0, 1), 'west'): 0,
((0, 2), 'exit'): 0}
"""
q_values_k_0_action_north: """
illegal
0.0000
illegal
"""
q_values_k_0_action_east: """
illegal
0.0000
illegal
"""
q_values_k_0_action_exit: """
0.0000
illegal
0.0000
"""
q_values_k_0_action_south: """
illegal
0.0000
illegal
"""
q_values_k_0_action_west: """
illegal
0.0000
illegal
"""
weights_k_1: """
{((0, 0), 'exit'): 1.0,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0,
((0, 1), 'west'): 0,
((0, 2), 'exit'): 0}
"""
q_values_k_1_action_north: """
illegal
0.0000
illegal
"""
q_values_k_1_action_east: """
illegal
0.0000
illegal
"""
q_values_k_1_action_exit: """
0.0000
illegal
1.0000
"""
q_values_k_1_action_south: """
illegal
0.0000
illegal
"""
q_values_k_1_action_west: """
illegal
0.0000
illegal
"""
weights_k_2: """
{((0, 0), 'exit'): 1.0,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0.0,
((0, 1), 'west'): 0,
((0, 2), 'exit'): 0}
"""
q_values_k_2_action_north: """
illegal
0.0000
illegal
"""
q_values_k_2_action_east: """
illegal
0.0000
illegal
"""
q_values_k_2_action_exit: """
0.0000
illegal
1.0000
"""
q_values_k_2_action_south: """
illegal
0.0000
illegal
"""
q_values_k_2_action_west: """
illegal
0.0000
illegal
"""
weights_k_3: """
{((0, 0), 'exit'): 1.9,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0.0,
((0, 1), 'west'): 0,
((0, 2), 'exit'): 0}
"""
q_values_k_3_action_north: """
illegal
0.0000
illegal
"""
q_values_k_3_action_east: """
illegal
0.0000
illegal
"""
q_values_k_3_action_exit: """
0.0000
illegal
1.9000
"""
q_values_k_3_action_south: """
illegal
0.0000
illegal
"""
q_values_k_3_action_west: """
illegal
0.0000
illegal
"""
weights_k_4: """
{((0, 0), 'exit'): 2.71,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0.0,
((0, 1), 'west'): 0,
((0, 2), 'exit'): 0}
"""
q_values_k_4_action_north: """
illegal
0.0000
illegal
"""
q_values_k_4_action_east: """
illegal
0.0000
illegal
"""
q_values_k_4_action_exit: """
0.0000
illegal
2.7100
"""
q_values_k_4_action_south: """
illegal
0.0000
illegal
"""
q_values_k_4_action_west: """
illegal
0.0000
illegal
"""
weights_k_5: """
{((0, 0), 'exit'): 2.71,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0.0,
((0, 1), 'west'): 0,
((0, 2), 'exit'): -1.0}
"""
q_values_k_5_action_north: """
illegal
0.0000
illegal
"""
q_values_k_5_action_east: """
illegal
0.0000
illegal
"""
q_values_k_5_action_exit: """
-1.0000
illegal
2.7100
"""
q_values_k_5_action_south: """
illegal
0.0000
illegal
"""
q_values_k_5_action_west: """
illegal
0.0000
illegal
"""
weights_k_6: """
{((0, 0), 'exit'): 3.439,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0.0,
((0, 1), 'west'): 0,
((0, 2), 'exit'): -1.0}
"""
q_values_k_6_action_north: """
illegal
0.0000
illegal
"""
q_values_k_6_action_east: """
illegal
0.0000
illegal
"""
q_values_k_6_action_exit: """
-1.0000
illegal
3.4390
"""
q_values_k_6_action_south: """
illegal
0.0000
illegal
"""
q_values_k_6_action_west: """
illegal
0.0000
illegal
"""
weights_k_7: """
{((0, 0), 'exit'): 3.439,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0.17195000000000002,
((0, 1), 'west'): 0,
((0, 2), 'exit'): -1.0}
"""
q_values_k_7_action_north: """
illegal
0.0000
illegal
"""
q_values_k_7_action_east: """
illegal
0.0000
illegal
"""
q_values_k_7_action_exit: """
-1.0000
illegal
3.4390
"""
q_values_k_7_action_south: """
illegal
0.1720
illegal
"""
q_values_k_7_action_west: """
illegal
0.0000
illegal
"""
weights_k_8: """
{((0, 0), 'exit'): 4.0951,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0.17195000000000002,
((0, 1), 'west'): 0,
((0, 2), 'exit'): -1.0}
"""
q_values_k_8_action_north: """
illegal
0.0000
illegal
"""
q_values_k_8_action_east: """
illegal
0.0000
illegal
"""
q_values_k_8_action_exit: """
-1.0000
illegal
4.0951
"""
q_values_k_8_action_south: """
illegal
0.1720
illegal
"""
q_values_k_8_action_west: """
illegal
0.0000
illegal
"""
weights_k_9: """
{((0, 0), 'exit'): 4.68559,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0.17195000000000002,
((0, 1), 'west'): 0,
((0, 2), 'exit'): -1.0}
"""
q_values_k_9_action_north: """
illegal
0.0000
illegal
"""
q_values_k_9_action_east: """
illegal
0.0000
illegal
"""
q_values_k_9_action_exit: """
-1.0000
illegal
4.6856
"""
q_values_k_9_action_south: """
illegal
0.1720
illegal
"""
q_values_k_9_action_west: """
illegal
0.0000
illegal
"""
weights_k_100: """
{((0, 0), 'exit'): 9.817519963685992,
((0, 1), 'east'): 0.40629236674335106,
((0, 1), 'north'): -0.4534185789984799,
((0, 1), 'south'): 2.126721095524319,
((0, 1), 'west'): 0.39193283364906867,
((0, 2), 'exit'): -9.476652366972639}
"""
q_values_k_100_action_north: """
illegal
-0.4534
illegal
"""
q_values_k_100_action_east: """
illegal
0.4063
illegal
"""
q_values_k_100_action_exit: """
-9.4767
illegal
9.8175
"""
q_values_k_100_action_south: """
illegal
2.1267
illegal
"""
q_values_k_100_action_west: """
illegal
0.3919
illegal
"""


@ -0,0 +1,22 @@
class: "ApproximateQLearningTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
-10
S
10
"""
discount: "0.5"
noise: "0.0"
livingReward: "0.0"
epsilon: "0.5"
learningRate: "0.1"
numExperiences: "100"
valueIterations: "100"
iterations: "10000"
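With these parameters (learningRate 0.1, discount 0.5) and one weight per (state, action), the approximate update behaves like the tabular one, which appears to be why the ((0, 0), 'exit') weight in the solution above walks 1.0, 1.9, 2.71, 3.439, ... toward the terminal reward of 10: exiting is terminal, so each experienced exit updates w to w + 0.1 * (10 - w). A sketch of the general weight update, assuming 'features' maps feature keys to values (names illustrative, not the project's ApproximateQAgent):

alpha, gamma = 0.1, 0.5           # learningRate, discount from the .test file above

def update_weights(weights, features, reward, next_state_value):
    # difference = (r + gamma * V(s')) - Q(s, a), with Q(s, a) = sum_i w_i * f_i(s, a)
    q_sa = sum(weights.get(f, 0.0) * v for f, v in features.items())
    difference = (reward + gamma * next_state_value) - q_sa
    for f, v in features.items():
        weights[f] = weights.get(f, 0.0) + alpha * difference * v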


@ -0,0 +1,429 @@
weights_k_0: """
{((0, 0), 'exit'): 0,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0,
((0, 1), 'west'): 0,
((0, 2), 'exit'): 0}
"""
q_values_k_0_action_north: """
illegal
0.0000
illegal
"""
q_values_k_0_action_east: """
illegal
0.0000
illegal
"""
q_values_k_0_action_exit: """
0.0000
illegal
0.0000
"""
q_values_k_0_action_south: """
illegal
0.0000
illegal
"""
q_values_k_0_action_west: """
illegal
0.0000
illegal
"""
weights_k_1: """
{((0, 0), 'exit'): 1.0,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0,
((0, 1), 'west'): 0,
((0, 2), 'exit'): 0}
"""
q_values_k_1_action_north: """
illegal
0.0000
illegal
"""
q_values_k_1_action_east: """
illegal
0.0000
illegal
"""
q_values_k_1_action_exit: """
0.0000
illegal
1.0000
"""
q_values_k_1_action_south: """
illegal
0.0000
illegal
"""
q_values_k_1_action_west: """
illegal
0.0000
illegal
"""
weights_k_2: """
{((0, 0), 'exit'): 1.0,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0.0,
((0, 1), 'west'): 0,
((0, 2), 'exit'): 0}
"""
q_values_k_2_action_north: """
illegal
0.0000
illegal
"""
q_values_k_2_action_east: """
illegal
0.0000
illegal
"""
q_values_k_2_action_exit: """
0.0000
illegal
1.0000
"""
q_values_k_2_action_south: """
illegal
0.0000
illegal
"""
q_values_k_2_action_west: """
illegal
0.0000
illegal
"""
weights_k_3: """
{((0, 0), 'exit'): 1.9,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0.0,
((0, 1), 'west'): 0,
((0, 2), 'exit'): 0}
"""
q_values_k_3_action_north: """
illegal
0.0000
illegal
"""
q_values_k_3_action_east: """
illegal
0.0000
illegal
"""
q_values_k_3_action_exit: """
0.0000
illegal
1.9000
"""
q_values_k_3_action_south: """
illegal
0.0000
illegal
"""
q_values_k_3_action_west: """
illegal
0.0000
illegal
"""
weights_k_4: """
{((0, 0), 'exit'): 2.71,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0.0,
((0, 1), 'west'): 0,
((0, 2), 'exit'): 0}
"""
q_values_k_4_action_north: """
illegal
0.0000
illegal
"""
q_values_k_4_action_east: """
illegal
0.0000
illegal
"""
q_values_k_4_action_exit: """
0.0000
illegal
2.7100
"""
q_values_k_4_action_south: """
illegal
0.0000
illegal
"""
q_values_k_4_action_west: """
illegal
0.0000
illegal
"""
weights_k_5: """
{((0, 0), 'exit'): 2.71,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0.0,
((0, 1), 'west'): 0,
((0, 2), 'exit'): -1.0}
"""
q_values_k_5_action_north: """
illegal
0.0000
illegal
"""
q_values_k_5_action_east: """
illegal
0.0000
illegal
"""
q_values_k_5_action_exit: """
-1.0000
illegal
2.7100
"""
q_values_k_5_action_south: """
illegal
0.0000
illegal
"""
q_values_k_5_action_west: """
illegal
0.0000
illegal
"""
weights_k_6: """
{((0, 0), 'exit'): 3.439,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0.0,
((0, 1), 'west'): 0,
((0, 2), 'exit'): -1.0}
"""
q_values_k_6_action_north: """
illegal
0.0000
illegal
"""
q_values_k_6_action_east: """
illegal
0.0000
illegal
"""
q_values_k_6_action_exit: """
-1.0000
illegal
3.4390
"""
q_values_k_6_action_south: """
illegal
0.0000
illegal
"""
q_values_k_6_action_west: """
illegal
0.0000
illegal
"""
weights_k_7: """
{((0, 0), 'exit'): 3.439,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0.257925,
((0, 1), 'west'): 0,
((0, 2), 'exit'): -1.0}
"""
q_values_k_7_action_north: """
illegal
0.0000
illegal
"""
q_values_k_7_action_east: """
illegal
0.0000
illegal
"""
q_values_k_7_action_exit: """
-1.0000
illegal
3.4390
"""
q_values_k_7_action_south: """
illegal
0.2579
illegal
"""
q_values_k_7_action_west: """
illegal
0.0000
illegal
"""
weights_k_8: """
{((0, 0), 'exit'): 4.0951,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0.257925,
((0, 1), 'west'): 0,
((0, 2), 'exit'): -1.0}
"""
q_values_k_8_action_north: """
illegal
0.0000
illegal
"""
q_values_k_8_action_east: """
illegal
0.0000
illegal
"""
q_values_k_8_action_exit: """
-1.0000
illegal
4.0951
"""
q_values_k_8_action_south: """
illegal
0.2579
illegal
"""
q_values_k_8_action_west: """
illegal
0.0000
illegal
"""
weights_k_9: """
{((0, 0), 'exit'): 4.68559,
((0, 1), 'east'): 0,
((0, 1), 'north'): 0,
((0, 1), 'south'): 0.257925,
((0, 1), 'west'): 0,
((0, 2), 'exit'): -1.0}
"""
q_values_k_9_action_north: """
illegal
0.0000
illegal
"""
q_values_k_9_action_east: """
illegal
0.0000
illegal
"""
q_values_k_9_action_exit: """
-1.0000
illegal
4.6856
"""
q_values_k_9_action_south: """
illegal
0.2579
illegal
"""
q_values_k_9_action_west: """
illegal
0.0000
illegal
"""
weights_k_100: """
{((0, 0), 'exit'): 9.817519963685992,
((0, 1), 'east'): 0.9498968104823575,
((0, 1), 'north'): -0.66699795412272,
((0, 1), 'south'): 3.256207905310105,
((0, 1), 'west'): 0.8236280735014627,
((0, 2), 'exit'): -9.476652366972639}
"""
q_values_k_100_action_north: """
illegal
-0.6670
illegal
"""
q_values_k_100_action_east: """
illegal
0.9499
illegal
"""
q_values_k_100_action_exit: """
-9.4767
illegal
9.8175
"""
q_values_k_100_action_south: """
illegal
3.2562
illegal
"""
q_values_k_100_action_west: """
illegal
0.8236
illegal
"""


@ -0,0 +1,22 @@
class: "ApproximateQLearningTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
-10
S
10
"""
discount: "0.75"
noise: "0.25"
livingReward: "0.0"
epsilon: "0.5"
learningRate: "0.1"
numExperiences: "100"
valueIterations: "100"
iterations: "10000"


@ -0,0 +1,935 @@
weights_k_0: """
{((0, 1), 'exit'): 0,
((0, 2), 'exit'): 0,
((0, 3), 'exit'): 0,
((0, 4), 'exit'): 0,
((0, 5), 'exit'): 0,
((1, 0), 'exit'): 0,
((1, 1), 'east'): 0,
((1, 1), 'north'): 0,
((1, 1), 'south'): 0,
((1, 1), 'west'): 0,
((1, 2), 'east'): 0,
((1, 2), 'north'): 0,
((1, 2), 'south'): 0,
((1, 2), 'west'): 0,
((1, 3), 'east'): 0,
((1, 3), 'north'): 0,
((1, 3), 'south'): 0,
((1, 3), 'west'): 0,
((1, 4), 'east'): 0,
((1, 4), 'north'): 0,
((1, 4), 'south'): 0,
((1, 4), 'west'): 0,
((1, 5), 'east'): 0,
((1, 5), 'north'): 0,
((1, 5), 'south'): 0,
((1, 5), 'west'): 0,
((1, 6), 'exit'): 0,
((2, 1), 'exit'): 0,
((2, 2), 'exit'): 0,
((2, 3), 'exit'): 0,
((2, 4), 'exit'): 0,
((2, 5), 'exit'): 0}
"""
q_values_k_0_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_0_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_0_action_exit: """
__________ 0.0000 __________
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_0_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_0_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
weights_k_1: """
{((0, 1), 'exit'): 0,
((0, 2), 'exit'): 0,
((0, 3), 'exit'): 0,
((0, 4), 'exit'): 0,
((0, 5), 'exit'): -10.0,
((1, 0), 'exit'): 0,
((1, 1), 'east'): 0,
((1, 1), 'north'): 0,
((1, 1), 'south'): 0,
((1, 1), 'west'): 0,
((1, 2), 'east'): 0,
((1, 2), 'north'): 0,
((1, 2), 'south'): 0,
((1, 2), 'west'): 0,
((1, 3), 'east'): 0,
((1, 3), 'north'): 0,
((1, 3), 'south'): 0,
((1, 3), 'west'): 0,
((1, 4), 'east'): 0,
((1, 4), 'north'): 0,
((1, 4), 'south'): 0,
((1, 4), 'west'): 0,
((1, 5), 'east'): 0,
((1, 5), 'north'): 0,
((1, 5), 'south'): 0,
((1, 5), 'west'): 0,
((1, 6), 'exit'): 0,
((2, 1), 'exit'): 0,
((2, 2), 'exit'): 0,
((2, 3), 'exit'): 0,
((2, 4), 'exit'): 0,
((2, 5), 'exit'): 0}
"""
q_values_k_1_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_1_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_1_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_1_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_1_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
weights_k_2: """
{((0, 1), 'exit'): 0,
((0, 2), 'exit'): 0,
((0, 3), 'exit'): 0,
((0, 4), 'exit'): 0,
((0, 5), 'exit'): -10.0,
((1, 0), 'exit'): 0,
((1, 1), 'east'): 0,
((1, 1), 'north'): 0,
((1, 1), 'south'): 0,
((1, 1), 'west'): 0,
((1, 2), 'east'): 0,
((1, 2), 'north'): 0,
((1, 2), 'south'): 0,
((1, 2), 'west'): 0,
((1, 3), 'east'): 0,
((1, 3), 'north'): 0,
((1, 3), 'south'): 0,
((1, 3), 'west'): 0,
((1, 4), 'east'): 0,
((1, 4), 'north'): 0,
((1, 4), 'south'): 0,
((1, 4), 'west'): 0,
((1, 5), 'east'): 0,
((1, 5), 'north'): 0,
((1, 5), 'south'): 0.0,
((1, 5), 'west'): 0,
((1, 6), 'exit'): 0,
((2, 1), 'exit'): 0,
((2, 2), 'exit'): 0,
((2, 3), 'exit'): 0,
((2, 4), 'exit'): 0,
((2, 5), 'exit'): 0}
"""
q_values_k_2_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_2_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_2_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_2_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_2_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
weights_k_3: """
{((0, 1), 'exit'): -10.0,
((0, 2), 'exit'): 0,
((0, 3), 'exit'): 0,
((0, 4), 'exit'): 0,
((0, 5), 'exit'): -10.0,
((1, 0), 'exit'): 0,
((1, 1), 'east'): 0,
((1, 1), 'north'): 0,
((1, 1), 'south'): 0,
((1, 1), 'west'): 0,
((1, 2), 'east'): 0,
((1, 2), 'north'): 0,
((1, 2), 'south'): 0,
((1, 2), 'west'): 0,
((1, 3), 'east'): 0,
((1, 3), 'north'): 0,
((1, 3), 'south'): 0,
((1, 3), 'west'): 0,
((1, 4), 'east'): 0,
((1, 4), 'north'): 0,
((1, 4), 'south'): 0,
((1, 4), 'west'): 0,
((1, 5), 'east'): 0,
((1, 5), 'north'): 0,
((1, 5), 'south'): 0.0,
((1, 5), 'west'): 0,
((1, 6), 'exit'): 0,
((2, 1), 'exit'): 0,
((2, 2), 'exit'): 0,
((2, 3), 'exit'): 0,
((2, 4), 'exit'): 0,
((2, 5), 'exit'): 0}
"""
q_values_k_3_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_3_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_3_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
-10.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_3_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_3_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
weights_k_4: """
{((0, 1), 'exit'): -10.0,
((0, 2), 'exit'): 0,
((0, 3), 'exit'): 0,
((0, 4), 'exit'): -10.0,
((0, 5), 'exit'): -10.0,
((1, 0), 'exit'): 0,
((1, 1), 'east'): 0,
((1, 1), 'north'): 0,
((1, 1), 'south'): 0,
((1, 1), 'west'): 0,
((1, 2), 'east'): 0,
((1, 2), 'north'): 0,
((1, 2), 'south'): 0,
((1, 2), 'west'): 0,
((1, 3), 'east'): 0,
((1, 3), 'north'): 0,
((1, 3), 'south'): 0,
((1, 3), 'west'): 0,
((1, 4), 'east'): 0,
((1, 4), 'north'): 0,
((1, 4), 'south'): 0,
((1, 4), 'west'): 0,
((1, 5), 'east'): 0,
((1, 5), 'north'): 0,
((1, 5), 'south'): 0.0,
((1, 5), 'west'): 0,
((1, 6), 'exit'): 0,
((2, 1), 'exit'): 0,
((2, 2), 'exit'): 0,
((2, 3), 'exit'): 0,
((2, 4), 'exit'): 0,
((2, 5), 'exit'): 0}
"""
q_values_k_4_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_4_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_4_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
-10.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_4_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_4_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
weights_k_5: """
{((0, 1), 'exit'): -10.0,
((0, 2), 'exit'): 0,
((0, 3), 'exit'): 0,
((0, 4), 'exit'): -10.0,
((0, 5), 'exit'): -10.0,
((1, 0), 'exit'): 0,
((1, 1), 'east'): 0,
((1, 1), 'north'): 0,
((1, 1), 'south'): 0,
((1, 1), 'west'): 0,
((1, 2), 'east'): 0,
((1, 2), 'north'): 0,
((1, 2), 'south'): 0,
((1, 2), 'west'): 0,
((1, 3), 'east'): 0,
((1, 3), 'north'): 0,
((1, 3), 'south'): 0,
((1, 3), 'west'): 0,
((1, 4), 'east'): 0,
((1, 4), 'north'): 0,
((1, 4), 'south'): 0,
((1, 4), 'west'): 0,
((1, 5), 'east'): 0,
((1, 5), 'north'): 0,
((1, 5), 'south'): 0.0,
((1, 5), 'west'): 0,
((1, 6), 'exit'): 0,
((2, 1), 'exit'): 0,
((2, 2), 'exit'): 0,
((2, 3), 'exit'): -10.0,
((2, 4), 'exit'): 0,
((2, 5), 'exit'): 0}
"""
q_values_k_5_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_5_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_5_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
0.0000 illegal -10.0000
0.0000 illegal 0.0000
-10.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_5_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_5_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
weights_k_6: """
{((0, 1), 'exit'): -10.0,
((0, 2), 'exit'): 0,
((0, 3), 'exit'): -10.0,
((0, 4), 'exit'): -10.0,
((0, 5), 'exit'): -10.0,
((1, 0), 'exit'): 0,
((1, 1), 'east'): 0,
((1, 1), 'north'): 0,
((1, 1), 'south'): 0,
((1, 1), 'west'): 0,
((1, 2), 'east'): 0,
((1, 2), 'north'): 0,
((1, 2), 'south'): 0,
((1, 2), 'west'): 0,
((1, 3), 'east'): 0,
((1, 3), 'north'): 0,
((1, 3), 'south'): 0,
((1, 3), 'west'): 0,
((1, 4), 'east'): 0,
((1, 4), 'north'): 0,
((1, 4), 'south'): 0,
((1, 4), 'west'): 0,
((1, 5), 'east'): 0,
((1, 5), 'north'): 0,
((1, 5), 'south'): 0.0,
((1, 5), 'west'): 0,
((1, 6), 'exit'): 0,
((2, 1), 'exit'): 0,
((2, 2), 'exit'): 0,
((2, 3), 'exit'): -10.0,
((2, 4), 'exit'): 0,
((2, 5), 'exit'): 0}
"""
q_values_k_6_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_6_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_6_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
-10.0000 illegal -10.0000
0.0000 illegal 0.0000
-10.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_6_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_6_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
weights_k_7: """
{((0, 1), 'exit'): -10.0,
((0, 2), 'exit'): 0,
((0, 3), 'exit'): -10.0,
((0, 4), 'exit'): -10.0,
((0, 5), 'exit'): -10.0,
((1, 0), 'exit'): 0,
((1, 1), 'east'): 0,
((1, 1), 'north'): 0,
((1, 1), 'south'): 0,
((1, 1), 'west'): 0,
((1, 2), 'east'): 0,
((1, 2), 'north'): 0,
((1, 2), 'south'): 0,
((1, 2), 'west'): 0,
((1, 3), 'east'): 0,
((1, 3), 'north'): 0,
((1, 3), 'south'): 0.0,
((1, 3), 'west'): 0,
((1, 4), 'east'): 0,
((1, 4), 'north'): 0,
((1, 4), 'south'): 0,
((1, 4), 'west'): 0,
((1, 5), 'east'): 0,
((1, 5), 'north'): 0,
((1, 5), 'south'): 0.0,
((1, 5), 'west'): 0,
((1, 6), 'exit'): 0,
((2, 1), 'exit'): 0,
((2, 2), 'exit'): 0,
((2, 3), 'exit'): -10.0,
((2, 4), 'exit'): 0,
((2, 5), 'exit'): 0}
"""
q_values_k_7_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_7_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_7_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
-10.0000 illegal -10.0000
0.0000 illegal 0.0000
-10.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_7_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_7_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
weights_k_8: """
{((0, 1), 'exit'): -10.0,
((0, 2), 'exit'): -10.0,
((0, 3), 'exit'): -10.0,
((0, 4), 'exit'): -10.0,
((0, 5), 'exit'): -10.0,
((1, 0), 'exit'): 0,
((1, 1), 'east'): 0,
((1, 1), 'north'): 0,
((1, 1), 'south'): 0,
((1, 1), 'west'): 0,
((1, 2), 'east'): 0,
((1, 2), 'north'): 0,
((1, 2), 'south'): 0,
((1, 2), 'west'): 0,
((1, 3), 'east'): 0,
((1, 3), 'north'): 0,
((1, 3), 'south'): 0.0,
((1, 3), 'west'): 0,
((1, 4), 'east'): 0,
((1, 4), 'north'): 0,
((1, 4), 'south'): 0,
((1, 4), 'west'): 0,
((1, 5), 'east'): 0,
((1, 5), 'north'): 0,
((1, 5), 'south'): 0.0,
((1, 5), 'west'): 0,
((1, 6), 'exit'): 0,
((2, 1), 'exit'): 0,
((2, 2), 'exit'): 0,
((2, 3), 'exit'): -10.0,
((2, 4), 'exit'): 0,
((2, 5), 'exit'): 0}
"""
q_values_k_8_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_8_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_8_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
-10.0000 illegal -10.0000
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_8_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_8_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
weights_k_9: """
{((0, 1), 'exit'): -10.0,
((0, 2), 'exit'): -10.0,
((0, 3), 'exit'): -10.0,
((0, 4), 'exit'): -10.0,
((0, 5), 'exit'): -10.0,
((1, 0), 'exit'): 0.1,
((1, 1), 'east'): 0,
((1, 1), 'north'): 0,
((1, 1), 'south'): 0,
((1, 1), 'west'): 0,
((1, 2), 'east'): 0,
((1, 2), 'north'): 0,
((1, 2), 'south'): 0,
((1, 2), 'west'): 0,
((1, 3), 'east'): 0,
((1, 3), 'north'): 0,
((1, 3), 'south'): 0.0,
((1, 3), 'west'): 0,
((1, 4), 'east'): 0,
((1, 4), 'north'): 0,
((1, 4), 'south'): 0,
((1, 4), 'west'): 0,
((1, 5), 'east'): 0,
((1, 5), 'north'): 0,
((1, 5), 'south'): 0.0,
((1, 5), 'west'): 0,
((1, 6), 'exit'): 0,
((2, 1), 'exit'): 0,
((2, 2), 'exit'): 0,
((2, 3), 'exit'): -10.0,
((2, 4), 'exit'): 0,
((2, 5), 'exit'): 0}
"""
q_values_k_9_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_9_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_9_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
-10.0000 illegal -10.0000
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
__________ 0.1000 __________
"""
q_values_k_9_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_9_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
weights_k_500: """
{((0, 1), 'exit'): -92.82102012308148,
((0, 2), 'exit'): -97.21871610556306,
((0, 3), 'exit'): -89.05810108684878,
((0, 4), 'exit'): -97.74716004550608,
((0, 5), 'exit'): -96.56631617970748,
((1, 0), 'exit'): 0.9576088417247839,
((1, 1), 'east'): -31.68839649871871,
((1, 1), 'north'): -0.5871409700255297,
((1, 1), 'south'): -5.571799344704395,
((1, 1), 'west'): -20.777007017445538,
((1, 2), 'east'): -43.595242197319,
((1, 2), 'north'): -1.264202431807023,
((1, 2), 'south'): -8.401530599975509,
((1, 2), 'west'): -16.283916171605192,
((1, 3), 'east'): -3.6956691,
((1, 3), 'north'): -0.16712710492783758,
((1, 3), 'south'): -3.4722840178579073,
((1, 3), 'west'): -40.58867937480968,
((1, 4), 'east'): -26.553386621338632,
((1, 4), 'north'): -0.799493322153628,
((1, 4), 'south'): -6.727671187497919,
((1, 4), 'west'): -39.06095135014759,
((1, 5), 'east'): -17.067638934181446,
((1, 5), 'north'): -5.864753060887024,
((1, 5), 'south'): -6.83769420759525,
((1, 5), 'west'): -27.062643066307515,
((1, 6), 'exit'): 9.353891811077332,
((2, 1), 'exit'): -97.21871610556306,
((2, 2), 'exit'): -87.84233454094309,
((2, 3), 'exit'): -96.90968456173674,
((2, 4), 'exit'): -94.185026299696,
((2, 5), 'exit'): -96.90968456173674}
"""
q_values_k_500_action_north: """
__________ illegal __________
illegal -5.8648 illegal
illegal -0.7995 illegal
illegal -0.1671 illegal
illegal -1.2642 illegal
illegal -0.5871 illegal
__________ illegal __________
"""
q_values_k_500_action_east: """
__________ illegal __________
illegal -17.0676 illegal
illegal -26.5534 illegal
illegal -3.6957 illegal
illegal -43.5952 illegal
illegal -31.6884 illegal
__________ illegal __________
"""
q_values_k_500_action_exit: """
__________ 9.3539 __________
-96.5663 illegal -96.9097
-97.7472 illegal -94.1850
-89.0581 illegal -96.9097
-97.2187 illegal -87.8423
-92.8210 illegal -97.2187
__________ 0.9576 __________
"""
q_values_k_500_action_south: """
__________ illegal __________
illegal -6.8377 illegal
illegal -6.7277 illegal
illegal -3.4723 illegal
illegal -8.4015 illegal
illegal -5.5718 illegal
__________ illegal __________
"""
q_values_k_500_action_west: """
__________ illegal __________
illegal -27.0626 illegal
illegal -39.0610 illegal
illegal -40.5887 illegal
illegal -16.2839 illegal
illegal -20.7770 illegal
__________ illegal __________
"""


@ -0,0 +1,27 @@
class: "ApproximateQLearningTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
# 10 #
-100 _ -100
-100 _ -100
-100 _ -100
-100 _ -100
-100 S -100
# 1 #
"""
gridName: "bridgeGrid"
discount: "0.85"
noise: "0.1"
livingReward: "0.0"
epsilon: "0.5"
learningRate: "0.1"
numExperiences: "500"
valueIterations: "100"
iterations: "10000"

File diff suppressed because it is too large.


@ -0,0 +1,24 @@
class: "ApproximateQLearningTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
-10 _ 10 _ _
-10 _ # _ _
-10 _ 1 _ _
-10 _ # # _
-10 S _ _ _
"""
discount: "0.9"
noise: "0.2"
livingReward: "0.0"
epsilon: "0.2"
learningRate: "0.1"
numExperiences: "3000"
valueIterations: "100"
iterations: "10000"
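Unlike the per-(state, action) weights above, the solution file that follows keys its weights by generic features ('action=...', 'x=...', 'y=...', and raw coordinates), so a single Q-value is the dot product of the weight vector with whatever features are active for that (state, action). A sketch of that evaluation, assuming a feature extractor that returns such a dict:

def q_value(weights, features):
    # Q(s, a) = sum_i w_i * f_i(s, a), over the features the extractor activates.
    return sum(weights.get(f, 0.0) * value for f, value in features.items())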


@ -0,0 +1,880 @@
weights_k_0: """
{'action=east': 0,
'action=exit': 0,
'action=north': 0,
'action=south': 0,
'action=west': 0,
'x=0': 0,
'x=1': 0,
'x=2': 0,
'x=3': 0,
'x=4': 0,
'y=0': 0,
'y=1': 0,
'y=2': 0,
'y=3': 0,
'y=4': 0,
(0, 0): 0,
(0, 1): 0,
(0, 2): 0,
(0, 3): 0,
(0, 4): 0,
(1, 0): 0,
(1, 1): 0,
(1, 2): 0,
(1, 3): 0,
(1, 4): 0,
(2, 0): 0,
(2, 2): 0,
(2, 4): 0,
(3, 0): 0,
(3, 2): 0,
(3, 3): 0,
(3, 4): 0,
(4, 0): 0,
(4, 1): 0,
(4, 2): 0,
(4, 3): 0,
(4, 4): 0}
"""
q_values_k_0_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_0_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_0_action_exit: """
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
0.0000 illegal illegal illegal illegal
"""
q_values_k_0_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_0_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
weights_k_1: """
{'action=east': 0,
'action=exit': 0,
'action=north': 0,
'action=south': 0.0,
'action=west': 0,
'x=0': 0,
'x=1': 0.0,
'x=2': 0,
'x=3': 0,
'x=4': 0,
'y=0': 0,
'y=1': 0.0,
'y=2': 0,
'y=3': 0,
'y=4': 0,
(0, 0): 0,
(0, 1): 0,
(0, 2): 0,
(0, 3): 0,
(0, 4): 0,
(1, 0): 0.0,
(1, 1): 0,
(1, 2): 0,
(1, 3): 0,
(1, 4): 0,
(2, 0): 0,
(2, 2): 0,
(2, 4): 0,
(3, 0): 0,
(3, 2): 0,
(3, 3): 0,
(3, 4): 0,
(4, 0): 0,
(4, 1): 0,
(4, 2): 0,
(4, 3): 0,
(4, 4): 0}
"""
q_values_k_1_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_1_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_1_action_exit: """
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
0.0000 illegal illegal illegal illegal
"""
q_values_k_1_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_1_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
weights_k_2: """
{'action=east': 0,
'action=exit': 0,
'action=north': 0,
'action=south': 0.0,
'action=west': 0,
'x=0': 0,
'x=1': 0.0,
'x=2': 0,
'x=3': 0.0,
'x=4': 0,
'y=0': 0,
'y=1': 0.0,
'y=2': 0,
'y=3': 0.0,
'y=4': 0,
(0, 0): 0,
(0, 1): 0,
(0, 2): 0,
(0, 3): 0,
(0, 4): 0,
(1, 0): 0.0,
(1, 1): 0,
(1, 2): 0,
(1, 3): 0,
(1, 4): 0,
(2, 0): 0,
(2, 2): 0,
(2, 4): 0,
(3, 0): 0.0,
(3, 2): 0,
(3, 3): 0,
(3, 4): 0,
(4, 0): 0,
(4, 1): 0,
(4, 2): 0,
(4, 3): 0,
(4, 4): 0}
"""
q_values_k_2_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_2_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_2_action_exit: """
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
0.0000 illegal illegal illegal illegal
"""
q_values_k_2_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_2_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
weights_k_3: """
{'action=east': 0,
'action=exit': -1.0,
'action=north': 0,
'action=south': 0.0,
'action=west': 0,
'x=0': -1.0,
'x=1': 0.0,
'x=2': 0,
'x=3': 0.0,
'x=4': 0,
'y=0': -1.0,
'y=1': 0.0,
'y=2': 0,
'y=3': 0.0,
'y=4': 0,
(0, 0): -1.0,
(0, 1): 0,
(0, 2): 0,
(0, 3): 0,
(0, 4): 0,
(1, 0): 0.0,
(1, 1): 0,
(1, 2): 0,
(1, 3): 0,
(1, 4): 0,
(2, 0): 0,
(2, 2): 0,
(2, 4): 0,
(3, 0): 0.0,
(3, 2): 0,
(3, 3): 0,
(3, 4): 0,
(4, 0): 0,
(4, 1): 0,
(4, 2): 0,
(4, 3): 0,
(4, 4): 0}
"""
q_values_k_3_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_3_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_3_action_exit: """
-3.0000 illegal -1.0000 illegal illegal
-3.0000 illegal __________ illegal illegal
-3.0000 illegal -1.0000 illegal illegal
-3.0000 illegal __________ __________ illegal
-4.0000 illegal illegal illegal illegal
"""
q_values_k_3_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_3_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
weights_k_4: """
{'action=east': 0.0,
'action=exit': -1.0,
'action=north': 0,
'action=south': 0.0,
'action=west': 0,
'x=0': -1.0,
'x=1': 0.0,
'x=2': 0,
'x=3': 0.0,
'x=4': 0,
'y=0': -1.0,
'y=1': 0.0,
'y=2': 0,
'y=3': 0.0,
'y=4': 0,
(0, 0): -1.0,
(0, 1): 0,
(0, 2): 0,
(0, 3): 0,
(0, 4): 0,
(1, 0): 0.0,
(1, 1): 0,
(1, 2): 0,
(1, 3): 0,
(1, 4): 0,
(2, 0): 0,
(2, 2): 0,
(2, 4): 0,
(3, 0): 0.0,
(3, 2): 0,
(3, 3): 0,
(3, 4): 0,
(4, 0): 0,
(4, 1): 0,
(4, 2): 0,
(4, 3): 0,
(4, 4): 0}
"""
q_values_k_4_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_4_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_4_action_exit: """
-3.0000 illegal -1.0000 illegal illegal
-3.0000 illegal __________ illegal illegal
-3.0000 illegal -1.0000 illegal illegal
-3.0000 illegal __________ __________ illegal
-4.0000 illegal illegal illegal illegal
"""
q_values_k_4_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_4_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
weights_k_5: """
{'action=east': 0.0,
'action=exit': -1.0,
'action=north': 0,
'action=south': 0.0,
'action=west': 0.0,
'x=0': -1.0,
'x=1': 0.0,
'x=2': 0,
'x=3': 0.0,
'x=4': 0.0,
'y=0': -1.0,
'y=1': 0.0,
'y=2': 0,
'y=3': 0.0,
'y=4': 0.0,
(0, 0): -1.0,
(0, 1): 0,
(0, 2): 0,
(0, 3): 0,
(0, 4): 0,
(1, 0): 0.0,
(1, 1): 0,
(1, 2): 0,
(1, 3): 0,
(1, 4): 0,
(2, 0): 0,
(2, 2): 0,
(2, 4): 0,
(3, 0): 0.0,
(3, 2): 0,
(3, 3): 0,
(3, 4): 0,
(4, 0): 0,
(4, 1): 0.0,
(4, 2): 0,
(4, 3): 0,
(4, 4): 0}
"""
q_values_k_5_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_5_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_5_action_exit: """
-3.0000 illegal -1.0000 illegal illegal
-3.0000 illegal __________ illegal illegal
-3.0000 illegal -1.0000 illegal illegal
-3.0000 illegal __________ __________ illegal
-4.0000 illegal illegal illegal illegal
"""
q_values_k_5_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_5_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
weights_k_6: """
{'action=east': 0.0,
'action=exit': -1.7000000000000002,
'action=north': 0,
'action=south': 0.0,
'action=west': 0.0,
'x=0': -1.7000000000000002,
'x=1': 0.0,
'x=2': 0,
'x=3': 0.0,
'x=4': 0.0,
'y=0': -1.7000000000000002,
'y=1': 0.0,
'y=2': 0,
'y=3': 0.0,
'y=4': 0.0,
(0, 0): -1.0,
(0, 1): 0,
(0, 2): 0,
(0, 3): -0.7000000000000001,
(0, 4): 0,
(1, 0): 0.0,
(1, 1): 0,
(1, 2): 0,
(1, 3): 0,
(1, 4): 0,
(2, 0): 0,
(2, 2): 0,
(2, 4): 0,
(3, 0): 0.0,
(3, 2): 0,
(3, 3): 0,
(3, 4): 0,
(4, 0): 0,
(4, 1): 0.0,
(4, 2): 0,
(4, 3): 0,
(4, 4): 0}
"""
q_values_k_6_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_6_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_6_action_exit: """
-5.1000 illegal -1.7000 illegal illegal
-5.8000 illegal __________ illegal illegal
-5.1000 illegal -1.7000 illegal illegal
-5.1000 illegal __________ __________ illegal
-6.1000 illegal illegal illegal illegal
"""
q_values_k_6_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_6_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
weights_k_7: """
{'action=east': 0.0,
'action=exit': -1.4300000000000002,
'action=north': 0,
'action=south': 0.0,
'action=west': 0.0,
'x=0': -1.7000000000000002,
'x=1': 0.0,
'x=2': 0.27,
'x=3': 0.0,
'x=4': 0.0,
'y=0': -1.7000000000000002,
'y=1': 0.0,
'y=2': 0.27,
'y=3': 0.0,
'y=4': 0.0,
(0, 0): -1.0,
(0, 1): 0,
(0, 2): 0,
(0, 3): -0.7000000000000001,
(0, 4): 0,
(1, 0): 0.0,
(1, 1): 0,
(1, 2): 0,
(1, 3): 0,
(1, 4): 0,
(2, 0): 0,
(2, 2): 0.27,
(2, 4): 0,
(3, 0): 0.0,
(3, 2): 0,
(3, 3): 0,
(3, 4): 0,
(4, 0): 0,
(4, 1): 0.0,
(4, 2): 0,
(4, 3): 0,
(4, 4): 0}
"""
q_values_k_7_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.5400 0.0000 0.0000
"""
q_values_k_7_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.5400 0.0000 0.0000
"""
q_values_k_7_action_exit: """
-4.8300 illegal -0.8900 illegal illegal
-5.5300 illegal __________ illegal illegal
-4.8300 illegal -0.6200 illegal illegal
-4.8300 illegal __________ __________ illegal
-5.8300 illegal illegal illegal illegal
"""
q_values_k_7_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.5400 0.0000 0.0000
"""
q_values_k_7_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.5400 0.0000 0.0000
"""
weights_k_8: """
{'action=east': 0.0,
'action=exit': -1.947,
'action=north': 0,
'action=south': 0.0,
'action=west': 0.0,
'x=0': -2.217,
'x=1': 0.0,
'x=2': 0.27,
'x=3': 0.0,
'x=4': 0.0,
'y=0': -2.217,
'y=1': 0.0,
'y=2': 0.27,
'y=3': 0.0,
'y=4': 0.0,
(0, 0): -1.0,
(0, 1): -0.517,
(0, 2): 0,
(0, 3): -0.7000000000000001,
(0, 4): 0,
(1, 0): 0.0,
(1, 1): 0,
(1, 2): 0,
(1, 3): 0,
(1, 4): 0,
(2, 0): 0,
(2, 2): 0.27,
(2, 4): 0,
(3, 0): 0.0,
(3, 2): 0,
(3, 3): 0,
(3, 4): 0,
(4, 0): 0,
(4, 1): 0.0,
(4, 2): 0,
(4, 3): 0,
(4, 4): 0}
"""
q_values_k_8_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.5400 0.0000 0.0000
"""
q_values_k_8_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.5400 0.0000 0.0000
"""
q_values_k_8_action_exit: """
-6.3810 illegal -1.4070 illegal illegal
-7.0810 illegal __________ illegal illegal
-6.3810 illegal -1.1370 illegal illegal
-6.8980 illegal __________ __________ illegal
-7.3810 illegal illegal illegal illegal
"""
q_values_k_8_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.5400 0.0000 0.0000
"""
q_values_k_8_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.5400 0.0000 0.0000
"""
weights_k_9: """
{'action=east': 0.0,
'action=exit': -1.947,
'action=north': -0.62082,
'action=south': 0.0,
'action=west': 0.0,
'x=0': -2.217,
'x=1': -0.62082,
'x=2': 0.27,
'x=3': 0.0,
'x=4': 0.0,
'y=0': -2.217,
'y=1': -0.62082,
'y=2': 0.27,
'y=3': 0.0,
'y=4': 0.0,
(0, 0): -1.0,
(0, 1): -0.517,
(0, 2): 0,
(0, 3): -0.7000000000000001,
(0, 4): 0,
(1, 0): 0.0,
(1, 1): -0.62082,
(1, 2): 0,
(1, 3): 0,
(1, 4): 0,
(2, 0): 0,
(2, 2): 0.27,
(2, 4): 0,
(3, 0): 0.0,
(3, 2): 0,
(3, 3): 0,
(3, 4): 0,
(4, 0): 0,
(4, 1): 0.0,
(4, 2): 0,
(4, 3): 0,
(4, 4): 0}
"""
q_values_k_9_action_north: """
illegal -1.8625 illegal -0.6208 -0.6208
illegal -1.8625 __________ -0.6208 -0.6208
illegal -1.8625 illegal -0.6208 -0.6208
illegal -2.4833 __________ __________ -0.6208
illegal -1.8625 -0.0808 -0.6208 -0.6208
"""
q_values_k_9_action_east: """
illegal -1.2416 illegal 0.0000 0.0000
illegal -1.2416 __________ 0.0000 0.0000
illegal -1.2416 illegal 0.0000 0.0000
illegal -1.8625 __________ __________ 0.0000
illegal -1.2416 0.5400 0.0000 0.0000
"""
q_values_k_9_action_exit: """
-6.3810 illegal -1.4070 illegal illegal
-7.0810 illegal __________ illegal illegal
-6.3810 illegal -1.1370 illegal illegal
-6.8980 illegal __________ __________ illegal
-7.3810 illegal illegal illegal illegal
"""
q_values_k_9_action_south: """
illegal -1.2416 illegal 0.0000 0.0000
illegal -1.2416 __________ 0.0000 0.0000
illegal -1.2416 illegal 0.0000 0.0000
illegal -1.8625 __________ __________ 0.0000
illegal -1.2416 0.5400 0.0000 0.0000
"""
q_values_k_9_action_west: """
illegal -1.2416 illegal 0.0000 0.0000
illegal -1.2416 __________ 0.0000 0.0000
illegal -1.2416 illegal 0.0000 0.0000
illegal -1.8625 __________ __________ 0.0000
illegal -1.2416 0.5400 0.0000 0.0000
"""
weights_k_3000: """
{'action=east': 6.719916513522846,
'action=exit': -2.2444981376861555,
'action=north': 4.568574519923728,
'action=south': 3.761510351874819,
'action=west': 1.2828606322891556,
'x=0': -3.604063955849794,
'x=1': 0.6731476152061693,
'x=2': 4.000208353074704,
'x=3': 5.988311380073477,
'x=4': 7.0307604874198235,
'y=0': -3.604063955849794,
'y=1': 0.6731476152061693,
'y=2': 4.000208353074704,
'y=3': 5.988311380073477,
'y=4': 7.0307604874198235,
(0, 0): -0.7073688447583666,
(0, 1): -0.7542862401704076,
(0, 2): -0.7043014501203066,
(0, 3): -0.7433344649617668,
(0, 4): -0.6947729558389527,
(1, 0): 2.364273811399719,
(1, 1): -0.2695405704605499,
(1, 2): -0.7105979212702271,
(1, 3): -1.4866826750327933,
(1, 4): 0.7756949705700219,
(2, 0): 2.64064253491107,
(2, 2): -3.7381118310263166,
(2, 4): 5.097677649189953,
(3, 0): 2.505262939441149,
(3, 2): 0.27218788923837256,
(3, 3): 2.2611084206093195,
(3, 4): 0.9497521307846304,
(4, 0): 1.7330586015291545,
(4, 1): 0.980194046153168,
(4, 2): 0.78786289128181,
(4, 3): 1.493343270762865,
(4, 4): 2.0363016776928333}
"""
q_values_k_3000_action_north: """
illegal 6.6906 illegal 17.4949 20.6664
illegal 4.4282 __________ 18.8063 20.1234
illegal 5.2043 illegal 16.8174 19.4180
illegal 5.6453 __________ __________ 19.6103
illegal 8.2791 15.2096 19.0505 20.3632
"""
q_values_k_3000_action_east: """
illegal 8.8419 illegal 19.6463 22.8177
illegal 6.5795 __________ 20.9576 22.2748
illegal 7.3556 illegal 18.9687 21.5693
illegal 7.7967 __________ __________ 21.7616
illegal 10.4305 17.3610 21.2018 22.5145
"""
q_values_k_3000_action_exit: """
-10.1474 illegal 10.8536 illegal illegal
-10.1960 illegal __________ illegal illegal
-10.1569 illegal 2.0178 illegal illegal
-10.2069 illegal __________ __________ illegal
-10.1600 illegal illegal illegal illegal
"""
q_values_k_3000_action_south: """
illegal 5.8835 illegal 16.6879 19.8593
illegal 3.6211 __________ 17.9992 19.3164
illegal 4.3972 illegal 16.0103 18.6109
illegal 4.8383 __________ __________ 18.8032
illegal 7.4721 14.4026 18.2434 19.5561
"""
q_values_k_3000_action_west: """
illegal 3.4049 illegal 14.2092 17.3807
illegal 1.1425 __________ 15.5206 16.8377
illegal 1.9186 illegal 13.5317 16.1322
illegal 2.3596 __________ __________ 16.3246
illegal 4.9934 11.9239 15.7647 17.0774
"""

View file

@ -0,0 +1,25 @@
class: "ApproximateQLearningTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
-10 _ 10 _ _
-10 _ # _ _
-10 _ 1 _ _
-10 _ # # _
-10 S _ _ _
"""
discount: "0.9"
noise: "0.2"
livingReward: "0.0"
epsilon: "0.2"
learningRate: "0.1"
numExperiences: "3000"
valueIterations: "100"
iterations: "10000"
extractor: "CoordinateExtractor"

View file

@ -0,0 +1,2 @@
max_points: "3"
class: "PassAllTestsQuestion"

View file

@ -0,0 +1,81 @@
# textDisplay.py
# --------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
import time
try:
import pacman
except:
pass
DRAW_EVERY = 1
SLEEP_TIME = 0 # This can be overwritten by __init__
DISPLAY_MOVES = False
QUIET = False # Suppresses output
class NullGraphics:
def initialize(self, state, isBlue = False):
pass
def update(self, state):
pass
def checkNullDisplay(self):
return True
def pause(self):
time.sleep(SLEEP_TIME)
def draw(self, state):
print state
def updateDistributions(self, dist):
pass
def finish(self):
pass
class PacmanGraphics:
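# Minimal text display: update() prints the ASCII game state every DRAW_EVERY
# turns (and on win/lose) and sleeps SLEEP_TIME seconds between frames.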
def __init__(self, speed=None):
if speed != None:
global SLEEP_TIME
SLEEP_TIME = speed
def initialize(self, state, isBlue = False):
self.draw(state)
self.pause()
self.turn = 0
self.agentCounter = 0
def update(self, state):
numAgents = len(state.agentStates)
self.agentCounter = (self.agentCounter + 1) % numAgents
if self.agentCounter == 0:
self.turn += 1
if DISPLAY_MOVES:
ghosts = [pacman.nearestPoint(state.getGhostPosition(i)) for i in range(1, numAgents)]
print "%4d) P: %-8s" % (self.turn, str(pacman.nearestPoint(state.getPacmanPosition()))),'| Score: %-5d' % state.score,'| Ghosts:', ghosts
if self.turn % DRAW_EVERY == 0:
self.draw(state)
self.pause()
if state._win or state._lose:
self.draw(state)
def pause(self):
time.sleep(SLEEP_TIME)
def draw(self, state):
print state
def finish(self):
pass

View file

@ -0,0 +1,324 @@
# textGridworldDisplay.py
# -----------------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
import util
class TextGridworldDisplay:
def __init__(self, gridworld):
self.gridworld = gridworld
def start(self):
pass
def pause(self):
pass
def displayValues(self, agent, currentState = None, message = None):
if message != None:
print message
values = util.Counter()
policy = {}
states = self.gridworld.getStates()
for state in states:
values[state] = agent.getValue(state)
policy[state] = agent.getPolicy(state)
prettyPrintValues(self.gridworld, values, policy, currentState)
def displayNullValues(self, agent, currentState = None, message = None):
if message != None: print message
prettyPrintNullValues(self.gridworld, currentState)
def displayQValues(self, agent, currentState = None, message = None):
if message != None: print message
qValues = util.Counter()
states = self.gridworld.getStates()
for state in states:
for action in self.gridworld.getPossibleActions(state):
qValues[(state, action)] = agent.getQValue(state, action)
prettyPrintQValues(self.gridworld, qValues, currentState)
def prettyPrintValues(gridWorld, values, policy=None, currentState = None):
grid = gridWorld.grid
maxLen = 11
newRows = []
for y in range(grid.height):
newRow = []
for x in range(grid.width):
state = (x, y)
value = values[state]
action = None
if policy != None and state in policy:
action = policy[state]
actions = gridWorld.getPossibleActions(state)
if action not in actions and 'exit' in actions:
action = 'exit'
valString = None
if action == 'exit':
valString = border('%.2f' % value)
else:
valString = '\n\n%.2f\n\n' % value
valString += ' '*maxLen
if grid[x][y] == 'S':
valString = '\n\nS: %.2f\n\n' % value
valString += ' '*maxLen
if grid[x][y] == '#':
valString = '\n#####\n#####\n#####\n'
valString += ' '*maxLen
pieces = [valString]
text = ("\n".join(pieces)).split('\n')
if currentState == state:
l = len(text[1])
if l == 0:
text[1] = '*'
else:
text[1] = "|" + ' ' * int((l-1)/2-1) + '*' + ' ' * int((l)/2-1) + "|"
if action == 'east':
text[2] = ' ' + text[2] + ' >'
elif action == 'west':
text[2] = '< ' + text[2] + ' '
elif action == 'north':
text[0] = ' ' * int(maxLen/2) + '^' +' ' * int(maxLen/2)
elif action == 'south':
text[4] = ' ' * int(maxLen/2) + 'v' +' ' * int(maxLen/2)
newCell = "\n".join(text)
newRow.append(newCell)
newRows.append(newRow)
numCols = grid.width
for rowNum, row in enumerate(newRows):
row.insert(0,"\n\n"+str(rowNum))
newRows.reverse()
colLabels = [str(colNum) for colNum in range(numCols)]
colLabels.insert(0,' ')
finalRows = [colLabels] + newRows
print indent(finalRows,separateRows=True,delim='|', prefix='|',postfix='|', justify='center',hasHeader=True)
def prettyPrintNullValues(gridWorld, currentState = None):
grid = gridWorld.grid
maxLen = 11
newRows = []
for y in range(grid.height):
newRow = []
for x in range(grid.width):
state = (x, y)
# value = values[state]
action = None
# if policy != None and state in policy:
# action = policy[state]
#
actions = gridWorld.getPossibleActions(state)
if action not in actions and 'exit' in actions:
action = 'exit'
valString = None
# if action == 'exit':
# valString = border('%.2f' % value)
# else:
# valString = '\n\n%.2f\n\n' % value
# valString += ' '*maxLen
if grid[x][y] == 'S':
valString = '\n\nS\n\n'
valString += ' '*maxLen
elif grid[x][y] == '#':
valString = '\n#####\n#####\n#####\n'
valString += ' '*maxLen
elif type(grid[x][y]) == float or type(grid[x][y]) == int:
valString = border('%.2f' % float(grid[x][y]))
else: valString = border(' ')
pieces = [valString]
text = ("\n".join(pieces)).split('\n')
if currentState == state:
l = len(text[1])
if l == 0:
text[1] = '*'
else:
text[1] = "|" + ' ' * int((l-1)/2-1) + '*' + ' ' * int((l)/2-1) + "|"
if action == 'east':
text[2] = ' ' + text[2] + ' >'
elif action == 'west':
text[2] = '< ' + text[2] + ' '
elif action == 'north':
text[0] = ' ' * int(maxLen/2) + '^' +' ' * int(maxLen/2)
elif action == 'south':
text[4] = ' ' * int(maxLen/2) + 'v' +' ' * int(maxLen/2)
newCell = "\n".join(text)
newRow.append(newCell)
newRows.append(newRow)
numCols = grid.width
for rowNum, row in enumerate(newRows):
row.insert(0,"\n\n"+str(rowNum))
newRows.reverse()
colLabels = [str(colNum) for colNum in range(numCols)]
colLabels.insert(0,' ')
finalRows = [colLabels] + newRows
print indent(finalRows,separateRows=True,delim='|', prefix='|',postfix='|', justify='center',hasHeader=True)
def prettyPrintQValues(gridWorld, qValues, currentState=None):
grid = gridWorld.grid
maxLen = 11
newRows = []
for y in range(grid.height):
newRow = []
for x in range(grid.width):
state = (x, y)
actions = gridWorld.getPossibleActions(state)
if actions == None or len(actions) == 0:
actions = [None]
bestQ = max([qValues[(state, action)] for action in actions])
bestActions = [action for action in actions if qValues[(state, action)] == bestQ]
# display cell
qStrings = dict([(action, "%.2f" % qValues[(state, action)]) for action in actions])
northString = ('north' in qStrings and qStrings['north']) or ' '
southString = ('south' in qStrings and qStrings['south']) or ' '
eastString = ('east' in qStrings and qStrings['east']) or ' '
westString = ('west' in qStrings and qStrings['west']) or ' '
exitString = ('exit' in qStrings and qStrings['exit']) or ' '
eastLen = len(eastString)
westLen = len(westString)
if eastLen < westLen:
eastString = ' '*(westLen-eastLen)+eastString
if westLen < eastLen:
westString = westString+' '*(eastLen-westLen)
if 'north' in bestActions:
northString = '/'+northString+'\\'
if 'south' in bestActions:
southString = '\\'+southString+'/'
if 'east' in bestActions:
eastString = ''+eastString+'>'
else:
eastString = ''+eastString+' '
if 'west' in bestActions:
westString = '<'+westString+''
else:
westString = ' '+westString+''
if 'exit' in bestActions:
exitString = '[ '+exitString+' ]'
ewString = westString + " " + eastString
if state == currentState:
ewString = westString + " * " + eastString
if state == gridWorld.getStartState():
ewString = westString + " S " + eastString
if state == currentState and state == gridWorld.getStartState():
ewString = westString + " S:* " + eastString
text = [northString, "\n"+exitString, ewString, ' '*maxLen+"\n", southString]
if grid[x][y] == '#':
text = ['', '\n#####\n#####\n#####', '']
newCell = "\n".join(text)
newRow.append(newCell)
newRows.append(newRow)
numCols = grid.width
for rowNum, row in enumerate(newRows):
row.insert(0,"\n\n\n"+str(rowNum))
newRows.reverse()
colLabels = [str(colNum) for colNum in range(numCols)]
colLabels.insert(0,' ')
finalRows = [colLabels] + newRows
print indent(finalRows,separateRows=True,delim='|',prefix='|',postfix='|', justify='center',hasHeader=True)
def border(text):
length = len(text)
pieces = ['-' * (length+2), '|'+' ' * (length+2)+'|', ' | '+text+' | ', '|'+' ' * (length+2)+'|','-' * (length+2)]
return '\n'.join(pieces)
# INDENTING CODE
# Indenting code based on a post from George Sakkis
# (http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/267662)
import cStringIO,operator
def indent(rows, hasHeader=False, headerChar='-', delim=' | ', justify='left',
separateRows=False, prefix='', postfix='', wrapfunc=lambda x:x):
"""Indents a table by column.
- rows: A sequence of sequences of items, one sequence per row.
- hasHeader: True if the first row consists of the columns' names.
- headerChar: Character to be used for the row separator line
(if hasHeader==True or separateRows==True).
- delim: The column delimiter.
- justify: Determines how data are justified in their column.
Valid values are 'left','right' and 'center'.
- separateRows: True if rows are to be separated by a line
of 'headerChar's.
- prefix: A string prepended to each printed row.
- postfix: A string appended to each printed row.
- wrapfunc: A function f(text) for wrapping text; each element in
the table is first wrapped by this function."""
# closure for breaking logical rows to physical, using wrapfunc
def rowWrapper(row):
newRows = [wrapfunc(item).split('\n') for item in row]
return [[substr or '' for substr in item] for item in map(None,*newRows)]
# break each logical row into one or more physical ones
logicalRows = [rowWrapper(row) for row in rows]
# columns of physical rows
columns = map(None,*reduce(operator.add,logicalRows))
# get the maximum of each column by the string length of its items
maxWidths = [max([len(str(item)) for item in column]) for column in columns]
rowSeparator = headerChar * (len(prefix) + len(postfix) + sum(maxWidths) + \
len(delim)*(len(maxWidths)-1))
# select the appropriate justify method
justify = {'center':str.center, 'right':str.rjust, 'left':str.ljust}[justify.lower()]
output=cStringIO.StringIO()
if separateRows: print >> output, rowSeparator
for physicalRows in logicalRows:
for row in physicalRows:
print >> output, \
prefix \
+ delim.join([justify(str(item),width) for (item,width) in zip(row,maxWidths)]) \
+ postfix
if separateRows or hasHeader: print >> output, rowSeparator; hasHeader=False
return output.getvalue()
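# Example (illustrative): indent() renders a list of string rows as a bordered
# text table, e.g.
#   print indent([['state', 'value'], ['(0, 0)', '1.00']], hasHeader=True,
#                separateRows=True, delim='|', prefix='|', postfix='|',
#                justify='center')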
import math
def wrap_always(text, width):
"""A simple word-wrap function that wraps text on exactly width characters.
It doesn't split the text into words."""
return '\n'.join([ text[width*i:width*(i+1)] \
for i in xrange(int(math.ceil(1.*len(text)/width))) ])
# TEST OF DISPLAY CODE
if __name__ == '__main__':
import gridworld, util
grid = gridworld.getCliffGrid3()
print grid.getStates()
policy = dict([(state,'east') for state in grid.getStates()])
values = util.Counter(dict([(state,1000.23) for state in grid.getStates()]))
prettyPrintValues(grid, values, policy, currentState = (0,0))
stateCrossActions = [[(state, action) for action in grid.getPossibleActions(state)] for state in grid.getStates()]
qStates = reduce(lambda x,y: x+y, stateCrossActions, [])
qValues = util.Counter(dict([((state, action), 10.5) for state, action in qStates]))
qValues = util.Counter(dict([((state, action), 10.5) for state, action in reduce(lambda x,y: x+y, stateCrossActions, [])]))
prettyPrintQValues(grid, qValues, currentState = (0,0))

653
reinforcement/util.py Normal file
View file

@ -0,0 +1,653 @@
# util.py
# -------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
import sys
import inspect
import heapq, random
import cStringIO
class FixedRandom:
def __init__(self):
fixedState = (3, (2147483648L, 507801126L, 683453281L, 310439348L, 2597246090L, \
2209084787L, 2267831527L, 979920060L, 3098657677L, 37650879L, 807947081L, 3974896263L, \
881243242L, 3100634921L, 1334775171L, 3965168385L, 746264660L, 4074750168L, 500078808L, \
776561771L, 702988163L, 1636311725L, 2559226045L, 157578202L, 2498342920L, 2794591496L, \
4130598723L, 496985844L, 2944563015L, 3731321600L, 3514814613L, 3362575829L, 3038768745L, \
2206497038L, 1108748846L, 1317460727L, 3134077628L, 988312410L, 1674063516L, 746456451L, \
3958482413L, 1857117812L, 708750586L, 1583423339L, 3466495450L, 1536929345L, 1137240525L, \
3875025632L, 2466137587L, 1235845595L, 4214575620L, 3792516855L, 657994358L, 1241843248L, \
1695651859L, 3678946666L, 1929922113L, 2351044952L, 2317810202L, 2039319015L, 460787996L, \
3654096216L, 4068721415L, 1814163703L, 2904112444L, 1386111013L, 574629867L, 2654529343L, \
3833135042L, 2725328455L, 552431551L, 4006991378L, 1331562057L, 3710134542L, 303171486L, \
1203231078L, 2670768975L, 54570816L, 2679609001L, 578983064L, 1271454725L, 3230871056L, \
2496832891L, 2944938195L, 1608828728L, 367886575L, 2544708204L, 103775539L, 1912402393L, \
1098482180L, 2738577070L, 3091646463L, 1505274463L, 2079416566L, 659100352L, 839995305L, \
1696257633L, 274389836L, 3973303017L, 671127655L, 1061109122L, 517486945L, 1379749962L, \
3421383928L, 3116950429L, 2165882425L, 2346928266L, 2892678711L, 2936066049L, 1316407868L, \
2873411858L, 4279682888L, 2744351923L, 3290373816L, 1014377279L, 955200944L, 4220990860L, \
2386098930L, 1772997650L, 3757346974L, 1621616438L, 2877097197L, 442116595L, 2010480266L, \
2867861469L, 2955352695L, 605335967L, 2222936009L, 2067554933L, 4129906358L, 1519608541L, \
1195006590L, 1942991038L, 2736562236L, 279162408L, 1415982909L, 4099901426L, 1732201505L, \
2934657937L, 860563237L, 2479235483L, 3081651097L, 2244720867L, 3112631622L, 1636991639L, \
3860393305L, 2312061927L, 48780114L, 1149090394L, 2643246550L, 1764050647L, 3836789087L, \
3474859076L, 4237194338L, 1735191073L, 2150369208L, 92164394L, 756974036L, 2314453957L, \
323969533L, 4267621035L, 283649842L, 810004843L, 727855536L, 1757827251L, 3334960421L, \
3261035106L, 38417393L, 2660980472L, 1256633965L, 2184045390L, 811213141L, 2857482069L, \
2237770878L, 3891003138L, 2787806886L, 2435192790L, 2249324662L, 3507764896L, 995388363L, \
856944153L, 619213904L, 3233967826L, 3703465555L, 3286531781L, 3863193356L, 2992340714L, \
413696855L, 3865185632L, 1704163171L, 3043634452L, 2225424707L, 2199018022L, 3506117517L, \
3311559776L, 3374443561L, 1207829628L, 668793165L, 1822020716L, 2082656160L, 1160606415L, \
3034757648L, 741703672L, 3094328738L, 459332691L, 2702383376L, 1610239915L, 4162939394L, \
557861574L, 3805706338L, 3832520705L, 1248934879L, 3250424034L, 892335058L, 74323433L, \
3209751608L, 3213220797L, 3444035873L, 3743886725L, 1783837251L, 610968664L, 580745246L, \
4041979504L, 201684874L, 2673219253L, 1377283008L, 3497299167L, 2344209394L, 2304982920L, \
3081403782L, 2599256854L, 3184475235L, 3373055826L, 695186388L, 2423332338L, 222864327L, \
1258227992L, 3627871647L, 3487724980L, 4027953808L, 3053320360L, 533627073L, 3026232514L, \
2340271949L, 867277230L, 868513116L, 2158535651L, 2487822909L, 3428235761L, 3067196046L, \
3435119657L, 1908441839L, 788668797L, 3367703138L, 3317763187L, 908264443L, 2252100381L, \
764223334L, 4127108988L, 384641349L, 3377374722L, 1263833251L, 1958694944L, 3847832657L, \
1253909612L, 1096494446L, 555725445L, 2277045895L, 3340096504L, 1383318686L, 4234428127L, \
1072582179L, 94169494L, 1064509968L, 2681151917L, 2681864920L, 734708852L, 1338914021L, \
1270409500L, 1789469116L, 4191988204L, 1716329784L, 2213764829L, 3712538840L, 919910444L, \
1318414447L, 3383806712L, 3054941722L, 3378649942L, 1205735655L, 1268136494L, 2214009444L, \
2532395133L, 3232230447L, 230294038L, 342599089L, 772808141L, 4096882234L, 3146662953L, \
2784264306L, 1860954704L, 2675279609L, 2984212876L, 2466966981L, 2627986059L, 2985545332L, \
2578042598L, 1458940786L, 2944243755L, 3959506256L, 1509151382L, 325761900L, 942251521L, \
4184289782L, 2756231555L, 3297811774L, 1169708099L, 3280524138L, 3805245319L, 3227360276L, \
3199632491L, 2235795585L, 2865407118L, 36763651L, 2441503575L, 3314890374L, 1755526087L, \
17915536L, 1196948233L, 949343045L, 3815841867L, 489007833L, 2654997597L, 2834744136L, \
417688687L, 2843220846L, 85621843L, 747339336L, 2043645709L, 3520444394L, 1825470818L, \
647778910L, 275904777L, 1249389189L, 3640887431L, 4200779599L, 323384601L, 3446088641L, \
4049835786L, 1718989062L, 3563787136L, 44099190L, 3281263107L, 22910812L, 1826109246L, \
745118154L, 3392171319L, 1571490704L, 354891067L, 815955642L, 1453450421L, 940015623L, \
796817754L, 1260148619L, 3898237757L, 176670141L, 1870249326L, 3317738680L, 448918002L, \
4059166594L, 2003827551L, 987091377L, 224855998L, 3520570137L, 789522610L, 2604445123L, \
454472869L, 475688926L, 2990723466L, 523362238L, 3897608102L, 806637149L, 2642229586L, \
2928614432L, 1564415411L, 1691381054L, 3816907227L, 4082581003L, 1895544448L, 3728217394L, \
3214813157L, 4054301607L, 1882632454L, 2873728645L, 3694943071L, 1297991732L, 2101682438L, \
3952579552L, 678650400L, 1391722293L, 478833748L, 2976468591L, 158586606L, 2576499787L, \
662690848L, 3799889765L, 3328894692L, 2474578497L, 2383901391L, 1718193504L, 3003184595L, \
3630561213L, 1929441113L, 3848238627L, 1594310094L, 3040359840L, 3051803867L, 2462788790L, \
954409915L, 802581771L, 681703307L, 545982392L, 2738993819L, 8025358L, 2827719383L, \
770471093L, 3484895980L, 3111306320L, 3900000891L, 2116916652L, 397746721L, 2087689510L, \
721433935L, 1396088885L, 2751612384L, 1998988613L, 2135074843L, 2521131298L, 707009172L, \
2398321482L, 688041159L, 2264560137L, 482388305L, 207864885L, 3735036991L, 3490348331L, \
1963642811L, 3260224305L, 3493564223L, 1939428454L, 1128799656L, 1366012432L, 2858822447L, \
1428147157L, 2261125391L, 1611208390L, 1134826333L, 2374102525L, 3833625209L, 2266397263L, \
3189115077L, 770080230L, 2674657172L, 4280146640L, 3604531615L, 4235071805L, 3436987249L, \
509704467L, 2582695198L, 4256268040L, 3391197562L, 1460642842L, 1617931012L, 457825497L, \
1031452907L, 1330422862L, 4125947620L, 2280712485L, 431892090L, 2387410588L, 2061126784L, \
896457479L, 3480499461L, 2488196663L, 4021103792L, 1877063114L, 2744470201L, 1046140599L, \
2129952955L, 3583049218L, 4217723693L, 2720341743L, 820661843L, 1079873609L, 3360954200L, \
3652304997L, 3335838575L, 2178810636L, 1908053374L, 4026721976L, 1793145418L, 476541615L, \
973420250L, 515553040L, 919292001L, 2601786155L, 1685119450L, 3030170809L, 1590676150L, \
1665099167L, 651151584L, 2077190587L, 957892642L, 646336572L, 2743719258L, 866169074L, \
851118829L, 4225766285L, 963748226L, 799549420L, 1955032629L, 799460000L, 2425744063L, \
2441291571L, 1928963772L, 528930629L, 2591962884L, 3495142819L, 1896021824L, 901320159L, \
3181820243L, 843061941L, 3338628510L, 3782438992L, 9515330L, 1705797226L, 953535929L, \
764833876L, 3202464965L, 2970244591L, 519154982L, 3390617541L, 566616744L, 3438031503L, \
1853838297L, 170608755L, 1393728434L, 676900116L, 3184965776L, 1843100290L, 78995357L, \
2227939888L, 3460264600L, 1745705055L, 1474086965L, 572796246L, 4081303004L, 882828851L, \
1295445825L, 137639900L, 3304579600L, 2722437017L, 4093422709L, 273203373L, 2666507854L, \
3998836510L, 493829981L, 1623949669L, 3482036755L, 3390023939L, 833233937L, 1639668730L, \
1499455075L, 249728260L, 1210694006L, 3836497489L, 1551488720L, 3253074267L, 3388238003L, \
2372035079L, 3945715164L, 2029501215L, 3362012634L, 2007375355L, 4074709820L, 631485888L, \
3135015769L, 4273087084L, 3648076204L, 2739943601L, 1374020358L, 1760722448L, 3773939706L, \
1313027823L, 1895251226L, 4224465911L, 421382535L, 1141067370L, 3660034846L, 3393185650L, \
1850995280L, 1451917312L, 3841455409L, 3926840308L, 1397397252L, 2572864479L, 2500171350L, \
3119920613L, 531400869L, 1626487579L, 1099320497L, 407414753L, 2438623324L, 99073255L, \
3175491512L, 656431560L, 1153671785L, 236307875L, 2824738046L, 2320621382L, 892174056L, \
230984053L, 719791226L, 2718891946L, 624L), None)
self.random = random.Random()
self.random.setstate(fixedState)
"""
Data structures useful for implementing SearchAgents
"""
class Stack:
"A container with a last-in-first-out (LIFO) queuing policy."
def __init__(self):
self.list = []
def push(self,item):
"Push 'item' onto the stack"
self.list.append(item)
def pop(self):
"Pop the most recently pushed item from the stack"
return self.list.pop()
def isEmpty(self):
"Returns true if the stack is empty"
return len(self.list) == 0
class Queue:
"A container with a first-in-first-out (FIFO) queuing policy."
def __init__(self):
self.list = []
def push(self,item):
"Enqueue the 'item' into the queue"
self.list.insert(0,item)
def pop(self):
"""
Dequeue the earliest enqueued item still in the queue. This
operation removes the item from the queue.
"""
return self.list.pop()
def isEmpty(self):
"Returns true if the queue is empty"
return len(self.list) == 0
class PriorityQueue:
"""
Implements a priority queue data structure. Each inserted item
has a priority associated with it and the client is usually interested
in quick retrieval of the lowest-priority item in the queue. This
data structure allows O(1) access to the lowest-priority item.
Note that this PriorityQueue does not allow you to change the priority
of an item. However, you may insert the same item multiple times with
different priorities.
"""
def __init__(self):
self.heap = []
self.count = 0
def push(self, item, priority):
# Note: entries keep the original (priority, count, item) format so that ties
# in priority are broken by insertion order.
entry = (priority, self.count, item)
# entry = (priority, item)
heapq.heappush(self.heap, entry)
self.count += 1
def pop(self):
(_, _, item) = heapq.heappop(self.heap)
# (_, item) = heapq.heappop(self.heap)
return item
def isEmpty(self):
return len(self.heap) == 0
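# Example (illustrative): the item pushed with the smallest priority value is
# popped first, and ties are broken by insertion order through self.count.
#   pq = PriorityQueue(); pq.push('b', 2); pq.push('a', 1); pq.pop()  # -> 'a'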
class PriorityQueueWithFunction(PriorityQueue):
"""
Implements a priority queue with the same push/pop signature of the
Queue and the Stack classes. This is designed for drop-in replacement for
those two classes. The caller has to provide a priority function, which
extracts each item's priority.
"""
def __init__(self, priorityFunction):
"priorityFunction (item) -> priority"
self.priorityFunction = priorityFunction # store the priority function
PriorityQueue.__init__(self) # super-class initializer
def push(self, item):
"Adds an item to the queue with priority from the priority function"
PriorityQueue.push(self, item, self.priorityFunction(item))
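# Example (illustrative): PriorityQueueWithFunction(len) pops the shortest item
# first, since len(item) is used as its priority.
#   pq = PriorityQueueWithFunction(len); pq.push('abc'); pq.push('a'); pq.pop()  # -> 'a'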
def manhattanDistance( xy1, xy2 ):
"Returns the Manhattan distance between points xy1 and xy2"
return abs( xy1[0] - xy2[0] ) + abs( xy1[1] - xy2[1] )
"""
Data structures and functions useful for various course projects
The search project should not need anything below this line.
"""
class Counter(dict):
"""
A counter keeps track of counts for a set of keys.
The counter class is an extension of the standard python
dictionary type. It is specialized to have number values
(integers or floats), and includes a handful of additional
functions to ease the task of counting data. In particular,
all keys are defaulted to have value 0. Using a dictionary:
a = {}
print a['test']
would give an error, while the Counter class analogue:
>>> a = Counter()
>>> print a['test']
0
returns the default 0 value. Note that to reference a key
that you know is contained in the counter,
you can still use the dictionary syntax:
>>> a = Counter()
>>> a['test'] = 2
>>> print a['test']
2
This is very useful for counting things without initializing their counts,
see for example:
>>> a['blah'] += 1
>>> print a['blah']
1
The counter also includes additional functionality useful in implementing
the classifiers for this assignment. Two counters can be added,
subtracted or multiplied together. See below for details. They can
also be normalized and their total count and arg max can be extracted.
"""
def __getitem__(self, idx):
self.setdefault(idx, 0)
return dict.__getitem__(self, idx)
def incrementAll(self, keys, count):
"""
Increments all elements of keys by the same count.
>>> a = Counter()
>>> a.incrementAll(['one','two', 'three'], 1)
>>> a['one']
1
>>> a['two']
1
"""
for key in keys:
self[key] += count
def argMax(self):
"""
Returns the key with the highest value.
"""
if len(self.keys()) == 0: return None
all = self.items()
values = [x[1] for x in all]
maxIndex = values.index(max(values))
return all[maxIndex][0]
def sortedKeys(self):
"""
Returns a list of keys sorted by their values. Keys
with the highest values will appear first.
>>> a = Counter()
>>> a['first'] = -2
>>> a['second'] = 4
>>> a['third'] = 1
>>> a.sortedKeys()
['second', 'third', 'first']
"""
sortedItems = self.items()
compare = lambda x, y: sign(y[1] - x[1])
sortedItems.sort(cmp=compare)
return [x[0] for x in sortedItems]
def totalCount(self):
"""
Returns the sum of counts for all keys.
"""
return sum(self.values())
def normalize(self):
"""
Edits the counter such that the total count of all
keys sums to 1. The ratio of counts for all keys
will remain the same. Note that normalizing an empty
Counter (zero total) is a no-op here and leaves it unchanged.
"""
total = float(self.totalCount())
if total == 0: return
for key in self.keys():
self[key] = self[key] / total
def divideAll(self, divisor):
"""
Divides all counts by divisor
"""
divisor = float(divisor)
for key in self:
self[key] /= divisor
def copy(self):
"""
Returns a copy of the counter
"""
return Counter(dict.copy(self))
def __mul__(self, y ):
"""
Multiplying two counters gives the dot product of their vectors where
each unique label is a vector element.
>>> a = Counter()
>>> b = Counter()
>>> a['first'] = -2
>>> a['second'] = 4
>>> b['first'] = 3
>>> b['second'] = 5
>>> a['third'] = 1.5
>>> a['fourth'] = 2.5
>>> a * b
14
"""
sum = 0
x = self
if len(x) > len(y):
x,y = y,x
for key in x:
if key not in y:
continue
sum += x[key] * y[key]
return sum
def __radd__(self, y):
"""
Adding another counter to a counter increments the current counter
by the values stored in the second counter.
>>> a = Counter()
>>> b = Counter()
>>> a['first'] = -2
>>> a['second'] = 4
>>> b['first'] = 3
>>> b['third'] = 1
>>> a += b
>>> a['first']
1
"""
for key, value in y.items():
self[key] += value
def __add__( self, y ):
"""
Adding two counters gives a counter with the union of all keys and
counts of the second added to counts of the first.
>>> a = Counter()
>>> b = Counter()
>>> a['first'] = -2
>>> a['second'] = 4
>>> b['first'] = 3
>>> b['third'] = 1
>>> (a + b)['first']
1
"""
addend = Counter()
for key in self:
if key in y:
addend[key] = self[key] + y[key]
else:
addend[key] = self[key]
for key in y:
if key in self:
continue
addend[key] = y[key]
return addend
def __sub__( self, y ):
"""
Subtracting a counter from another gives a counter with the union of all keys and
counts of the second subtracted from counts of the first.
>>> a = Counter()
>>> b = Counter()
>>> a['first'] = -2
>>> a['second'] = 4
>>> b['first'] = 3
>>> b['third'] = 1
>>> (a - b)['first']
-5
"""
addend = Counter()
for key in self:
if key in y:
addend[key] = self[key] - y[key]
else:
addend[key] = self[key]
for key in y:
if key in self:
continue
addend[key] = -1 * y[key]
return addend
def raiseNotDefined():
fileName = inspect.stack()[1][1]
line = inspect.stack()[1][2]
method = inspect.stack()[1][3]
print "*** Method not implemented: %s at line %s of %s" % (method, line, fileName)
sys.exit(1)
def normalize(vectorOrCounter):
"""
normalize a vector or counter by dividing each value by the sum of all values
"""
normalizedCounter = Counter()
if type(vectorOrCounter) == type(normalizedCounter):
counter = vectorOrCounter
total = float(counter.totalCount())
if total == 0: return counter
for key in counter.keys():
value = counter[key]
normalizedCounter[key] = value / total
return normalizedCounter
else:
vector = vectorOrCounter
s = float(sum(vector))
if s == 0: return vector
return [el / s for el in vector]
def nSample(distribution, values, n):
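# Draws n samples by inverse-CDF sampling: n uniform draws are sorted, then the
# cumulative distribution is walked once, so a value can be returned repeatedly.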
if sum(distribution) != 1:
distribution = normalize(distribution)
rand = [random.random() for i in range(n)]
rand.sort()
samples = []
samplePos, distPos, cdf = 0,0, distribution[0]
while samplePos < n:
if rand[samplePos] < cdf:
samplePos += 1
samples.append(values[distPos])
else:
distPos += 1
cdf += distribution[distPos]
return samples
def sample(distribution, values = None):
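# Draws a single value; a Counter argument is first unpacked into parallel
# (distribution, values) lists and normalized when it does not sum to 1.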
if type(distribution) == Counter:
items = sorted(distribution.items())
distribution = [i[1] for i in items]
values = [i[0] for i in items]
if sum(distribution) != 1:
distribution = normalize(distribution)
choice = random.random()
i, total= 0, distribution[0]
while choice > total:
i += 1
total += distribution[i]
return values[i]
def sampleFromCounter(ctr):
items = sorted(ctr.items())
return sample([v for k,v in items], [k for k,v in items])
def getProbability(value, distribution, values):
"""
Gives the probability of a value under a discrete distribution
defined by (distributions, values).
"""
total = 0.0
for prob, val in zip(distribution, values):
if val == value:
total += prob
return total
def flipCoin( p ):
r = random.random()
return r < p
def chooseFromDistribution( distribution ):
"Takes either a counter or a list of (prob, key) pairs and samples"
if type(distribution) == dict or type(distribution) == Counter:
return sample(distribution)
r = random.random()
base = 0.0
for prob, element in distribution:
base += prob
if r <= base: return element
def nearestPoint( pos ):
"""
Finds the nearest grid point to a position (discretizes).
"""
( current_row, current_col ) = pos
grid_row = int( current_row + 0.5 )
grid_col = int( current_col + 0.5 )
return ( grid_row, grid_col )
def sign( x ):
"""
Returns 1 or -1 depending on the sign of x
"""
if( x >= 0 ):
return 1
else:
return -1
def arrayInvert(array):
"""
Inverts a matrix stored as a list of lists.
"""
result = [[] for i in array]
for outer in array:
for inner in range(len(outer)):
result[inner].append(outer[inner])
return result
def matrixAsList( matrix, value = True ):
"""
Turns a matrix into a list of coordinates matching the specified value
"""
rows, cols = len( matrix ), len( matrix[0] )
cells = []
for row in range( rows ):
for col in range( cols ):
if matrix[row][col] == value:
cells.append( ( row, col ) )
return cells
def lookup(name, namespace):
"""
Get a method or class from any imported module from its name.
Usage: lookup(functionName, globals())
"""
dots = name.count('.')
if dots > 0:
moduleName, objName = '.'.join(name.split('.')[:-1]), name.split('.')[-1]
module = __import__(moduleName)
return getattr(module, objName)
else:
modules = [obj for obj in namespace.values() if str(type(obj)) == "<type 'module'>"]
options = [getattr(module, name) for module in modules if name in dir(module)]
options += [obj[1] for obj in namespace.items() if obj[0] == name ]
if len(options) == 1: return options[0]
if len(options) > 1: raise Exception, 'Name conflict for %s' % name
raise Exception, '%s not found as a method or class' % name
def pause():
"""
Pauses the output stream awaiting user feedback.
"""
print "<Press enter/return to continue>"
raw_input()
# code to handle timeouts
#
# FIXME
# NOTE: TimeoutFuncton is NOT reentrant. Later timeouts will silently
# disable earlier timeouts. Could be solved by maintaining a global list
# of active time outs. Currently, questions which have test cases calling
# this have all student code so wrapped.
#
import signal
import time
class TimeoutFunctionException(Exception):
"""Exception to raise on a timeout"""
pass
class TimeoutFunction:
def __init__(self, function, timeout):
self.timeout = timeout
self.function = function
def handle_timeout(self, signum, frame):
raise TimeoutFunctionException()
def __call__(self, *args, **keyArgs):
# If we have SIGALRM signal, use it to cause an exception if and
# when this function runs too long. Otherwise check the time taken
# after the method has returned, and throw an exception then.
if hasattr(signal, 'SIGALRM'):
old = signal.signal(signal.SIGALRM, self.handle_timeout)
signal.alarm(self.timeout)
try:
result = self.function(*args, **keyArgs)
finally:
signal.signal(signal.SIGALRM, old)
signal.alarm(0)
else:
startTime = time.time()
result = self.function(*args, **keyArgs)
timeElapsed = time.time() - startTime
if timeElapsed >= self.timeout:
self.handle_timeout(None, None)
return result
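# Example (illustrative): TimeoutFunction(slowFunction, 10)(arg) raises
# TimeoutFunctionException if slowFunction(arg) runs for more than 10 seconds.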
_ORIGINAL_STDOUT = None
_ORIGINAL_STDERR = None
_MUTED = False
class WritableNull:
def write(self, string):
pass
def mutePrint():
global _ORIGINAL_STDOUT, _ORIGINAL_STDERR, _MUTED
if _MUTED:
return
_MUTED = True
_ORIGINAL_STDOUT = sys.stdout
#_ORIGINAL_STDERR = sys.stderr
sys.stdout = WritableNull()
#sys.stderr = WritableNull()
def unmutePrint():
global _ORIGINAL_STDOUT, _ORIGINAL_STDERR, _MUTED
if not _MUTED:
return
_MUTED = False
sys.stdout = _ORIGINAL_STDOUT
#sys.stderr = _ORIGINAL_STDERR

View file

@ -0,0 +1,123 @@
# -*- coding: utf-8 -*-
# valueIterationAgents.py
# -----------------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
import mdp, util
from learningAgents import ValueEstimationAgent
class ValueIterationAgent(ValueEstimationAgent):
"""
* Please read learningAgents.py before reading this.*
A ValueIterationAgent takes a Markov decision process
(see mdp.py) on initialization and runs value iteration
for a given number of iterations using the supplied
discount factor.
"""
def __init__(self, mdp, discount=0.9, iterations=100):
"""
Your value iteration agent should take an mdp on
construction, run the indicated number of iterations
and then act according to the resulting policy.
Some useful mdp methods you will use:
mdp.getStates()
mdp.getPossibleActions(state)
mdp.getTransitionStatesAndProbs(state, action)
mdp.getReward(state, action, nextState)
mdp.isTerminal(state)
"""
self.mdp = mdp
self.discount = discount
self.iterations = iterations
self.values = util.Counter() # A Counter is a dict with default 0
# Write value iteration code here
"*** YOUR CODE HERE ***"
states = self.mdp.getStates()
print "__init__ ... states: " + str(states)
for i in range(iterations):
# Use the values from the previous iteration as the reference
# Copy for the batch update
q_copy = self.values.copy()
for state in states:
q_new = None
for action in self.mdp.getPossibleActions(state):
q = self.computeQValueFromValues(state, action)
# Keep the best Q-value
if q_new is None or q_new < q:
q_new = q
# Handle the case with no successors (no legal actions)
if q_new is None:
q_copy[state] = 0
else:
q_copy[state] = q_new
# Update for the next iterations
self.values = q_copy
def getValue(self, state):
"""
Return the value of the state (computed in __init__).
"""
return self.values[state]
def computeQValueFromValues(self, state, action):
"""
Compute the Q-value of action in state from the
value function stored in self.values.
"""
"*** YOUR CODE HERE ***"
values = []
for nextState, prob in self.mdp.getTransitionStatesAndProbs(state,action):
reward = self.mdp.getReward(state, action, nextState)
discount = self.discount
next_state_value = self.values[nextState]
values.append(prob*(reward+discount*next_state_value))
return sum(values)
def computeActionFromValues(self, state):
"""
The policy is the best action in the given state
according to the values currently stored in self.values.
You may break ties any way you see fit. Note that if
there are no legal actions, which is the case at the
terminal state, you should return None.
"""
"*** YOUR CODE HERE ***"
possibleActions = self.mdp.getPossibleActions(state)
if len(possibleActions) == 0:
return None
q_values = [self.computeQValueFromValues(state, action) for action in possibleActions]
print "computeActionFromValues ... q_values: "+str(q_values)
print "index:"+str(q_values.index(max(q_values)))
print "action:"+str(possibleActions[q_values.index(max(q_values))])
return possibleActions[q_values.index(max(q_values))]
def getPolicy(self, state):
return self.computeActionFromValues(state)
def getAction(self, state):
"Returns the policy at the state (no exploration)."
return self.computeActionFromValues(state)
def getQValue(self, state, action):
return self.computeQValueFromValues(state, action)

Some files were not shown because too many files have changed in this diff.