# -*- coding: utf-8 -*-
# qlearningAgents.py
# ------------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).

# François Pelletier
# IFT-7025
# Matricule: 908144032

from game import *
from learningAgents import ReinforcementAgent
from featureExtractors import *

import random, util, math


class QLearningAgent(ReinforcementAgent):
    """
      Q-Learning Agent

      Functions you should fill in:
        - computeValueFromQValues
        - computeActionFromQValues
        - getQValue
        - getAction
        - update

      Instance variables you have access to
        - self.epsilon (exploration prob)
        - self.alpha (learning rate)
        - self.discount (discount rate)

      Functions you should use
        - self.getLegalActions(state)
          which returns legal actions for a state
    """
def __init__(self, **args):
        "You can initialize Q-values here..."
        ReinforcementAgent.__init__(self, **args)

        "*** YOUR CODE HERE ***"
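        # Q-values are stored in a plain dict keyed by (state, action) tuples;
        # getQValue treats pairs missing from the dict as 0.0.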
self.q_values = {}
def getQValue(self, state, action):
        """
          Returns Q(state,action)
          Should return 0.0 if we have never seen a state
          or the Q node value otherwise
        """
        "*** YOUR CODE HERE ***"
        # dict.get returns the stored Q-value, or the 0.0 default for a
        # (state, action) pair that has never been updated.
        return self.q_values.get((state, action), 0.0)
def computeValueFromQValues(self, state):
        """
          Returns max_action Q(state,action)
          where the max is over legal actions.  Note that if
          there are no legal actions, which is the case at the
          terminal state, you should return a value of 0.0.
        """
        "*** YOUR CODE HERE ***"
legal_actions = self.getLegalActions(state)
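        # A terminal state has no legal actions; its value is 0.0 by convention.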
if len(legal_actions) == 0:
return 0.0
return max([self.getQValue(state, action) for action in legal_actions])
def computeActionFromQValues(self, state):
        """
          Compute the best action to take in a state.  Note that if there
          are no legal actions, which is the case at the terminal state,
          you should return None.
        """
        "*** YOUR CODE HERE ***"
legalActions = self.getLegalActions(state)
if len(legalActions) == 0:
return None
q_values = [self.getQValue(state, action) for action in legalActions]
q_max = max(q_values)
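        # Collect every action tied for the maximal Q-value so ties are
        # broken uniformly at random rather than by list order.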
q_max_indices = []
for index, value in enumerate(q_values):
if value == q_max:
q_max_indices.append(index)
return legalActions[random.choice(q_max_indices)]
def getAction(self, state):
        """
          Compute the action to take in the current state.  With
          probability self.epsilon, we should take a random action and
          take the best policy action otherwise.  Note that if there are
          no legal actions, which is the case at the terminal state, you
          should choose None as the action.

          HINT: You might want to use util.flipCoin(prob)
          HINT: To pick randomly from a list, use random.choice(list)
        """
        # Pick Action
        legalActions = self.getLegalActions(state)

        "*** YOUR CODE HERE ***"
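        # A terminal state has no legal actions, so no action can be returned.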
if len(legalActions) == 0:
return None
best_action = self.computeActionFromQValues(state)
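        # Epsilon-greedy exploration: with probability self.epsilon take a
        # uniformly random legal action; otherwise follow the greedy policy.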
if util.flipCoin(self.epsilon):
            # Random action
return random.choice(legalActions)
else:
            # Best policy action
return best_action
def update(self, state, action, nextState, reward):
        """
          The parent class calls this to observe a
          state = action => nextState and reward transition.
          You should do your Q-Value update here

          NOTE: You should never call this function,
          it will be called on your behalf
        """
        "*** YOUR CODE HERE ***"
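        # Standard tabular Q-learning update (self.alpha = learning rate,
        # self.discount = gamma):
        #   Q(s,a) <- (1 - alpha) * Q(s,a) + alpha * (reward + gamma * max_a' Q(s',a'))
        # getValue(nextState) supplies max_a' Q(s',a') and is 0.0 at terminal states.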
q_value = self.getQValue(state, action)
        best_value = self.getValue(nextState)
        new_q_value = (1 - self.alpha) * q_value + self.alpha * (reward + self.discount * best_value)
        self.q_values[(state, action)] = new_q_value

def getPolicy(self, state):
return self.computeActionFromQValues(state)
def getValue(self, state):
return self.computeValueFromQValues(state)


class PacmanQAgent(QLearningAgent):
    "Exactly the same as QLearningAgent, but with different default parameters"

    def __init__(self, epsilon=0.05, gamma=0.8, alpha=0.2, numTraining=0, **args):
        """
        These default parameters can be changed from the pacman.py command line.
        For example, to change the exploration rate, try:
            python pacman.py -p PacmanQLearningAgent -a epsilon=0.1

        alpha       - learning rate
        epsilon     - exploration rate
        gamma       - discount factor
        numTraining - number of training episodes, i.e. no learning after these many episodes
        """
args['epsilon'] = epsilon
        args['gamma'] = gamma
        args['alpha'] = alpha
        args['numTraining'] = numTraining
        self.index = 0  # This is always Pacman
        QLearningAgent.__init__(self, **args)
def getAction(self, state):
        """
        Simply calls the getAction method of QLearningAgent and then
        informs parent of action for Pacman.  Do not change or remove this
        method.
        """
        action = QLearningAgent.getAction(self, state)
        self.doAction(state, action)
        return action


class ApproximateQAgent(PacmanQAgent):
    """
       ApproximateQLearningAgent

       You should only have to overwrite getQValue
       and update.  All other QLearningAgent functions
       should work as is.
    """
def __init__(self, extractor='IdentityExtractor', **args):
        self.featExtractor = util.lookup(extractor, globals())()
        PacmanQAgent.__init__(self, **args)
        self.weights = util.Counter()
def getWeights(self):
return self.weights
def getQValue(self, state, action):
        """
          Should return Q(state,action) = w * featureVector
          where * is the dotProduct operator
        """
        "*** YOUR CODE HERE ***"
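        # Linear function approximation: Q(s,a) = sum_i w_i * f_i(s,a),
        # the dot product of the weight vector and the extracted feature vector.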
features = self.featExtractor.getFeatures(state, action)
        weights = self.getWeights()
        q_value = 0
        for i in features.keys():
            q_value += features[i] * weights[i]
        return q_value

def update(self, state, action, nextState, reward):
        """
           Should update your weights based on transition
        """
        "*** YOUR CODE HERE ***"
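        # Approximate Q-learning weight update, one step of gradient descent
        # on the squared TD error:
        #   difference = (reward + gamma * max_a' Q(s',a')) - Q(s,a)
        #   w_i <- w_i + alpha * difference * f_i(s,a)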
features = self.featExtractor.getFeatures(state, action)
        weights = self.getWeights()
        difference = (reward + (self.discount * self.getValue(nextState))) - self.getQValue(state, action)
        for i in features.keys():
            self.weights[i] = weights[i] + self.alpha * difference * features[i]

def final(self, state):
        "Called at the end of each game."
        # call the super-class final method
        PacmanQAgent.final(self, state)

        # did we finish training?
        if self.episodesSoFar == self.numTraining:
            # you might want to print your weights here for debugging
            "*** YOUR CODE HERE ***"
            pass
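
# Example invocations (illustrative; the layouts, flags and episode counts
# below follow the standard Berkeley Pacman project conventions):
#   python pacman.py -p PacmanQAgent -x 2000 -n 2010 -l smallGrid
#   python pacman.py -p ApproximateQAgent -a extractor=SimpleExtractor -x 50 -n 60 -l mediumGrid
# -x sets the number of training episodes and -n the total number of games,
# so each run above ends with ten games played on the learned policy.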