ajout q learning approximatif
This commit is contained in:
parent
77caa10f89
commit
bfb304bd97
1 changed files with 13 additions and 2 deletions
|
@ -208,14 +208,25 @@ class ApproximateQAgent(PacmanQAgent):
|
|||
where * is the dotProduct operator
|
||||
"""
|
||||
"*** YOUR CODE HERE ***"
|
||||
util.raiseNotDefined()
|
||||
features = self.featExtractor.getFeatures(state, action)
|
||||
weights = self.getWeights()
|
||||
q_value = 0
|
||||
for i in features.keys():
|
||||
q_value += features[i]*weights[i]
|
||||
return q_value
|
||||
# util.raiseNotDefined()
|
||||
|
||||
def update(self, state, action, nextState, reward):
    """
    Update the feature weights from one observed transition.

    The temporal-difference error is computed as

        difference = (reward + discount * V(nextState)) - Q(state, action)

    and every feature f_i extracted for (state, action) has its weight
    adjusted by

        w_i <- w_i + alpha * difference * f_i(state, action)

    Args:
        state: state in which ``action`` was taken.
        action: action that was taken in ``state``.
        nextState: state reached after taking ``action``.
        reward: reward observed for the transition.

    Returns:
        None. ``self.weights`` is modified in place.
    """
    features = self.featExtractor.getFeatures(state, action)
    weights = self.getWeights()

    # The TD error is evaluated once, before any weight is touched, so
    # every feature is corrected against the same pre-update estimate.
    difference = (
        (reward + (self.discount * self.getValue(nextState)))
        - self.getQValue(state, action)
    )

    # alpha * difference is identical for every feature: hoist it.
    step = self.alpha * difference
    for feature, value in features.items():
        self.weights[feature] = weights[feature] + step * value
|
||||
|
||||
def final(self, state):
|
||||
"Called at the end of each game."
|
||||
|
|
Loading…
Add table
Reference in a new issue