Vidal's library
Title: | Learning to Predict by the Methods of Temporal Differences |
Author: | Richard S. Sutton |
Journal: | Machine Learning |
Volume: | 3 |
Number: | 1 |
Pages: | 9--44 |
Year: | 1988 |
DOI: | 10.1023/A:1022633531479 |
Abstract: | This article introduces a class of incremental learning procedures specialized for prediction - that is, for using past experience with an incompletely known system to predict its future behavior. Whereas conventional prediction-learning methods assign credit by means of the difference between predicted and actual outcomes, the new methods assign credit by means of the difference between temporally successive predictions. Although such temporal-difference methods have been used in Samuel's checker player, Holland's bucket brigade, and the author's Adaptive Heuristic Critic, they have remained poorly understood. Here we prove their convergence and optimality for special cases and relate them to supervised-learning methods. For most real-world prediction problems, temporal-difference methods require less memory and less peak computation than conventional methods and they produce more accurate predictions. We argue that most problems to which supervised learning is currently applied are really prediction problems of the sort to which temporal-difference methods can be applied to advantage. |
Cited by 1329 - Google Scholar
@comment{Journal article: Sutton (1988), Machine Learning 3(1), pages 9--44.
  The fields googleid and cluster are non-standard and ignored by standard
  styles; they appear to be Google Scholar identifiers (assumption, to be
  confirmed) and are kept unchanged.}
@Article{sutton88a,
  author   = {Sutton, Richard S.},
  title    = {Learning to Predict by the Methods of Temporal Differences},
  journal  = {Machine Learning},
  year     = {1988},
  volume   = {3},
  number   = {1},
  pages    = {9--44},
  doi      = {10.1023/A:1022633531479},
  url      = {http://jmvidal.cse.sc.edu/library/sutton88a.pdf},
  abstract = {This article introduces a class of incremental learning
              procedures specialized for prediction---that is, for using
              past experience with an incompletely known system to predict
              its future behavior. Whereas conventional prediction-learning
              methods assign credit by means of the difference between
              predicted and actual outcomes, the new methods assign credit
              by means of the difference between temporally successive
              predictions. Although such temporal-difference methods have
              been used in Samuel's checker player, Holland's bucket
              brigade, and the author's Adaptive Heuristic Critic, they
              have remained poorly understood. Here we prove their
              convergence and optimality for special cases and relate them
              to supervised-learning methods. For most real-world
              prediction problems, temporal-difference methods require less
              memory and less peak computation than conventional methods
              and they produce more accurate predictions. We argue that
              most problems to which supervised learning is currently
              applied are really prediction problems of the sort to which
              temporal-difference methods can be applied to advantage.},
  keywords = {ai learning reinforcement},
  googleid = {aUSy2dErtroJ:scholar.google.com/},
  cluster  = {13453989117116433513},
}
Last modified: Wed Mar 9 10:13:46 EST 2011