Vidal's library
Title: | Product distribution theory for control of multi-agent systems |
Author: | Chiu Fan Lee and David Wolpert |
Book Title: | Proceedings of the Third International Joint Conference on Autonomous Agents and MultiAgent Systems |
Pages: | 522--529 |
Publisher: | ACM |
Year: | 2004 |
Abstract: | Product Distribution (PD) theory is a new framework for controlling Multi-Agent Systems (MASs). First we review one motivation of PD theory, as the information-theoretic extension of conventional full-rationality game theory to the case of bounded rational agents. In this extension the equilibrium of the game is the optimizer of a Lagrangian of the (probability distribution of) the joint state of the agents. Accordingly we can consider a team game having a shared utility which is a performance measure of the behavior of the MAS. For such a scenario, when the game is at equilibrium, i.e., when the Lagrangian is optimized, the joint distribution of the agents optimizes the system's expected performance. One common way to find that equilibrium is to have each agent run a reinforcement learning algorithm. Here we investigate the alternative of exploiting PD theory to run gradient descent on the Lagrangian. We present computer experiments validating some of the predictions of PD theory for how best to do that gradient descent. We also demonstrate how PD theory can improve performance even when we are not allowed to rerun the MAS from different initial conditions, a requirement implicit in some previous work. |
Cited by 17 - Google Scholar
@InProceedings{lee04a,
author = {Chiu Fan Lee and David Wolpert},
title = {Product distribution theory for control of
multi-agent systems},
booktitle = {Proceedings of the Third International Joint
Conference on Autonomous Agents and MultiAgent
Systems},
pages = {522--529},
year = 2004,
publisher = {{ACM}},
  abstract = {Product Distribution (PD) theory is a new framework
              for controlling Multi-Agent Systems (MASs). First we
              review one motivation of PD theory, as the
              information-theoretic extension of conventional
              full-rationality game theory to the case of bounded
              rational agents. In this extension the equilibrium
              of the game is the optimizer of a Lagrangian of the
              (probability distribution of) the joint state of the
              agents. Accordingly we can consider a team game
              having a shared utility which is a performance
              measure of the behavior of the MAS. For such a
              scenario, when the game is at equilibrium, i.e.,
              when the Lagrangian is optimized, the joint
              distribution of the agents optimizes the system's
              expected performance. One common way to find that
              equilibrium is to have each agent run a
              reinforcement learning algorithm. Here we
              investigate the alternative of exploiting PD theory
              to run gradient descent on the Lagrangian. We
              present computer experiments validating some of the
              predictions of PD theory for how best to do that
              gradient descent. We also demonstrate how PD theory
              can improve performance even when we are not allowed
              to rerun the MAS from different initial conditions,
              a requirement implicit in some previous work.},
keywords = {multiagent},
url = {http://jmvidal.cse.sc.edu/library/lee04a.pdf},
comment = {masrg},
googleid = {KNjpYnvO9dAJ:scholar.google.com/},
cluster = {15057167958518913064}
}
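As a concrete illustration of the approach the abstract describes, the sketch below runs gradient descent on the standard maxent Lagrangian of PD theory, L(q) = E_q[G] - T * sum_i S(q_i), over a product distribution q(x) = q_1(x_1) q_2(x_2) for a toy two-agent team game with shared cost G. This is a minimal sketch, not the paper's experimental setup: the random cost table, temperature T, step size eta, and softmax parameterization are all illustrative assumptions.

# Minimal sketch (not the authors' code): gradient descent on the maxent
# Lagrangian L(q) = E_q[G] - T * sum_i S(q_i) for a product distribution
# q(x) = q_1(x_1) q_2(x_2), on a toy 2-agent team game with shared cost G.
# Cost table, temperature T, and step size eta are illustrative choices.
import numpy as np

rng = np.random.default_rng(0)
n_moves = 4
G = rng.uniform(0.0, 1.0, size=(n_moves, n_moves))  # shared cost G(x1, x2)
T, eta, steps = 0.1, 0.5, 2000

def softmax(z):
    e = np.exp(z - z.max())
    return e / e.sum()

# Unconstrained logits z[i] parameterize each agent's distribution q_i,
# keeping q_i on the probability simplex during descent.
z = [np.zeros(n_moves), np.zeros(n_moves)]

def lagrangian(q1, q2):
    expected_cost = q1 @ G @ q2                        # E_q[G]
    entropy = -(q1 @ np.log(q1)) - (q2 @ np.log(q2))   # sum_i S(q_i)
    return expected_cost - T * entropy

for step in range(steps):
    q1, q2 = softmax(z[0]), softmax(z[1])
    # E[G | x_i = a]: each agent's conditional expected cost per move.
    cond = [G @ q2, G.T @ q1]
    for i, qi in enumerate((q1, q2)):
        # dL/dq_i(a) = E[G | x_i = a] + T * (ln q_i(a) + 1)
        dq = cond[i] + T * (np.log(qi) + 1.0)
        # Chain rule through softmax: grad_z = q * (dq - q . dq)
        z[i] -= eta * qi * (dq - qi @ dq)

q1, q2 = softmax(z[0]), softmax(z[1])
print("final Lagrangian:", lagrangian(q1, q2))
print("q1:", np.round(q1, 3))
print("q2:", np.round(q2, 3))

In this sketch the interior fixed point satisfies q_i(a) proportional to exp(-E[G | x_i = a] / T), the Boltzmann form of the bounded-rational equilibrium the abstract refers to; lowering T pushes the agents toward the full-rationality joint minimizer of G, while raising it makes them more random.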