Vidal's library
Title: | Convergence to Pareto Optimality in General Sum Games via Learning Opponent's Preference |
Author: | Dipyaman Banerjee and Sandip Sen |
Book Title: | Working Notes of the AAMAS Workshop on Adaptation and Learning in Autonomous Agents and Multiagent Systems |
Year: | 2006 |
Abstract: | We consider the learning problem faced by two self-interested agents playing any general-sum game repeatedly where the opponent's payoff is unknown. The concept of Nash Equilibrium in repeated games provides us with an individually rational solution for playing such games and can be achieved by playing the Nash Equilibrium strategy for the single-shot game in every iteration. However, such a strategy can sometimes lead to a Pareto-dominated outcome for the repeated game. Our goal is to design learning strategies that converge to a Pareto-efficient outcome that also produces a Nash Equilibrium payoff for repeated two-player n-action general-sum games. We present a learning algorithm, POSNEL, which learns the opponent's preference structure and produces, under self-play, Nash equilibrium payoffs in the limit in all such games. We also show that such learning will generate Pareto-optimal payoffs in a large majority of games. We derive a probability bound for convergence to Nash Equilibrium payoff and experimentally demonstrate convergence to Pareto optimality for all structurally distinct 2-player 2-action conflict games. We also compare our algorithm with existing algorithms such as WoLF-IGA and JAL and show that POSNEL, on average, outperforms both algorithms. |
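The abstract's motivating observation, that playing the single-shot Nash equilibrium strategy in every iteration can leave both players on a Pareto-dominated outcome, is easy to see in a concrete 2-player 2-action conflict game. The Python sketch below is an illustration only and is not taken from the paper; it uses standard textbook Prisoner's Dilemma payoffs, locates the pure-strategy Nash outcome, and reports that it is Pareto-dominated by mutual cooperation.

from itertools import product

# payoffs[(row_action, col_action)] = (row_payoff, col_payoff)
# Standard Prisoner's Dilemma values, chosen here purely for illustration.
payoffs = {
    ("C", "C"): (3, 3),
    ("C", "D"): (0, 5),
    ("D", "C"): (5, 0),
    ("D", "D"): (1, 1),  # mutual defection: the single-shot Nash outcome
}
actions = ["C", "D"]

def is_nash(cell):
    """Pure-strategy Nash equilibrium: neither player gains by deviating alone."""
    r, c = cell
    row_ok = all(payoffs[(r2, c)][0] <= payoffs[cell][0] for r2 in actions)
    col_ok = all(payoffs[(r, c2)][1] <= payoffs[cell][1] for c2 in actions)
    return row_ok and col_ok

def pareto_dominates(a, b):
    """Outcome a Pareto-dominates b: both do at least as well, one strictly better."""
    pa, pb = payoffs[a], payoffs[b]
    return all(x >= y for x, y in zip(pa, pb)) and any(x > y for x, y in zip(pa, pb))

for cell in product(actions, repeat=2):
    if is_nash(cell):
        dominators = [o for o in payoffs if pareto_dominates(o, cell)]
        print(f"Nash outcome {cell} with payoffs {payoffs[cell]}; "
              f"Pareto-dominated by {dominators or 'nothing'}")

# Output: Nash outcome ('D', 'D') with payoffs (1, 1); Pareto-dominated by [('C', 'C')]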
@InProceedings{banerjee06a,
author = {Dipyaman Banerjee and Sandip Sen},
title = {Convergence to Pareto Optimality in General Sum
Games via Learning Opponent's Preference},
booktitle = {Working Notes of the {AAMAS} Workshop on Adaptation
and Learning in Autonomous Agents and Multiagent
Systems},
year = 2006,
abstract = {We consider the learning problem faced by two
self-interested agents playing any general-sum game
repeatedly where the opponent's payoff is unknown. The
concept of Nash Equilibrium in repeated games
provides us with an individually rational solution for
playing such games and can be achieved by playing
the Nash Equilibrium strategy for the single-shot
game in every iteration. However, such a strategy
can sometimes lead to a Pareto-dominated outcome for
the repeated game. Our goal is to design learning
strategies that converge to a Pareto-efficient outcome
that also produces a Nash Equilibrium payoff for
repeated two-player n-action general-sum games. We
present a learning algorithm, POSNEL, which learns
the opponent's preference structure and produces, under
self-play, Nash equilibrium payoffs in the limit in
all such games. We also show that such learning will
generate Pareto-optimal payoffs in a large majority
of games. We derive a probability bound for
convergence to Nash Equilibrium payoff and
experimentally demonstrate convergence to Pareto
optimality for all structurally distinct 2-player
2-action conflict games. We also compare our algorithm
with existing algorithms such as WoLF-IGA and JAL
and show that POSNEL, on average, outperforms both
algorithms.},
url = {http://jmvidal.cse.sc.edu/library/banerjee06a.pdf},
keywords = {multiagent learning game-theory}
}
Last modified: Wed Mar 9 10:16:35 EST 2011