Title: | Reaching Pareto-optimality in prisoner's dilemma using conditional joint action learning |
Author: | Dipyaman Banerjee and Sandip Sen |
Journal: | Autonomous Agents and Multi-Agent Systems |
Volume: | 15 |
Number: | 1 |
Pages: | 91--108 |
Publisher: | Kluwer Academic Publishers |
Year: | 2007 |
DOI: | 10.1007/s10458-007-0020-8 |
Abstract: | We consider the learning problem faced by two self-interested agents repeatedly playing a general-sum stage game. We assume that the players can observe each other's actions but not the payoffs received by the other player. The concept of Nash Equilibrium in repeated games provides an individually rational solution for playing such games and can be achieved by playing the Nash Equilibrium strategy for the single-shot game in every iteration. Such a strategy, however, can sometimes lead to a Pareto-Dominated outcome for games like Prisoner's Dilemma. So we prefer learning strategies that converge to a Pareto-Optimal outcome that also produces a Nash Equilibrium payoff for repeated two-player, n-action general-sum games. The Folk Theorem enables us to identify such outcomes. In this paper, we introduce the Conditional Joint Action Learner (CJAL), which learns the conditional probability of an action taken by the opponent given its own actions and uses it to decide its next course of action. We empirically show that under self-play, and if the payoff structure of the Prisoner's Dilemma game satisfies certain conditions, a CJAL learner, using a random exploration strategy followed by a completely greedy exploitation technique, will learn to converge to a Pareto-Optimal solution. We also show that such learning will generate Pareto-Optimal payoffs in a large majority of other two-player general-sum games. We compare the performance of CJAL with that of existing algorithms such as WoLF-PHC and JAL on all structurally distinct two-player conflict games with ordinal payoffs. |
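As a rough illustration of the approach described in the abstract, below is a minimal Python sketch of a CJAL-style learner for the iterated Prisoner's Dilemma: it estimates the conditional probability of the opponent's action given its own action from observed play, explores randomly for a fixed number of rounds, and then greedily picks the own action with the highest expected payoff. The payoff values, exploration length, and all names below are illustrative assumptions, not taken from the paper.

import random
from collections import defaultdict

# Illustrative Prisoner's Dilemma payoffs for the acting player (assumed,
# not taken from the paper): actions are 'C' (cooperate) and 'D' (defect).
PAYOFF = {('C', 'C'): 3, ('C', 'D'): 0, ('D', 'C'): 5, ('D', 'D'): 1}
ACTIONS = ['C', 'D']

class CJALSketch:
    """Sketch of a conditional joint action learner: estimates
    P(opponent action | own action) from observed play and, after an
    exploration phase, picks the own action with the highest expected payoff."""

    def __init__(self, explore_steps=200):
        self.explore_steps = explore_steps    # length of random exploration phase (assumed)
        self.t = 0
        self.joint_counts = defaultdict(int)  # counts of (own, opponent) action pairs
        self.own_counts = defaultdict(int)    # counts of own actions

    def choose_action(self):
        self.t += 1
        if self.t <= self.explore_steps:
            return random.choice(ACTIONS)     # uniform random exploration
        # Greedy exploitation: maximize expected payoff under the learned
        # conditional distribution of the opponent's action given our own.
        def expected_payoff(a):
            n = self.own_counts[a]
            if n == 0:
                return 0.0
            return sum(self.joint_counts[(a, b)] / n * PAYOFF[(a, b)]
                       for b in ACTIONS)
        return max(ACTIONS, key=expected_payoff)

    def observe(self, own_action, opp_action):
        # Update the empirical conditional distribution after each round.
        self.own_counts[own_action] += 1
        self.joint_counts[(own_action, opp_action)] += 1

if __name__ == "__main__":
    # Self-play: two CJAL sketches against each other.
    a, b = CJALSketch(), CJALSketch()
    for _ in range(1000):
        ax, bx = a.choose_action(), b.choose_action()
        a.observe(ax, bx)
        b.observe(bx, ax)
    print("final joint play:", ax, bx)

With these example payoffs, mutual defection after exploration drives the estimated value of defecting down toward the punishment payoff, while cooperation retains the higher value estimated during exploration, so self-play typically settles into mutual cooperation, consistent with the convergence behavior the abstract describes for suitable payoff structures.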
@Article{banerjee07a,
author = {Dipyaman Banerjee and Sandip Sen},
title = {Reaching Pareto-optimality in prisoner's dilemma using conditional joint action learning},
journal = {Autonomous Agents and Multi-Agent Systems},
volume = 15,
number = 1,
year = 2007,
issn = {1387-2532},
pages = {91--108},
doi = {10.1007/s10458-007-0020-8},
publisher = {Kluwer Academic Publishers},
address = {Hingham, MA, USA},
abstract = {We consider the learning problem faced by two
self-interested agents repeatedly playing a
general-sum stage game. We assume that the players
can observe each other's actions but not the payoffs
received by the other player. The concept of Nash
Equilibrium in repeated games provides an
individually rational solution for playing such
games and can be achieved by playing the Nash
Equilibrium strategy for the single-shot game in
every iteration. Such a strategy, however, can
sometimes lead to a Pareto-Dominated outcome for
games like Prisoner's Dilemma. So we prefer learning
strategies that converge to a Pareto-Optimal outcome
that also produces a Nash Equilibrium payoff for
repeated two-player, n-action general-sum games. The
Folk Theorem enables us to identify such outcomes. In
this paper, we introduce the Conditional Joint
Action Learner (CJAL), which learns the conditional
probability of an action taken by the opponent given
its own actions and uses it to decide its next
course of action. We empirically show that under
self-play, and if the payoff structure of the
Prisoner's Dilemma game satisfies certain
conditions, a CJAL learner, using a random
exploration strategy followed by a completely greedy
exploitation technique, will learn to converge to a
Pareto-Optimal solution. We also show that such
learning will generate Pareto-Optimal payoffs in a
large majority of other two-player general-sum
games. We compare the performance of CJAL with that
of existing algorithms such as WoLF-PHC and JAL on
all structurally distinct two-player conflict games
with ordinal payoffs.},
url = {http://jmvidal.cse.sc.edu/library/banerjee07a.pdf},
keywords = {multiagent learning}
}