Nyeri di Nash (C ++)
Disebut demikian karena fakta bahwa saya harus menulis pemecah kesetimbangan Nash saya sendiri sangat menyakitkan. Saya kagum bahwa tidak ada perpustakaan penyelesaian Nash yang tersedia!
#include <fstream>
#include <iostream>
#include <vector>
#include <array>
#include <random>
#include <utility>
typedef double NumT;
static const NumT EPSILON = 1e-5;
struct Index {
int me;
int them;
Index(int me, int them) : me(me), them(them) {}
};
struct Value {
NumT me;
NumT them;
Value(void) : me(0), them(0) {}
Value(NumT me, NumT them) : me(me), them(them) {}
};
template <int subDimMe, int subDimThem>
struct Game {
const std::array<NumT, 9> *valuesMe;
const std::array<NumT, 9> *valuesThemT;
std::array<int, subDimMe> coordsMe;
std::array<int, subDimThem> coordsThem;
Game(
const std::array<NumT, 9> *valuesMe,
const std::array<NumT, 9> *valuesThemT
)
: valuesMe(valuesMe)
, valuesThemT(valuesThemT)
, coordsMe{}
, coordsThem{}
{}
Index baseIndex(Index i) const {
return Index(coordsMe[i.me], coordsThem[i.them]);
}
Value at(Index i) const {
Index i2 = baseIndex(i);
return Value(
(*valuesMe)[i2.me * 3 + i2.them],
(*valuesThemT)[i2.me + i2.them * 3]
);
}
Game<2, 2> subgame22(int me0, int me1, int them0, int them1) const {
Game<2, 2> b(valuesMe, valuesThemT);
b.coordsMe[0] = coordsMe[me0];
b.coordsMe[1] = coordsMe[me1];
b.coordsThem[0] = coordsThem[them0];
b.coordsThem[1] = coordsThem[them1];
return b;
}
};
struct Strategy {
std::array<NumT, 3> probMe;
std::array<NumT, 3> probThem;
Value expectedValue;
bool valid;
Strategy(void)
: probMe{}
, probThem{}
, expectedValue()
, valid(false)
{}
void findBestMe(const Strategy &b) {
if(b.valid && (!valid || b.expectedValue.me > expectedValue.me)) {
*this = b;
}
}
};
template <int dimMe, int dimThem>
Strategy nash_pure(const Game<dimMe, dimThem> &g) {
Strategy s;
int choiceMe = -1;
int choiceThem = 0;
for(int me = 0; me < dimMe; ++ me) {
for(int them = 0; them < dimThem; ++ them) {
const Value &v = g.at(Index(me, them));
bool valid = true;
for(int me2 = 0; me2 < dimMe; ++ me2) {
if(g.at(Index(me2, them)).me > v.me) {
valid = false;
}
}
for(int them2 = 0; them2 < dimThem; ++ them2) {
if(g.at(Index(me, them2)).them > v.them) {
valid = false;
}
}
if(valid) {
if(choiceMe == -1 || v.me > s.expectedValue.me) {
s.expectedValue = v;
choiceMe = me;
choiceThem = them;
}
}
}
}
if(choiceMe != -1) {
Index iBase = g.baseIndex(Index(choiceMe, choiceThem));
s.probMe[iBase.me] = 1;
s.probThem[iBase.them] = 1;
s.valid = true;
}
return s;
}
Strategy nash_mixed(const Game<2, 2> &g) {
// P Q
// p a A b B
// q c C d D
Value A = g.at(Index(0, 0));
Value B = g.at(Index(0, 1));
Value C = g.at(Index(1, 0));
Value D = g.at(Index(1, 1));
// q = 1-p, Q = 1-P
// Pick p such that choice of P,Q is arbitrary
// p*A+(1-p)*C = p*B+(1-p)*D
// p*A+C-p*C = p*B+D-p*D
// p*(A+D-B-C) = D-C
// p = (D-C) / (A+D-B-C)
NumT p = (D.them - C.them) / (A.them + D.them - B.them - C.them);
// P*a+(1-P)*b = P*c+(1-P)*d
// P*a+b-P*b = P*c+d-P*d
// P*(a+d-b-c) = d-b
// P = (d-b) / (a+d-b-c)
NumT P = (D.me - B.me) / (A.me + D.me - B.me - C.me);
Strategy s;
if(p >= -EPSILON && p <= 1 + EPSILON && P >= -EPSILON && P <= 1 + EPSILON) {
if(p <= 0) {
p = 0;
} else if(p >= 1) {
p = 1;
}
if(P <= 0) {
P = 0;
} else if(P >= 1) {
P = 1;
}
Index iBase0 = g.baseIndex(Index(0, 0));
Index iBase1 = g.baseIndex(Index(1, 1));
s.probMe[iBase0.me] = p;
s.probMe[iBase1.me] = 1 - p;
s.probThem[iBase0.them] = P;
s.probThem[iBase1.them] = 1 - P;
s.expectedValue = Value(
P * A.me + (1 - P) * B.me,
p * A.them + (1 - p) * C.them
);
s.valid = true;
}
return s;
}
Strategy nash_mixed(const Game<3, 3> &g) {
// P Q R
// p a A b B c C
// q d D e E f F
// r g G h H i I
Value A = g.at(Index(0, 0));
Value B = g.at(Index(0, 1));
Value C = g.at(Index(0, 2));
Value D = g.at(Index(1, 0));
Value E = g.at(Index(1, 1));
Value F = g.at(Index(1, 2));
Value G = g.at(Index(2, 0));
Value H = g.at(Index(2, 1));
Value I = g.at(Index(2, 2));
// r = 1-p-q, R = 1-P-Q
// Pick p,q such that choice of P,Q,R is arbitrary
NumT q = ((
+ A.them * (I.them-H.them)
+ G.them * (B.them-C.them)
- B.them*I.them
+ H.them*C.them
) / (
(G.them+E.them-D.them-H.them) * (B.them+I.them-H.them-C.them) -
(H.them+F.them-E.them-I.them) * (A.them+H.them-G.them-B.them)
));
NumT p = (
((G.them+E.them-D.them-H.them) * q + (H.them-G.them)) /
(A.them+H.them-G.them-B.them)
);
NumT Q = ((
+ A.me * (I.me-F.me)
+ C.me * (D.me-G.me)
- D.me*I.me
+ F.me*G.me
) / (
(C.me+E.me-B.me-F.me) * (D.me+I.me-F.me-G.me) -
(F.me+H.me-E.me-I.me) * (A.me+F.me-C.me-D.me)
));
NumT P = (
((C.me+E.me-B.me-F.me) * Q + (F.me-C.me)) /
(A.me+F.me-C.me-D.me)
);
Strategy s;
if(
p >= -EPSILON && q >= -EPSILON && p + q <= 1 + EPSILON &&
P >= -EPSILON && Q >= -EPSILON && P + Q <= 1 + EPSILON
) {
if(p <= 0) { p = 0; }
if(q <= 0) { q = 0; }
if(P <= 0) { P = 0; }
if(Q <= 0) { Q = 0; }
if(p + q >= 1) {
if(p > q) {
p = 1 - q;
} else {
q = 1 - p;
}
}
if(P + Q >= 1) {
if(P > Q) {
P = 1 - Q;
} else {
Q = 1 - P;
}
}
Index iBase0 = g.baseIndex(Index(0, 0));
s.probMe[iBase0.me] = p;
s.probThem[iBase0.them] = P;
Index iBase1 = g.baseIndex(Index(1, 1));
s.probMe[iBase1.me] = q;
s.probThem[iBase1.them] = Q;
Index iBase2 = g.baseIndex(Index(2, 2));
s.probMe[iBase2.me] = 1 - p - q;
s.probThem[iBase2.them] = 1 - P - Q;
s.expectedValue = Value(
A.me * P + B.me * Q + C.me * (1 - P - Q),
A.them * p + D.them * q + G.them * (1 - p - q)
);
s.valid = true;
}
return s;
}
template <int dimMe, int dimThem>
Strategy nash_validate(Strategy &&s, const Game<dimMe, dimThem> &g, Index unused) {
if(!s.valid) {
return s;
}
NumT exp;
exp = 0;
for(int them = 0; them < dimThem; ++ them) {
exp += s.probThem[them] * g.at(Index(unused.me, them)).me;
}
if(exp > s.expectedValue.me) {
s.valid = false;
return s;
}
exp = 0;
for(int me = 0; me < dimMe; ++ me) {
exp += s.probMe[me] * g.at(Index(me, unused.them)).them;
}
if(exp > s.expectedValue.them) {
s.valid = false;
return s;
}
return s;
}
Strategy nash(const Game<2, 2> &g, bool verbose) {
Strategy s = nash_mixed(g);
s.findBestMe(nash_pure(g));
if(!s.valid && verbose) {
std::cerr << "No nash equilibrium found!" << std::endl;
}
return s;
}
Strategy nash(const Game<3, 3> &g, bool verbose) {
Strategy s = nash_mixed(g);
s.findBestMe(nash_validate(nash_mixed(g.subgame22(1, 2, 1, 2)), g, Index(0, 0)));
s.findBestMe(nash_validate(nash_mixed(g.subgame22(1, 2, 0, 2)), g, Index(0, 1)));
s.findBestMe(nash_validate(nash_mixed(g.subgame22(1, 2, 0, 1)), g, Index(0, 2)));
s.findBestMe(nash_validate(nash_mixed(g.subgame22(0, 2, 1, 2)), g, Index(1, 0)));
s.findBestMe(nash_validate(nash_mixed(g.subgame22(0, 2, 0, 2)), g, Index(1, 1)));
s.findBestMe(nash_validate(nash_mixed(g.subgame22(0, 2, 0, 1)), g, Index(1, 2)));
s.findBestMe(nash_validate(nash_mixed(g.subgame22(0, 1, 1, 2)), g, Index(2, 0)));
s.findBestMe(nash_validate(nash_mixed(g.subgame22(0, 1, 0, 2)), g, Index(2, 1)));
s.findBestMe(nash_validate(nash_mixed(g.subgame22(0, 1, 0, 1)), g, Index(2, 2)));
s.findBestMe(nash_pure(g));
if(!s.valid && verbose) {
// theory says this should never happen, but fp precision makes it possible
std::cerr << "No nash equilibrium found!" << std::endl;
}
return s;
}
struct PlayerState {
int balls;
int ducks;
PlayerState(int balls, int ducks) : balls(balls), ducks(ducks) {}
PlayerState doReload(int maxBalls) const {
return PlayerState(std::min(balls + 1, maxBalls), ducks);
}
PlayerState doThrow(void) const {
return PlayerState(std::max(balls - 1, 0), ducks);
}
PlayerState doDuck(void) const {
return PlayerState(balls, std::max(ducks - 1, 0));
}
std::array<double,3> flail(int maxBalls) const {
// opponent has obvious win;
// try stuff at random and hope the opponent is bad
(void) ducks;
int options = 0;
if(balls > 0) {
++ options;
}
if(balls < maxBalls) {
++ options;
}
if(ducks > 0) {
++ options;
}
std::array<double,3> p{};
if(balls < balls) {
p[0] = 1.0f / options;
}
if(balls > 0) {
p[1] = 1.0f / options;
}
return p;
}
};
class GameStore {
protected:
const int balls;
const int ducks;
const std::size_t playerStates;
const std::size_t gameStates;
public:
static std::string filename(int turn) {
return "nashdata_" + std::to_string(turn) + ".dat";
}
GameStore(int maxBalls, int maxDucks)
: balls(maxBalls)
, ducks(maxDucks)
, playerStates((balls + 1) * (ducks + 1))
, gameStates(playerStates * playerStates)
{}
std::size_t playerIndex(const PlayerState &p) const {
return p.balls * (ducks + 1) + p.ducks;
}
std::size_t gameIndex(const PlayerState &me, const PlayerState &them) const {
return playerIndex(me) * playerStates + playerIndex(them);
}
std::size_t fileIndex(const PlayerState &me, const PlayerState &them) const {
return 2 + gameIndex(me, them) * 2;
}
PlayerState stateFromPlayerIndex(std::size_t i) const {
return PlayerState(i / (ducks + 1), i % (ducks + 1));
}
std::pair<PlayerState, PlayerState> stateFromGameIndex(std::size_t i) const {
return std::make_pair(
stateFromPlayerIndex(i / playerStates),
stateFromPlayerIndex(i % playerStates)
);
}
std::pair<PlayerState, PlayerState> stateFromFileIndex(std::size_t i) const {
return stateFromGameIndex((i - 2) / 2);
}
};
class Generator : public GameStore {
static char toDat(NumT v) {
int iv = int(v * 256.0);
return char(std::max(std::min(iv, 255), 0));
}
std::vector<Value> next;
public:
Generator(int maxBalls, int maxDucks)
: GameStore(maxBalls, maxDucks)
, next()
{}
const Value &nextGame(const PlayerState &me, const PlayerState &them) const {
return next[gameIndex(me, them)];
}
void make_probabilities(
std::array<NumT, 9> &g,
const PlayerState &me,
const PlayerState &them
) const {
const int RELOAD = 0;
const int THROW = 1;
const int DUCK = 2;
g[RELOAD * 3 + RELOAD] =
nextGame(me.doReload(balls), them.doReload(balls)).me;
g[RELOAD * 3 + THROW] =
(them.balls > 0) ? -1
: nextGame(me.doReload(balls), them.doThrow()).me;
g[RELOAD * 3 + DUCK] =
nextGame(me.doReload(balls), them.doDuck()).me;
g[THROW * 3 + RELOAD] =
(me.balls > 0) ? 1
: nextGame(me.doThrow(), them.doReload(balls)).me;
g[THROW * 3 + THROW] =
((me.balls > 0) == (them.balls > 0))
? nextGame(me.doThrow(), them.doThrow()).me
: (me.balls > 0) ? 1 : -1;
g[THROW * 3 + DUCK] =
(me.balls > 0 && them.ducks == 0) ? 1
: nextGame(me.doThrow(), them.doDuck()).me;
g[DUCK * 3 + RELOAD] =
nextGame(me.doDuck(), them.doReload(balls)).me;
g[DUCK * 3 + THROW] =
(them.balls > 0 && me.ducks == 0) ? -1
: nextGame(me.doDuck(), them.doThrow()).me;
g[DUCK * 3 + DUCK] =
nextGame(me.doDuck(), them.doDuck()).me;
}
Game<3, 3> make_game(const PlayerState &me, const PlayerState &them) const {
static std::array<NumT, 9> globalValuesMe;
static std::array<NumT, 9> globalValuesThemT;
#pragma omp threadprivate(globalValuesMe)
#pragma omp threadprivate(globalValuesThemT)
make_probabilities(globalValuesMe, me, them);
make_probabilities(globalValuesThemT, them, me);
Game<3, 3> g(&globalValuesMe, &globalValuesThemT);
for(int i = 0; i < 3; ++ i) {
g.coordsMe[i] = i;
g.coordsThem[i] = i;
}
return g;
}
Strategy solve(const PlayerState &me, const PlayerState &them, bool verbose) const {
if(me.balls > them.balls + them.ducks) { // obvious answer
Strategy s;
s.probMe[1] = 1;
s.probThem = them.flail(balls);
s.expectedValue = Value(1, -1);
return s;
} else if(them.balls > me.balls + me.ducks) { // uh-oh
Strategy s;
s.probThem[1] = 1;
s.probMe = me.flail(balls);
s.expectedValue = Value(-1, 1);
return s;
} else if(me.balls == 0 && them.balls == 0) { // obvious answer
Strategy s;
s.probMe[0] = 1;
s.probThem[0] = 1;
s.expectedValue = nextGame(me.doReload(balls), them.doReload(balls));
return s;
} else {
return nash(make_game(me, them), verbose);
}
}
void generate(int turns, bool saveAll, bool verbose) {
next.clear();
next.resize(gameStates);
std::vector<Value> current(gameStates);
std::vector<char> data(2 + gameStates * 2);
for(std::size_t turn = turns; (turn --) > 0;) {
if(verbose) {
std::cerr << "Generating for turn " << turn << "..." << std::endl;
}
NumT maxDiff = 0;
NumT msd = 0;
data[0] = balls;
data[1] = ducks;
#pragma omp parallel for reduction(+:msd), reduction(max:maxDiff)
for(std::size_t meBalls = 0; meBalls < balls + 1; ++ meBalls) {
for(std::size_t meDucks = 0; meDucks < ducks + 1; ++ meDucks) {
const PlayerState me(meBalls, meDucks);
for(std::size_t themBalls = 0; themBalls < balls + 1; ++ themBalls) {
for(std::size_t themDucks = 0; themDucks < ducks + 1; ++ themDucks) {
const PlayerState them(themBalls, themDucks);
const std::size_t p1 = gameIndex(me, them);
Strategy s = solve(me, them, verbose);
NumT diff;
data[2+p1*2 ] = toDat(s.probMe[0]);
data[2+p1*2+1] = toDat(s.probMe[0] + s.probMe[1]);
current[p1] = s.expectedValue;
diff = current[p1].me - next[p1].me;
msd += diff * diff;
maxDiff = std::max(maxDiff, std::abs(diff));
}
}
}
}
if(saveAll) {
std::ofstream fs(filename(turn).c_str(), std::ios_base::binary);
fs.write(&data[0], data.size());
fs.close();
}
if(verbose) {
std::cerr
<< "Expectations changed by at most " << maxDiff
<< " (RMSD: " << std::sqrt(msd / gameStates) << ")" << std::endl;
}
if(maxDiff < 0.0001f) {
if(verbose) {
std::cerr << "Expectations have converged. Stopping." << std::endl;
}
break;
}
std::swap(next, current);
}
// Always save turn 0 with the final converged expectations
std::ofstream fs(filename(0).c_str(), std::ios_base::binary);
fs.write(&data[0], data.size());
fs.close();
}
};
void open_file(std::ifstream &target, int turn, int maxDucks, int maxBalls) {
target.open(GameStore::filename(turn).c_str(), std::ios::binary);
if(target.is_open()) {
return;
}
target.open(GameStore::filename(0).c_str(), std::ios::binary);
if(target.is_open()) {
return;
}
Generator(maxBalls, maxDucks).generate(200, false, false);
target.open(GameStore::filename(0).c_str(), std::ios::binary);
}
int choose(int turn, const PlayerState &me, const PlayerState &them, int maxBalls) {
std::ifstream fs;
open_file(fs, turn, std::max(me.ducks, them.ducks), maxBalls);
unsigned char balls = fs.get();
unsigned char ducks = fs.get();
fs.seekg(GameStore(balls, ducks).fileIndex(me, them));
unsigned char p0 = fs.get();
unsigned char p1 = fs.get();
fs.close();
// only 1 random number per execution; no need to seed a PRNG
std::random_device rand;
int v = std::uniform_int_distribution<int>(0, 254)(rand);
if(v < p0) {
return 0;
} else if(v < p1) {
return 1;
} else {
return 2;
}
}
int main(int argc, const char *const *argv) {
if(argc == 4) { // maxTurns, maxBalls, maxDucks
Generator(atoi(argv[2]), atoi(argv[3])).generate(atoi(argv[1]), true, true);
return 0;
}
if(argc == 7) { // turn, meBalls, themBalls, meDucks, themDucks, maxBalls
std::cout << choose(
atoi(argv[1]),
PlayerState(atoi(argv[2]), atoi(argv[4])),
PlayerState(atoi(argv[3]), atoi(argv[5])),
atoi(argv[6])
) << std::endl;
return 0;
}
return 1;
}
Kompilasi sebagai C ++ 11 atau lebih baik. Untuk kinerja, sebaiknya dikompilasi dengan dukungan OpenMP (tapi ini hanya untuk kecepatan; itu tidak diperlukan)
g++ -std=c++11 -fopenmp pain_in_the_nash.cpp -o pain_in_the_nash
Ini menggunakan kesetimbangan Nash untuk memutuskan apa yang harus dilakukan pada setiap belokan, yang berarti bahwa secara teori itu akan selalu menang atau seri dalam jangka panjang (dalam banyak pertandingan), apa pun strategi yang digunakan lawan. Apakah itu yang terjadi dalam praktik tergantung pada apakah saya membuat kesalahan dalam implementasi. Namun, karena kompetisi KoTH ini hanya memiliki satu putaran melawan masing-masing lawan, itu mungkin tidak akan berhasil dengan baik di papan peringkat.
Ide awal saya adalah memiliki fungsi penilaian sederhana untuk setiap kondisi permainan (mis. Setiap bola bernilai + b, setiap bebek adalah + d), tetapi ini mengarah pada masalah yang jelas mencari tahu seperti apa penilaian itu seharusnya, dan berarti itu tidak bisa bertindak atas berkurangnya pengembalian semakin banyak bola, dll. Jadi, ini akan menganalisis seluruh pohon permainan , bekerja mundur dari belokan 1000, dan mengisi penilaian aktual berdasarkan pada bagaimana setiap permainan bisa berjalan dengan baik.
Hasilnya adalah bahwa saya sama sekali tidak tahu strategi apa yang digunakan ini, kecuali untuk beberapa perilaku "jelas" keras-kode (melempar bola salju jika Anda memiliki lebih banyak bola daripada lawan Anda memiliki bola + bebek, dan memuat kembali jika Anda berdua keluar bola salju). Jika ada yang ingin menganalisis dataset yang dihasilkannya, saya bayangkan ada beberapa perilaku menarik untuk ditemukan!
Menguji ini terhadap "Simpan Satu" menunjukkan bahwa ia memang menang dalam jangka panjang, tetapi hanya dengan selisih kecil (514 menang, 486 kerugian, 0 imbang dalam batch pertama dari 1000 pertandingan, dan 509 menang, 491 kerugian, 0 menarik yang kedua).
Penting!
Ini akan berhasil, tetapi itu bukan ide yang bagus. Dibutuhkan sekitar 9 menit pada laptop saya yang cukup-pengembang-spesifik untuk menghasilkan pohon permainan penuh. Tapi itu akan menyimpan probabilitas akhir menjadi file setelah mereka dibuat, dan setelah itu setiap belokan hanya menghasilkan angka acak dan membandingkannya dengan 2 byte, jadi ini sangat cepat.
Untuk pintas semua itu, cukup unduh file ini (3.5MB) dan letakkan di direktori dengan executable.
Atau Anda dapat membuatnya sendiri dengan menjalankan:
./pain_in_the_nash 1000 50 25
Yang akan menyimpan satu file per giliran, hingga konvergensi. Perhatikan bahwa setiap file berukuran 3,5 MB dan konvergen pada gilirannya 720 (yaitu 280 file, ~ 1GB), dan karena sebagian besar game tidak mendekati 720, file pra-konvergensi memiliki kepentingan yang sangat rendah.