Ansambel
Ini menjalankan ansambel model terkait. Masing-masing model mempertimbangkan jumlah riwayat yang berbeda, dan memiliki opsi untuk selalu memilih langkah yang akan mengoptimalkan perbedaan pembayaran yang diharapkan, atau akan secara acak memilih langkah dalam proporsi terhadap perbedaan pembayaran yang diharapkan.
Setiap anggota ensemble kemudian memberikan suara pada langkah yang mereka sukai. Mereka mendapatkan jumlah suara yang sama dengan berapa banyak yang telah mereka menangkan daripada lawan (yang berarti bahwa model yang buruk akan mendapatkan suara negatif). Langkah mana pun yang memenangkan suara kemudian dipilih.
(Mereka mungkin harus membagi suara mereka di antara gerakan sesuai dengan seberapa besar mereka menyukai masing-masing, tapi saya tidak cukup peduli untuk melakukan itu sekarang.)
Itu mengalahkan semua yang diposting sejauh ini kecuali EvaluaterBot dan PatternFinder. (Satu-satu, itu mengalahkan EvaluaterBot dan kehilangan ke PatternFinder).
from collections import defaultdict
import random
class Number6:
class Choices:
def __init__(self, C = 0, N = 0, D = 0):
self.C = C
self.N = N
self.D = D
def __init__(self, strategy = "maxExpected", markov_order = 3):
self.MARKOV_ORDER = markov_order;
self.my_choices = ""
self.opponent = defaultdict(lambda: self.Choices())
self.choice = None # previous choice
self.payoff = {
"C": { "C": 3-3, "N": 4-1, "D": 0-5 },
"N": { "C": 1-4, "N": 2-2, "D": 3-2 },
"D": { "C": 5-0, "N": 2-3, "D": 1-1 },
}
self.total_payoff = 0
# if random, will choose in proportion to payoff.
# otherwise, will always choose argmax
self.strategy = strategy
# maxExpected: maximize expected relative payoff
# random: like maxExpected, but it chooses in proportion to E[payoff]
# argmax: always choose the option that is optimal for expected opponent choice
def update_opponent_model(self, last):
for i in range(0, self.MARKOV_ORDER):
hist = self.my_choices[i:]
self.opponent[hist].C += ("C" == last)
self.opponent[hist].N += ("N" == last)
self.opponent[hist].D += ("D" == last)
def normalize(self, counts):
sum = float(counts.C + counts.N + counts.D)
if 0 == sum:
return self.Choices(1.0 / 3.0, 1.0 / 3.0, 1.0 / 3.0)
return self.Choices(
counts.C / sum, counts.N / sum, counts.D / sum)
def get_distribution(self):
for i in range(0, self.MARKOV_ORDER):
hist = self.my_choices[i:]
#print "check hist = " + hist
if hist in self.opponent:
return self.normalize(self.opponent[hist])
return self.Choices(1.0 / 3.0, 1.0 / 3.0, 1.0 / 3.0)
def choose(self, dist):
payoff = self.Choices()
# We're interested in *beating the opponent*, not
# maximizing our score, so we optimize the difference
payoff.C = (3-3) * dist.C + (4-1) * dist.N + (0-5) * dist.D
payoff.N = (1-4) * dist.C + (2-2) * dist.N + (3-2) * dist.D
payoff.D = (5-0) * dist.C + (2-3) * dist.N + (1-1) * dist.D
# D has slightly better payoff on uniform opponent,
# so we select it on ties
if self.strategy == "maxExpected":
if payoff.C > payoff.N:
return "C" if payoff.C > payoff.D else "D"
return "N" if payoff.N > payoff.D else "D"
elif self.strategy == "randomize":
payoff = self.normalize(payoff)
r = random.uniform(0.0, 1.0)
if (r < payoff.C): return "C"
return "N" if (r < payoff.N) else "D"
elif self.strategy == "argMax":
if dist.C > dist.N:
return "D" if dist.C > dist.D else "N"
return "C" if dist.N > dist.D else "N"
assert(0) #, "I am not a number! I am a free man!")
def update_history(self):
self.my_choices += self.choice
if len(self.my_choices) > self.MARKOV_ORDER:
assert(len(self.my_choices) == self.MARKOV_ORDER + 1)
self.my_choices = self.my_choices[1:]
def round(self, last):
if last: self.update_opponent_model(last)
dist = self.get_distribution()
self.choice = self.choose(dist)
self.update_history()
return self.choice
class Ensemble:
def __init__(self):
self.models = []
self.votes = []
self.prev_choice = []
for order in range(0, 6):
self.models.append(Number6("maxExpected", order))
self.models.append(Number6("randomize", order))
#self.models.append(Number6("argMax", order))
for i in range(0, len(self.models)):
self.votes.append(0)
self.prev_choice.append("D")
self.payoff = {
"C": { "C": 3-3, "N": 4-1, "D": 0-5 },
"N": { "C": 1-4, "N": 2-2, "D": 3-2 },
"D": { "C": 5-0, "N": 2-3, "D": 1-1 },
}
def round(self, last):
if last:
for i in range(0, len(self.models)):
self.votes[i] += self.payoff[self.prev_choice[i]][last]
# vote. Sufficiently terrible models get negative votes
C = 0
N = 0
D = 0
for i in range(0, len(self.models)):
choice = self.models[i].round(last)
if "C" == choice: C += self.votes[i]
if "N" == choice: N += self.votes[i]
if "D" == choice: D += self.votes[i]
self.prev_choice[i] = choice
if C > D and C > N: return "C"
elif N > D: return "N"
else: return "D"
Kerangka Uji
Jika ada orang lain yang merasakan manfaatnya, inilah kerangka uji coba untuk melihat pertarungan individu. Python2. Masukkan saja semua lawan yang Anda minati di dalam oposisi.py, dan ubah referensi ke Ensemble menjadi milik Anda.
import sys, inspect
import opponents
from ensemble import Ensemble
def count_payoff(label, them):
if None == them: return
me = choices[label]
payoff = {
"C": { "C": 3-3, "N": 4-1, "D": 0-5 },
"N": { "C": 1-4, "N": 2-2, "D": 3-2 },
"D": { "C": 5-0, "N": 2-3, "D": 1-1 },
}
if label not in total_payoff: total_payoff[label] = 0
total_payoff[label] += payoff[me][them]
def update_hist(label, choice):
choices[label] = choice
opponents = [ x[1] for x
in inspect.getmembers(sys.modules['opponents'], inspect.isclass)]
for k in opponents:
total_payoff = {}
for j in range(0, 100):
A = Ensemble()
B = k()
choices = {}
aChoice = None
bChoice = None
for i in range(0, 100):
count_payoff(A.__class__.__name__, bChoice)
a = A.round(bChoice)
update_hist(A.__class__.__name__, a)
count_payoff(B.__class__.__name__, aChoice)
b = B.round(aChoice)
update_hist(B.__class__.__name__, b)
aChoice = a
bChoice = b
print total_payoff