ダーウィン-C
とにかく、誰が戦略を必要としますか?穴居人のグループにお互いに行き、自然選択に任せましょう!
穴居人の原始的な頭脳には非常に単純なモデルを使用します。記憶はなく、相手と相手のスティックの鋭さだけを考慮します。これらは、有限次数のバイナリ多項式の変数として使用されます。各アクション(ブロック、シャープ、ポーク)には関連付けられた多項式があり、その結果により、このアクションを選択する相対的な確率が決まります。これでほとんどすべてです。ランダム係数から始めて、繰り返し最適化します。
ボット:
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/* magic numbers */
#define SWORD_SHARPNESS 5
#define PROGRAM_DIM 4 /* polynomial order + 1 */
#define DEFAULT_FILENAME "players/Darwin/program"
typedef double real;
typedef real program[PROGRAM_DIM][PROGRAM_DIM];
typedef program caveman_brain[3];
typedef char action; /* S, B or P */
/* encodes a pair of actions */
#define ACTION_PAIR(a1, a2) (((int)(a1) << (sizeof(action) * 8)) | (a2))
real eval_program(const program p, double x, double y) {
real v = 0;
int i, j;
for (i = 0; i < PROGRAM_DIM; ++i) {
real w = 0;
for (j = 0; j < PROGRAM_DIM; ++j)
w = x * w + p[i][j];
v = y * v + w;
}
if (v < 0)
v = 0;
return v;
}
void read_program(FILE* f, program p) {
int i, j;
for (i = 0; i < PROGRAM_DIM; ++i) {
for (j = 0; j < PROGRAM_DIM; ++j) {
double v;
fscanf(f, "%lg", &v);
p[i][j] = v;
}
}
}
int blunt(int* s) {
int temp = *s;
if (temp)
--*s;
return temp;
}
void sharpen(int* s) { ++*s; }
/* takes two sharpness/action pairs and updates the sharpness accordingly.
* returns negative value if first caveman wins, positive value if second
* caveman wins and 0 otherwise. */
int act(int* s1, action a1, int* s2, action a2) {
switch (ACTION_PAIR(a1, a2)) {
case ACTION_PAIR('B', 'B'): return 0;
case ACTION_PAIR('B', 'S'): sharpen(s2); return 0;
case ACTION_PAIR('B', 'P'): return blunt(s2) >= SWORD_SHARPNESS ? 1 :
0;
case ACTION_PAIR('S', 'B'): sharpen(s1); return 0;
case ACTION_PAIR('S', 'S'): sharpen(s1); sharpen(s2); return 0;
case ACTION_PAIR('S', 'P'): sharpen(s1); return *s2 > 0 ? 1 : 0;
case ACTION_PAIR('P', 'B'): return blunt(s1) >= SWORD_SHARPNESS ? -1 :
0;
case ACTION_PAIR('P', 'S'): sharpen(s2); return *s1 > 0 ? -1 : 0;
case ACTION_PAIR('P', 'P'): {
int t1 = blunt(s1), t2 = blunt(s2);
if (t1 >= SWORD_SHARPNESS && t2 < SWORD_SHARPNESS)
return -1;
else if (t2 >= SWORD_SHARPNESS && t1 < SWORD_SHARPNESS)
return 1;
else
return 0;
}
}
}
/* processes a pair of strings of actions */
int str_act(int* s1, const char* a1, int* s2, const char* a2) {
for (; *a1 && *a2; ++a1, ++a2) {
int winner = act(s1, *a1, s2, *a2);
if (winner)
return winner;
}
return 0;
}
double frandom() { return (double)rand() / RAND_MAX; }
/* chooses an action based on self and opponent's sharpness */
action choose_action(const caveman_brain b, int s1, int s2) {
double v[3];
double sum = 0;
double r;
int i;
for (i = 0; i < 3; ++i) {
v[i] = eval_program(b[i], s1, s2);
sum += v[i];
}
r = frandom() * sum;
if (r <= v[0])
return 'B';
else if (r <= v[0] + v[1])
return 'S';
else
return 'P';
}
/* portable tick-count for random seed */
#ifdef _WIN32
#include <Windows.h>
unsigned int tick_count() { return GetTickCount(); }
#else
#include <sys/time.h>
unsigned int tick_count() {
struct timeval t;
gettimeofday(&t, NULL);
return 1000 * t.tv_sec + t.tv_usec / 1000;
}
#endif
int main(int argc, const char* argv[]) {
const char* filename = DEFAULT_FILENAME;
const char *a1, *a2;
FILE* f;
caveman_brain b;
int s1 = 0, s2 = 0;
int i;
srand(tick_count()); rand();
a1 = argc > 1 ? argv[1] : "";
if (*a1) {
a2 = strchr(a1, ',');
if (a2 == NULL) {
printf("invalid input!\n");
return 1;
}
++a2;
} else
a2 = a1;
if (argc > 2)
filename = argv[2];
f = fopen(filename, "r");
if (f == NULL) {
printf("failed to open `%s'\n", filename);
return 1;
}
for (i = 0; i < 3; ++i)
read_program(f, b[i]);
fclose(f);
str_act(&s1, a1, &s2, a2);
printf("%c\n", choose_action(b, s1, s2));
return 0;
}
でコンパイルしますgcc darwin.c -odarwin -w -O3
。で実行します./darwin <history>
。
ボットprogram
は、players/Darwin
ディレクトリで指定されたファイルから係数を読み取ります(別のファイルを2番目のコマンドライン引数として指定できます)。このプログラムはうまくいくようです:
0.286736 0.381578 -0.128122 1.33933
0.723126 0.380574 1.21659 -0.9734
0.924371 0.998632 -0.0951554 0.744323
-0.113888 -0.321772 -0.260496 -0.136341
0.280292 -0.699782 -0.246245 1.27435
-1.24563 -0.959822 -0.745656 0.0347998
-0.917928 -0.384105 0.319008 -0.70434
0.484375 0.802138 0.0967234 0.638466
0.406679 0.597322 1.39409 0.902353
-0.735946 0.742589 0.955567 0.643268
-0.503946 0.446167 1.002 0.328205
0.26037 0.113346 0.0517265 -0.223298
として保存しplayers/Darwin/program
ます。
以下は、program
ボットが使用できるファイルを生成するプログラムです(program
上記のファイルを使用する場合、コンパイルする必要はありません)。
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/* magic numbers */
#define SWORD_SHARPNESS 5
#define MAX_TURN_COUNT 100
#define PROGRAM_DIM 4 /* polynomial order + 1 */
#define CAVEMAN_COUNT 500
#define GENERATION_COUNT 12
#define DUEL_COUNT 8
#define ERROR_BACKOFF 0.5
#define DEFAULT_FILENAME "players/Darwin/program"
typedef double real;
typedef real program[PROGRAM_DIM][PROGRAM_DIM];
typedef program caveman_brain[3];
typedef char action; /* S, B or P */
/* encodes a pair of actions */
#define ACTION_PAIR(a1, a2) (((int)(a1) << (sizeof(action) * 8)) | (a2))
real eval_program(const program p, double x, double y) {
real v = 0;
int i, j;
for (i = 0; i < PROGRAM_DIM; ++i) {
real w = 0;
for (j = 0; j < PROGRAM_DIM; ++j)
w = x * w + p[i][j];
v = y * v + w;
}
if (v < 0)
v = 0;
return v;
}
void write_program(FILE* f, const program p) {
int i, j;
for (i = 0; i < PROGRAM_DIM; ++i) {
for (j = 0; j < PROGRAM_DIM; ++j)
fprintf(f, "%g ", p[i][j]);
fprintf(f, "\n");
}
fprintf(f, "\n");
}
int blunt(int* s) {
int temp = *s;
if (temp)
--*s;
return temp;
}
void sharpen(int* s) { ++*s; }
/* takes two sharpness/action pairs and updates the sharpness accordingly.
* returns negative value if first caveman wins, positive value if second
* caveman wins and 0 otherwise. */
int act(int* s1, action a1, int* s2, action a2) {
switch (ACTION_PAIR(a1, a2)) {
case ACTION_PAIR('B', 'B'): return 0;
case ACTION_PAIR('B', 'S'): sharpen(s2); return 0;
case ACTION_PAIR('B', 'P'): return blunt(s2) >= SWORD_SHARPNESS ? 1 :
0;
case ACTION_PAIR('S', 'B'): sharpen(s1); return 0;
case ACTION_PAIR('S', 'S'): sharpen(s1); sharpen(s2); return 0;
case ACTION_PAIR('S', 'P'): sharpen(s1); return *s2 > 0 ? 1 : 0;
case ACTION_PAIR('P', 'B'): return blunt(s1) >= SWORD_SHARPNESS ? -1 :
0;
case ACTION_PAIR('P', 'S'): sharpen(s2); return *s1 > 0 ? -1 : 0;
case ACTION_PAIR('P', 'P'): {
int t1 = blunt(s1), t2 = blunt(s2);
if (t1 >= SWORD_SHARPNESS && t2 < SWORD_SHARPNESS)
return -1;
else if (t2 >= SWORD_SHARPNESS && t1 < SWORD_SHARPNESS)
return 1;
else
return 0;
}
}
}
/* processes a pair of strings of actions */
int str_act(int* s1, const char* a1, int* s2, const char* a2) {
for (; *a1 && *a2; ++a1, ++a2) {
int winner = act(s1, *a1, s2, *a2);
if (winner)
return winner;
}
return 0;
}
double frandom() { return (double)rand() / RAND_MAX; }
double firandom() { return 2.0 * rand() / RAND_MAX - 1.0; }
/* chooses an action based on self and opponent's sharpness */
action choose_action(const caveman_brain b, int s1, int s2) {
double v[3];
double sum = 0;
double r;
int i;
for (i = 0; i < 3; ++i) {
v[i] = eval_program(b[i], s1, s2);
sum += v[i];
}
r = frandom() * sum;
if (r <= v[0])
return 'B';
else if (r <= v[0] + v[1])
return 'S';
else
return 'P';
}
typedef struct {
caveman_brain brain;
int sharpness;
int score;
} caveman;
void init_caveman(caveman* c, const caveman* m, double e) {
int p, i, j;
c->score = 0;
for (p = 0; p < 3; ++p) {
for (i = 0; i < PROGRAM_DIM; ++i) {
for (j = 0; j < PROGRAM_DIM; ++j) {
c->brain[p][i][j] = m->brain[p][i][j] + firandom() * e;
}
}
}
}
int duel(caveman* c1, caveman* c2) {
int winner;
int turn;
c1->sharpness = c2->sharpness = 0;
for (turn = 0; turn < MAX_TURN_COUNT; ++turn) {
winner = act(&c1->sharpness,
choose_action(c1->brain, c1->sharpness, c2->sharpness),
&c2->sharpness,
choose_action(c2->brain, c2->sharpness, c1->sharpness));
if (winner)
break;
}
if (winner < 0)
++c1->score;
else if (winner > 0)
++c2->score;
return winner;
}
/* portable tick-count for random seed */
#ifdef _WIN32
#include <Windows.h>
unsigned int tick_count() { return GetTickCount(); }
#else
#include <sys/time.h>
unsigned int tick_count() {
struct timeval t;
gettimeofday(&t, NULL);
return 1000 * t.tv_sec + t.tv_usec / 1000;
}
#endif
int main(int argc, const char* argv[]) {
const char* filename = DEFAULT_FILENAME;
FILE* f;
caveman* cavemen;
caveman winner;
int gen;
double err = 1.0;
int i;
srand(tick_count()); rand();
memset(&winner, 0, sizeof(caveman));
if ((cavemen = (caveman*)malloc(sizeof(caveman) * CAVEMAN_COUNT)) == NULL) {
printf("not enough memory!\n");
return 1;
}
for (gen = 0; gen < GENERATION_COUNT; ++gen) {
int i, j, k;
const caveman* leader;
printf("[Gen. %d / %d] ", gen + 1, GENERATION_COUNT);
fflush(stdout);
for (i = 0; i < CAVEMAN_COUNT; ++i)
init_caveman(&cavemen[i], &winner, err);
for (i = 0; i < CAVEMAN_COUNT; ++i) {
for (j = i + 1; j < CAVEMAN_COUNT; ++j) {
for (k = 0; k < DUEL_COUNT; ++k)
duel(&cavemen[i], &cavemen[j]);
}
}
leader = cavemen;
for (i = 1; i < CAVEMAN_COUNT; ++i) {
if (cavemen[i].score > leader->score)
leader = &cavemen[i];
}
printf("Caveman #%d wins with %d victories in %d duels\n",
leader - cavemen + 1,
leader->score, (CAVEMAN_COUNT - 1) * DUEL_COUNT);
memcpy(&winner, leader, sizeof(caveman));
err *= ERROR_BACKOFF;
}
free(cavemen);
if (argc > 1)
filename = argv[1];
printf("Dumping brain to `%s'\n", filename);
f = fopen(filename, "w");
if (f == NULL) {
printf("failed to open `%s'\n", filename);
return 1;
}
for (i = 0; i < 3; ++i)
write_program(f, winner.brain[i]);
fclose(f);
return 0;
}
でコンパイルしますgcc genprog.c -ogenprog -w -O3
。で実行します./genprog [output-filename]
。
ワトソン
勝利の穴居人のDNAとは何ですか?おそらく、このフェラには答えがあります:
# That's the actual logic. Initialization goes below.
def run():
if his_sharpness[-10] - turn / 15 + 1 + turn % 3 - his_sharpness[-6] < 0:
act(B=0, S=0, P=100) # 7.21% chance
elif his_sharpness[-6] + 1 - his_sharpness[-2] < 0:
act(B=0, S=0, P=100) # 4.15% chance
elif his_history[-3] - my_history[-1] <= 0 and my_sharpness[-1] - turn / 10 <= 0:
act(B=0, S=100, P=0) # 11.34% chance
elif his_sharpness[-1] == 0:
act(B=0, S=100, P=0) # 27.84% chance
else:
act(B=100, S=0, P=0) # 49.46% chance
# Boring stuff go here...
import sys, random
# Actions
block, sharpen, poke, idle = range(4)
# Converts textual history to internal format
def convert_history(textual_history):
return ["BSP".index(action) for action in textual_history]
# Calculates sharpness after performing an action sequence
def calculate_sharpness(history):
return history.count(sharpen) - history.count(poke)
# Returns a list containing the sharpness at the end of each turn
def sharpness_history(history):
return [calculate_sharpness(history[:i + 1]) for i in range(len(history))]
# Acts based on the probability distribution (B%, S%, P%)
def act(B, S, P):
r = random.random() * 100
print "BSP"[(r >= B) + (r >= B + S)]
# Setup data
textual_history = sys.argv[1] if len(sys.argv) > 1 else ","
my_history, his_history = (convert_history(h) for h in textual_history.split(','))
my_sharpness, his_sharpness = (sharpness_history(h) for h in (my_history, his_history))
turn = len(my_history)
my_history, his_history = ([idle] * 16 + h for h in (my_history, his_history))
my_sharpness, his_sharpness = ([0] * 16 + s for s in (my_sharpness, his_sharpness))
# Make a move
run()
で実行: python Watson.py
ワトソンは遺伝的アルゴリズムの産物です。ダーウィンとは異なり、今回の遺伝データは実際のプログラムであり、小さなドメイン固有の言語(ここではPythonに翻訳されています)で記述されています。
単純なシーケンスが大きなプレーヤーを打ち負かす
この小さなファラは、特にリーダーに対して、驚くほど(あるいは、それほど驚くほどではないかもしれません)うまくいきます。
import sys
print "Simple Sequence Beats Big Players".split(' ')[
len(sys.argv[1]) / 2 % 5 if len(sys.argv) > 1 else 0
]
で実行: python SSBBP.py