├── README.md ├── with dizhu ├── data_save.py ├── game.py ├── generator.py ├── mcts.py ├── net.py ├── policy.py ├── rules.py └── valuenet_train.py └── without dizhu ├── data_save.py ├── game.py ├── generator.py ├── mc.py ├── mcts.py ├── net.py ├── policy.py ├── rules.py └── valuenet_train.py /README.md: -------------------------------------------------------------------------------- 1 | # 斗地主AI 2 | # Without Dizhu 3 | 4 | 该文件夹中为无地主1V1V1版本,每人18张牌,出牌规则与正常斗地主相同。 5 | 6 | AI目前只有价值网络,决策网络会在以后加入 7 | 8 | 训练主要分为两部分,第一阶段是有监督学习,通过多次模拟获得输赢概率,从随机出牌开始不断学习,并积累数据用于有监督训练。 9 | 第二阶段为增强学习,同alpha go。 10 | 11 | rules为出牌规则,policy为价值网络模块,net模块为resnet网络,game中包括Position类(记录当前局面),YiModel类为RL部分 12 | 13 | mcts模块为蒙特卡洛树搜索,主要用于之后加入决策网络,也可以加入之后的online play中。 14 | 15 | 神经网络输入为 上上一个玩家上一轮出牌、上一个玩家上上轮出牌、当前玩家手牌、当前玩家出过的牌、下家出过的牌、下下家出过的牌、当前玩家准备出的牌,输出为当前玩家当前出牌的价值。 16 | 17 | 已在gpu服务器上训练得到满意结果,第二阶段RL效果明显优于第一阶段有监督学习,速度也更快,所以可以直接开始RL不需要之前的有监督学习,第一阶段的代码也有部分删去。resvalue_5.h5为训练好的模型。 18 | 19 | 20 | 21 | 22 | 23 | 24 | # With Dizhu 25 | 26 | 该文件夹中为有地主1V2版本,地主20张牌,两个农民各17张。 27 | 28 | 目前还在设计网络结构,目前准备用到transfer learning,神经网络前部分固定用于提取相同信息,最后全连接层分开用于分别决策(三人决策思路都有不同,其中两个农民的合作尤为重要) 29 | -------------------------------------------------------------------------------- /with dizhu/data_save.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | from policy import card_transform 3 | 4 | class data_saver: 5 | def __init__(self, path, batch_size=10): 6 | self.num = 1 7 | self.batch_size = batch_size 8 | self.batch = 0 9 | self.path = path 10 | self.x_train = [] 11 | self.y_train = [] 12 | 13 | def __call__(self, state): 14 | x = [card_transform(state.current_game.players[(state.round + j) % 3].player_last_card) for j in [2, 1]] 15 | x.append(card_transform(state.current_game.players[state.round % 3].cards)) 16 | x.extend([card_transform(state.current_game.card_show[(state.round+i) % 3]) for i in range(3)]) 17 | x.append(card_transform(state.move)) 18 | self.x_train.append(x) 19 
| self.y_train.append(state.prob_win) 20 | if self.num % self.batch_size == 0: 21 | pickle.dump(self.x_train, open(self.path + 'x_%d.pkl' % self.batch, "wb")) 22 | pickle.dump(self.y_train, open(self.path + 'y_%d.pkl' % self.batch, "wb")) 23 | self.x_train = [] 24 | self.y_train = [] 25 | self.batch += 1 26 | print('record %d scores' % self.num) 27 | self.num += 1 28 | 29 | def save(self): 30 | pickle.dump(self.x_train, open(self.path + 'x_%d.pkl' % self.batch, "wb")) 31 | pickle.dump(self.y_train, open(self.path + 'y_%d.pkl' % self.batch, "wb")) 32 | self.batch += 1 33 | -------------------------------------------------------------------------------- /with dizhu/game.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | from rules import * 3 | from policy import * 4 | import numpy as np 5 | from copy import deepcopy 6 | from collections import namedtuple 7 | from multiprocessing import Queue, Process 8 | 9 | N = 5 10 | 11 | def print_card(cards): 12 | print_cards = [] 13 | dic = {0:'3',1:'4',2:'5',3:'6',4:'7',5:'8',6:'9',7:'10',8:'J',9:'Q',10:'K',11:'A',12:'2',13:'小王',14:'大王'} 14 | for i in sorted(cards): 15 | print_cards.append(dic[i]) 16 | print(print_cards) 17 | 18 | 19 | class Game: 20 | def __init__(self, policy_dizhu = Choose_dizhu(32, 64), policy=random_play): 21 | self.card = list(range(13)) + list(range(13)) + list(range(13)) + list(range(13)) + [13, 14] 22 | # 发牌,有地主 23 | random.shuffle(self.card) 24 | self.bonus_card = self.card[:3] 25 | self.card = self.card[3:] 26 | 27 | # 每人18张牌,card_2为每个牌的编号 28 | # 定义游戏中三个玩家 29 | self.players = [Player(i, self, self.card[17 * i:(17 * i + 17)], brain=policy) for i in range(3)] 30 | # card_show为记录玩家出过的牌 31 | # last_card为记录本局游戏最后出现的牌(后面牌要按前面牌的规则出牌,如果另外两名玩家都选择不出,则可以任意出牌) 32 | self.card_show = [[], [], []]#分别记录三个玩家出过的牌 33 | self.last_card = [] 34 | # end=1代表游戏进行中,end=0代表游戏结束 35 | self.end = 1 36 | # 记录游戏回合数,round%3即为当前玩家编号 37 | self.round = 0 38 | #选地主 39 | 
value = policy_dizhu.predict(np.array([card_transform(self.players[i].cards) for i in range(3)])) 40 | dizhu_num = np.argmax(value) 41 | self.players = [self.players[(dizhu_num+i) % 3] for i in range(3)] 42 | self.dizhu = self.players[0] 43 | self.dizhu.cards += self.bonus_card 44 | self.card_num = [20, 17, 17] 45 | 46 | 47 | 48 | def play(self): 49 | # 显示出牌 50 | i = self.round % 3 51 | while self.end: 52 | handout_card = self.players[i].move() 53 | print(i, handout_card) 54 | if handout_card == 'winner': 55 | print('player ' + str(i) + ' win') 56 | self.end = 0 57 | break 58 | else: 59 | if handout_card != []: 60 | self.last_card = handout_card 61 | self.card_num[i] -= len(handout_card) 62 | self.card_show[i] += handout_card 63 | self.round += 1 64 | i = (i + 1) % 3 65 | 66 | def simulate_play(self): 67 | # 不显示出牌,返回胜利玩家 68 | i = self.round % 3 69 | while self.end: 70 | handout_card = self.players[i].move() 71 | if handout_card == 'winner': 72 | return i 73 | else: 74 | if handout_card != []: 75 | self.last_card = handout_card 76 | self.card_num[i] -= len(handout_card) 77 | self.card_show[i] += handout_card 78 | self.round += 1 79 | i = (i + 1) % 3 80 | 81 | def play_one_round(self, move=None, verbose=0): 82 | # 游戏只进行一轮, 83 | if verbose == 1: 84 | print_card(sorted(self.players[self.round % 3].cards)) 85 | handout_card = self.players[self.round % 3].move(move) 86 | 87 | if handout_card == 'winner': 88 | self.end = 0 89 | else: 90 | if handout_card != []: 91 | self.last_card = handout_card 92 | self.card_num[self.round % 3] -= len(handout_card) 93 | self.card_show[self.round % 3] += handout_card 94 | if verbose == 1: 95 | print_card(sorted(handout_card)) 96 | self.round += 1 97 | 98 | 99 | class Player: 100 | def __init__(self, i, game, cards, brain=random_play): 101 | 102 | self.index = i 103 | self.game = game 104 | self.cards = cards 105 | self.brain = brain 106 | self.player_last_card = [] 107 | 108 | ''' 109 | def jiaodizhu_random(self): 110 | 
return(np.random.rand(1)) 111 | ''' 112 | 113 | def get_legal_move(self): 114 | if self.game.last_card == self.player_last_card: 115 | legal_list = all_legal_move(self.cards) 116 | else: 117 | legal_list = legal_move_after(self.game.last_card, self.cards) 118 | return legal_list 119 | 120 | def move(self, move_card=None): 121 | legal_list = self.get_legal_move() 122 | # 出牌 123 | #WARNING 124 | if move_card is None: 125 | handout = self.brain(legal_list, self.cards, self.game.card_show[0]+self.game.card_show[1]+self.game.card_show[2],self.game.players[(self.index + 1) % 3].cards,self.game.players[(self.index + 2) % 3].cards) 126 | elif move_card not in legal_list: 127 | print('INLEGAL MOVE') 128 | raise Exception('INLEGAL MOVE') 129 | else: 130 | handout = move_card 131 | for card in handout: 132 | self.cards.remove(card) 133 | self.player_last_card = handout 134 | if len(self.cards) == 0: 135 | return 'winner' 136 | else: 137 | return handout 138 | 139 | ####新轮子 140 | 141 | 142 | class Position(namedtuple('Position','players_cards shown_cards game_last_card player_last_card to_play_player')): 143 | def move(self, c): 144 | new_PC, new_SC, new_PLC = [[], [], []], [[], [], []], [i for i in self.player_last_card] 145 | for i in range(3): 146 | new_PC[i] = [card for card in self.players_cards[i]] 147 | new_SC[i] = [card for card in self.shown_cards[i]] 148 | if i == self.to_play_player: 149 | for card in c: 150 | new_PC[i].remove(card) 151 | if len(new_PC[i]) == 0: 152 | return self.to_play_player 153 | new_SC[i] += c 154 | new_PLC[i] = c 155 | if c == []: 156 | new_glc = self.game_last_card 157 | else: 158 | new_glc = c 159 | return Position(players_cards=new_PC, shown_cards=new_SC, game_last_card=new_glc 160 | , player_last_card=new_PLC, to_play_player=(self.to_play_player+1) % 3) 161 | 162 | def moves(self): 163 | if self.player_last_card[self.to_play_player] == self.game_last_card: 164 | return all_legal_move(self.players_cards[self.to_play_player]) 165 | else: 166 
| return legal_move_after(self.game_last_card, self.players_cards[self.to_play_player]) 167 | 168 | def simulate(self, net, a=1, display=False): 169 | pos = self 170 | while type(pos) is not int: 171 | move = net.predict_pos_move(pos, a) 172 | if display: 173 | print_card(pos.players_cards[pos.to_play_player]) 174 | print_card(move) 175 | pos = pos.move(move) 176 | return pos 177 | 178 | 179 | def game_to_position(game): 180 | players_cards = [game.players[i].cards for i in range(3)] 181 | shown_card = game.card_show 182 | game_last_card = game.last_card 183 | player_last_card = [game.players[i].player_last_card for i in range(3)] 184 | return Position(players_cards=players_cards, shown_cards=shown_card, 185 | game_last_card=game_last_card, player_last_card=player_last_card, 186 | to_play_player=game.round % 3) 187 | 188 | 189 | class ModelServer(Process): 190 | def __init__(self, cmd_queue, res_queues, load_snapshot=None): 191 | super(ModelServer, self).__init__() 192 | self.cmd_queue = cmd_queue 193 | self.res_queues = res_queues 194 | self.load_snapshot = load_snapshot 195 | 196 | def run(self): 197 | try: 198 | if self.load_snapshot: 199 | net = load_model(self.load_snapshot) 200 | else: 201 | from net import ResNet 202 | net = ResNet(N) 203 | net.create() 204 | 205 | class PredictStash(object): 206 | def __init__(self, trigger, res_queues): 207 | self.stash = [] 208 | self.trigger = trigger 209 | self.res_queues = res_queues 210 | 211 | def add(self,kind, X_pos): 212 | self.stash.append((kind, X_pos)) 213 | if len(self.stash) >= self.trigger: 214 | self.process() 215 | 216 | def process(self): 217 | if not self.stash: 218 | return 219 | value = net.predict([s[1] for s in self.stash]) 220 | 221 | except: 222 | import traceback 223 | traceback.print_exc() 224 | 225 | 226 | class YiModel(object): 227 | def __init__(self, load_snapshot=None): 228 | self.cmd_queue = Queue() 229 | self.res_queues = [Queue() for i in range(128)] 230 | self.server = Model 
-------------------------------------------------------------------------------- /with dizhu/generator.py: -------------------------------------------------------------------------------- 1 | from game import Game 2 | from data_save import data_saver 3 | import numpy as np 4 | from mc import * 5 | from policy import card_transform 6 | from keras.layers import Dense, Input, Conv2D, Dropout, Activation, Flatten, Reshape 7 | from keras.models import Model 8 | from keras.models import load_model 9 | 10 | 11 | def generator(path='./data/stage1/', sample_size=50): 12 | print('start') 13 | value_net = load_model('C:/Users/wangzixi/Desktop/doudizhu_model/value.h5') 14 | def value_net_random(legal_list, card_in_hand, card_show, next_player_hand_card, nnext_player_hand_card): 15 | if len(legal_list) == 1: 16 | return legal_list[0] 17 | x = [[card_transform(card_in_hand), 18 | card_transform(next_player_hand_card), 19 | card_transform(nnext_player_hand_card), 20 | card_transform(card_show), 21 | card_transform(move)] for move in legal_list] 22 | prob = value_net.predict(x) 23 | prob = (np.exp(prob) / np.sum(np.exp(prob))).reshape(len(prob)) 24 | index = np.random.choice(len(legal_list), 1, p=prob)[0] 25 | return legal_list[index] 26 | 27 | saver = data_saver(path) 28 | games = [Game(value_net_random) for _ in range(sample_size)] 29 | for simu_game in games: 30 | while simu_game.end: 31 | best_score = 0 32 | best_move = [] 33 | legal_list = simu_game.players[simu_game.round % 3].get_legal_move() 34 | print(legal_list) 35 | for move in legal_list: 36 | print(move) 37 | state = MCState(simu_game) 38 | state(move) 39 | score = state.prob_win 40 | print(score) 41 | if score > best_score: 42 | best_score = score 43 | best_move = move 44 | saver(state) 45 | simu_game.play_one_round(best_move) 46 | print(best_move) 47 | print('finish a game') 48 | 49 | -------------------------------------------------------------------------------- /with dizhu/mcts.py: 
-------------------------------------------------------------------------------- 1 | from game import * 2 | 3 | EXPAND_VISITS = 1 4 | 5 | 6 | 7 | class Treenode(): 8 | def __init__(self, net, pos): 9 | self.net = net 10 | self.pos = pos 11 | self.v = 0 12 | self.w = 0 13 | 14 | 15 | self.children = None 16 | 17 | def expand(self): 18 | """add and initialize children to a leaf node""" 19 | #distribution = self.net.predict_distribution(self.pos) 20 | self.children = [] 21 | for c in self.pos.moves(): 22 | pos2 = self.pos.move(c) 23 | #如果存在斩杀,children应为空值(即表面以结束游戏?) 24 | if pos2 is int: 25 | continue 26 | node = Treenode(self.net, pos2) 27 | self.children.append(node) 28 | 29 | 30 | def winrate(self): 31 | return float(self.w) / self.v if self.v > 0 else float('nan') 32 | 33 | def best_move(self, proportional=False): 34 | if self.children is None: 35 | return None 36 | if proportional: 37 | probs = [(float(node.v) / self.v)**2 for node in self.children] 38 | probs_tot = sum(probs) 39 | probs = [p / probs_tot for p in probs] 40 | i = np.random.choice(len(self.children), p=probs) 41 | return self.children[i] 42 | else: 43 | return max(self.children, key=lambda node: node.v) 44 | 45 | def global_puct_urgency(children): 46 | #calculate urgency 47 | 48 | return np.random.random(len(children)) 49 | 50 | def tree_descend(tree, display=False): 51 | tree.v += 1 52 | nodes = [tree] 53 | root = True 54 | while nodes[-1].children is not None: 55 | if display: print_pos(nodes[-1].pos) 56 | 57 | children = list(nodes[-1].children) 58 | random.shuffle(children) 59 | urgencies = global_puct_urgency(children) 60 | #if root: 61 | # print() 62 | node = max(zip(children, urgencies), key=lambda t: t[1])[0] 63 | nodes.append(node) 64 | if node.children is None and node.v > EXPAND_VISITS: 65 | node.expand() 66 | return nodes 67 | 68 | 69 | def score(winner, pos): 70 | if winner == 0: 71 | if pos.to_play_player == 0: 72 | return 1 73 | else: return 0 74 | else: 75 | if pos.to_play_player == 
0: 76 | return 0 77 | else: return 1 78 | 79 | 80 | def tree_update(nodes ,winner, display=False): 81 | for node in reversed(nodes): 82 | if display: print() 83 | node.w += score(winner, node.pos) 84 | 85 | 86 | 87 | def tree_search(tree, n, display=False, debug_disp=False): 88 | 89 | if tree.children is None: 90 | tree.expand() 91 | 92 | i = 0 93 | while i < n: 94 | nodes = tree_descend(tree, debug_disp) 95 | i += 1 96 | last_node = nodes[-1] 97 | winner = last_node.pos.simulate(last_node.net, a=30) 98 | tree_update(nodes, winner, debug_disp) 99 | print(i) 100 | return tree.best_move() 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | def print_pos(position): 109 | print('地主:') 110 | print_card(position.players_cards[0]) 111 | print('农名1:') 112 | print_card(position.players_cards[1]) 113 | print('农名2:') 114 | print_card(position.players_cards[2]) 115 | print('上一张牌') 116 | print_card(position.game_last_card) -------------------------------------------------------------------------------- /with dizhu/net.py: -------------------------------------------------------------------------------- 1 | from keras.models import Model 2 | from keras.layers import Activation, BatchNormalization, Dense, Flatten, Input, Reshape 3 | from keras.layers.convolutional import Conv2D 4 | from keras.layers.merge import add 5 | 6 | class ResNet(object): 7 | def __init__(self, input_N=256, filter_N=256, n_stages=5, 8 | kernel_width=3, kernel_height=3, 9 | inpkern_width=3, inpkern_height=3): 10 | self.input_N = input_N 11 | self.filter_N = filter_N 12 | self.n_stages = n_stages 13 | self.kernel_width = kernel_width 14 | self.kernel_height = kernel_height 15 | self.inpkern_width = inpkern_width 16 | self.inpkern_height = inpkern_height 17 | 18 | def create(self, input_width=15, input_height=6): 19 | bn_axis = 3 20 | inp = Input(shape=(input_height, input_width)) 21 | 22 | x = Reshape((input_width, input_height, 1))(inp) 23 | x = Conv2D(self.input_N, (self.inpkern_width, self.inpkern_height), 
padding='same', name='conv1')(x) 24 | x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x) 25 | x = Activation('relu')(x) 26 | 27 | for i in range(self.n_stages): 28 | x = self.res_block(x, [self.filter_N, self.filter_N], stage=i + 1, block='a') 29 | 30 | res = Conv2D(1, (1, 1))(x) 31 | res = BatchNormalization(axis=bn_axis)(res) 32 | res = Activation('relu')(res) 33 | res = Flatten()(res) 34 | res = Dense(256, activation='relu')(res) 35 | res = Dense(1, activation='sigmoid', name='result')(res) 36 | 37 | self.model = Model(inp, res) 38 | self.model.compile(loss='mean_squared_error', optimizer='nadam', metrics=['mae']) 39 | return self.model 40 | 41 | def res_block(self, input_tensor, filters, stage, block): 42 | nb_filter1, nb_filter2 = filters 43 | bn_axis = 3 44 | conv_name_base = 'res' + str(stage) + block + '_branch' 45 | bn_name_base = 'bn' + str(stage) + block + '_branch' 46 | 47 | x = Conv2D(nb_filter1, (self.kernel_height,self.kernel_width), padding='same', name=conv_name_base+'_a')(input_tensor) 48 | x = BatchNormalization(axis=bn_axis, name=bn_name_base+'_a')(x) 49 | x = Activation('relu')(x) 50 | x = Conv2D(nb_filter2, (self.kernel_height, self.kernel_width), padding='same', name=conv_name_base+'_b')(x) 51 | x = add([x, input_tensor]) 52 | x = BatchNormalization(axis=bn_axis, name=bn_name_base+'_b')(x) 53 | x = Activation('relu')(x) 54 | 55 | return x 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /with dizhu/policy.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | ''' 3 | 出牌方式 4 | policy of playing cards 5 | ''' 6 | import random 7 | import numpy as np 8 | from net import * 9 | from keras.layers import Dense, Input, Conv2D, Dropout, Activation, Flatten, Reshape, BatchNormalization 10 | from keras.models import Model 11 | from keras.models import load_model 12 | 13 | 14 | def card_transform(card): 15 | a = np.zeros(15) 16 | for i 
in card: 17 | a[i] += 1 18 | return a 19 | 20 | def card_transform_all(mx): 21 | return [card_transform(i) for i in mx] 22 | 23 | 24 | def random_play(a,b,c,d,e): 25 | random.shuffle(a) 26 | return a[0] 27 | 28 | 29 | class Value_net(): 30 | #first version of value net( input 5*15 ) 31 | def __init__(self,path,method='best'): 32 | self.model = load_model(path) 33 | self.method = method 34 | 35 | def __call__(self, legal_list, card_in_hand, card_show, 36 | next_player_hand_card, nnext_player_hand_card): 37 | if len(legal_list) == 1: 38 | return legal_list[0] 39 | x = [[card_transform(card_in_hand), 40 | card_transform(next_player_hand_card), 41 | card_transform(nnext_player_hand_card), 42 | card_transform(card_show), 43 | card_transform(move)] for move in legal_list] 44 | prob = self.model.predict(x) 45 | if self.method == 'best': 46 | return legal_list[np.argmax(prob)] 47 | else: 48 | prob = (np.exp(prob) / np.sum(np.exp(prob))).reshape(len(prob)) 49 | index = np.random.choice(len(legal_list), 1, p=prob)[0] 50 | return legal_list[index] 51 | 52 | 53 | class Choose_dizhu(): 54 | def __init__(self,batch_size, archive_fit_sample): 55 | inp = Input(shape=(15,)) 56 | x = Dense(64, activation='relu')(inp) 57 | x = Dense(1, activation='sigmoid')(x) 58 | self.model = Model(inp, x) 59 | self.model.compile(loss='binary_crossentropy', optimizer='nadam') 60 | self.archive = [] 61 | self.batch_size = batch_size 62 | self.archive_fit_sample = archive_fit_sample 63 | self.wait = 32 64 | self.i = 0 65 | 66 | def predict(self, cards): 67 | return self.model.predict(cards) 68 | 69 | def fit_game(self, game): 70 | x = card_transform([i for i in game.players[0].cards]) 71 | if game.simulate_play() == 0: 72 | #如果是地主赢了, 应该去叫地主 73 | x = (x, 1) 74 | #如果是地主输了,则不应该去叫地主 75 | else: 76 | x = (x, 0) 77 | self.archive.append(x) 78 | self.i += 1 79 | if self.i > self.wait: 80 | if len(self.archive) >= self.archive_fit_sample: 81 | archive_train_sample = random.sample(self.archive, 
self.archive_fit_sample) 82 | x_t,y_t = [], [] 83 | for x, y in archive_train_sample: 84 | x_t.append(x) 85 | y_t.append(y) 86 | if len(x_t) % self.archive_fit_sample: 87 | self.model.train_on_batch(np.array(x_t),np.array(y_t)) 88 | x_t, y_t = [], [] 89 | self.i = 0 90 | 91 | def save(self, path): 92 | self.model.save(path) 93 | 94 | def load(self, path): 95 | self.model.load(path) 96 | 97 | 98 | class random_value_net(): 99 | def predict_pos_move(self,pos): 100 | moves = pos.moves() 101 | random.shuffle(moves) 102 | return moves[0] 103 | 104 | 105 | class cnn_value_net(): 106 | def __init__(self, model = 'best',load_snapshot=None): 107 | self.model = model 108 | if load_snapshot: 109 | self.model = load_model(load_snapshot) 110 | else: 111 | inp = Input((6, 15)) 112 | x = Reshape((6, 15, 1))(inp) 113 | x = BatchNormalization()(x) 114 | x = Conv2D(filters=128, kernel_size=(6, 1), activation='relu')(x) 115 | x = Dropout(0.2)(x) 116 | x = Conv2D(filters=128, kernel_size=(1, 3), activation='relu')(x) 117 | x = Dropout(0.2)(x) 118 | x = Flatten()(x) 119 | x = Dense(64, activation='relu')(x) 120 | x = Dropout(0.2)(x) 121 | x = Dense(1, activation='sigmoid')(x) 122 | self.model = Model(inputs=inp, outputs=x) 123 | self.model.compile(loss='mean_squared_error', optimizer='nadam', metrics=['mae']) 124 | 125 | def predict_pos_move(self, pos, a=1): 126 | moves = pos.moves() 127 | if len(moves) != 1: 128 | x = [card_transform_all([pos.player_last_card[(pos.to_play_player + 2) % 3], 129 | pos.player_last_card[(pos.to_play_player + 1) % 3], 130 | pos.players_cards[pos.to_play_player], 131 | pos.shown_cards[(pos.to_play_player + 1) % 3], 132 | pos.shown_cards[(pos.to_play_player + 2) % 3], 133 | move]) for move in moves] 134 | value = self.model.predict(x) 135 | if self.model == 'best': 136 | return moves[np.argmax(value)] 137 | else: 138 | prob = (np.exp(a*value) / np.sum(np.exp(a*value))).reshape(len(value)) 139 | index = np.random.choice(len(value), 1, p=prob)[0] 140 | return 
moves[index] 141 | else: 142 | return moves[0] 143 | 144 | 145 | def predict_pos_values(self,pos): 146 | moves = pos.moves() 147 | if pos.players_cards[pos.to_play_player] in moves: 148 | return pos.players_cards[pos.to_play_player] 149 | if len(moves) != 1: 150 | x = [card_transform_all([pos.player_last_card[(pos.to_play_player + 2) % 3], 151 | pos.player_last_card[(pos.to_play_player + 1) % 3], 152 | pos.players_cards[pos.to_play_player], 153 | pos.shown_cards[(pos.to_play_player + 1) % 3], 154 | pos.shown_cards[(pos.to_play_player + 2) % 3], 155 | move]) for move in moves] 156 | value = self.model.predict(x) 157 | return zip(moves, value) 158 | else: 159 | return 1 160 | 161 | class res_value_net: 162 | def __init__(self,batch_size=32, archive_fit_samples=64, n_stage=5, model='best',load_snapshot=None): 163 | self.batch_size = batch_size 164 | self.archive_fit_samples = archive_fit_samples 165 | self.position_archive = [] 166 | self.model = model 167 | if load_snapshot: 168 | self.model = load_model(load_snapshot) 169 | else: 170 | net = ResNet(n_stage=n_stage) 171 | self.model = net.create() 172 | 173 | 174 | def predict_pos_move(self, pos): 175 | moves = pos.moves() 176 | if len(moves) != 1: 177 | x = [card_transform_all([pos.player_last_card[(pos.to_play_player + 2) % 3], 178 | pos.player_last_card[(pos.to_play_player + 1) % 3], 179 | pos.players_cards[pos.to_play_player], 180 | pos.shown_cards[(pos.to_play_player + 1) % 3], 181 | pos.shown_cards[(pos.to_play_player + 2) % 3], 182 | move]) for move in moves] 183 | value = self.model.predict(x) 184 | if self.model == 'best': 185 | return moves[np.argmax(value)] 186 | else: 187 | prob = (np.exp(50*value) / np.sum(np.exp(50*value))).reshape(len(value)) 188 | index = np.random.choice(len(value), 1, p=prob)[0] 189 | return moves[index] 190 | else: 191 | return moves[0] 192 | 193 | def predict_pos_values(self,pos): 194 | moves = pos.moves() 195 | if pos.players_cards[pos.to_play_player] in moves: 196 | return 
pos.players_cards[pos.to_play_player] 197 | if len(moves) != 1: 198 | x = [card_transform_all([pos.player_last_card[(pos.to_play_player + 2) % 3], 199 | pos.player_last_card[(pos.to_play_player + 1) % 3], 200 | pos.players_cards[pos.to_play_player], 201 | pos.shown_cards[(pos.to_play_player + 1) % 3], 202 | pos.shown_cards[(pos.to_play_player + 2) % 3], 203 | move]) for move in moves] 204 | value = self.model.predict(x) 205 | return zip(moves, value) 206 | else: 207 | return 1 208 | 209 | def fit_game(self, X_positions, result): 210 | X_positions 211 | 212 | 213 | class cnn_value_net_odd(): 214 | def __init__(self, model = 'best',load_snapshot=None): 215 | self.model = model 216 | if load_snapshot: 217 | self.model = load_model(load_snapshot) 218 | else: 219 | inp = Input((6, 15)) 220 | x = Reshape((6, 15, 1))(inp) 221 | x = BatchNormalization()(x) 222 | x = Conv2D(filters=128, kernel_size=(6, 1), activation='relu')(x) 223 | x = Dropout(0.2)(x) 224 | x = Conv2D(filters=128, kernel_size=(1, 3), activation='relu')(x) 225 | x = Dropout(0.2)(x) 226 | x = Flatten()(x) 227 | x = Dense(64, activation='relu')(x) 228 | x = Dropout(0.2)(x) 229 | x = Dense(1, activation='sigmoid')(x) 230 | self.model = Model(inputs=inp, outputs=x) 231 | self.model.compile(loss='mean_squared_error', optimizer='nadam', metrics=['mae']) 232 | 233 | def predict_pos_move(self, pos, a=1): 234 | moves = pos.moves() 235 | if pos.players_cards[pos.to_play_player] in moves: 236 | return pos.players_cards[pos.to_play_player] 237 | if len(moves) != 1: 238 | x = [card_transform_all([ 239 | pos.player_last_card[pos.to_play_player], 240 | pos.player_last_card[(pos.to_play_player + 1) % 3], 241 | pos.player_last_card[(pos.to_play_player + 2) % 3], 242 | pos.players_cards[0]+pos.players_cards[1]+pos.players_cards[2], 243 | move]) for move in moves] 244 | value = self.model.predict(x) 245 | if self.model == 'best': 246 | return moves[np.argmax(value)] 247 | else: 248 | prob = (np.exp(a*value) / 
np.sum(np.exp(a*value))).reshape(len(value)) 249 | index = np.random.choice(len(value), 1, p=prob)[0] 250 | return moves[index] 251 | else: 252 | return moves[0] 253 | 254 | def fit_game(self): 255 | return -------------------------------------------------------------------------------- /with dizhu/rules.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | ''' 3 | 出牌规则 4 | rules for legal action 5 | ''' 6 | ALLOW_THREE_ONE = True 7 | ALLOW_THREE_TWO = True 8 | 9 | 10 | def all_legal_move(cards): 11 | #最快找到所有出牌可能,主动出击 12 | combs = [] 13 | dic = {} 14 | for card in cards: 15 | dic[card] = dic.get(card, 0) + 1 16 | for card in dic: 17 | if dic[card] >= 1: 18 | combs.append([card]) 19 | if dic[card] >= 2: 20 | combs.append([card] * 2) 21 | if dic[card] >= 3: 22 | combs.append([card] * 3) 23 | if ALLOW_THREE_TWO: 24 | for another_card in dic: 25 | if dic[another_card] >= 2 and another_card != 13 and another_card != 14 and another_card != card: 26 | combs.append([card] * 3 + [another_card] * 2) 27 | if ALLOW_THREE_ONE: 28 | for another_card in dic: 29 | if dic[another_card] >= 1 and another_card != 13 and another_card != 14 and another_card != card: 30 | combs.append([card] * 3 + [another_card]) 31 | if dic[card] >= 4: 32 | combs.append([card] * 4) 33 | if 13 in cards and 14 in cards: 34 | combs.append([13, 14]) 35 | combs.extend(detect_con(cards)) 36 | combs.extend(detect_double_con(cards)) 37 | return combs 38 | 39 | 40 | def detect_con(cards, length=False, minimum=-1): 41 | # 顺,最短5最长12,3~A 42 | combs = [] 43 | distinct_cards = sorted(list(set(cards))) 44 | cs = 0 45 | last = distinct_cards[0] - 1 46 | for i in distinct_cards: 47 | if i > minimum and i < 12: 48 | if i - last == 1: 49 | cs += 1 50 | else: 51 | cs = 1 52 | if cs >= 5: 53 | if not length: 54 | combs.extend([list(range(i + 1 - j, i + 1)) for j in range(5, cs + 1)]) 55 | elif cs >= length: 56 | combs.append(list(range(i + 1 - length, i + 1))) 57 
| last = i 58 | return combs 59 | 60 | 61 | def detect_bomb(cards, minimum=-1): 62 | #炸弹 63 | combs = [] 64 | dic = {} 65 | for card in cards: 66 | dic[card] = dic.get(card, 0) + 1 67 | for card in dic: 68 | if dic[card] == 4 and card > minimum: 69 | combs.append([card] * 4) 70 | if 13 in cards and 14 in cards: 71 | combs.append([13, 14]) 72 | return combs 73 | 74 | 75 | 76 | def detect_triple(cards, minimum=-1, dai=0): 77 | #飞机 78 | combs = [] 79 | dic = {} 80 | for card in cards: 81 | dic[card] = dic.get(card, 0) + 1 82 | for card in dic: 83 | if dic[card] >= 3 and card > minimum: 84 | if dai == 0: 85 | combs.append([card] * 3) 86 | elif dai == 1: 87 | for another_card in dic: 88 | if dic[another_card] >= 1 and another_card != 13 and another_card != 14 and another_card != card: 89 | combs.append([card] * 3 + [another_card]) 90 | else: 91 | for another_card in dic: 92 | if dic[another_card] >= 2 and another_card != 13 and another_card != 14 and another_card != card: 93 | combs.append([card] * 3 + [another_card] * 2) 94 | return combs 95 | 96 | 97 | def detect_double(cards, minimum=-1): 98 | #对 99 | combs = [] 100 | dic = {} 101 | for card in cards: 102 | dic[card] = dic.get(card, 0) + 1 103 | for card in dic: 104 | if dic[card] >= 2 and card > minimum: 105 | combs.append([card] * 2) 106 | return combs 107 | 108 | 109 | def legal_move_after(last_card, cards): 110 | #接上一家 111 | combs = [[]] 112 | dic = {} 113 | for i in last_card: 114 | dic[i] = dic.get(i, 0) + 1 115 | if len(dic) == 2: 116 | for i in dic: 117 | if dic[i] == 3: 118 | minimum = i 119 | break 120 | else: 121 | minimum = min(last_card) 122 | if 13 in last_card and 14 in last_card: 123 | return combs 124 | else: 125 | if len(last_card) == 4 and len(dic) == 1: 126 | combs.extend(detect_bomb(cards, minimum)) 127 | else: 128 | combs.extend(detect_bomb(cards)) 129 | if len(last_card) >= 6 and min(dic.values()) == 2: 130 | combs.extend(detect_double_con(cards, len(dic), minimum)) 131 | elif max(dic.values()) 
== 3: 132 | combs.extend(detect_triple(cards, minimum, len(last_card) - 3)) 133 | elif len(last_card) == 2: 134 | combs.extend(detect_double(cards, minimum)) 135 | elif len(last_card) == 1: 136 | combs.extend([[i] for i in set(cards) if i > minimum]) 137 | else: 138 | combs.extend(detect_con(cards, len(last_card), minimum)) 139 | return combs 140 | 141 | 142 | 143 | def detect_double_con(cards, length=False, minimum=-1): 144 | # 对顺,最短5最长12,3~A 145 | dic = {} 146 | for i in cards: 147 | dic[i] = dic.get(i, 0)+1 148 | combs = [] 149 | distinct_cards = sorted(list(dic.keys())) 150 | cs = 0 151 | last = distinct_cards[0] - 1 152 | for i in distinct_cards: 153 | if 12 > i > minimum: 154 | if dic[i] > 1: 155 | if i - last == 1: 156 | cs += 1 157 | if cs >= 3: 158 | if not length: 159 | combs.extend([list(range(i + 1 - j, i + 1)) * 2 for j in range(3, cs + 1)]) 160 | elif cs >= length: 161 | combs.append(list(range(i + 1 - length, i + 1)) * 2) 162 | else: 163 | cs = 1 164 | last = i 165 | else: 166 | pass 167 | return combs 168 | 169 | ALL_POSSIBLE_MOVE = all_legal_move(list(range(13)) + list(range(13)) + list(range(13)) + list(range(13)) + [13, 14]) 170 | ALL_POSSIBLE_MOVE_DICT = {} 171 | for card, i in zip(ALL_POSSIBLE_MOVE, range(len(ALL_POSSIBLE_MOVE))): 172 | card = tuple(card) 173 | ALL_POSSIBLE_MOVE_DICT[card] = i -------------------------------------------------------------------------------- /with dizhu/valuenet_train.py: -------------------------------------------------------------------------------- 1 | from net import * 2 | import pickle 3 | 4 | import tensorflow as tf 5 | import keras.backend.tensorflow_backend as KTF 6 | 7 | config = tf.ConfigProto() 8 | config.gpu_options.allow_growth = True #不全部占满显存, 按需分配 9 | session = tf.Session(config=config) 10 | 11 | # 设置session 12 | KTF.set_session(session) 13 | 14 | resnet = ResNet() 15 | resnet.create() 16 | x = pickle.load(open('./data/stage2/x.pkl','rb')) 17 | y = pickle.load(open('./data/stage2/y.pkl','rb')) 18 | 
resnet.model.fit(x, y, batch_size=1024, epochs=100, validation_split=0.1) 19 | resnet.model.save('./value_net/value_2.h5') -------------------------------------------------------------------------------- /without dizhu/data_save.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | from policy import card_transform 3 | 4 | class data_saver: 5 | def __init__(self, path, batch_size=10): 6 | self.num = 1 7 | self.batch_size = batch_size 8 | self.batch = 0 9 | self.path = path 10 | self.x_train = [] 11 | self.y_train = [] 12 | 13 | def __call__(self, state): 14 | x = [card_transform(state.current_game.players[(state.round + j) % 3].player_last_card) for j in [2, 1]] 15 | x.append(card_transform(state.current_game.players[state.round % 3].cards)) 16 | x.extend([card_transform(state.current_game.card_show[(state.round+i) % 3]) for i in [1, 2]]) 17 | x.append(card_transform(state.move)) 18 | self.x_train.append(x) 19 | self.y_train.append(state.prob_win) 20 | if self.num % self.batch_size == 0: 21 | pickle.dump(self.x_train, open(self.path + 'x_%d.pkl' % self.batch, "wb")) 22 | pickle.dump(self.y_train, open(self.path + 'y_%d.pkl' % self.batch, "wb")) 23 | self.x_train = [] 24 | self.y_train = [] 25 | self.batch += 1 26 | print('record %d scores' % self.num) 27 | self.num += 1 28 | 29 | def save(self): 30 | pickle.dump(self.x_train, open(self.path + 'x_%d.pkl' % self.batch, "wb")) 31 | pickle.dump(self.y_train, open(self.path + 'y_%d.pkl' % self.batch, "wb")) 32 | self.batch += 1 33 | -------------------------------------------------------------------------------- /without dizhu/game.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | from rules import * 3 | from policy import * 4 | from collections import namedtuple 5 | 6 | 7 | def print_card(cards): 8 | print_cards = [] 9 | dic = 
class Game:
    """One no-landlord 1v1v1 game: three players, 18 cards each."""

    def __init__(self, policy=random_play):
        # Full deck: four copies of ranks 0..12 plus the two jokers (13, 14).
        self.card = list(range(13)) * 4 + [13, 14]
        random.shuffle(self.card)
        # Deal 18 cards to each of the three players; `policy` is each player's brain.
        self.players = [Player(i, self, self.card[18 * i:18 * (i + 1)], brain=policy)
                        for i in range(3)]
        # card_show[i]: cards player i has already played.
        self.card_show = [[], [], []]
        # Most recent non-pass move on the table (followers must beat it;
        # if both others pass, the leader may play anything).
        self.last_card = []
        # 1 while the game is running, 0 once finished.
        self.end = 1
        # round % 3 is the seat index of the player to act.
        self.round = 0
        # Remaining card counts per player.
        self.card_num = [18, 18, 18]
class Position(namedtuple('Position', 'players_cards shown_cards game_last_card player_last_card to_play_player')):
    """Immutable game state; move() yields a new Position or the winner's seat index."""

    def move(self, c):
        """Apply move c for the player to act.

        Returns the winner's index (int) when the move empties that hand,
        otherwise a new Position with play rotated to the next seat.
        An empty move (pass) leaves game_last_card unchanged.
        """
        me = self.to_play_player
        hands = [list(hand) for hand in self.players_cards]
        shown = [list(pile) for pile in self.shown_cards]
        last_moves = list(self.player_last_card)
        for card in c:
            hands[me].remove(card)
        if not hands[me]:
            return me
        shown[me] = shown[me] + c
        last_moves[me] = c
        next_table = self.game_last_card if c == [] else c
        return Position(players_cards=hands, shown_cards=shown, game_last_card=next_table,
                        player_last_card=last_moves, to_play_player=(me + 1) % 3)

    def moves(self):
        """Legal moves for the player to act (free lead when the table's last card is ours)."""
        me = self.to_play_player
        if self.player_last_card[me] == self.game_last_card:
            return all_legal_move(self.players_cards[me])
        return legal_move_after(self.game_last_card, self.players_cards[me])

    def simulate(self, net, a=1, display=False):
        """Play the game out with net's move predictions; return the winner's index."""
        pos = self
        while type(pos) is Position:
            chosen = net.predict_pos_move(pos, a)
            if display:
                print_card(pos.players_cards[pos.to_play_player])
                print_card(chosen)
            pos = pos.move(chosen)
        return pos
class YiModel(object):
    """Self-play reinforcement-learning driver around a residual value network."""

    def __init__(self, load_snapshot=None, save_path=None):
        # Default to overwriting the loaded snapshot when no explicit save path is given.
        self.save_path = load_snapshot if save_path is None else save_path
        if load_snapshot:
            self.net = res_value_net(load_snapshot=load_snapshot)
        else:
            self.net = res_value_net()

    def reinforce_learning(self, n=1000, save_num=10, display=False):
        """Play n self-play games, fitting the value net on each finished game.

        The model is checkpointed every save_num games and once more at the end.
        Fixed: the original printed the game counter and the winner index
        unconditionally (leftover debug output), and saved a second, identical
        checkpoint at the end even when one had just been written.
        """
        for i in range(n):
            if display:
                print(i)
            game = Game()
            pos = game_to_position(game)
            history = []  # (Position, move) pairs of the whole game
            while type(pos) is Position:
                move = self.net.predict_pos_move(pos, a=20, mode='qlearning')
                if display:
                    print(pos.to_play_player)
                    print_card(sorted(pos.players_cards[pos.to_play_player]))
                    print_card(move)
                history.append((pos, move))
                pos = pos.move(move)
            if display:
                print(pos)  # pos is now the winner's seat index
            self.net.fit_game(history, pos)
            if (i + 1) % save_num == 0:
                self.net.save(self.save_path)
        if n % save_num != 0:
            self.net.save(self.save_path)
class MCState:
    """Monte-Carlo evaluation of a single candidate move from a given game state."""

    def __init__(self, game):
        # Work on a private copy so simulations never disturb the live game.
        self.current_game = deepcopy(game)
        self.move = []
        self.round = self.current_game.round
        self.player = self.round % 3  # seat index of the player to act
        self.prob_win = 0

    def __call__(self, move, iter_num=200):
        """Estimate the win probability of `move` via iter_num random playouts."""
        self.move = move
        root = deepcopy(self.current_game)
        root.play_one_round(move)
        if root.end == 0:
            # The move ends the game immediately: certain win.
            self.prob_win = 1
        else:
            wins = sum(1 for _ in range(iter_num)
                       if deepcopy(root).simulate_play() == self.player)
            self.prob_win = wins / float(iter_num)
EXPAND_VISITS = 2   # visits before a leaf node is expanded
PUCT_C = 0.5        # exploration constant in the PUCT bonus


class Treenode():
    """Node of the Monte-Carlo search tree."""

    def __init__(self, net, pos, move=None):
        # Fixed: the original used a mutable default (move=[]), shared
        # across every node created without an explicit move.
        self.move = [] if move is None else move
        self.net = net
        self.pos = pos          # Position, or the winner's seat index (int) when terminal
        self.v = 0              # visit count
        self.w = 0              # accumulated win score
        self.children = None

    def expand(self):
        """Add and initialize children for a leaf node."""
        if isinstance(self.pos, int):
            # Terminal node: pos already holds the winner's seat index.
            return
        self.children = []
        for c in self.pos.moves():
            pos2 = self.pos.move(c)
            if isinstance(pos2, int):
                # Move c ends the game at once; keep only this decisive child.
                # Fixed: the original compared `pos2 is int` (identity with the
                # type object), which is never true, and then fell through to
                # simulate a finished game.
                self.children = [Treenode(self.net, pos2, c)]
                break
            node = Treenode(self.net, pos2, move=c)
            node.v += 1
            tree_update([self, node], node.pos.simulate(self.net))
            self.children.append(node)

    def winrate(self):
        """Empirical win rate; NaN before any visit."""
        return float(self.w) / self.v if self.v > 0 else float('nan')

    def best_move(self, proportional=False):
        """Most-visited child, or a visit-proportional sample when proportional=True."""
        if self.children is None:
            return None
        if proportional:
            weights = [(float(child.v) / self.v) ** 2 for child in self.children]
            total = sum(weights)
            probs = [wt / total for wt in weights]
            i = np.random.choice(len(self.children), p=probs)
            return self.children[i]
        return max(self.children, key=lambda child: child.v)


def puct_urgency_input(nodes):
    """Vectors of win and visit counts for a list of nodes."""
    w = np.array([float(node.w) for node in nodes])
    v = np.array([float(node.v) for node in nodes])
    return w, v


def global_puct_urgency(n0, w, v):
    """PUCT urgency: exploitation term plus exploration bonus.

    The bonus is PUCT_C * sqrt(2*ln(n0)/v). Fixed: the original used
    np.square, which inverts the intended effect — the exploration bonus
    must shrink like 1/sqrt(v) as a node accumulates visits.
    """
    expectation = w / v
    bonus = PUCT_C * np.sqrt(2 * np.log(n0) / v)
    return expectation + bonus
def score(winner, pos):
    """Return 1 if `winner` is the seat that moved into `pos` (the previous player), else 0."""
    return 1 if winner == (pos.to_play_player + 2) % 3 else 0


def tree_update(nodes, winner, display=False):
    """Back a playout result up along the descended path."""
    for node in reversed(nodes):
        if display: print()
        node.w += score(winner, node.pos)


def tree_search(tree, n, display=False, debug_disp=False):
    """Run n descend/update iterations from `tree` and return its best child."""
    if tree.children is None:
        tree.expand()
    i = 0
    while i < n:
        nodes = tree_descend(tree, debug_disp)
        i += 1
        last_node = nodes[-1]
        # A terminal child stores the winner's seat index (an int) in .pos.
        # Fixed: the original compared `pos is int` (identity with the type
        # object), which is never true, so the winner was never read here.
        # NOTE(review): tree_descend performs the same `is not int` comparison.
        if last_node.children is not None and isinstance(last_node.children[0].pos, int):
            winner = last_node.children[0].pos
        elif isinstance(last_node.pos, int):
            # The descended leaf itself is terminal.
            winner = last_node.pos
        else:
            winner = last_node.pos.simulate(last_node.net, a=30)
        tree_update(nodes, winner, debug_disp)
    return tree.best_move()


def print_pos(position):
    """Pretty-print a position: all three hands and the last card on the table."""
    print('地主:')
    print_card(position.players_cards[0])
    print('农名1:')
    print_card(position.players_cards[1])
    print('农名2:')
    print_card(position.players_cards[2])
    print('上一张牌')
    print_card(position.game_last_card)
self.inpkern_width = inpkern_width 16 | self.inpkern_height = inpkern_height 17 | 18 | def create(self, input_width=15, input_height=7): 19 | bn_axis = 3 20 | inp = Input(shape=(input_height, input_width)) 21 | 22 | x = Reshape((input_width, input_height, 1))(inp) 23 | x = Conv2D(self.input_N, (self.inpkern_width, self.inpkern_height), padding='same', name='conv1')(x) 24 | x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x) 25 | x = Activation('relu')(x) 26 | 27 | for i in range(self.n_stages): 28 | x = self.res_block(x, [self.filter_N, self.filter_N], stage=i + 1, block='a') 29 | 30 | res = Conv2D(1, (1, 1))(x) 31 | res = BatchNormalization(axis=bn_axis)(res) 32 | res = Activation('relu')(res) 33 | res = Flatten()(res) 34 | res = Dense(256, activation='relu')(res) 35 | res = Dense(1, activation='sigmoid', name='result')(res) 36 | 37 | self.model = Model(inp, res) 38 | self.model.compile(loss='mean_squared_error', optimizer='nadam', metrics=['mae']) 39 | return self.model 40 | 41 | def res_block(self, input_tensor, filters, stage, block): 42 | nb_filter1, nb_filter2 = filters 43 | bn_axis = 3 44 | conv_name_base = 'res' + str(stage) + block + '_branch' 45 | bn_name_base = 'bn' + str(stage) + block + '_branch' 46 | 47 | x = Conv2D(nb_filter1, (self.kernel_height,self.kernel_width), padding='same', name=conv_name_base+'_a')(input_tensor) 48 | x = BatchNormalization(axis=bn_axis, name=bn_name_base+'_a')(x) 49 | x = Activation('relu')(x) 50 | x = Conv2D(nb_filter2, (self.kernel_height, self.kernel_width), padding='same', name=conv_name_base+'_b')(x) 51 | x = add([x, input_tensor]) 52 | x = BatchNormalization(axis=bn_axis, name=bn_name_base+'_b')(x) 53 | x = Activation('relu')(x) 54 | 55 | return x 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /without dizhu/policy.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | ''' 3 | 出牌方式 4 | policy of 
def random_play(a, b, c, d, e, f=None):
    """Baseline brain: return a uniformly random legal move.

    Only the legal-move list `a` is used; b..f mirror the brain-callback
    signature (hand, shown cards, opponents' hands).

    Fixed defects: the original demanded six positional arguments while
    Player.move passes five (TypeError on every call), and it shuffled the
    caller's legal-move list in place as a side effect. `f` keeps a default
    so any six-argument caller still works.
    """
    return random.choice(a)
value) / np.sum(np.exp(a * value))).reshape(len(value)) 69 | index = np.random.choice(len(value), 1, p=prob)[0] 70 | return moves[index] 71 | else: 72 | return moves[np.argmax(value)] 73 | else: 74 | prob = (np.exp(a*value) / np.sum(np.exp(a*value))).reshape(len(value)) 75 | index = np.random.choice(len(value), 1, p=prob)[0] 76 | return moves[index] 77 | else: 78 | return moves[0] 79 | 80 | 81 | def predict_pos_values(self,pos): 82 | moves = pos.moves() 83 | if pos.players_cards[pos.to_play_player] in moves: 84 | return pos.players_cards[pos.to_play_player] 85 | if len(moves) != 1: 86 | x = [card_transform_all([pos.player_last_card[(pos.to_play_player + 2) % 3], 87 | pos.player_last_card[(pos.to_play_player + 1) % 3], 88 | pos.players_cards[pos.to_play_player], 89 | pos.shown_cards[pos.to_play_player], 90 | pos.shown_cards[(pos.to_play_player + 1) % 3], 91 | pos.shown_cards[(pos.to_play_player + 2) % 3], 92 | move]) for move in moves] 93 | value = self.model.predict(x) 94 | for i in zip(moves, value):print(i) 95 | else: 96 | return 1 97 | 98 | def fit_game(self, X_positions, result): 99 | X_posres = [] 100 | for pos, move in X_positions: 101 | X_posres.append((pos_to_x(pos, move), 1 if pos.to_play_player == result else 0)) 102 | self.position_archive.extend(X_posres) 103 | if len(self.position_archive) >= self.archive_fit_samples: 104 | archive_samples = random.sample(self.position_archive, self.archive_fit_samples) 105 | else: 106 | archive_samples = self.position_archive 107 | 108 | X_fit_samples = list(itertools.chain(X_posres, archive_samples)) 109 | random.shuffle(X_fit_samples) 110 | x_t, y_t = [], [] 111 | for x,y in X_fit_samples: 112 | x_t.append(x) 113 | y_t.append(y) 114 | if len(x_t) % self.batch_size == 0: 115 | self.model.train_on_batch(np.array(x_t), np.array(y_t)) 116 | if len(x_t) > 0: 117 | self.model.train_on_batch(np.array(x_t), np.array(y_t)) 118 | 119 | def save(self,path): 120 | self.model.save(path) 121 | 122 | class cnn_value_net(): 123 | 
def __init__(self, model = 'best',load_snapshot=None): 124 | self.model = model 125 | if load_snapshot: 126 | self.model = load_model(load_snapshot) 127 | else: 128 | inp = Input((6, 15)) 129 | x = Reshape((6, 15, 1))(inp) 130 | x = BatchNormalization()(x) 131 | x = Conv2D(filters=128, kernel_size=(6, 1), activation='relu')(x) 132 | x = Dropout(0.2)(x) 133 | x = Conv2D(filters=128, kernel_size=(1, 3), activation='relu')(x) 134 | x = Dropout(0.2)(x) 135 | x = Flatten()(x) 136 | x = Dense(64, activation='relu')(x) 137 | x = Dropout(0.2)(x) 138 | x = Dense(1, activation='sigmoid')(x) 139 | self.model = Model(inputs=inp, outputs=x) 140 | self.model.compile(loss='mean_squared_error', optimizer='nadam', metrics=['mae']) 141 | 142 | def predict_pos_move(self, pos, a=1): 143 | moves = pos.moves() 144 | if len(moves) != 1: 145 | x = [card_transform_all([pos.player_last_card[(pos.to_play_player + 2) % 3], 146 | pos.player_last_card[(pos.to_play_player + 1) % 3], 147 | pos.players_cards[pos.to_play_player], 148 | pos.shown_cards[(pos.to_play_player + 1) % 3], 149 | pos.shown_cards[(pos.to_play_player + 2) % 3], 150 | move]) for move in moves] 151 | value = self.model.predict(x) 152 | if self.model == 'best': 153 | return moves[np.argmax(value)] 154 | else: 155 | prob = (np.exp(a*value) / np.sum(np.exp(a*value))).reshape(len(value)) 156 | index = np.random.choice(len(value), 1, p=prob)[0] 157 | return moves[index] 158 | else: 159 | return moves[0] 160 | 161 | 162 | def predict_pos_values(self,pos): 163 | moves = pos.moves() 164 | if pos.players_cards[pos.to_play_player] in moves: 165 | return pos.players_cards[pos.to_play_player] 166 | if len(moves) != 1: 167 | x = [card_transform_all([pos.player_last_card[(pos.to_play_player + 2) % 3], 168 | pos.player_last_card[(pos.to_play_player + 1) % 3], 169 | pos.players_cards[pos.to_play_player], 170 | pos.shown_cards[(pos.to_play_player + 1) % 3], 171 | pos.shown_cards[(pos.to_play_player + 2) % 3], 172 | move]) for move in moves] 173 
def detect_con(cards, length=False, minimum=-1):
    """Single-card straights (顺子): at least 5 consecutive ranks, 3 through A.

    cards: ranks in hand (0=3 ... 11=A; rank 12 (the 2) and jokers cannot join).
    length: when set, return only straights of exactly that length;
            otherwise every straight of >= 5 cards ending at each rank.
    minimum: only ranks strictly above `minimum` may participate
             (rank filter when answering a previous straight).
    """
    combs = []
    if not cards:
        # Guard: the original indexed distinct_cards[0] and crashed on an empty hand.
        return combs
    distinct_cards = sorted(set(cards))
    cs = 0
    last = distinct_cards[0] - 1
    for i in distinct_cards:
        if minimum < i < 12:
            if i - last == 1:
                cs += 1
            else:
                cs = 1
            if cs >= 5:
                if not length:
                    combs.extend([list(range(i + 1 - j, i + 1)) for j in range(5, cs + 1)])
                elif cs >= length:
                    combs.append(list(range(i + 1 - length, i + 1)))
            last = i
    return combs
def detect_double(cards, minimum=-1):
    """All pairs (对) the hand can form from ranks strictly above `minimum`."""
    counts = {}
    for rank in cards:
        counts[rank] = counts.get(rank, 0) + 1
    return [[rank] * 2 for rank, cnt in counts.items() if cnt >= 2 and rank > minimum]
from net import *  # fixed: the ResNet class lives in net.py; the original imported the nonexistent `resnet` module
import pickle

import tensorflow as tf
import keras.backend.tensorflow_backend as KTF

# Grow GPU memory on demand instead of grabbing it all up front.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.Session(config=config)
KTF.set_session(session)

# Build the residual value network and fit it on the stage-2 self-play data.
resnet = ResNet()
resnet.create()
with open('./data/stage2/x.pkl', 'rb') as fx:
    x = pickle.load(fx)
with open('./data/stage2/y.pkl', 'rb') as fy:
    y = pickle.load(fy)
resnet.model.fit(x, y, batch_size=1024, epochs=100, validation_split=0.1)
resnet.model.save('./value_net/value_2.h5')