├── .gitignore ├── requirements.txt ├── dice.py ├── README.md ├── simple_bot_player.py ├── human_player.py ├── board.py ├── ai_player.py ├── player.py ├── game.py ├── ui_pygame.py └── trainer.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | *.dat 3 | venv/ 4 | __pycache__/ -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.22.0 2 | pygame==2.0.1 3 | -------------------------------------------------------------------------------- /dice.py: -------------------------------------------------------------------------------- 1 | from random import randint 2 | 3 | 4 | class Dice: 5 | """throws dice""" 6 | def __init__(self, sides=6): 7 | self.sides = sides 8 | 9 | def throw_dice(self): 10 | """creates a random list of eyes of 6 dice""" 11 | lst_eyes = [] 12 | for i in range(6): # 6 dice in game qwixx 13 | lst_eyes.append(randint(1, 6)) 14 | return lst_eyes 15 | # return [3, 4, 1, 1, 1, 1] 16 | 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # qwixx-trainer 2 | A genetic algorithm that learns to play the game Qwixx 3 | ## Setup 4 | install [Python](https://www.python.org/downloads/) and set the check mark in the setup to add Python to the environment variables (or PATH) 5 | 6 | go to cmd: 7 | ```shell 8 | git clone https://github.com/sjanetzki/qwixx-trainer 9 | cd qwixx_trainer 10 | pip install -r requirements.txt 11 | ``` 12 | 13 | ## Usage 14 | ### Trainer 15 | --> genetic algorithm 16 | ```shell 17 | python trainer.py 18 | ``` 19 | 20 | ### Game 21 | --> play via PyGameUi 22 | ```shell 23 | python game.py 24 | ``` 25 | -------------------------------------------------------------------------------- /simple_bot_player.py: -------------------------------------------------------------------------------- 1 | from player import Player 2 | from player import CrossPossibility 3 | 4 | 5 | class SimpleBotPlayer(Player): 6 | """a Player that has no intelligence; only crosses penalties""" 7 | 8 | def cross_active(self, lst_eyes, valid_turns, completed_lines): 9 | """crosses penalty if active""" 10 | super().cross_active(lst_eyes, valid_turns, completed_lines) 11 | assert (self.board.penalties < 4) 12 | return [CrossPossibility(4, None)] 13 | 14 | def cross_passive(self, lst_eyes, valid_turns, completed_lines): 15 | """crosses nothing (skips) if passive""" 16 | super().cross_passive(lst_eyes, valid_turns, completed_lines) 17 | return [] 18 | -------------------------------------------------------------------------------- /human_player.py: -------------------------------------------------------------------------------- 1 | """This file gives the opportunity to play Qwixx as a human player; not a UI; 2 | place where valid turns are 'translated' into a standardized format""" 3 | from player import Player 4 | 5 | 6 | class HumanPlayer(Player): 7 | """creates an environment for a human Player to play the game in conformity with the rules""" 8 | 9 | def cross_passive(self, lst_eyes, valid_turns, completed_lines): 10 | """chooses one valid cross or skips""" 11 | super().cross_passive(lst_eyes, valid_turns, completed_lines) 12 | 13 | assert (self.ui is not None) 14 | wish = self.ui.get_turn() 15 | if wish != "skip": 16 | return [wish] 17 | return [] 18 | 19 | def cross_active(self, lst_eyes, valid_turns, completed_lines, turn_index): 20 | """chooses two valid crosses in succession; 2nd cross can be 'skip'""" 21 | super().cross_active(lst_eyes, valid_turns, completed_lines) 22 | assert (self.ui is not None) 23 | 24 | # get 1st turn 25 | while turn_index == 0: 26 | wish = self.ui.get_turn() 27 | # active player is not allowed to skip first cross 28 | if wish == "skip": 29 | self.inform_about_invalid_turn() 30 | else: 31 | return [wish] 32 | 33 | # get 2nd turn 34 | wish = self.ui.get_turn() 35 | if wish != "skip": 36 | return [wish] 37 | return [] 38 | -------------------------------------------------------------------------------- /board.py: -------------------------------------------------------------------------------- 1 | """This file creates the board of Qwixx and sets valid crosses""" 2 | import numpy as np 3 | from enum import IntEnum 4 | 5 | 6 | class Row(IntEnum): 7 | """gives colors of the rows a number""" 8 | RED = 0 9 | YELLOW = 1 10 | GREEN = 2 11 | BLUE = 3 12 | 13 | 14 | class Board: 15 | """does everything that happens on the board""" 16 | def __init__(self): 17 | self.penalties = 0 18 | self.crosses_by_color = [set(), set(), set(), set()] # Instanzvariabeln 19 | 20 | @property 21 | def row_limits(self): 22 | """finds the row limit of each line, which is the number on the furthest right""" 23 | row_limits = np.array([1, 1, 13, 13]) 24 | for color, crosses in enumerate(self.crosses_by_color): 25 | if len(crosses) == 0: 26 | continue 27 | if color in (Row.RED, Row.YELLOW): 28 | row_limits[color] = max(crosses) 29 | else: 30 | row_limits[color] = min(crosses) 31 | return row_limits 32 | 33 | def _set_row_limits(self, row, value) -> None: 34 | """adds a new row limit (new cross)""" 35 | self.crosses_by_color[row].add(value) 36 | 37 | @property 38 | def row_numbers(self): 39 | """finds the number of crosses made in each line""" 40 | row_numbers = np.array([0, 0, 0, 0]) 41 | for color, crosses in enumerate(self.crosses_by_color): 42 | row_numbers[color] = len(crosses) 43 | return row_numbers 44 | 45 | def cross(self, position, completed_lines, is_active_player) -> None: 46 | """sets the crosses chosen by the player after checking their validity""" 47 | row = position.row 48 | eyes = position.eyes 49 | if row is None: 50 | assert(not is_active_player) 51 | return 52 | assert(row in range(5)) 53 | if row == 4: 54 | assert(self.penalties < 4) 55 | if self.penalties < 4: 56 | self.penalties += 1 57 | return 58 | self._make_colored_cross(eyes, row, completed_lines) 59 | 60 | def _make_colored_cross(self, eyes, row, completed_lines) -> None: 61 | """make cross in a colored row""" 62 | assert (eyes in range(2, 13)) 63 | if completed_lines[row]: 64 | assert (not completed_lines[row]) # row closed -> no crosses can be made there anymore 65 | if row in (Row.RED, Row.YELLOW): 66 | cross_last_number = eyes == 12 67 | else: 68 | cross_last_number = eyes == 2 69 | assert (not (self.row_numbers[row] < 5 and cross_last_number)) 70 | if row in (Row.RED, Row.YELLOW) and self.row_limits[row] < eyes: 71 | self._set_row_limits(row, eyes) 72 | if cross_last_number: 73 | self._set_row_limits(row, 13) 74 | elif row in (Row.GREEN, Row.BLUE) and self.row_limits[row] > eyes: 75 | self._set_row_limits(row, eyes) 76 | if cross_last_number: 77 | self._set_row_limits(row, 1) 78 | else: 79 | assert False 80 | -------------------------------------------------------------------------------- /ai_player.py: -------------------------------------------------------------------------------- 1 | """This file creates the 'brain' of an AI-player -> place of decision process what to do in the next turn""" 2 | from time import sleep 3 | 4 | import numpy as np 5 | 6 | from player import CrossPossibility, Player 7 | from typing import List 8 | from copy import copy 9 | import math 10 | 11 | 12 | class SampleStrategies: 13 | alpha_qwixx_quadratic_factor = np.array([0.0, 0, 0, 0]) 14 | alpha_qwixx_linear_factor = np.array([1.0, -0.5, 0.5, -2.5]) 15 | alpha_qwixx_bias = np.array([0.0, 0, -6, 0]) 16 | 17 | caira_quadratic_factor = np.array([0.5, 0, 0, 0]) 18 | caira_linear_factor = np.array([0.5, 0, 0, -5]) 19 | caira_bias = np.array([0.0, 0.0, 0.0, 0.0]) # 0.0 for float type (important for mutation) 20 | 21 | 22 | class AiPlayer(Player): 23 | """an AI Player that decides on its own which crosses to make by evaluation; 24 | individual is characterized by its strategy""" 25 | 26 | strategy_length = 4 # number x; limit r/y; limit g/b; penalty 27 | 28 | def __init__(self, name, quadratic_factor, linear_factor, bias, ui=None): 29 | super().__init__(name, ui) 30 | self.quadratic_factor = copy(quadratic_factor) 31 | self.linear_factor = copy(linear_factor) 32 | self.bias = copy(bias) 33 | 34 | def __repr__(self): 35 | return "AI(" + self.name + ")" 36 | 37 | def _get_sum_situation_(self, hypothetical_situation) -> float: 38 | """evaluates the quality of a hypothetical situation that will be the situation after this turn""" 39 | situation_quality = 0 40 | assert (len(self.quadratic_factor) == len(self.linear_factor) == len(self.bias) == AiPlayer.strategy_length 41 | and len(hypothetical_situation) == Player.situation_length) 42 | quadratic_factor_extended = self._extend_strategy_length(self.quadratic_factor) 43 | linear_factor_extended = self._extend_strategy_length(self.linear_factor) 44 | bias_extended = self._extend_strategy_length(self.bias) 45 | 46 | for index in range(len(hypothetical_situation)): 47 | situation_quality += math.pow(hypothetical_situation[index], 2) * quadratic_factor_extended[index] 48 | situation_quality += hypothetical_situation[index] * linear_factor_extended[index] 49 | situation_quality += bias_extended[index] 50 | return situation_quality 51 | 52 | @staticmethod 53 | def _extend_strategy_length(strategy_part): 54 | """extends a part of the strategy to make it fit the dimensions of the situation""" 55 | assert (Player.situation_length == 9 and len(strategy_part) == 4) 56 | strategy_part_extended = np.zeros((Player.situation_length,)) 57 | strategy_part_extended[0] = strategy_part[0] 58 | strategy_part_extended[1] = strategy_part[1] 59 | strategy_part_extended[2] = strategy_part[0] 60 | strategy_part_extended[3] = strategy_part[1] 61 | strategy_part_extended[4] = strategy_part[0] 62 | strategy_part_extended[5] = strategy_part[2] 63 | strategy_part_extended[6] = strategy_part[0] 64 | strategy_part_extended[7] = strategy_part[2] 65 | strategy_part_extended[8] = strategy_part[3] 66 | return strategy_part_extended 67 | 68 | def _find_best_turns(self, possibilities, completed_lines, is_active_player) -> List[CrossPossibility]: 69 | """finds the turn(s) with the highest strength due to the evaluation of _get_sum_situation""" 70 | max_turns_strength = float("-inf") 71 | best_turn = None 72 | for possibility in possibilities: 73 | situation = self._get_hypothetical_situation_after_turns(completed_lines, is_active_player, possibility) 74 | turns_strength = self._get_sum_situation_(situation) 75 | if turns_strength > max_turns_strength: 76 | best_turn = possibility 77 | max_turns_strength = turns_strength 78 | return best_turn 79 | 80 | def cross_active(self, lst_eyes, valid_turns, completed_lines) -> List[CrossPossibility]: 81 | """crosses the best known active turn""" 82 | super().cross_active(lst_eyes, valid_turns, completed_lines) 83 | # sleep(0.3) 84 | return self._find_best_turns(valid_turns, completed_lines, True) 85 | 86 | def cross_passive(self, lst_eyes, valid_turns, completed_lines): 87 | """crosses the best known passive turn""" 88 | super().cross_passive(lst_eyes, valid_turns, completed_lines) 89 | return self._find_best_turns(valid_turns, completed_lines, False) 90 | -------------------------------------------------------------------------------- /player.py: -------------------------------------------------------------------------------- 1 | """This file creates the players of Qwixx, divides them into subclasses (human, SimpleBot, and AI) and is the place 2 | where decisions are made.""" 3 | 4 | from board import Board, Row 5 | import numpy as np 6 | from abc import ABC 7 | from copy import deepcopy 8 | 9 | 10 | class CrossPossibility(object): 11 | """puts row and eyes of a button into a precise string format""" 12 | def __init__(self, row, eyes): 13 | assert (isinstance(row, Row) or row == 4) 14 | assert (row != 4 or eyes is None) 15 | self.row = row 16 | self.eyes = eyes 17 | 18 | def __repr__(self): 19 | return "cp(" + str(self.row) + ", " + str(self.eyes) + ")" 20 | 21 | def __eq__(self, other): 22 | return other != "skip" and self.row == other.row and self.eyes == other.eyes 23 | 24 | 25 | class Player(ABC): 26 | """makes all decision for doing crosses and informs the player about the state of the boards""" 27 | situation_length = 9 28 | penalty_position = situation_length - 1 29 | 30 | def __init__(self, name, ui=None): # !!! remember to reset all variables update start_new_game !!! 31 | self.name = name 32 | # self.opponents = opponents 33 | self.ui = ui 34 | self.board = Board() # own board 35 | # self.others = [] 36 | # for opponent_index in range(self.opponents): 37 | # self.others.append(Board()) # list with board of the others # todo #1 avoid duplicated boards 38 | 39 | def start_new_game(self) -> None: 40 | """sets up a new game""" 41 | self.board = Board() 42 | # self.others = [] 43 | # for opponent_index in range(self.opponents): 44 | # self.others.append(Board()) 45 | 46 | def cross_active(self, lst_eyes, valid_turns, completed_lines) -> None: 47 | """gives UI information about (active) crosses to make""" 48 | if self.ui is None: 49 | return 50 | self.ui.lst_eyes = lst_eyes 51 | self.ui.is_active_player = True 52 | 53 | def cross_passive(self, lst_eyes, valid_turns, completed_lines) -> None: 54 | """gives UI information about (passive) crosses to make""" 55 | if self.ui is None: 56 | return 57 | self.ui.lst_eyes = lst_eyes 58 | self.ui.is_active_player = False 59 | 60 | def inform(self, boards, own_index) -> None: 61 | """informs about boards of all players and updates the knowledge about completed lines/rows""" 62 | self.board = boards[own_index] 63 | self._update_ui() 64 | for player_index in range(len(boards)): 65 | if player_index == own_index: 66 | continue 67 | # if own_index > player_index: 68 | # self.others[player_index] = boards[player_index] 69 | # else: 70 | # self.others[player_index - 1] = boards[player_index] 71 | 72 | def inform_about_invalid_turn(self) -> None: 73 | """informs UI about an invalid turn done by the (human) player""" 74 | assert(self.ui is not None) 75 | self.ui.is_turn_invalid = True 76 | 77 | def _update_ui(self) -> None: 78 | """updates crosses on the UI""" 79 | if self.ui is None: 80 | return 81 | self.ui.penalties = self.board.penalties 82 | self.ui.crosses_by_color = self.board.crosses_by_color 83 | self.ui.show_board() 84 | 85 | def show_options(self, possibility_lst) -> None: 86 | """gives the UI the order to show possible fields to make a cross on""" 87 | if self.ui is None: 88 | return 89 | self.ui.show_options_on_board(possibility_lst) 90 | 91 | def _get_current_situation(self): 92 | """wrapper function to skip computation of hypothetical turns""" 93 | return self._get_hypothetical_situation_after_turns(None, None, [None]) 94 | 95 | def _get_hypothetical_situation_after_turns(self, completed_lines, is_active_player, turns): 96 | """creates an numpy array (situation) that describes all boards after turns""" 97 | situation = np.zeros((Player.situation_length,)) # version for longer situation in git history 98 | board = deepcopy(self.board) 99 | for turn in turns: # todo split -> with/without turns 100 | if turn is not None: 101 | assert (is_active_player is not None) 102 | board.cross(turn, completed_lines, is_active_player) 103 | for parameter_type in range(Player.situation_length): 104 | if parameter_type % 2 == 0 and parameter_type != Player.penalty_position: 105 | situation_value = board.row_numbers[parameter_type // 2] 106 | elif parameter_type != Player.penalty_position: 107 | situation_value = board.row_limits[parameter_type // 2] 108 | else: 109 | situation_value = board.penalties 110 | situation[parameter_type] = situation_value 111 | return situation 112 | 113 | def get_points(self): 114 | """calculates current points of a player""" 115 | situation = self._get_current_situation() 116 | player_count = int(len(situation) / Player.situation_length) 117 | player_points = 0 118 | player_index = player_count - 1 119 | 120 | # gives points for number of crosses in a row 121 | for row_start in range(0, player_count * Player.penalty_position, player_count * 2): # will be called 4 times 122 | colored_row_number = situation[row_start + player_index] 123 | player_points += (colored_row_number ** 2 + colored_row_number) / 2 # counter(understand only with example): II 124 | 125 | # calculates points for penalties 126 | penalty = situation[player_count * Player.penalty_position + player_index] 127 | player_points += penalty * (-5) 128 | return player_points 129 | 130 | def end(self, points) -> None: 131 | """prints the own points""" 132 | print(self.name + f"´s points: {points}") 133 | -------------------------------------------------------------------------------- /game.py: -------------------------------------------------------------------------------- 1 | """This file creates an algorithm (Game) that leads the game through its course""" 2 | from typing import List, Tuple 3 | 4 | from dice import Dice 5 | from board import Board, Row 6 | from ai_player import AiPlayer, SampleStrategies 7 | from human_player import HumanPlayer 8 | from player import CrossPossibility, Player 9 | from ui_pygame import PyGameUi 10 | import pickle 11 | 12 | 13 | class Game: 14 | """role of the Game Master; 15 | directs the game, interacts with the players, and minds the conformity of the rules of the players' actions""" 16 | 17 | def __init__(self, lst_player): 18 | self.player_count = len(lst_player) 19 | self.lst_player = lst_player 20 | self.lst_boards = [] 21 | for index in range(self.player_count): 22 | self.lst_boards.append(Board()) 23 | self.completed_lines = [False, False, False, False] # anybody able to complete line --> variable in game and not board 24 | self.dice = Dice() 25 | for player in lst_player: 26 | player.start_new_game() 27 | 28 | def _is_completed(self) -> bool: 29 | """checks whether the game is completed""" 30 | for player_index in range(self.player_count): 31 | penalties = self.lst_boards[player_index].penalties 32 | assert (penalties in range(5)) 33 | if penalties == 4: 34 | return True 35 | for color in range(4): 36 | if (color in (0, 1) and self.lst_boards[player_index].row_limits[color] == 13) or \ 37 | (color in (2, 3) and self.lst_boards[player_index].row_limits[color] == 1): 38 | self.completed_lines[color] = True 39 | if sum(self.completed_lines) >= 2: 40 | return True 41 | return False 42 | 43 | def compute_ranking(self) -> List[Tuple[Player, int]]: 44 | """computes a ranking of the players by points after the game is completed; function used by trainer""" 45 | ranking = [] 46 | for player in self.lst_player: 47 | points = player.get_points() 48 | ranking.append((player, points)) 49 | ranking = sorted(ranking, key=lambda x: x[1], reverse=True) 50 | return ranking 51 | 52 | def _make_valid_turn(self, player_index, turn, valid_turns, is_active_player, previous_turn=None) -> bool: 53 | """checks validity of a turn""" 54 | is_turn_valid = False 55 | # check whether first turn and, if applicable, both turns combined, are valid 56 | for complete_valid_turn in valid_turns: 57 | if (previous_turn is None and len(complete_valid_turn) > 0 and complete_valid_turn[0] == turn) or ( 58 | previous_turn is not None and complete_valid_turn == [previous_turn, turn]): 59 | is_turn_valid = True 60 | break 61 | 62 | assert (is_turn_valid or isinstance(self.lst_player[player_index], HumanPlayer)) 63 | if is_turn_valid: 64 | self.lst_boards[player_index].cross(turn, self.completed_lines, is_active_player) 65 | else: 66 | self.lst_player[player_index].inform_about_invalid_turn() 67 | return is_turn_valid 68 | 69 | def _get_possibilities_active(self, lst_eyes, player_index) -> List[List[CrossPossibility]]: 70 | """creates a list of all possible fields to make a cross on (while active player)""" 71 | possibilities_white_white = self._find_possible_white_white_sum(lst_eyes, player_index) 72 | possibility_lst = possibilities_white_white.copy() 73 | possibilities_white_color = self._find_possible_white_color_sum(lst_eyes, player_index) 74 | possibility_lst.extend(possibilities_white_color) 75 | 76 | for white_white in possibilities_white_white: 77 | white_white = white_white[0] 78 | for white_color in possibilities_white_color: 79 | white_color = white_color[0] 80 | if (white_white.row != white_color.row) or ( 81 | white_white.row in (Row.RED, Row.YELLOW) and white_white.eyes < white_color.eyes) or ( 82 | white_white.row in (Row.GREEN, Row.BLUE) and white_white.eyes > white_color.eyes): 83 | possibility_lst.append([white_white, white_color]) 84 | 85 | assert(self.lst_boards[player_index].penalties < 4) 86 | possibility_lst.append([CrossPossibility(4, None)]) 87 | return possibility_lst 88 | 89 | def _find_possible_white_white_sum(self, lst_eyes, player_index) -> List[List[CrossPossibility]]: 90 | """finds all possible fields that can be crossed with the sum of the 2 white dice""" 91 | possibilities_white_white = [] 92 | white_dice_sum = lst_eyes[0] + lst_eyes[1] 93 | for row in Row: 94 | possibilities_white_white.extend(self._check_possibility_rules(row, white_dice_sum, player_index)) 95 | return possibilities_white_white 96 | 97 | def _find_possible_white_color_sum(self, lst_eyes, player_index): 98 | """finds all possible fields that can be crossed with the sum of 1 white and 1 colored dice""" 99 | possibilities_white_color = [] 100 | for color in Row: 101 | for white in range(2): 102 | white_color_sum = lst_eyes[white] + lst_eyes[color + 2] 103 | possibilities_white_color.extend(self._check_possibility_rules(color, white_color_sum, player_index)) 104 | return possibilities_white_color 105 | 106 | def _get_possibilities_passive(self, lst_eyes, player_index) -> List[List[CrossPossibility]]: 107 | """creates a list of all possible fields to make a cross on (while passive player)""" 108 | possibility_lst = self._find_possible_white_white_sum(lst_eyes, player_index) 109 | assert (self.lst_boards[player_index].penalties < 4) 110 | possibility_lst.append([CrossPossibility(4, None)]) 111 | possibility_lst.append([]) 112 | return possibility_lst 113 | 114 | def _check_possibility_rules(self, row, white_plus_a_dice_sum, player_index) -> List[List[CrossPossibility]]: 115 | """checks witch possible fields are allowed to be crossed""" 116 | possibilities_white_plus_a_dice = [] 117 | if row in (Row.RED, Row.YELLOW): 118 | if not self.completed_lines[row] and self.lst_boards[player_index].row_limits[ 119 | row] < white_plus_a_dice_sum and ( 120 | (self.lst_boards[player_index].row_numbers[ 121 | row] >= 5 and white_plus_a_dice_sum == 12) or white_plus_a_dice_sum < 12): 122 | possibilities_white_plus_a_dice.append([CrossPossibility(row, white_plus_a_dice_sum)]) 123 | else: 124 | if not self.completed_lines[row] and self.lst_boards[player_index].row_limits[ 125 | row] > white_plus_a_dice_sum and ( 126 | (self.lst_boards[player_index].row_numbers[ 127 | row] >= 5 and white_plus_a_dice_sum == 2) or white_plus_a_dice_sum > 2): 128 | possibilities_white_plus_a_dice.append([CrossPossibility(row, white_plus_a_dice_sum)]) 129 | return possibilities_white_plus_a_dice 130 | 131 | def _make_turns_for_active_human_player(self, lst_eyes, player_index, player, is_active_player) -> None: 132 | """lets a human player cross active until the turn chosen is valid""" 133 | is_turn_valid = False 134 | turn_index = 0 135 | valid_turns = self._get_possibilities_active(lst_eyes, player_index) 136 | previous_turn = None 137 | self.lst_player[player_index].show_options(self._get_possibilities_active(lst_eyes, player_index)) 138 | while not is_turn_valid: 139 | turns = player.cross_active(lst_eyes, valid_turns, self.completed_lines, turn_index) 140 | assert (len(turns) == 1) 141 | is_turn_valid = self._make_valid_turn(player_index, turns[0], valid_turns, is_active_player) 142 | previous_turn = turns[0] 143 | self.lst_player[player_index].inform(self.lst_boards, player_index) 144 | 145 | turn_index += 1 146 | is_turn_valid = False 147 | self.lst_player[player_index].show_options(self._get_possibilities_passive(lst_eyes, player_index)) 148 | # only allow 2nd if previous turn was done with WHITE dice 149 | while not is_turn_valid and previous_turn.row != 4 and previous_turn.eyes == lst_eyes[0] + lst_eyes[1]: 150 | turns = player.cross_active(lst_eyes, valid_turns, self.completed_lines, turn_index) 151 | assert (len(turns) <= 1) 152 | if len(turns) != 0: 153 | is_turn_valid = self._make_valid_turn(player_index, turns[0], valid_turns, is_active_player, 154 | previous_turn) 155 | else: 156 | is_turn_valid = True 157 | 158 | def _make_turns_for_ai_or_passive_human_player(self, lst_eyes, player_index, player, is_active_player) -> None: 159 | """lets an AI player (passive or active) or passive human player cross until the turn chosen is valid """ 160 | is_turn_valid = False 161 | while not is_turn_valid: 162 | if is_active_player: 163 | valid_turns = self._get_possibilities_active(lst_eyes, player_index) 164 | turns = player.cross_active(lst_eyes, valid_turns, self.completed_lines) 165 | assert (1 <= len(turns) <= 2) 166 | else: 167 | valid_turns = self._get_possibilities_passive(lst_eyes, player_index) 168 | turns = player.cross_passive(lst_eyes, valid_turns, self.completed_lines) 169 | assert (len(turns) <= 1) 170 | 171 | if len(turns) == 0: 172 | is_turn_valid = True 173 | else: 174 | is_turn_valid = self._make_valid_turn(player_index, turns[0], valid_turns, is_active_player) 175 | if len(turns) == 2: 176 | is_turn_valid = self._make_valid_turn(player_index, turns[1], valid_turns, is_active_player, turns[0]) 177 | 178 | def play(self, prints_points=False) -> None: 179 | """manages the run of a game (Game Master) until the game is completed; directs when the players are prompted to 180 | do their turns; also used by the trainer""" 181 | game_in_progress = True 182 | while game_in_progress: 183 | for active_player_index in range(self.player_count): 184 | lst_eyes = self.dice.throw_dice() 185 | for player_index in range(self.player_count): 186 | player = self.lst_player[player_index] 187 | is_active_player = player_index == active_player_index 188 | if isinstance(player, HumanPlayer) and is_active_player: 189 | self._make_turns_for_active_human_player(lst_eyes, player_index, player, is_active_player) 190 | else: 191 | self._make_turns_for_ai_or_passive_human_player(lst_eyes, player_index, player, 192 | is_active_player) 193 | if prints_points: 194 | print(self.compute_ranking()) 195 | 196 | if self._is_completed(): 197 | game_in_progress = False 198 | 199 | # inform all players about new game situation AFTER they made their turns 200 | for player_index in range(self.player_count): 201 | self.lst_player[player_index].inform(self.lst_boards, player_index) 202 | 203 | if not game_in_progress: 204 | break # todo execute turns for all players and evaluate turns (separate) 205 | 206 | 207 | def load_best_ai(): 208 | """loads the AI that was saved""" 209 | file = open("best_ai.dat", "rb") 210 | best_ai = pickle.load(file) 211 | file.close() 212 | return best_ai 213 | 214 | 215 | if __name__ == "__main__": 216 | ui = PyGameUi() 217 | ui.show_board() 218 | # ai_opponent = load_best_ai() 219 | 220 | 221 | 222 | # game = Game([HumanPlayer("meep", ui), 223 | # AiPlayer("meeep", np.random.randn(18), np.random.randn(18), np.random.randn(18))]) 224 | 225 | game = Game([HumanPlayer("visitor", ui), 226 | AiPlayer("", SampleStrategies.alpha_qwixx_quadratic_factor, SampleStrategies.alpha_qwixx_linear_factor, 227 | SampleStrategies.alpha_qwixx_bias)]) 228 | game.play(True) 229 | -------------------------------------------------------------------------------- /ui_pygame.py: -------------------------------------------------------------------------------- 1 | """This file creates a user interface (UI) with PyGame that shows the board and that can be interacted with by a 2 | human player by clicking buttons""" 3 | from board import Row 4 | from player import CrossPossibility 5 | import pygame 6 | 7 | 8 | class PyGameUi(object): 9 | """creates a board with PyGame to interact with (as a human Player) in conformity with the rules""" 10 | pygame.init() 11 | 12 | scale_factor = 3/4 13 | 14 | # define colors 15 | black = (0, 0, 0) # Klassenvariabeln 16 | dark_grey = (120, 120, 120) 17 | light_grey = (205, 205, 205) 18 | white = (255, 255, 255) 19 | light_red = (255, 156, 163) 20 | red = (255, 103, 115) 21 | light_yellow = (248, 220, 127) 22 | yellow = (251, 212, 85) 23 | yellow_vibrant = (255, 195, 0) 24 | light_green = (184, 221, 196) 25 | green = (142, 220, 166) 26 | light_blue = (197, 220, 242) 27 | blue = (149, 198, 248) 28 | blue_vibrant = (0, 129, 255) 29 | green_vibrant = (62, 224, 109) 30 | red_vibrant = (255, 0, 20) 31 | 32 | # define lengths, coordinates, and proportions of objects appearing on the board 33 | screen_x_length = int(1216 * scale_factor) 34 | screen_y_length = int(650 * scale_factor) 35 | box_x = int(32 * scale_factor) 36 | box_y = box_x 37 | box_y_distance = int(126 * scale_factor) 38 | box_x_length = int(1152 * scale_factor) 39 | box_y_length = int(118 * scale_factor) 40 | button_length = int(80 * scale_factor) 41 | button_x_distance = int(92 * scale_factor) 42 | button_y = int(50 * scale_factor) 43 | button_x = button_y 44 | button_text_y = int(70 * scale_factor) 45 | button_mark_y = int(50 * scale_factor) # here 46 | circle_diameter = int(72 * scale_factor) 47 | circle_radius = circle_diameter // 2 48 | circle_x = int(1112 * scale_factor) 49 | circle_text_x_offset = int(-10 * scale_factor) 50 | circle_y = int(90 * scale_factor) 51 | circle_text_y_offset = int(6 * scale_factor) 52 | penalty_box_x = int(784 * scale_factor) 53 | penalty_box_y = (536 * scale_factor) 54 | penalty_box_x_length = int(400 * scale_factor) 55 | penalty_box_y_length = int(60 * scale_factor) 56 | penalty_button_length = button_length // 2 57 | penalty_button_x_offset = int(146 * scale_factor) 58 | penalty_text_x_offset = int(16 * scale_factor) 59 | penalty_text_y_offset = (penalty_box_y_length - penalty_button_length) // 2 60 | skip_button_x = int(680 * scale_factor) 61 | skip_button_x_length = int(75 * scale_factor) 62 | dice_text_x_offset = int(225 * scale_factor) 63 | dice_text_y_offset = int(40 * scale_factor) 64 | player_mode_x_offset = int(205 * scale_factor) 65 | player_mode_y_offset = dice_text_y_offset * 2 66 | font_numbers_size = int(28 * scale_factor) 67 | font_lock_size = int(50 * scale_factor) 68 | 69 | def __init__(self): 70 | size = (PyGameUi.screen_x_length, PyGameUi.screen_y_length) 71 | self.screen = pygame.display.set_mode(size) 72 | self.is_mouse_down = False 73 | self.last_action = None 74 | self.crosses_by_color = [set(), set(), set(), set()] 75 | self.penalties = 0 76 | self.lst_eyes = [0, 0, 0, 0, 0, 0] 77 | self.is_turn_invalid = False 78 | self.is_active_player = True 79 | 80 | def show_board(self) -> None: 81 | """shows board with PyGame functions""" 82 | pygame.display.set_caption("Qwixx Board") 83 | if self.is_turn_invalid: 84 | self.screen.fill(PyGameUi.red_vibrant) 85 | else: 86 | self.screen.fill(PyGameUi.white) 87 | 88 | font = pygame.font.SysFont('Comic Sans MS', PyGameUi.font_numbers_size, True, False) 89 | lock = pygame.font.SysFont('Comic Sans MS', PyGameUi.font_lock_size, True, False) 90 | 91 | self._render_colored_rows(font, lock) 92 | self._render_penalties(font) 93 | self._render_skip_button(font) 94 | self._render_dice(font) 95 | self._show_player_mode(font) 96 | 97 | clock = pygame.time.Clock() 98 | clock.tick(60) 99 | pygame.display.flip() 100 | 101 | def _render_colored_rows(self, font, lock) -> None: 102 | """draws the colored rows and creates buttons in these lines""" 103 | for row in range(4): 104 | for event in pygame.event.get(): # User did something 105 | if event.type == pygame.QUIT: # If user clicked close 106 | PyGameUi.close() 107 | return 108 | inactive_color, background_color, active_color = PyGameUi.convert_number_to_color(row) 109 | pygame.draw.rect(self.screen, background_color, 110 | [PyGameUi.box_x, PyGameUi.box_y + PyGameUi.box_y_distance * row, PyGameUi.box_x_length, 111 | PyGameUi.box_y_length], 0) # box behind the buttons 112 | for eyes in range(0, 11): 113 | self.button(eyes, PyGameUi.button_length, PyGameUi.button_length, inactive_color, active_color) 114 | text = font.render(f"{int(eyes + 2)}", True, PyGameUi.white) 115 | if row < 2: 116 | self.screen.blit(text, [PyGameUi.button_length + PyGameUi.button_x_distance * eyes, 117 | PyGameUi.box_y_distance * row + PyGameUi.button_text_y]) 118 | else: 119 | self.screen.blit(text, [PyGameUi.button_length + PyGameUi.button_x_distance * (10 - eyes), 120 | PyGameUi.box_y_distance * row + PyGameUi.button_text_y]) 121 | self.button(12, PyGameUi.circle_diameter, PyGameUi.circle_diameter, inactive_color, active_color, True) 122 | text = lock.render("*", True, PyGameUi.white) 123 | self.screen.blit(text, [PyGameUi.circle_x + PyGameUi.circle_text_x_offset, 124 | PyGameUi.circle_y * (row + 1) + PyGameUi.circle_radius * ( 125 | row - 1) + PyGameUi.circle_text_y_offset]) 126 | 127 | def _render_penalties(self, font) -> None: 128 | """draws the penalty row and creates four buttons in that line""" 129 | pygame.draw.rect(self.screen, PyGameUi.light_grey, 130 | [PyGameUi.penalty_box_x, PyGameUi.penalty_box_y, PyGameUi.penalty_box_x_length, 131 | PyGameUi.penalty_box_y_length], 0) 132 | for eyes in range(1, 5): 133 | self.button(eyes, PyGameUi.penalty_button_length, PyGameUi.penalty_button_length, PyGameUi.dark_grey, 134 | PyGameUi.black) 135 | text = font.render("penalties", True, PyGameUi.white) 136 | self.screen.blit(text, [PyGameUi.penalty_box_x + PyGameUi.penalty_text_x_offset, 137 | PyGameUi.penalty_box_y + PyGameUi.penalty_text_y_offset]) 138 | 139 | def _render_skip_button(self, font) -> None: 140 | """draws a skip button""" 141 | pygame.draw.rect(self.screen, PyGameUi.light_grey, 142 | [PyGameUi.skip_button_x, PyGameUi.penalty_box_y, PyGameUi.skip_button_x_length, 143 | PyGameUi.penalty_box_y_length], 0) 144 | self.button(0, PyGameUi.skip_button_x_length, PyGameUi.penalty_box_y_length, PyGameUi.light_grey, 145 | PyGameUi.dark_grey) 146 | text = font.render("skip", True, PyGameUi.white) 147 | self.screen.blit(text, [PyGameUi.skip_button_x + PyGameUi.penalty_text_y_offset, 148 | PyGameUi.penalty_box_y + PyGameUi.penalty_text_y_offset]) 149 | 150 | def _render_dice(self, font) -> None: 151 | """renders the dice onto the board""" 152 | for dice in range(len(self.lst_eyes)): 153 | text = font.render(f"{self.lst_eyes[dice]}", True, self.convert_number_to_color(dice, True)) 154 | self.screen.blit(text, 155 | [PyGameUi.button_length + PyGameUi.button_x_distance * dice, 156 | PyGameUi.penalty_box_y + PyGameUi.penalty_text_y_offset]) 157 | 158 | text = font.render("your dice", True, PyGameUi.dark_grey) 159 | self.screen.blit(text, [PyGameUi.box_x + PyGameUi.dice_text_x_offset, 160 | PyGameUi.penalty_box_y + PyGameUi.dice_text_y_offset]) 161 | 162 | def show_options_on_board(self, possibility_lst) -> None: 163 | """marks the fields that can be crossed for a turn""" 164 | font = pygame.font.SysFont('Comic Sans MS', PyGameUi.font_numbers_size, True, False) 165 | 166 | for possibility in possibility_lst: 167 | if len(possibility) == 0 or possibility[0].row == 4: 168 | continue 169 | # don't have to look at double turns because fields are marked anyway in single turn option 170 | row = possibility[0].row 171 | eyes = int(possibility[0].eyes) 172 | assert (eyes is not None) 173 | text = font.render("°", True, PyGameUi.black) 174 | if row < 2: 175 | self.screen.blit(text, [PyGameUi.button_length + PyGameUi.button_x_distance * (eyes - 2), 176 | PyGameUi.box_y_distance * row + PyGameUi.button_mark_y]) 177 | else: 178 | self.screen.blit(text, [PyGameUi.button_length + PyGameUi.button_x_distance * (12 - eyes), 179 | PyGameUi.box_y_distance * row + PyGameUi.button_mark_y]) 180 | clock = pygame.time.Clock() 181 | clock.tick(5) 182 | pygame.display.flip() 183 | 184 | def _show_player_mode(self, font) -> None: 185 | """shows whether the player is active or passive """ 186 | if self.is_active_player: 187 | player_mode = "active player" 188 | else: 189 | player_mode = "passive player" 190 | text = font.render(f"{player_mode}", True, PyGameUi.dark_grey) 191 | self.screen.blit(text, [PyGameUi.box_x + PyGameUi.player_mode_x_offset, 192 | PyGameUi.penalty_box_y + PyGameUi.player_mode_y_offset]) 193 | 194 | def get_turn(self) -> CrossPossibility: 195 | """waits for a player's action, returns it, and resets it""" 196 | while self.last_action is None: 197 | self.show_board() 198 | last_action = self.last_action 199 | self.last_action = None 200 | return last_action 201 | 202 | def button(self, eyes, w, h, inactive_color, active_color, circle=False) -> None: 203 | """makes the appearance of buttons interactive""" 204 | mouse = pygame.mouse.get_pos() 205 | click = pygame.mouse.get_pressed() 206 | x, y = PyGameUi.convert_eyes_to_coordinates(PyGameUi.convert_color_to_row(active_color), eyes, circle) 207 | if click[0] == 0: 208 | self.is_mouse_down = False 209 | if PyGameUi.is_mouse_over_button(x, y, w, h, circle, mouse): 210 | self._choose_color_for_button_under_mouse(x, y, w, h, active_color, click, circle) 211 | else: 212 | self._choose_color_for_button_independently_of_mouse(x, y, w, h, active_color, inactive_color, circle) 213 | 214 | def _choose_color_for_button_under_mouse(self, x, y, w, h, active_color, click, circle) -> None: 215 | """choose color for button when the cursor is pointed at it""" 216 | if circle: 217 | pygame.draw.circle(self.screen, active_color, [x, y], w // 2, 0) 218 | else: 219 | pygame.draw.rect(self.screen, active_color, (x, y, w, h)) 220 | if click[0] == 1: 221 | self._click_button(x, active_color) 222 | 223 | def _choose_color_for_button_independently_of_mouse(self, x, y, w, h, active_color, inactive_color, circle) -> None: 224 | """choose color for button when the cursor isn't pointed at it""" 225 | eyes = PyGameUi.convert_coordinates_to_eyes(active_color, x) 226 | if active_color == PyGameUi.red_vibrant and eyes in self.crosses_by_color[0]: 227 | inactive_color = active_color 228 | if active_color == PyGameUi.yellow_vibrant and eyes in self.crosses_by_color[1]: 229 | inactive_color = active_color 230 | if active_color == PyGameUi.green_vibrant and eyes in self.crosses_by_color[2]: 231 | inactive_color = active_color 232 | if active_color == PyGameUi.blue_vibrant and eyes in self.crosses_by_color[3]: 233 | inactive_color = active_color 234 | if active_color == PyGameUi.black and eyes <= self.penalties: 235 | inactive_color = active_color 236 | 237 | if circle: 238 | pygame.draw.circle(self.screen, inactive_color, [x, y], w // 2, 0) 239 | else: 240 | pygame.draw.rect(self.screen, inactive_color, (x, y, w, h)) 241 | 242 | def _click_button(self, x, active_color) -> bool: # comparable to 'cross()' 243 | """sets a cross chosen by the player, chooses a penalty, or skips one turn""" 244 | if self.is_mouse_down or self.last_action is not None: 245 | return False 246 | self.is_mouse_down = True 247 | self.is_turn_invalid = False 248 | row = active_color 249 | eyes = PyGameUi.convert_coordinates_to_eyes(row, x) 250 | 251 | if eyes is not None: 252 | if row == PyGameUi.red_vibrant: 253 | self.last_action = CrossPossibility(Row.RED, eyes) 254 | if row == PyGameUi.yellow_vibrant: 255 | self.last_action = CrossPossibility(Row.YELLOW, eyes) 256 | if row == PyGameUi.green_vibrant: 257 | self.last_action = CrossPossibility(Row.GREEN, eyes) 258 | if row == PyGameUi.blue_vibrant: 259 | self.last_action = CrossPossibility(Row.BLUE, eyes) 260 | 261 | if row == PyGameUi.black and eyes - 1 == self.penalties: 262 | self.last_action = CrossPossibility(4, None) 263 | 264 | if row == PyGameUi.dark_grey: 265 | self.last_action = "skip" 266 | 267 | @staticmethod 268 | def close() -> None: 269 | """closes the game""" 270 | pygame.quit() 271 | 272 | @staticmethod 273 | def is_mouse_over_button(x, y, w, h, circle, mouse) -> bool: 274 | """checks whether the cursor is pointed on a button""" 275 | return (not circle and x < mouse[0] < x + w and y < mouse[1] < y + h) or \ 276 | (circle and x - w / 2 < mouse[0] < x + w / 2 and y - h / 2 < mouse[1] < y + h / 2) 277 | 278 | @staticmethod 279 | def convert_coordinates_to_eyes(row, x) -> float: 280 | """converts the coordinates of a button to the number on the button (or the index of the penalty)""" 281 | if row in (PyGameUi.red_vibrant, PyGameUi.yellow_vibrant): 282 | return (x - PyGameUi.button_x) // PyGameUi.button_x_distance + 2 # + 2 because eyes in index from 0 -11 -> 2 - 13 283 | elif row in (PyGameUi.green_vibrant, PyGameUi.blue_vibrant): 284 | return 12 - ((x - PyGameUi.button_x) // PyGameUi.button_x_distance) # eyes originally in index from 0 -11 -> 12 - 1 285 | else: # penalties 286 | return (x - PyGameUi.penalty_box_x - PyGameUi.penalty_button_x_offset) // ( 287 | PyGameUi.penalty_button_length + PyGameUi.penalty_text_y_offset) 288 | 289 | @staticmethod 290 | def convert_eyes_to_coordinates(row, eyes, circle) -> tuple: # todo is tuple the right type? 291 | """converts the number on a button (or the index of a penalty) to the coordinates of the button""" 292 | assert (row in range(6)) 293 | if circle: 294 | return PyGameUi.circle_x, PyGameUi.circle_y * (row + 1) + PyGameUi.circle_radius * row 295 | if row < 2: 296 | return (PyGameUi.button_x + PyGameUi.button_x_distance * eyes, 297 | PyGameUi.button_y * (row + 1) + (PyGameUi.box_y_distance - PyGameUi.button_y) * row) # x, y 298 | if row < 4: 299 | return (PyGameUi.button_x + PyGameUi.button_x_distance * (10 - eyes)), (PyGameUi.button_y * (row + 1) + ( 300 | PyGameUi.box_y_distance - PyGameUi.button_y) * row) # todo why 10 and not 12? 301 | if row == 4: 302 | return (PyGameUi.penalty_box_x + PyGameUi.penalty_button_x_offset + ( 303 | PyGameUi.penalty_button_length + PyGameUi.penalty_text_y_offset) * eyes, 304 | PyGameUi.penalty_box_y + PyGameUi.penalty_text_y_offset) 305 | return PyGameUi.skip_button_x, PyGameUi.penalty_box_y 306 | 307 | @staticmethod 308 | def convert_color_to_row(color) -> int: 309 | """converts the color of a button to a row number""" 310 | if color == PyGameUi.red_vibrant: 311 | return 0 312 | if color == PyGameUi.yellow_vibrant: 313 | return 1 314 | if color == PyGameUi.green_vibrant: 315 | return 2 316 | if color == PyGameUi.blue_vibrant: 317 | return 3 318 | if color == PyGameUi.black: 319 | return 4 320 | if color == PyGameUi.dark_grey: 321 | return 5 322 | 323 | @staticmethod 324 | def convert_number_to_color(number, is_dice=False) -> tuple: 325 | """converts numbers of dice or rows to one or a tuple of colors""" 326 | if is_dice: 327 | if number in (0, 1): 328 | return PyGameUi.dark_grey 329 | if number == 2: 330 | return PyGameUi.red 331 | if number == 3: 332 | return PyGameUi.yellow_vibrant 333 | if number == 4: 334 | return PyGameUi.green_vibrant 335 | if number == 5: 336 | return PyGameUi.blue_vibrant 337 | else: # inactive, background, active 338 | if number == 0: 339 | return PyGameUi.red, PyGameUi.light_red, PyGameUi.red_vibrant 340 | if number == 1: 341 | return PyGameUi.yellow, PyGameUi.light_yellow, PyGameUi.yellow_vibrant 342 | if number == 2: 343 | return PyGameUi.green, PyGameUi.light_green, PyGameUi.green_vibrant 344 | if number == 3: 345 | return PyGameUi.blue, PyGameUi.light_blue, PyGameUi.blue_vibrant 346 | if number == 4: 347 | return PyGameUi.light_grey, PyGameUi.dark_grey, PyGameUi.black 348 | 349 | 350 | if __name__ == "__main__": 351 | pygame_board = PyGameUi() 352 | pygame_board.show_board() 353 | 354 | # test driven development (tdd -> bool) 355 | # done: skip button, play, ranking,python tutorials, show possible turns on UI 356 | -------------------------------------------------------------------------------- /trainer.py: -------------------------------------------------------------------------------- 1 | """This file creates a trainer that finds the best (fittest) AI""" 2 | from ai_player import AiPlayer 3 | from game import Game, SampleStrategies 4 | import numpy as np 5 | from numpy import random 6 | import random 7 | from typing import List, Tuple, Any 8 | import pickle 9 | from copy import copy, deepcopy 10 | 11 | 12 | class AiLogEntry: 13 | """data to track evolution of AIs across generations during training""" 14 | 15 | def __init__(self, points_average, points_variance, events): 16 | self.points_average = points_average 17 | self.points_variance = points_variance 18 | self.events = events 19 | 20 | def __repr__(self): 21 | return "(" + str(self.points_average) + ", " + str(self.points_variance) + ", " + str(self.events) + ")" 22 | 23 | 24 | class Trainer: 25 | """trains AIs (with a genetic algorithm) in order to get the best AI out of all; 26 | parameters therefore given manually""" 27 | 28 | strategy_parameter_min = -10 29 | strategy_parameter_max = 10 30 | 31 | def __init__(self, group_size=5, play_against_own_copies=False, population_size=100, survivor_rate=0.74, 32 | child_rate=1, mutation_rate=0.05, mutation_copy_rate=0.0, lowest_variance_rate=0.98, 33 | num_generations=200): 34 | assert (population_size % group_size == 0) 35 | self.group_size = group_size 36 | self.play_against_own_copies = play_against_own_copies 37 | self.population_size = population_size 38 | self.mutation_rate = mutation_rate 39 | self.mutation_copy_rate = mutation_copy_rate 40 | self.lowest_variance_rate = lowest_variance_rate 41 | self.num_generations = num_generations 42 | self.survivor_rate = survivor_rate 43 | self.num_survivors = int(survivor_rate * self.population_size) 44 | self.child_rate = child_rate 45 | self.num_children = int((self.population_size - self.num_survivors) * child_rate) # child_rate is relative amount of died AIs that is 'reborn' by recombination 46 | self.num_parents = self.num_children * 2 47 | self.fitness_game_number = 5 # empiric value 48 | self.points_per_ai = None 49 | self.ai_histories = dict() 50 | self.generation = 0 51 | self.population = [] 52 | 53 | def _group(self) -> List[Tuple[Any, List[AiPlayer]]]: 54 | """puts the individuals (AIs) of a population into random groups of the same size""" 55 | if self.play_against_own_copies: 56 | groups = [(ai, [deepcopy(ai) for _ in range(self.group_size)]) for ai in self.population] 57 | else: 58 | random.shuffle(self.population) 59 | groups = [] 60 | for ai_index in range(0, len(self.population), self.group_size): 61 | groups.append(("different AIs", self.population[ai_index: ai_index + self.group_size])) 62 | return groups 63 | 64 | def _select_extreme_ais(self, point_sum_per_ai, num_extreme_ais, selects_best_ais, variance_threshold) -> List[ 65 | AiPlayer]: 66 | """selects strongest or weakest AIs of a population""" 67 | extreme_ais = [] 68 | for _ in range(num_extreme_ais): 69 | max_points = float("-inf") 70 | min_points = float("inf") 71 | extreme_ai = None 72 | for ai, points in point_sum_per_ai.items(): 73 | if self.ai_histories[ai][self.generation].points_variance > variance_threshold: 74 | continue # ignore AIs with high points variance in ranking -> middle field (extreme good/bad daily form) 75 | if selects_best_ais and max_points < points: 76 | max_points = points 77 | extreme_ai = ai 78 | elif not selects_best_ais and min_points > points: 79 | min_points = points 80 | extreme_ai = ai 81 | extreme_ais.append(extreme_ai) 82 | del point_sum_per_ai[extreme_ai] 83 | assert(None not in extreme_ais) 84 | return extreme_ais 85 | 86 | def _compute_avg_points_per_ai(self, point_list_per_ai) -> None: 87 | """creates a dictionary with average points of every AI""" 88 | self.points_per_ai = dict() 89 | for ai, point_list in point_list_per_ai.items(): 90 | self.points_per_ai[ai] = sum(point_list) / len(point_list) 91 | points_variance = Trainer._compute_variance(point_list_per_ai[ai], self.points_per_ai[ai]) 92 | if self.generation in self.ai_histories[ai]: 93 | self.ai_histories[ai][self.generation].points_average = int(self.points_per_ai[ai]) 94 | self.ai_histories[ai][self.generation].points_variance = int(points_variance) 95 | else: 96 | self.ai_histories[ai][self.generation] = \ 97 | AiLogEntry(int(self.points_per_ai[ai]), int(points_variance), []) 98 | 99 | @staticmethod 100 | def _compute_variance(numbers, average) -> float: 101 | """calculates the corrected sample variance of any list of numbers with a given average""" 102 | variance = 0 # korrigierte Stichprobenvarianz 103 | for number in numbers: 104 | variance += (number - average) ** 2 105 | return variance / len(numbers) 106 | 107 | def _play_in_groups(self, point_list_per_ai, game_count) -> None: 108 | """part of _rank(); lets AIs play in groups""" 109 | groups = self._group() 110 | for original_ai, group in groups: 111 | game = Game(group) 112 | # same lists of dice eyes for each generation to make AIs more comparable 113 | random.seed(self.generation * self.fitness_game_number + game_count) 114 | game.play() 115 | for ai in group: 116 | points = ai.get_points() 117 | if original_ai == "different AIs": 118 | point_list_per_ai[ai].append(points) 119 | else: 120 | point_list_per_ai[original_ai].append(points) 121 | 122 | def _play_against_own_copies(self, final_ai): 123 | """lets final AI play against its copies""" 124 | group = [deepcopy(final_ai) for _ in range(self.group_size)] 125 | point_list = [] 126 | for _ in range(10): 127 | game = Game(group) 128 | game.play() 129 | for ai in group: 130 | point_list.append(ai.get_points()) 131 | average = sum(point_list) / len(point_list) 132 | return average, self._compute_variance(point_list, average) 133 | 134 | def _split_ais_by_fitness(self, point_sum_per_ai): 135 | """part of _rank(); splits AIs by fitness after playing in groups""" 136 | variance_threshold = self._calculate_variance_threshold() 137 | point_sum_per_ai_temp = copy(point_sum_per_ai) 138 | strongest_ais = self._select_extreme_ais(point_sum_per_ai_temp, self.num_parents, 139 | True, variance_threshold) # side-effect intentional 140 | weakest_ais = self._select_extreme_ais(point_sum_per_ai_temp, self.population_size - self.num_survivors, 141 | False, variance_threshold) 142 | weakest_ais = list(reversed(weakest_ais)) 143 | middle_field = point_sum_per_ai_temp.keys() # keys() only selects the AIs, not the points 144 | return strongest_ais, middle_field, weakest_ais 145 | 146 | def _calculate_variance_threshold(self): 147 | """calculates the threshold of variance; necessary to put AIs with extreme points variance into the middle field 148 | (influence from daily form too high)""" 149 | variances = [] 150 | for ai in self.population: 151 | variances.append(self.ai_histories[ai][self.generation].points_variance) 152 | return list(sorted(variances))[int(self.lowest_variance_rate * self.population_size) - 1] 153 | 154 | def _create_ranking(self, point_list_per_ai): 155 | """part of _rank(); creates a ranking based on the fitness of an AI""" 156 | point_sum_per_ai = {ai: sum(point_list) for ai, point_list in point_list_per_ai.items()} 157 | strongest_ais, middle_field, weakest_ais = self._split_ais_by_fitness(point_sum_per_ai) 158 | ranking = copy(strongest_ais) 159 | ranking.extend(middle_field) 160 | ranking.extend(weakest_ais) 161 | assert (len(ranking) == self.population_size) 162 | return ranking 163 | 164 | def _rank(self) -> None: 165 | """lets the groups play and ranks them inside these groups by performance""" 166 | point_list_per_ai = {ai: [] for ai in self.population} 167 | for game_count in range(self.fitness_game_number): 168 | self._play_in_groups(point_list_per_ai, game_count) 169 | if not self.play_against_own_copies: 170 | for point_list in point_list_per_ai.values(): 171 | assert (len(point_list) == self.fitness_game_number) 172 | self._compute_avg_points_per_ai(point_list_per_ai) 173 | self.population = self._create_ranking(point_list_per_ai) 174 | 175 | def _select(self) -> None: 176 | """selects the best AIs, these survive -> rate of survivors given by parameter""" 177 | dead_ais = self.population[self.num_survivors:] 178 | for ai in dead_ais: 179 | self._add_event_to_ai_history(ai, "SELEction") 180 | self.population = self.population[: self.num_survivors] 181 | 182 | def _mix_strategies(self, parent1, parent2) -> AiPlayer: 183 | """mixes the strategies of the parents by creating averages of the different factors / bias to be their child's 184 | strategy""" 185 | child_quadratic_factor = np.array([x / 2 for x in (parent1.quadratic_factor + parent2.quadratic_factor)]) 186 | child_linear_factor = np.array([x / 2 for x in (parent1.linear_factor + parent2.linear_factor)]) 187 | child_bias = np.array([x / 2 for x in (parent1.bias + parent2.bias)]) 188 | assert (len(child_linear_factor) == len(parent1.linear_factor) and len(child_linear_factor) == len( 189 | parent2.linear_factor)) 190 | self._add_event_to_ai_history(parent1, "PAREnt") 191 | self._add_event_to_ai_history(parent2, "PAREnt") 192 | ai_number = len(self.ai_histories) 193 | return AiPlayer(str(ai_number), child_quadratic_factor, child_linear_factor, child_bias) 194 | 195 | def _add_event_to_ai_history(self, ai, event) -> None: 196 | if self.generation in self.ai_histories[ai]: 197 | self.ai_histories[ai][self.generation].events.append(event) 198 | else: 199 | self.ai_histories[ai][self.generation] = AiLogEntry(None, None, [event]) 200 | 201 | def _recombine(self) -> None: 202 | """extends the population by children that are created by recombination of their parents strategies""" 203 | children = [] 204 | for child_index in range(self.num_children): # build pairs 205 | child = self._mix_strategies(self.population[child_index * 2], self.population[child_index * 2 + 1]) 206 | children.append(child) 207 | self.ai_histories[child] = dict() 208 | self._add_event_to_ai_history(child, "RECOmbination") 209 | self.population.extend(children) 210 | assert (len(self.population) <= self.population_size) 211 | 212 | def _mutate_strategy(self, ai) -> int: 213 | """mutates randomly small parts of the strategy of an AI""" 214 | mutation_counter = 0 215 | for value_index in range(AiPlayer.strategy_length): 216 | if random.random() < self.mutation_rate: 217 | ai.quadratic_factor[value_index] = Trainer._adjust_strategy_range(random.random()) 218 | mutation_counter += 1 219 | if random.random() < self.mutation_rate: 220 | ai.linear_factor[value_index] = Trainer._adjust_strategy_range(random.random()) 221 | mutation_counter += 1 222 | if random.random() < self.mutation_rate: 223 | ai.bias[value_index] = Trainer._adjust_strategy_range(random.random()) 224 | mutation_counter += 1 225 | return mutation_counter # call by reference on ai (pointer) 226 | 227 | def _mutate(self) -> None: 228 | """creates a list of mutated AIs""" 229 | copied_ais = [] 230 | max_copies = (self.population_size - len(self.population)) * self.mutation_copy_rate 231 | for ai in self.population: 232 | ai_copy = deepcopy(ai) 233 | ai_number = len(self.ai_histories) 234 | ai_copy.name = str(ai_number) 235 | ai_history_copy = deepcopy(self.ai_histories[ai]) 236 | mutation_counter = self._mutate_strategy(ai) 237 | if mutation_counter > 0: 238 | self._add_event_to_ai_history(ai, f"MUTAtion, {mutation_counter}") 239 | if len(copied_ais) < max_copies: 240 | copied_ais.append(ai_copy) 241 | self.ai_histories[ai_copy] = ai_history_copy 242 | self._add_event_to_ai_history(ai_copy, f"COPY from {ai}") 243 | self.population.extend(copied_ais) 244 | assert (len(self.population) <= self.population_size) 245 | 246 | def _add_random_ais(self) -> None: 247 | """adds random AIs to the population in order to reach the original population size""" 248 | missing_ais = self.population_size - len(self.population) 249 | ais = [] 250 | for ai_number in range(len(self.ai_histories), len(self.ai_histories) + missing_ais): 251 | ai = AiPlayer(str(ai_number), Trainer._build_random_strategy(), 252 | Trainer._build_random_strategy(), Trainer._build_random_strategy()) 253 | # ai = AiPlayer(str(ai_number), SampleStrategies.bodo_quadratic_factor, 254 | # SampleStrategies.bodo_linear_factor, SampleStrategies.bodo_bias) 255 | self.ai_histories[ai] = dict() 256 | self._add_event_to_ai_history(ai, "INITialization") 257 | ais.append(ai) 258 | self.population.extend(ais) 259 | assert (len(self.population) <= self.population_size) 260 | 261 | def _find_strongest_ai(self): 262 | """finds the highest scoring ai in a population""" 263 | max_points = float("-inf") 264 | strongest_ai = None 265 | for ai in self.population: 266 | if max_points < self.points_per_ai[ai]: 267 | max_points = self.points_per_ai[ai] 268 | strongest_ai = ai 269 | return strongest_ai, max_points 270 | 271 | def _compute_points_statistics(self): 272 | """calculates the average points that were scored in a generation""" 273 | sum_points = 0 274 | for ai in self.population: 275 | sum_points += self.points_per_ai[ai] 276 | avg_points = sum_points / self.population_size 277 | 278 | var_points = Trainer._compute_variance(self.points_per_ai.values(), avg_points) 279 | return avg_points, var_points 280 | 281 | def _compute_next_generation(self) -> None: 282 | """directs all steps that have to be done to create the next generation / a (partly) new population""" 283 | if self.generation > 0: 284 | self._select() 285 | self._recombine() 286 | self._mutate() 287 | self._add_random_ais() 288 | self._rank() # &[(ai, list(reversed(list(self.ai_histories[ai].items())))) for ai in self.population] 289 | 290 | @staticmethod 291 | def _build_random_strategy(): 292 | """builds any part of strategy (quadratic, linear, bias)""" 293 | return Trainer._adjust_strategy_range(np.random.rand(AiPlayer.strategy_length)) 294 | 295 | @staticmethod 296 | def _adjust_strategy_range(array_or_value): 297 | width = Trainer.strategy_parameter_max - Trainer.strategy_parameter_min 298 | return array_or_value * width + Trainer.strategy_parameter_min 299 | 300 | def train(self, load_population_from_file, load_ai_as_population, save_population_in_file) -> None: 301 | """trains the AIs due to the parameters and returns the final and best AI""" 302 | if load_population_from_file: 303 | self._load_population() 304 | if load_ai_as_population: 305 | self._load_ai_as_population() 306 | else: 307 | assert (self.generation == 0 and self.population == []) 308 | 309 | best_ai = None 310 | max_points = float("-inf") 311 | stop_generation = self.num_generations + self.generation 312 | print(f"group_size = {self.group_size}, play_against_own_copies = {self.play_against_own_copies}, " 313 | f"population_size = {self.population_size}, survivor_rate = {self.survivor_rate}, \n" 314 | f"child_rate = {self.child_rate}, mutation_rate = {self.mutation_rate}, " 315 | f"mutation_copy_rate = {self.mutation_copy_rate}, lowest_variance_rate = {self.lowest_variance_rate}, " 316 | f"num_generations = {self.num_generations}") 317 | while self.generation < stop_generation: 318 | self._compute_next_generation() 319 | strong_ai, strong_ai_points = self._find_strongest_ai() 320 | if strong_ai_points > max_points: 321 | max_points = strong_ai_points 322 | best_ai = strong_ai 323 | avg_points, variance = self._compute_points_statistics() 324 | print(f"Generation: {self.generation} \t Max: {strong_ai_points:.0f} \t Avg: {avg_points:.0f} " 325 | f"\t Var: {variance:.0f}") 326 | self.generation += 1 327 | avg_points, variance = self._play_against_own_copies(best_ai) 328 | print(f"Best AI \t\t\t\t\t Avg: {avg_points:.0f} \t Var: {variance:.0f}") 329 | print("Evolution finished") 330 | if save_population_in_file: 331 | self._save_final_population_and_best_ai(best_ai, avg_points) 332 | 333 | def _save_final_population_and_best_ai(self, best_ai, avg_points) -> None: 334 | """saves the final population and the best AI of a train cycle """ 335 | pickle.dump((self.population, self.ai_histories, self.generation), open("final_population.dat", "wb")) 336 | pickle.dump((best_ai, self.ai_histories[best_ai]), open(f"best_ai_{avg_points:.0f}_points.dat", "wb")) 337 | 338 | def _load_ai_as_population(self): 339 | """loads a saved AI as a population, like AI loaded by SampleStrategies""" 340 | file = open("best_ai_67_points.dat", "rb") 341 | ai, ai_history = pickle.load(file) 342 | for _ in range(self.population_size): 343 | ai_copy = deepcopy(ai) 344 | self.population.append(ai_copy) 345 | self.ai_histories[ai_copy] = ai_history 346 | file.close() 347 | 348 | def _load_population(self) -> None: 349 | """loads the population that was saved""" 350 | file = open("final_population.dat", "rb") 351 | self.population, self.ai_histories, self.generation = pickle.load(file) 352 | file.close() 353 | 354 | 355 | if __name__ == "__main__": 356 | trainer = Trainer() 357 | trainer.train(load_population_from_file=False, load_ai_as_population=False, save_population_in_file=True) 358 | --------------------------------------------------------------------------------