├── brains ├── __init__.py ├── abstractbrain.py ├── brainsimple.py ├── braindqn.py └── brainpg.py ├── creatures ├── __init__.py ├── humandqnbrain.py ├── zombie.py ├── human.py ├── bacterium.py ├── humanpgbrain.py ├── HumanPRLF.py └── abstractcreature.py ├── visualization ├── __init__.py ├── dash_online.py ├── gui.py └── dashboard.py ├── _config.yml ├── docs ├── favicon.ico ├── origin-icon.png ├── Authors.md ├── AIQ.md ├── Space.md ├── Execution.md ├── Brain.md ├── Evolution.md ├── FuturePlans.md ├── Creature.md ├── Universe.md └── Scientific.md ├── requirements.txt ├── sound.py ├── configsimulator.py ├── printing.py ├── app.py ├── config.py ├── LICENSE ├── creature_actions.py ├── cell.py ├── tests └── agent_test.py ├── simulator.py ├── utils.py ├── evolution.py ├── statistics.py ├── space.py ├── aiq.py ├── README.md └── universe.py /brains/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /creatures/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /visualization/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman 2 | 3 | plugins: 4 | - jekyll-sitemap -------------------------------------------------------------------------------- /docs/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kourgeorge/project-origin/HEAD/docs/favicon.ico -------------------------------------------------------------------------------- /docs/origin-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kourgeorge/project-origin/HEAD/docs/origin-icon.png -------------------------------------------------------------------------------- /docs/Authors.md: -------------------------------------------------------------------------------- 1 | ### This is the list of project-origin contributors for copyright purposes. 
2 | 3 | * George Kour 4 | * Pierre Kour
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | bokeh>=1.0.2 2 | tensorflow>=1.12.0 3 | matplotlib>=3.0.1 4 | torch>=1.0.0 5 | scipy>=1.1.0 6 | pandas>=0.23.4 7 | numpy>=1.15.4 8 |
-------------------------------------------------------------------------------- /sound.py: -------------------------------------------------------------------------------- 1 | __author__ = 'pkour' 2 | 3 | from config import ConfigPhysics 4 | 5 | 6 | class Sound: 7 | def __init__(self, creature, time, syllable='Bla', lasting_time=ConfigPhysics.SOUND_LASTING_TIME): 8 | self._creature = creature 9 | self._initial_time = time 10 | self._syllable = syllable 11 | self._end_time = time + lasting_time 12 | 13 | def creature(self): 14 | return self._creature 15 | 16 | def get_end_time(self): 17 | return self._end_time 18 |
-------------------------------------------------------------------------------- /docs/AIQ.md: -------------------------------------------------------------------------------- 1 | ## Artificial IQ 2 | To validate the fitness of creatures, and to track the development of their intelligence, we administer an artificial IQ test to a sample of the population at each time step. 3 | The tests consist of constructed states in which the correct action is obvious, or alternatively a specific action is clearly wrong. 4 | Each test contains a state, the action to take (or to avoid), and the test weight. 5 | For instance, if many enemies are around and there is no food, while a haven cell on the right holds plenty of food and no enemies, the creature must go right. 6 |
-------------------------------------------------------------------------------- /configsimulator.py: -------------------------------------------------------------------------------- 1 | __author__ = 'gkour' 2 | 3 | #from creatures.human import Human 4 | from creatures.humandqnbrain import HumanDQNBrain 5 | from creatures.humanpgbrain import HumanPGBrain, HumanPGUnifiedBrain 6 | from creatures.HumanPRLF import HumanPRLF, HumanPRLFUnifiedBrain 7 | from creatures.zombie import Zombie 8 | #from creatures.bacterium import Bacterium 9 | 10 | 11 | class ConfigSimulator: 12 | CSV_FILE_PATH = './log/output{}.csv' 13 | CSV_LOGGING = False 14 | LOGGING_BATCH_SIZE = 10 15 | UI_UPDATE_INTERVAL = 100 # ms 16 | RACES = [Zombie, HumanPRLFUnifiedBrain] 17 | SIMULATION_TIME_UNIT = 0.5 #s 18 |
-------------------------------------------------------------------------------- /printing.py: -------------------------------------------------------------------------------- 1 | __author__ = 'gkour' 2 | 3 | import os 4 | 5 | 6 | def print_step_stats(stats): 7 | printing_stats = stats.step_stats_df.drop(columns=['CreaturesDist', 'FoodDist']).tail(1) 8 | print(printing_stats.to_json(orient='records')) 9 | 10 | 11 | def dataframe2csv(data_frame, file_path): 12 | os.makedirs(os.path.dirname(file_path), exist_ok=True) 13 | data_frame.to_csv(path_or_buf=file_path, index=False) 14 | 15 | 16 | def print_epoch_stats(stats): 17 | last_update = stats.epoch_stats_df.tail(1).to_dict() 18 | for (key, value) in last_update.items(): 19 | print('{}: {}'.format(key, value)) 20 |
-------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | __author__ = 'gkour' 2 | 3 | import tkinter as tk 4 | from visualization.gui import
OriginGUI 5 | from configsimulator import ConfigSimulator 6 | 7 | 8 | class OriginApp: 9 | 10 | def __init__(self, master): 11 | self.master = master 12 | 13 | # Set up the GUI part 14 | self.gui = OriginGUI(master) 15 | 16 | self.periodic_call() 17 | 18 | def periodic_call(self): 19 | self.gui.process_incoming_msg() 20 | try: 21 | self.master.after(ConfigSimulator.UI_UPDATE_INTERVAL, self.periodic_call) 22 | except: 23 | print("Periodic call exception.") 24 | 25 | 26 | if __name__ == '__main__': 27 | root = tk.Tk() 28 | client = OriginApp(root) 29 | root.mainloop() 30 | -------------------------------------------------------------------------------- /brains/abstractbrain.py: -------------------------------------------------------------------------------- 1 | __author__ = 'gkour' 2 | 3 | 4 | class AbstractBrain: 5 | 6 | def __init__(self, observation_shape, num_actions): 7 | self._observation_shape = observation_shape 8 | self._num_actions = num_actions 9 | 10 | def think(self, obs): 11 | '''Given an observation should return a distribution over the action set''' 12 | raise NotImplementedError() 13 | 14 | def train(self, experience): 15 | raise NotImplementedError() 16 | 17 | def observation_shape(self): 18 | return self._observation_shape 19 | 20 | def num_actions(self): 21 | return self._num_actions 22 | 23 | def save_model(self, path): 24 | raise NotImplementedError() 25 | 26 | def load_model(self, path): 27 | raise NotImplementedError() 28 | -------------------------------------------------------------------------------- /docs/Space.md: -------------------------------------------------------------------------------- 1 | ## Space 2 | The [`space`](/space.py) is built up of a matrix of [`Cells`](/cell.py). 3 | Depending on the configuration, each Cell can contain a limited or unlimited number of creatures and the food. 4 | 5 | The universe is not exposed directly to Cells but uses space as a proxy, thus, most of the services the class space provides are delegated to the Cell class. 6 | However, it provides additional services such as returning the state in a given coordinate and finding another creature, for mating, fighting, etc. 7 | 8 | Depending on the configuration, space may be slippery or edged. 9 | If space is edged, creatures which get to the edge of the grid, and choose to take a step out of it, will stay in place. 10 | They are not encouraged to do so, because they will pay in MOVE energy but stay in place. 11 | If space is slippery, doing so will cause the creature to fall from the grid and die. 
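A minimal sketch of this edge rule is shown below; it assumes hypothetical `reduce_energy()` and `die()` helpers on the creature, so it illustrates the behavior described above rather than the project's actual move handling.

```python
from config import ConfigPhysics, ConfigBiology

def apply_move(space, creature, new_coord):
    """Illustrative edge handling: clamp at the edge or fall off, per SLIPPERY_SPACE."""
    creature.reduce_energy(ConfigBiology.MOVE_ENERGY)  # the step is paid for either way (hypothetical helper)
    if space.valid_coord(new_coord):
        return new_coord                 # a normal step inside the grid
    if ConfigPhysics.SLIPPERY_SPACE:
        creature.die()                   # slippery space: stepping off the grid is fatal (hypothetical helper)
        return None
    return creature.coord()              # edged space: the creature stays in place
```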
12 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | __author__ = 'gkour' 2 | 3 | 4 | class ConfigPhysics: 5 | SPACE_SIZE = 20 6 | NUM_FATHERS = 50 7 | ETERNITY = 100000 8 | SLIPPERY_SPACE = False 9 | FOOD_CREATURE_RATIO = 0.5 10 | INITIAL_FOOD_AMOUNT = NUM_FATHERS * 10 11 | SOUND_LASTING_TIME = 2 12 | 13 | 14 | class ConfigBiology: 15 | BASE_LIFE_EXPECTANCY = 0 16 | DNA_SIZE = 6 17 | MOVE_ENERGY = 0.5 18 | FIGHT_ENERGY = MOVE_ENERGY * 5 19 | INITIAL_ENERGY = 50 20 | MATE_ENERGY = MOVE_ENERGY * 10 21 | MATURITY_AGE = 12 # int(BASE_LIFE_EXPECTANCY / 5) 22 | BASE_LEARN_FREQ = 5 23 | BASE_VISION_RANGE = 2 24 | MEAL_SIZE = 6 25 | WORK_ENERGY = 3 26 | BASE_MEMORY_SIZE = 30 27 | VOCALIZE_ENERGY = 1 28 | EVOLUTION_MUTATION_STD = 0.5 29 | 30 | 31 | class ConfigBrain: 32 | BASE_REWARD_DISCOUNT = 0.99 33 | BASE_LEARNING_RATE = 5e-4 34 | BASE_BRAIN_STRUCTURE_PARAM = 10 35 | BASE_EPSILON = 0.1 36 | -------------------------------------------------------------------------------- /brains/brainsimple.py: -------------------------------------------------------------------------------- 1 | __author__ = 'gkour' 2 | 3 | import numpy as np 4 | import random 5 | from brains.abstractbrain import AbstractBrain 6 | 7 | 8 | class DNABrain(AbstractBrain): 9 | def __init__(self, dna): 10 | super(AbstractBrain, self).__init__() 11 | self._dna = dna 12 | 13 | def think(self, obs, eps=0): 14 | return np.random.choice(len(self._dna), 1, p=self._dna) 15 | 16 | def train(self, experience): 17 | pass 18 | 19 | def save_model(self, path): 20 | pass 21 | 22 | def load_model(self, path): 23 | pass 24 | 25 | 26 | class RandomBrain(AbstractBrain): 27 | def __init__(self, action_size): 28 | super(AbstractBrain, self).__init__() 29 | self._action_size = action_size 30 | 31 | def think(self, obs): 32 | return np.random.rand(self._action_size) 33 | 34 | def train(self, experience): 35 | pass 36 | 37 | def save_model(self, path): 38 | pass 39 | 40 | def load_model(self, path): 41 | pass 42 | -------------------------------------------------------------------------------- /docs/Execution.md: -------------------------------------------------------------------------------- 1 | ## Simulation Installation and Execution: 2 | 3 | Currently, the project can run in two modes, with or without GUI. 4 | The GUI is basically a dashboard that provides aggregated information about the population state, location, actions, age, AIQ, etc.. 5 | 6 | First you should install all requirements using the following command:\ 7 | `pip3 install -r requirements.txt` \ 8 | 9 | if you are using Anaconda use the following:\ 10 | `conda install --yes --file requirements.txt` 11 | 12 | To run GUI, run the following command:\ 13 | `python app.py` 14 | 15 | To run the simulator in console mode use the following command:\ 16 | `python simulator.py` 17 | 18 | ## Changing the simulator configuration 19 | To select the creatures to put in the universe, use the simulation configuration file, [`configsimulator.py`](configsimulator.py). 20 | in the config file you can play with the physical and biological configuration of the environment. see [`config.py`](config.py). 
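For example, to run a simulation that pits Zombies against the unified policy-gradient humans with CSV logging enabled, [`configsimulator.py`](configsimulator.py) could look roughly like this; the values shown are illustrative, not recommended settings.

```python
# configsimulator.py (sketch)
from creatures.zombie import Zombie
from creatures.humanpgbrain import HumanPGUnifiedBrain

class ConfigSimulator:
    CSV_FILE_PATH = './log/output{}.csv'
    CSV_LOGGING = True                     # write step statistics to a timestamped CSV
    LOGGING_BATCH_SIZE = 10                # accumulate epoch statistics every 10 steps
    UI_UPDATE_INTERVAL = 100               # GUI refresh interval in ms
    RACES = [Zombie, HumanPGUnifiedBrain]  # the races placed in the universe
    SIMULATION_TIME_UNIT = 0.5             # wall-clock delay per simulation step, in seconds
```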
21 | 22 | ## Dev mode: 23 | Auto generate requirement file:\ 24 | `pipreqs origin --force` -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2018, George Kour 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /creature_actions.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class AutoNumber(Enum): 5 | def __new__(cls): 6 | value = len(cls.__members__) # note no + 1 7 | obj = object.__new__(cls) 8 | obj._value_ = value 9 | return obj 10 | 11 | 12 | class Actions(AutoNumber): 13 | LEFT = () 14 | RIGHT = () 15 | UP = () 16 | DOWN = () 17 | EAT = () 18 | MATE = () 19 | FIGHT = () 20 | WORK = () 21 | DIVIDE = () 22 | VOCALIZE = () 23 | 24 | @staticmethod 25 | def get_all_actions(): 26 | return [Actions.LEFT, Actions.RIGHT, Actions.UP, Actions.DOWN, Actions.EAT, Actions.MATE, Actions.FIGHT, 27 | Actions.WORK, Actions.DIVIDE, Actions.VOCALIZE] 28 | 29 | def __str__(self): 30 | return str(self.name) 31 | 32 | @staticmethod 33 | def num_actions(): 34 | return len(Actions.get_all_actions()) 35 | 36 | @staticmethod 37 | def is_legal(action): 38 | if action in Actions.get_all_actions(): 39 | return True 40 | return False 41 | 42 | @staticmethod 43 | def index_to_enum(index): 44 | return Actions.get_all_actions()[index] 45 | 46 | @staticmethod 47 | def enum_to_index(action): 48 | return Actions.get_all_actions().index(action) 49 | 50 | @staticmethod 51 | def get_actions_str(): 52 | return [str(action) for action in Actions.get_all_actions()] 53 | -------------------------------------------------------------------------------- /creatures/humandqnbrain.py: -------------------------------------------------------------------------------- 1 | __author__ = 'gkour' 2 | 3 | from config import ConfigBiology, ConfigBrain 4 | from brains.braindqn import BrainDQN 5 | import utils 6 | from creatures.human import Human 7 | from evolution import DNA 8 | 9 | 10 | class HumanDQNBrain(Human): 11 | Fitrah = [0, 0, 0, 
0, 0, 0, 0] 12 | 13 | def __init__(self, universe, id, dna, age=0, energy=ConfigBiology.INITIAL_ENERGY, parents=None): 14 | super(HumanDQNBrain, self).__init__(universe, id, dna, age, energy, parents) 15 | 16 | def initialize_brain(self): 17 | self._brain = BrainDQN(observation_shape=self.observation_shape(), 18 | num_actions=self.num_actions(), 19 | reward_discount=self.reward_discount()) 20 | 21 | 22 | 23 | @staticmethod 24 | def get_race(): 25 | return HumanDQNBrain 26 | 27 | @staticmethod 28 | def race_name(): 29 | return 'HumanDQNBrain' 30 | 31 | @staticmethod 32 | def race_basic_dna(): 33 | return DNA(ConfigBiology.BASE_MEMORY_SIZE, 34 | ConfigBrain.BASE_LEARNING_RATE, 35 | ConfigBrain.BASE_BRAIN_STRUCTURE_PARAM, 36 | ConfigBiology.BASE_LEARN_FREQ, 37 | ConfigBiology.BASE_LIFE_EXPECTANCY, 38 | ConfigBrain.BASE_REWARD_DISCOUNT, 39 | HumanDQNBrain.race_fitrah()) 40 | 41 | @staticmethod 42 | def race_fitrah(): 43 | return utils.normalize_dist(HumanDQNBrain.Fitrah) 44 | 45 | 46 | def model_path(self): 47 | return './models/' + self.race_name() 48 | 49 | def new_born(self): 50 | pass 51 | -------------------------------------------------------------------------------- /cell.py: -------------------------------------------------------------------------------- 1 | __author__ = 'gkour' 2 | 3 | 4 | class Cell: 5 | def __init__(self, coord): 6 | self._coord = coord 7 | self._creatures = [] 8 | self._food = 0 9 | self._sounds = [] 10 | 11 | def insert_creature(self, creature): 12 | self._creatures.append(creature) 13 | return self 14 | 15 | def remove_creature(self, creature): 16 | self._creatures.remove(creature) 17 | 18 | def add_food(self, amount): 19 | self._food += amount 20 | 21 | def get_food(self): 22 | return self._food 23 | 24 | def remove_all_food(self): 25 | self._food = 0 26 | 27 | def remove_food(self, amount): 28 | self._food -= amount 29 | 30 | def add_sound(self, sound): 31 | self._sounds.append(sound) 32 | 33 | def remove_sounds(self, time): 34 | self._sounds = [s for s in self._sounds if s.get_end_time() > time] 35 | 36 | def get_sounds(self): 37 | return self._sounds 38 | 39 | def get_coord(self): 40 | return self._coord 41 | 42 | def creatures(self): 43 | return self._creatures 44 | 45 | def energy_level(self): 46 | return sum([creature.energy() for creature in self.creatures()]) 47 | 48 | def race_energy_level(self, race): 49 | return sum([creature.energy() for creature in self.creatures() if creature.race_name() == race.race_name()]) 50 | 51 | def get_state_in_cell(self, races): 52 | food = [self.get_food()] 53 | sounds = [len(self.get_sounds())] 54 | races_energy = [self.race_energy_level(race) for race in races] 55 | return food + sounds + races_energy 56 | 57 | def num_creatures(self): 58 | return len(self._creatures) 59 | 60 | def __str__(self): 61 | return str(self._coord) + ':F(' + str(self.get_food()) + ')C(' + str(len(self.creatures())) + ') ' 62 | -------------------------------------------------------------------------------- /docs/Brain.md: -------------------------------------------------------------------------------- 1 | ## Brain 2 | 3 | The brain is a part of every creature in project-origin. 4 | The role of the brain is to decide on an action, given the current state of the creature, which includes information about the creature's surrounding environment and the internal state of the creature, such as age and energy. 5 | The size of the and the nature of cues in the observed surroundings is determined by the creature senses. 
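As a minimal sketch of this interface, a hypothetical brain that ignores its observation and assigns equal probability to every action could look as follows; it is similar in spirit to `RandomBrain` in [`brainsimple.py`](/brains/brainsimple.py) and is only an illustration, not part of the project.

```python
import numpy as np
from brains.abstractbrain import AbstractBrain

class UniformBrain(AbstractBrain):
    """Illustrative example: a brain with no learning and a uniform action policy."""

    def think(self, obs):
        # think() must return a distribution over the action set
        return np.ones(self.num_actions()) / self.num_actions()

    def train(self, experience):
        pass  # nothing to learn
```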
6 | 7 | There could be many types of brains, from random ones to complicated brains that employ neural networks and advanced learning algorithms, such as reinforcement learning. 8 | The brain can be trained and improved by the experience of the creature. 9 | The creature's experiences are saved in its (limited) memory, whose size is defined in the creature's DNA. 10 | The frequency of brain training is also defined in the creature's DNA. 11 | 12 | Each creature may have a separate brain, or alternatively, the entire race may share the same brain, which centralizes the race's knowledge. 13 | If every creature has a separate brain, its inherited DNA may determine the structure of the brain. 14 | In this case, the brain is trained only by the experience of the individual creature. 15 | The structure of the race brain is predefined by the race, and the parts of the creature's DNA that determine the brain structure are treated like an [Exon](https://en.wikipedia.org/wiki/Exon). 16 | 17 | ## Creating new brains 18 | The class [`abstractbrain.py`](/brains/abstractbrain.py) is an abstract class that each brain should implement. 19 | The derived class must implement the following methods: 20 | 21 | #### `think` 22 | Returns a distribution over the actions given an observation. 23 | 24 | #### `train` 25 | Trains the brain based on the given experience. 26 | It may first sample a minibatch from the experience. 27 | 28 | #### `save_model` (optional) 29 | Saves the brain model parameters to the given path. 30 | 31 | #### `load_model` (optional) 32 | Loads the brain parameters from disk given a path.
-------------------------------------------------------------------------------- /docs/Evolution.md: -------------------------------------------------------------------------------- 1 | ## Evolution 2 | Each creature has a DNA which defines its biological structure, intelligence, senses, action tendencies, etc. 3 | For instance, how far it sees, what the brain architecture and hyper-parameters are, and what its life expectancy and maturity age are. 4 | The DNA is inherited from the creature's ancestor/s in an evolutionary process. 5 | The `DNA` class is defined in [`evolution.py`](/evolution.py). 6 | 7 | ### DNA 8 | The race defines a basic DNA for its creatures. 9 | However, when each individual is born, the descendant does not get the exact same genes as its parents. 10 | The mixing of two DNAs is defined in the method `Evolution.mix_dna()` in [`evolution.py`](/evolution.py), and the mixed DNA is then mutated by `Evolution.mutate_dna()`. 11 | 12 | 13 | 14 | 15 | 16 | ### Fitrah 17 | The ["Fitrah"](https://en.wikipedia.org/wiki/Fitra) is an Arabic word that has no exact English equivalent. 18 | It is usually translated as “original disposition”, “natural constitution,” or “innate nature”. 19 | The creature's DNA includes a Fitrah part, which dictates its inherited tendency to perform a specific action. 20 | The Fitrah is taken into account in addition to the brain decision when selecting an action to take. 21 | Note that the Fitrah is implemented simply as a vector of action probabilities and does not take the current state into account. 22 | Since the Fitrah is part of the creature's DNA, it cannot change during the lifetime of the creature but is passed on to the creature's descendants. 23 | The closest concept to Fitrah is instinct, which is the inherent inclination of a living organism towards a particular complex behavior in response to specific stimuli. 24 | However, note that while instinct is an innate behavior triggered in reaction to stimuli, Fitrah is not. 25 |
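For a concrete picture of how the Fitrah enters the decision, `Bacterium.decide()` in [`bacterium.py`](/creatures/bacterium.py) adds it to the brain's output before the epsilon-greedy selection (excerpt, lightly condensed):

```python
# creatures/bacterium.py (excerpt)
def decide(self, state):
    eps = max(ConfigBrain.BASE_EPSILON,
              1 - (self.age() / (self.learning_frequency() * ConfigBiology.MATURITY_AGE)))
    brain_actions_prob = self._brain.think(state)
    action_prob = utils.normalize_dist(brain_actions_prob + self.fitrah())  # brain output + Fitrah
    return utils.epsilon_greedy(eps, dist=action_prob)
```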
-------------------------------------------------------------------------------- /docs/FuturePlans.md: -------------------------------------------------------------------------------- 1 | ## The future of project-origin 2 | This project is a seed for a far-reaching vision. In the future, it should allow developing a wide variety of universes. Below are some examples of future capabilities of project-origin: 3 | 4 | - project-origin should allow extending the physics of the environment easily, and maybe incorporating realistic physical simulators. 5 | - It should support creature morphology, i.e. giving physical shape to creatures, which may affect their capabilities such as movement speed, power, etc. 6 | - It should support the introduction of different inanimate and non-intelligent living objects with a variety of functionalities and behaviors to the universe, e.g. poison. 7 | - While space is currently 2-D, in the future it should support different types of spaces. 8 | - Easily controlling biological aspects of physics such as mating rules, evolution, and intelligence inheritance. 9 | - It should allow extending the creature's capabilities, such as adding vocal communication and even love, hate, and motivation. 10 | - It should allow defining dynamic natural conditions, like periods of dearth and epidemics. 11 | - Use a dynamic-graph deep learning framework such as TensorFlow 2.0 or PyTorch. 12 | - Use the OpenAI Baselines and/or Google Dopamine projects as implementations of the creature brain. 13 | - Using Unity or other engines, implement a visual simulation environment. 14 | - Add morphology and form to creatures that define their biological and physical features. 15 | 16 | 17 | ## Far-reaching plans and directions 18 | - Model selection and hyper-parameter tuning. Show that convolutional layers can survive better than fully connected layers. 19 | - Special relations between individuals, using graph networks to process the relationships between individuals. 20 | - Hebbian learning vs. gradient-based learning in a survival environment. Which is better? 21 | - Develop a game platform which allows training a race and then letting it compete over resources with a different race. 22 | - Compare the different implementations of RL algorithms: model-based and model-free, policy gradient and TD learning.
23 | -------------------------------------------------------------------------------- /creatures/zombie.py: -------------------------------------------------------------------------------- 1 | __author__ = 'gkour' 2 | 3 | from config import ConfigBiology, ConfigBrain 4 | from brains.brainsimple import RandomBrain 5 | from creatures.human import Human 6 | import utils 7 | from evolution import DNA 8 | 9 | 10 | class Zombie(Human): 11 | """Human like creature but with no reason, acting from the inherited fitrah or behave randomly""" 12 | _master_brain = None 13 | Fitrah = [0, 0, 0, 0, 0, 0] 14 | 15 | def __init__(self, universe, id, dna, age=0, energy=ConfigBiology.INITIAL_ENERGY, parents=None): 16 | super(Zombie, self).__init__(universe, id, dna, age, energy, parents) 17 | 18 | @staticmethod 19 | def get_race(): 20 | return Zombie 21 | 22 | @staticmethod 23 | def race_name(): 24 | return 'Zombie' 25 | 26 | @staticmethod 27 | def race_basic_dna(): 28 | return DNA(ConfigBiology.BASE_MEMORY_SIZE, 29 | ConfigBrain.BASE_LEARNING_RATE, 30 | ConfigBrain.BASE_BRAIN_STRUCTURE_PARAM, 31 | ConfigBiology.BASE_LEARN_FREQ, 32 | ConfigBiology.BASE_LIFE_EXPECTANCY, 33 | ConfigBrain.BASE_REWARD_DISCOUNT, 34 | Zombie.race_fitrah()) 35 | 36 | def decide(self, state): 37 | brain_actions_prob = self._brain.think(state) 38 | action_prob = utils.normalize_dist(brain_actions_prob) # + self.fitrah() 39 | decision = utils.epsilon_greedy(0, dist=action_prob) 40 | return decision 41 | 42 | def initialize_brain(self): 43 | self._brain = self.get_master_brain() 44 | 45 | def get_master_brain(self): 46 | if Zombie._master_brain is None: 47 | Zombie._master_brain = RandomBrain(self.num_actions()) 48 | return Zombie._master_brain 49 | return Zombie._master_brain 50 | 51 | @staticmethod 52 | def race_fitrah(): 53 | return utils.normalize_dist(Zombie.Fitrah) 54 | 55 | # @staticmethod 56 | # def self_race_enemy(): 57 | # return True 58 | 59 | def dying(self): 60 | pass 61 | 62 | def smarten(self): 63 | pass 64 | -------------------------------------------------------------------------------- /tests/agent_test.py: -------------------------------------------------------------------------------- 1 | # I need to build a basic environment that will help me sanity check the brains. 2 | # Make sure there are no serious and dump bugs in the models implementation using gym. 
3 | 4 | 5 | from collections import deque 6 | 7 | import gym 8 | import numpy as np 9 | import torch 10 | from torch.distributions import Categorical 11 | 12 | import utils 13 | from brains.brainpg import BrainPG 14 | 15 | # Building the environment 16 | env = gym.make("FrozenLake-v0") 17 | state_size = env.observation_space.n 18 | 19 | def rollout(env, brain): 20 | episode = [] 21 | observation = env.reset() 22 | done = False 23 | while not done: 24 | # env.render() 25 | # action = env.action_space.sample() # your agent here (this takes random actions) 26 | obs_1hot = np.zeros(state_size) 27 | obs_1hot[observation] = 1 28 | brain_actions_prob = brain.think(obs=obs_1hot) 29 | action = Categorical(probs=torch.tensor(brain_actions_prob)).sample().item() 30 | new_observation, reward, done, info = env.step(action) 31 | 32 | dec_1hot = np.zeros(env.action_space.n) 33 | dec_1hot[action] = 1 34 | 35 | newobs_1hot = np.zeros(state_size) 36 | newobs_1hot[new_observation] = 1 37 | 38 | experience = [obs_1hot, dec_1hot, reward, newobs_1hot, done] 39 | episode.append(experience) 40 | observation = new_observation 41 | 42 | return episode 43 | 44 | 45 | 46 | # building the agent 47 | memory = deque(maxlen=1000) 48 | brain = BrainPG(observation_shape=tuple([state_size]), num_actions=env.action_space.n, reward_discount=0.99, learning_rate=0.001) 49 | success = [] 50 | for i in range(1, 100000): 51 | episode = rollout(env, brain) 52 | discounted_rewards = utils.discount_rewards([data[2] for data in episode], gamma=0.99) 53 | for step in range(len(episode)): 54 | episode[step][2] = discounted_rewards[step] 55 | memory.extend(episode) 56 | success += [episode[-1][2]] 57 | if i % 1000 == 0: 58 | print(np.mean(success[-1000:])) 59 | 60 | loss = brain.train(memory) 61 | # print(loss) 62 | 63 | env.close() 64 | -------------------------------------------------------------------------------- /creatures/human.py: -------------------------------------------------------------------------------- 1 | __author__ = 'gkour' 2 | 3 | from creatures.abstractcreature import AbstractCreature 4 | from creature_actions import Actions 5 | from config import ConfigBiology, ConfigBrain 6 | import utils 7 | from evolution import DNA 8 | from brains.brainsimple import RandomBrain 9 | import random 10 | import numpy as np 11 | 12 | 13 | class Human(AbstractCreature): 14 | Fitrah = [0, 0, 0, 0, 0, 0] 15 | 16 | def __init__(self, universe, id, dna, age=0, energy=ConfigBiology.INITIAL_ENERGY, parents=None): 17 | super(Human, self).__init__(universe, id, dna, age, energy, parents) 18 | self.new_born() 19 | 20 | @staticmethod 21 | def race_basic_dna(): 22 | return DNA(ConfigBiology.BASE_MEMORY_SIZE, 23 | ConfigBrain.BASE_LEARNING_RATE, 24 | ConfigBrain.BASE_BRAIN_STRUCTURE_PARAM, 25 | ConfigBiology.BASE_LEARN_FREQ, 26 | ConfigBiology.BASE_LIFE_EXPECTANCY, 27 | ConfigBrain.BASE_REWARD_DISCOUNT, 28 | Human.race_fitrah()) 29 | 30 | 31 | @staticmethod 32 | def get_actions(): 33 | return [Actions.LEFT, Actions.RIGHT, Actions.UP, Actions.DOWN, Actions.EAT, Actions.FIGHT] 34 | 35 | @staticmethod 36 | def get_race(): 37 | return Human 38 | 39 | @staticmethod 40 | def race_name(): 41 | return 'Human' 42 | 43 | @staticmethod 44 | def race_fitrah(): 45 | return utils.normalize_dist(Human.Fitrah) 46 | 47 | @staticmethod 48 | def self_race_enemy(): 49 | return False 50 | 51 | def decide(self, state): 52 | eps = max(ConfigBrain.BASE_EPSILON, 53 | 1 - (self._age / (self.learning_frequency() * ConfigBiology.MATURITY_AGE))) 54 | brain_actions_prob = 
self.brain().think(state) 55 | #There is a problem with the dna fitrah (7 instead of 6). 56 | #action_prob = utils.normalize_dist(self.fitrah() + brain_actions_prob) 57 | decision = utils.epsilon_greedy(eps, brain_actions_prob) 58 | 59 | return decision 60 | 61 | def new_born(self): 62 | if self.get_parents() is None: 63 | return 64 | memories = [parent.get_memory() for parent in self.get_parents() if len(parent.get_memory()) > 0] 65 | if len(memories) == 0: 66 | return 67 | 68 | oral_tradition = np.concatenate(memories) 69 | self._memory.extend( 70 | random.sample(oral_tradition.tolist(), min(int(self.memory_size() / 2), len(oral_tradition)))) 71 | for i in range(5): 72 | self.brain().train(self.get_memory()) 73 | -------------------------------------------------------------------------------- /creatures/bacterium.py: -------------------------------------------------------------------------------- 1 | __author__ = 'gkour' 2 | 3 | from creatures.abstractcreature import AbstractCreature 4 | from creature_actions import Actions 5 | from config import ConfigBiology, ConfigBrain 6 | from brains.braindqntf import BrainDQN 7 | import utils 8 | from evolution import DNA 9 | 10 | 11 | class Bacterium(AbstractCreature): 12 | _master_brain = None 13 | Fitrah = [0, 0, 0, 0, 1, 1] 14 | 15 | def __init__(self, universe, id, dna, age=0, energy=ConfigBiology.INITIAL_ENERGY, parents=None): 16 | super(Bacterium, self).__init__(universe, id, dna, age, energy, parents) 17 | self._brain = self.get_master_brain() 18 | 19 | def get_master_brain(self): 20 | if Bacterium._master_brain is None: 21 | Bacterium._master_brain = BrainDQN(lr=ConfigBrain.BASE_LEARNING_RATE, 22 | observation_shape=self.observation_shape(), 23 | num_actions=self.num_actions(), 24 | h_size=ConfigBrain.BASE_BRAIN_STRUCTURE_PARAM, 25 | reward_discount=ConfigBrain.BASE_REWARD_DISCOUNT, scope='master' + self.race_name()) 26 | return Bacterium._master_brain 27 | return Bacterium._master_brain 28 | 29 | @staticmethod 30 | def get_actions(): 31 | return [Actions.LEFT, Actions.RIGHT, Actions.UP, Actions.DOWN, Actions.EAT, Actions.DIVIDE] 32 | 33 | @staticmethod 34 | def race_basic_dna(): 35 | return DNA(ConfigBiology.BASE_MEMORY_SIZE, 36 | ConfigBrain.BASE_LEARNING_RATE, 37 | ConfigBrain.BASE_BRAIN_STRUCTURE_PARAM, 38 | ConfigBiology.BASE_LEARN_FREQ, 39 | ConfigBiology.BASE_LIFE_EXPECTANCY, 40 | ConfigBrain.BASE_REWARD_DISCOUNT, 41 | Bacterium.race_fitrah()) 42 | 43 | @staticmethod 44 | def race_name(): 45 | return 'Bacterium' 46 | 47 | def get_race(self): 48 | return Bacterium 49 | 50 | @staticmethod 51 | def self_race_enemy(): 52 | return True 53 | 54 | @staticmethod 55 | def race_fitrah(): 56 | return utils.normalize_dist(Bacterium.Fitrah) 57 | 58 | def decide(self, state): 59 | eps = max(ConfigBrain.BASE_EPSILON, 60 | 1 - (self.age() / (self.learning_frequency() * ConfigBiology.MATURITY_AGE))) 61 | brain_actions_prob = self._brain.think(state) 62 | action_prob = utils.normalize_dist(brain_actions_prob + self.fitrah()) 63 | action = utils.epsilon_greedy(eps, dist=action_prob) 64 | return action 65 | -------------------------------------------------------------------------------- /simulator.py: -------------------------------------------------------------------------------- 1 | __author__ = 'gkour' 2 | 3 | from universe import Universe 4 | from statistics import Stats 5 | import printing 6 | from configsimulator import ConfigSimulator 7 | import time 8 | import threading 9 | from enum import Enum 10 | 11 | 12 | class SimState(Enum): 13 | IDLE = 
"Simulation Idle" 14 | INITIALIZING = "Simulation Initialized" 15 | RUNNING = "Simulation Running" 16 | STOPPING = "Simulation Terminating" 17 | 18 | 19 | class Simulator: 20 | def __init__(self, queue=None): 21 | self._thread = None 22 | self._status = SimState.IDLE 23 | self._msg_queue = queue 24 | 25 | def run(self): 26 | self._status = SimState.INITIALIZING 27 | self._report_state() 28 | 29 | stats = Stats() 30 | universe = Universe(ConfigSimulator.RACES, stats) 31 | 32 | self._status = SimState.RUNNING 33 | self._report_state() 34 | 35 | while self._status == SimState.RUNNING and universe.pass_time(): 36 | time.sleep(ConfigSimulator.SIMULATION_TIME_UNIT) 37 | stats.accumulate_step_stats(universe) 38 | printing.print_step_stats(stats) 39 | self._report(stats) 40 | stats.initialize_inter_step_stats() 41 | 42 | if universe.get_time() % ConfigSimulator.LOGGING_BATCH_SIZE == 0: 43 | stats.accumulate_epoch_stats(universe) 44 | printing.print_epoch_stats(stats) 45 | 46 | if ConfigSimulator.CSV_LOGGING: 47 | printing.dataframe2csv(stats.step_stats_df, 48 | ConfigSimulator.CSV_FILE_PATH.format(time.strftime("%Y%m%d-%H%M%S"))) 49 | 50 | self._status = SimState.IDLE 51 | self._report_state() 52 | 53 | def status(self): 54 | return self._status 55 | 56 | def stop(self): 57 | if self._status == SimState.IDLE: 58 | return 59 | self._status = SimState.STOPPING 60 | if self._thread is None or not self._thread.isAlive(): 61 | self._status = SimState.IDLE 62 | else: 63 | self._status = SimState.STOPPING 64 | self._report_state() 65 | 66 | def _report(self, msg): 67 | if self._msg_queue: 68 | self._msg_queue.put(msg) 69 | 70 | def _report_state(self): 71 | if self._msg_queue is not None: 72 | self._msg_queue.put(self.status()) 73 | 74 | def run_in_thread(self): 75 | self._thread = threading.Thread(target=self.run) 76 | self._thread.start() 77 | 78 | 79 | if __name__ == '__main__': 80 | sim = Simulator() 81 | sim.run() 82 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | __author__ = 'gkour' 2 | 3 | import numpy as np 4 | from scipy.signal import lfilter 5 | 6 | 7 | def discount_rewards(r, gamma): 8 | discounted_r = np.zeros_like(r).astype(float) 9 | running_add = 0 10 | for t in reversed(range(0, len(r))): 11 | running_add = running_add * gamma + r[t] 12 | discounted_r[t] = running_add 13 | return discounted_r 14 | 15 | 16 | def discount(x, gamma): 17 | x = np.asarray(x, dtype=np.float32) 18 | return lfilter([1], [1, -gamma], x[::-1], axis=0)[::-1] 19 | 20 | 21 | def epsilon_greedy(eps, dist): 22 | p = np.random.rand() 23 | if p < eps: 24 | selection = np.random.randint(low=0, high=len(dist)) 25 | else: 26 | selection = np.argmax(dist) 27 | 28 | return selection 29 | 30 | 31 | def dist_selection(dist): 32 | if sum(dist) != 1: 33 | dist[0] = dist[0] + (1 - sum(dist)) 34 | action = np.argmax(np.random.multinomial(1, dist)) 35 | 36 | return action 37 | 38 | 39 | # Arg is an int and size is the len of the returning vector 40 | def one_hot(arg, size): 41 | result = np.zeros(size) 42 | if 0 <= arg < size: 43 | result[arg] = 1 44 | return result 45 | else: 46 | return None 47 | 48 | 49 | def moving_average(data, window_width): 50 | cumsum_vec = np.cumsum(np.insert(data, 0, 0)) 51 | return (cumsum_vec[window_width:] - cumsum_vec[:-window_width]) / window_width 52 | 53 | 54 | def softmax(x, temprature=1): 55 | """ 56 | Compute softmax values for each sets of scores in x. 
57 | 58 | Rows are scores for each class. 59 | Columns are predictions (samples). 60 | """ 61 | # x = normalize(np.reshape(x, (1, -1)), norm='l2')[0] 62 | ex_x = np.exp(temprature * np.subtract(x, np.max(x))) 63 | if np.isinf(np.sum(ex_x)): 64 | raise Exception('Inf in softmax') 65 | return ex_x / np.sum(ex_x) 66 | 67 | 68 | def roll_fight(energy1, energy2): 69 | dist = normalize_dist([energy1, energy2]) 70 | return np.random.choice(a=[-1, 1], p=dist) 71 | 72 | 73 | def emptynanmean(array): 74 | if array is not None and len(array) > 0: 75 | return np.nanmean(array) 76 | return 0 77 | 78 | 79 | def safe_log2(number): 80 | if number <= np.e: 81 | return 0 82 | return int(number/10) 83 | 84 | 85 | def cosine_similarity(vec1, vec2): 86 | return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2)) 87 | 88 | 89 | def normalize_dist(p): 90 | return softmax(p) 91 | 92 | 93 | def linear_dist_normalization(p): 94 | p = np.asarray(p) 95 | p += abs(min(p)) 96 | norm = sum(p) 97 | if norm == 0: 98 | norm = 1e-16 99 | res = p / norm 100 | return res 101 | -------------------------------------------------------------------------------- /docs/Creature.md: -------------------------------------------------------------------------------- 1 | ## Creatures 2 | 3 | Creatures are intelligent agents that live and behave in the universe. 4 | Their main goal is to survive and flourish. 5 | Creatures have senses, basically vision, from which they get information about their local environment. 6 | Different creatures may have a different set of abilities, namely, different actions they can do in their environment and to other surrounding creatures. 7 | 8 | Creatures belong to a **Race** (see below) determined by the race of his ancestors and which define his set of abilities (actions). 9 | It has a DNA which it inherits from his parents in an evolutionary process controlled by the evolution. 10 | See more information in [Evolution.md](/docs/Evolution.md). 11 | At each time step, the creature makes an action, which is decided upon by one or several "intelligence sources" such as his brain, his instincts and by chance. 12 | To decide on his best action, the creature sees the environment around him and his internal states such as energy and age. 13 | In reinforcement learning terminology, this is called the "environment state". 14 | The state description of the surrounding environment may contain the distribution of food, creatures, their race, and energy. 15 | If the edge of the grid is in the vision range of the creature, each aspect of the environment in the location of the edge and beyond will be marked by (-1). 16 | 17 | ### Races: 18 | The creature race defines the actions he can make and whom he can mate with and fight. 19 | There could be several races in a single experiment. 20 | Usually, the goal in such scenarios is to see which race is more effective and has better survival skills. 21 | 22 | 23 | ### Memory and Oral Tradition 24 | The creature has a memory that accumulates its experiences. 25 | The size of the memory is limited by the creature's DNA. 26 | "Oral Tradition" is the knowledge passed from generation to generation. 27 | While this functionality is not implemented in the abstract class creature, in humans, a subclass of creature, ancestors inherit their memories to their offsprings. 28 | Note that while the knowledge may pass between generations, the brain parameters are unique to each individual. 
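This mechanism can be seen in `Human.new_born()` in [`human.py`](/creatures/human.py): a newborn samples up to half of its memory capacity from its parents' memories and runs a few training passes on them (excerpt):

```python
# creatures/human.py (excerpt)
def new_born(self):
    if self.get_parents() is None:
        return
    memories = [parent.get_memory() for parent in self.get_parents() if len(parent.get_memory()) > 0]
    if len(memories) == 0:
        return
    oral_tradition = np.concatenate(memories)
    self._memory.extend(random.sample(oral_tradition.tolist(),
                                      min(int(self.memory_size() / 2), len(oral_tradition))))
    for i in range(5):  # warm-up training on the inherited experience
        self.brain().train(self.get_memory())
```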
29 | 30 | 31 | ### Creating New Creatures 32 | Implementation wise, each race should be derived from [`abstractcreature.py`](/creatures/abstractcreature.py) or from one of its sub-classes. 33 | It should implement some basic methods to define the race nature. 34 | For implementation examples, see class [`human.py`](/creatures/human.py), class [`bacterium.py`](/creatures/bacterium.py) and class [`zombie.py`](/creatures/zombie.py) that inherits from class human but changes the action decision method. 35 | One of the important methods in the creature is the `decide` method, which given the state, decides on an action. 36 | It may take into consideration the brain recommendation, the fitrah, the creature curiosity mechanism, age, energy or any other information to make a decision. 37 | -------------------------------------------------------------------------------- /evolution.py: -------------------------------------------------------------------------------- 1 | __author__ = 'gkour' 2 | 3 | import numpy as np 4 | from random import randint 5 | import utils 6 | from config import ConfigBiology 7 | 8 | 9 | class Evolution: 10 | 11 | @staticmethod 12 | def mix_dna(dna1, dna2): 13 | new_dna = DNA(np.mean([dna1.memory_size(), dna2.memory_size()]), 14 | np.mean([dna1.learning_rate(), dna2.learning_rate()]), 15 | np.mean([dna1.brain_structure_param(), dna2.brain_structure_param()]), 16 | np.mean([dna1.learning_frequency(), dna2.learning_frequency()]), 17 | np.mean([dna1.life_expectancy(), dna2.life_expectancy()]), 18 | np.mean([dna1.reward_discount(), dna2.reward_discount()]), 19 | np.mean([dna1.fitrah(), dna2.fitrah()], axis=0)) 20 | return Evolution.mutate_dna(new_dna) 21 | 22 | @staticmethod 23 | def mutate_dna(dna): 24 | memory_size = max(10, int(dna.memory_size()) + randint(-1, 1)) 25 | learning_rate = max(np.random.normal(loc=dna.learning_rate(), scale=0.001), 1e-6) 26 | brain_structure_param = max(2, dna.brain_structure_param() + randint(-1, +1)) 27 | learning_frequency = max(dna.learning_frequency() + randint(-1, 1), 1) 28 | life_expectancy = max(0, dna.life_expectancy() + randint(-10, 10)) 29 | reward_discount = max(0.1, min(1, np.random.normal(loc=dna.reward_discount(), scale=0.1))) 30 | fitrah = utils.normalize_dist( 31 | dna.fitrah() + np.random.normal(loc=0, scale=ConfigBiology.EVOLUTION_MUTATION_STD, size=dna.fitrah().size)) 32 | return DNA(memory_size, learning_rate, brain_structure_param, learning_frequency, life_expectancy, reward_discount, fitrah) 33 | 34 | 35 | class DNA: 36 | def __init__(self, base_memory_size, 37 | base_learning_rate, 38 | base_brain_structure_param, 39 | base_learning_frequency, 40 | base_life_expectancy, 41 | base_reward_discount, 42 | fitrah): 43 | self._memory_size = base_memory_size 44 | self._learning_rate = base_learning_rate 45 | self._brain_structure_param = base_brain_structure_param 46 | self._learning_frequency = base_learning_frequency 47 | self._life_expectancy = base_life_expectancy 48 | self._reward_discount = base_reward_discount 49 | self._fitrah = fitrah 50 | 51 | def memory_size(self): 52 | return self._memory_size 53 | 54 | def learning_rate(self): 55 | return self._learning_rate 56 | 57 | def learning_frequency(self): 58 | return self._learning_frequency 59 | 60 | def brain_structure_param(self): 61 | return self._brain_structure_param 62 | 63 | def life_expectancy(self): 64 | return self._life_expectancy 65 | 66 | def reward_discount(self): 67 | return self._reward_discount 68 | 69 | def fitrah(self): 70 | ''' Fitra means innate nature is 
Arabic. This defines a basic tendency of doing actions dictated in the dna. 71 | See documentation for more information''' 72 | return self._fitrah 73 | 74 | def flatten(self): 75 | return [self.memory_size(), self.learning_rate(), self.learning_frequency(), self.brain_structure_param(), 76 | self.life_expectancy(), self.reward_discount()] + list(self.fitrah()) 77 | -------------------------------------------------------------------------------- /creatures/humanpgbrain.py: -------------------------------------------------------------------------------- 1 | __author__ = 'gkour' 2 | 3 | import os 4 | 5 | import torch 6 | from torch.distributions import Categorical 7 | 8 | import utils 9 | from brains.brainpg import BrainPG 10 | from config import ConfigBiology, ConfigBrain 11 | from creatures.human import Human 12 | from evolution import DNA 13 | 14 | 15 | class HumanPGBrain(Human): 16 | Fitrah = [0, 0, 0, 0, 0] 17 | 18 | def __init__(self, universe, id, dna, age=0, energy=ConfigBiology.INITIAL_ENERGY, parents=None): 19 | super(HumanPGBrain, self).__init__(universe, id, dna, age, energy, parents) 20 | 21 | @staticmethod 22 | def get_race(): 23 | return HumanPGBrain 24 | 25 | @staticmethod 26 | def race_name(): 27 | return 'HumanPGBrain' 28 | 29 | @staticmethod 30 | def race_basic_dna(): 31 | return DNA(ConfigBiology.BASE_MEMORY_SIZE, 32 | ConfigBrain.BASE_LEARNING_RATE, 33 | ConfigBrain.BASE_BRAIN_STRUCTURE_PARAM, 34 | ConfigBiology.BASE_LEARN_FREQ, 35 | ConfigBiology.BASE_LIFE_EXPECTANCY, 36 | ConfigBrain.BASE_REWARD_DISCOUNT, 37 | HumanPGBrain.race_fitrah()) 38 | 39 | @staticmethod 40 | def race_fitrah(): 41 | return utils.normalize_dist(HumanPGBrain.Fitrah) 42 | 43 | # @staticmethod 44 | # def self_race_enemy(): 45 | # return True 46 | 47 | def initialize_brain(self): 48 | self._brain = BrainPG(observation_shape=tuple(self.observation_shape()), 49 | num_actions=self.num_actions(), reward_discount=self.reward_discount()) 50 | 51 | def decide(self, state): 52 | eps = max(ConfigBrain.BASE_EPSILON, 53 | 1 - (self._age / (self.learning_frequency() * ConfigBiology.MATURITY_AGE))) 54 | brain_actions_prob = self.brain().think(state) 55 | # action_prob = utils.normalize_dist(self.fitrah() + brain_actions_prob) 56 | # decision = utils.dist_selection(brain_actions_prob) 57 | decision = Categorical(probs=torch.tensor(brain_actions_prob)).sample().item() 58 | return decision 59 | 60 | 61 | class HumanPGUnifiedBrain(HumanPGBrain): 62 | _master_brain = None 63 | 64 | def __init__(self, universe, id, dna, age=0, energy=ConfigBiology.INITIAL_ENERGY, parents=None): 65 | super(HumanPGUnifiedBrain, self).__init__(universe, id, dna, age, energy, parents) 66 | 67 | def initialize_brain(self): 68 | self._brain = self.get_master_brain() 69 | 70 | def get_master_brain(self): 71 | if HumanPGUnifiedBrain._master_brain is None: 72 | HumanPGUnifiedBrain._master_brain = BrainPG(observation_shape=tuple(self.observation_shape()), 73 | num_actions=self.num_actions(), 74 | reward_discount=ConfigBrain.BASE_REWARD_DISCOUNT, 75 | learning_rate=ConfigBrain.BASE_LEARNING_RATE) 76 | if self.model_path() is not None and os.path.exists(self.model_path()): 77 | HumanPGUnifiedBrain._master_brain.load_model(self.model_path()) 78 | return HumanPGUnifiedBrain._master_brain 79 | 80 | @staticmethod 81 | def get_race(): 82 | return HumanPGUnifiedBrain 83 | 84 | @staticmethod 85 | def race_name(): 86 | return 'HumanPGUnifiedBrain' 87 | 88 | def new_born(self): 89 | pass 90 | # 91 | # def get_state(self): 92 | # state = super().get_state() 
93 | # return state[0,:,:] 94 | -------------------------------------------------------------------------------- /brains/braindqn.py: -------------------------------------------------------------------------------- 1 | __author__ = 'gkour' 2 | 3 | import random 4 | import numpy as np 5 | import torch 6 | import torch.nn as nn 7 | import torch.optim as optim 8 | import torch.nn.functional as F 9 | from brains.abstractbrain import AbstractBrain 10 | import os.path 11 | 12 | device = "cpu" 13 | 14 | 15 | class BrainDQN(AbstractBrain): 16 | BATCH_SIZE = 128 17 | 18 | def __init__(self, observation_shape, num_actions, reward_discount): 19 | super(BrainDQN, self).__init__(observation_shape, num_actions) 20 | self.policy_net = DQN(observation_shape[0], num_actions).to(device) 21 | self.target_net = DQN(observation_shape[0], num_actions).to(device) 22 | self.optimizer = optim.RMSprop(self.policy_net.parameters()) 23 | self.target_net.load_state_dict(self.policy_net.state_dict()) 24 | self.target_net.eval() 25 | self.reward_discount = reward_discount 26 | #print("Pytorch DQN. Num parameters: " + str(self.num_trainable_parameters())) 27 | 28 | def think(self, obs): 29 | with torch.no_grad(): 30 | action = self.policy_net(torch.from_numpy(obs).float().unsqueeze_(0)).argmax().item() 31 | distribution = np.zeros(self.num_actions()) 32 | distribution[action] = 1 33 | return distribution 34 | 35 | def train(self, experience): 36 | minibatch_size = min(BrainDQN.BATCH_SIZE, len(experience)) 37 | if minibatch_size == 0: 38 | return 39 | 40 | minibatch = random.sample(experience, minibatch_size) 41 | state_batch = torch.from_numpy(np.stack([np.stack(data[0]) for data in minibatch])).float() 42 | action_batch = torch.FloatTensor([data[1] for data in minibatch]) 43 | reward_batch = torch.FloatTensor([data[2] for data in minibatch]) 44 | nextstate_batch = torch.from_numpy(np.stack([data[3] for data in minibatch])).float() 45 | 46 | state_action_values, _ = torch.max(self.policy_net(state_batch) * action_batch, dim=1) 47 | # Compute V(s_{t+1}) for all next states. 
48 | qvalue_batch = self.target_net(nextstate_batch) 49 | expected_state_action_values = [] 50 | for i in range(0, minibatch_size): 51 | terminal = minibatch[i][4] 52 | if terminal: 53 | expected_state_action_values.append(reward_batch[i]) 54 | else: 55 | expected_state_action_values.append(reward_batch[i] + self.reward_discount * torch.max(qvalue_batch[i])) 56 | 57 | # Compute Huber loss 58 | loss = F.smooth_l1_loss(state_action_values, torch.stack(expected_state_action_values).detach()) 59 | 60 | # Optimize the model 61 | self.optimizer.zero_grad() 62 | loss.backward() 63 | torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), max_norm=1) 64 | self.optimizer.step() 65 | 66 | self.target_net.load_state_dict(self.policy_net.state_dict()) 67 | 68 | def save_model(self, path): 69 | torch.save(self.policy_net.state_dict(), path) 70 | 71 | def load_model(self, path): 72 | if os.path.exists(path): 73 | self.policy_net.load_state_dict(torch.load(path)) 74 | self.target_net.load_state_dict(torch.load(path)) 75 | 76 | def num_trainable_parameters(self): 77 | return sum(p.numel() for p in self.policy_net.parameters()) 78 | 79 | 80 | class DQN(nn.Module): 81 | def __init__(self, num_channels, num_actions): 82 | super(DQN, self).__init__() 83 | self.conv1 = nn.Conv2d(num_channels, 4, kernel_size=2) 84 | self.bn1 = nn.BatchNorm2d(4) 85 | self.conv2 = nn.Conv2d(4, 5, kernel_size=2) 86 | self.bn2 = nn.BatchNorm2d(5) 87 | self.head = nn.Linear(45, num_actions) 88 | 89 | def forward(self, x): 90 | x = self.bn1(F.relu(self.conv1(x))) 91 | x = self.bn2(F.relu(self.conv2(x))) 92 | return self.head(x.view(x.size(0), -1)) 93 | -------------------------------------------------------------------------------- /statistics.py: -------------------------------------------------------------------------------- 1 | __author__ = 'gkour' 2 | 3 | import numpy as np 4 | from collections import OrderedDict 5 | from aiq import AIQ 6 | import pandas as pd 7 | import utils 8 | from creature_actions import Actions 9 | from config import ConfigBrain, ConfigBiology 10 | 11 | 12 | class Stats: 13 | 14 | def __init__(self): 15 | self.action_dist = [] # [Left Right Eat Mate Fight] 16 | self.death_cause = [] # [Fatigue Fight Elderly Fall] 17 | self.step_stats_df = pd.DataFrame() 18 | self.epoch_stats_df = pd.DataFrame() 19 | 20 | def accumulate_step_stats(self, universe): 21 | step_stats_dict = self.collect_last_step_stats(universe) 22 | temp_df = pd.DataFrame([step_stats_dict], columns=step_stats_dict.keys()) 23 | self.step_stats_df = pd.concat([self.step_stats_df, temp_df], axis=0).reset_index(drop=True) 24 | 25 | def collect_last_step_stats(self, universe): 26 | return OrderedDict([ 27 | ('Time', universe.get_time()), 28 | ('Population', universe.num_creatures()), 29 | ('IDs', universe.get_creatures_counter()), 30 | ('MeanAge', np.round(utils.emptynanmean([creature.age() for creature in universe.get_all_creatures()]), 2)), 31 | # ('MaxAge', 32 | # np.round(utils.emptynanmean([creature.life_expectancy() for creature in universe.get_all_creatures()]), 2)), 33 | # ('BrainParam', 34 | # np.round(utils.emptynanmean([creature.brain_structure_param() for creature in universe.get_all_creatures()]), 2)), 35 | # ('MemorySize', 36 | # np.round( 37 | # utils.emptynanmean([creature.memory_size() for creature in universe.get_all_creatures()]),2)), 38 | # ('LFreq', 39 | # np.round(utils.emptynanmean([creature.learning_frequency() for creature in universe.get_all_creatures()]), 40 | # 2)), 41 | # ('LRate', 42 | # 
np.round(utils.emptynanmean([creature.learning_rate() for creature in universe.get_all_creatures()]) * 43 | # (1 / ConfigBrain.BASE_LEARNING_RATE), 2)), 44 | # ('RDiscount', np.round(utils.emptynanmean([creature.reward_discount() for creature in universe.get_all_creatures()]), 2)), 45 | # ('VRange', 46 | # np.round(utils.emptynanmean([creature.vision_range() for creature in universe.get_all_creatures()]), 2)), 47 | 48 | ('RacesDist', universe.races_dist()), 49 | ('ActionDist', self.actions_dist_hist()), 50 | ('DeathCause', self.death_cause_hist()), 51 | ('CreaturesDist', universe.get_creatures_distribution()), 52 | ('FoodDist', universe.get_food_distribution()), 53 | # ('Fitrah', np.round(np.nanmean([creature.fitrah() for creature in universe.get_all_creatures()], axis=0), 54 | # 2)), 55 | ('AIQ', 0) 56 | ]) 57 | 58 | def accumulate_epoch_stats(self, universe): 59 | epoch_stats_dict = self.collect_last_epoch_states(universe) 60 | temp_df = pd.DataFrame([epoch_stats_dict], columns=epoch_stats_dict.keys()) 61 | self.epoch_stats_df = pd.concat([self.epoch_stats_df, temp_df], axis=0).reset_index(drop=True) 62 | 63 | def collect_last_epoch_states(self, universe): 64 | return OrderedDict([ 65 | ('Time', universe.get_time()), 66 | ('PopulationAgeDist', np.histogram([creature.age() for creature in universe.get_all_creatures()], 67 | bins=[0, ConfigBiology.MATURITY_AGE, 68 | 2 * ConfigBiology.MATURITY_AGE, 69 | 3 * ConfigBiology.MATURITY_AGE])[0]), 70 | ]) 71 | 72 | def initialize_inter_step_stats(self): 73 | self.action_dist = [] 74 | self.death_cause = [] 75 | 76 | def actions_dist_hist(self): 77 | actions = [creature_action_log[2] for creature_action_log in self.action_dist if creature_action_log[1].race_name()=='HumanPRLFUnifiedBrain'] 78 | return np.histogram(actions, bins=range(0, Actions.num_actions()+1))[0] 79 | 80 | def death_cause_hist(self): 81 | causes = [creature_action_log[2] for creature_action_log in self.death_cause] 82 | return np.histogram(causes, bins=range(0, 5))[0] -------------------------------------------------------------------------------- /visualization/dash_online.py: -------------------------------------------------------------------------------- 1 | __author__ = 'gkour' 2 | from bokeh.plotting import figure, output_file, show, save 3 | from bokeh.models import ColumnDataSource, Slider, Select 4 | import numpy as np 5 | 6 | 7 | def _create_prices(t): 8 | last_average = 100 9 | returns = np.asarray(np.random.lognormal(mean.value, stddev.value, 1)) 10 | average = last_average * np.cumprod(returns) 11 | high = average * np.exp(abs(np.random.gamma(1, 0.03, size=1))) 12 | low = average / np.exp(abs(np.random.gamma(1, 0.03, size=1))) 13 | delta = high - low 14 | open = low + delta * np.random.uniform(0.05, 0.95, size=1) 15 | close = low + delta * np.random.uniform(0.05, 0.95, size=1) 16 | return open[0], high[0], low[0], close[0], average[0] 17 | 18 | 19 | def _moving_avg(prices, days=10): 20 | if len(prices) < days: return [100] 21 | return np.convolve(prices[-days:], np.ones(days, dtype=float), mode="valid") / days 22 | 23 | 24 | def _ema(prices, days=10): 25 | if len(prices) < days or days < 2: return [prices[-1]] 26 | a = 2.0 / (days + 1) 27 | kernel = np.ones(days, dtype=float) 28 | kernel[1:] = 1 - a 29 | kernel = a * np.cumprod(kernel) 30 | # The 0.8647 normalizes out that we stop the EMA after a finite number of terms 31 | return np.convolve(prices[-days:], kernel, mode="valid") / (0.8647) 32 | 33 | 34 | MA12, MA26, EMA12, EMA26 = '12-tick Moving Avg', '26-tick 
Moving Avg', '12-tick EMA', '26-tick EMA' 35 | mean = Slider(title="mean", value=0, start=-0.01, end=0.01, step=0.001) 36 | stddev = Slider(title="stddev", value=0.04, start=0.01, end=0.1, step=0.01) 37 | mavg = Select(value=MA12, options=[MA12, MA26, EMA12, EMA26]) 38 | 39 | 40 | class Dashborad: 41 | def __init__(self, file_path="lines.html"): 42 | self._source = ColumnDataSource(dict( 43 | time=[], average=[], low=[], high=[], open=[], close=[], 44 | ma=[], macd=[], macd9=[], macdh=[], color=[])) 45 | 46 | output_file(file_path) 47 | 48 | p = figure(plot_height=500, tools="xpan,xwheel_zoom,xbox_zoom,reset", x_axis_type=None, y_axis_location="right") 49 | p.x_range.follow = "end" 50 | p.x_range.follow_interval = 100 51 | p.x_range.range_padding = 0 52 | 53 | p.line(x='time', y='average', alpha=0.2, line_width=3, color='navy', source=self._source) 54 | p.line(x='time', y='ma', alpha=0.8, line_width=2, color='orange', source=self._source) 55 | p.segment(x0='time', y0='low', x1='time', y1='high', line_width=2, color='black', source=self._source) 56 | p.segment(x0='time', y0='open', x1='time', y1='close', line_width=8, color='color', source=self._source) 57 | 58 | show(p) 59 | 60 | def update(self, t): 61 | # df = pd.DataFrame.from_csv(path=self._input_csv, header=0, index_col=0) 62 | # 63 | # x = [1, 2, 3, 4, 5] 64 | # y = np.random.randint(1, 10, 5) 65 | # 66 | # self._p.line(x, y, legend="Temp.", line_width=2) 67 | # save(self._p) 68 | 69 | open, high, low, close, average = _create_prices(t) 70 | color = "green" if open < close else "red" 71 | 72 | new_data = dict( 73 | time=[t], 74 | open=[open], 75 | high=[high], 76 | low=[low], 77 | close=[close], 78 | average=[average], 79 | color=[color], 80 | ) 81 | 82 | close = self._source.data['close'] + [close] 83 | ma12 = _moving_avg(close[-12:], 12)[0] 84 | ma26 = _moving_avg(close[-26:], 26)[0] 85 | ema12 = _ema(close[-12:], 12)[0] 86 | ema26 = _ema(close[-26:], 26)[0] 87 | 88 | if mavg.value == MA12: 89 | new_data['ma'] = [ma12] 90 | elif mavg.value == MA26: 91 | new_data['ma'] = [ma26] 92 | elif mavg.value == EMA12: 93 | new_data['ma'] = [ema12] 94 | elif mavg.value == EMA26: 95 | new_data['ma'] = [ema26] 96 | 97 | macd = ema12 - ema26 98 | new_data['macd'] = [macd] 99 | 100 | macd_series = self._source.data['macd'] + [macd] 101 | macd9 = _ema(macd_series[-26:], 9)[0] 102 | new_data['macd9'] = [macd9] 103 | new_data['macdh'] = [macd - macd9] 104 | 105 | self._source.stream(new_data, 300) 106 | 107 | dash = Dashborad() 108 | for t in range(1000): 109 | dash.update(t) 110 | -------------------------------------------------------------------------------- /space.py: -------------------------------------------------------------------------------- 1 | __author__ = 'gkour' 2 | 3 | from cell import Cell 4 | import numpy as np 5 | from itertools import chain 6 | 7 | 8 | class Space: 9 | def __init__(self, space_size): 10 | self._space_size = space_size 11 | self._grid = [] 12 | for i in range(space_size): 13 | self._grid = [[Cell((i, j)) for j in range(self._space_size)] for i in range(self._space_size)] 14 | 15 | def grid(self): 16 | return self._grid 17 | 18 | def cells(self): 19 | return list(chain.from_iterable(self._grid)) 20 | 21 | def update_sounds(self, time): 22 | for cell in self.cells(): 23 | cell.remove_sounds(time) 24 | 25 | def insert_creature(self, creature, coord): 26 | if not self.valid_coord(coord): 27 | print("Exception: bad coordinated in space.insert_creature") 28 | return None 29 | cell = self._grid[coord[0]][coord[1]] 30 | 
cell.insert_creature(creature) 31 | return cell 32 | 33 | def add_food(self, coord, amount): 34 | if not self.valid_coord(coord): 35 | print("Exception: bad coordinated in space.add_food") 36 | return None 37 | cell = self._grid[coord[0]][coord[1]] 38 | cell.add_food(amount) 39 | return cell 40 | 41 | def remove_creature(self, creature): 42 | x, y = creature.coord() 43 | self._grid[x][y].remove_creature(creature) 44 | 45 | def get_state_in_coord(self, coord, vision_range, races): 46 | if not self.valid_coord(coord): 47 | raise Exception("Exception: bad coordinated in space.get_state_in_coord") 48 | state_dim_size = 2 * vision_range + 1 49 | dims = len(races) + 2 # races, food, and sound 50 | state = np.ones([dims, state_dim_size, state_dim_size]) * -1 51 | 52 | for i in range(state_dim_size): 53 | for j in range(state_dim_size): 54 | abs_i = coord[0] - vision_range + i 55 | abs_j = coord[1] - vision_range + j 56 | if 0 <= abs_i < self._space_size and 0 <= abs_j < self._space_size: 57 | state[:, i, j] = self._grid[abs_i][abs_j].get_state_in_cell(races) 58 | return state 59 | 60 | def get_all_creatures(self): 61 | return [creature for cell in self.cells() for creature in cell.creatures()] 62 | 63 | def get_food_distribution(self): 64 | return [[self._grid[i][j].get_food() for j in range(self._space_size)] for i in range(self._space_size)] 65 | 66 | def get_creatures_distribution(self): 67 | return [[self._grid[i][j].num_creatures() for j in range(self._space_size)] for i in range(self._space_size)] 68 | 69 | def get_sounds_distribution(self): 70 | return [[len(self._grid[i][j].get_sounds()) for j in range(self._space_size)] for i in range(self._space_size)] 71 | 72 | def valid_coord(self, coord): 73 | x, y = coord 74 | return 0 <= x < self._space_size and 0 <= y < self._space_size 75 | 76 | def find_nearby_creature(self, creature): 77 | nearby_creatures = creature.cell().creatures() 78 | if len(nearby_creatures) < 2: 79 | return None 80 | others = [creat for creat in nearby_creatures if creat != creature] 81 | return np.random.permutation(others)[0] 82 | 83 | def find_nearby_creature_from_same_race(self, creature): 84 | others = self.get_nearby_creatures_from_same_race(creature) 85 | if others: 86 | return np.random.permutation(others)[0] 87 | return None 88 | 89 | def find_nearby_creature_from_different_race(self, creature): 90 | others = self.get_nearby_creatures_from_different_race(creature) 91 | if others: 92 | return np.random.permutation(others)[0] 93 | return None 94 | 95 | def get_nearby_creatures_from_same_race(self, creature): 96 | return [creat for creat in creature.cell().creatures() if 97 | creat != creature and creat.race_name() == creature.race_name()] 98 | 99 | def get_nearby_creatures_from_different_race(self, creature): 100 | return [creat for creat in creature.cell().creatures() if creat.race_name() != creature.race_name()] 101 | 102 | def __str__(self): 103 | string = '' 104 | for cell in self._grid: 105 | string = string + str(cell) + ' ' 106 | 107 | return string 108 | -------------------------------------------------------------------------------- /visualization/gui.py: -------------------------------------------------------------------------------- 1 | __author__ = 'gkour' 2 | 3 | from simulator import Simulator, SimState 4 | from visualization.dashboard import Dashboard 5 | from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg 6 | import tkinter as tk 7 | from tkinter import ttk, Scale 8 | import matplotlib.pyplot as plt 9 | from config import ConfigPhysics 
10 | import sys 11 | from queue import Queue 12 | 13 | plt.style.use('seaborn-paper') 14 | LARGE_FONT = ("Verdana", 12) 15 | 16 | 17 | class OriginGUI: 18 | 19 | def __init__(self, master, *args, **kwargs): 20 | tk.Tk.wm_title(master, "Project Origin") 21 | #master.iconbitmap(default="visualization/originicon.bmp") 22 | 23 | self.master = master 24 | self.msg_queue = Queue() 25 | 26 | container = tk.Frame(master) 27 | container.pack(side="top", fill="both", expand=True) 28 | container.grid_rowconfigure(0, weight=1) 29 | container.grid_columnconfigure(0, weight=1) 30 | 31 | self._simulation_page = SimulationPage(container, master, self.msg_queue) 32 | self._simulation_page.grid(row=0, column=0, sticky="nsew") 33 | self._simulation_page.tkraise() 34 | 35 | def refresh_data(self, msg): 36 | self._simulation_page.refresh_data(msg) 37 | 38 | def process_incoming_msg(self): 39 | """Handle all messages currently in the queue, if any.""" 40 | while self.msg_queue.qsize(): 41 | try: 42 | self.refresh_data(self.msg_queue.get()) 43 | except Exception as exp: 44 | print(str(exp)) 45 | pass 46 | 47 | 48 | class SimulationPage(tk.Frame): 49 | 50 | def __init__(self, parent, controller, queue): 51 | self._dashboard = Dashboard() 52 | self.controller = controller 53 | self.simulator = Simulator(queue) 54 | self.window_closed = False 55 | 56 | tk.Frame.__init__(self, parent, bg='white') 57 | title_label = tk.Label(self, text="Project Origin Dashboard", font=LARGE_FONT, foreground='blue', bg='white') 58 | title_label.pack(pady=10, padx=10) 59 | 60 | self.s = ttk.Style() 61 | #self.s.theme_use('vista') 62 | 63 | self.status_label = tk.Label(self, text="Simulator Ready.", bg='white') 64 | self.status_label.pack(pady=10, padx=10) 65 | 66 | self.sim_btn = ttk.Button(self, text="Start Simulation", command=lambda: self.on_simulation_btn_click()) 67 | 68 | self.sim_btn.pack() 69 | self.food_creature_scale = Scale(self, from_=0, to=1, orient=tk.HORIZONTAL, resolution=0.1, bg='white', 70 | command=lambda x: self.set_food_creature_ratio(x)) 71 | self.food_creature_scale.set(ConfigPhysics.FOOD_CREATURE_RATIO) 72 | self.food_creature_scale.pack() 73 | 74 | dash_fig = self._dashboard.get_figure() 75 | 76 | canvas = FigureCanvasTkAgg(dash_fig, self) 77 | canvas.draw() 78 | canvas.get_tk_widget().pack(side=tk.BOTTOM, fill=tk.BOTH, expand=True) 79 | 80 | controller.protocol("WM_DELETE_WINDOW", self.close_window_event) 81 | 82 | self.on_simulation_btn_click() 83 | 84 | def close_window_event(self): 85 | self.stop_simulation() 86 | self.window_closed = True 87 | if self.simulator.status() == SimState.IDLE: 88 | self.close_window() 89 | 90 | def close_window(self): 91 | self.controller.destroy() 92 | sys.exit() 93 | 94 | def refresh_data(self, msg): 95 | if type(msg) == SimState: 96 | print(msg.value) 97 | if msg == SimState.IDLE: 98 | self.sim_btn['text'] = 'Start Simulation' 99 | self.sim_btn['state'] = tk.ACTIVE 100 | if self.window_closed: 101 | self.close_window() 102 | self.status_label['text'] = str(msg.value) 103 | else: 104 | self._dashboard.update_step_dash(msg.step_stats_df) 105 | self._dashboard.update_epoch_dash(msg.epoch_stats_df) 106 | 107 | def on_simulation_btn_click(self): 108 | if self.sim_btn['text'] == 'Start Simulation': 109 | self.start_simulation() 110 | self.sim_btn['text'] = 'Stop Simulation' 111 | else: 112 | self.stop_simulation() 113 | self.sim_btn['state'] = tk.DISABLED 114 | 115 | def stop_simulation(self): 116 | self.simulator.stop() 117 | self.status_label['text'] = "Simulation Interrupted. 
Stopping..." 118 | 119 | def start_simulation(self): 120 | self.simulator.run_in_thread() 121 | 122 | @staticmethod 123 | def set_food_creature_ratio(new): 124 | ConfigPhysics.FOOD_CREATURE_RATIO = float(new) 125 | -------------------------------------------------------------------------------- /brains/brainpg.py: -------------------------------------------------------------------------------- 1 | __author__ = 'gkour' 2 | 3 | import random 4 | import numpy as np 5 | import torch 6 | import torch.nn as nn 7 | import torch.optim as optim 8 | import torch.nn.functional as F 9 | from brains.abstractbrain import AbstractBrain 10 | import os.path 11 | import math 12 | 13 | #torch.manual_seed(0) 14 | 15 | device = "cpu" 16 | 17 | 18 | def has_err(x): 19 | return bool(((x != x) | (x == float("inf")) | (x == float("-inf"))).any().item()) 20 | 21 | 22 | class BrainPG(AbstractBrain): 23 | BATCH_SIZE = 20 24 | 25 | def __init__(self, observation_shape, num_actions, reward_discount, learning_rate=0.01): 26 | super(BrainPG, self).__init__(observation_shape[0], num_actions) 27 | self.policy = Policy(observation_shape[0], num_actions).to(device) 28 | self.optimizer = optim.Adam(self.policy.parameters(), lr=learning_rate) 29 | self.reward_discount = reward_discount 30 | self.num_optimizations = 0 31 | print("Pytorch PG. Num parameters: " + str(self.num_trainable_parameters())) 32 | 33 | def think(self, obs): 34 | with torch.no_grad(): 35 | action_probs = self.policy(torch.from_numpy(obs).float().unsqueeze_(0)) 36 | if math.isnan(action_probs[0][0].item()): 37 | raise Exception('nan probability') 38 | return action_probs[0].tolist() 39 | 40 | def train(self, experience): 41 | minibatch_size = min(BrainPG.BATCH_SIZE, len(experience)) 42 | if minibatch_size < BrainPG.BATCH_SIZE: 43 | return 44 | self.num_optimizations += 1 45 | 46 | minibatch = experience #list(experience)[-5:] # random.sample(experience, minibatch_size) 47 | state_batch = torch.from_numpy(np.stack([np.stack(data[0]) for data in minibatch])).float() 48 | action_batch = torch.FloatTensor([data[1] for data in minibatch]) 49 | # removing the discounting from here. 
50 | #reward_batch = torch.FloatTensor(utils.discount_rewards([data[2] for data in minibatch], self.reward_discount)) 51 | reward_batch = torch.FloatTensor([data[2] for data in minibatch]) 52 | nextstate_batch = torch.from_numpy(np.stack([data[3] for data in minibatch])).float() 53 | 54 | # Scale rewards 55 | #reward_std = 1 if torch.isnan(reward_batch.std()) else reward_batch.std() 56 | #rewards = (reward_batch - reward_batch.mean()) / (reward_std + np.finfo(np.float32).eps) 57 | 58 | prob_action_batch = self.policy(state_batch) 59 | prob_actions = torch.max(prob_action_batch.mul(action_batch), dim=1)[0] 60 | log_prob_actions = torch.log(prob_actions) 61 | 62 | # Calculate loss 63 | loss = (torch.mean(torch.mul(log_prob_actions, reward_batch).mul(-1), -1)) 64 | 65 | # Optimize the model 66 | self.optimizer.zero_grad() 67 | loss.backward() 68 | 69 | torch.nn.utils.clip_grad_norm_(self.policy.parameters(), max_norm=1) 70 | # for param in self.policy_net.parameters(): 71 | # param.grad.data.clamp_(-1, 1) 72 | self.optimizer.step() 73 | #print(loss.item()) 74 | if math.isinf(loss.item()): 75 | raise Exception('INF probability') 76 | return loss.item() 77 | 78 | def save_model(self, path): 79 | torch.save(self.policy.state_dict(), path) 80 | 81 | def load_model(self, path): 82 | if os.path.exists(path): 83 | self.policy.load_state_dict(torch.load(path)) 84 | 85 | def num_trainable_parameters(self): 86 | return sum(p.numel() for p in self.policy.parameters()) 87 | 88 | 89 | class Policy(nn.Module): 90 | def __init__(self, num_channels, num_actions): 91 | super(Policy, self).__init__() 92 | self.conv1 = nn.Conv2d(num_channels, 4, kernel_size=2) 93 | self.bn1 = nn.BatchNorm2d(4) 94 | self.conv2 = nn.Conv2d(4, 5, kernel_size=2) 95 | self.bn2 = nn.BatchNorm2d(5) 96 | self.head = nn.Linear(45, num_actions) 97 | 98 | self.model = torch.nn.Sequential( 99 | self.conv1, 100 | #nn.ReLU, 101 | nn.BatchNorm2d(4), 102 | self.conv2, 103 | #nn.ReLU, 104 | nn.BatchNorm2d(5), 105 | #nn.Dropout(p=0.6), 106 | nn.Sigmoid(), 107 | nn.Flatten(), 108 | self.head, 109 | nn.Softmax(dim=-1) 110 | ) 111 | 112 | # self.net = nn.Sequential( 113 | # nn.Flatten(), 114 | # nn.Linear(in_features=num_channels, out_features=8, bias=False), 115 | # nn.PReLU(), 116 | # nn.Linear(in_features=8, out_features=8, bias=False), 117 | # nn.PReLU(), 118 | # nn.Linear(in_features=8, out_features=num_actions, bias=False), 119 | # nn.Softmax(dim=-1) 120 | # ) 121 | 122 | def forward(self, x): 123 | return self.model(x) 124 | -------------------------------------------------------------------------------- /docs/Universe.md: -------------------------------------------------------------------------------- 1 | ## Universe 2 | 3 | In each experiment, there is a single Universe, which the simulator engine instantiate and run. 4 | The main functionality of the Universe is to give the opportunity for each creature in the grid to perform an action. 5 | The Universe responses to the action, and changes accordingly. 6 | It also changes the acting creature internal state and maybe other affected creatures, depending on the action. 7 | 8 | 9 | ### Food: 10 | Food is distributed in the world in several ways. 11 | First, at the beginning of time, on the universe creation, the universe may distribute a specific amount of food determined in the config file. 12 | Then, at each time step, the universe distribute food amount proportional to the currently living creatures. 13 | This can be controlled by a config parameter (which can be controlled from the GUI). 
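A minimal sketch of this per-step distribution (the function name and the uniform placement policy are assumptions for illustration; `Space.add_food` and `ConfigPhysics.FOOD_CREATURE_RATIO` are the hooks that already exist in the repository):

```python
import numpy as np

from config import ConfigPhysics


def distribute_food_step(space, num_creatures, space_size):
    # New food per step grows with the living population, scaled by the
    # GUI-controllable food/creature ratio; each unit lands on a random cell.
    amount = int(np.ceil(ConfigPhysics.FOOD_CREATURE_RATIO * num_creatures))
    for _ in range(amount):
        coord = (np.random.randint(space_size), np.random.randint(space_size))
        space.add_food(coord, 1)
```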
14 | In addition, food can be generated by the creatures if they can perform the action "work" (see below). 15 | In this case, the food is placed in the creature's current cell. 16 | Note that the food can be eaten by any creature, not only the one that worked for it. 17 | 18 | 19 | ### Actions: 20 | Once the creature decides upon an action, the environment executes it and changes its own state and the creature's state accordingly. 21 | These are the actions supported by the environment: MOVE_UP, MOVE_DOWN, MOVE_LEFT, MOVE_RIGHT, EAT, MATE, DIVIDE and WORK. 22 | Every action requires a different amount of energy, defined in the biological configuration file. 23 | If a creature performs an action for which it does not have enough energy, the creature will die. 24 | Not every action is possible in every situation; for instance, mating is not allowed before the creature reaches maturity age. 25 | Another example is that moving out of the grid is not possible in a non-slippery world. 26 | If the creature tries to perform an implausible action, it loses energy but its action will not be executed. 27 | Note that this loss of energy may lead to the creature's death. 28 | 29 | 30 | #### Moving 31 | Since the world is a finite 2D space, creatures can move up, down, left and right. 32 | Each movement requires energy, the amount of which depends on the biological features defined in the configuration. 33 | In the future, the amount of energy needed for the creature to move may depend on the creature's size. 34 | 35 | #### Eating 36 | As its name suggests, this action allows the creature to consume food from the space. 37 | The consumed food amount is limited by the amount available in the current cell and by the meal size defined in the biological configuration. 38 | This action reduces the amount of food in the creature's cell accordingly and adds a proportional number of energy units to the creature, allowing it to continue living. 39 | 40 | #### Dividing 41 | Dividing and mating are the two breeding mechanisms that project-origin supports. 42 | Creatures may have the ability to divide like bacteria or to mate like humans. 43 | In asexual creatures, this action creates two daughter creatures that divide the mother creature's energy between them. 44 | Except for the evolutionary DNA mutation, the DNA of the daughter creatures is the same as that of the mother creature. 45 | 46 | #### Mating 47 | In sexual creatures, mating creates a new offspring creature having a recombined and mutated version of the DNA of its parents. 48 | Choosing to mate, the creature gets a list of all the creatures in its surroundings and probabilistically chooses the one it is most attracted to. 49 | Currently, the sexual attraction is implemented by the function s(1-s), where s is the cosine similarity between the two creatures' DNA. 50 | Finally, the spouse is selected from the distribution determined by the attraction (softmax over all attractions). 51 | This action may be limited by age, i.e., the maturity age defined in the configuration. 52 | 53 | #### Fighting 54 | In battle mode, in which two or more races exist in space, creatures are usually aggressive - having the "fight" action. 55 | The creature can fight a random creature in its environment, from the same or a different race. 56 | The fight result is drawn from a distribution determined by the involved creatures' energy (a minimal sketch of such a draw appears below); thus, the creature should learn not to fight a much stronger enemy.
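A minimal sketch of such an energy-proportional draw, assuming the simplest possible rule (the win probability is each fighter's share of the combined energy; the actual distribution is defined by the universe/creature implementation, and `resolve_fight` is a hypothetical name used only here):

```python
import numpy as np


def resolve_fight(attacker_energy, defender_energy):
    # Win probability is proportional to each fighter's share of the combined
    # energy; the winner takes half of the loser's remaining energy.
    total = attacker_energy + defender_energy
    if total <= 0:
        return attacker_energy, defender_energy  # nothing to fight over
    if np.random.rand() < attacker_energy / total:
        return attacker_energy + defender_energy / 2, defender_energy / 2
    return attacker_energy / 2, defender_energy + attacker_energy / 2
```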
57 | The creature that wins the fight takes half of the enemy energy, therefore, it is worthy to fight an enemy having enough energy. 58 | The fight action requires a substantial amount of energy, controlled in the config file. 59 | 60 | #### Working 61 | When the creature choose to work, it spends working energy (defined in the configuration file) to add food to the cell it is located in. 62 | Note that the action work only spends energy, and the creature should perform the action each to consume food from the space. 63 | Therefore, the creature should learn that it should work only if there is no food in the surrounding environment. 64 | In addition, it should learn that working is not enough to provide it with energy, but it must eat to consume it's labor product. -------------------------------------------------------------------------------- /creatures/HumanPRLF.py: -------------------------------------------------------------------------------- 1 | __author__ = 'gkour' 2 | 3 | import os 4 | 5 | import numpy as np 6 | import pfrl 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | import utils 12 | from config import ConfigBiology, ConfigBrain 13 | from creatures.human import Human 14 | from evolution import DNA 15 | 16 | 17 | class HumanPRLF(Human): 18 | Fitrah = [0, 0, 0, 0, 0, 0] 19 | 20 | def __init__(self, universe, id, dna, age=0, energy=ConfigBiology.INITIAL_ENERGY, parents=None): 21 | super(HumanPRLF, self).__init__(universe, id, dna, age, energy, parents) 22 | 23 | @staticmethod 24 | def get_race(): 25 | return HumanPRLF 26 | 27 | @staticmethod 28 | def race_name(): 29 | return 'HumanPRLF' 30 | 31 | @staticmethod 32 | def race_basic_dna(): 33 | return DNA(ConfigBiology.BASE_MEMORY_SIZE, 34 | ConfigBrain.BASE_LEARNING_RATE, 35 | ConfigBrain.BASE_BRAIN_STRUCTURE_PARAM, 36 | ConfigBiology.BASE_LEARN_FREQ, 37 | ConfigBiology.BASE_LIFE_EXPECTANCY, 38 | ConfigBrain.BASE_REWARD_DISCOUNT, 39 | HumanPRLF.race_fitrah()) 40 | 41 | @staticmethod 42 | def race_fitrah(): 43 | return utils.normalize_dist(HumanPRLF.Fitrah) 44 | 45 | # @staticmethod 46 | # def self_race_enemy(): 47 | # return True 48 | 49 | def initialize_brain(self): 50 | # self._brain = BrainPRLF(observation_shape=tuple(self.observation_shape()), 51 | # num_actions=self.num_actions(), reward_discount=self.reward_discount()) 52 | self._brain = initialize_PRLF_agent(self.observation_shape(), self.num_actions()) 53 | 54 | def decide(self, state): 55 | action = self._brain.act(state) 56 | return action 57 | 58 | def add_experience(self, experience): 59 | self._brain.observe(experience[3], experience[2], experience[4], -1) 60 | 61 | def smarten(self): 62 | pass 63 | 64 | 65 | class HumanPRLFUnifiedBrain(HumanPRLF): 66 | _master_brain = None 67 | 68 | def __init__(self, universe, id, dna, age=0, energy=ConfigBiology.INITIAL_ENERGY, parents=None): 69 | super(HumanPRLFUnifiedBrain, self).__init__(universe, id, dna, age, energy, parents) 70 | 71 | def initialize_brain(self): 72 | self._brain = self.get_master_brain() 73 | 74 | def get_master_brain(self): 75 | if HumanPRLFUnifiedBrain._master_brain is None: 76 | HumanPRLFUnifiedBrain._master_brain = initialize_PRLF_agent(self.observation_shape(), self.num_actions()) 77 | if self.model_path() is not None and os.path.exists(self.model_path()): 78 | HumanPRLFUnifiedBrain._master_brain.load_model(self.model_path()) 79 | return HumanPRLFUnifiedBrain._master_brain 80 | 81 | @staticmethod 82 | def get_race(): 83 | return HumanPRLFUnifiedBrain 84 | 85 | 
@staticmethod 86 | def race_name(): 87 | return 'HumanPRLFUnifiedBrain' 88 | 89 | def new_born(self): 90 | pass 91 | 92 | 93 | def initialize_PRLF_agent(obs_size, n_actions): 94 | class DQN(nn.Module): 95 | def __init__(self, num_channels, num_actions): 96 | super(DQN, self).__init__() 97 | self.conv1 = nn.Conv2d(num_channels, 4, kernel_size=2) 98 | self.bn1 = nn.BatchNorm2d(4) 99 | self.conv2 = nn.Conv2d(4, 5, kernel_size=2) 100 | self.bn2 = nn.BatchNorm2d(5) 101 | self.head = nn.Linear(45, num_actions) 102 | 103 | def forward(self, x): 104 | x = self.bn1(F.relu(self.conv1(x))) 105 | x = self.bn2(F.relu(self.conv2(x))) 106 | return pfrl.action_value.DiscreteActionValue(self.head(x.view(x.size(0), -1))) 107 | 108 | # obs_size = env.observation_space.low.size 109 | # obs_size = self.observation_shape() 110 | # n_actions = self.num_actions() 111 | q_func = DQN(obs_size[0], n_actions) 112 | 113 | # Use Adam to optimize q_func. eps=1e-2 is for stability. 114 | optimizer = torch.optim.Adam(q_func.parameters(), eps=1e-2) 115 | 116 | # Set the discount factor that discounts future rewards. 117 | gamma = 0.99 118 | 119 | def random_action_func(): 120 | return np.random.randint(0, n_actions) 121 | 122 | # Use epsilon-greedy for exploration 123 | explorer = pfrl.explorers.ConstantEpsilonGreedy( 124 | epsilon=0.3, random_action_func=random_action_func) 125 | 126 | # DQN uses Experience Replay. 127 | # Specify a replay buffer and its capacity. 128 | replay_buffer = pfrl.replay_buffers.ReplayBuffer(capacity=10 ** 6) 129 | 130 | # Since observations from CartPole-v0 is numpy.float64 while 131 | # As PyTorch only accepts numpy.float32 by default, specify 132 | # a converter as a feature extractor function phi. 133 | phi = lambda x: x.astype(np.float32, copy=False) 134 | 135 | # Set the device id to use GPU. To use CPU only, set it to -1. 136 | gpu = -1 137 | 138 | # Now create an agent that will interact with the environment. 139 | agent = pfrl.agents.DoubleDQN( 140 | q_func, 141 | optimizer, 142 | replay_buffer, 143 | gamma, 144 | explorer, 145 | replay_start_size=500, 146 | update_interval=1, 147 | target_update_interval=100, 148 | phi=phi, 149 | gpu=gpu, 150 | ) 151 | 152 | print("Pytorch PRLF. 
Num parameters: {}".format(sum(p.numel() for p in q_func.parameters()))) 153 | return agent 154 | -------------------------------------------------------------------------------- /visualization/dashboard.py: -------------------------------------------------------------------------------- 1 | __author__ = 'gkour' 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | from creature_actions import Actions 6 | from configsimulator import ConfigSimulator 7 | 8 | 9 | class Dashboard: 10 | 11 | def __init__(self): 12 | 13 | self._fig = plt.figure(figsize=(9, 5), dpi=120, facecolor='w') 14 | self._fig.canvas.set_window_title('Origin Dashboard') 15 | self._axes_pop = self._fig.add_subplot(221) 16 | self._axes_pop.set_ylabel('Population Size') 17 | self._line_pop, = self._axes_pop.plot([], [], '-', label=self._axes_pop.yaxis.label.get_text()) 18 | self._axes_age = self._axes_pop.twinx() 19 | self._axes_age.set_ylabel('AVG Age') 20 | self._line_age, = self._axes_age.plot([], [], 'y-', label=self._axes_age.yaxis.label.get_text()) 21 | self._axes_pop.legend([self._line_pop, self._line_age], 22 | [self._line_pop.get_label(), self._line_age.get_label()], loc=0) 23 | 24 | self._axes_aiq = self._fig.add_subplot(222) 25 | self._axes_aiq.set_ylabel('Population AIQ') 26 | self._line_aiq, = self._axes_aiq.plot([], [], '-', label=self._axes_aiq.yaxis.label.get_text()) 27 | self._axes_aiq.set_ylim(bottom=0, top=1) 28 | 29 | self._fig_creatures_loc = self._fig.add_axes([0.05, 0.1, 0.2, 0.3]) 30 | self._fig_creatures_loc.yaxis.set_major_locator(plt.NullLocator()) 31 | self._fig_creatures_loc.xaxis.set_major_locator(plt.NullLocator()) 32 | self._fig_food_loc = self._fig.add_axes([0.26, 0.1, 0.2, 0.3]) 33 | self._fig_food_loc.yaxis.set_major_locator(plt.NullLocator()) 34 | self._fig_food_loc.xaxis.set_major_locator(plt.NullLocator()) 35 | self._fig_action = self._fig.add_subplot(2, 3, 6) 36 | self._fig_death = self._fig.add_subplot(4, 6, 16) 37 | self._fig_races = self._fig.add_subplot(4, 6, 22) 38 | 39 | def update_epoch_dash(self, epoch_stats_df): 40 | if epoch_stats_df is None or epoch_stats_df.empty: 41 | return 42 | 43 | @staticmethod 44 | def make_autopct(values): 45 | def my_autopct(pct): 46 | if pct < 0.1: 47 | return '' 48 | total = sum(values) 49 | val = int(round(pct * total / 100.0)) 50 | return '{v:d}'.format(v=val) 51 | return my_autopct 52 | 53 | @staticmethod 54 | def make_numpct(values): 55 | def my_autopct(pct): 56 | if pct == 0: 57 | return '' 58 | total = sum(values) 59 | val = int(round(pct * total / 100.0)) 60 | return '({v:d}, {p:1.1f})'.format(v=val, p=pct / 100) 61 | 62 | return my_autopct 63 | 64 | def update_step_dash(self, step_stats_df): 65 | if step_stats_df is None or step_stats_df.empty: 66 | return 67 | self._line_pop.set_xdata(step_stats_df['Time']) 68 | self._line_pop.set_ydata(step_stats_df['Population']) 69 | 70 | self._line_age.set_xdata(step_stats_df['Time']) 71 | self._line_age.set_ydata(step_stats_df['MeanAge']) 72 | 73 | self._line_aiq.set_xdata(step_stats_df['Time']) 74 | self._line_aiq.set_ydata(step_stats_df['AIQ']) 75 | 76 | self._fig.canvas.draw() 77 | self._fig.canvas.flush_events() 78 | self._axes_pop.relim() 79 | self._axes_pop.autoscale_view() 80 | self._axes_age.relim() 81 | self._axes_age.autoscale_view() 82 | self._axes_aiq.relim() 83 | self._axes_aiq.autoscale_view() 84 | 85 | ## Creatures Dist 86 | creatures_dist = np.asarray(step_stats_df['CreaturesDist'].iloc[-1]) 87 | self._fig_creatures_loc.clear() 88 | 
self._fig_creatures_loc.imshow(creatures_dist, cmap="Purples", aspect="auto", vmin=0, vmax=10) 89 | self._fig_creatures_loc.set_title('Creatures Location') 90 | self._fig_creatures_loc.yaxis.set_major_locator(plt.NullLocator()) 91 | self._fig_creatures_loc.xaxis.set_major_locator(plt.NullLocator()) 92 | 93 | ## Food Supply 94 | food_supply = np.asarray(step_stats_df['FoodDist'].iloc[-1]) 95 | self._fig_food_loc.clear() 96 | self._fig_food_loc.imshow(food_supply, cmap="Greens", aspect="auto", vmin=0, vmax=100) 97 | self._fig_food_loc.set_title('Food Dist') 98 | self._fig_food_loc.yaxis.set_major_locator(plt.NullLocator()) 99 | self._fig_food_loc.xaxis.set_major_locator(plt.NullLocator()) 100 | 101 | ## Action Dist Pie 102 | actions_dist = np.mean(step_stats_df['ActionDist'].tail(ConfigSimulator.LOGGING_BATCH_SIZE).values, axis=0) 103 | self._fig_action.clear() 104 | patches, texts, autotexts = self._fig_action.pie(actions_dist, 105 | startangle=90, autopct=self.make_autopct(actions_dist), 106 | normalize=True) 107 | self._fig_action.legend(patches, labels=Actions.get_actions_str(), loc=(1, 0)) 108 | 109 | ## Death Dist Pie 110 | death_cause = np.mean(step_stats_df['DeathCause'].tail(ConfigSimulator.LOGGING_BATCH_SIZE).values, axis=0) 111 | self._fig_death.clear() 112 | self._fig_death.pie(death_cause, labels=['Fatigue', 'Fight', 'Elderly', 'Fall'], 113 | startangle=90, autopct=self.make_autopct(death_cause)) 114 | 115 | ## races Dist 116 | races = np.mean(step_stats_df['RacesDist'].tail(ConfigSimulator.LOGGING_BATCH_SIZE).values, axis=0) 117 | self._fig_races.clear() 118 | self._fig_races.pie(races, labels=[race.race_name() for race in ConfigSimulator.RACES], 119 | startangle=90, autopct=self.make_autopct(races)) 120 | 121 | def get_figure(self): 122 | return self._fig 123 | -------------------------------------------------------------------------------- /aiq.py: -------------------------------------------------------------------------------- 1 | __author__ = 'gkour' 2 | 3 | import numpy as np 4 | from config import ConfigBiology 5 | import utils 6 | from creature_actions import Actions 7 | import random 8 | 9 | 10 | class AIQ: 11 | 12 | @staticmethod 13 | def get_population_aiq(universe): 14 | creatures = universe.get_all_creatures() 15 | sample_creatures = random.sample(creatures, utils.safe_log2(len(creatures))) 16 | all_aiq = [AIQ.get_creature_aiq(creature) for creature in sample_creatures] 17 | return np.round(utils.emptynanmean(all_aiq), 2) 18 | 19 | @staticmethod 20 | def get_population_aiq_dist(universe): 21 | creatures = universe.get_all_creatures() 22 | bounds = [ConfigBiology.BASE_LIFE_EXPECTANCY / 3, 2 * ConfigBiology.BASE_LIFE_EXPECTANCY / 3] 23 | young = [AIQ.get_creature_aiq(creature) for creature in creatures if creature.age() <= bounds[0]] 24 | adult = [AIQ.get_creature_aiq(creature) for creature in creatures if bounds[0] < creature.age() <= bounds[1]] 25 | old = [AIQ.get_creature_aiq(creature) for creature in creatures if bounds[1] < creature.age()] 26 | 27 | return np.round([utils.emptynanmean(young), utils.emptynanmean(adult), utils.emptynanmean(old)], 2) 28 | 29 | @staticmethod 30 | def get_creature_aiq(creature): 31 | score = 0 32 | scenarios = [AIQ.haven_left, AIQ.haven_right, AIQ.haven_inplace, AIQ.haven_up, AIQ.haven_down, 33 | AIQ.border_awareness_up, AIQ.border_awareness_down, AIQ.border_awareness_left, AIQ.border_awareness_right] 34 | w = 0 35 | for i in range(len(scenarios)): 36 | test_state, positive_test_type, expected_actions, weight = 
scenarios[i](creature.vision_range()) 37 | if creature._universe.num_races() == 1: 38 | # delete the other race creatures entry from the state 39 | test_state = np.delete(test_state, obj=2, axis=0) 40 | w += weight 41 | decision = creature.index_to_enum(np.argmax(creature.brain().think(test_state))) 42 | if positive_test_type: 43 | score += weight if decision in expected_actions else 0 44 | else: 45 | score += weight if decision not in expected_actions else 0 46 | 47 | return score / w 48 | 49 | @staticmethod 50 | def _haven(vision_range, where): 51 | ''' Haven cell in current location.''' 52 | energy = 3 53 | age = 3 54 | 55 | food = np.zeros(shape=(2 * vision_range + 1, 2 * vision_range + 1)) 56 | same_race_creatures = np.ones(shape=(2 * vision_range + 1, 2 * vision_range + 1)) * 20 57 | different_race_creatures = np.ones(shape=(2 * vision_range + 1, 2 * vision_range + 1)) * 20 58 | sound = np.zeros(shape=(2 * vision_range + 1, 2 * vision_range + 1)) 59 | energy = np.ones(shape=(2 * vision_range + 1, 2 * vision_range + 1)) * energy 60 | age = np.ones(shape=(2 * vision_range + 1, 2 * vision_range + 1)) * age 61 | if where == 'INPLACE': 62 | food[vision_range][vision_range] = 20 63 | same_race_creatures[vision_range][vision_range] = 0 64 | different_race_creatures[vision_range][vision_range] = 0 65 | optimal_action = Actions.EAT 66 | if where == 'UP': 67 | food[vision_range - 1][vision_range] = 20 68 | same_race_creatures[vision_range - 1][vision_range] = 0 69 | different_race_creatures[vision_range - 1][vision_range] = 0 70 | optimal_action = Actions.UP 71 | if where == 'DOWN': 72 | food[vision_range + 1][vision_range] = 20 73 | same_race_creatures[vision_range + 1][vision_range] = 0 74 | different_race_creatures[vision_range + 1][vision_range] = 0 75 | optimal_action = Actions.DOWN 76 | if where == 'LEFT': 77 | food[vision_range][vision_range - 1] = 20 78 | same_race_creatures[vision_range][vision_range - 1] = 0 79 | different_race_creatures[vision_range][vision_range - 1] = 0 80 | optimal_action = Actions.LEFT 81 | if where == 'RIGHT': 82 | food[vision_range][vision_range + 1] = 20 83 | same_race_creatures[vision_range][vision_range + 1] = 0 84 | different_race_creatures[vision_range][vision_range + 1] = 0 85 | optimal_action = Actions.RIGHT 86 | 87 | return np.stack((food, sound, same_race_creatures, different_race_creatures, energy, age)), True, [ 88 | optimal_action], 1 89 | 90 | @staticmethod 91 | def haven_inplace(vision_range): 92 | return AIQ._haven(vision_range, 'INPLACE') 93 | 94 | @staticmethod 95 | def haven_right(vision_range): 96 | return AIQ._haven(vision_range, 'RIGHT') 97 | 98 | @staticmethod 99 | def haven_left(vision_range): 100 | return AIQ._haven(vision_range, 'LEFT') 101 | 102 | @staticmethod 103 | def haven_up(vision_range): 104 | return AIQ._haven(vision_range, 'UP') 105 | 106 | @staticmethod 107 | def haven_down(vision_range): 108 | return AIQ._haven(vision_range, 'DOWN') 109 | 110 | @staticmethod 111 | def _border_awareness(vision_range, direction): 112 | energy = 3 113 | age = 3 114 | 115 | food = np.zeros(shape=(2 * vision_range + 1, 2 * vision_range + 1)) 116 | creatures = np.zeros(shape=(2 * vision_range + 1, 2 * vision_range + 1)) 117 | energy = np.ones(shape=(2 * vision_range + 1, 2 * vision_range + 1)) * energy 118 | age = np.ones(shape=(2 * vision_range + 1, 2 * vision_range + 1)) * age 119 | sound = np.zeros(shape=(2 * vision_range + 1, 2 * vision_range + 1)) 120 | 121 | if direction == 'DOWN': 122 | food[vision_range + 1:][:] = -1 123 | 
creatures[vision_range + 1:][:] = -1 124 | sound[vision_range + 1:][:] = -1 125 | bad_action = Actions.DOWN 126 | if direction == 'UP': 127 | food[:vision_range][:] = -1 128 | creatures[:vision_range][:] = -1 129 | sound[:vision_range][:] = -1 130 | bad_action = Actions.UP 131 | if direction == 'LEFT': 132 | food[:][:vision_range] = -1 133 | creatures[:][:vision_range] = -1 134 | sound[:][:vision_range] = -1 135 | bad_action = Actions.LEFT 136 | if direction == 'RIGHT': 137 | food[:][vision_range + 1:] = -1 138 | creatures[:][vision_range + 1:] = -1 139 | sound[:][vision_range + 1:] = -1 140 | bad_action = Actions.RIGHT 141 | 142 | return np.stack((food, sound, creatures, creatures, energy, age)), False, [bad_action], 0.5 143 | 144 | @staticmethod 145 | def border_awareness_up(vision_range): 146 | return AIQ._border_awareness(vision_range, 'UP') 147 | 148 | @staticmethod 149 | def border_awareness_down(vision_range): 150 | return AIQ._border_awareness(vision_range, 'DOWN') 151 | 152 | @staticmethod 153 | def border_awareness_left(vision_range): 154 | return AIQ._border_awareness(vision_range, 'LEFT') 155 | 156 | @staticmethod 157 | def border_awareness_right(vision_range): 158 | return AIQ._border_awareness(vision_range, 'RIGHT') 159 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | ![]()
3 |