├── .gitignore ├── LICENSE ├── README.md ├── assets └── schedule.jpg ├── course1 ├── README.md ├── dataset │ └── wrestling │ │ ├── README.md │ │ └── obs_actions.zip ├── docs │ ├── 111.gif │ ├── AI-Olympics.png │ ├── Data_type.png │ ├── MNIST1.png │ ├── MNIST2.png │ ├── MNIST3.png │ ├── MacInstall.png │ ├── Pytorch_advanced.ipynb │ ├── RNN.png │ ├── WindowsInstall.png │ ├── data │ │ ├── FashionMNIST.zip │ │ ├── MNIST.zip │ │ └── wrestling.zip │ ├── index.png │ ├── matrices.png │ ├── ppt │ │ └── RLChina-PyTorch_tutorial---Yan.pptx │ ├── pytorch_intro.ipynb │ ├── tensor2.png │ ├── tensors1.png │ ├── torch_variable.png │ ├── vectors.png │ └── 及第平台使用手册以及暑期实践课环境准备.pdf ├── env │ ├── __init__.py │ ├── chooseenv.py │ ├── config.json │ ├── obs_interfaces │ │ └── observation.py │ ├── olympics_wrestling.py │ └── simulators │ │ ├── game.py │ │ └── gridgame.py ├── examples │ ├── BC_submission │ │ ├── README.md │ │ ├── actor_state_dict.pt │ │ └── submission.py │ ├── random │ │ └── submission.py │ └── submission │ │ └── submission.py ├── olympics_engine │ ├── .gitignore │ ├── AI_olympics.py │ ├── LICENSE │ ├── README.md │ ├── __init__.py │ ├── agent.py │ ├── assets │ │ ├── AI-Olympics.png │ │ ├── blue rock.png │ │ ├── board.png │ │ ├── crown.png │ │ ├── curling ground.png │ │ ├── energy bar.png │ │ ├── energy-blue-bar.png │ │ ├── energy-blue.png │ │ ├── energy-red-bar.png │ │ ├── energy-red.png │ │ ├── football │ │ │ ├── agent1-V1.png │ │ │ ├── agent1-V2.png │ │ │ ├── agent1_bold.png │ │ │ ├── agent2-V1.png │ │ │ ├── agent2-V2.png │ │ │ ├── agent2_bold.png │ │ │ ├── football.png │ │ │ ├── playground.png │ │ │ ├── sight1.png │ │ │ └── sight2.png │ │ ├── green rock.png │ │ ├── purple rock.png │ │ ├── red rock.png │ │ ├── table_hockey │ │ │ ├── ball.png │ │ │ ├── player1.png │ │ │ ├── player2.png │ │ │ ├── playground.png │ │ │ ├── sight1.png │ │ │ └── sight2.png │ │ ├── wood.png │ │ └── wrestling │ │ │ ├── player1.png │ │ │ ├── player2.png │ │ │ ├── playground.png │ │ │ ├── 
sight1.png │ │ │ └── sight2.png │ ├── core.py │ ├── env_wrapper │ │ ├── README.md │ │ ├── __init__.py │ │ ├── chooseenv.py │ │ ├── config.json │ │ ├── obs_interfaces │ │ │ └── observation.py │ │ ├── olympics_running.py │ │ └── simulators │ │ │ └── game.py │ ├── generator.py │ ├── main.py │ ├── objects.py │ ├── scenario.json │ ├── scenario │ │ ├── __init__.py │ │ ├── billiard.py │ │ ├── billiard_joint.py │ │ ├── curling.py │ │ ├── curling_competition.py │ │ ├── curling_joint.py │ │ ├── curling_long.py │ │ ├── football.py │ │ ├── longjump.py │ │ ├── running.py │ │ ├── running_competition.py │ │ ├── running_competition_maps │ │ │ └── maps.json │ │ ├── seeks.py │ │ ├── table_hockey.py │ │ ├── volleyball.py │ │ └── wrestling.py │ ├── test.py │ ├── test │ │ ├── DDA.py │ │ └── test_engine.py │ ├── tools │ │ ├── func.py │ │ └── settings.py │ ├── train │ │ ├── algo │ │ │ ├── active_inference_MDP.py │ │ │ ├── active_inference_POMDP.py │ │ │ ├── network.py │ │ │ ├── ppo.py │ │ │ └── random.py │ │ ├── log_path.py │ │ ├── rllib_train.py │ │ ├── train2avoid_ppo.py │ │ ├── train2run_AI_MDP.py │ │ ├── train2run_AI_POMDP.py │ │ └── train_ppo.py │ ├── utils │ │ ├── box.py │ │ ├── get_logger.py │ │ └── space.py │ └── viewer.py ├── run_log.py └── utils │ ├── box.py │ ├── discrete.py │ ├── get_logger.py │ ├── mutli_discrete_particle.py │ └── space.py ├── course2 ├── README.md ├── env │ ├── __init__.py │ ├── chooseenv.py │ ├── config.json │ ├── obs_interfaces │ │ └── observation.py │ ├── reversi.py │ ├── simulators │ │ ├── game.py │ │ └── gridgame.py │ └── sokoban.py ├── examples │ ├── alphabeta-reversi │ │ └── submission.py │ ├── bfs-sokoban │ │ └── submission.py │ ├── mcts-reversi │ │ └── submission.py │ ├── random │ │ └── submission.py │ └── submission │ │ └── submission.py ├── run_log.py └── utils │ ├── box.py │ ├── discrete.py │ ├── get_logger.py │ ├── mutli_discrete_particle.py │ └── space.py ├── course3 ├── README.md ├── docs │ └── rlcn_2022_rl_algorithms.zip ├── env │ ├── 
__init__.py │ ├── ccgame.py │ ├── chooseenv.py │ ├── config.json │ ├── obs_interfaces │ │ └── observation.py │ └── simulators │ │ ├── game.py │ │ └── gridgame.py ├── examples │ ├── ddpg │ │ ├── actor_200.pth │ │ └── submission.py │ ├── random │ │ └── submission.py │ └── submission │ │ └── submission.py ├── run_log.py ├── train.py └── utils │ ├── box.py │ ├── discrete.py │ ├── get_logger.py │ ├── mutli_discrete_particle.py │ └── space.py ├── course4 ├── README.md ├── docs │ └── rlchina_pbl.zip ├── env │ ├── __init__.py │ ├── chessandcard.py │ ├── chooseenv.py │ ├── config.json │ ├── obs_interfaces │ │ └── observation.py │ └── simulators │ │ ├── game.py │ │ └── gridgame.py ├── examples │ ├── random │ │ └── submission.py │ └── submission │ │ └── submission.py ├── run_log.py └── utils │ ├── box.py │ ├── discrete.py │ ├── get_logger.py │ ├── mutli_discrete_particle.py │ └── space.py ├── course5 ├── README.md ├── env │ ├── __init__.py │ ├── chooseenv.py │ ├── config.json │ ├── obs_interfaces │ │ └── observation.py │ ├── revive │ │ └── refrigerator.py │ ├── revive_refrigerator.py │ └── simulators │ │ ├── game.py │ │ └── gridgame.py ├── examples │ ├── random │ │ └── submission.py │ ├── revive_example │ │ ├── revive_policy.pkl │ │ └── submission.py │ └── submission │ │ └── submission.py ├── run_log.py └── utils │ ├── box.py │ ├── discrete.py │ ├── get_logger.py │ ├── mutli_discrete_particle.py │ └── space.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | /*/__pycache__/ 3 | /*/*/__pycache__/ 4 | .idea* 5 | /course1/logs/ 6 | /course2/logs/ 7 | /course3/logs/ 8 | /course4/logs/ 9 | /course5/logs/ 10 | .DS_Store 11 | logs/* 12 | /course3/examples/ddpg/trained_model 13 | /course3/docs/rlcn_2022_rl_algorithms 14 | /course4/docs/rlchina_pbl 15 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 jidiai 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SummerCourse2022 2 | 3 | 参与RLChina暑期实践课,一起来学习强化学习吧^0^ 4 | 5 | 小编把每天的课程独立在一个文件夹里面,学习内容递进,完成所有五次作业的学员还将获得电子版结课证书!筒子们冲啊! 
6 | 7 | 课程表👇 8 | 9 | ![image](assets/schedule.jpg) 10 | 11 | 详细信息 👉请看 [RLChina官网](http://rlchina.org/) 12 | --- 13 | ### Env Dependency 14 | 15 | #### Install Anaconda or Miniconda 16 | 17 | - Anaconda: https://www.anaconda.com/products/distribution 18 | - Miniconda: https://conda.io/projects/conda/en/latest/user-guide/install/index.html 19 | 20 | >conda create -n jidi_2022 python=3.7.5 21 | 22 | >conda activate jidi_2022 23 | 24 | >pip install -r requirements.txt 25 | 26 | -------------------------------------------------------------------------------- /assets/schedule.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/assets/schedule.jpg -------------------------------------------------------------------------------- /course1/README.md: -------------------------------------------------------------------------------- 1 | ## 实践课第一天 2 | 3 | ### 任务:奥林匹克 相扑 作业要求: 提交通过并且在金榜的排名高于Jidi_random 4 | 5 | 6 | --- 7 | ### Env 👉请看 [olympics_wrestling.py](env/olympics_wrestling.py) 8 | 9 | ### Random 👉请看 [random/submission.py](examples/random/submission.py) 10 | 11 | ### 提交 👉请看 [submission.py](examples/random/submission.py) 12 | 13 | --- 14 | 15 | ### 作业:从 expert data 中做 Behaviour Cloning 学习保持在擂台的方法,使用 PyTorch 框架进行训练。 16 | 17 | 1. 下载 `/dataset/wrestling/obs_actions.zip` dataset 18 | 19 | 2. 读取 dataset, 搭建model, 进行训练 20 | 21 | 3. 在`run_log.py`测试训练的策略 22 | 23 | 4. 
提交策略至及第平台 24 | 25 | 26 | --- 27 | 28 | ### How to test submission 29 | 30 | Complete examples/submission/submission.py, and then set "policy_list" in line 176 of run_log.py 31 | >python run_log.py 32 | 33 | If no errors, your submission is ready to go~ 34 | 35 | ___ 36 | Have a good time~~~ -------------------------------------------------------------------------------- /course1/dataset/wrestling/README.md: -------------------------------------------------------------------------------- 1 | ## Behaviour Cloning expert data 2 | 3 | 这里有用作Behaviour Cloning的专家数据(10000 pairs),包含观测和动作。该策略的风格为围着圆心转圈,能够保持自己在界内,足以打败random 策略。用户需要提交一个BC获得的强于random的策略,并显露出该专家数据的风格。 -------------------------------------------------------------------------------- /course1/dataset/wrestling/obs_actions.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/dataset/wrestling/obs_actions.zip -------------------------------------------------------------------------------- /course1/docs/111.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/111.gif -------------------------------------------------------------------------------- /course1/docs/AI-Olympics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/AI-Olympics.png -------------------------------------------------------------------------------- /course1/docs/Data_type.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/Data_type.png 
-------------------------------------------------------------------------------- /course1/docs/MNIST1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/MNIST1.png -------------------------------------------------------------------------------- /course1/docs/MNIST2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/MNIST2.png -------------------------------------------------------------------------------- /course1/docs/MNIST3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/MNIST3.png -------------------------------------------------------------------------------- /course1/docs/MacInstall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/MacInstall.png -------------------------------------------------------------------------------- /course1/docs/RNN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/RNN.png -------------------------------------------------------------------------------- /course1/docs/WindowsInstall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/WindowsInstall.png -------------------------------------------------------------------------------- 
/course1/docs/data/FashionMNIST.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/data/FashionMNIST.zip -------------------------------------------------------------------------------- /course1/docs/data/MNIST.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/data/MNIST.zip -------------------------------------------------------------------------------- /course1/docs/data/wrestling.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/data/wrestling.zip -------------------------------------------------------------------------------- /course1/docs/index.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/index.png -------------------------------------------------------------------------------- /course1/docs/matrices.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/matrices.png -------------------------------------------------------------------------------- /course1/docs/ppt/RLChina-PyTorch_tutorial---Yan.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/ppt/RLChina-PyTorch_tutorial---Yan.pptx -------------------------------------------------------------------------------- /course1/docs/tensor2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/tensor2.png -------------------------------------------------------------------------------- /course1/docs/tensors1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/tensors1.png -------------------------------------------------------------------------------- /course1/docs/torch_variable.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/torch_variable.png -------------------------------------------------------------------------------- /course1/docs/vectors.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/vectors.png -------------------------------------------------------------------------------- /course1/docs/及第平台使用手册以及暑期实践课环境准备.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/及第平台使用手册以及暑期实践课环境准备.pdf -------------------------------------------------------------------------------- /course1/env/__init__.py: -------------------------------------------------------------------------------- 1 | from .olympics_wrestling import * 2 | -------------------------------------------------------------------------------- /course1/env/chooseenv.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # 作者:zruizhi 3 | # 创建时间: 2020/9/11 11:17 上午 4 | # 
描述:选择运行环境,需要维护env/__ini__.py && config.json(存储环境默认参数) 5 | 6 | import json 7 | import env 8 | import os 9 | 10 | 11 | def make(env_type, conf=None): 12 | file_path = os.path.join(os.path.dirname(__file__), 'config.json') 13 | if not conf: 14 | with open(file_path) as f: 15 | conf = json.load(f)[env_type] 16 | class_literal = conf['class_literal'] 17 | return getattr(env, class_literal)(conf) 18 | 19 | 20 | if __name__ == "__main__": 21 | make("classic_MountainCar-v0") 22 | -------------------------------------------------------------------------------- /course1/env/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "olympics-wrestling": { 3 | "class_literal": "OlympicsWrestling", 4 | "n_player": 2, 5 | "max_step": 500, 6 | "game_name": "wrestling", 7 | "is_obs_continuous": true, 8 | "is_act_continuous": true, 9 | "agent_nums": [1,1], 10 | "obs_type": ["vector", "vector"] 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /course1/env/obs_interfaces/observation.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # 作者:zruizhi 3 | # 创建时间: 2020/11/13 3:51 下午 4 | # 描述:observation的各种接口类 5 | obs_type = ["grid", "vector", "dict"] 6 | 7 | 8 | class GridObservation(object): 9 | def get_grid_observation(self, current_state, player_id, info_before): 10 | raise NotImplementedError 11 | 12 | def get_grid_many_observation(self, current_state, player_id_list, info_before=''): 13 | all_obs = [] 14 | for i in player_id_list: 15 | all_obs.append(self.get_grid_observation(current_state, i, info_before)) 16 | return all_obs 17 | 18 | 19 | class VectorObservation(object): 20 | def get_vector_observation(self, current_state, player_id, info_before): 21 | raise NotImplementedError 22 | 23 | def get_vector_many_observation(self, current_state, player_id_list, info_before=''): 24 | all_obs = [] 25 | for i in player_id_list: 26 | 
all_obs.append(self.get_vector_observation(current_state, i, info_before)) 27 | return all_obs 28 | 29 | 30 | class DictObservation(object): 31 | def get_dict_observation(self, current_state, player_id, info_before): 32 | raise NotImplementedError 33 | 34 | def get_dict_many_observation(self, current_state, player_id_list, info_before=''): 35 | all_obs = [] 36 | for i in player_id_list: 37 | all_obs.append(self.get_dict_observation(current_state, i, info_before)) 38 | return all_obs 39 | 40 | 41 | # todo: observation builder 42 | class CustomObservation(object): 43 | def get_custom_observation(self, current_state, player_id): 44 | raise NotImplementedError 45 | 46 | def get_custom_obs_space(self, player_id): 47 | raise NotImplementedError 48 | 49 | def get_custom_many_observation(self, current_state, player_id_list): 50 | all_obs = [] 51 | for i in player_id_list: 52 | all_obs.append(self.get_custom_observation(current_state, i)) 53 | return all_obs 54 | 55 | def get_custom_many_obs_space(self, player_id_list): 56 | all_obs_space = [] 57 | for i in player_id_list: 58 | all_obs_space.append(self.get_custom_obs_space(i)) 59 | return all_obs_space 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /course1/env/olympics_wrestling.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | 4 | root_dir = str(Path(__file__).resolve().parent.parent.parent) 5 | sys.path.append(root_dir) 6 | 7 | from course1.olympics_engine.generator import create_scenario 8 | from course1.olympics_engine.scenario.wrestling import * 9 | 10 | from course1.utils.box import Box 11 | from course1.env.simulators.game import Game 12 | 13 | import numpy as np 14 | 15 | 16 | class OlympicsWrestling(Game): 17 | def __init__(self, conf, seed=None): 18 | super(OlympicsWrestling, self).__init__(conf['n_player'], conf['is_obs_continuous'], conf['is_act_continuous'], 19 | 
conf['game_name'], conf['agent_nums'], conf['obs_type']) 20 | self.seed = seed 21 | self.set_seed() 22 | 23 | Gamemap = create_scenario("wrestling") 24 | self.env_core = wrestling(Gamemap) 25 | self.max_step = int(conf['max_step']) 26 | self.joint_action_space = self.set_action_space() 27 | self.action_dim = self.joint_action_space 28 | 29 | self.step_cnt = 0 30 | self.init_info = None 31 | self.won = {} 32 | self.n_return = [0] * self.n_player 33 | 34 | _ = self.reset() 35 | 36 | self.board_width = self.env_core.view_setting['width'] + 2 * self.env_core.view_setting['edge'] 37 | self.board_height = self.env_core.view_setting['height'] + 2 * self.env_core.view_setting['edge'] 38 | 39 | @staticmethod 40 | def create_seed(): 41 | seed = random.randrange(1000) 42 | return seed 43 | 44 | def set_seed(self, seed=None): 45 | if not seed: # use previous seed when no new seed input 46 | seed = self.seed 47 | else: # update env global seed 48 | self.seed = seed 49 | random.seed(seed) 50 | np.random.seed(seed) 51 | 52 | def reset(self): 53 | init_obs = self.env_core.reset() 54 | self.step_cnt = 0 55 | self.done = False 56 | self.init_info = None 57 | self.won = {} 58 | self.n_return = [0] * self.n_player 59 | 60 | self.current_state = init_obs 61 | self.all_observes = self.get_all_observes() 62 | 63 | return self.all_observes 64 | 65 | def step(self, joint_action): 66 | self.is_valid_action(joint_action) 67 | info_before = self.step_before_info() 68 | joint_action_decode = self.decode(joint_action) 69 | all_observations, reward, done, info_after = self.env_core.step(joint_action_decode) 70 | info_after = '' 71 | self.current_state = all_observations 72 | self.all_observes = self.get_all_observes() 73 | 74 | self.step_cnt += 1 75 | self.done = done 76 | if self.done: 77 | self.set_n_return() 78 | 79 | return self.all_observes, reward, self.done, info_before, info_after 80 | 81 | def is_valid_action(self, joint_action): 82 | if len(joint_action) != self.n_player: # check 
number of player 83 | raise Exception("Input joint action dimension should be {}, not {}".format( 84 | self.n_player, len(joint_action))) 85 | 86 | def step_before_info(self, info=''): 87 | return info 88 | 89 | def decode(self, joint_action): 90 | joint_action_decode = [] 91 | for act_id, nested_action in enumerate(joint_action): 92 | temp_action = [0, 0] 93 | temp_action[0] = nested_action[0][0] 94 | temp_action[1] = nested_action[1][0] 95 | joint_action_decode.append(temp_action) 96 | 97 | return joint_action_decode 98 | 99 | def get_all_observes(self): 100 | all_observes = [] 101 | for i in range(self.n_player): 102 | each = {"obs": self.current_state[i], "controlled_player_index": i} 103 | all_observes.append(each) 104 | 105 | return all_observes 106 | 107 | def set_action_space(self): 108 | return [[Box(-100, 200, shape=(1,)), Box(-30, 30, shape=(1,))] for _ in range(self.n_player)] 109 | 110 | def get_reward(self, reward): 111 | return [reward] 112 | 113 | def is_terminal(self): 114 | return self.env_core.is_terminal() 115 | 116 | def set_n_return(self): 117 | 118 | if self.env_core.agent_list[0].finished and not (self.env_core.agent_list[1].finished): 119 | self.n_return = [0, 1] 120 | elif not (self.env_core.agent_list[0].finished) and self.env_core.agent_list[1].finished: 121 | self.n_return = [1, 0] 122 | elif self.env_core.agent_list[0].finished and self.env_core.agent_list[1].finished: 123 | self.n_return = [0, 0] 124 | else: 125 | self.n_return = [0, 0] 126 | 127 | def check_win(self): 128 | 129 | if self.env_core.agent_list[0].finished and not (self.env_core.agent_list[1].finished): 130 | return '1' 131 | elif not (self.env_core.agent_list[0].finished) and self.env_core.agent_list[1].finished: 132 | return '0' 133 | else: 134 | return '-1' 135 | 136 | def get_single_action_space(self, player_id): 137 | return self.joint_action_space[player_id] 138 | -------------------------------------------------------------------------------- 
/course1/env/simulators/game.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # 作者:zruizhi 3 | # 创建时间: 2020/7/10 10:24 上午 4 | # 描述: 5 | from abc import ABC, abstractmethod 6 | 7 | 8 | class Game(ABC): 9 | def __init__(self, n_player, is_obs_continuous, is_act_continuous, game_name, agent_nums, obs_type): 10 | self.n_player = n_player 11 | self.current_state = None 12 | self.all_observes = None 13 | self.is_obs_continuous = is_obs_continuous 14 | self.is_act_continuous = is_act_continuous 15 | self.game_name = game_name 16 | self.agent_nums = agent_nums 17 | self.obs_type = obs_type 18 | 19 | def get_config(self, player_id): 20 | raise NotImplementedError 21 | 22 | def get_render_data(self, current_state): 23 | return current_state 24 | 25 | def set_current_state(self, current_state): 26 | raise NotImplementedError 27 | 28 | @abstractmethod 29 | def is_terminal(self): 30 | raise NotImplementedError 31 | 32 | def get_next_state(self, all_action): 33 | raise NotImplementedError 34 | 35 | def get_reward(self, all_action): 36 | raise NotImplementedError 37 | 38 | @abstractmethod 39 | def step(self, all_action): 40 | raise NotImplementedError 41 | 42 | @abstractmethod 43 | def reset(self): 44 | raise NotImplementedError 45 | 46 | def set_action_space(self): 47 | raise NotImplementedError 48 | 49 | -------------------------------------------------------------------------------- /course1/examples/BC_submission/README.md: -------------------------------------------------------------------------------- 1 | ## Behaviour Cloning example submission 2 | 3 | 这里是一个是用了BC的提交例子,针对奥林匹克相扑环境(Olympics-wrestling)。用户只需要提交`submission.py`和 `actor_state_dict.pt`文件至环境提交页面。 4 | 5 | `submission.py`文件里的`my_controller`函数为评测时所调用的策略主函数,输入为观测obs,输出为动作actions。注意格式对齐。提交前可以在`course1/run_log.py`文件内测试,若能跑通则提交也能通过。 -------------------------------------------------------------------------------- 
/course1/examples/BC_submission/actor_state_dict.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/examples/BC_submission/actor_state_dict.pt -------------------------------------------------------------------------------- /course1/examples/BC_submission/submission.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import os 3 | 4 | current_path = Path(__file__).resolve().parent 5 | model_path = os.path.join(current_path, 'actor_state_dict.pt') 6 | 7 | 8 | 9 | import torch 10 | import torch.nn as nn 11 | 12 | class Net(nn.Module): 13 | def __init__(self): 14 | super(Net, self).__init__() 15 | self.net = nn.Sequential( 16 | nn.Linear(1600, 400), 17 | nn.ReLU(), 18 | nn.Linear(400, 128), 19 | nn.ReLU(), 20 | nn.Linear(128, 64), 21 | nn.ReLU(), 22 | nn.Linear(64, 2) 23 | ) 24 | def forward(self, X): 25 | action_batch = self.net(X) 26 | action_batch[:, 0] = torch.tanh(action_batch[:,0])*150+50 27 | action_batch[:, 1] = torch.tanh(action_batch[:, 1])*30 28 | return action_batch 29 | 30 | model = Net() 31 | loaded_actor_state = torch.load(model_path) 32 | model.load_state_dict(loaded_actor_state) 33 | 34 | def my_controller(observation, action_space, is_act_continuous=True): 35 | 36 | obs_array = torch.tensor(observation['obs']['agent_obs']).float().reshape(1, -1) 37 | action = model(obs_array) 38 | 39 | return [[action[0][0].item()], [action[0][1].item()]] 40 | 41 | -------------------------------------------------------------------------------- /course1/examples/random/submission.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Time : 2022/8/10 下午4:14 3 | # Author: Yahui Cui 4 | 5 | """ 6 | # =================================== Important ========================================= 7 | Notes: 8 | 1. 
this agents is random agents , which can fit any env in Jidi platform. 9 | 2. if you want to load .pth file, please follow the instruction here: 10 | https://github.com/jidiai/ai_lib/blob/master/examples/demo 11 | """ 12 | 13 | 14 | def my_controller(observation, action_space, is_act_continuous=True): 15 | agent_action = [] 16 | for i in range(len(action_space)): 17 | action_ = sample_single_dim(action_space[i], is_act_continuous) 18 | agent_action.append(action_) 19 | return agent_action 20 | 21 | 22 | def sample_single_dim(action_space_list_each, is_act_continuous): 23 | each = [] 24 | if is_act_continuous: 25 | each = action_space_list_each.sample() 26 | else: 27 | if action_space_list_each.__class__.__name__ == "Discrete": 28 | each = [0] * action_space_list_each.n 29 | idx = action_space_list_each.sample() 30 | each[idx] = 1 31 | elif action_space_list_each.__class__.__name__ == "MultiDiscreteParticle": 32 | each = [] 33 | nvec = action_space_list_each.high - action_space_list_each.low + 1 34 | sample_indexes = action_space_list_each.sample() 35 | 36 | for i in range(len(nvec)): 37 | dim = nvec[i] 38 | new_action = [0] * dim 39 | index = sample_indexes[i] 40 | new_action[index] = 1 41 | each.extend(new_action) 42 | elif action_space_list_each.__class__.__name__ == "Discrete_SC2": 43 | each = action_space_list_each.sample() 44 | elif action_space_list_each.__class__.__name__ == "Box": 45 | each = action_space_list_each.sample() 46 | return each 47 | -------------------------------------------------------------------------------- /course1/examples/submission/submission.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Time : 2022/8/10 下午4:14 3 | # Author: Yahui Cui 4 | 5 | """ 6 | # =================================== Important ========================================= 7 | Notes: 8 | 1. this agents is random agents , which can fit any env in Jidi platform. 9 | 2. 
class AI_Olympics:
    """Tournament wrapper that plays several Olympics sub-games in sequence.

    Each sub-game contributes one point to the winner's tally; after the
    last game the team with more points receives a final reward of 100.
    """

    def __init__(self, random_selection, minimap):
        # BUG FIX: honour the caller's flag instead of hardcoding True.
        # main.py constructs AI_Olympics(random_selection=False, ...) and
        # expects a fixed game-playing sequence.
        self.random_selection = random_selection
        self.minimap_mode = minimap

        self.max_step = 400
        self.vis = 200
        self.vis_clear = 5
        self.done = False  # valid for is_terminal() even before reset()

        running_Gamemap = create_scenario("running-competition")
        self.running_game = Running_competition(running_Gamemap, vis=200, vis_clear=5,
                                                agent1_color='light red', agent2_color='blue')

        self.tablehockey_game = table_hockey(create_scenario("table-hockey"))
        self.football_game = football(create_scenario('football'))
        self.wrestling_game = wrestling(create_scenario('wrestling'))
        self.curling_game = curling_competition(create_scenario('curling-IJACA-competition'))
        self.billiard_game = billiard_joint(create_scenario("billiard-joint"))

        self.running_game.max_step = self.max_step
        self.tablehockey_game.max_step = self.max_step
        self.football_game.max_step = self.max_step
        self.wrestling_game.max_step = self.max_step
        # NOTE(review): curling/billiard keep their own max_step — confirm intended.

        self.game_pool = [{"name": 'running-competition', 'game': self.running_game},
                          {"name": 'table-hockey', "game": self.tablehockey_game},
                          {"name": 'football', "game": self.football_game},
                          {"name": 'wrestling', "game": self.wrestling_game},
                          {"name": "curling", "game": self.curling_game},
                          {"name": "billiard", "game": self.billiard_game}]
        self.view_setting = self.running_game.view_setting

    def _wrap_obs(self, obs, game_mode, use_billiard_energy):
        """Annotate raw sub-game observations with id/game_mode/energy.

        running-competition returns bare per-agent obs, so wrap them in
        dicts first. `use_billiard_energy` mirrors the original code:
        only mid-game steps read `agent_energy` for billiard.
        """
        if self.current_game.game_name == 'running-competition':
            obs = [{'agent_obs': obs[i], 'id': f'team_{i}'} for i in [0, 1]]
        for ob in obs:
            ob['game_mode'] = game_mode
        for i, ob in enumerate(obs):
            if 'curling' in self.current_game.game_name:
                ob['energy'] = 1000  # curling agents have unlimited energy here
            elif use_billiard_energy and 'billiard' in self.current_game.game_name:
                ob['energy'] = self.current_game.agent_energy[i]
            else:
                ob['energy'] = self.current_game.agent_list[i].energy
        return obs

    def reset(self):
        """Start a fresh tournament and return the first game's initial obs."""
        self.done = False
        selected_game_idx_pool = list(range(len(self.game_pool)))
        if self.random_selection:
            random.shuffle(selected_game_idx_pool)  # random game playing sequence

        self.selected_game_idx_pool = selected_game_idx_pool
        self.current_game_count = 0
        selected_game_idx = self.selected_game_idx_pool[self.current_game_count]

        print(f'Playing {self.game_pool[selected_game_idx]["name"]}')
        self.current_game = self.game_pool[selected_game_idx]['game']
        self.game_score = [0, 0]

        init_obs = self.current_game.reset()
        return self._wrap_obs(init_obs, 'NEW GAME', use_billiard_energy=False)

    def step(self, action_list):
        """Advance the current sub-game; on completion, score it and move on.

        Returns (obs, reward, done, info). The final step of the last
        game returns the aggregated final reward instead of the
        sub-game reward.
        """
        obs, reward, done, _ = self.current_game.step(action_list)
        obs = self._wrap_obs(obs, '', use_billiard_energy=True)

        if done:
            winner = self.current_game.check_win()
            if winner != '-1':  # '-1' means a draw in the sub-game
                self.game_score[int(winner)] += 1

            if self.current_game_count == len(self.game_pool) - 1:
                self.done = True
            else:
                self.current_game_count += 1
                self.current_game_idx = self.selected_game_idx_pool[self.current_game_count]
                self.current_game = self.game_pool[self.current_game_idx]['game']
                print(f'Playing {self.game_pool[self.current_game_idx]["name"]}')
                obs = self.current_game.reset()
                obs = self._wrap_obs(obs, 'NEW GAME', use_billiard_energy=False)

        if self.done:
            print('game score = ', self.game_score)
            if self.game_score[0] > self.game_score[1]:
                self.final_reward = [100, 0]
                print('Results: team 0 win!')
            elif self.game_score[1] > self.game_score[0]:
                self.final_reward = [0, 100]
                print('Results: team 1 win!')
            else:
                self.final_reward = [0, 0]
                print('Results: Draw!')
            return obs, self.final_reward, self.done, ''
        else:
            return obs, reward, self.done, ''

    def is_terminal(self):
        """True once the last sub-game of the tournament has finished."""
        return self.done

    def __getattr__(self, item):
        # Delegate unknown attributes to the active sub-game. Guard the
        # delegation target itself to avoid infinite recursion when an
        # attribute is looked up before the first reset().
        if item == 'current_game':
            raise AttributeError(item)
        return getattr(self.current_game, item)

    def render(self):
        self.current_game.render()
class random_agent:
    """Baseline agent that emits uniformly random [force, angle] actions."""

    def __init__(self):
        # Action ranges accepted by the olympics engine.
        self.force_range = [-100, 200]
        self.angle_range = [-30, 30]

    def act(self, obs):
        """Ignore *obs* and return a random [force, angle] pair."""
        force_lo, force_hi = self.force_range
        angle_lo, angle_hi = self.angle_range
        return [random.uniform(force_lo, force_hi),
                random.uniform(angle_lo, angle_hi)]
-------------------------------------------------------------------------------- /course1/olympics_engine/assets/crown.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/crown.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/curling ground.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/curling ground.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/energy bar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/energy bar.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/energy-blue-bar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/energy-blue-bar.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/energy-blue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/energy-blue.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/energy-red-bar.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/energy-red-bar.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/energy-red.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/energy-red.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/football/agent1-V1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/football/agent1-V1.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/football/agent1-V2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/football/agent1-V2.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/football/agent1_bold.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/football/agent1_bold.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/football/agent2-V1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/football/agent2-V1.png 
-------------------------------------------------------------------------------- /course1/olympics_engine/assets/football/agent2-V2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/football/agent2-V2.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/football/agent2_bold.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/football/agent2_bold.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/football/football.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/football/football.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/football/playground.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/football/playground.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/football/sight1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/football/sight1.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/football/sight2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/football/sight2.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/green rock.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/green rock.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/purple rock.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/purple rock.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/red rock.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/red rock.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/table_hockey/ball.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/table_hockey/ball.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/table_hockey/player1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/table_hockey/player1.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/table_hockey/player2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/table_hockey/player2.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/table_hockey/playground.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/table_hockey/playground.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/table_hockey/sight1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/table_hockey/sight1.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/table_hockey/sight2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/table_hockey/sight2.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/wood.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/wood.png 
-------------------------------------------------------------------------------- /course1/olympics_engine/assets/wrestling/player1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/wrestling/player1.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/wrestling/player2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/wrestling/player2.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/wrestling/playground.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/wrestling/playground.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/wrestling/sight1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/wrestling/sight1.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/wrestling/sight2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/wrestling/sight2.png -------------------------------------------------------------------------------- /course1/olympics_engine/env_wrapper/README.md: 
def make(env_type, seed=None, conf=None):
    """Instantiate the wrapped environment named *env_type*.

    When *conf* is not supplied, the defaults are loaded from the
    config.json that sits next to this module. Environments whose name
    starts with "olympics" additionally receive the random seed.
    """
    if not conf:
        config_file = os.path.join(os.path.dirname(__file__), 'config.json')
        with open(config_file) as f:
            conf = json.load(f)[env_type]
    env_class = getattr(env_wrapper, conf['class_literal'])
    if env_type.split('-')[0] in ["olympics"]:
        return env_class(conf, seed)
    return env_class(conf)
# Observation interface classes: each declares a single-player getter
# (to be overridden) plus a many-player collector built on top of it.
obs_type = ["grid", "vector", "dict"]


class GridObservation(object):
    """Interface for grid-shaped observations."""

    def get_grid_observation(self, current_state, player_id, info_before):
        raise NotImplementedError

    def get_grid_many_observation(self, current_state, player_id_list, info_before=''):
        """Collect one grid observation per player id, in order."""
        return [self.get_grid_observation(current_state, pid, info_before)
                for pid in player_id_list]


class VectorObservation(object):
    """Interface for vector-shaped observations."""

    def get_vector_observation(self, current_state, player_id, info_before):
        raise NotImplementedError

    def get_vector_many_observation(self, current_state, player_id_list, info_before=''):
        """Collect one vector observation per player id, in order."""
        return [self.get_vector_observation(current_state, pid, info_before)
                for pid in player_id_list]


class DictObservation(object):
    """Interface for dict-shaped observations."""

    def get_dict_observation(self, current_state, player_id, info_before):
        raise NotImplementedError

    def get_dict_many_observation(self, current_state, player_id_list, info_before=''):
        """Collect one dict observation per player id, in order."""
        return [self.get_dict_observation(current_state, pid, info_before)
                for pid in player_id_list]


# todo: observation builder
class CustomObservation(object):
    """Interface for user-defined observations and their spaces."""

    def get_custom_observation(self, current_state, player_id):
        raise NotImplementedError

    def get_custom_obs_space(self, player_id):
        raise NotImplementedError

    def get_custom_many_observation(self, current_state, player_id_list):
        """Collect one custom observation per player id, in order."""
        return [self.get_custom_observation(current_state, pid)
                for pid in player_id_list]

    def get_custom_many_obs_space(self, player_id_list):
        """Collect one custom observation space per player id, in order."""
        return [self.get_custom_obs_space(pid) for pid in player_id_list]


class Game(ABC):
    """Abstract base class for turn-stepped multi-player games.

    Subclasses must implement is_terminal/step/reset; the remaining
    hooks raise NotImplementedError until overridden.
    """

    def __init__(self, n_player, is_obs_continuous, is_act_continuous, game_name, agent_nums, obs_type):
        self.n_player = n_player
        self.current_state = None
        self.all_observes = None
        self.is_obs_continuous = is_obs_continuous
        self.is_act_continuous = is_act_continuous
        self.game_name = game_name
        self.agent_nums = agent_nums
        self.obs_type = obs_type

    def get_config(self, player_id):
        raise NotImplementedError

    def get_render_data(self, current_state):
        """Default render payload is the raw state itself."""
        return current_state

    def set_current_state(self, current_state):
        raise NotImplementedError

    @abstractmethod
    def is_terminal(self):
        raise NotImplementedError

    def get_next_state(self, all_action):
        raise NotImplementedError

    def get_reward(self, all_action):
        raise NotImplementedError

    @abstractmethod
    def step(self, all_action):
        raise NotImplementedError

    @abstractmethod
    def reset(self):
        raise NotImplementedError

    def set_action_space(self):
        raise NotImplementedError
def create_scenario(scenario_name, file_path=None):
    """Build a GameMap dict for *scenario_name* from a scenario JSON file.

    Args:
        scenario_name: top-level key to read from the config.
        file_path: optional path to the JSON config; defaults to the
            scenario.json sitting next to this module.

    Returns:
        dict with keys "objects", "agents", "view" and, when present in
        the config, "env_cfg" / "obs_cfg".
    """
    if file_path is None:
        file_path = os.path.join(os.path.dirname(__file__), 'scenario.json')

    with open(file_path) as f:
        conf = json.load(f)[scenario_name]

    GameMap = dict()
    GameMap["objects"] = list()
    GameMap["agents"] = list()
    GameMap["view"] = conf["view"]

    # renamed loop variable from `type` to avoid shadowing the builtin
    for section in conf:
        if section == 'env_cfg':
            GameMap["env_cfg"] = conf[section]
        elif section == 'obs_cfg':
            GameMap["obs_cfg"] = conf[section]
        elif section in ("wall", "cross"):
            # physics object classes (Wall, Cross, ...) live in objects.py;
            # imported lazily so config-only scenarios need no objects module
            module = __import__("objects")
            for value in conf[section]["objects"].values():
                ball_pass = value.get('ball_pass')
                GameMap["objects"].append(getattr(module, section.capitalize())(
                    init_pos=value["initial_position"],
                    length=None,
                    color=value["color"],
                    # config stores the flag as the string "True"/"False";
                    # preserve the original value ("True" or False) exactly
                    ball_can_pass=ball_pass if ball_pass == "True" else False,
                    width=value.get('width'),
                ))
        elif section == 'arc':
            module = __import__("objects")
            for value in conf[section]['objects'].values():
                GameMap['objects'].append(getattr(module, section.capitalize())(
                    init_pos=value["initial_position"],
                    start_radian=value["start_radian"],
                    end_radian=value["end_radian"],
                    passable=value["passable"] == "True",
                    color=value['color'],
                    collision_mode=value['collision_mode'],
                    width=value.get('width'),
                ))
        elif section in ("agent", "ball"):
            module = __import__("objects")
            for value in conf[section]["objects"].values():
                GameMap["agents"].append(getattr(module, section.capitalize())(
                    mass=value["mass"],
                    r=value["radius"],
                    position=value["initial_position"],
                    color=value["color"],
                    vis=value.get("vis"),
                    vis_clear=value.get("vis_clear"),
                ))
    return GameMap
create_scenario(args.map) 51 | game = Running_competition(meta_map=Gamemap,map_id=map_id) 52 | agent_num = 2 53 | 54 | 55 | elif args.map == 'table-hockey': 56 | game = table_hockey(Gamemap) 57 | agent_num = 2 58 | elif args.map == 'football': 59 | game = football(Gamemap) 60 | agent_num = 2 61 | elif args.map == 'wrestling': 62 | game = wrestling(Gamemap) 63 | agent_num = 2 64 | # elif args.map == 'volleyball': 65 | # game = volleyball(Gamemap) 66 | # agent_num = 2 67 | elif args.map == 'billiard': 68 | game = billiard(Gamemap) 69 | agent_num = 2 70 | elif args.map == 'curling': 71 | game = curling(Gamemap) 72 | agent_num = 2 73 | 74 | elif args.map == 'curling-joint': 75 | game = curling_joint(Gamemap) 76 | agent_num = 2 77 | 78 | elif args.map == 'billiard-joint': 79 | game = billiard_joint(Gamemap) 80 | agent_num = 2 81 | 82 | elif args.map == 'curling-long': 83 | game = curling_long(Gamemap) 84 | agent_num = 2 85 | 86 | elif args.map == 'curling-competition': 87 | game = curling_competition(Gamemap) 88 | agent_num = 2 89 | 90 | elif args.map == 'all': 91 | game = AI_Olympics(random_selection = False, minimap=False) 92 | agent_num = 2 93 | 94 | agent = random_agent() 95 | rand_agent = random_agent() 96 | 97 | obs = game.reset() 98 | done = False 99 | step = 0 100 | if RENDER: 101 | game.render() 102 | 103 | print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@") 104 | time_epi_s = time.time() 105 | while not done: 106 | step += 1 107 | 108 | # print('\n Step ', step) 109 | 110 | #action1 = [100,0]#agent.act(obs) 111 | #action2 = [100,0] #rand_agent.act(obs) 112 | if agent_num == 2: 113 | action1, action2 = agent.act(obs[0]), rand_agent.act(obs[1]) 114 | # action1 = [100,1] 115 | 116 | # action1 =[50,1] 117 | # action2 = [50,-1] 118 | 119 | 120 | action = [action1, action2] 121 | elif agent_num == 1: 122 | action1 = agent.act(obs) 123 | action = [action1] 124 | 125 | # if step <= 5: 126 | # action = [[200,0]] 127 | # else: 128 | # action = [[0,0]] 129 | # action = 
def closest_point(l1, l2, point):
    """
    Compute the point on the line through l1 and l2 closest to *point*,
    via Cramer's rule, reference: https://en.wikipedia.org/wiki/Cramer%27s_rule

    :param l1: start pos [x, y]
    :param l2: end pos [x, y]
    :param point: query position [x, y]
    :return: [cx, cy] foot of the perpendicular from point onto the line
    """
    A1 = l2[1] - l1[1]
    B1 = l1[0] - l2[0]
    C1 = (l2[1] - l1[1]) * l1[0] + (l1[0] - l2[0]) * l1[1]
    C2 = -B1 * point[0] + A1 * point[1]
    det = A1 * A1 + B1 * B1
    if det == 0:
        # l1 == l2: the "line" degenerates to a point, which is closest.
        cx, cy = point
    else:
        cx = (A1 * C1 - B1 * C2) / det
        cy = (A1 * C2 + B1 * C1) / det

    return [cx, cy]


def distance_to_line(l1, l2, pos):
    """
    Distance from *pos* to the line through l1 and l2.

    BUG FIX: returns 0.0 when pos lies exactly on the line instead of
    raising ZeroDivisionError (the normal vector has zero length there).
    """
    closest_p = closest_point(l1, l2, pos)

    n = [pos[0] - closest_p[0], pos[1] - closest_p[1]]  # compute normal
    nn = n[0] ** 2 + n[1] ** 2
    nn_sqrt = math.sqrt(nn)
    if nn_sqrt == 0:
        return 0.0  # pos is on the line
    cl1 = [l1[0] - pos[0], l1[1] - pos[1]]
    # project (l1 - pos) onto the unit normal; |projection| is the distance
    cl1_n = (cl1[0] * n[0] + cl1[1] * n[1]) / nn_sqrt

    return abs(cl1_n)
self.change_inner_state() 90 | self.step_cnt += 1 91 | 92 | step_reward = self.get_reward() 93 | obs_next = self.get_obs() 94 | done = self.is_terminal() 95 | 96 | #return self.agent_pos, self.agent_v, self.agent_accel, self.agent_theta, obs_next, step_reward, done 97 | return obs_next, step_reward, done, '' 98 | 99 | def get_reward(self): 100 | 101 | agent_reward = [0. for _ in range(self.agent_num)] 102 | 103 | for agent_idx in range(self.agent_num): 104 | if self.agent_list[agent_idx].color == 'red' and (self.agent_v[agent_idx][0]**2 + self.agent_v[agent_idx][1]**2) < 1e-10: 105 | for object_idx in range(len(self.map['objects'])): 106 | object = self.map['objects'][object_idx] 107 | if object.color == 'red': 108 | l1, l2 = object.init_pos 109 | agent_reward[agent_idx] = distance_to_line(l1, l2, self.agent_pos[agent_idx]) 110 | return agent_reward 111 | 112 | def is_terminal(self): 113 | 114 | if self.step_cnt >= self.max_step: 115 | return True 116 | 117 | for agent_idx in range(self.agent_num): 118 | if self.agent_list[agent_idx].color == 'red' and ( 119 | self.agent_v[agent_idx][0] ** 2 + self.agent_v[agent_idx][1] ** 2) < 1e-5: 120 | return True 121 | 122 | 123 | 124 | 125 | 126 | -------------------------------------------------------------------------------- /course1/olympics_engine/scenario/running.py: -------------------------------------------------------------------------------- 1 | from olympics_engine.core import OlympicsBase 2 | from olympics_engine.viewer import Viewer, debug 3 | import time 4 | import pygame 5 | import sys 6 | 7 | class Running(OlympicsBase): 8 | def __init__(self, map, seed = None): 9 | self.minimap_mode = map['obs_cfg'].get('minimap', False) 10 | 11 | super(Running, self).__init__(map, seed) 12 | 13 | self.game_name = 'running' 14 | 15 | self.agent1_color = self.agent_list[0].color 16 | self.agent2_color = self.agent_list[1].color 17 | 18 | self.tau = map['env_cfg'].get('tau', 0.1) 19 | self.gamma = map["env_cfg"].get('gamma', 1) 
    def reset(self):
        """
        Reset the episode: re-seed, rebuild the physical state, recreate the
        viewer and return the initial per-agent observations.

        :return: list of per-agent observation dicts produced by
                 _build_from_raw_obs (adds an "id" key and, in minimap mode,
                 a shared "minimap" image).
        """
        self.set_seed()
        self.init_state()
        self.step_cnt = 0
        self.done = False

        # fresh Viewer each reset; the actual window (set_mode) is opened
        # lazily on the first draw
        self.viewer = Viewer(self.view_setting)
        self.display_mode=False

        init_obs = self.get_obs()

        # in minimap mode the whole arena is rendered once so every agent
        # also receives a global view alongside its egocentric observation
        if self.minimap_mode:
            self._build_minimap()

        output_init_obs = self._build_from_raw_obs(init_obs)
        return output_init_obs
71 | 72 | return agent_reward 73 | 74 | def is_terminal(self): 75 | 76 | if self.step_cnt >= self.max_step: 77 | return True 78 | 79 | for agent_idx in range(self.agent_num): 80 | if self.agent_list[agent_idx].finished: 81 | return True 82 | 83 | return False 84 | 85 | 86 | 87 | def step(self, actions_list): 88 | 89 | previous_pos = self.agent_pos 90 | 91 | time1 = time.time() 92 | self.stepPhysics(actions_list, self.step_cnt) 93 | time2 = time.time() 94 | #print('stepPhysics time = ', time2 - time1) 95 | self.speed_limit() 96 | 97 | self.cross_detect(previous_pos, self.agent_pos) 98 | 99 | self.step_cnt += 1 100 | step_reward = self.get_reward() 101 | done = self.is_terminal() 102 | 103 | obs_next = self.get_obs() 104 | #self.check_overlap() 105 | self.change_inner_state() 106 | 107 | 108 | if self.minimap_mode: 109 | self._build_minimap() 110 | 111 | output_obs_next = self._build_from_raw_obs(obs_next) 112 | 113 | 114 | return output_obs_next, step_reward, done, '' 115 | 116 | def _build_from_raw_obs(self, obs): 117 | if self.minimap_mode: 118 | image = pygame.surfarray.array3d(self.viewer.background).swapaxes(0,1) 119 | return [{"agent_obs": obs[0], "minimap":image, "id":"team_0"}, 120 | {"agent_obs": obs[1], "minimap": image, "id":"team_1"}] 121 | else: 122 | return [{"agent_obs":obs[0], "id":"team_0"}, {"agent_obs": obs[1], "id":"team_1"}] 123 | 124 | def _build_minimap(self): 125 | 126 | #need to render first 127 | if not self.display_mode: 128 | self.viewer.set_mode() 129 | self.display_mode = True 130 | 131 | self.viewer.draw_background() 132 | for w in self.map['objects']: 133 | self.viewer.draw_map(w) 134 | 135 | self.viewer.draw_ball(self.agent_pos, self.agent_list) 136 | 137 | if self.draw_obs: 138 | self.viewer.draw_obs(self.obs_boundary, self.agent_list) 139 | 140 | # image = pygame.surfarray.array3d(self.viewer.background).swapaxes(0,1) 141 | 142 | # return image 143 | 144 | def check_win(self): 145 | if self.agent_list[0].finished and not 
    def render(self, info=None):
        """
        Draw the current frame with pygame: map, agents, observation
        boundaries, per-agent view panels, trajectories, acceleration
        arrows and a step counter overlay.

        :param info: optional extra text drawn onto the frame.
        """
        if self.minimap_mode:
            # minimap mode already renders the arena in _build_minimap()
            pass
        else:
            if not self.display_mode:
                # open the window lazily on the first render call
                self.viewer.set_mode()
                self.display_mode=True

            self.viewer.draw_background()
            # draw the map first; agents are drawn on top of it
            for w in self.map['objects']:
                self.viewer.draw_map(w)

            self.viewer.draw_ball(self.agent_pos, self.agent_list)

            if self.draw_obs:
                self.viewer.draw_obs(self.obs_boundary, self.agent_list)

        if self.draw_obs:
            if len(self.obs_list) > 0:
                self.viewer.draw_view(self.obs_list, self.agent_list, leftmost_x=500, upmost_y=10, gap = 100)

        if self.show_traj:
            self.get_trajectory()
            self.viewer.draw_trajectory(self.agent_record, self.agent_list)

        self.viewer.draw_direction(self.agent_pos, self.agent_accel)

        debug('Step: ' + str(self.step_cnt), x=30)
        if info is not None:
            debug(info, x=100)

        # keep the window responsive; closing it exits the process
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                sys.exit()
        pygame.display.flip()
    def __init__(self, meta_map, map_id = None, seed = None, vis = None, vis_clear=None, agent1_color = 'purple', agent2_color = 'green'):
        """
        Running scenario whose track is drawn from a pool of pre-built maps.

        :param meta_map: dict providing the shared 'env_cfg' settings
        :param map_id: index of the map to load; random in [1, 4] when None
        :param seed: RNG seed forwarded to OlympicsBase
        :param vis: per-agent visibility range override
        :param vis_clear: per-agent visibility resolution override
        :param agent1_color: color replacing the map's default purple agent
        :param agent2_color: color replacing the map's default green agent
        """
        Gamemap, map_index = Running_competition.choose_a_map(idx = map_id) #fixme(yan): penetration in some maps, need to check engine, vis
        # NOTE(review): the color overrides below only take effect when `vis`
        # is given, because they sit inside this guard -- confirm intended
        if vis is not None:
            for a in Gamemap['agents']:
                a.visibility = vis
                a.visibility_clear = vis_clear
                if a.color == 'purple':
                    a.color = agent1_color
                    a.original_color = agent1_color
                elif a.color == 'green':
                    a.color = agent2_color
                    a.original_color = agent2_color

        self.meta_map = meta_map
        self.map_index = map_index

        super(Running_competition, self).__init__(Gamemap, seed)

        self.game_name = 'running-competition'

        self.original_tau = meta_map['env_cfg']['tau']
        self.original_gamma = meta_map['env_cfg']['gamma']
        self.wall_restitution = meta_map['env_cfg']['wall_restitution']
        self.circle_restitution = meta_map['env_cfg']['circle_restitution']
        self.max_step = meta_map['env_cfg']['max_step']
        self.energy_recover_rate = meta_map['env_cfg']['energy_recover_rate']
        self.speed_cap = meta_map['env_cfg']['speed_cap']
        self.faster = meta_map['env_cfg']['faster']

        # speed the game up: shorter physics step scaled by `faster`, with the
        # velocity decay re-derived so the effective per-second damping stays
        # comparable to the original gamma
        self.tau = self.original_tau*self.faster
        self.gamma = 1-(1-self.original_gamma)*self.faster
agent2_color=agent2_color) 68 | 69 | @staticmethod 70 | def choose_a_map(idx=None): 71 | if idx is None: 72 | idx = random.randint(1,4) 73 | MapStats = create_scenario("map"+str(idx), file_path= maps_path) 74 | return MapStats, idx 75 | 76 | def check_overlap(self): 77 | #todo 78 | pass 79 | 80 | def get_reward(self): 81 | 82 | agent_reward = [0. for _ in range(self.agent_num)] 83 | 84 | 85 | for agent_idx in range(self.agent_num): 86 | if self.agent_list[agent_idx].finished: 87 | agent_reward[agent_idx] = 1. 88 | 89 | return agent_reward 90 | 91 | def is_terminal(self): 92 | 93 | if self.step_cnt >= self.max_step: 94 | return True 95 | 96 | for agent_idx in range(self.agent_num): 97 | if self.agent_list[agent_idx].finished: 98 | return True 99 | 100 | return False 101 | 102 | 103 | 104 | def step(self, actions_list): 105 | 106 | previous_pos = self.agent_pos 107 | 108 | time1 = time.time() 109 | self.stepPhysics(actions_list, self.step_cnt) 110 | time2 = time.time() 111 | #print('stepPhysics time = ', time2 - time1) 112 | self.speed_limit() 113 | 114 | self.cross_detect(previous_pos, self.agent_pos) 115 | 116 | self.step_cnt += 1 117 | step_reward = self.get_reward() 118 | done = self.is_terminal() 119 | 120 | time3 = time.time() 121 | obs_next = self.get_obs() 122 | time4 = time.time() 123 | #print('render time = ', time4-time3) 124 | # obs_next = 1 125 | #self.check_overlap() 126 | self.change_inner_state() 127 | 128 | return obs_next, step_reward, done, '' 129 | 130 | def check_win(self): 131 | if self.agent_list[0].finished and not (self.agent_list[1].finished): 132 | return '0' 133 | elif not(self.agent_list[0].finished) and self.agent_list[1].finished: 134 | return '1' 135 | else: 136 | return '-1' 137 | 138 | 139 | def render(self, info=None): 140 | 141 | 142 | if not self.display_mode: 143 | self.viewer.set_mode() 144 | self.display_mode=True 145 | 146 | self.viewer.draw_background() 147 | for w in self.map['objects']: 148 | self.viewer.draw_map(w) 149 | 
if __name__ == '__main__':
    # Bug fix: `Running_competition()` was called with no arguments even
    # though __init__ requires `meta_map`, so this demo crashed with a
    # TypeError before printing anything. choose_a_map is a @staticmethod,
    # so sample and print a map directly instead of instantiating the env.
    game_map, map_index = Running_competition.choose_a_map()
    print(game_map)
    print('map index =', map_index)
action_list): 27 | action = [] 28 | for agent_idx in range(self.agent_num): 29 | if self.agent_list[agent_idx].type == 'agent': 30 | action.append(action_list[0]) 31 | _ = action_list.pop(0) 32 | else: 33 | action.append(None) 34 | 35 | return action 36 | 37 | def actions_to_accel(self, actions_list): 38 | self.agent_original_accel = [[] for _ in range(self.agent_num)] 39 | a_container = [[] for _ in range(self.agent_num)] 40 | for agent_idx in range(self.agent_num): 41 | action = actions_list[agent_idx] 42 | if action is None: 43 | accel = [0, self.agent_list[agent_idx].mass*self.g] 44 | self.agent_original_accel[agent_idx] = [0,0] 45 | 46 | else: 47 | if self.agent_list[agent_idx].is_fatigue: #if agent is out of energy, no driving force applies 48 | accel = [0,self.agent_list[agent_idx].mass*self.g] 49 | else: 50 | mass = self.agent_list[agent_idx].mass 51 | 52 | assert self.action_f[0] <= action[0] <= self.action_f[1], print('Continuous driving force needs ' 53 | 'to be within the range [-100,200]') 54 | force = action[0] / mass 55 | assert self.action_theta[0] <= action[1] <= self.action_theta[1], print( 56 | 'Continuous turing angle needs to be within the range [-30deg, 30deg]') 57 | theta = action[1] 58 | 59 | theta_old = self.agent_theta[agent_idx][0] 60 | theta_new = theta_old + theta 61 | self.agent_theta[agent_idx][0] = theta_new 62 | 63 | accel_x = force * math.cos(theta_new / 180 * math.pi) 64 | accel_y = force * math.sin(theta_new / 180 * math.pi) 65 | accel = [accel_x, accel_y + mass*self.g ] 66 | self.agent_original_accel[agent_idx] = [accel_x, accel_y] 67 | 68 | a_container[agent_idx] = accel 69 | return a_container 70 | 71 | 72 | 73 | def step(self, actions_list): 74 | previous_pos = self.agent_pos 75 | 76 | actions_list = self.check_action(actions_list) 77 | 78 | self.stepPhysics(actions_list, self.step_cnt) 79 | 80 | #self.cross_detect(previous_pos, self.agent_pos) 81 | 82 | self.step_cnt += 1 83 | step_reward = 1 #self.get_reward() 84 | obs_next 
    def render(self, info=None):
        """
        Draw the current frame: map, ball/agents, trajectories, per-agent
        views, energy bars and a gravity-direction indicator.

        :param info: optional extra text drawn onto the frame.
        """
        if not self.display_mode:
            # open the pygame window lazily on the first render call
            self.viewer.set_mode()
            self.display_mode=True

        self.viewer.draw_background()
        # draw the map first; the ball/agents are drawn on top of it
        for w in self.map['objects']:
            self.viewer.draw_map(w)

        self.viewer.draw_ball(self.agent_pos, self.agent_list)
        if self.show_traj:
            self.get_trajectory()
            self.viewer.draw_trajectory(self.agent_record, self.agent_list)
        self.viewer.draw_direction(self.agent_pos, self.agent_accel)

        if self.draw_obs:
            self.viewer.draw_obs(self.obs_boundary, self.agent_list)
            self.viewer.draw_view(self.obs_list, self.agent_list)

        # draw energy bars and fixed agent labels
        self.viewer.draw_energy_bar(self.agent_list)
        debug('Agent 0', x=570, y=110)
        debug('Agent 1', x=640, y=110)
        if self.map_num is not None:
            debug('Map {}'.format(self.map_num), x=100)

        debug('Step: ' + str(self.step_cnt), x=30)
        if info is not None:
            debug(info, x=100)
        # small arrow marking the gravity direction (pointing down)
        debug("Gravity", x = 100)
        pygame.draw.line(self.viewer.background, color=[0,0,0],start_pos=[160,10], end_pos=[160,30], width=4)
        pygame.draw.line(self.viewer.background, color=[0,0,0],start_pos=[160,30], end_pos=[155,25], width=4)
        pygame.draw.line(self.viewer.background, color=[0,0,0],start_pos=[160,30], end_pos=[165,25], width=4)

        # quit cleanly when the window is closed
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                sys.exit()
        pygame.display.flip()
139 | #self.viewer.background.fill((255, 255, 255)) 140 | -------------------------------------------------------------------------------- /course1/olympics_engine/test.py: -------------------------------------------------------------------------------- 1 | import pygame 2 | import math 3 | import sys 4 | 5 | pygame.init() 6 | size = width, height = 800, 900 7 | screen = pygame.display.set_mode(size) 8 | screen.fill((255, 255, 255)) 9 | 10 | 11 | while 1: 12 | for event in pygame.event.get(): 13 | if event.type == pygame.QUIT: 14 | sys.exit() 15 | 16 | 17 | #pygame.draw.ellipse(screen, [0,0,0], rect = [100,100, 500,800], width = 2) 18 | 19 | pygame.draw.line(screen, [0,0,0], start_pos = [20,600], end_pos = [400, 600], width = 2) 20 | pygame.draw.line(screen, [0,0,0], start_pos = [20,800], end_pos = [400, 800], width = 2) 21 | pygame.draw.line(screen, [0,0,0], start_pos = [20,600], end_pos = [20,800], width = 2) 22 | pygame.draw.line(screen, [176,196,222], start_pos = [20,700], end_pos = [425,700], width = 2) 23 | 24 | pygame.draw.rect(screen, [0,0,0], [0,0,100,200], width = 2) 25 | pygame.draw.arc(screen, [0,0,0], [0,0,100,200], -1/2*math.pi, 1/2*math.pi, 2) 26 | pygame.draw.arc(screen, [0,0,0], [200,100,400,700], -1/2*math.pi, 1/2*math.pi, 2) 27 | pygame.draw.arc(screen, [0,0,0], [350,300,100,300], -1/2*math.pi, 1/2*math.pi, 2) 28 | 29 | pygame.draw.arc(screen, [176,196,222], [325, 200, 200, 500], -1/2*math.pi, 1/2*math.pi, 2) 30 | 31 | #pygame.draw.arc(screen, [0,0,0], [(250,300),(300, 300)], -1/2*math.pi, 1/2*math.pi, 2) 32 | #pygame.draw.arc(screen, [0,0,0], [50,100,700, 700], -1/2*math.pi, 1/2*math.pi, 2) 33 | #pygame.draw.arc(screen, [176,196,222], [150,200,500, 500], -1/2*math.pi, 1/2*math.pi, 2) 34 | 35 | 36 | pygame.draw.line(screen, [0,0,0], start_pos = [20,100], end_pos = [415, 100], width = 2) 37 | pygame.draw.line(screen, [0,0,0], start_pos = [20,300], end_pos = [415, 300], width = 2) 38 | pygame.draw.line(screen, [176,196,222], start_pos = [20,200], 
# Shared color palette and id mappings used by the engine's renderer and by
# the id-based observation encoding.

# RGB values for every color name used by maps and agents
COLORS = {
    'red': [255,0,0],
    'light red': [255, 127, 127],
    'green': [0, 255, 0],
    'blue': [0, 0, 255],
    'orange': [255, 127, 0],
    'grey': [176,196,222],
    'purple': [160, 32, 240],
    'black': [0, 0, 0],
    'white': [255, 255, 255],
    'light green': [204, 255, 229],
    'sky blue': [0,191,255],
    # 'red-2': [215,80,83],
    # 'blue-2': [73,141,247]
}

# color name -> integer id used in the observation maps
COLOR_TO_IDX = {
    'light green': 0,
    'green': 1,
    'sky blue': 2,
    'orange': 3,
    'grey': 4,
    'purple': 5,
    'black': 6,
    'red': 7,
    'blue':8,
    'white': 9,
    'light red': 10
    # 'red-2': 9,
    # 'blue-2': 10
}

# inverse mapping of COLOR_TO_IDX
IDX_TO_COLOR = {
    0: 'light green',
    1: 'green',
    2: 'sky blue',
    3: 'orange',
    4: 'grey',
    5: 'purple',
    6: 'black',
    7: 'red',
    8: 'blue',
    9: 'white',
    10: 'light red'
    # 9: 'red-2',
    # 10: 'blue-2'
}


# Map of object type to integers
OBJECT_TO_IDX = {
    'agent': 0,
    'wall': 1,   # bounces
    'cross': 2,  # passable
    'goal': 3,   # passable  # maybe case by case
    'arc': 4,
    'ball': 5
}
def make_logpath(game_name, algo):
    """
    Compute a fresh run directory path under models/<game_name>.

    Scans existing 'runN' folders and picks N = max + 1 ('run1' when none
    exist). The directory itself is NOT created here; callers do that.

    :param game_name: sub-folder name under models/
    :param algo: unused, kept for interface compatibility
    :return: (run_dir, log_dir) -- currently the same Path
    """
    base_dir = Path(__file__).resolve().parent
    model_dir = base_dir / Path('models') / game_name
    if not model_dir.exists():
        curr_run = 'run1'
    else:
        exst_run_nums = [int(str(folder.name).split('run')[1])
                         for folder in model_dir.iterdir()
                         if str(folder.name).startswith('run')]
        if len(exst_run_nums) == 0:
            curr_run = 'run1'
        else:
            curr_run = 'run%i' % (max(exst_run_nums) + 1)
    run_dir = model_dir / curr_run
    log_dir = run_dir
    return run_dir, log_dir

def save_config(args, save_path):
    """
    Dump an argparse namespace to <save_path>/config.yaml.

    Bug fix: the file handle was opened and closed manually, so it leaked
    if yaml.dump raised; a with-statement guarantees the close.

    :param args: argparse.Namespace (serialized via vars())
    :param save_path: directory in which config.yaml is written
    """
    with open(os.path.join(str(save_path), 'config.yaml'), mode='w', encoding='utf-8') as file:
        yaml.dump(vars(args), file)
    def __init__(self, low, high, shape=None, dtype=np.float32):
        """
        :param low: scalar or array lower bound(s); -np.inf marks a
            coordinate unbounded below
        :param high: scalar or array upper bound(s); np.inf marks a
            coordinate unbounded above
        :param shape: explicit shape; may be omitted when low or high is an
            array (the shape is then inferred from it)
        :param dtype: storage dtype; bounds are cast to it, with a warning
            when the cast loses precision
        """
        assert dtype is not None, 'dtype must be explicitly provided. '
        self.dtype = np.dtype(dtype)

        # determine shape if it isn't provided directly
        if shape is not None:
            shape = tuple(shape)
            assert np.isscalar(low) or low.shape == shape, "low.shape doesn't match provided shape"
            assert np.isscalar(high) or high.shape == shape, "high.shape doesn't match provided shape"
        elif not np.isscalar(low):
            shape = low.shape
            assert np.isscalar(high) or high.shape == shape, "high.shape doesn't match low.shape"
        elif not np.isscalar(high):
            shape = high.shape
            assert np.isscalar(low) or low.shape == shape, "low.shape doesn't match high.shape"
        else:
            raise ValueError("shape must be provided or inferred from the shapes of low or high")

        # broadcast scalar bounds to full arrays of the requested shape
        if np.isscalar(low):
            low = np.full(shape, low, dtype=dtype)

        if np.isscalar(high):
            high = np.full(shape, high, dtype=dtype)

        self.shape = shape
        self.low = low
        self.high = high

        def _get_precision(dtype):
            # decimal digits representable by a float dtype; ints are exact
            if np.issubdtype(dtype, np.floating):
                return np.finfo(dtype).precision
            else:
                return np.inf
        low_precision = _get_precision(self.low.dtype)
        high_precision = _get_precision(self.high.dtype)
        dtype_precision = _get_precision(self.dtype)
        if min(low_precision, high_precision) > dtype_precision:
            # casting the bounds to self.dtype will lose precision
            logger.warn("Box bound precision lowered by casting to {}".format(self.dtype))
        self.low = self.low.astype(self.dtype)
        self.high = self.high.astype(self.dtype)

        # Boolean arrays which indicate the interval type for each coordinate
        self.bounded_below = -np.inf < self.low
        self.bounded_above = np.inf > self.high

        super(Box, self).__init__(self.shape, self.dtype)
return sample.astype(self.dtype) 118 | 119 | def contains(self, x): 120 | if isinstance(x, list): 121 | x = np.array(x) # Promote list to array for contains check 122 | return x.shape == self.shape and np.all(x >= self.low) and np.all(x <= self.high) 123 | 124 | def to_jsonable(self, sample_n): 125 | return np.array(sample_n).tolist() 126 | 127 | def from_jsonable(self, sample_n): 128 | return [np.asarray(sample) for sample in sample_n] 129 | 130 | def __repr__(self): 131 | return "Box({}, {}, {}, {})".format(self.low.min(), self.high.max(), self.shape, self.dtype) 132 | 133 | def __eq__(self, other): 134 | return isinstance(other, Box) and (self.shape == other.shape) and np.allclose(self.low, other.low) and np.allclose(self.high, other.high) -------------------------------------------------------------------------------- /course1/olympics_engine/utils/get_logger.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import logging 3 | import time 4 | import os 5 | 6 | 7 | def get_logger(log_path, name, save_file=False, console_out=False, json_file=False): 8 | if not os.path.exists(log_path): 9 | os.mkdir(log_path) 10 | 11 | logger = logging.getLogger(name='Jidi') 12 | logger.setLevel(logging.INFO) 13 | # 每分钟建一个文件 14 | rq = time.strftime('%Y%m%d%H%M', time.localtime(time.time())) 15 | log_name = log_path + rq + '_' + name+ '.log' 16 | json_log_name = log_path + rq + '_' + name + '.json' 17 | logfile = log_name 18 | if save_file: 19 | fh = logging.FileHandler(logfile, mode='a') 20 | fh.setLevel(logging.DEBUG) 21 | formatter = logging.Formatter("%(message)s") 22 | fh.setFormatter(formatter) 23 | logger.addHandler(fh) 24 | # 输出到控制台 25 | if console_out: 26 | console = logging.StreamHandler() 27 | console.setLevel(logging.INFO) 28 | logger.addHandler(console) 29 | 30 | # 输出到json 31 | if json_file: 32 | fh_json = logging.FileHandler(json_log_name, mode='a') 33 | fh_json.setLevel(logging.DEBUG) 34 | 
class Space(object):
    """Base container describing an observation or action space.

    Lets generic code (e.g. random-action sampling) work against any Env.
    WARNING: custom spaces may inherit from ``Space``, but the stock classes
    (``Box``, ``Discrete``, ...) cover most use-cases; parametrized sampling
    and vectorized batching are only well-defined for the spaces gym ships.
    """

    def __init__(self, shape=None, dtype=None):
        import numpy as np  # deferred: importing numpy costs ~300-400 ms
        self.shape = tuple(shape) if shape is not None else None
        self.dtype = np.dtype(dtype) if dtype is not None else None
        self._np_random = None

    @property
    def np_random(self):
        """RNG for this space, seeded lazily on first access (seeding is
        expensive and only needed when sampling)."""
        if self._np_random is None:
            self.seed()
        return self._np_random

    def sample(self):
        """Randomly draw one element of this space; distribution depends on
        the space's boundedness. Subclasses must override."""
        raise NotImplementedError

    def seed(self, seed=None):
        """(Re-)seed this space's PRNG and return the seed list."""
        self._np_random, seed = seeding.np_random(seed)
        return [seed]

    def contains(self, x):
        """Return True when ``x`` is a valid member of this space."""
        raise NotImplementedError

    def __contains__(self, x):
        return self.contains(x)

    def to_jsonable(self, sample_n):
        """Convert a batch of samples to a JSONable type (identity by default)."""
        return sample_n

    def from_jsonable(self, sample_n):
        """Convert a JSONable batch back to samples (identity by default)."""
        return sample_n
    def is_bounded(self, manner="both"):
        """Whether the box is bounded ``"below"``, ``"above"``, or ``"both"``."""
        below = np.all(self.bounded_below)
        above = np.all(self.bounded_above)
        if manner == "both":
            return below and above
        elif manner == "below":
            return below
        elif manner == "above":
            return above
        else:
            raise ValueError("manner is not in {'below', 'above', 'both'}")

    def sample(self):
        """
        Generates a single random sample inside of the Box.

        In creating a sample of the box, each coordinate is sampled according to
        the form of the interval:

        * [a, b] : uniform distribution
        * [a, oo) : shifted exponential distribution
        * (-oo, b] : shifted negative exponential distribution
        * (-oo, oo) : normal distribution
        """
        # For integer dtypes, make the upper bound inclusive by sampling in
        # [low, high + 1) and flooring below.
        high = self.high if self.dtype.kind == 'f' \
            else self.high.astype('int64') + 1
        sample = np.empty(self.shape)

        # Masking arrays which classify the coordinates according to interval
        # type
        unbounded = ~self.bounded_below & ~self.bounded_above
        upp_bounded = ~self.bounded_below & self.bounded_above
        low_bounded = self.bounded_below & ~self.bounded_above
        bounded = self.bounded_below & self.bounded_above

        # Vectorized sampling by interval type
        sample[unbounded] = self.np_random.normal(
            size=unbounded[unbounded].shape)

        sample[low_bounded] = self.np_random.exponential(
            size=low_bounded[low_bounded].shape) + self.low[low_bounded]

        sample[upp_bounded] = -self.np_random.exponential(
            size=upp_bounded[upp_bounded].shape) + self.high[upp_bounded]

        sample[bounded] = self.np_random.uniform(low=self.low[bounded],
                                                 high=high[bounded],
                                                 size=bounded[bounded].shape)
        if self.dtype.kind == 'i':
            sample = np.floor(sample)

        return sample.astype(self.dtype)

    def contains(self, x):
        """True when ``x`` has this Box's shape and lies inside the bounds."""
        if isinstance(x, list):
            x = np.array(x)  # Promote list to array for contains check
        return x.shape == self.shape and np.all(x >= self.low) and np.all(x <= self.high)

    def to_jsonable(self, sample_n):
        # Nested lists are JSON-serializable; ndarray is not.
        return np.array(sample_n).tolist()
class Discrete(Space):
    r"""A discrete space :math:`\{ 0, 1, \dots, n-1 \}`.

    Example::

        >>> Discrete(2)
    """

    def __init__(self, n):
        # A space with zero actions is legal; negative counts are not.
        assert n >= 0
        self.n = n
        super(Discrete, self).__init__((), np.int64)

    def sample(self):
        """Draw a uniform random action in [0, n)."""
        return self.np_random.randint(self.n)

    def contains(self, x):
        """True for a plain int, or a scalar integer ndarray, in [0, n)."""
        if isinstance(x, int):
            return 0 <= x < self.n
        is_integer_scalar = (
            isinstance(x, (np.generic, np.ndarray))
            and x.dtype.char in np.typecodes['AllInteger']
            and x.shape == ()
        )
        if not is_integer_scalar:
            return False
        return 0 <= int(x) < self.n

    def __repr__(self):
        return "Discrete(%d)" % self.n

    def __eq__(self, other):
        return isinstance(other, Discrete) and self.n == other.n
def get_logger(log_path, name, save_file=False, console_out=False, json_file=False):
    """Configure and return the root logger.

    Args:
        log_path: directory for log output; created (with parents) if missing.
        name: suffix used in the generated file names.
        save_file: also write plain-text records to ``<timestamp>_<name>.log``.
        console_out: also echo INFO+ records to the console.
        json_file: also write records to ``<timestamp>_<name>.json``.

    Returns:
        logging.Logger: the process-wide root logger.

    NOTE(review): handlers are appended to the root logger on every call, so
    calling this repeatedly duplicates output — confirm single-call usage.
    """
    # BUG FIX: os.mkdir raised when parent dirs were missing or the
    # directory already existed; makedirs(exist_ok=True) handles both.
    os.makedirs(log_path, exist_ok=True)

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    # One file per minute: timestamp resolves down to the minute.
    rq = time.strftime('%Y%m%d%H%M', time.localtime(time.time()))
    # BUG FIX: plain string concatenation required log_path to end with a
    # path separator; os.path.join works either way.
    log_name = os.path.join(log_path, rq + '_' + name + '.log')
    json_log_name = os.path.join(log_path, rq + '_' + name + '.json')
    if save_file:
        fh = logging.FileHandler(log_name, mode='a')
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(logging.Formatter("%(message)s"))
        logger.addHandler(fh)
    # Echo to the console.
    if console_out:
        console = logging.StreamHandler()
        console.setLevel(logging.INFO)
        logger.addHandler(console)
    # Mirror records into a .json file.
    if json_file:
        fh_json = logging.FileHandler(json_log_name, mode='a')
        fh_json.setLevel(logging.DEBUG)
        fh_json.setFormatter(logging.Formatter("%(message)s"))
        logger.addHandler(fh_json)

    return logger
    def __init__(self, array_of_param_array):
        # array_of_param_array: sequence of [min, max] pairs, one per
        # discrete sub-space (both bounds inclusive).
        self.low = np.array([x[0] for x in array_of_param_array])
        self.high = np.array([x[1] for x in array_of_param_array])
        self.num_discrete_space = self.low.shape[0]

    def sample(self):
        """ Returns a array with one sample from each discrete action space """
        # For each row: round(random .* (max - min) + min, 0)
        # NOTE(review): a fresh, unseeded RandomState is created on every
        # call, so sampling cannot be made reproducible via gym's seeding —
        # confirm that is intended.
        np_random = np.random.RandomState()
        random_array = np_random.rand(self.num_discrete_space)
        return [int(x) for x in np.floor(np.multiply((self.high - self.low + 1.), random_array) + self.low)]

    def contains(self, x):
        # Valid when x has one entry per sub-space and each entry lies
        # inside its [low, high] interval.
        return len(x) == self.num_discrete_space and (np.array(x) >= self.low).all() and (np.array(x) <= self.high).all()

    @property
    def shape(self):
        # NOTE(review): returns the dimension count (an int), not a tuple
        # like standard gym spaces — callers appear to rely on this.
        return self.num_discrete_space

    def __repr__(self):
        return "MultiDiscrete" + str(self.num_discrete_space)

    def __eq__(self, other):
        return np.array_equal(self.low, other.low) and np.array_equal(self.high, other.high)
class Space(object):
    """Base container describing an observation or action space.

    Allows generic code (e.g. random-action sampling) to work against any
    Env. WARNING: custom spaces may inherit from ``Space``, but the stock
    classes (``Box``, ``Discrete``, ...) cover most use-cases; parametrized
    sampling and vectorized batching are only well-defined for the spaces
    gym provides by default.
    """

    def __init__(self, shape=None, dtype=None):
        import numpy as np  # deferred: importing numpy costs ~300-400 ms
        self.shape = tuple(shape) if shape is not None else None
        self.dtype = np.dtype(dtype) if dtype is not None else None
        self._np_random = None

    @property
    def np_random(self):
        """RNG for this space, seeded lazily on first access (seeding is
        expensive and only needed when sampling)."""
        if self._np_random is None:
            self.seed()
        return self._np_random

    def sample(self):
        """Randomly draw one element of this space; subclasses override."""
        raise NotImplementedError

    def seed(self, seed=None):
        """(Re-)seed this space's PRNG and return the seed list."""
        self._np_random, seed = seeding.np_random(seed)
        return [seed]

    def contains(self, x):
        """Return True when ``x`` is a valid member of this space."""
        raise NotImplementedError

    def __contains__(self, x):
        return self.contains(x)

    def to_jsonable(self, sample_n):
        """Convert a batch of samples to a JSONable type (identity by default)."""
        return sample_n

    def from_jsonable(self, sample_n):
        """Convert a JSONable batch back to samples (identity by default)."""
        return sample_n
def make(env_type, conf=None):
    """Instantiate the environment class registered under ``env_type``.

    When ``conf`` is not supplied, defaults are loaded from the adjacent
    config.json; the concrete class is looked up on the ``env`` package by
    its ``class_literal`` name.
    """
    if not conf:
        config_file = os.path.join(os.path.dirname(__file__), 'config.json')
        with open(config_file) as f:
            conf = json.load(f)[env_type]
    env_cls = getattr(env, conf['class_literal'])
    return env_cls(conf)
class VectorObservation(object):
    """Interface for environments exposing per-player vector observations."""

    def get_vector_observation(self, current_state, player_id, info_before):
        # Concrete environments must implement the single-player view.
        raise NotImplementedError

    def get_vector_many_observation(self, current_state, player_id_list, info_before=''):
        """One vector observation per player id, in list order."""
        return [self.get_vector_observation(current_state, pid, info_before)
                for pid in player_id_list]
# -*- coding:utf-8 -*-
# Author: Shu LIN

import numpy

DEPTH = 4        # search depth
INF = 100000000
DIR = ((-1, -1), (-1, 0), (-1, 1), (0, -1), (0, 1), (1, -1), (1, 0), (1, 1))  # direction vectors


def place(board, x, y, color, width, height):
    """Play ``color`` at (x, y) on ``board`` IN PLACE, flipping captured discs.

    Returns True when the move flips at least one disc (i.e. is legal).
    NOTE: mutates ``board``; callers must pass a copy when probing moves.
    """
    if x < 0:
        return False
    board[x][y] = color
    valid = False
    for d in range(8):
        i = x + DIR[d][0]
        j = y + DIR[d][1]
        # Walk over a contiguous run of opponent discs in this direction.
        while 0 <= i < width and 0 <= j < height and board[i][j] == -color:
            i += DIR[d][0]
            j += DIR[d][1]
        if 0 <= i < width and 0 <= j < height and board[i][j] == color:
            # The run is capped by our own disc: walk back and flip it.
            while True:
                i -= DIR[d][0]
                j -= DIR[d][1]
                if i == x and j == y:
                    break
                valid = True
                board[i][j] = color
    return valid


def evaluate(board, color, width, height):
    """Disc-count score of ``board`` from ``color``'s point of view."""
    score = 0
    for i in range(width):
        for j in range(height):
            score += board[i][j] * color
    return score


def _copy_board(board):
    """Deep-copy a 2-D board (list of rows). list.copy() alone is shallow and
    lets probe moves corrupt the caller's board."""
    return [row[:] for row in board]


def alphabeta(board, depth, alpha, beta, color, width, height):
    """Negamax alpha-beta search.

    Returns ``(best score, best x, best y)``; (x, y) is (-1, -1) when the
    side to move has no legal move (a pass) or at the search horizon.
    """
    if depth == 0:
        # BUG FIX: evaluate() takes (board, color, width, height); the
        # original called it with two arguments and raised TypeError at
        # every horizon node.
        return evaluate(board, color, width, height), -1, -1
    x = y = -1
    noMove = True
    for i in range(width):
        for j in range(height):
            if board[i][j] == 0:
                # BUG FIX: was board.copy() — a shallow copy whose rows are
                # shared, so place() corrupted the original board.
                newBoard = _copy_board(board)
                if place(newBoard, i, j, color, width, height):
                    noMove = False
                    v = -alphabeta(newBoard, depth - 1, -beta, -alpha, -color, width, height)[0]
                    if v > alpha:
                        alpha = v
                        x, y = i, j
                        if alpha >= beta:  # beta cutoff: opponent avoids this line
                            return alpha, x, y
    if noMove:
        # Pass: the opponent moves again from the same position.
        v = -alphabeta(board, depth - 1, -beta, -alpha, -color, width, height)[0]
        if v > alpha:
            alpha = v
    return alpha, x, y


def wrap_action(x, y, width, height):
    """Encode (x, y) as the platform's pair of one-hot vectors.
    NOTE: x == -1 (no move) one-hots the last index — original behavior."""
    action = [[0] * width, [0] * height]
    action[0][x] = 1
    action[1][y] = 1
    return action


def my_controller(observation, action_space, is_act_continuous=False):
    """Jidi entry point: rebuild the board from the observation and play the
    alpha-beta move."""
    myColor = 1 if observation["chess_player_idx"] == 1 else -1
    height = observation["board_height"]
    width = observation["board_width"]
    # NOTE(review): board rows/cols and the wrap_action argument order assume
    # a square board (width == height == 10 in config.json) — confirm before
    # using non-square boards.
    board = [[0 for _ in range(width)] for _ in range(height)]
    for position in observation[1]:
        board[position[0]][position[1]] = 1
    for position in observation[2]:
        board[position[0]][position[1]] = -1
    _, x, y = alphabeta(board, DEPTH, -INF, INF, myColor, width, height)
    return wrap_action(x, y, height, width)
# -*- coding:utf-8 -*-
# Author: Shu LIN

import numpy
import random

TIMES = 10000    # number of Monte-Carlo playouts per move
INF = 100000000
EPS = 0.1        # epsilon-greedy exploration rate during playouts
DIR = ((-1, -1), (-1, 0), (-1, 1), (0, -1), (0, 1), (1, -1), (1, 0), (1, 1))  # direction vectors

visits = {}    # (color, x, y) -> playout count
returns = {}   # (color, x, y) -> cumulative playout result


def _copy_board(board):
    """Deep-copy a 2-D board (list of rows). list.copy() alone is shallow and
    lets simulated moves corrupt the caller's board."""
    return [row[:] for row in board]


def place(board, x, y, color, width, height):
    """Play ``color`` at (x, y) IN PLACE, flipping captured discs.
    Returns True when at least one disc is flipped (legal move).
    NOTE: mutates ``board``; pass a copy when probing."""
    if x < 0:
        return False
    board[x][y] = color
    valid = False
    for d in range(8):
        i = x + DIR[d][0]
        j = y + DIR[d][1]
        # Walk over a contiguous run of opponent discs in this direction.
        while 0 <= i < width and 0 <= j < height and board[i][j] == -color:
            i += DIR[d][0]
            j += DIR[d][1]
        if 0 <= i < width and 0 <= j < height and board[i][j] == color:
            # The run is capped by our own disc: walk back and flip it.
            while True:
                i -= DIR[d][0]
                j -= DIR[d][1]
                if i == x and j == y:
                    break
                valid = True
                board[i][j] = color
    return valid


def evaluate(board, color, width, height):
    """Terminal result from ``color``'s view: +1 win, -1 loss, 0 draw."""
    score = 0
    for i in range(width):
        for j in range(height):
            score += board[i][j] * color
    if score > 0:
        return 1
    if score < 0:
        return -1
    return 0


def getMove(board, color, chooseBest, width, height):
    """Pick a move for ``color``: the best-average move from the collected
    statistics (unvisited moves are optimistically preferred), or — during
    playouts — an epsilon-greedy random legal move.
    Returns (-1, -1) when there is no legal move."""
    moves = []
    for i in range(width):
        for j in range(height):
            if board[i][j] == 0:
                # BUG FIX: was board.copy() — a shallow copy whose rows are
                # shared, so probing legality corrupted the original board.
                newBoard = _copy_board(board)
                if place(newBoard, i, j, color, width, height):
                    moves.append((i, j))
    if len(moves) == 0:
        return -1, -1
    best = -INF
    x = y = -1
    for (i, j) in moves:
        avg = INF  # optimistic init: try unvisited moves first
        if (color, i, j) in visits:
            avg = returns[color, i, j] / visits[color, i, j]
        if avg > best:
            best = avg
            x, y = i, j
    if chooseBest or random.random() > EPS:
        return x, y
    return random.choice(moves)


def simulate(board, color, width, height):
    """Play one playout from ``board`` and back the result up into the
    global visits/returns tables."""
    x, y = getMove(board, color, False, width, height)
    noMove = x < 0
    if noMove:
        # Pass: opponent moves instead.
        color = -color
        x, y = getMove(board, color, False, width, height)
        if x < 0:
            # Neither side can move: game over, score the final position.
            return evaluate(board, -color, width, height)
    # BUG FIX: was board.copy() (shallow) — the playout mutated the shared rows.
    newBoard = _copy_board(board)
    place(newBoard, x, y, color, width, height)
    result = -simulate(newBoard, -color, width, height)
    global visits, returns
    if (color, x, y) not in visits:
        visits[color, x, y] = 1
        returns[color, x, y] = result
    else:
        visits[color, x, y] += 1
        returns[color, x, y] += result
    if noMove:
        return -result
    return result


def montecarlo(board, color, width, height):
    """Run TIMES playouts, then return the statistically best move."""
    for _ in range(TIMES):
        simulate(board, color, width, height)
    return getMove(board, color, True, width, height)


def wrap_action(x, y, width, height):
    """Encode (x, y) as the platform's pair of one-hot vectors.
    NOTE: x == -1 (no move) one-hots the last index — original behavior."""
    action = [[0] * width, [0] * height]
    action[0][x] = 1
    action[1][y] = 1
    return action


def my_controller(observation, action_space, is_act_continuous=False):
    """Jidi entry point: rebuild the board and play the Monte-Carlo move."""
    myColor = 1 if observation["chess_player_idx"] == 1 else -1
    height = observation["board_height"]
    width = observation["board_width"]
    # NOTE(review): board construction and the montecarlo/wrap_action argument
    # order assume a square board (10x10 in config.json) — confirm before
    # using non-square boards.
    board = [[0 for _ in range(width)] for _ in range(height)]
    for position in observation[1]:
        board[position[0]][position[1]] = 1
    for position in observation[2]:
        board[position[0]][position[1]] = -1
    x, y = montecarlo(board, myColor, height, width)
    return wrap_action(x, y, height, width)
-------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Time : 2022/8/10 下午4:14 3 | # Author: Yahui Cui 4 | 5 | """ 6 | # =================================== Important ========================================= 7 | Notes: 8 | 1. this agents is random agents , which can fit any env in Jidi platform. 9 | 2. if you want to load .pth file, please follow the instruction here: 10 | https://github.com/jidiai/ai_lib/blob/master/examples/demo 11 | """ 12 | 13 | 14 | def my_controller(observation, action_space, is_act_continuous=True): 15 | agent_action = [] 16 | for i in range(len(action_space)): 17 | action_ = sample_single_dim(action_space[i], is_act_continuous) 18 | agent_action.append(action_) 19 | return agent_action 20 | 21 | 22 | def sample_single_dim(action_space_list_each, is_act_continuous): 23 | each = [] 24 | if is_act_continuous: 25 | each = action_space_list_each.sample() 26 | else: 27 | if action_space_list_each.__class__.__name__ == "Discrete": 28 | each = [0] * action_space_list_each.n 29 | idx = action_space_list_each.sample() 30 | each[idx] = 1 31 | elif action_space_list_each.__class__.__name__ == "MultiDiscreteParticle": 32 | each = [] 33 | nvec = action_space_list_each.high - action_space_list_each.low + 1 34 | sample_indexes = action_space_list_each.sample() 35 | 36 | for i in range(len(nvec)): 37 | dim = nvec[i] 38 | new_action = [0] * dim 39 | index = sample_indexes[i] 40 | new_action[index] = 1 41 | each.extend(new_action) 42 | elif action_space_list_each.__class__.__name__ == "Discrete_SC2": 43 | each = action_space_list_each.sample() 44 | elif action_space_list_each.__class__.__name__ == "Box": 45 | each = action_space_list_each.sample() 46 | return each 47 | -------------------------------------------------------------------------------- /course2/examples/submission/submission.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 
import numpy as np

from .space import Space
from gym import logger


class Box(Space):
    """
    A (possibly unbounded) box in R^n. Specifically, a Box represents the
    Cartesian product of n closed intervals. Each interval has the form of one
    of [a, b], (-oo, b], [a, oo), or (-oo, oo).

    There are two common use cases:

    * Identical bound for each dimension::
        >>> Box(low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32)
        Box(3, 4)

    * Independent bound for each dimension::
        >>> Box(low=np.array([-1.0, -2.0]), high=np.array([2.0, 4.0]), dtype=np.float32)
        Box(2,)

    """
    def __init__(self, low, high, shape=None, dtype=np.float32):
        assert dtype is not None, 'dtype must be explicitly provided. '
        self.dtype = np.dtype(dtype)

        # determine shape if it isn't provided directly
        if shape is not None:
            shape = tuple(shape)
            assert np.isscalar(low) or low.shape == shape, "low.shape doesn't match provided shape"
            assert np.isscalar(high) or high.shape == shape, "high.shape doesn't match provided shape"
        elif not np.isscalar(low):
            shape = low.shape
            assert np.isscalar(high) or high.shape == shape, "high.shape doesn't match low.shape"
        elif not np.isscalar(high):
            shape = high.shape
            assert np.isscalar(low) or low.shape == shape, "low.shape doesn't match high.shape"
        else:
            raise ValueError("shape must be provided or inferred from the shapes of low or high")

        # Broadcast scalar bounds to full arrays of the resolved shape.
        if np.isscalar(low):
            low = np.full(shape, low, dtype=dtype)

        if np.isscalar(high):
            high = np.full(shape, high, dtype=dtype)

        self.shape = shape
        self.low = low
        self.high = high

        # Warn when casting the given bounds into self.dtype would lose
        # precision (e.g. float64 bounds stored in a float32 box); integer
        # dtypes are treated as "infinite precision" so they never warn.
        def _get_precision(dtype):
            if np.issubdtype(dtype, np.floating):
                return np.finfo(dtype).precision
            else:
                return np.inf
        low_precision = _get_precision(self.low.dtype)
        high_precision = _get_precision(self.high.dtype)
        dtype_precision = _get_precision(self.dtype)
        if min(low_precision, high_precision) > dtype_precision:
            logger.warn("Box bound precision lowered by casting to {}".format(self.dtype))
        self.low = self.low.astype(self.dtype)
        self.high = self.high.astype(self.dtype)

        # Boolean arrays which indicate the interval type for each coordinate
        self.bounded_below = -np.inf < self.low
        self.bounded_above = np.inf > self.high

        super(Box, self).__init__(self.shape, self.dtype)

    def is_bounded(self, manner="both"):
        """Report whether the box is bounded below/above/on both sides."""
        below = np.all(self.bounded_below)
        above = np.all(self.bounded_above)
        if manner == "both":
            return below and above
        elif manner == "below":
            return below
        elif manner == "above":
            return above
        else:
            raise ValueError("manner is not in {'below', 'above', 'both'}")

    def sample(self):
        """
        Generates a single random sample inside of the Box.

        In creating a sample of the box, each coordinate is sampled according to
        the form of the interval:

        * [a, b] : uniform distribution
        * [a, oo) : shifted exponential distribution
        * (-oo, b] : shifted negative exponential distribution
        * (-oo, oo) : normal distribution
        """
        # For integer dtypes the upper bound is inclusive, so shift it by one
        # to make np.random.uniform's half-open interval cover it.
        high = self.high if self.dtype.kind == 'f' \
            else self.high.astype('int64') + 1
        sample = np.empty(self.shape)

        # Masking arrays which classify the coordinates according to interval
        # type
        unbounded = ~self.bounded_below & ~self.bounded_above
        upp_bounded = ~self.bounded_below & self.bounded_above
        low_bounded = self.bounded_below & ~self.bounded_above
        bounded = self.bounded_below & self.bounded_above


        # Vectorized sampling by interval type
        sample[unbounded] = self.np_random.normal(
            size=unbounded[unbounded].shape)

        sample[low_bounded] = self.np_random.exponential(
            size=low_bounded[low_bounded].shape) + self.low[low_bounded]

        sample[upp_bounded] = -self.np_random.exponential(
            size=upp_bounded[upp_bounded].shape) + self.high[upp_bounded]

        sample[bounded] = self.np_random.uniform(low=self.low[bounded],
                                                 high=high[bounded],
                                                 size=bounded[bounded].shape)
        if self.dtype.kind == 'i':
            sample = np.floor(sample)

        return sample.astype(self.dtype)

    def contains(self, x):
        """True when x has the box's exact shape and lies within the bounds."""
        if isinstance(x, list):
            x = np.array(x)  # Promote list to array for contains check
        return x.shape == self.shape and np.all(x >= self.low) and np.all(x <= self.high)

    def to_jsonable(self, sample_n):
        # Arrays are not JSON-serialisable; nested lists are.
        return np.array(sample_n).tolist()

    def from_jsonable(self, sample_n):
        return [np.asarray(sample) for sample in sample_n]

    def __repr__(self):
        return "Box({}, {}, {}, {})".format(self.low.min(), self.high.max(), self.shape, self.dtype)

    def __eq__(self, other):
        return isinstance(other, Box) and (self.shape == other.shape) and np.allclose(self.low, other.low) and np.allclose(self.high, other.high)
# -*- coding:utf-8 -*-
# Time : 2021/4/8 下午2:42
# Author: Yahui Cui

# An old version of OpenAI Gym's multi_discrete.py. (Was getting affected by Gym updates)
# (https://github.com/openai/gym/blob/1fb81d4e3fb780ccf77fec731287ba07da35eb84/gym/spaces/multi_discrete.py)

import numpy as np

import gym

class MultiDiscreteParticle(gym.Space):
    """
    - The multi-discrete action space consists of a series of discrete action spaces with different parameters
    - It can be adapted to both a Discrete action space or a continuous (Box) action space
    - It is useful to represent game controllers or keyboards where each key can be represented as a discrete action space
    - It is parametrized by passing an array of arrays containing [min, max] for each discrete action space
       where the discrete action space can take any integers from `min` to `max` (both inclusive)
    Note: A value of 0 always need to represent the NOOP action.
    e.g. Nintendo Game Controller
    - Can be conceptualized as 3 discrete action spaces:
        1) Arrow Keys: Discrete 5  - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4]  - params: min: 0, max: 4
        2) Button A:   Discrete 2  - NOOP[0], Pressed[1] - params: min: 0, max: 1
        3) Button B:   Discrete 2  - NOOP[0], Pressed[1] - params: min: 0, max: 1
    - Can be initialized as
        MultiDiscrete([ [0,4], [0,1], [0,1] ])
    """
    def __init__(self, array_of_param_array):
        # Per-dimension inclusive [min, max] bounds, e.g. [[0,4], [0,1]].
        self.low = np.array([x[0] for x in array_of_param_array])
        self.high = np.array([x[1] for x in array_of_param_array])
        self.num_discrete_space = self.low.shape[0]
        # NOTE(review): gym.Space.__init__ is not called here — this is a
        # deliberately pinned legacy copy; shape/dtype on the base stay unset.

    def sample(self):
        """ Returns a array with one sample from each discrete action space """
        # For each row: round(random .* (max - min) + min, 0)
        # NOTE(review): a fresh, unseeded RandomState is created on every
        # call, so sampling ignores gym's seeding machinery and is not
        # reproducible — confirm this is intended before relying on seeds.
        np_random = np.random.RandomState()
        random_array = np_random.rand(self.num_discrete_space)
        return [int(x) for x in np.floor(np.multiply((self.high - self.low + 1.), random_array) + self.low)]
    def contains(self, x):
        # Valid iff x has one entry per dimension and each entry lies in
        # the inclusive [low, high] range of its dimension.
        return len(x) == self.num_discrete_space and (np.array(x) >= self.low).all() and (np.array(x) <= self.high).all()

    @property
    def shape(self):
        # NOTE(review): returns an int (number of dimensions), not a tuple
        # as modern gym spaces do — callers here appear to rely on that.
        return self.num_discrete_space
    def __repr__(self):
        return "MultiDiscrete" + str(self.num_discrete_space)
    def __eq__(self, other):
        # NOTE(review): no isinstance check — any object with matching
        # low/high arrays compares equal.
        return np.array_equal(self.low, other.low) and np.array_equal(self.high, other.high)
    def __init__(self, shape=None, dtype=None):
        import numpy as np  # takes about 300-400ms to import, so we load lazily
        # Normalise inputs: shape becomes a tuple, dtype an np.dtype;
        # None means "not specified by the subclass".
        self.shape = None if shape is None else tuple(shape)
        self.dtype = None if dtype is None else np.dtype(dtype)
        self._np_random = None  # created lazily by the np_random property

    @property
    def np_random(self):
        """Lazily seed the rng since this is expensive and only needed if
        sampling from this space.
        """
        if self._np_random is None:
            self.seed()

        return self._np_random

    def sample(self):
        """Randomly sample an element of this space. Can be
        uniform or non-uniform sampling based on boundedness of space."""
        raise NotImplementedError

    def seed(self, seed=None):
        """Seed the PRNG of this space. """
        # Replaces any existing RNG; returns the list-wrapped seed actually
        # used (gym convention).
        self._np_random, seed = seeding.np_random(seed)
        return [seed]

    def contains(self, x):
        """
        Return boolean specifying if x is a valid
        member of this space
        """
        raise NotImplementedError

    def __contains__(self, x):
        # Enables the `x in space` syntax via contains().
        return self.contains(x)

    def to_jsonable(self, sample_n):
        """Convert a batch of samples from this space to a JSONable data type."""
        # By default, assume identity is JSONable
        return sample_n

    def from_jsonable(self, sample_n):
        """Convert a JSONable data type to a batch of samples from this space."""
        # By default, assume identity is JSONable
        return sample_n
`False` means to train from scratch.) 36 | > 37 | > The model will be store under the folder [ddpg/trained_model](examples/ddpg/trained_model). 38 | 39 | ___ 40 | Have a good time~~~ -------------------------------------------------------------------------------- /course3/docs/rlcn_2022_rl_algorithms.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course3/docs/rlcn_2022_rl_algorithms.zip -------------------------------------------------------------------------------- /course3/env/__init__.py: -------------------------------------------------------------------------------- 1 | from .ccgame import * 2 | -------------------------------------------------------------------------------- /course3/env/chooseenv.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # 作者:zruizhi 3 | # 创建时间: 2020/9/11 11:17 上午 4 | # 描述:选择运行环境,需要维护env/__ini__.py && config.json(存储环境默认参数) 5 | 6 | import json 7 | import env 8 | import os 9 | 10 | 11 | def make(env_type, conf=None): 12 | file_path = os.path.join(os.path.dirname(__file__), 'config.json') 13 | if not conf: 14 | with open(file_path) as f: 15 | conf = json.load(f)[env_type] 16 | class_literal = conf['class_literal'] 17 | return getattr(env, class_literal)(conf) 18 | 19 | 20 | if __name__ == "__main__": 21 | make("classic_MountainCar-v0") 22 | -------------------------------------------------------------------------------- /course3/env/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "classic_Pendulum-v0": { 3 | "class_literal": "CCGame", 4 | "n_player": 1, 5 | "is_obs_continuous": true, 6 | "is_act_continuous": true, 7 | "act_box": {"low": -2.0, "high":2.0, "shape":[1]}, 8 | "game_name": "Pendulum-v0", 9 | "agent_nums": [1], 10 | "max_step": 200, 11 | "obs_type": ["vector"] 12 | } 13 | 
# -*- coding:utf-8 -*-
# Author: zruizhi
# Created: 2020/11/13
# Interface classes for the different observation kinds.
obs_type = ["grid", "vector", "dict"]


class GridObservation(object):
    """Interface for environments exposing grid-shaped observations."""

    def get_grid_observation(self, current_state, player_id, info_before):
        raise NotImplementedError

    def get_grid_many_observation(self, current_state, player_id_list, info_before=''):
        """Collect one grid observation per player in `player_id_list`."""
        return [self.get_grid_observation(current_state, pid, info_before)
                for pid in player_id_list]


class VectorObservation(object):
    """Interface for environments exposing flat vector observations."""

    def get_vector_observation(self, current_state, player_id, info_before):
        raise NotImplementedError

    def get_vector_many_observation(self, current_state, player_id_list, info_before=''):
        """Collect one vector observation per player in `player_id_list`."""
        return [self.get_vector_observation(current_state, pid, info_before)
                for pid in player_id_list]


class DictObservation(object):
    """Interface for environments exposing dict observations."""

    def get_dict_observation(self, current_state, player_id, info_before):
        raise NotImplementedError

    def get_dict_many_observation(self, current_state, player_id_list, info_before=''):
        """Collect one dict observation per player in `player_id_list`."""
        return [self.get_dict_observation(current_state, pid, info_before)
                for pid in player_id_list]


# todo: observation builder
class CustomObservation(object):
    """Interface for environments with bespoke observation builders."""

    def get_custom_observation(self, current_state, player_id):
        raise NotImplementedError

    def get_custom_obs_space(self, player_id):
        raise NotImplementedError

    def get_custom_many_observation(self, current_state, player_id_list):
        """Collect one custom observation per player in `player_id_list`."""
        return [self.get_custom_observation(current_state, pid)
                for pid in player_id_list]

    def get_custom_many_obs_space(self, player_id_list):
        """Collect one custom observation space per player."""
        return [self.get_custom_obs_space(pid) for pid in player_id_list]
https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course3/examples/ddpg/actor_200.pth -------------------------------------------------------------------------------- /course3/examples/random/submission.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Time : 2022/8/10 下午4:14 3 | # Author: Yahui Cui 4 | 5 | """ 6 | # =================================== Important ========================================= 7 | Notes: 8 | 1. this agents is random agents , which can fit any env in Jidi platform. 9 | 2. if you want to load .pth file, please follow the instruction here: 10 | https://github.com/jidiai/ai_lib/blob/master/examples/demo 11 | """ 12 | 13 | 14 | def my_controller(observation, action_space, is_act_continuous=True): 15 | agent_action = [] 16 | for i in range(len(action_space)): 17 | action_ = sample_single_dim(action_space[i], is_act_continuous) 18 | agent_action.append(action_) 19 | return agent_action 20 | 21 | 22 | def sample_single_dim(action_space_list_each, is_act_continuous): 23 | each = [] 24 | if is_act_continuous: 25 | each = action_space_list_each.sample() 26 | else: 27 | if action_space_list_each.__class__.__name__ == "Discrete": 28 | each = [0] * action_space_list_each.n 29 | idx = action_space_list_each.sample() 30 | each[idx] = 1 31 | elif action_space_list_each.__class__.__name__ == "MultiDiscreteParticle": 32 | each = [] 33 | nvec = action_space_list_each.high - action_space_list_each.low + 1 34 | sample_indexes = action_space_list_each.sample() 35 | 36 | for i in range(len(nvec)): 37 | dim = nvec[i] 38 | new_action = [0] * dim 39 | index = sample_indexes[i] 40 | new_action[index] = 1 41 | each.extend(new_action) 42 | elif action_space_list_each.__class__.__name__ == "Discrete_SC2": 43 | each = action_space_list_each.sample() 44 | elif action_space_list_each.__class__.__name__ == "Box": 45 | each = action_space_list_each.sample() 
# -*- coding:utf-8 -*-
# Time : 2022/8/19 5:25 PM
# Author: Yahui Cui
import numpy as np
import os
import sys
import datetime

sys.path.append(os.path.dirname(__file__))
from env.chooseenv import make
from course3.examples.ddpg.submission import agent as ddpg_agent
from course3.examples.ddpg.submission import my_controller
from course3.examples.ddpg.submission import replay_buffer


def main():
    """Train the DDPG agent on classic_Pendulum-v0 and checkpoint it.

    Runs `num_episodes` episodes split into 10 chunks; after each chunk the
    agent is saved under examples/ddpg/trained_model/<timestamp>.
    """
    num_episodes = 200
    minimal_size = 1000  # replay-buffer warm-up before updates start
    batch_size = 64

    now = datetime.datetime.now()
    model_path = os.path.join(os.path.dirname(__file__), 'examples', 'ddpg', 'trained_model',
                              now.strftime("%Y-%m-%d-%H-%M-%S"))

    env_name = 'classic_Pendulum-v0'
    env = make(env_name)
    action_space = env.joint_action_space
    agent_id = 0  # single-agent env: always read player 0's slots

    return_list = []
    for i in range(10):

        for i_episode in range(int(num_episodes / 10)):
            episode_return = 0
            state = env.reset()
            done = False
            while not done:
                action = my_controller(state[agent_id], action_space, True)
                # NOTE(review): step() is unpacked as a 5-tuple here —
                # confirm against env/ccgame.py's return signature.
                next_state, reward, done, _, _ = env.step([action])
                replay_buffer.add(state[agent_id]['obs'], action[0], reward[agent_id], next_state[agent_id]['obs'], done)
                state = next_state
                episode_return += reward[agent_id]
                # Only train the networks once the buffer holds enough data.
                if replay_buffer.size() > minimal_size:
                    b_s, b_a, b_r, b_ns, b_d = replay_buffer.sample(batch_size)
                    transition_dict = {
                        'states': b_s,
                        'actions': b_a,
                        'next_states': b_ns,
                        'rewards': b_r,
                        'dones': b_d
                    }
                    ddpg_agent.update(transition_dict)
            return_list.append(episode_return)
            if (i_episode + 1) % 10 == 0:
                # Report the mean return over the last 10 episodes.
                print('episode' + ':' +
                      '%d' % (num_episodes / 10 * i + i_episode + 1) +
                      ' return' + ':''%.3f' % np.mean(return_list[-10:]))

        ddpg_agent.save(model_path, num_episodes / 10 * (i + 1))


if __name__ == '__main__':
    main()
' 26 | self.dtype = np.dtype(dtype) 27 | 28 | # determine shape if it isn't provided directly 29 | if shape is not None: 30 | shape = tuple(shape) 31 | assert np.isscalar(low) or low.shape == shape, "low.shape doesn't match provided shape" 32 | assert np.isscalar(high) or high.shape == shape, "high.shape doesn't match provided shape" 33 | elif not np.isscalar(low): 34 | shape = low.shape 35 | assert np.isscalar(high) or high.shape == shape, "high.shape doesn't match low.shape" 36 | elif not np.isscalar(high): 37 | shape = high.shape 38 | assert np.isscalar(low) or low.shape == shape, "low.shape doesn't match high.shape" 39 | else: 40 | raise ValueError("shape must be provided or inferred from the shapes of low or high") 41 | 42 | if np.isscalar(low): 43 | low = np.full(shape, low, dtype=dtype) 44 | 45 | if np.isscalar(high): 46 | high = np.full(shape, high, dtype=dtype) 47 | 48 | self.shape = shape 49 | self.low = low 50 | self.high = high 51 | 52 | def _get_precision(dtype): 53 | if np.issubdtype(dtype, np.floating): 54 | return np.finfo(dtype).precision 55 | else: 56 | return np.inf 57 | low_precision = _get_precision(self.low.dtype) 58 | high_precision = _get_precision(self.high.dtype) 59 | dtype_precision = _get_precision(self.dtype) 60 | if min(low_precision, high_precision) > dtype_precision: 61 | logger.warn("Box bound precision lowered by casting to {}".format(self.dtype)) 62 | self.low = self.low.astype(self.dtype) 63 | self.high = self.high.astype(self.dtype) 64 | 65 | # Boolean arrays which indicate the interval type for each coordinate 66 | self.bounded_below = -np.inf < self.low 67 | self.bounded_above = np.inf > self.high 68 | 69 | super(Box, self).__init__(self.shape, self.dtype) 70 | 71 | def is_bounded(self, manner="both"): 72 | below = np.all(self.bounded_below) 73 | above = np.all(self.bounded_above) 74 | if manner == "both": 75 | return below and above 76 | elif manner == "below": 77 | return below 78 | elif manner == "above": 79 | return above 
80 | else: 81 | raise ValueError("manner is not in {'below', 'above', 'both'}") 82 | 83 | def sample(self): 84 | """ 85 | Generates a single random sample inside of the Box. 86 | 87 | In creating a sample of the box, each coordinate is sampled according to 88 | the form of the interval: 89 | 90 | * [a, b] : uniform distribution 91 | * [a, oo) : shifted exponential distribution 92 | * (-oo, b] : shifted negative exponential distribution 93 | * (-oo, oo) : normal distribution 94 | """ 95 | high = self.high if self.dtype.kind == 'f' \ 96 | else self.high.astype('int64') + 1 97 | sample = np.empty(self.shape) 98 | 99 | # Masking arrays which classify the coordinates according to interval 100 | # type 101 | unbounded = ~self.bounded_below & ~self.bounded_above 102 | upp_bounded = ~self.bounded_below & self.bounded_above 103 | low_bounded = self.bounded_below & ~self.bounded_above 104 | bounded = self.bounded_below & self.bounded_above 105 | 106 | 107 | # Vectorized sampling by interval type 108 | sample[unbounded] = self.np_random.normal( 109 | size=unbounded[unbounded].shape) 110 | 111 | sample[low_bounded] = self.np_random.exponential( 112 | size=low_bounded[low_bounded].shape) + self.low[low_bounded] 113 | 114 | sample[upp_bounded] = -self.np_random.exponential( 115 | size=upp_bounded[upp_bounded].shape) + self.high[upp_bounded] 116 | 117 | sample[bounded] = self.np_random.uniform(low=self.low[bounded], 118 | high=high[bounded], 119 | size=bounded[bounded].shape) 120 | if self.dtype.kind == 'i': 121 | sample = np.floor(sample) 122 | 123 | return sample.astype(self.dtype) 124 | 125 | def contains(self, x): 126 | if isinstance(x, list): 127 | x = np.array(x) # Promote list to array for contains check 128 | return x.shape == self.shape and np.all(x >= self.low) and np.all(x <= self.high) 129 | 130 | def to_jsonable(self, sample_n): 131 | return np.array(sample_n).tolist() 132 | 133 | def from_jsonable(self, sample_n): 134 | return [np.asarray(sample) for sample in 
sample_n] 135 | 136 | def __repr__(self): 137 | return "Box({}, {}, {}, {})".format(self.low.min(), self.high.max(), self.shape, self.dtype) 138 | 139 | def __eq__(self, other): 140 | return isinstance(other, Box) and (self.shape == other.shape) and np.allclose(self.low, other.low) and np.allclose(self.high, other.high) 141 | -------------------------------------------------------------------------------- /course3/utils/discrete.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .space import Space 3 | 4 | 5 | class Discrete(Space): 6 | r"""A discrete space in :math:`\{ 0, 1, \\dots, n-1 \}`. 7 | 8 | Example:: 9 | 10 | >>> Discrete(2) 11 | 12 | """ 13 | def __init__(self, n): 14 | assert n >= 0 15 | self.n = n 16 | super(Discrete, self).__init__((), np.int64) 17 | 18 | def sample(self): 19 | return self.np_random.randint(self.n) 20 | 21 | def contains(self, x): 22 | if isinstance(x, int): 23 | as_int = x 24 | elif isinstance(x, (np.generic, np.ndarray)) and (x.dtype.char in np.typecodes['AllInteger'] and x.shape == ()): 25 | as_int = int(x) 26 | else: 27 | return False 28 | return as_int >= 0 and as_int < self.n 29 | 30 | def __repr__(self): 31 | return "Discrete(%d)" % self.n 32 | 33 | def __eq__(self, other): 34 | return isinstance(other, Discrete) and self.n == other.n 35 | -------------------------------------------------------------------------------- /course3/utils/get_logger.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import logging 3 | import time 4 | import os 5 | 6 | 7 | def get_logger(log_path, name, save_file=False, console_out=False, json_file=False): 8 | if not os.path.exists(log_path): 9 | os.mkdir(log_path) 10 | 11 | logger = logging.getLogger() 12 | logger.setLevel(logging.INFO) 13 | # 每分钟建一个文件 14 | rq = time.strftime('%Y%m%d%H%M', time.localtime(time.time())) 15 | log_name = log_path + rq + '_' + name+ '.log' 16 
| json_log_name = log_path + rq + '_' + name + '.json' 17 | logfile = log_name 18 | if save_file: 19 | fh = logging.FileHandler(logfile, mode='a') 20 | fh.setLevel(logging.DEBUG) 21 | formatter = logging.Formatter("%(message)s") 22 | fh.setFormatter(formatter) 23 | logger.addHandler(fh) 24 | # 输出到控制台 25 | if console_out: 26 | console = logging.StreamHandler() 27 | console.setLevel(logging.INFO) 28 | logger.addHandler(console) 29 | 30 | # 输出到json 31 | if json_file: 32 | fh_json = logging.FileHandler(json_log_name, mode='a') 33 | fh_json.setLevel(logging.DEBUG) 34 | formatter_json = logging.Formatter("%(message)s") 35 | fh_json.setFormatter(formatter_json) 36 | logger.addHandler(fh_json) 37 | 38 | return logger -------------------------------------------------------------------------------- /course3/utils/mutli_discrete_particle.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Time : 2021/4/8 下午2:42 3 | # Author: Yahui Cui 4 | 5 | # An old version of OpenAI Gym's multi_discrete.py. (Was getting affected by Gym updates) 6 | # (https://github.com/openai/gym/blob/1fb81d4e3fb780ccf77fec731287ba07da35eb84/gym/spaces/multi_discrete.py) 7 | 8 | import numpy as np 9 | 10 | import gym 11 | 12 | class MultiDiscreteParticle(gym.Space): 13 | """ 14 | - The multi-discrete action space consists of a series of discrete action spaces with different parameters 15 | - It can be adapted to both a Discrete action space or a continuous (Box) action space 16 | - It is useful to represent game controllers or keyboards where each key can be represented as a discrete action space 17 | - It is parametrized by passing an array of arrays containing [min, max] for each discrete action space 18 | where the discrete action space can take any integers from `min` to `max` (both inclusive) 19 | Note: A value of 0 always need to represent the NOOP action. 20 | e.g. 
Nintendo Game Controller 21 | - Can be conceptualized as 3 discrete action spaces: 22 | 1) Arrow Keys: Discrete 5 - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4] - params: min: 0, max: 4 23 | 2) Button A: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1 24 | 3) Button B: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1 25 | - Can be initialized as 26 | MultiDiscrete([ [0,4], [0,1], [0,1] ]) 27 | """ 28 | def __init__(self, array_of_param_array): 29 | self.low = np.array([x[0] for x in array_of_param_array]) 30 | self.high = np.array([x[1] for x in array_of_param_array]) 31 | self.num_discrete_space = self.low.shape[0] 32 | 33 | def sample(self): 34 | """ Returns a array with one sample from each discrete action space """ 35 | # For each row: round(random .* (max - min) + min, 0) 36 | np_random = np.random.RandomState() 37 | random_array = np_random.rand(self.num_discrete_space) 38 | return [int(x) for x in np.floor(np.multiply((self.high - self.low + 1.), random_array) + self.low)] 39 | def contains(self, x): 40 | return len(x) == self.num_discrete_space and (np.array(x) >= self.low).all() and (np.array(x) <= self.high).all() 41 | 42 | @property 43 | def shape(self): 44 | return self.num_discrete_space 45 | def __repr__(self): 46 | return "MultiDiscrete" + str(self.num_discrete_space) 47 | def __eq__(self, other): 48 | return np.array_equal(self.low, other.low) and np.array_equal(self.high, other.high) 49 | -------------------------------------------------------------------------------- /course3/utils/space.py: -------------------------------------------------------------------------------- 1 | from gym.utils import seeding 2 | 3 | 4 | class Space(object): 5 | """Defines the observation and action spaces, so you can write generic 6 | code that applies to any Env. For example, you can choose a random 7 | action. 8 | 9 | WARNING - Custom observation & action spaces can inherit from the `Space` 10 | class. 
However, most use-cases should be covered by the existing space 11 | classes (e.g. `Box`, `Discrete`, etc...), and container classes (`Tuple` & 12 | `Dict`). Note that parametrized probability distributions (through the 13 | `sample()` method), and batching functions (in `gym.vector.VectorEnv`), are 14 | only well-defined for instances of spaces provided in gym by default. 15 | Moreover, some implementations of Reinforcement Learning algorithms might 16 | not handle custom spaces properly. Use custom spaces with care. 17 | """ 18 | def __init__(self, shape=None, dtype=None): 19 | import numpy as np # takes about 300-400ms to import, so we load lazily 20 | self.shape = None if shape is None else tuple(shape) 21 | self.dtype = None if dtype is None else np.dtype(dtype) 22 | self._np_random = None 23 | 24 | @property 25 | def np_random(self): 26 | """Lazily seed the rng since this is expensive and only needed if 27 | sampling from this space. 28 | """ 29 | if self._np_random is None: 30 | self.seed() 31 | 32 | return self._np_random 33 | 34 | def sample(self): 35 | """Randomly sample an element of this space. Can be 36 | uniform or non-uniform sampling based on boundedness of space.""" 37 | raise NotImplementedError 38 | 39 | def seed(self, seed=None): 40 | """Seed the PRNG of this space. 
""" 41 | self._np_random, seed = seeding.np_random(seed) 42 | return [seed] 43 | 44 | def contains(self, x): 45 | """ 46 | Return boolean specifying if x is a valid 47 | member of this space 48 | """ 49 | raise NotImplementedError 50 | 51 | def __contains__(self, x): 52 | return self.contains(x) 53 | 54 | def to_jsonable(self, sample_n): 55 | """Convert a batch of samples from this space to a JSONable data type.""" 56 | # By default, assume identity is JSONable 57 | return sample_n 58 | 59 | def from_jsonable(self, sample_n): 60 | """Convert a JSONable data type to a batch of samples from this space.""" 61 | # By default, assume identity is JSONable 62 | return sample_n 63 | -------------------------------------------------------------------------------- /course4/README.md: -------------------------------------------------------------------------------- 1 | ## 实践课第四天 2 | 3 | ### 任务:经典棋牌 德州扑克 简化版 作业要求: 提交通过 4 | 5 | 6 | --- 7 | ### Env 👉请看 [chessandcard.py](env/chessandcard.py) 8 | 9 | ### Random 👉请看 [random/submission.py](examples/random/submission.py) 10 | 11 | ### 提交 👉请看 [submission.py](examples/random/submission.py) 12 | 13 | --- 14 | 15 | ### Install PettingZoo 16 | >pip install -i https://pypi.tuna.tsinghua.edu.cn/simple pettingzoo[classic]==1.10.0 17 | 18 | ### How to test submission 19 | 20 | Complete examples/submission/submission.py, and then set "policy_list" in line 176 of run_log.py 21 | >python run_log.py 22 | 23 | If no errors, your submission is ready to go~ 24 | 25 | ___ 26 | Have a good time~~~ -------------------------------------------------------------------------------- /course4/docs/rlchina_pbl.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course4/docs/rlchina_pbl.zip -------------------------------------------------------------------------------- /course4/env/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .chessandcard import * 2 | -------------------------------------------------------------------------------- /course4/env/chooseenv.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # 作者:zruizhi 3 | # 创建时间: 2020/9/11 11:17 上午 4 | # 描述:选择运行环境,需要维护env/__ini__.py && config.json(存储环境默认参数) 5 | 6 | import json 7 | import env 8 | import os 9 | 10 | 11 | def make(env_type, conf=None): 12 | file_path = os.path.join(os.path.dirname(__file__), 'config.json') 13 | if not conf: 14 | with open(file_path) as f: 15 | conf = json.load(f)[env_type] 16 | class_literal = conf['class_literal'] 17 | return getattr(env, class_literal)(conf) 18 | 19 | 20 | if __name__ == "__main__": 21 | make("classic_MountainCar-v0") 22 | -------------------------------------------------------------------------------- /course4/env/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "chessandcard-leduc_holdem_v3": { 3 | "class_literal": "ChessAndCard", 4 | "n_player": 2, 5 | "max_step": 10000, 6 | "game_name": "leduc_holdem_v3", 7 | "is_obs_continuous": false, 8 | "is_act_continuous": false, 9 | "agent_nums": [1,1], 10 | "obs_type": ["dict", "dict"] 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /course4/env/obs_interfaces/observation.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # 作者:zruizhi 3 | # 创建时间: 2020/11/13 3:51 下午 4 | # 描述:observation的各种接口类 5 | obs_type = ["grid", "vector", "dict"] 6 | 7 | 8 | class GridObservation(object): 9 | def get_grid_observation(self, current_state, player_id, info_before): 10 | raise NotImplementedError 11 | 12 | def get_grid_many_observation(self, current_state, player_id_list, info_before=''): 13 | all_obs = [] 14 | for i in player_id_list: 15 | 
all_obs.append(self.get_grid_observation(current_state, i, info_before)) 16 | return all_obs 17 | 18 | 19 | class VectorObservation(object): 20 | def get_vector_observation(self, current_state, player_id, info_before): 21 | raise NotImplementedError 22 | 23 | def get_vector_many_observation(self, current_state, player_id_list, info_before=''): 24 | all_obs = [] 25 | for i in player_id_list: 26 | all_obs.append(self.get_vector_observation(current_state, i, info_before)) 27 | return all_obs 28 | 29 | 30 | class DictObservation(object): 31 | def get_dict_observation(self, current_state, player_id, info_before): 32 | raise NotImplementedError 33 | 34 | def get_dict_many_observation(self, current_state, player_id_list, info_before=''): 35 | all_obs = [] 36 | for i in player_id_list: 37 | all_obs.append(self.get_dict_observation(current_state, i, info_before)) 38 | return all_obs 39 | 40 | 41 | # todo: observation builder 42 | class CustomObservation(object): 43 | def get_custom_observation(self, current_state, player_id): 44 | raise NotImplementedError 45 | 46 | def get_custom_obs_space(self, player_id): 47 | raise NotImplementedError 48 | 49 | def get_custom_many_observation(self, current_state, player_id_list): 50 | all_obs = [] 51 | for i in player_id_list: 52 | all_obs.append(self.get_custom_observation(current_state, i)) 53 | return all_obs 54 | 55 | def get_custom_many_obs_space(self, player_id_list): 56 | all_obs_space = [] 57 | for i in player_id_list: 58 | all_obs_space.append(self.get_custom_obs_space(i)) 59 | return all_obs_space 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /course4/env/simulators/game.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # 作者:zruizhi 3 | # 创建时间: 2020/7/10 10:24 上午 4 | # 描述: 5 | from abc import ABC, abstractmethod 6 | 7 | 8 | class Game(ABC): 9 | def __init__(self, n_player, is_obs_continuous, 
is_act_continuous, game_name, agent_nums, obs_type): 10 | self.n_player = n_player 11 | self.current_state = None 12 | self.all_observes = None 13 | self.is_obs_continuous = is_obs_continuous 14 | self.is_act_continuous = is_act_continuous 15 | self.game_name = game_name 16 | self.agent_nums = agent_nums 17 | self.obs_type = obs_type 18 | 19 | def get_config(self, player_id): 20 | raise NotImplementedError 21 | 22 | def get_render_data(self, current_state): 23 | return current_state 24 | 25 | def set_current_state(self, current_state): 26 | raise NotImplementedError 27 | 28 | @abstractmethod 29 | def is_terminal(self): 30 | raise NotImplementedError 31 | 32 | def get_next_state(self, all_action): 33 | raise NotImplementedError 34 | 35 | def get_reward(self, all_action): 36 | raise NotImplementedError 37 | 38 | @abstractmethod 39 | def step(self, all_action): 40 | raise NotImplementedError 41 | 42 | @abstractmethod 43 | def reset(self): 44 | raise NotImplementedError 45 | 46 | def set_action_space(self): 47 | raise NotImplementedError 48 | 49 | -------------------------------------------------------------------------------- /course4/examples/random/submission.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Time : 2022/8/10 下午4:14 3 | # Author: Yahui Cui 4 | 5 | """ 6 | # =================================== Important ========================================= 7 | Notes: 8 | 1. this agents is random agents , which can fit any env in Jidi platform. 9 | 2. 
if you want to load .pth file, please follow the instruction here: 10 | https://github.com/jidiai/ai_lib/blob/master/examples/demo 11 | """ 12 | 13 | 14 | def my_controller(observation, action_space, is_act_continuous=True): 15 | agent_action = [] 16 | for i in range(len(action_space)): 17 | action_ = sample_single_dim(action_space[i], is_act_continuous) 18 | agent_action.append(action_) 19 | return agent_action 20 | 21 | 22 | def sample_single_dim(action_space_list_each, is_act_continuous): 23 | each = [] 24 | if is_act_continuous: 25 | each = action_space_list_each.sample() 26 | else: 27 | if action_space_list_each.__class__.__name__ == "Discrete": 28 | each = [0] * action_space_list_each.n 29 | idx = action_space_list_each.sample() 30 | each[idx] = 1 31 | elif action_space_list_each.__class__.__name__ == "MultiDiscreteParticle": 32 | each = [] 33 | nvec = action_space_list_each.high - action_space_list_each.low + 1 34 | sample_indexes = action_space_list_each.sample() 35 | 36 | for i in range(len(nvec)): 37 | dim = nvec[i] 38 | new_action = [0] * dim 39 | index = sample_indexes[i] 40 | new_action[index] = 1 41 | each.extend(new_action) 42 | elif action_space_list_each.__class__.__name__ == "Discrete_SC2": 43 | each = action_space_list_each.sample() 44 | elif action_space_list_each.__class__.__name__ == "Box": 45 | each = action_space_list_each.sample() 46 | return each 47 | -------------------------------------------------------------------------------- /course4/examples/submission/submission.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Time : 2022/8/10 下午4:14 3 | # Author: Yahui Cui 4 | 5 | """ 6 | # =================================== Important ========================================= 7 | Notes: 8 | 1. this agents is random agents , which can fit any env in Jidi platform. 9 | 2. 
if you want to load .pth file, please follow the instruction here: 10 | https://github.com/jidiai/ai_lib/blob/master/examples/demo 11 | """ 12 | 13 | 14 | def my_controller(observation, action_space, is_act_continuous=True): 15 | pass 16 | -------------------------------------------------------------------------------- /course4/utils/box.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .space import Space 4 | from gym import logger 5 | 6 | 7 | class Box(Space): 8 | """ 9 | A (possibly unbounded) box in R^n. Specifically, a Box represents the 10 | Cartesian product of n closed intervals. Each interval has the form of one 11 | of [a, b], (-oo, b], [a, oo), or (-oo, oo). 12 | 13 | There are two common use cases: 14 | 15 | * Identical bound for each dimension:: 16 | >>> Box(low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) 17 | Box(3, 4) 18 | 19 | * Independent bound for each dimension:: 20 | >>> Box(low=np.array([-1.0, -2.0]), high=np.array([2.0, 4.0]), dtype=np.float32) 21 | Box(2,) 22 | 23 | """ 24 | def __init__(self, low, high, shape=None, dtype=np.float32): 25 | assert dtype is not None, 'dtype must be explicitly provided. 
' 26 | self.dtype = np.dtype(dtype) 27 | 28 | # determine shape if it isn't provided directly 29 | if shape is not None: 30 | shape = tuple(shape) 31 | assert np.isscalar(low) or low.shape == shape, "low.shape doesn't match provided shape" 32 | assert np.isscalar(high) or high.shape == shape, "high.shape doesn't match provided shape" 33 | elif not np.isscalar(low): 34 | shape = low.shape 35 | assert np.isscalar(high) or high.shape == shape, "high.shape doesn't match low.shape" 36 | elif not np.isscalar(high): 37 | shape = high.shape 38 | assert np.isscalar(low) or low.shape == shape, "low.shape doesn't match high.shape" 39 | else: 40 | raise ValueError("shape must be provided or inferred from the shapes of low or high") 41 | 42 | if np.isscalar(low): 43 | low = np.full(shape, low, dtype=dtype) 44 | 45 | if np.isscalar(high): 46 | high = np.full(shape, high, dtype=dtype) 47 | 48 | self.shape = shape 49 | self.low = low 50 | self.high = high 51 | 52 | def _get_precision(dtype): 53 | if np.issubdtype(dtype, np.floating): 54 | return np.finfo(dtype).precision 55 | else: 56 | return np.inf 57 | low_precision = _get_precision(self.low.dtype) 58 | high_precision = _get_precision(self.high.dtype) 59 | dtype_precision = _get_precision(self.dtype) 60 | if min(low_precision, high_precision) > dtype_precision: 61 | logger.warn("Box bound precision lowered by casting to {}".format(self.dtype)) 62 | self.low = self.low.astype(self.dtype) 63 | self.high = self.high.astype(self.dtype) 64 | 65 | # Boolean arrays which indicate the interval type for each coordinate 66 | self.bounded_below = -np.inf < self.low 67 | self.bounded_above = np.inf > self.high 68 | 69 | super(Box, self).__init__(self.shape, self.dtype) 70 | 71 | def is_bounded(self, manner="both"): 72 | below = np.all(self.bounded_below) 73 | above = np.all(self.bounded_above) 74 | if manner == "both": 75 | return below and above 76 | elif manner == "below": 77 | return below 78 | elif manner == "above": 79 | return above 
80 | else: 81 | raise ValueError("manner is not in {'below', 'above', 'both'}") 82 | 83 | def sample(self): 84 | """ 85 | Generates a single random sample inside of the Box. 86 | 87 | In creating a sample of the box, each coordinate is sampled according to 88 | the form of the interval: 89 | 90 | * [a, b] : uniform distribution 91 | * [a, oo) : shifted exponential distribution 92 | * (-oo, b] : shifted negative exponential distribution 93 | * (-oo, oo) : normal distribution 94 | """ 95 | high = self.high if self.dtype.kind == 'f' \ 96 | else self.high.astype('int64') + 1 97 | sample = np.empty(self.shape) 98 | 99 | # Masking arrays which classify the coordinates according to interval 100 | # type 101 | unbounded = ~self.bounded_below & ~self.bounded_above 102 | upp_bounded = ~self.bounded_below & self.bounded_above 103 | low_bounded = self.bounded_below & ~self.bounded_above 104 | bounded = self.bounded_below & self.bounded_above 105 | 106 | 107 | # Vectorized sampling by interval type 108 | sample[unbounded] = self.np_random.normal( 109 | size=unbounded[unbounded].shape) 110 | 111 | sample[low_bounded] = self.np_random.exponential( 112 | size=low_bounded[low_bounded].shape) + self.low[low_bounded] 113 | 114 | sample[upp_bounded] = -self.np_random.exponential( 115 | size=upp_bounded[upp_bounded].shape) + self.high[upp_bounded] 116 | 117 | sample[bounded] = self.np_random.uniform(low=self.low[bounded], 118 | high=high[bounded], 119 | size=bounded[bounded].shape) 120 | if self.dtype.kind == 'i': 121 | sample = np.floor(sample) 122 | 123 | return sample.astype(self.dtype) 124 | 125 | def contains(self, x): 126 | if isinstance(x, list): 127 | x = np.array(x) # Promote list to array for contains check 128 | return x.shape == self.shape and np.all(x >= self.low) and np.all(x <= self.high) 129 | 130 | def to_jsonable(self, sample_n): 131 | return np.array(sample_n).tolist() 132 | 133 | def from_jsonable(self, sample_n): 134 | return [np.asarray(sample) for sample in 
sample_n] 135 | 136 | def __repr__(self): 137 | return "Box({}, {}, {}, {})".format(self.low.min(), self.high.max(), self.shape, self.dtype) 138 | 139 | def __eq__(self, other): 140 | return isinstance(other, Box) and (self.shape == other.shape) and np.allclose(self.low, other.low) and np.allclose(self.high, other.high) 141 | -------------------------------------------------------------------------------- /course4/utils/discrete.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .space import Space 3 | 4 | 5 | class Discrete(Space): 6 | r"""A discrete space in :math:`\{ 0, 1, \\dots, n-1 \}`. 7 | 8 | Example:: 9 | 10 | >>> Discrete(2) 11 | 12 | """ 13 | def __init__(self, n): 14 | assert n >= 0 15 | self.n = n 16 | super(Discrete, self).__init__((), np.int64) 17 | 18 | def sample(self): 19 | return self.np_random.randint(self.n) 20 | 21 | def contains(self, x): 22 | if isinstance(x, int): 23 | as_int = x 24 | elif isinstance(x, (np.generic, np.ndarray)) and (x.dtype.char in np.typecodes['AllInteger'] and x.shape == ()): 25 | as_int = int(x) 26 | else: 27 | return False 28 | return as_int >= 0 and as_int < self.n 29 | 30 | def __repr__(self): 31 | return "Discrete(%d)" % self.n 32 | 33 | def __eq__(self, other): 34 | return isinstance(other, Discrete) and self.n == other.n 35 | -------------------------------------------------------------------------------- /course4/utils/get_logger.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import logging 3 | import time 4 | import os 5 | 6 | 7 | def get_logger(log_path, name, save_file=False, console_out=False, json_file=False): 8 | if not os.path.exists(log_path): 9 | os.mkdir(log_path) 10 | 11 | logger = logging.getLogger() 12 | logger.setLevel(logging.INFO) 13 | # 每分钟建一个文件 14 | rq = time.strftime('%Y%m%d%H%M', time.localtime(time.time())) 15 | log_name = log_path + rq + '_' + name+ '.log' 16 
| json_log_name = log_path + rq + '_' + name + '.json' 17 | logfile = log_name 18 | if save_file: 19 | fh = logging.FileHandler(logfile, mode='a') 20 | fh.setLevel(logging.DEBUG) 21 | formatter = logging.Formatter("%(message)s") 22 | fh.setFormatter(formatter) 23 | logger.addHandler(fh) 24 | # 输出到控制台 25 | if console_out: 26 | console = logging.StreamHandler() 27 | console.setLevel(logging.INFO) 28 | logger.addHandler(console) 29 | 30 | # 输出到json 31 | if json_file: 32 | fh_json = logging.FileHandler(json_log_name, mode='a') 33 | fh_json.setLevel(logging.DEBUG) 34 | formatter_json = logging.Formatter("%(message)s") 35 | fh_json.setFormatter(formatter_json) 36 | logger.addHandler(fh_json) 37 | 38 | return logger -------------------------------------------------------------------------------- /course4/utils/mutli_discrete_particle.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Time : 2021/4/8 下午2:42 3 | # Author: Yahui Cui 4 | 5 | # An old version of OpenAI Gym's multi_discrete.py. (Was getting affected by Gym updates) 6 | # (https://github.com/openai/gym/blob/1fb81d4e3fb780ccf77fec731287ba07da35eb84/gym/spaces/multi_discrete.py) 7 | 8 | import numpy as np 9 | 10 | import gym 11 | 12 | class MultiDiscreteParticle(gym.Space): 13 | """ 14 | - The multi-discrete action space consists of a series of discrete action spaces with different parameters 15 | - It can be adapted to both a Discrete action space or a continuous (Box) action space 16 | - It is useful to represent game controllers or keyboards where each key can be represented as a discrete action space 17 | - It is parametrized by passing an array of arrays containing [min, max] for each discrete action space 18 | where the discrete action space can take any integers from `min` to `max` (both inclusive) 19 | Note: A value of 0 always need to represent the NOOP action. 20 | e.g. 
Nintendo Game Controller 21 | - Can be conceptualized as 3 discrete action spaces: 22 | 1) Arrow Keys: Discrete 5 - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4] - params: min: 0, max: 4 23 | 2) Button A: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1 24 | 3) Button B: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1 25 | - Can be initialized as 26 | MultiDiscrete([ [0,4], [0,1], [0,1] ]) 27 | """ 28 | def __init__(self, array_of_param_array): 29 | self.low = np.array([x[0] for x in array_of_param_array]) 30 | self.high = np.array([x[1] for x in array_of_param_array]) 31 | self.num_discrete_space = self.low.shape[0] 32 | 33 | def sample(self): 34 | """ Returns a array with one sample from each discrete action space """ 35 | # For each row: round(random .* (max - min) + min, 0) 36 | np_random = np.random.RandomState() 37 | random_array = np_random.rand(self.num_discrete_space) 38 | return [int(x) for x in np.floor(np.multiply((self.high - self.low + 1.), random_array) + self.low)] 39 | def contains(self, x): 40 | return len(x) == self.num_discrete_space and (np.array(x) >= self.low).all() and (np.array(x) <= self.high).all() 41 | 42 | @property 43 | def shape(self): 44 | return self.num_discrete_space 45 | def __repr__(self): 46 | return "MultiDiscrete" + str(self.num_discrete_space) 47 | def __eq__(self, other): 48 | return np.array_equal(self.low, other.low) and np.array_equal(self.high, other.high) 49 | -------------------------------------------------------------------------------- /course4/utils/space.py: -------------------------------------------------------------------------------- 1 | from gym.utils import seeding 2 | 3 | 4 | class Space(object): 5 | """Defines the observation and action spaces, so you can write generic 6 | code that applies to any Env. For example, you can choose a random 7 | action. 8 | 9 | WARNING - Custom observation & action spaces can inherit from the `Space` 10 | class. 
However, most use-cases should be covered by the existing space 11 | classes (e.g. `Box`, `Discrete`, etc...), and container classes (`Tuple` & 12 | `Dict`). Note that parametrized probability distributions (through the 13 | `sample()` method), and batching functions (in `gym.vector.VectorEnv`), are 14 | only well-defined for instances of spaces provided in gym by default. 15 | Moreover, some implementations of Reinforcement Learning algorithms might 16 | not handle custom spaces properly. Use custom spaces with care. 17 | """ 18 | def __init__(self, shape=None, dtype=None): 19 | import numpy as np # takes about 300-400ms to import, so we load lazily 20 | self.shape = None if shape is None else tuple(shape) 21 | self.dtype = None if dtype is None else np.dtype(dtype) 22 | self._np_random = None 23 | 24 | @property 25 | def np_random(self): 26 | """Lazily seed the rng since this is expensive and only needed if 27 | sampling from this space. 28 | """ 29 | if self._np_random is None: 30 | self.seed() 31 | 32 | return self._np_random 33 | 34 | def sample(self): 35 | """Randomly sample an element of this space. Can be 36 | uniform or non-uniform sampling based on boundedness of space.""" 37 | raise NotImplementedError 38 | 39 | def seed(self, seed=None): 40 | """Seed the PRNG of this space. 
""" 41 | self._np_random, seed = seeding.np_random(seed) 42 | return [seed] 43 | 44 | def contains(self, x): 45 | """ 46 | Return boolean specifying if x is a valid 47 | member of this space 48 | """ 49 | raise NotImplementedError 50 | 51 | def __contains__(self, x): 52 | return self.contains(x) 53 | 54 | def to_jsonable(self, sample_n): 55 | """Convert a batch of samples from this space to a JSONable data type.""" 56 | # By default, assume identity is JSONable 57 | return sample_n 58 | 59 | def from_jsonable(self, sample_n): 60 | """Convert a JSONable data type to a batch of samples from this space.""" 61 | # By default, assume identity is JSONable 62 | return sample_n 63 | -------------------------------------------------------------------------------- /course5/README.md: -------------------------------------------------------------------------------- 1 | ## 实践课第五天 2 | 3 | ### 任务:入门 REVIVE 冰箱控温 作业要求: 提交通过并且在金榜的排名高于Jidi_random 4 | 5 | 6 | --- 7 | ### Env 👉请看 [revive_refrigerator.py](env/revive_refrigerator.py) 8 | 9 | ### Random 👉请看 [random/submission.py](examples/random/submission.py) 10 | 11 | ### 提交 👉请看 [submission.py](examples/random/submission.py) 12 | 13 | --- 14 | 15 | ### Install REVIVE SDK (if training with REVIVE SDK) 16 | >https://www.revive.cn/help/polixir-revive-sdk/text/introduction.html 17 | 18 | ### How to test submission 19 | 20 | Complete examples/submission/submission.py, and then set "policy_list" in line 176 of run_log.py 21 | >python run_log.py 22 | 23 | If no errors, your submission is ready to go~ 24 | 25 | 26 | ### Ready to submit 27 | > random: [random/submission.py](examples/random/submission.py) 28 | 29 | > REVIVE Example: [revive_example/submission.py](examples/revive_example/submission.py) 30 | > and [revive_example/revive_policy.pkl](examples/revive_example/revive_policy.pkl) 31 | 32 | 33 | ___ 34 | Have a good time~~~ -------------------------------------------------------------------------------- /course5/env/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .revive_refrigerator import * 2 | -------------------------------------------------------------------------------- /course5/env/chooseenv.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # 作者:zruizhi 3 | # 创建时间: 2020/9/11 11:17 上午 4 | # 描述:选择运行环境,需要维护env/__ini__.py && config.json(存储环境默认参数) 5 | 6 | import json 7 | import env 8 | import os 9 | 10 | 11 | def make(env_type, conf=None): 12 | file_path = os.path.join(os.path.dirname(__file__), 'config.json') 13 | if not conf: 14 | with open(file_path) as f: 15 | conf = json.load(f)[env_type] 16 | class_literal = conf['class_literal'] 17 | return getattr(env, class_literal)(conf) 18 | 19 | 20 | if __name__ == "__main__": 21 | make("classic_MountainCar-v0") 22 | -------------------------------------------------------------------------------- /course5/env/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "revive-refrigerator": { 3 | "class_literal": "Refrigerator", 4 | "n_player": 1, 5 | "max_step": 2000, 6 | "game_name": "refrigerator", 7 | "is_obs_continuous": true, 8 | "is_act_continuous": true, 9 | "agent_nums": [1], 10 | "obs_type": ["dict"] 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /course5/env/obs_interfaces/observation.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # 作者:zruizhi 3 | # 创建时间: 2020/11/13 3:51 下午 4 | # 描述:observation的各种接口类 5 | obs_type = ["grid", "vector", "dict"] 6 | 7 | 8 | class GridObservation(object): 9 | def get_grid_observation(self, current_state, player_id, info_before): 10 | raise NotImplementedError 11 | 12 | def get_grid_many_observation(self, current_state, player_id_list, info_before=''): 13 | all_obs = [] 14 | for i in player_id_list: 15 | 
class VectorObservation(object):
    """Interface for games that expose per-player vector observations."""

    def get_vector_observation(self, current_state, player_id, info_before):
        """Return the vector observation for one player; must be overridden."""
        raise NotImplementedError

    def get_vector_many_observation(self, current_state, player_id_list, info_before=''):
        """Return one observation per player id, in list order."""
        return [self.get_vector_observation(current_state, i, info_before)
                for i in player_id_list]


class DictObservation(object):
    """Interface for games that expose per-player dict observations."""

    def get_dict_observation(self, current_state, player_id, info_before):
        """Return the dict observation for one player; must be overridden."""
        raise NotImplementedError

    def get_dict_many_observation(self, current_state, player_id_list, info_before=''):
        """Return one observation per player id, in list order."""
        return [self.get_dict_observation(current_state, i, info_before)
                for i in player_id_list]


# todo: observation builder
class CustomObservation(object):
    """Interface for games with fully custom observations and obs spaces."""

    def get_custom_observation(self, current_state, player_id):
        """Return the custom observation for one player; must be overridden."""
        raise NotImplementedError

    def get_custom_obs_space(self, player_id):
        """Return the observation space for one player; must be overridden."""
        raise NotImplementedError

    def get_custom_many_observation(self, current_state, player_id_list):
        """Return one observation per player id, in list order."""
        return [self.get_custom_observation(current_state, i)
                for i in player_id_list]

    def get_custom_many_obs_space(self, player_id_list):
        """Return one observation space per player id, in list order."""
        return [self.get_custom_obs_space(i) for i in player_id_list]
class DoorOpen():
    """Scripted agent that keeps the refrigerator door open for a fixed
    number of consecutive steps, then closes it."""

    def __init__(self, door_open_time=10):
        # remaining steps the door stays open; restored by reset()
        self.door_open_time = door_open_time
        self.init_door_open_time = door_open_time
        self.door_open = False

    def act(self):
        """Consume one open-step and report whether the door is still open."""
        self.door_open_time -= 1
        self.door_open = self.door_open_time >= 0
        return self.door_open

    def reset(self):
        """Close the door and refill the open-time budget."""
        self.door_open = False
        self.door_open_time = self.init_door_open_time


class Simulator:
    """Refrigerator temperature-control simulator: cooling power pulls the
    temperature down, while heat leaks in toward the ambient temperature
    (faster when the door is open)."""

    def __init__(self, init_temperature=10):
        self.outdoor_temperature = 15   # ambient temperature the box leaks toward
        self.temp = init_temperature
        self.door_state = False

    def update(self, power, dt, door_open=False):
        """Advance the simulation by *dt* with the given cooling *power*.

        Returns the (noisy) temperature reading after the step.
        """
        self.door_state = door_open

        if power > 0:
            self.temp -= power * dt  # cooling proportional to applied power
        # Heat leak toward ambient; the coefficient quadruples with the door open.
        leak_rate = 0.02 if self.door_state == False else 0.08
        self.temp = self.temp - (self.temp - self.outdoor_temperature) * leak_rate * dt
        return self.get_temperature()

    def get_temperature(self):
        """Noisy sensor reading: true temperature plus N(0, 0.1) noise."""
        return self.temp + np.random.normal(0, 0.1)

    def get_door_state(self):
        """Whether the door was open during the last update."""
        return self.door_state

    def reset(self, init_temperature):
        """Restore the initial temperature and reseed both RNGs to 0 so
        episodes are reproducible (preserved from the original)."""
        self.temp = init_temperature
        self.outdoor_temperature = 15
        random.seed(0)
        np.random.seed(0)
DoorOpen, Simulator 16 | from course5.utils.box import Box 17 | 18 | 19 | class Refrigerator(Game): 20 | def __init__(self, conf, seed=0): 21 | super(Refrigerator, self).__init__(conf['n_player'], conf['is_obs_continuous'], conf['is_act_continuous'], 22 | conf['game_name'], conf['agent_nums'], conf['obs_type']) 23 | self.max_step = int(conf["max_step"]) 24 | self.done = False 25 | self.seed = None 26 | self.set_seed(seed) 27 | self.won = {} 28 | self.n_return = [0] * self.n_player 29 | 30 | self.step_cnt = 0 31 | self.open_interval = 200 32 | self.open_door = False 33 | self.door_open_after_step = self.open_interval + 1 34 | self.init_temperature = 10 35 | self.target_temperature = -2 36 | self.sampling_time = 1 37 | self.door_open_agent = DoorOpen(door_open_time=20) 38 | self.cool_simulator = Simulator(self.init_temperature) 39 | self.cool_simulator.reset(init_temperature=self.init_temperature) 40 | self.traj = [] 41 | 42 | self.joint_action_space = self.set_action_space() 43 | self.current_state = self.cool_simulator.get_temperature() 44 | self.all_observes = self.get_all_observes() 45 | self.init_info = self.get_info_after(False) 46 | 47 | def reset(self): 48 | self.won = {} 49 | self.n_return = [0] * self.n_player 50 | self.step_cnt = 0 51 | self.open_interval = 200 52 | self.open_door = False 53 | self.door_open_after_step = self.open_interval + 1 54 | self.init_temperature = 10 55 | self.target_temperature = -2 56 | self.sampling_time = 1 57 | self.door_open_agent = DoorOpen(door_open_time=20) 58 | self.cool_simulator = Simulator(self.init_temperature) 59 | self.cool_simulator.reset(init_temperature=self.init_temperature) 60 | self.traj = [] 61 | 62 | self.current_state = self.cool_simulator.get_temperature() 63 | self.all_observes = self.get_all_observes() 64 | self.init_info = self.get_info_after(False) 65 | return self.all_observes 66 | 67 | def step(self, joint_action): 68 | self.is_valid_action(joint_action) 69 | if self.step_cnt % self.open_interval == 
0: 70 | if random.random() < 0.5: 71 | self.open_door = True 72 | self.door_open_agent.reset() 73 | self.door_open_after_step = random.randint(0, self.open_interval - self.door_open_agent.door_open_time) 74 | action = joint_action[0][0] 75 | if self.open_door and self.step_cnt % self.open_interval >= self.door_open_after_step: 76 | door_open = self.door_open_agent.act() 77 | self.cool_simulator.update(power=action, dt=self.sampling_time, door_open=door_open) 78 | else: 79 | door_open = False 80 | self.cool_simulator.update(power=action, dt=self.sampling_time, door_open=door_open) 81 | self.current_state = self.cool_simulator.get_temperature() 82 | self.all_observes = self.get_all_observes() 83 | self.traj.append(copy.deepcopy(self.current_state)) 84 | reward = -(abs(self.current_state - self.init_temperature)) 85 | self.step_cnt += 1 86 | done = self.is_terminal() 87 | if done: 88 | self.set_n_return() 89 | info_after = self.get_info_after(door_open) 90 | 91 | return self.all_observes, reward, done, '', info_after 92 | 93 | def is_valid_action(self, joint_action): 94 | 95 | if np.isscalar(joint_action): 96 | raise Exception("Input joint action dimension should be (1,)") 97 | 98 | if len(joint_action) != self.n_player: 99 | raise Exception("Input joint action dimension should be (1,)") 100 | 101 | if np.isscalar(joint_action[0]): 102 | raise Exception("Input joint action dimension should be (1,)") 103 | 104 | if len(joint_action[0]) != 1: 105 | raise Exception("Input joint action dimension should be (1,)") 106 | 107 | if isinstance(joint_action[0][0], np.ndarray): 108 | joint_action[0][0] = joint_action[0][0][0] 109 | 110 | if not np.isscalar(joint_action[0][0]): 111 | raise Exception("Value in the action should be a scalar") 112 | 113 | if joint_action[0][0] < 0 or joint_action[0][0] > 10: 114 | raise Exception("Value of action should between 0 and 10") 115 | 116 | def set_action_space(self): 117 | return [[Box(low=0, high=10, shape=(1,))]] 118 | 119 | def 
get_all_observes(self): 120 | return [{"obs": copy.deepcopy(self.current_state), "controlled_player_index": 0}] 121 | 122 | def get_single_action_space(self, player_id): 123 | return self.joint_action_space[player_id] 124 | 125 | def is_terminal(self): 126 | if self.step_cnt >= self.max_step: 127 | self.done = True 128 | 129 | return self.done 130 | 131 | def set_seed(self, seed): 132 | if seed is not None: 133 | self.seed = seed 134 | random.seed(self.seed) 135 | 136 | def get_info_after(self, door_open): 137 | return {"temperature": copy.deepcopy(self.current_state), "controlled_player_index": 0, "door_open": door_open} 138 | 139 | def set_n_return(self): 140 | self.n_return[0] = -np.mean(np.abs(np.array(self.traj) - self.target_temperature)) 141 | 142 | def check_win(self): 143 | return self.won 144 | 145 | -------------------------------------------------------------------------------- /course5/env/simulators/game.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # 作者:zruizhi 3 | # 创建时间: 2020/7/10 10:24 上午 4 | # 描述: 5 | from abc import ABC, abstractmethod 6 | 7 | 8 | class Game(ABC): 9 | def __init__(self, n_player, is_obs_continuous, is_act_continuous, game_name, agent_nums, obs_type): 10 | self.n_player = n_player 11 | self.current_state = None 12 | self.all_observes = None 13 | self.is_obs_continuous = is_obs_continuous 14 | self.is_act_continuous = is_act_continuous 15 | self.game_name = game_name 16 | self.agent_nums = agent_nums 17 | self.obs_type = obs_type 18 | 19 | def get_config(self, player_id): 20 | raise NotImplementedError 21 | 22 | def get_render_data(self, current_state): 23 | return current_state 24 | 25 | def set_current_state(self, current_state): 26 | raise NotImplementedError 27 | 28 | @abstractmethod 29 | def is_terminal(self): 30 | raise NotImplementedError 31 | 32 | def get_next_state(self, all_action): 33 | raise NotImplementedError 34 | 35 | def get_reward(self, 
def my_controller(observation, action_space, is_act_continuous=True):
    """Random policy: draw one action per sub-space in *action_space*.

    Args:
        observation: current observation (unused by the random policy).
        action_space: list of sub-spaces for this agent.
        is_act_continuous: if True, every sub-space is sampled directly.

    Returns:
        A list with one sampled action per sub-space.
    """
    return [sample_single_dim(space, is_act_continuous) for space in action_space]


def sample_single_dim(action_space_list_each, is_act_continuous):
    """Sample one action from a single sub-space.

    Discrete actions are returned one-hot encoded; continuous (Box) actions
    are returned as the space's raw sample. An unrecognized discrete space
    yields an empty list, matching the original fall-through behavior.
    """
    if is_act_continuous:
        return action_space_list_each.sample()

    name = action_space_list_each.__class__.__name__
    if name == "Discrete":
        each = [0] * action_space_list_each.n
        each[action_space_list_each.sample()] = 1
        return each
    if name == "MultiDiscreteParticle":
        each = []
        # One one-hot segment per discrete dimension, concatenated.
        nvec = action_space_list_each.high - action_space_list_each.low + 1
        sample_indexes = action_space_list_each.sample()
        for dim, index in zip(nvec, sample_indexes):
            one_hot = [0] * dim
            one_hot[index] = 1
            each.extend(one_hot)
        return each
    if name in ("Discrete_SC2", "Box"):
        return action_space_list_each.sample()
    return []
# -*- coding:utf-8 -*- 2 | # Time : 2022/8/10 下午4:14 3 | # Author: Yahui Cui 4 | 5 | """ 6 | # =================================== Important ========================================= 7 | Notes: 8 | 1. this agents is random agents , which can fit any env in Jidi platform. 9 | 2. if you want to load .pth file, please follow the instruction here: 10 | https://github.com/jidiai/ai_lib/blob/master/examples/demo 11 | """ 12 | 13 | 14 | def my_controller(observation, action_space, is_act_continuous=True): 15 | pass 16 | -------------------------------------------------------------------------------- /course5/utils/box.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .space import Space 4 | from gym import logger 5 | 6 | 7 | class Box(Space): 8 | """ 9 | A (possibly unbounded) box in R^n. Specifically, a Box represents the 10 | Cartesian product of n closed intervals. Each interval has the form of one 11 | of [a, b], (-oo, b], [a, oo), or (-oo, oo). 12 | 13 | There are two common use cases: 14 | 15 | * Identical bound for each dimension:: 16 | >>> Box(low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) 17 | Box(3, 4) 18 | 19 | * Independent bound for each dimension:: 20 | >>> Box(low=np.array([-1.0, -2.0]), high=np.array([2.0, 4.0]), dtype=np.float32) 21 | Box(2,) 22 | 23 | """ 24 | def __init__(self, low, high, shape=None, dtype=np.float32): 25 | assert dtype is not None, 'dtype must be explicitly provided. 
    def __init__(self, low, high, shape=None, dtype=np.float32):
        """Create a Box with bounds *low*/*high*.

        Either pass an explicit *shape* with scalar (or matching-shape array)
        bounds, or pass array bounds from which the shape is inferred; scalar
        bounds are broadcast to the full shape.
        """
        assert dtype is not None, 'dtype must be explicitly provided. '
        self.dtype = np.dtype(dtype)

        # determine shape if it isn't provided directly
        if shape is not None:
            shape = tuple(shape)
            assert np.isscalar(low) or low.shape == shape, "low.shape doesn't match provided shape"
            assert np.isscalar(high) or high.shape == shape, "high.shape doesn't match provided shape"
        elif not np.isscalar(low):
            shape = low.shape
            assert np.isscalar(high) or high.shape == shape, "high.shape doesn't match low.shape"
        elif not np.isscalar(high):
            shape = high.shape
            assert np.isscalar(low) or low.shape == shape, "low.shape doesn't match high.shape"
        else:
            raise ValueError("shape must be provided or inferred from the shapes of low or high")

        # Broadcast scalar bounds to full arrays of the resolved shape.
        if np.isscalar(low):
            low = np.full(shape, low, dtype=dtype)

        if np.isscalar(high):
            high = np.full(shape, high, dtype=dtype)

        self.shape = shape
        self.low = low
        self.high = high

        def _get_precision(dtype):
            # Floating dtypes have finite decimal precision; everything else
            # (ints) is treated as exact, i.e. infinite precision.
            if np.issubdtype(dtype, np.floating):
                return np.finfo(dtype).precision
            else:
                return np.inf
        low_precision = _get_precision(self.low.dtype)
        high_precision = _get_precision(self.high.dtype)
        dtype_precision = _get_precision(self.dtype)
        # Warn when casting the bounds to self.dtype would lose precision.
        if min(low_precision, high_precision) > dtype_precision:
            logger.warn("Box bound precision lowered by casting to {}".format(self.dtype))
        self.low = self.low.astype(self.dtype)
        self.high = self.high.astype(self.dtype)

        # Boolean arrays which indicate the interval type for each coordinate
        self.bounded_below = -np.inf < self.low
        self.bounded_above = np.inf > self.high

        super(Box, self).__init__(self.shape, self.dtype)
    def sample(self):
        """
        Generates a single random sample inside of the Box.

        In creating a sample of the box, each coordinate is sampled according to
        the form of the interval:

        * [a, b] : uniform distribution
        * [a, oo) : shifted exponential distribution
        * (-oo, b] : shifted negative exponential distribution
        * (-oo, oo) : normal distribution
        """
        # Integer boxes sample uniformly over [low, high] inclusive, hence
        # the +1 on the (exclusive) upper bound handed to uniform below.
        high = self.high if self.dtype.kind == 'f' \
                else self.high.astype('int64') + 1
        sample = np.empty(self.shape)

        # Masking arrays which classify the coordinates according to interval
        # type
        unbounded = ~self.bounded_below & ~self.bounded_above
        upp_bounded = ~self.bounded_below & self.bounded_above
        low_bounded = self.bounded_below & ~self.bounded_above
        bounded = self.bounded_below & self.bounded_above


        # Vectorized sampling by interval type
        sample[unbounded] = self.np_random.normal(
            size=unbounded[unbounded].shape)

        sample[low_bounded] = self.np_random.exponential(
            size=low_bounded[low_bounded].shape) + self.low[low_bounded]

        sample[upp_bounded] = -self.np_random.exponential(
            size=upp_bounded[upp_bounded].shape) + self.high[upp_bounded]

        sample[bounded] = self.np_random.uniform(low=self.low[bounded],
                                                 high=high[bounded],
                                                 size=bounded[bounded].shape)
        # Floor (not round) so integer samples stay uniform after the +1 shift.
        if self.dtype.kind == 'i':
            sample = np.floor(sample)

        return sample.astype(self.dtype)

    def contains(self, x):
        """Return True if x has this Box's shape and lies within its bounds."""
        if isinstance(x, list):
            x = np.array(x)  # Promote list to array for contains check
        return x.shape == self.shape and np.all(x >= self.low) and np.all(x <= self.high)

    def to_jsonable(self, sample_n):
        """Serialize a batch of samples to nested plain Python lists."""
        return np.array(sample_n).tolist()
class Discrete(Space):
    r"""A discrete space in :math:`\{ 0, 1, \\dots, n-1 \}`.

    Example::

        >>> Discrete(2)

    """
    def __init__(self, n):
        assert n >= 0
        self.n = n
        super(Discrete, self).__init__((), np.int64)

    def sample(self):
        """Draw a uniform random integer in [0, n)."""
        return self.np_random.randint(self.n)

    def contains(self, x):
        """True for plain ints, or 0-d integer numpy scalars/arrays, in [0, n)."""
        if isinstance(x, int):
            value = x
        elif isinstance(x, (np.generic, np.ndarray)) and (x.dtype.char in np.typecodes['AllInteger'] and x.shape == ()):
            value = int(x)
        else:
            return False
        return 0 <= value < self.n

    def __repr__(self):
        return "Discrete(%d)" % self.n

    def __eq__(self, other):
        return isinstance(other, Discrete) and self.n == other.n
def get_logger(log_path, name, save_file=False, console_out=False, json_file=False):
    """Configure and return the root logger used for game-result logging.

    Args:
        log_path: directory for the log files (created if missing).
        name: suffix appended to the minute-stamped file names.
        save_file: also write records to a ``.log`` file.
        console_out: also echo records to the console.
        json_file: also write records to a ``.json`` file.

    Returns:
        The configured ``logging`` root logger.
    """
    # Create the directory including parents — the old os.mkdir raised
    # FileNotFoundError when an intermediate directory was missing.
    os.makedirs(log_path, exist_ok=True)

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # One file per minute. os.path.join also works when log_path has no
    # trailing separator; the old string concatenation silently produced
    # names like "logs202201011200_x.log" fused onto the directory name.
    stamp = time.strftime('%Y%m%d%H%M', time.localtime(time.time()))
    log_name = os.path.join(log_path, stamp + '_' + name + '.log')
    json_log_name = os.path.join(log_path, stamp + '_' + name + '.json')

    formatter = logging.Formatter("%(message)s")

    if save_file:
        fh = logging.FileHandler(log_name, mode='a')
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(formatter)
        logger.addHandler(fh)

    # Echo to console (no formatter, as in the original).
    if console_out:
        console = logging.StreamHandler()
        console.setLevel(logging.INFO)
        logger.addHandler(console)

    # Mirror records into a .json file.
    if json_file:
        fh_json = logging.FileHandler(json_log_name, mode='a')
        fh_json.setLevel(logging.DEBUG)
        fh_json.setFormatter(formatter)
        logger.addHandler(fh_json)

    return logger
class MultiDiscreteParticle(gym.Space):
    """
    A series of discrete action sub-spaces with per-dimension bounds.

    - Adapts to both Discrete and continuous (Box) action layouts and is
      handy for controllers/keyboards where each key is its own discrete space.
    - Parametrized by an array of [min, max] pairs, one per dimension; each
      dimension takes any integer from min to max inclusive.
    Note: a value of 0 always needs to represent the NOOP action.
    e.g. a Nintendo game controller:
    - Conceptually 3 discrete sub-spaces:
        1) Arrow keys: Discrete 5 - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4] - min: 0, max: 4
        2) Button A: Discrete 2 - NOOP[0], Pressed[1] - min: 0, max: 1
        3) Button B: Discrete 2 - NOOP[0], Pressed[1] - min: 0, max: 1
    - Initialized as MultiDiscrete([ [0,4], [0,1], [0,1] ])
    """
    def __init__(self, array_of_param_array):
        self.low = np.array([pair[0] for pair in array_of_param_array])
        self.high = np.array([pair[1] for pair in array_of_param_array])
        self.num_discrete_space = self.low.shape[0]

    def sample(self):
        """Return a list with one uniform sample from each discrete sub-space."""
        # NOTE: a fresh RandomState is created per call, so sampling ignores
        # any external seeding — preserved from the original implementation.
        rng = np.random.RandomState()
        random_array = rng.rand(self.num_discrete_space)
        scaled = np.floor(np.multiply((self.high - self.low + 1.), random_array) + self.low)
        return [int(v) for v in scaled]

    def contains(self, x):
        """True when x has one in-bounds value per sub-space."""
        return len(x) == self.num_discrete_space and (np.array(x) >= self.low).all() and (np.array(x) <= self.high).all()

    @property
    def shape(self):
        # Returns the dimension count as an int (not a tuple) — a quirk
        # preserved from the original.
        return self.num_discrete_space

    def __repr__(self):
        return "MultiDiscrete" + str(self.num_discrete_space)

    def __eq__(self, other):
        return np.array_equal(self.low, other.low) and np.array_equal(self.high, other.high)
class Space(object):
    """Defines the observation and action spaces, so you can write generic
    code that applies to any Env. For example, you can choose a random
    action.

    WARNING - custom observation & action spaces may inherit from `Space`,
    but most use-cases are covered by the existing space classes (`Box`,
    `Discrete`, etc...) and the container classes (`Tuple` & `Dict`).
    Parametrized probability distributions (via `sample()`) and batching
    functions (in `gym.vector.VectorEnv`) are only well-defined for the
    spaces gym provides by default, and some RL algorithm implementations
    might not handle custom spaces properly. Use custom spaces with care.
    """
    def __init__(self, shape=None, dtype=None):
        # numpy takes about 300-400ms to import, so we load it lazily
        import numpy as np
        self.shape = None if shape is None else tuple(shape)
        self.dtype = None if dtype is None else np.dtype(dtype)
        self._np_random = None

    @property
    def np_random(self):
        """This space's RNG, seeded lazily on first access since seeding is
        expensive and only needed when actually sampling."""
        if self._np_random is None:
            self.seed()
        return self._np_random

    def sample(self):
        """Randomly sample an element of this space. Can be uniform or
        non-uniform depending on the boundedness of the space."""
        raise NotImplementedError

    def seed(self, seed=None):
        """Seed the PRNG of this space and return the seed list."""
        self._np_random, seed = seeding.np_random(seed)
        return [seed]

    def contains(self, x):
        """Return a boolean specifying whether x is a valid member of this
        space."""
        raise NotImplementedError

    def __contains__(self, x):
        return self.contains(x)

    def to_jsonable(self, sample_n):
        """Convert a batch of samples from this space to a JSONable data type."""
        # By default, assume identity is JSONable
        return sample_n

    def from_jsonable(self, sample_n):
        """Convert a JSONable data type to a batch of samples from this space."""
        # By default, assume identity is JSONable
        return sample_n