├── .gitignore ├── LICENSE ├── README.md ├── assets └── schedule.jpg ├── course1 ├── README.md ├── dataset │ └── wrestling │ │ ├── README.md │ │ └── obs_actions.zip ├── docs │ ├── 111.gif │ ├── AI-Olympics.png │ ├── Data_type.png │ ├── MNIST1.png │ ├── MNIST2.png │ ├── MNIST3.png │ ├── MacInstall.png │ ├── Pytorch_advanced.ipynb │ ├── RNN.png │ ├── WindowsInstall.png │ ├── data │ │ ├── FashionMNIST.zip │ │ ├── MNIST.zip │ │ └── wrestling.zip │ ├── index.png │ ├── matrices.png │ ├── ppt │ │ └── RLChina-PyTorch_tutorial---Yan.pptx │ ├── pytorch_intro.ipynb │ ├── tensor2.png │ ├── tensors1.png │ ├── torch_variable.png │ ├── vectors.png │ └── 及第平台使用手册以及暑期实践课环境准备.pdf ├── env │ ├── __init__.py │ ├── chooseenv.py │ ├── config.json │ ├── obs_interfaces │ │ └── observation.py │ ├── olympics_wrestling.py │ └── simulators │ │ ├── game.py │ │ └── gridgame.py ├── examples │ ├── BC_submission │ │ ├── README.md │ │ ├── actor_state_dict.pt │ │ └── submission.py │ ├── random │ │ └── submission.py │ └── submission │ │ └── submission.py ├── olympics_engine │ ├── .gitignore │ ├── AI_olympics.py │ ├── LICENSE │ ├── README.md │ ├── __init__.py │ ├── agent.py │ ├── assets │ │ ├── AI-Olympics.png │ │ ├── blue rock.png │ │ ├── board.png │ │ ├── crown.png │ │ ├── curling ground.png │ │ ├── energy bar.png │ │ ├── energy-blue-bar.png │ │ ├── energy-blue.png │ │ ├── energy-red-bar.png │ │ ├── energy-red.png │ │ ├── football │ │ │ ├── agent1-V1.png │ │ │ ├── agent1-V2.png │ │ │ ├── agent1_bold.png │ │ │ ├── agent2-V1.png │ │ │ ├── agent2-V2.png │ │ │ ├── agent2_bold.png │ │ │ ├── football.png │ │ │ ├── playground.png │ │ │ ├── sight1.png │ │ │ └── sight2.png │ │ ├── green rock.png │ │ ├── purple rock.png │ │ ├── red rock.png │ │ ├── table_hockey │ │ │ ├── ball.png │ │ │ ├── player1.png │ │ │ ├── player2.png │ │ │ ├── playground.png │ │ │ ├── sight1.png │ │ │ └── sight2.png │ │ ├── wood.png │ │ └── wrestling │ │ │ ├── player1.png │ │ │ ├── player2.png │ │ │ ├── playground.png │ │ │ ├── 
sight1.png │ │ │ └── sight2.png │ ├── core.py │ ├── env_wrapper │ │ ├── README.md │ │ ├── __init__.py │ │ ├── chooseenv.py │ │ ├── config.json │ │ ├── obs_interfaces │ │ │ └── observation.py │ │ ├── olympics_running.py │ │ └── simulators │ │ │ └── game.py │ ├── generator.py │ ├── main.py │ ├── objects.py │ ├── scenario.json │ ├── scenario │ │ ├── __init__.py │ │ ├── billiard.py │ │ ├── billiard_joint.py │ │ ├── curling.py │ │ ├── curling_competition.py │ │ ├── curling_joint.py │ │ ├── curling_long.py │ │ ├── football.py │ │ ├── longjump.py │ │ ├── running.py │ │ ├── running_competition.py │ │ ├── running_competition_maps │ │ │ └── maps.json │ │ ├── seeks.py │ │ ├── table_hockey.py │ │ ├── volleyball.py │ │ └── wrestling.py │ ├── test.py │ ├── test │ │ ├── DDA.py │ │ └── test_engine.py │ ├── tools │ │ ├── func.py │ │ └── settings.py │ ├── train │ │ ├── algo │ │ │ ├── active_inference_MDP.py │ │ │ ├── active_inference_POMDP.py │ │ │ ├── network.py │ │ │ ├── ppo.py │ │ │ └── random.py │ │ ├── log_path.py │ │ ├── rllib_train.py │ │ ├── train2avoid_ppo.py │ │ ├── train2run_AI_MDP.py │ │ ├── train2run_AI_POMDP.py │ │ └── train_ppo.py │ ├── utils │ │ ├── box.py │ │ ├── get_logger.py │ │ └── space.py │ └── viewer.py ├── run_log.py └── utils │ ├── box.py │ ├── discrete.py │ ├── get_logger.py │ ├── mutli_discrete_particle.py │ └── space.py ├── course2 ├── README.md ├── env │ ├── __init__.py │ ├── chooseenv.py │ ├── config.json │ ├── obs_interfaces │ │ └── observation.py │ ├── reversi.py │ ├── simulators │ │ ├── game.py │ │ └── gridgame.py │ └── sokoban.py ├── examples │ ├── alphabeta-reversi │ │ └── submission.py │ ├── bfs-sokoban │ │ └── submission.py │ ├── mcts-reversi │ │ └── submission.py │ ├── random │ │ └── submission.py │ └── submission │ │ └── submission.py ├── run_log.py └── utils │ ├── box.py │ ├── discrete.py │ ├── get_logger.py │ ├── mutli_discrete_particle.py │ └── space.py ├── course3 ├── README.md ├── docs │ └── rlcn_2022_rl_algorithms.zip ├── env │ ├── 
__init__.py │ ├── ccgame.py │ ├── chooseenv.py │ ├── config.json │ ├── obs_interfaces │ │ └── observation.py │ └── simulators │ │ ├── game.py │ │ └── gridgame.py ├── examples │ ├── ddpg │ │ ├── actor_200.pth │ │ └── submission.py │ ├── random │ │ └── submission.py │ └── submission │ │ └── submission.py ├── run_log.py ├── train.py └── utils │ ├── box.py │ ├── discrete.py │ ├── get_logger.py │ ├── mutli_discrete_particle.py │ └── space.py ├── course4 ├── README.md ├── docs │ └── rlchina_pbl.zip ├── env │ ├── __init__.py │ ├── chessandcard.py │ ├── chooseenv.py │ ├── config.json │ ├── obs_interfaces │ │ └── observation.py │ └── simulators │ │ ├── game.py │ │ └── gridgame.py ├── examples │ ├── random │ │ └── submission.py │ └── submission │ │ └── submission.py ├── run_log.py └── utils │ ├── box.py │ ├── discrete.py │ ├── get_logger.py │ ├── mutli_discrete_particle.py │ └── space.py ├── course5 ├── README.md ├── env │ ├── __init__.py │ ├── chooseenv.py │ ├── config.json │ ├── obs_interfaces │ │ └── observation.py │ ├── revive │ │ └── refrigerator.py │ ├── revive_refrigerator.py │ └── simulators │ │ ├── game.py │ │ └── gridgame.py ├── examples │ ├── random │ │ └── submission.py │ ├── revive_example │ │ ├── revive_policy.pkl │ │ └── submission.py │ └── submission │ │ └── submission.py ├── run_log.py └── utils │ ├── box.py │ ├── discrete.py │ ├── get_logger.py │ ├── mutli_discrete_particle.py │ └── space.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | /*/__pycache__/ 3 | /*/*/__pycache__/ 4 | .idea* 5 | /course1/logs/ 6 | /course2/logs/ 7 | /course3/logs/ 8 | /course4/logs/ 9 | /course5/logs/ 10 | .DS_Store 11 | logs/* 12 | /course3/examples/ddpg/trained_model 13 | /course3/docs/rlcn_2022_rl_algorithms 14 | /course4/docs/rlchina_pbl 15 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 jidiai 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SummerCourse2022 2 | 3 | 参与RLChina暑期实践课,一起来学习强化学习吧^0^ 4 | 5 | 小编把每天的课程独立在一个文件夹里面,学习内容递进,完成所有五次作业的学员还将获得电子版结课证书!筒子们冲啊! 
6 | 7 | 课程表👇 8 | 9 | ![image](assets/schedule.jpg) 10 | 11 | 详细信息 👉请看 [RLChina官网](http://rlchina.org/) 12 | --- 13 | ### Env Dependency 14 | 15 | #### Install Anaconda or Miniconda 16 | 17 | - Anaconda: https://www.anaconda.com/products/distribution 18 | - Miniconda: https://conda.io/projects/conda/en/latest/user-guide/install/index.html 19 | 20 | >conda create -n jidi_2022 python=3.7.5 21 | 22 | >conda activate jidi_2022 23 | 24 | >pip install -r requirements.txt 25 | 26 | -------------------------------------------------------------------------------- /assets/schedule.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/assets/schedule.jpg -------------------------------------------------------------------------------- /course1/README.md: -------------------------------------------------------------------------------- 1 | ## 实践课第一天 2 | 3 | ### 任务:奥林匹克 相扑 作业要求: 提交通过并且在金榜的排名高于Jidi_random 4 | 5 | 6 | --- 7 | ### Env 👉请看 [olympics_wrestling.py](env/olympics_wrestling.py) 8 | 9 | ### Random 👉请看 [random/submission.py](examples/random/submission.py) 10 | 11 | ### 提交 👉请看 [submission.py](examples/random/submission.py) 12 | 13 | --- 14 | 15 | ### 作业:从 expert data 中做 Behaviour Cloning 学习保持在擂台的方法,使用 PyTorch 框架进行训练。 16 | 17 | 1. 下载 `/dataset/wrestling/obs_actions.zip` dataset 18 | 19 | 2. 读取 dataset, 搭建model, 进行训练 20 | 21 | 3. 在`run_log.py`测试训练的策略 22 | 23 | 4. 
提交策略至及第平台 24 | 25 | 26 | --- 27 | 28 | ### How to test submission 29 | 30 | Complete examples/submission/submission.py, and then set "policy_list" in line 176 of run_log.py 31 | >python run_log.py 32 | 33 | If no errors, your submission is ready to go~ 34 | 35 | ___ 36 | Have a good time~~~ -------------------------------------------------------------------------------- /course1/dataset/wrestling/README.md: -------------------------------------------------------------------------------- 1 | ## Behaviour Cloning expert data 2 | 3 | 这里有用作Behaviour Cloning的专家数据(10000 pairs),包含观测和动作。该策略的风格为围着圆心转圈,能够保持自己在界内,足以打败random 策略。用户需要提交一个BC获得的强于random的策略,并显露出该专家数据的风格。 -------------------------------------------------------------------------------- /course1/dataset/wrestling/obs_actions.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/dataset/wrestling/obs_actions.zip -------------------------------------------------------------------------------- /course1/docs/111.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/111.gif -------------------------------------------------------------------------------- /course1/docs/AI-Olympics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/AI-Olympics.png -------------------------------------------------------------------------------- /course1/docs/Data_type.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/Data_type.png 
-------------------------------------------------------------------------------- /course1/docs/MNIST1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/MNIST1.png -------------------------------------------------------------------------------- /course1/docs/MNIST2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/MNIST2.png -------------------------------------------------------------------------------- /course1/docs/MNIST3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/MNIST3.png -------------------------------------------------------------------------------- /course1/docs/MacInstall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/MacInstall.png -------------------------------------------------------------------------------- /course1/docs/RNN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/RNN.png -------------------------------------------------------------------------------- /course1/docs/WindowsInstall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/WindowsInstall.png -------------------------------------------------------------------------------- 
/course1/docs/data/FashionMNIST.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/data/FashionMNIST.zip -------------------------------------------------------------------------------- /course1/docs/data/MNIST.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/data/MNIST.zip -------------------------------------------------------------------------------- /course1/docs/data/wrestling.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/data/wrestling.zip -------------------------------------------------------------------------------- /course1/docs/index.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/index.png -------------------------------------------------------------------------------- /course1/docs/matrices.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/matrices.png -------------------------------------------------------------------------------- /course1/docs/ppt/RLChina-PyTorch_tutorial---Yan.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/ppt/RLChina-PyTorch_tutorial---Yan.pptx -------------------------------------------------------------------------------- /course1/docs/tensor2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/tensor2.png -------------------------------------------------------------------------------- /course1/docs/tensors1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/tensors1.png -------------------------------------------------------------------------------- /course1/docs/torch_variable.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/torch_variable.png -------------------------------------------------------------------------------- /course1/docs/vectors.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/vectors.png -------------------------------------------------------------------------------- /course1/docs/及第平台使用手册以及暑期实践课环境准备.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/docs/及第平台使用手册以及暑期实践课环境准备.pdf -------------------------------------------------------------------------------- /course1/env/__init__.py: -------------------------------------------------------------------------------- 1 | from .olympics_wrestling import * 2 | -------------------------------------------------------------------------------- /course1/env/chooseenv.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # 作者:zruizhi 3 | # 创建时间: 2020/9/11 11:17 上午 4 | # 
描述:选择运行环境,需要维护env/__ini__.py && config.json(存储环境默认参数) 5 | 6 | import json 7 | import env 8 | import os 9 | 10 | 11 | def make(env_type, conf=None): 12 | file_path = os.path.join(os.path.dirname(__file__), 'config.json') 13 | if not conf: 14 | with open(file_path) as f: 15 | conf = json.load(f)[env_type] 16 | class_literal = conf['class_literal'] 17 | return getattr(env, class_literal)(conf) 18 | 19 | 20 | if __name__ == "__main__": 21 | make("classic_MountainCar-v0") 22 | -------------------------------------------------------------------------------- /course1/env/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "olympics-wrestling": { 3 | "class_literal": "OlympicsWrestling", 4 | "n_player": 2, 5 | "max_step": 500, 6 | "game_name": "wrestling", 7 | "is_obs_continuous": true, 8 | "is_act_continuous": true, 9 | "agent_nums": [1,1], 10 | "obs_type": ["vector", "vector"] 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /course1/env/obs_interfaces/observation.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # 作者:zruizhi 3 | # 创建时间: 2020/11/13 3:51 下午 4 | # 描述:observation的各种接口类 5 | obs_type = ["grid", "vector", "dict"] 6 | 7 | 8 | class GridObservation(object): 9 | def get_grid_observation(self, current_state, player_id, info_before): 10 | raise NotImplementedError 11 | 12 | def get_grid_many_observation(self, current_state, player_id_list, info_before=''): 13 | all_obs = [] 14 | for i in player_id_list: 15 | all_obs.append(self.get_grid_observation(current_state, i, info_before)) 16 | return all_obs 17 | 18 | 19 | class VectorObservation(object): 20 | def get_vector_observation(self, current_state, player_id, info_before): 21 | raise NotImplementedError 22 | 23 | def get_vector_many_observation(self, current_state, player_id_list, info_before=''): 24 | all_obs = [] 25 | for i in player_id_list: 26 | 
all_obs.append(self.get_vector_observation(current_state, i, info_before)) 27 | return all_obs 28 | 29 | 30 | class DictObservation(object): 31 | def get_dict_observation(self, current_state, player_id, info_before): 32 | raise NotImplementedError 33 | 34 | def get_dict_many_observation(self, current_state, player_id_list, info_before=''): 35 | all_obs = [] 36 | for i in player_id_list: 37 | all_obs.append(self.get_dict_observation(current_state, i, info_before)) 38 | return all_obs 39 | 40 | 41 | # todo: observation builder 42 | class CustomObservation(object): 43 | def get_custom_observation(self, current_state, player_id): 44 | raise NotImplementedError 45 | 46 | def get_custom_obs_space(self, player_id): 47 | raise NotImplementedError 48 | 49 | def get_custom_many_observation(self, current_state, player_id_list): 50 | all_obs = [] 51 | for i in player_id_list: 52 | all_obs.append(self.get_custom_observation(current_state, i)) 53 | return all_obs 54 | 55 | def get_custom_many_obs_space(self, player_id_list): 56 | all_obs_space = [] 57 | for i in player_id_list: 58 | all_obs_space.append(self.get_custom_obs_space(i)) 59 | return all_obs_space 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /course1/env/olympics_wrestling.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | 4 | root_dir = str(Path(__file__).resolve().parent.parent.parent) 5 | sys.path.append(root_dir) 6 | 7 | from course1.olympics_engine.generator import create_scenario 8 | from course1.olympics_engine.scenario.wrestling import * 9 | 10 | from course1.utils.box import Box 11 | from course1.env.simulators.game import Game 12 | 13 | import numpy as np 14 | 15 | 16 | class OlympicsWrestling(Game): 17 | def __init__(self, conf, seed=None): 18 | super(OlympicsWrestling, self).__init__(conf['n_player'], conf['is_obs_continuous'], conf['is_act_continuous'], 19 | 
conf['game_name'], conf['agent_nums'], conf['obs_type']) 20 | self.seed = seed 21 | self.set_seed() 22 | 23 | Gamemap = create_scenario("wrestling") 24 | self.env_core = wrestling(Gamemap) 25 | self.max_step = int(conf['max_step']) 26 | self.joint_action_space = self.set_action_space() 27 | self.action_dim = self.joint_action_space 28 | 29 | self.step_cnt = 0 30 | self.init_info = None 31 | self.won = {} 32 | self.n_return = [0] * self.n_player 33 | 34 | _ = self.reset() 35 | 36 | self.board_width = self.env_core.view_setting['width'] + 2 * self.env_core.view_setting['edge'] 37 | self.board_height = self.env_core.view_setting['height'] + 2 * self.env_core.view_setting['edge'] 38 | 39 | @staticmethod 40 | def create_seed(): 41 | seed = random.randrange(1000) 42 | return seed 43 | 44 | def set_seed(self, seed=None): 45 | if not seed: # use previous seed when no new seed input 46 | seed = self.seed 47 | else: # update env global seed 48 | self.seed = seed 49 | random.seed(seed) 50 | np.random.seed(seed) 51 | 52 | def reset(self): 53 | init_obs = self.env_core.reset() 54 | self.step_cnt = 0 55 | self.done = False 56 | self.init_info = None 57 | self.won = {} 58 | self.n_return = [0] * self.n_player 59 | 60 | self.current_state = init_obs 61 | self.all_observes = self.get_all_observes() 62 | 63 | return self.all_observes 64 | 65 | def step(self, joint_action): 66 | self.is_valid_action(joint_action) 67 | info_before = self.step_before_info() 68 | joint_action_decode = self.decode(joint_action) 69 | all_observations, reward, done, info_after = self.env_core.step(joint_action_decode) 70 | info_after = '' 71 | self.current_state = all_observations 72 | self.all_observes = self.get_all_observes() 73 | 74 | self.step_cnt += 1 75 | self.done = done 76 | if self.done: 77 | self.set_n_return() 78 | 79 | return self.all_observes, reward, self.done, info_before, info_after 80 | 81 | def is_valid_action(self, joint_action): 82 | if len(joint_action) != self.n_player: # check 
number of player 83 | raise Exception("Input joint action dimension should be {}, not {}".format( 84 | self.n_player, len(joint_action))) 85 | 86 | def step_before_info(self, info=''): 87 | return info 88 | 89 | def decode(self, joint_action): 90 | joint_action_decode = [] 91 | for act_id, nested_action in enumerate(joint_action): 92 | temp_action = [0, 0] 93 | temp_action[0] = nested_action[0][0] 94 | temp_action[1] = nested_action[1][0] 95 | joint_action_decode.append(temp_action) 96 | 97 | return joint_action_decode 98 | 99 | def get_all_observes(self): 100 | all_observes = [] 101 | for i in range(self.n_player): 102 | each = {"obs": self.current_state[i], "controlled_player_index": i} 103 | all_observes.append(each) 104 | 105 | return all_observes 106 | 107 | def set_action_space(self): 108 | return [[Box(-100, 200, shape=(1,)), Box(-30, 30, shape=(1,))] for _ in range(self.n_player)] 109 | 110 | def get_reward(self, reward): 111 | return [reward] 112 | 113 | def is_terminal(self): 114 | return self.env_core.is_terminal() 115 | 116 | def set_n_return(self): 117 | 118 | if self.env_core.agent_list[0].finished and not (self.env_core.agent_list[1].finished): 119 | self.n_return = [0, 1] 120 | elif not (self.env_core.agent_list[0].finished) and self.env_core.agent_list[1].finished: 121 | self.n_return = [1, 0] 122 | elif self.env_core.agent_list[0].finished and self.env_core.agent_list[1].finished: 123 | self.n_return = [0, 0] 124 | else: 125 | self.n_return = [0, 0] 126 | 127 | def check_win(self): 128 | 129 | if self.env_core.agent_list[0].finished and not (self.env_core.agent_list[1].finished): 130 | return '1' 131 | elif not (self.env_core.agent_list[0].finished) and self.env_core.agent_list[1].finished: 132 | return '0' 133 | else: 134 | return '-1' 135 | 136 | def get_single_action_space(self, player_id): 137 | return self.joint_action_space[player_id] 138 | -------------------------------------------------------------------------------- 
/course1/env/simulators/game.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # 作者:zruizhi 3 | # 创建时间: 2020/7/10 10:24 上午 4 | # 描述: 5 | from abc import ABC, abstractmethod 6 | 7 | 8 | class Game(ABC): 9 | def __init__(self, n_player, is_obs_continuous, is_act_continuous, game_name, agent_nums, obs_type): 10 | self.n_player = n_player 11 | self.current_state = None 12 | self.all_observes = None 13 | self.is_obs_continuous = is_obs_continuous 14 | self.is_act_continuous = is_act_continuous 15 | self.game_name = game_name 16 | self.agent_nums = agent_nums 17 | self.obs_type = obs_type 18 | 19 | def get_config(self, player_id): 20 | raise NotImplementedError 21 | 22 | def get_render_data(self, current_state): 23 | return current_state 24 | 25 | def set_current_state(self, current_state): 26 | raise NotImplementedError 27 | 28 | @abstractmethod 29 | def is_terminal(self): 30 | raise NotImplementedError 31 | 32 | def get_next_state(self, all_action): 33 | raise NotImplementedError 34 | 35 | def get_reward(self, all_action): 36 | raise NotImplementedError 37 | 38 | @abstractmethod 39 | def step(self, all_action): 40 | raise NotImplementedError 41 | 42 | @abstractmethod 43 | def reset(self): 44 | raise NotImplementedError 45 | 46 | def set_action_space(self): 47 | raise NotImplementedError 48 | 49 | -------------------------------------------------------------------------------- /course1/examples/BC_submission/README.md: -------------------------------------------------------------------------------- 1 | ## Behaviour Cloning example submission 2 | 3 | 这里是一个是用了BC的提交例子,针对奥林匹克相扑环境(Olympics-wrestling)。用户只需要提交`submission.py`和 `actor_state_dict.pt`文件至环境提交页面。 4 | 5 | `submission.py`文件里的`my_controller`函数为评测时所调用的策略主函数,输入为观测obs,输出为动作actions。注意格式对齐。提交前可以在`course1/run_log.py`文件内测试,若能跑通则提交也能通过。 -------------------------------------------------------------------------------- 
/course1/examples/BC_submission/actor_state_dict.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/examples/BC_submission/actor_state_dict.pt -------------------------------------------------------------------------------- /course1/examples/BC_submission/submission.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import os 3 | 4 | current_path = Path(__file__).resolve().parent 5 | model_path = os.path.join(current_path, 'actor_state_dict.pt') 6 | 7 | 8 | 9 | import torch 10 | import torch.nn as nn 11 | 12 | class Net(nn.Module): 13 | def __init__(self): 14 | super(Net, self).__init__() 15 | self.net = nn.Sequential( 16 | nn.Linear(1600, 400), 17 | nn.ReLU(), 18 | nn.Linear(400, 128), 19 | nn.ReLU(), 20 | nn.Linear(128, 64), 21 | nn.ReLU(), 22 | nn.Linear(64, 2) 23 | ) 24 | def forward(self, X): 25 | action_batch = self.net(X) 26 | action_batch[:, 0] = torch.tanh(action_batch[:,0])*150+50 27 | action_batch[:, 1] = torch.tanh(action_batch[:, 1])*30 28 | return action_batch 29 | 30 | model = Net() 31 | loaded_actor_state = torch.load(model_path) 32 | model.load_state_dict(loaded_actor_state) 33 | 34 | def my_controller(observation, action_space, is_act_continuous=True): 35 | 36 | obs_array = torch.tensor(observation['obs']['agent_obs']).float().reshape(1, -1) 37 | action = model(obs_array) 38 | 39 | return [[action[0][0].item()], [action[0][1].item()]] 40 | 41 | -------------------------------------------------------------------------------- /course1/examples/random/submission.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Time : 2022/8/10 下午4:14 3 | # Author: Yahui Cui 4 | 5 | """ 6 | # =================================== Important ========================================= 7 | Notes: 8 | 1. 
this agents is random agents , which can fit any env in Jidi platform. 9 | 2. if you want to load .pth file, please follow the instruction here: 10 | https://github.com/jidiai/ai_lib/blob/master/examples/demo 11 | """ 12 | 13 | 14 | def my_controller(observation, action_space, is_act_continuous=True): 15 | agent_action = [] 16 | for i in range(len(action_space)): 17 | action_ = sample_single_dim(action_space[i], is_act_continuous) 18 | agent_action.append(action_) 19 | return agent_action 20 | 21 | 22 | def sample_single_dim(action_space_list_each, is_act_continuous): 23 | each = [] 24 | if is_act_continuous: 25 | each = action_space_list_each.sample() 26 | else: 27 | if action_space_list_each.__class__.__name__ == "Discrete": 28 | each = [0] * action_space_list_each.n 29 | idx = action_space_list_each.sample() 30 | each[idx] = 1 31 | elif action_space_list_each.__class__.__name__ == "MultiDiscreteParticle": 32 | each = [] 33 | nvec = action_space_list_each.high - action_space_list_each.low + 1 34 | sample_indexes = action_space_list_each.sample() 35 | 36 | for i in range(len(nvec)): 37 | dim = nvec[i] 38 | new_action = [0] * dim 39 | index = sample_indexes[i] 40 | new_action[index] = 1 41 | each.extend(new_action) 42 | elif action_space_list_each.__class__.__name__ == "Discrete_SC2": 43 | each = action_space_list_each.sample() 44 | elif action_space_list_each.__class__.__name__ == "Box": 45 | each = action_space_list_each.sample() 46 | return each 47 | -------------------------------------------------------------------------------- /course1/examples/submission/submission.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Time : 2022/8/10 下午4:14 3 | # Author: Yahui Cui 4 | 5 | """ 6 | # =================================== Important ========================================= 7 | Notes: 8 | 1. this agents is random agents , which can fit any env in Jidi platform. 9 | 2. 
class AI_Olympics:
    """Tournament wrapper that plays several Olympics sub-games in sequence.

    Each sub-game contributes one point to the winner's tally; after the
    last game the team with more points receives a final reward of 100.
    """

    def __init__(self, random_selection, minimap):
        # BUG FIX: honour the caller's flag instead of hardcoding True.
        # main.py constructs AI_Olympics(random_selection=False, ...) and
        # expects a fixed game-playing sequence.
        self.random_selection = random_selection
        self.minimap_mode = minimap

        self.max_step = 400
        self.vis = 200
        self.vis_clear = 5
        self.done = False  # valid for is_terminal() even before reset()

        running_Gamemap = create_scenario("running-competition")
        self.running_game = Running_competition(running_Gamemap, vis=200, vis_clear=5,
                                                agent1_color='light red', agent2_color='blue')

        self.tablehockey_game = table_hockey(create_scenario("table-hockey"))
        self.football_game = football(create_scenario('football'))
        self.wrestling_game = wrestling(create_scenario('wrestling'))
        self.curling_game = curling_competition(create_scenario('curling-IJACA-competition'))
        self.billiard_game = billiard_joint(create_scenario("billiard-joint"))

        self.running_game.max_step = self.max_step
        self.tablehockey_game.max_step = self.max_step
        self.football_game.max_step = self.max_step
        self.wrestling_game.max_step = self.max_step
        # NOTE(review): curling/billiard keep their own max_step — confirm intended.

        self.game_pool = [{"name": 'running-competition', 'game': self.running_game},
                          {"name": 'table-hockey', "game": self.tablehockey_game},
                          {"name": 'football', "game": self.football_game},
                          {"name": 'wrestling', "game": self.wrestling_game},
                          {"name": "curling", "game": self.curling_game},
                          {"name": "billiard", "game": self.billiard_game}]
        self.view_setting = self.running_game.view_setting

    def _wrap_obs(self, obs, game_mode, use_billiard_energy):
        """Annotate raw sub-game observations with id/game_mode/energy.

        running-competition returns bare per-agent obs, so wrap them in
        dicts first. `use_billiard_energy` mirrors the original code:
        only mid-game steps read `agent_energy` for billiard.
        """
        if self.current_game.game_name == 'running-competition':
            obs = [{'agent_obs': obs[i], 'id': f'team_{i}'} for i in [0, 1]]
        for ob in obs:
            ob['game_mode'] = game_mode
        for i, ob in enumerate(obs):
            if 'curling' in self.current_game.game_name:
                ob['energy'] = 1000  # curling agents have unlimited energy here
            elif use_billiard_energy and 'billiard' in self.current_game.game_name:
                ob['energy'] = self.current_game.agent_energy[i]
            else:
                ob['energy'] = self.current_game.agent_list[i].energy
        return obs

    def reset(self):
        """Start a fresh tournament and return the first game's initial obs."""
        self.done = False
        selected_game_idx_pool = list(range(len(self.game_pool)))
        if self.random_selection:
            random.shuffle(selected_game_idx_pool)  # random game playing sequence

        self.selected_game_idx_pool = selected_game_idx_pool
        self.current_game_count = 0
        selected_game_idx = self.selected_game_idx_pool[self.current_game_count]

        print(f'Playing {self.game_pool[selected_game_idx]["name"]}')
        self.current_game = self.game_pool[selected_game_idx]['game']
        self.game_score = [0, 0]

        init_obs = self.current_game.reset()
        return self._wrap_obs(init_obs, 'NEW GAME', use_billiard_energy=False)

    def step(self, action_list):
        """Advance the current sub-game; on completion, score it and move on.

        Returns (obs, reward, done, info). The final step of the last
        game returns the aggregated final reward instead of the
        sub-game reward.
        """
        obs, reward, done, _ = self.current_game.step(action_list)
        obs = self._wrap_obs(obs, '', use_billiard_energy=True)

        if done:
            winner = self.current_game.check_win()
            if winner != '-1':  # '-1' means a draw in the sub-game
                self.game_score[int(winner)] += 1

            if self.current_game_count == len(self.game_pool) - 1:
                self.done = True
            else:
                self.current_game_count += 1
                self.current_game_idx = self.selected_game_idx_pool[self.current_game_count]
                self.current_game = self.game_pool[self.current_game_idx]['game']
                print(f'Playing {self.game_pool[self.current_game_idx]["name"]}')
                obs = self.current_game.reset()
                obs = self._wrap_obs(obs, 'NEW GAME', use_billiard_energy=False)

        if self.done:
            print('game score = ', self.game_score)
            if self.game_score[0] > self.game_score[1]:
                self.final_reward = [100, 0]
                print('Results: team 0 win!')
            elif self.game_score[1] > self.game_score[0]:
                self.final_reward = [0, 100]
                print('Results: team 1 win!')
            else:
                self.final_reward = [0, 0]
                print('Results: Draw!')
            return obs, self.final_reward, self.done, ''
        else:
            return obs, reward, self.done, ''

    def is_terminal(self):
        """True once the last sub-game of the tournament has finished."""
        return self.done

    def __getattr__(self, item):
        # Delegate unknown attributes to the active sub-game. Guard the
        # delegation target itself to avoid infinite recursion when an
        # attribute is looked up before the first reset().
        if item == 'current_game':
            raise AttributeError(item)
        return getattr(self.current_game, item)

    def render(self):
        self.current_game.render()
class random_agent:
    """Baseline agent that emits uniformly random [force, angle] actions."""

    def __init__(self):
        # Action ranges accepted by the olympics engine.
        self.force_range = [-100, 200]
        self.angle_range = [-30, 30]

    def act(self, obs):
        """Ignore *obs* and return a random [force, angle] pair."""
        force_lo, force_hi = self.force_range
        angle_lo, angle_hi = self.angle_range
        return [random.uniform(force_lo, force_hi),
                random.uniform(angle_lo, angle_hi)]
-------------------------------------------------------------------------------- /course1/olympics_engine/assets/crown.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/crown.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/curling ground.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/curling ground.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/energy bar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/energy bar.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/energy-blue-bar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/energy-blue-bar.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/energy-blue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/energy-blue.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/energy-red-bar.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/energy-red-bar.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/energy-red.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/energy-red.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/football/agent1-V1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/football/agent1-V1.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/football/agent1-V2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/football/agent1-V2.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/football/agent1_bold.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/football/agent1_bold.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/football/agent2-V1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/football/agent2-V1.png 
-------------------------------------------------------------------------------- /course1/olympics_engine/assets/football/agent2-V2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/football/agent2-V2.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/football/agent2_bold.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/football/agent2_bold.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/football/football.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/football/football.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/football/playground.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/football/playground.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/football/sight1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/football/sight1.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/football/sight2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/football/sight2.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/green rock.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/green rock.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/purple rock.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/purple rock.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/red rock.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/red rock.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/table_hockey/ball.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/table_hockey/ball.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/table_hockey/player1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/table_hockey/player1.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/table_hockey/player2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/table_hockey/player2.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/table_hockey/playground.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/table_hockey/playground.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/table_hockey/sight1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/table_hockey/sight1.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/table_hockey/sight2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/table_hockey/sight2.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/wood.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/wood.png 
-------------------------------------------------------------------------------- /course1/olympics_engine/assets/wrestling/player1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/wrestling/player1.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/wrestling/player2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/wrestling/player2.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/wrestling/playground.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/wrestling/playground.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/wrestling/sight1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/wrestling/sight1.png -------------------------------------------------------------------------------- /course1/olympics_engine/assets/wrestling/sight2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course1/olympics_engine/assets/wrestling/sight2.png -------------------------------------------------------------------------------- /course1/olympics_engine/env_wrapper/README.md: 
def make(env_type, seed=None, conf=None):
    """Instantiate the wrapped environment named *env_type*.

    When *conf* is not supplied, the defaults are loaded from the
    config.json that sits next to this module. Environments whose name
    starts with "olympics" additionally receive the random seed.
    """
    if not conf:
        config_file = os.path.join(os.path.dirname(__file__), 'config.json')
        with open(config_file) as f:
            conf = json.load(f)[env_type]
    env_class = getattr(env_wrapper, conf['class_literal'])
    if env_type.split('-')[0] in ["olympics"]:
        return env_class(conf, seed)
    return env_class(conf)
# Observation interface classes: each declares a single-player getter
# (to be overridden) plus a many-player collector built on top of it.
obs_type = ["grid", "vector", "dict"]


class GridObservation(object):
    """Interface for grid-shaped observations."""

    def get_grid_observation(self, current_state, player_id, info_before):
        raise NotImplementedError

    def get_grid_many_observation(self, current_state, player_id_list, info_before=''):
        """Collect one grid observation per player id, in order."""
        return [self.get_grid_observation(current_state, pid, info_before)
                for pid in player_id_list]


class VectorObservation(object):
    """Interface for vector-shaped observations."""

    def get_vector_observation(self, current_state, player_id, info_before):
        raise NotImplementedError

    def get_vector_many_observation(self, current_state, player_id_list, info_before=''):
        """Collect one vector observation per player id, in order."""
        return [self.get_vector_observation(current_state, pid, info_before)
                for pid in player_id_list]


class DictObservation(object):
    """Interface for dict-shaped observations."""

    def get_dict_observation(self, current_state, player_id, info_before):
        raise NotImplementedError

    def get_dict_many_observation(self, current_state, player_id_list, info_before=''):
        """Collect one dict observation per player id, in order."""
        return [self.get_dict_observation(current_state, pid, info_before)
                for pid in player_id_list]


# todo: observation builder
class CustomObservation(object):
    """Interface for user-defined observations and their spaces."""

    def get_custom_observation(self, current_state, player_id):
        raise NotImplementedError

    def get_custom_obs_space(self, player_id):
        raise NotImplementedError

    def get_custom_many_observation(self, current_state, player_id_list):
        """Collect one custom observation per player id, in order."""
        return [self.get_custom_observation(current_state, pid)
                for pid in player_id_list]

    def get_custom_many_obs_space(self, player_id_list):
        """Collect one custom observation space per player id, in order."""
        return [self.get_custom_obs_space(pid) for pid in player_id_list]


class Game(ABC):
    """Abstract base class for turn-stepped multi-player games.

    Subclasses must implement is_terminal/step/reset; the remaining
    hooks raise NotImplementedError until overridden.
    """

    def __init__(self, n_player, is_obs_continuous, is_act_continuous, game_name, agent_nums, obs_type):
        self.n_player = n_player
        self.current_state = None
        self.all_observes = None
        self.is_obs_continuous = is_obs_continuous
        self.is_act_continuous = is_act_continuous
        self.game_name = game_name
        self.agent_nums = agent_nums
        self.obs_type = obs_type

    def get_config(self, player_id):
        raise NotImplementedError

    def get_render_data(self, current_state):
        """Default render payload is the raw state itself."""
        return current_state

    def set_current_state(self, current_state):
        raise NotImplementedError

    @abstractmethod
    def is_terminal(self):
        raise NotImplementedError

    def get_next_state(self, all_action):
        raise NotImplementedError

    def get_reward(self, all_action):
        raise NotImplementedError

    @abstractmethod
    def step(self, all_action):
        raise NotImplementedError

    @abstractmethod
    def reset(self):
        raise NotImplementedError

    def set_action_space(self):
        raise NotImplementedError
def create_scenario(scenario_name, file_path=None):
    """Build a GameMap dict for *scenario_name* from a scenario JSON file.

    Args:
        scenario_name: top-level key to read from the config.
        file_path: optional path to the JSON config; defaults to the
            scenario.json sitting next to this module.

    Returns:
        dict with keys "objects", "agents", "view" and, when present in
        the config, "env_cfg" / "obs_cfg".
    """
    if file_path is None:
        file_path = os.path.join(os.path.dirname(__file__), 'scenario.json')

    with open(file_path) as f:
        conf = json.load(f)[scenario_name]

    GameMap = dict()
    GameMap["objects"] = list()
    GameMap["agents"] = list()
    GameMap["view"] = conf["view"]

    # renamed loop variable from `type` to avoid shadowing the builtin
    for section in conf:
        if section == 'env_cfg':
            GameMap["env_cfg"] = conf[section]
        elif section == 'obs_cfg':
            GameMap["obs_cfg"] = conf[section]
        elif section in ("wall", "cross"):
            # physics object classes (Wall, Cross, ...) live in objects.py;
            # imported lazily so config-only scenarios need no objects module
            module = __import__("objects")
            for value in conf[section]["objects"].values():
                ball_pass = value.get('ball_pass')
                GameMap["objects"].append(getattr(module, section.capitalize())(
                    init_pos=value["initial_position"],
                    length=None,
                    color=value["color"],
                    # config stores the flag as the string "True"/"False";
                    # preserve the original value ("True" or False) exactly
                    ball_can_pass=ball_pass if ball_pass == "True" else False,
                    width=value.get('width'),
                ))
        elif section == 'arc':
            module = __import__("objects")
            for value in conf[section]['objects'].values():
                GameMap['objects'].append(getattr(module, section.capitalize())(
                    init_pos=value["initial_position"],
                    start_radian=value["start_radian"],
                    end_radian=value["end_radian"],
                    passable=value["passable"] == "True",
                    color=value['color'],
                    collision_mode=value['collision_mode'],
                    width=value.get('width'),
                ))
        elif section in ("agent", "ball"):
            module = __import__("objects")
            for value in conf[section]["objects"].values():
                GameMap["agents"].append(getattr(module, section.capitalize())(
                    mass=value["mass"],
                    r=value["radius"],
                    position=value["initial_position"],
                    color=value["color"],
                    vis=value.get("vis"),
                    vis_clear=value.get("vis_clear"),
                ))
    return GameMap
create_scenario(args.map) 51 | game = Running_competition(meta_map=Gamemap,map_id=map_id) 52 | agent_num = 2 53 | 54 | 55 | elif args.map == 'table-hockey': 56 | game = table_hockey(Gamemap) 57 | agent_num = 2 58 | elif args.map == 'football': 59 | game = football(Gamemap) 60 | agent_num = 2 61 | elif args.map == 'wrestling': 62 | game = wrestling(Gamemap) 63 | agent_num = 2 64 | # elif args.map == 'volleyball': 65 | # game = volleyball(Gamemap) 66 | # agent_num = 2 67 | elif args.map == 'billiard': 68 | game = billiard(Gamemap) 69 | agent_num = 2 70 | elif args.map == 'curling': 71 | game = curling(Gamemap) 72 | agent_num = 2 73 | 74 | elif args.map == 'curling-joint': 75 | game = curling_joint(Gamemap) 76 | agent_num = 2 77 | 78 | elif args.map == 'billiard-joint': 79 | game = billiard_joint(Gamemap) 80 | agent_num = 2 81 | 82 | elif args.map == 'curling-long': 83 | game = curling_long(Gamemap) 84 | agent_num = 2 85 | 86 | elif args.map == 'curling-competition': 87 | game = curling_competition(Gamemap) 88 | agent_num = 2 89 | 90 | elif args.map == 'all': 91 | game = AI_Olympics(random_selection = False, minimap=False) 92 | agent_num = 2 93 | 94 | agent = random_agent() 95 | rand_agent = random_agent() 96 | 97 | obs = game.reset() 98 | done = False 99 | step = 0 100 | if RENDER: 101 | game.render() 102 | 103 | print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@") 104 | time_epi_s = time.time() 105 | while not done: 106 | step += 1 107 | 108 | # print('\n Step ', step) 109 | 110 | #action1 = [100,0]#agent.act(obs) 111 | #action2 = [100,0] #rand_agent.act(obs) 112 | if agent_num == 2: 113 | action1, action2 = agent.act(obs[0]), rand_agent.act(obs[1]) 114 | # action1 = [100,1] 115 | 116 | # action1 =[50,1] 117 | # action2 = [50,-1] 118 | 119 | 120 | action = [action1, action2] 121 | elif agent_num == 1: 122 | action1 = agent.act(obs) 123 | action = [action1] 124 | 125 | # if step <= 5: 126 | # action = [[200,0]] 127 | # else: 128 | # action = [[0,0]] 129 | # action = 
def closest_point(l1, l2, point):
    """
    Compute the point on the line through l1 and l2 closest to *point*,
    via Cramer's rule, reference: https://en.wikipedia.org/wiki/Cramer%27s_rule

    :param l1: start pos [x, y]
    :param l2: end pos [x, y]
    :param point: query position [x, y]
    :return: [cx, cy] foot of the perpendicular from point onto the line
    """
    A1 = l2[1] - l1[1]
    B1 = l1[0] - l2[0]
    C1 = (l2[1] - l1[1]) * l1[0] + (l1[0] - l2[0]) * l1[1]
    C2 = -B1 * point[0] + A1 * point[1]
    det = A1 * A1 + B1 * B1
    if det == 0:
        # l1 == l2: the "line" degenerates to a point, which is closest.
        cx, cy = point
    else:
        cx = (A1 * C1 - B1 * C2) / det
        cy = (A1 * C2 + B1 * C1) / det

    return [cx, cy]


def distance_to_line(l1, l2, pos):
    """
    Distance from *pos* to the line through l1 and l2.

    BUG FIX: returns 0.0 when pos lies exactly on the line instead of
    raising ZeroDivisionError (the normal vector has zero length there).
    """
    closest_p = closest_point(l1, l2, pos)

    n = [pos[0] - closest_p[0], pos[1] - closest_p[1]]  # compute normal
    nn = n[0] ** 2 + n[1] ** 2
    nn_sqrt = math.sqrt(nn)
    if nn_sqrt == 0:
        return 0.0  # pos is on the line
    cl1 = [l1[0] - pos[0], l1[1] - pos[1]]
    # project (l1 - pos) onto the unit normal; |projection| is the distance
    cl1_n = (cl1[0] * n[0] + cl1[1] * n[1]) / nn_sqrt

    return abs(cl1_n)
self.change_inner_state() 90 | self.step_cnt += 1 91 | 92 | step_reward = self.get_reward() 93 | obs_next = self.get_obs() 94 | done = self.is_terminal() 95 | 96 | #return self.agent_pos, self.agent_v, self.agent_accel, self.agent_theta, obs_next, step_reward, done 97 | return obs_next, step_reward, done, '' 98 | 99 | def get_reward(self): 100 | 101 | agent_reward = [0. for _ in range(self.agent_num)] 102 | 103 | for agent_idx in range(self.agent_num): 104 | if self.agent_list[agent_idx].color == 'red' and (self.agent_v[agent_idx][0]**2 + self.agent_v[agent_idx][1]**2) < 1e-10: 105 | for object_idx in range(len(self.map['objects'])): 106 | object = self.map['objects'][object_idx] 107 | if object.color == 'red': 108 | l1, l2 = object.init_pos 109 | agent_reward[agent_idx] = distance_to_line(l1, l2, self.agent_pos[agent_idx]) 110 | return agent_reward 111 | 112 | def is_terminal(self): 113 | 114 | if self.step_cnt >= self.max_step: 115 | return True 116 | 117 | for agent_idx in range(self.agent_num): 118 | if self.agent_list[agent_idx].color == 'red' and ( 119 | self.agent_v[agent_idx][0] ** 2 + self.agent_v[agent_idx][1] ** 2) < 1e-5: 120 | return True 121 | 122 | 123 | 124 | 125 | 126 | -------------------------------------------------------------------------------- /course1/olympics_engine/scenario/running.py: -------------------------------------------------------------------------------- 1 | from olympics_engine.core import OlympicsBase 2 | from olympics_engine.viewer import Viewer, debug 3 | import time 4 | import pygame 5 | import sys 6 | 7 | class Running(OlympicsBase): 8 | def __init__(self, map, seed = None): 9 | self.minimap_mode = map['obs_cfg'].get('minimap', False) 10 | 11 | super(Running, self).__init__(map, seed) 12 | 13 | self.game_name = 'running' 14 | 15 | self.agent1_color = self.agent_list[0].color 16 | self.agent2_color = self.agent_list[1].color 17 | 18 | self.tau = map['env_cfg'].get('tau', 0.1) 19 | self.gamma = map["env_cfg"].get('gamma', 1) 
    def reset(self):
        """
        Reset the episode: re-seed, rebuild the physical state, recreate the
        viewer and return the initial per-agent observations.

        :return: list of per-agent observation dicts produced by
                 _build_from_raw_obs (adds an "id" key and, in minimap mode,
                 a shared "minimap" image).
        """
        self.set_seed()
        self.init_state()
        self.step_cnt = 0
        self.done = False

        # fresh Viewer each reset; the actual window (set_mode) is opened
        # lazily on the first draw
        self.viewer = Viewer(self.view_setting)
        self.display_mode=False

        init_obs = self.get_obs()

        # in minimap mode the whole arena is rendered once so every agent
        # also receives a global view alongside its egocentric observation
        if self.minimap_mode:
            self._build_minimap()

        output_init_obs = self._build_from_raw_obs(init_obs)
        return output_init_obs
71 | 72 | return agent_reward 73 | 74 | def is_terminal(self): 75 | 76 | if self.step_cnt >= self.max_step: 77 | return True 78 | 79 | for agent_idx in range(self.agent_num): 80 | if self.agent_list[agent_idx].finished: 81 | return True 82 | 83 | return False 84 | 85 | 86 | 87 | def step(self, actions_list): 88 | 89 | previous_pos = self.agent_pos 90 | 91 | time1 = time.time() 92 | self.stepPhysics(actions_list, self.step_cnt) 93 | time2 = time.time() 94 | #print('stepPhysics time = ', time2 - time1) 95 | self.speed_limit() 96 | 97 | self.cross_detect(previous_pos, self.agent_pos) 98 | 99 | self.step_cnt += 1 100 | step_reward = self.get_reward() 101 | done = self.is_terminal() 102 | 103 | obs_next = self.get_obs() 104 | #self.check_overlap() 105 | self.change_inner_state() 106 | 107 | 108 | if self.minimap_mode: 109 | self._build_minimap() 110 | 111 | output_obs_next = self._build_from_raw_obs(obs_next) 112 | 113 | 114 | return output_obs_next, step_reward, done, '' 115 | 116 | def _build_from_raw_obs(self, obs): 117 | if self.minimap_mode: 118 | image = pygame.surfarray.array3d(self.viewer.background).swapaxes(0,1) 119 | return [{"agent_obs": obs[0], "minimap":image, "id":"team_0"}, 120 | {"agent_obs": obs[1], "minimap": image, "id":"team_1"}] 121 | else: 122 | return [{"agent_obs":obs[0], "id":"team_0"}, {"agent_obs": obs[1], "id":"team_1"}] 123 | 124 | def _build_minimap(self): 125 | 126 | #need to render first 127 | if not self.display_mode: 128 | self.viewer.set_mode() 129 | self.display_mode = True 130 | 131 | self.viewer.draw_background() 132 | for w in self.map['objects']: 133 | self.viewer.draw_map(w) 134 | 135 | self.viewer.draw_ball(self.agent_pos, self.agent_list) 136 | 137 | if self.draw_obs: 138 | self.viewer.draw_obs(self.obs_boundary, self.agent_list) 139 | 140 | # image = pygame.surfarray.array3d(self.viewer.background).swapaxes(0,1) 141 | 142 | # return image 143 | 144 | def check_win(self): 145 | if self.agent_list[0].finished and not 
    def render(self, info=None):
        """
        Draw the current frame with pygame: map, agents, observation
        boundaries, per-agent view panels, trajectories, acceleration
        arrows and a step counter overlay.

        :param info: optional extra text drawn onto the frame.
        """
        if self.minimap_mode:
            # minimap mode already renders the arena in _build_minimap()
            pass
        else:
            if not self.display_mode:
                # open the window lazily on the first render call
                self.viewer.set_mode()
                self.display_mode=True

            self.viewer.draw_background()
            # draw the map first; agents are drawn on top of it
            for w in self.map['objects']:
                self.viewer.draw_map(w)

            self.viewer.draw_ball(self.agent_pos, self.agent_list)

            if self.draw_obs:
                self.viewer.draw_obs(self.obs_boundary, self.agent_list)

        if self.draw_obs:
            if len(self.obs_list) > 0:
                self.viewer.draw_view(self.obs_list, self.agent_list, leftmost_x=500, upmost_y=10, gap = 100)

        if self.show_traj:
            self.get_trajectory()
            self.viewer.draw_trajectory(self.agent_record, self.agent_list)

        self.viewer.draw_direction(self.agent_pos, self.agent_accel)

        debug('Step: ' + str(self.step_cnt), x=30)
        if info is not None:
            debug(info, x=100)

        # keep the window responsive; closing it exits the process
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                sys.exit()
        pygame.display.flip()
    def __init__(self, meta_map, map_id = None, seed = None, vis = None, vis_clear=None, agent1_color = 'purple', agent2_color = 'green'):
        """
        Running scenario whose track is drawn from a pool of pre-built maps.

        :param meta_map: dict providing the shared 'env_cfg' settings
        :param map_id: index of the map to load; random in [1, 4] when None
        :param seed: RNG seed forwarded to OlympicsBase
        :param vis: per-agent visibility range override
        :param vis_clear: per-agent visibility resolution override
        :param agent1_color: color replacing the map's default purple agent
        :param agent2_color: color replacing the map's default green agent
        """
        Gamemap, map_index = Running_competition.choose_a_map(idx = map_id) #fixme(yan): penetration in some maps, need to check engine, vis
        # NOTE(review): the color overrides below only take effect when `vis`
        # is given, because they sit inside this guard -- confirm intended
        if vis is not None:
            for a in Gamemap['agents']:
                a.visibility = vis
                a.visibility_clear = vis_clear
                if a.color == 'purple':
                    a.color = agent1_color
                    a.original_color = agent1_color
                elif a.color == 'green':
                    a.color = agent2_color
                    a.original_color = agent2_color

        self.meta_map = meta_map
        self.map_index = map_index

        super(Running_competition, self).__init__(Gamemap, seed)

        self.game_name = 'running-competition'

        self.original_tau = meta_map['env_cfg']['tau']
        self.original_gamma = meta_map['env_cfg']['gamma']
        self.wall_restitution = meta_map['env_cfg']['wall_restitution']
        self.circle_restitution = meta_map['env_cfg']['circle_restitution']
        self.max_step = meta_map['env_cfg']['max_step']
        self.energy_recover_rate = meta_map['env_cfg']['energy_recover_rate']
        self.speed_cap = meta_map['env_cfg']['speed_cap']
        self.faster = meta_map['env_cfg']['faster']

        # speed the game up: shorter physics step scaled by `faster`, with the
        # velocity decay re-derived so the effective per-second damping stays
        # comparable to the original gamma
        self.tau = self.original_tau*self.faster
        self.gamma = 1-(1-self.original_gamma)*self.faster
agent2_color=agent2_color) 68 | 69 | @staticmethod 70 | def choose_a_map(idx=None): 71 | if idx is None: 72 | idx = random.randint(1,4) 73 | MapStats = create_scenario("map"+str(idx), file_path= maps_path) 74 | return MapStats, idx 75 | 76 | def check_overlap(self): 77 | #todo 78 | pass 79 | 80 | def get_reward(self): 81 | 82 | agent_reward = [0. for _ in range(self.agent_num)] 83 | 84 | 85 | for agent_idx in range(self.agent_num): 86 | if self.agent_list[agent_idx].finished: 87 | agent_reward[agent_idx] = 1. 88 | 89 | return agent_reward 90 | 91 | def is_terminal(self): 92 | 93 | if self.step_cnt >= self.max_step: 94 | return True 95 | 96 | for agent_idx in range(self.agent_num): 97 | if self.agent_list[agent_idx].finished: 98 | return True 99 | 100 | return False 101 | 102 | 103 | 104 | def step(self, actions_list): 105 | 106 | previous_pos = self.agent_pos 107 | 108 | time1 = time.time() 109 | self.stepPhysics(actions_list, self.step_cnt) 110 | time2 = time.time() 111 | #print('stepPhysics time = ', time2 - time1) 112 | self.speed_limit() 113 | 114 | self.cross_detect(previous_pos, self.agent_pos) 115 | 116 | self.step_cnt += 1 117 | step_reward = self.get_reward() 118 | done = self.is_terminal() 119 | 120 | time3 = time.time() 121 | obs_next = self.get_obs() 122 | time4 = time.time() 123 | #print('render time = ', time4-time3) 124 | # obs_next = 1 125 | #self.check_overlap() 126 | self.change_inner_state() 127 | 128 | return obs_next, step_reward, done, '' 129 | 130 | def check_win(self): 131 | if self.agent_list[0].finished and not (self.agent_list[1].finished): 132 | return '0' 133 | elif not(self.agent_list[0].finished) and self.agent_list[1].finished: 134 | return '1' 135 | else: 136 | return '-1' 137 | 138 | 139 | def render(self, info=None): 140 | 141 | 142 | if not self.display_mode: 143 | self.viewer.set_mode() 144 | self.display_mode=True 145 | 146 | self.viewer.draw_background() 147 | for w in self.map['objects']: 148 | self.viewer.draw_map(w) 149 | 
if __name__ == '__main__':
    # Bug fix: `Running_competition()` was called with no arguments even
    # though __init__ requires `meta_map`, so this demo crashed with a
    # TypeError before printing anything. choose_a_map is a @staticmethod,
    # so sample and print a map directly instead of instantiating the env.
    game_map, map_index = Running_competition.choose_a_map()
    print(game_map)
    print('map index =', map_index)
action_list): 27 | action = [] 28 | for agent_idx in range(self.agent_num): 29 | if self.agent_list[agent_idx].type == 'agent': 30 | action.append(action_list[0]) 31 | _ = action_list.pop(0) 32 | else: 33 | action.append(None) 34 | 35 | return action 36 | 37 | def actions_to_accel(self, actions_list): 38 | self.agent_original_accel = [[] for _ in range(self.agent_num)] 39 | a_container = [[] for _ in range(self.agent_num)] 40 | for agent_idx in range(self.agent_num): 41 | action = actions_list[agent_idx] 42 | if action is None: 43 | accel = [0, self.agent_list[agent_idx].mass*self.g] 44 | self.agent_original_accel[agent_idx] = [0,0] 45 | 46 | else: 47 | if self.agent_list[agent_idx].is_fatigue: #if agent is out of energy, no driving force applies 48 | accel = [0,self.agent_list[agent_idx].mass*self.g] 49 | else: 50 | mass = self.agent_list[agent_idx].mass 51 | 52 | assert self.action_f[0] <= action[0] <= self.action_f[1], print('Continuous driving force needs ' 53 | 'to be within the range [-100,200]') 54 | force = action[0] / mass 55 | assert self.action_theta[0] <= action[1] <= self.action_theta[1], print( 56 | 'Continuous turing angle needs to be within the range [-30deg, 30deg]') 57 | theta = action[1] 58 | 59 | theta_old = self.agent_theta[agent_idx][0] 60 | theta_new = theta_old + theta 61 | self.agent_theta[agent_idx][0] = theta_new 62 | 63 | accel_x = force * math.cos(theta_new / 180 * math.pi) 64 | accel_y = force * math.sin(theta_new / 180 * math.pi) 65 | accel = [accel_x, accel_y + mass*self.g ] 66 | self.agent_original_accel[agent_idx] = [accel_x, accel_y] 67 | 68 | a_container[agent_idx] = accel 69 | return a_container 70 | 71 | 72 | 73 | def step(self, actions_list): 74 | previous_pos = self.agent_pos 75 | 76 | actions_list = self.check_action(actions_list) 77 | 78 | self.stepPhysics(actions_list, self.step_cnt) 79 | 80 | #self.cross_detect(previous_pos, self.agent_pos) 81 | 82 | self.step_cnt += 1 83 | step_reward = 1 #self.get_reward() 84 | obs_next 
    def render(self, info=None):
        """
        Draw the current frame: map, ball/agents, trajectories, per-agent
        views, energy bars and a gravity-direction indicator.

        :param info: optional extra text drawn onto the frame.
        """
        if not self.display_mode:
            # open the pygame window lazily on the first render call
            self.viewer.set_mode()
            self.display_mode=True

        self.viewer.draw_background()
        # draw the map first; the ball/agents are drawn on top of it
        for w in self.map['objects']:
            self.viewer.draw_map(w)

        self.viewer.draw_ball(self.agent_pos, self.agent_list)
        if self.show_traj:
            self.get_trajectory()
            self.viewer.draw_trajectory(self.agent_record, self.agent_list)
        self.viewer.draw_direction(self.agent_pos, self.agent_accel)

        if self.draw_obs:
            self.viewer.draw_obs(self.obs_boundary, self.agent_list)
            self.viewer.draw_view(self.obs_list, self.agent_list)

        # draw energy bars and fixed agent labels
        self.viewer.draw_energy_bar(self.agent_list)
        debug('Agent 0', x=570, y=110)
        debug('Agent 1', x=640, y=110)
        if self.map_num is not None:
            debug('Map {}'.format(self.map_num), x=100)

        debug('Step: ' + str(self.step_cnt), x=30)
        if info is not None:
            debug(info, x=100)
        # small arrow marking the gravity direction (pointing down)
        debug("Gravity", x = 100)
        pygame.draw.line(self.viewer.background, color=[0,0,0],start_pos=[160,10], end_pos=[160,30], width=4)
        pygame.draw.line(self.viewer.background, color=[0,0,0],start_pos=[160,30], end_pos=[155,25], width=4)
        pygame.draw.line(self.viewer.background, color=[0,0,0],start_pos=[160,30], end_pos=[165,25], width=4)

        # quit cleanly when the window is closed
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                sys.exit()
        pygame.display.flip()
139 | #self.viewer.background.fill((255, 255, 255)) 140 | -------------------------------------------------------------------------------- /course1/olympics_engine/test.py: -------------------------------------------------------------------------------- 1 | import pygame 2 | import math 3 | import sys 4 | 5 | pygame.init() 6 | size = width, height = 800, 900 7 | screen = pygame.display.set_mode(size) 8 | screen.fill((255, 255, 255)) 9 | 10 | 11 | while 1: 12 | for event in pygame.event.get(): 13 | if event.type == pygame.QUIT: 14 | sys.exit() 15 | 16 | 17 | #pygame.draw.ellipse(screen, [0,0,0], rect = [100,100, 500,800], width = 2) 18 | 19 | pygame.draw.line(screen, [0,0,0], start_pos = [20,600], end_pos = [400, 600], width = 2) 20 | pygame.draw.line(screen, [0,0,0], start_pos = [20,800], end_pos = [400, 800], width = 2) 21 | pygame.draw.line(screen, [0,0,0], start_pos = [20,600], end_pos = [20,800], width = 2) 22 | pygame.draw.line(screen, [176,196,222], start_pos = [20,700], end_pos = [425,700], width = 2) 23 | 24 | pygame.draw.rect(screen, [0,0,0], [0,0,100,200], width = 2) 25 | pygame.draw.arc(screen, [0,0,0], [0,0,100,200], -1/2*math.pi, 1/2*math.pi, 2) 26 | pygame.draw.arc(screen, [0,0,0], [200,100,400,700], -1/2*math.pi, 1/2*math.pi, 2) 27 | pygame.draw.arc(screen, [0,0,0], [350,300,100,300], -1/2*math.pi, 1/2*math.pi, 2) 28 | 29 | pygame.draw.arc(screen, [176,196,222], [325, 200, 200, 500], -1/2*math.pi, 1/2*math.pi, 2) 30 | 31 | #pygame.draw.arc(screen, [0,0,0], [(250,300),(300, 300)], -1/2*math.pi, 1/2*math.pi, 2) 32 | #pygame.draw.arc(screen, [0,0,0], [50,100,700, 700], -1/2*math.pi, 1/2*math.pi, 2) 33 | #pygame.draw.arc(screen, [176,196,222], [150,200,500, 500], -1/2*math.pi, 1/2*math.pi, 2) 34 | 35 | 36 | pygame.draw.line(screen, [0,0,0], start_pos = [20,100], end_pos = [415, 100], width = 2) 37 | pygame.draw.line(screen, [0,0,0], start_pos = [20,300], end_pos = [415, 300], width = 2) 38 | pygame.draw.line(screen, [176,196,222], start_pos = [20,200], 
# Shared color palette and id mappings used by the engine's renderer and by
# the id-based observation encoding.

# RGB values for every color name used by maps and agents
COLORS = {
    'red': [255,0,0],
    'light red': [255, 127, 127],
    'green': [0, 255, 0],
    'blue': [0, 0, 255],
    'orange': [255, 127, 0],
    'grey': [176,196,222],
    'purple': [160, 32, 240],
    'black': [0, 0, 0],
    'white': [255, 255, 255],
    'light green': [204, 255, 229],
    'sky blue': [0,191,255],
    # 'red-2': [215,80,83],
    # 'blue-2': [73,141,247]
}

# color name -> integer id used in the observation maps
COLOR_TO_IDX = {
    'light green': 0,
    'green': 1,
    'sky blue': 2,
    'orange': 3,
    'grey': 4,
    'purple': 5,
    'black': 6,
    'red': 7,
    'blue':8,
    'white': 9,
    'light red': 10
    # 'red-2': 9,
    # 'blue-2': 10
}

# inverse mapping of COLOR_TO_IDX
IDX_TO_COLOR = {
    0: 'light green',
    1: 'green',
    2: 'sky blue',
    3: 'orange',
    4: 'grey',
    5: 'purple',
    6: 'black',
    7: 'red',
    8: 'blue',
    9: 'white',
    10: 'light red'
    # 9: 'red-2',
    # 10: 'blue-2'
}


# Map of object type to integers
OBJECT_TO_IDX = {
    'agent': 0,
    'wall': 1,   # bounces
    'cross': 2,  # passable
    'goal': 3,   # passable  # maybe case by case
    'arc': 4,
    'ball': 5
}
def make_logpath(game_name, algo):
    """
    Compute a fresh run directory path under models/<game_name>.

    Scans existing 'runN' folders and picks N = max + 1 ('run1' when none
    exist). The directory itself is NOT created here; callers do that.

    :param game_name: sub-folder name under models/
    :param algo: unused, kept for interface compatibility
    :return: (run_dir, log_dir) -- currently the same Path
    """
    base_dir = Path(__file__).resolve().parent
    model_dir = base_dir / Path('models') / game_name
    if not model_dir.exists():
        curr_run = 'run1'
    else:
        exst_run_nums = [int(str(folder.name).split('run')[1])
                         for folder in model_dir.iterdir()
                         if str(folder.name).startswith('run')]
        if len(exst_run_nums) == 0:
            curr_run = 'run1'
        else:
            curr_run = 'run%i' % (max(exst_run_nums) + 1)
    run_dir = model_dir / curr_run
    log_dir = run_dir
    return run_dir, log_dir

def save_config(args, save_path):
    """
    Dump an argparse namespace to <save_path>/config.yaml.

    Bug fix: the file handle was opened and closed manually, so it leaked
    if yaml.dump raised; a with-statement guarantees the close.

    :param args: argparse.Namespace (serialized via vars())
    :param save_path: directory in which config.yaml is written
    """
    with open(os.path.join(str(save_path), 'config.yaml'), mode='w', encoding='utf-8') as file:
        yaml.dump(vars(args), file)
    def __init__(self, low, high, shape=None, dtype=np.float32):
        """
        :param low: scalar or array lower bound(s); -np.inf marks a
            coordinate unbounded below
        :param high: scalar or array upper bound(s); np.inf marks a
            coordinate unbounded above
        :param shape: explicit shape; may be omitted when low or high is an
            array (the shape is then inferred from it)
        :param dtype: storage dtype; bounds are cast to it, with a warning
            when the cast loses precision
        """
        assert dtype is not None, 'dtype must be explicitly provided. '
        self.dtype = np.dtype(dtype)

        # determine shape if it isn't provided directly
        if shape is not None:
            shape = tuple(shape)
            assert np.isscalar(low) or low.shape == shape, "low.shape doesn't match provided shape"
            assert np.isscalar(high) or high.shape == shape, "high.shape doesn't match provided shape"
        elif not np.isscalar(low):
            shape = low.shape
            assert np.isscalar(high) or high.shape == shape, "high.shape doesn't match low.shape"
        elif not np.isscalar(high):
            shape = high.shape
            assert np.isscalar(low) or low.shape == shape, "low.shape doesn't match high.shape"
        else:
            raise ValueError("shape must be provided or inferred from the shapes of low or high")

        # broadcast scalar bounds to full arrays of the requested shape
        if np.isscalar(low):
            low = np.full(shape, low, dtype=dtype)

        if np.isscalar(high):
            high = np.full(shape, high, dtype=dtype)

        self.shape = shape
        self.low = low
        self.high = high

        def _get_precision(dtype):
            # decimal digits representable by a float dtype; ints are exact
            if np.issubdtype(dtype, np.floating):
                return np.finfo(dtype).precision
            else:
                return np.inf
        low_precision = _get_precision(self.low.dtype)
        high_precision = _get_precision(self.high.dtype)
        dtype_precision = _get_precision(self.dtype)
        if min(low_precision, high_precision) > dtype_precision:
            # casting the bounds to self.dtype will lose precision
            logger.warn("Box bound precision lowered by casting to {}".format(self.dtype))
        self.low = self.low.astype(self.dtype)
        self.high = self.high.astype(self.dtype)

        # Boolean arrays which indicate the interval type for each coordinate
        self.bounded_below = -np.inf < self.low
        self.bounded_above = np.inf > self.high

        super(Box, self).__init__(self.shape, self.dtype)
return sample.astype(self.dtype) 118 | 119 | def contains(self, x): 120 | if isinstance(x, list): 121 | x = np.array(x) # Promote list to array for contains check 122 | return x.shape == self.shape and np.all(x >= self.low) and np.all(x <= self.high) 123 | 124 | def to_jsonable(self, sample_n): 125 | return np.array(sample_n).tolist() 126 | 127 | def from_jsonable(self, sample_n): 128 | return [np.asarray(sample) for sample in sample_n] 129 | 130 | def __repr__(self): 131 | return "Box({}, {}, {}, {})".format(self.low.min(), self.high.max(), self.shape, self.dtype) 132 | 133 | def __eq__(self, other): 134 | return isinstance(other, Box) and (self.shape == other.shape) and np.allclose(self.low, other.low) and np.allclose(self.high, other.high) -------------------------------------------------------------------------------- /course1/olympics_engine/utils/get_logger.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import logging 3 | import time 4 | import os 5 | 6 | 7 | def get_logger(log_path, name, save_file=False, console_out=False, json_file=False): 8 | if not os.path.exists(log_path): 9 | os.mkdir(log_path) 10 | 11 | logger = logging.getLogger(name='Jidi') 12 | logger.setLevel(logging.INFO) 13 | # 每分钟建一个文件 14 | rq = time.strftime('%Y%m%d%H%M', time.localtime(time.time())) 15 | log_name = log_path + rq + '_' + name+ '.log' 16 | json_log_name = log_path + rq + '_' + name + '.json' 17 | logfile = log_name 18 | if save_file: 19 | fh = logging.FileHandler(logfile, mode='a') 20 | fh.setLevel(logging.DEBUG) 21 | formatter = logging.Formatter("%(message)s") 22 | fh.setFormatter(formatter) 23 | logger.addHandler(fh) 24 | # 输出到控制台 25 | if console_out: 26 | console = logging.StreamHandler() 27 | console.setLevel(logging.INFO) 28 | logger.addHandler(console) 29 | 30 | # 输出到json 31 | if json_file: 32 | fh_json = logging.FileHandler(json_log_name, mode='a') 33 | fh_json.setLevel(logging.DEBUG) 34 | 
class Space(object):
    """Base container describing an observation or action space.

    Lets generic code (e.g. random-action sampling) work against any Env.
    WARNING: custom spaces may inherit from ``Space``, but the stock classes
    (``Box``, ``Discrete``, ...) cover most use-cases; parametrized sampling
    and vectorized batching are only well-defined for the spaces gym ships.
    """

    def __init__(self, shape=None, dtype=None):
        import numpy as np  # deferred: importing numpy costs ~300-400 ms
        self.shape = tuple(shape) if shape is not None else None
        self.dtype = np.dtype(dtype) if dtype is not None else None
        self._np_random = None

    @property
    def np_random(self):
        """RNG for this space, seeded lazily on first access (seeding is
        expensive and only needed when sampling)."""
        if self._np_random is None:
            self.seed()
        return self._np_random

    def sample(self):
        """Randomly draw one element of this space; distribution depends on
        the space's boundedness. Subclasses must override."""
        raise NotImplementedError

    def seed(self, seed=None):
        """(Re-)seed this space's PRNG and return the seed list."""
        self._np_random, seed = seeding.np_random(seed)
        return [seed]

    def contains(self, x):
        """Return True when ``x`` is a valid member of this space."""
        raise NotImplementedError

    def __contains__(self, x):
        return self.contains(x)

    def to_jsonable(self, sample_n):
        """Convert a batch of samples to a JSONable type (identity by default)."""
        return sample_n

    def from_jsonable(self, sample_n):
        """Convert a JSONable batch back to samples (identity by default)."""
        return sample_n
    def is_bounded(self, manner="both"):
        """Whether the box is bounded ``"below"``, ``"above"``, or ``"both"``."""
        below = np.all(self.bounded_below)
        above = np.all(self.bounded_above)
        if manner == "both":
            return below and above
        elif manner == "below":
            return below
        elif manner == "above":
            return above
        else:
            raise ValueError("manner is not in {'below', 'above', 'both'}")

    def sample(self):
        """
        Generates a single random sample inside of the Box.

        In creating a sample of the box, each coordinate is sampled according to
        the form of the interval:

        * [a, b] : uniform distribution
        * [a, oo) : shifted exponential distribution
        * (-oo, b] : shifted negative exponential distribution
        * (-oo, oo) : normal distribution
        """
        # For integer dtypes, make the upper bound inclusive by sampling in
        # [low, high + 1) and flooring below.
        high = self.high if self.dtype.kind == 'f' \
            else self.high.astype('int64') + 1
        sample = np.empty(self.shape)

        # Masking arrays which classify the coordinates according to interval
        # type
        unbounded = ~self.bounded_below & ~self.bounded_above
        upp_bounded = ~self.bounded_below & self.bounded_above
        low_bounded = self.bounded_below & ~self.bounded_above
        bounded = self.bounded_below & self.bounded_above

        # Vectorized sampling by interval type
        sample[unbounded] = self.np_random.normal(
            size=unbounded[unbounded].shape)

        sample[low_bounded] = self.np_random.exponential(
            size=low_bounded[low_bounded].shape) + self.low[low_bounded]

        sample[upp_bounded] = -self.np_random.exponential(
            size=upp_bounded[upp_bounded].shape) + self.high[upp_bounded]

        sample[bounded] = self.np_random.uniform(low=self.low[bounded],
                                                 high=high[bounded],
                                                 size=bounded[bounded].shape)
        if self.dtype.kind == 'i':
            sample = np.floor(sample)

        return sample.astype(self.dtype)

    def contains(self, x):
        """True when ``x`` has this Box's shape and lies inside the bounds."""
        if isinstance(x, list):
            x = np.array(x)  # Promote list to array for contains check
        return x.shape == self.shape and np.all(x >= self.low) and np.all(x <= self.high)

    def to_jsonable(self, sample_n):
        # Nested lists are JSON-serializable; ndarray is not.
        return np.array(sample_n).tolist()
class Discrete(Space):
    r"""A discrete space :math:`\{ 0, 1, \dots, n-1 \}`.

    Example::

        >>> Discrete(2)
    """

    def __init__(self, n):
        # A space with zero actions is legal; negative counts are not.
        assert n >= 0
        self.n = n
        super(Discrete, self).__init__((), np.int64)

    def sample(self):
        """Draw a uniform random action in [0, n)."""
        return self.np_random.randint(self.n)

    def contains(self, x):
        """True for a plain int, or a scalar integer ndarray, in [0, n)."""
        if isinstance(x, int):
            return 0 <= x < self.n
        is_integer_scalar = (
            isinstance(x, (np.generic, np.ndarray))
            and x.dtype.char in np.typecodes['AllInteger']
            and x.shape == ()
        )
        if not is_integer_scalar:
            return False
        return 0 <= int(x) < self.n

    def __repr__(self):
        return "Discrete(%d)" % self.n

    def __eq__(self, other):
        return isinstance(other, Discrete) and self.n == other.n
def get_logger(log_path, name, save_file=False, console_out=False, json_file=False):
    """Configure and return the root logger.

    Args:
        log_path: directory for log output; created (with parents) if missing.
        name: suffix used in the generated file names.
        save_file: also write plain-text records to ``<timestamp>_<name>.log``.
        console_out: also echo INFO+ records to the console.
        json_file: also write records to ``<timestamp>_<name>.json``.

    Returns:
        logging.Logger: the process-wide root logger.

    NOTE(review): handlers are appended to the root logger on every call, so
    calling this repeatedly duplicates output — confirm single-call usage.
    """
    # BUG FIX: os.mkdir raised when parent dirs were missing or the
    # directory already existed; makedirs(exist_ok=True) handles both.
    os.makedirs(log_path, exist_ok=True)

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    # One file per minute: timestamp resolves down to the minute.
    rq = time.strftime('%Y%m%d%H%M', time.localtime(time.time()))
    # BUG FIX: plain string concatenation required log_path to end with a
    # path separator; os.path.join works either way.
    log_name = os.path.join(log_path, rq + '_' + name + '.log')
    json_log_name = os.path.join(log_path, rq + '_' + name + '.json')
    if save_file:
        fh = logging.FileHandler(log_name, mode='a')
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(logging.Formatter("%(message)s"))
        logger.addHandler(fh)
    # Echo to the console.
    if console_out:
        console = logging.StreamHandler()
        console.setLevel(logging.INFO)
        logger.addHandler(console)
    # Mirror records into a .json file.
    if json_file:
        fh_json = logging.FileHandler(json_log_name, mode='a')
        fh_json.setLevel(logging.DEBUG)
        fh_json.setFormatter(logging.Formatter("%(message)s"))
        logger.addHandler(fh_json)

    return logger
    def __init__(self, array_of_param_array):
        # array_of_param_array: sequence of [min, max] pairs, one per
        # discrete sub-space (both bounds inclusive).
        self.low = np.array([x[0] for x in array_of_param_array])
        self.high = np.array([x[1] for x in array_of_param_array])
        self.num_discrete_space = self.low.shape[0]

    def sample(self):
        """ Returns a array with one sample from each discrete action space """
        # For each row: round(random .* (max - min) + min, 0)
        # NOTE(review): a fresh, unseeded RandomState is created on every
        # call, so sampling cannot be made reproducible via gym's seeding —
        # confirm that is intended.
        np_random = np.random.RandomState()
        random_array = np_random.rand(self.num_discrete_space)
        return [int(x) for x in np.floor(np.multiply((self.high - self.low + 1.), random_array) + self.low)]

    def contains(self, x):
        # Valid when x has one entry per sub-space and each entry lies
        # inside its [low, high] interval.
        return len(x) == self.num_discrete_space and (np.array(x) >= self.low).all() and (np.array(x) <= self.high).all()

    @property
    def shape(self):
        # NOTE(review): returns the dimension count (an int), not a tuple
        # like standard gym spaces — callers appear to rely on this.
        return self.num_discrete_space

    def __repr__(self):
        return "MultiDiscrete" + str(self.num_discrete_space)

    def __eq__(self, other):
        return np.array_equal(self.low, other.low) and np.array_equal(self.high, other.high)
class Space(object):
    """Base container describing an observation or action space.

    Allows generic code (e.g. random-action sampling) to work against any
    Env. WARNING: custom spaces may inherit from ``Space``, but the stock
    classes (``Box``, ``Discrete``, ...) cover most use-cases; parametrized
    sampling and vectorized batching are only well-defined for the spaces
    gym provides by default.
    """

    def __init__(self, shape=None, dtype=None):
        import numpy as np  # deferred: importing numpy costs ~300-400 ms
        self.shape = tuple(shape) if shape is not None else None
        self.dtype = np.dtype(dtype) if dtype is not None else None
        self._np_random = None

    @property
    def np_random(self):
        """RNG for this space, seeded lazily on first access (seeding is
        expensive and only needed when sampling)."""
        if self._np_random is None:
            self.seed()
        return self._np_random

    def sample(self):
        """Randomly draw one element of this space; subclasses override."""
        raise NotImplementedError

    def seed(self, seed=None):
        """(Re-)seed this space's PRNG and return the seed list."""
        self._np_random, seed = seeding.np_random(seed)
        return [seed]

    def contains(self, x):
        """Return True when ``x`` is a valid member of this space."""
        raise NotImplementedError

    def __contains__(self, x):
        return self.contains(x)

    def to_jsonable(self, sample_n):
        """Convert a batch of samples to a JSONable type (identity by default)."""
        return sample_n

    def from_jsonable(self, sample_n):
        """Convert a JSONable batch back to samples (identity by default)."""
        return sample_n
def make(env_type, conf=None):
    """Instantiate the environment class registered under ``env_type``.

    When ``conf`` is not supplied, defaults are loaded from the adjacent
    config.json; the concrete class is looked up on the ``env`` package by
    its ``class_literal`` name.
    """
    if not conf:
        config_file = os.path.join(os.path.dirname(__file__), 'config.json')
        with open(config_file) as f:
            conf = json.load(f)[env_type]
    env_cls = getattr(env, conf['class_literal'])
    return env_cls(conf)
class VectorObservation(object):
    """Interface for environments exposing per-player vector observations."""

    def get_vector_observation(self, current_state, player_id, info_before):
        # Concrete environments must implement the single-player view.
        raise NotImplementedError

    def get_vector_many_observation(self, current_state, player_id_list, info_before=''):
        """One vector observation per player id, in list order."""
        return [self.get_vector_observation(current_state, pid, info_before)
                for pid in player_id_list]
# -*- coding:utf-8 -*-
# Author: Shu LIN

import numpy

DEPTH = 4        # search depth
INF = 100000000
DIR = ((-1, -1), (-1, 0), (-1, 1), (0, -1), (0, 1), (1, -1), (1, 0), (1, 1))  # direction vectors


def place(board, x, y, color, width, height):
    """Play ``color`` at (x, y) on ``board`` IN PLACE, flipping captured discs.

    Returns True when the move flips at least one disc (i.e. is legal).
    NOTE: mutates ``board``; callers must pass a copy when probing moves.
    """
    if x < 0:
        return False
    board[x][y] = color
    valid = False
    for d in range(8):
        i = x + DIR[d][0]
        j = y + DIR[d][1]
        # Walk over a contiguous run of opponent discs in this direction.
        while 0 <= i < width and 0 <= j < height and board[i][j] == -color:
            i += DIR[d][0]
            j += DIR[d][1]
        if 0 <= i < width and 0 <= j < height and board[i][j] == color:
            # The run is capped by our own disc: walk back and flip it.
            while True:
                i -= DIR[d][0]
                j -= DIR[d][1]
                if i == x and j == y:
                    break
                valid = True
                board[i][j] = color
    return valid


def evaluate(board, color, width, height):
    """Disc-count score of ``board`` from ``color``'s point of view."""
    score = 0
    for i in range(width):
        for j in range(height):
            score += board[i][j] * color
    return score


def _copy_board(board):
    """Deep-copy a 2-D board (list of rows). list.copy() alone is shallow and
    lets probe moves corrupt the caller's board."""
    return [row[:] for row in board]


def alphabeta(board, depth, alpha, beta, color, width, height):
    """Negamax alpha-beta search.

    Returns ``(best score, best x, best y)``; (x, y) is (-1, -1) when the
    side to move has no legal move (a pass) or at the search horizon.
    """
    if depth == 0:
        # BUG FIX: evaluate() takes (board, color, width, height); the
        # original called it with two arguments and raised TypeError at
        # every horizon node.
        return evaluate(board, color, width, height), -1, -1
    x = y = -1
    noMove = True
    for i in range(width):
        for j in range(height):
            if board[i][j] == 0:
                # BUG FIX: was board.copy() — a shallow copy whose rows are
                # shared, so place() corrupted the original board.
                newBoard = _copy_board(board)
                if place(newBoard, i, j, color, width, height):
                    noMove = False
                    v = -alphabeta(newBoard, depth - 1, -beta, -alpha, -color, width, height)[0]
                    if v > alpha:
                        alpha = v
                        x, y = i, j
                        if alpha >= beta:  # beta cutoff: opponent avoids this line
                            return alpha, x, y
    if noMove:
        # Pass: the opponent moves again from the same position.
        v = -alphabeta(board, depth - 1, -beta, -alpha, -color, width, height)[0]
        if v > alpha:
            alpha = v
    return alpha, x, y


def wrap_action(x, y, width, height):
    """Encode (x, y) as the platform's pair of one-hot vectors.
    NOTE: x == -1 (no move) one-hots the last index — original behavior."""
    action = [[0] * width, [0] * height]
    action[0][x] = 1
    action[1][y] = 1
    return action


def my_controller(observation, action_space, is_act_continuous=False):
    """Jidi entry point: rebuild the board from the observation and play the
    alpha-beta move."""
    myColor = 1 if observation["chess_player_idx"] == 1 else -1
    height = observation["board_height"]
    width = observation["board_width"]
    # NOTE(review): board rows/cols and the wrap_action argument order assume
    # a square board (width == height == 10 in config.json) — confirm before
    # using non-square boards.
    board = [[0 for _ in range(width)] for _ in range(height)]
    for position in observation[1]:
        board[position[0]][position[1]] = 1
    for position in observation[2]:
        board[position[0]][position[1]] = -1
    _, x, y = alphabeta(board, DEPTH, -INF, INF, myColor, width, height)
    return wrap_action(x, y, height, width)
# -*- coding:utf-8 -*-
# Author: Shu LIN

import numpy
import random

TIMES = 10000    # number of Monte-Carlo playouts per move
INF = 100000000
EPS = 0.1        # epsilon-greedy exploration rate during playouts
DIR = ((-1, -1), (-1, 0), (-1, 1), (0, -1), (0, 1), (1, -1), (1, 0), (1, 1))  # direction vectors

visits = {}    # (color, x, y) -> playout count
returns = {}   # (color, x, y) -> cumulative playout result


def _copy_board(board):
    """Deep-copy a 2-D board (list of rows). list.copy() alone is shallow and
    lets simulated moves corrupt the caller's board."""
    return [row[:] for row in board]


def place(board, x, y, color, width, height):
    """Play ``color`` at (x, y) IN PLACE, flipping captured discs.
    Returns True when at least one disc is flipped (legal move).
    NOTE: mutates ``board``; pass a copy when probing."""
    if x < 0:
        return False
    board[x][y] = color
    valid = False
    for d in range(8):
        i = x + DIR[d][0]
        j = y + DIR[d][1]
        # Walk over a contiguous run of opponent discs in this direction.
        while 0 <= i < width and 0 <= j < height and board[i][j] == -color:
            i += DIR[d][0]
            j += DIR[d][1]
        if 0 <= i < width and 0 <= j < height and board[i][j] == color:
            # The run is capped by our own disc: walk back and flip it.
            while True:
                i -= DIR[d][0]
                j -= DIR[d][1]
                if i == x and j == y:
                    break
                valid = True
                board[i][j] = color
    return valid


def evaluate(board, color, width, height):
    """Terminal result from ``color``'s view: +1 win, -1 loss, 0 draw."""
    score = 0
    for i in range(width):
        for j in range(height):
            score += board[i][j] * color
    if score > 0:
        return 1
    if score < 0:
        return -1
    return 0


def getMove(board, color, chooseBest, width, height):
    """Pick a move for ``color``: the best-average move from the collected
    statistics (unvisited moves are optimistically preferred), or — during
    playouts — an epsilon-greedy random legal move.
    Returns (-1, -1) when there is no legal move."""
    moves = []
    for i in range(width):
        for j in range(height):
            if board[i][j] == 0:
                # BUG FIX: was board.copy() — a shallow copy whose rows are
                # shared, so probing legality corrupted the original board.
                newBoard = _copy_board(board)
                if place(newBoard, i, j, color, width, height):
                    moves.append((i, j))
    if len(moves) == 0:
        return -1, -1
    best = -INF
    x = y = -1
    for (i, j) in moves:
        avg = INF  # optimistic init: try unvisited moves first
        if (color, i, j) in visits:
            avg = returns[color, i, j] / visits[color, i, j]
        if avg > best:
            best = avg
            x, y = i, j
    if chooseBest or random.random() > EPS:
        return x, y
    return random.choice(moves)


def simulate(board, color, width, height):
    """Play one playout from ``board`` and back the result up into the
    global visits/returns tables."""
    x, y = getMove(board, color, False, width, height)
    noMove = x < 0
    if noMove:
        # Pass: opponent moves instead.
        color = -color
        x, y = getMove(board, color, False, width, height)
        if x < 0:
            # Neither side can move: game over, score the final position.
            return evaluate(board, -color, width, height)
    # BUG FIX: was board.copy() (shallow) — the playout mutated the shared rows.
    newBoard = _copy_board(board)
    place(newBoard, x, y, color, width, height)
    result = -simulate(newBoard, -color, width, height)
    global visits, returns
    if (color, x, y) not in visits:
        visits[color, x, y] = 1
        returns[color, x, y] = result
    else:
        visits[color, x, y] += 1
        returns[color, x, y] += result
    if noMove:
        return -result
    return result


def montecarlo(board, color, width, height):
    """Run TIMES playouts, then return the statistically best move."""
    for _ in range(TIMES):
        simulate(board, color, width, height)
    return getMove(board, color, True, width, height)


def wrap_action(x, y, width, height):
    """Encode (x, y) as the platform's pair of one-hot vectors.
    NOTE: x == -1 (no move) one-hots the last index — original behavior."""
    action = [[0] * width, [0] * height]
    action[0][x] = 1
    action[1][y] = 1
    return action


def my_controller(observation, action_space, is_act_continuous=False):
    """Jidi entry point: rebuild the board and play the Monte-Carlo move."""
    myColor = 1 if observation["chess_player_idx"] == 1 else -1
    height = observation["board_height"]
    width = observation["board_width"]
    # NOTE(review): board construction and the montecarlo/wrap_action argument
    # order assume a square board (10x10 in config.json) — confirm before
    # using non-square boards.
    board = [[0 for _ in range(width)] for _ in range(height)]
    for position in observation[1]:
        board[position[0]][position[1]] = 1
    for position in observation[2]:
        board[position[0]][position[1]] = -1
    x, y = montecarlo(board, myColor, height, width)
    return wrap_action(x, y, height, width)
-------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Time : 2022/8/10 下午4:14 3 | # Author: Yahui Cui 4 | 5 | """ 6 | # =================================== Important ========================================= 7 | Notes: 8 | 1. this agents is random agents , which can fit any env in Jidi platform. 9 | 2. if you want to load .pth file, please follow the instruction here: 10 | https://github.com/jidiai/ai_lib/blob/master/examples/demo 11 | """ 12 | 13 | 14 | def my_controller(observation, action_space, is_act_continuous=True): 15 | agent_action = [] 16 | for i in range(len(action_space)): 17 | action_ = sample_single_dim(action_space[i], is_act_continuous) 18 | agent_action.append(action_) 19 | return agent_action 20 | 21 | 22 | def sample_single_dim(action_space_list_each, is_act_continuous): 23 | each = [] 24 | if is_act_continuous: 25 | each = action_space_list_each.sample() 26 | else: 27 | if action_space_list_each.__class__.__name__ == "Discrete": 28 | each = [0] * action_space_list_each.n 29 | idx = action_space_list_each.sample() 30 | each[idx] = 1 31 | elif action_space_list_each.__class__.__name__ == "MultiDiscreteParticle": 32 | each = [] 33 | nvec = action_space_list_each.high - action_space_list_each.low + 1 34 | sample_indexes = action_space_list_each.sample() 35 | 36 | for i in range(len(nvec)): 37 | dim = nvec[i] 38 | new_action = [0] * dim 39 | index = sample_indexes[i] 40 | new_action[index] = 1 41 | each.extend(new_action) 42 | elif action_space_list_each.__class__.__name__ == "Discrete_SC2": 43 | each = action_space_list_each.sample() 44 | elif action_space_list_each.__class__.__name__ == "Box": 45 | each = action_space_list_each.sample() 46 | return each 47 | -------------------------------------------------------------------------------- /course2/examples/submission/submission.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 
import numpy as np

from .space import Space
from gym import logger


class Box(Space):
    """
    A (possibly unbounded) box in R^n. Specifically, a Box represents the
    Cartesian product of n closed intervals. Each interval has the form of one
    of [a, b], (-oo, b], [a, oo), or (-oo, oo).

    There are two common use cases:

    * Identical bound for each dimension::
        >>> Box(low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32)
        Box(3, 4)

    * Independent bound for each dimension::
        >>> Box(low=np.array([-1.0, -2.0]), high=np.array([2.0, 4.0]), dtype=np.float32)
        Box(2,)

    """
    def __init__(self, low, high, shape=None, dtype=np.float32):
        assert dtype is not None, 'dtype must be explicitly provided. '
        self.dtype = np.dtype(dtype)

        # determine shape if it isn't provided directly
        if shape is not None:
            shape = tuple(shape)
            assert np.isscalar(low) or low.shape == shape, "low.shape doesn't match provided shape"
            assert np.isscalar(high) or high.shape == shape, "high.shape doesn't match provided shape"
        elif not np.isscalar(low):
            shape = low.shape
            assert np.isscalar(high) or high.shape == shape, "high.shape doesn't match low.shape"
        elif not np.isscalar(high):
            shape = high.shape
            assert np.isscalar(low) or low.shape == shape, "low.shape doesn't match high.shape"
        else:
            raise ValueError("shape must be provided or inferred from the shapes of low or high")

        # Broadcast scalar bounds to full arrays of the resolved shape.
        if np.isscalar(low):
            low = np.full(shape, low, dtype=dtype)

        if np.isscalar(high):
            high = np.full(shape, high, dtype=dtype)

        self.shape = shape
        self.low = low
        self.high = high

        # Warn when casting the given bounds into self.dtype would lose
        # precision (e.g. float64 bounds stored in a float32 box); integer
        # dtypes are treated as "infinite precision" so they never warn.
        def _get_precision(dtype):
            if np.issubdtype(dtype, np.floating):
                return np.finfo(dtype).precision
            else:
                return np.inf
        low_precision = _get_precision(self.low.dtype)
        high_precision = _get_precision(self.high.dtype)
        dtype_precision = _get_precision(self.dtype)
        if min(low_precision, high_precision) > dtype_precision:
            logger.warn("Box bound precision lowered by casting to {}".format(self.dtype))
        self.low = self.low.astype(self.dtype)
        self.high = self.high.astype(self.dtype)

        # Boolean arrays which indicate the interval type for each coordinate
        self.bounded_below = -np.inf < self.low
        self.bounded_above = np.inf > self.high

        super(Box, self).__init__(self.shape, self.dtype)

    def is_bounded(self, manner="both"):
        """Report whether the box is bounded below/above/on both sides."""
        below = np.all(self.bounded_below)
        above = np.all(self.bounded_above)
        if manner == "both":
            return below and above
        elif manner == "below":
            return below
        elif manner == "above":
            return above
        else:
            raise ValueError("manner is not in {'below', 'above', 'both'}")

    def sample(self):
        """
        Generates a single random sample inside of the Box.

        In creating a sample of the box, each coordinate is sampled according to
        the form of the interval:

        * [a, b] : uniform distribution
        * [a, oo) : shifted exponential distribution
        * (-oo, b] : shifted negative exponential distribution
        * (-oo, oo) : normal distribution
        """
        # For integer dtypes the upper bound is inclusive, so shift it by one
        # to make np.random.uniform's half-open interval cover it.
        high = self.high if self.dtype.kind == 'f' \
            else self.high.astype('int64') + 1
        sample = np.empty(self.shape)

        # Masking arrays which classify the coordinates according to interval
        # type
        unbounded = ~self.bounded_below & ~self.bounded_above
        upp_bounded = ~self.bounded_below & self.bounded_above
        low_bounded = self.bounded_below & ~self.bounded_above
        bounded = self.bounded_below & self.bounded_above


        # Vectorized sampling by interval type
        sample[unbounded] = self.np_random.normal(
            size=unbounded[unbounded].shape)

        sample[low_bounded] = self.np_random.exponential(
            size=low_bounded[low_bounded].shape) + self.low[low_bounded]

        sample[upp_bounded] = -self.np_random.exponential(
            size=upp_bounded[upp_bounded].shape) + self.high[upp_bounded]

        sample[bounded] = self.np_random.uniform(low=self.low[bounded],
                                                 high=high[bounded],
                                                 size=bounded[bounded].shape)
        if self.dtype.kind == 'i':
            sample = np.floor(sample)

        return sample.astype(self.dtype)

    def contains(self, x):
        """True when x has the box's exact shape and lies within the bounds."""
        if isinstance(x, list):
            x = np.array(x)  # Promote list to array for contains check
        return x.shape == self.shape and np.all(x >= self.low) and np.all(x <= self.high)

    def to_jsonable(self, sample_n):
        # Arrays are not JSON-serialisable; nested lists are.
        return np.array(sample_n).tolist()

    def from_jsonable(self, sample_n):
        return [np.asarray(sample) for sample in sample_n]

    def __repr__(self):
        return "Box({}, {}, {}, {})".format(self.low.min(), self.high.max(), self.shape, self.dtype)

    def __eq__(self, other):
        return isinstance(other, Box) and (self.shape == other.shape) and np.allclose(self.low, other.low) and np.allclose(self.high, other.high)
# -*- coding:utf-8 -*-
# Time : 2021/4/8 下午2:42
# Author: Yahui Cui

# An old version of OpenAI Gym's multi_discrete.py. (Was getting affected by Gym updates)
# (https://github.com/openai/gym/blob/1fb81d4e3fb780ccf77fec731287ba07da35eb84/gym/spaces/multi_discrete.py)

import numpy as np

import gym

class MultiDiscreteParticle(gym.Space):
    """
    - The multi-discrete action space consists of a series of discrete action spaces with different parameters
    - It can be adapted to both a Discrete action space or a continuous (Box) action space
    - It is useful to represent game controllers or keyboards where each key can be represented as a discrete action space
    - It is parametrized by passing an array of arrays containing [min, max] for each discrete action space
       where the discrete action space can take any integers from `min` to `max` (both inclusive)
    Note: A value of 0 always need to represent the NOOP action.
    e.g. Nintendo Game Controller
    - Can be conceptualized as 3 discrete action spaces:
        1) Arrow Keys: Discrete 5  - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4]  - params: min: 0, max: 4
        2) Button A:   Discrete 2  - NOOP[0], Pressed[1] - params: min: 0, max: 1
        3) Button B:   Discrete 2  - NOOP[0], Pressed[1] - params: min: 0, max: 1
    - Can be initialized as
        MultiDiscrete([ [0,4], [0,1], [0,1] ])
    """
    def __init__(self, array_of_param_array):
        # Per-dimension inclusive [min, max] bounds, e.g. [[0,4], [0,1]].
        self.low = np.array([x[0] for x in array_of_param_array])
        self.high = np.array([x[1] for x in array_of_param_array])
        self.num_discrete_space = self.low.shape[0]
        # NOTE(review): gym.Space.__init__ is not called here — this is a
        # deliberately pinned legacy copy; shape/dtype on the base stay unset.

    def sample(self):
        """ Returns a array with one sample from each discrete action space """
        # For each row: round(random .* (max - min) + min, 0)
        # NOTE(review): a fresh, unseeded RandomState is created on every
        # call, so sampling ignores gym's seeding machinery and is not
        # reproducible — confirm this is intended before relying on seeds.
        np_random = np.random.RandomState()
        random_array = np_random.rand(self.num_discrete_space)
        return [int(x) for x in np.floor(np.multiply((self.high - self.low + 1.), random_array) + self.low)]
    def contains(self, x):
        # Valid iff x has one entry per dimension and each entry lies in
        # the inclusive [low, high] range of its dimension.
        return len(x) == self.num_discrete_space and (np.array(x) >= self.low).all() and (np.array(x) <= self.high).all()

    @property
    def shape(self):
        # NOTE(review): returns an int (number of dimensions), not a tuple
        # as modern gym spaces do — callers here appear to rely on that.
        return self.num_discrete_space
    def __repr__(self):
        return "MultiDiscrete" + str(self.num_discrete_space)
    def __eq__(self, other):
        # NOTE(review): no isinstance check — any object with matching
        # low/high arrays compares equal.
        return np.array_equal(self.low, other.low) and np.array_equal(self.high, other.high)
    def __init__(self, shape=None, dtype=None):
        import numpy as np  # takes about 300-400ms to import, so we load lazily
        # Normalise inputs: shape becomes a tuple, dtype an np.dtype;
        # None means "not specified by the subclass".
        self.shape = None if shape is None else tuple(shape)
        self.dtype = None if dtype is None else np.dtype(dtype)
        self._np_random = None  # created lazily by the np_random property

    @property
    def np_random(self):
        """Lazily seed the rng since this is expensive and only needed if
        sampling from this space.
        """
        if self._np_random is None:
            self.seed()

        return self._np_random

    def sample(self):
        """Randomly sample an element of this space. Can be
        uniform or non-uniform sampling based on boundedness of space."""
        raise NotImplementedError

    def seed(self, seed=None):
        """Seed the PRNG of this space. """
        # Replaces any existing RNG; returns the list-wrapped seed actually
        # used (gym convention).
        self._np_random, seed = seeding.np_random(seed)
        return [seed]

    def contains(self, x):
        """
        Return boolean specifying if x is a valid
        member of this space
        """
        raise NotImplementedError

    def __contains__(self, x):
        # Enables the `x in space` syntax via contains().
        return self.contains(x)

    def to_jsonable(self, sample_n):
        """Convert a batch of samples from this space to a JSONable data type."""
        # By default, assume identity is JSONable
        return sample_n

    def from_jsonable(self, sample_n):
        """Convert a JSONable data type to a batch of samples from this space."""
        # By default, assume identity is JSONable
        return sample_n
`False` means to train from scratch.) 36 | > 37 | > The model will be store under the folder [ddpg/trained_model](examples/ddpg/trained_model). 38 | 39 | ___ 40 | Have a good time~~~ -------------------------------------------------------------------------------- /course3/docs/rlcn_2022_rl_algorithms.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course3/docs/rlcn_2022_rl_algorithms.zip -------------------------------------------------------------------------------- /course3/env/__init__.py: -------------------------------------------------------------------------------- 1 | from .ccgame import * 2 | -------------------------------------------------------------------------------- /course3/env/chooseenv.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # 作者:zruizhi 3 | # 创建时间: 2020/9/11 11:17 上午 4 | # 描述:选择运行环境,需要维护env/__ini__.py && config.json(存储环境默认参数) 5 | 6 | import json 7 | import env 8 | import os 9 | 10 | 11 | def make(env_type, conf=None): 12 | file_path = os.path.join(os.path.dirname(__file__), 'config.json') 13 | if not conf: 14 | with open(file_path) as f: 15 | conf = json.load(f)[env_type] 16 | class_literal = conf['class_literal'] 17 | return getattr(env, class_literal)(conf) 18 | 19 | 20 | if __name__ == "__main__": 21 | make("classic_MountainCar-v0") 22 | -------------------------------------------------------------------------------- /course3/env/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "classic_Pendulum-v0": { 3 | "class_literal": "CCGame", 4 | "n_player": 1, 5 | "is_obs_continuous": true, 6 | "is_act_continuous": true, 7 | "act_box": {"low": -2.0, "high":2.0, "shape":[1]}, 8 | "game_name": "Pendulum-v0", 9 | "agent_nums": [1], 10 | "max_step": 200, 11 | "obs_type": ["vector"] 12 | } 13 | 
# -*- coding:utf-8 -*-
# Author: zruizhi
# Created: 2020/11/13
# Interface classes for the different observation kinds.
obs_type = ["grid", "vector", "dict"]


class GridObservation(object):
    """Interface for environments exposing grid-shaped observations."""

    def get_grid_observation(self, current_state, player_id, info_before):
        raise NotImplementedError

    def get_grid_many_observation(self, current_state, player_id_list, info_before=''):
        """Collect one grid observation per player in `player_id_list`."""
        return [self.get_grid_observation(current_state, pid, info_before)
                for pid in player_id_list]


class VectorObservation(object):
    """Interface for environments exposing flat vector observations."""

    def get_vector_observation(self, current_state, player_id, info_before):
        raise NotImplementedError

    def get_vector_many_observation(self, current_state, player_id_list, info_before=''):
        """Collect one vector observation per player in `player_id_list`."""
        return [self.get_vector_observation(current_state, pid, info_before)
                for pid in player_id_list]


class DictObservation(object):
    """Interface for environments exposing dict observations."""

    def get_dict_observation(self, current_state, player_id, info_before):
        raise NotImplementedError

    def get_dict_many_observation(self, current_state, player_id_list, info_before=''):
        """Collect one dict observation per player in `player_id_list`."""
        return [self.get_dict_observation(current_state, pid, info_before)
                for pid in player_id_list]


# todo: observation builder
class CustomObservation(object):
    """Interface for environments with bespoke observation builders."""

    def get_custom_observation(self, current_state, player_id):
        raise NotImplementedError

    def get_custom_obs_space(self, player_id):
        raise NotImplementedError

    def get_custom_many_observation(self, current_state, player_id_list):
        """Collect one custom observation per player in `player_id_list`."""
        return [self.get_custom_observation(current_state, pid)
                for pid in player_id_list]

    def get_custom_many_obs_space(self, player_id_list):
        """Collect one custom observation space per player."""
        return [self.get_custom_obs_space(pid) for pid in player_id_list]
https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course3/examples/ddpg/actor_200.pth -------------------------------------------------------------------------------- /course3/examples/random/submission.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Time : 2022/8/10 下午4:14 3 | # Author: Yahui Cui 4 | 5 | """ 6 | # =================================== Important ========================================= 7 | Notes: 8 | 1. this agents is random agents , which can fit any env in Jidi platform. 9 | 2. if you want to load .pth file, please follow the instruction here: 10 | https://github.com/jidiai/ai_lib/blob/master/examples/demo 11 | """ 12 | 13 | 14 | def my_controller(observation, action_space, is_act_continuous=True): 15 | agent_action = [] 16 | for i in range(len(action_space)): 17 | action_ = sample_single_dim(action_space[i], is_act_continuous) 18 | agent_action.append(action_) 19 | return agent_action 20 | 21 | 22 | def sample_single_dim(action_space_list_each, is_act_continuous): 23 | each = [] 24 | if is_act_continuous: 25 | each = action_space_list_each.sample() 26 | else: 27 | if action_space_list_each.__class__.__name__ == "Discrete": 28 | each = [0] * action_space_list_each.n 29 | idx = action_space_list_each.sample() 30 | each[idx] = 1 31 | elif action_space_list_each.__class__.__name__ == "MultiDiscreteParticle": 32 | each = [] 33 | nvec = action_space_list_each.high - action_space_list_each.low + 1 34 | sample_indexes = action_space_list_each.sample() 35 | 36 | for i in range(len(nvec)): 37 | dim = nvec[i] 38 | new_action = [0] * dim 39 | index = sample_indexes[i] 40 | new_action[index] = 1 41 | each.extend(new_action) 42 | elif action_space_list_each.__class__.__name__ == "Discrete_SC2": 43 | each = action_space_list_each.sample() 44 | elif action_space_list_each.__class__.__name__ == "Box": 45 | each = action_space_list_each.sample() 
# -*- coding:utf-8 -*-
# Time : 2022/8/19 5:25 PM
# Author: Yahui Cui
import numpy as np
import os
import sys
import datetime

sys.path.append(os.path.dirname(__file__))
from env.chooseenv import make
from course3.examples.ddpg.submission import agent as ddpg_agent
from course3.examples.ddpg.submission import my_controller
from course3.examples.ddpg.submission import replay_buffer


def main():
    """Train the DDPG agent on classic_Pendulum-v0 and checkpoint it.

    Runs `num_episodes` episodes split into 10 chunks; after each chunk the
    agent is saved under examples/ddpg/trained_model/<timestamp>.
    """
    num_episodes = 200
    minimal_size = 1000  # replay-buffer warm-up before updates start
    batch_size = 64

    now = datetime.datetime.now()
    model_path = os.path.join(os.path.dirname(__file__), 'examples', 'ddpg', 'trained_model',
                              now.strftime("%Y-%m-%d-%H-%M-%S"))

    env_name = 'classic_Pendulum-v0'
    env = make(env_name)
    action_space = env.joint_action_space
    agent_id = 0  # single-agent env: always read player 0's slots

    return_list = []
    for i in range(10):

        for i_episode in range(int(num_episodes / 10)):
            episode_return = 0
            state = env.reset()
            done = False
            while not done:
                action = my_controller(state[agent_id], action_space, True)
                # NOTE(review): step() is unpacked as a 5-tuple here —
                # confirm against env/ccgame.py's return signature.
                next_state, reward, done, _, _ = env.step([action])
                replay_buffer.add(state[agent_id]['obs'], action[0], reward[agent_id], next_state[agent_id]['obs'], done)
                state = next_state
                episode_return += reward[agent_id]
                # Only train the networks once the buffer holds enough data.
                if replay_buffer.size() > minimal_size:
                    b_s, b_a, b_r, b_ns, b_d = replay_buffer.sample(batch_size)
                    transition_dict = {
                        'states': b_s,
                        'actions': b_a,
                        'next_states': b_ns,
                        'rewards': b_r,
                        'dones': b_d
                    }
                    ddpg_agent.update(transition_dict)
            return_list.append(episode_return)
            if (i_episode + 1) % 10 == 0:
                # Report the mean return over the last 10 episodes.
                print('episode' + ':' +
                      '%d' % (num_episodes / 10 * i + i_episode + 1) +
                      ' return' + ':''%.3f' % np.mean(return_list[-10:]))

        ddpg_agent.save(model_path, num_episodes / 10 * (i + 1))


if __name__ == '__main__':
    main()
' 26 | self.dtype = np.dtype(dtype) 27 | 28 | # determine shape if it isn't provided directly 29 | if shape is not None: 30 | shape = tuple(shape) 31 | assert np.isscalar(low) or low.shape == shape, "low.shape doesn't match provided shape" 32 | assert np.isscalar(high) or high.shape == shape, "high.shape doesn't match provided shape" 33 | elif not np.isscalar(low): 34 | shape = low.shape 35 | assert np.isscalar(high) or high.shape == shape, "high.shape doesn't match low.shape" 36 | elif not np.isscalar(high): 37 | shape = high.shape 38 | assert np.isscalar(low) or low.shape == shape, "low.shape doesn't match high.shape" 39 | else: 40 | raise ValueError("shape must be provided or inferred from the shapes of low or high") 41 | 42 | if np.isscalar(low): 43 | low = np.full(shape, low, dtype=dtype) 44 | 45 | if np.isscalar(high): 46 | high = np.full(shape, high, dtype=dtype) 47 | 48 | self.shape = shape 49 | self.low = low 50 | self.high = high 51 | 52 | def _get_precision(dtype): 53 | if np.issubdtype(dtype, np.floating): 54 | return np.finfo(dtype).precision 55 | else: 56 | return np.inf 57 | low_precision = _get_precision(self.low.dtype) 58 | high_precision = _get_precision(self.high.dtype) 59 | dtype_precision = _get_precision(self.dtype) 60 | if min(low_precision, high_precision) > dtype_precision: 61 | logger.warn("Box bound precision lowered by casting to {}".format(self.dtype)) 62 | self.low = self.low.astype(self.dtype) 63 | self.high = self.high.astype(self.dtype) 64 | 65 | # Boolean arrays which indicate the interval type for each coordinate 66 | self.bounded_below = -np.inf < self.low 67 | self.bounded_above = np.inf > self.high 68 | 69 | super(Box, self).__init__(self.shape, self.dtype) 70 | 71 | def is_bounded(self, manner="both"): 72 | below = np.all(self.bounded_below) 73 | above = np.all(self.bounded_above) 74 | if manner == "both": 75 | return below and above 76 | elif manner == "below": 77 | return below 78 | elif manner == "above": 79 | return above 
80 | else: 81 | raise ValueError("manner is not in {'below', 'above', 'both'}") 82 | 83 | def sample(self): 84 | """ 85 | Generates a single random sample inside of the Box. 86 | 87 | In creating a sample of the box, each coordinate is sampled according to 88 | the form of the interval: 89 | 90 | * [a, b] : uniform distribution 91 | * [a, oo) : shifted exponential distribution 92 | * (-oo, b] : shifted negative exponential distribution 93 | * (-oo, oo) : normal distribution 94 | """ 95 | high = self.high if self.dtype.kind == 'f' \ 96 | else self.high.astype('int64') + 1 97 | sample = np.empty(self.shape) 98 | 99 | # Masking arrays which classify the coordinates according to interval 100 | # type 101 | unbounded = ~self.bounded_below & ~self.bounded_above 102 | upp_bounded = ~self.bounded_below & self.bounded_above 103 | low_bounded = self.bounded_below & ~self.bounded_above 104 | bounded = self.bounded_below & self.bounded_above 105 | 106 | 107 | # Vectorized sampling by interval type 108 | sample[unbounded] = self.np_random.normal( 109 | size=unbounded[unbounded].shape) 110 | 111 | sample[low_bounded] = self.np_random.exponential( 112 | size=low_bounded[low_bounded].shape) + self.low[low_bounded] 113 | 114 | sample[upp_bounded] = -self.np_random.exponential( 115 | size=upp_bounded[upp_bounded].shape) + self.high[upp_bounded] 116 | 117 | sample[bounded] = self.np_random.uniform(low=self.low[bounded], 118 | high=high[bounded], 119 | size=bounded[bounded].shape) 120 | if self.dtype.kind == 'i': 121 | sample = np.floor(sample) 122 | 123 | return sample.astype(self.dtype) 124 | 125 | def contains(self, x): 126 | if isinstance(x, list): 127 | x = np.array(x) # Promote list to array for contains check 128 | return x.shape == self.shape and np.all(x >= self.low) and np.all(x <= self.high) 129 | 130 | def to_jsonable(self, sample_n): 131 | return np.array(sample_n).tolist() 132 | 133 | def from_jsonable(self, sample_n): 134 | return [np.asarray(sample) for sample in 
sample_n] 135 | 136 | def __repr__(self): 137 | return "Box({}, {}, {}, {})".format(self.low.min(), self.high.max(), self.shape, self.dtype) 138 | 139 | def __eq__(self, other): 140 | return isinstance(other, Box) and (self.shape == other.shape) and np.allclose(self.low, other.low) and np.allclose(self.high, other.high) 141 | -------------------------------------------------------------------------------- /course3/utils/discrete.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .space import Space 3 | 4 | 5 | class Discrete(Space): 6 | r"""A discrete space in :math:`\{ 0, 1, \\dots, n-1 \}`. 7 | 8 | Example:: 9 | 10 | >>> Discrete(2) 11 | 12 | """ 13 | def __init__(self, n): 14 | assert n >= 0 15 | self.n = n 16 | super(Discrete, self).__init__((), np.int64) 17 | 18 | def sample(self): 19 | return self.np_random.randint(self.n) 20 | 21 | def contains(self, x): 22 | if isinstance(x, int): 23 | as_int = x 24 | elif isinstance(x, (np.generic, np.ndarray)) and (x.dtype.char in np.typecodes['AllInteger'] and x.shape == ()): 25 | as_int = int(x) 26 | else: 27 | return False 28 | return as_int >= 0 and as_int < self.n 29 | 30 | def __repr__(self): 31 | return "Discrete(%d)" % self.n 32 | 33 | def __eq__(self, other): 34 | return isinstance(other, Discrete) and self.n == other.n 35 | -------------------------------------------------------------------------------- /course3/utils/get_logger.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import logging 3 | import time 4 | import os 5 | 6 | 7 | def get_logger(log_path, name, save_file=False, console_out=False, json_file=False): 8 | if not os.path.exists(log_path): 9 | os.mkdir(log_path) 10 | 11 | logger = logging.getLogger() 12 | logger.setLevel(logging.INFO) 13 | # 每分钟建一个文件 14 | rq = time.strftime('%Y%m%d%H%M', time.localtime(time.time())) 15 | log_name = log_path + rq + '_' + name+ '.log' 16 
| json_log_name = log_path + rq + '_' + name + '.json' 17 | logfile = log_name 18 | if save_file: 19 | fh = logging.FileHandler(logfile, mode='a') 20 | fh.setLevel(logging.DEBUG) 21 | formatter = logging.Formatter("%(message)s") 22 | fh.setFormatter(formatter) 23 | logger.addHandler(fh) 24 | # 输出到控制台 25 | if console_out: 26 | console = logging.StreamHandler() 27 | console.setLevel(logging.INFO) 28 | logger.addHandler(console) 29 | 30 | # 输出到json 31 | if json_file: 32 | fh_json = logging.FileHandler(json_log_name, mode='a') 33 | fh_json.setLevel(logging.DEBUG) 34 | formatter_json = logging.Formatter("%(message)s") 35 | fh_json.setFormatter(formatter_json) 36 | logger.addHandler(fh_json) 37 | 38 | return logger -------------------------------------------------------------------------------- /course3/utils/mutli_discrete_particle.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Time : 2021/4/8 下午2:42 3 | # Author: Yahui Cui 4 | 5 | # An old version of OpenAI Gym's multi_discrete.py. (Was getting affected by Gym updates) 6 | # (https://github.com/openai/gym/blob/1fb81d4e3fb780ccf77fec731287ba07da35eb84/gym/spaces/multi_discrete.py) 7 | 8 | import numpy as np 9 | 10 | import gym 11 | 12 | class MultiDiscreteParticle(gym.Space): 13 | """ 14 | - The multi-discrete action space consists of a series of discrete action spaces with different parameters 15 | - It can be adapted to both a Discrete action space or a continuous (Box) action space 16 | - It is useful to represent game controllers or keyboards where each key can be represented as a discrete action space 17 | - It is parametrized by passing an array of arrays containing [min, max] for each discrete action space 18 | where the discrete action space can take any integers from `min` to `max` (both inclusive) 19 | Note: A value of 0 always need to represent the NOOP action. 20 | e.g. 
Nintendo Game Controller 21 | - Can be conceptualized as 3 discrete action spaces: 22 | 1) Arrow Keys: Discrete 5 - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4] - params: min: 0, max: 4 23 | 2) Button A: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1 24 | 3) Button B: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1 25 | - Can be initialized as 26 | MultiDiscrete([ [0,4], [0,1], [0,1] ]) 27 | """ 28 | def __init__(self, array_of_param_array): 29 | self.low = np.array([x[0] for x in array_of_param_array]) 30 | self.high = np.array([x[1] for x in array_of_param_array]) 31 | self.num_discrete_space = self.low.shape[0] 32 | 33 | def sample(self): 34 | """ Returns a array with one sample from each discrete action space """ 35 | # For each row: round(random .* (max - min) + min, 0) 36 | np_random = np.random.RandomState() 37 | random_array = np_random.rand(self.num_discrete_space) 38 | return [int(x) for x in np.floor(np.multiply((self.high - self.low + 1.), random_array) + self.low)] 39 | def contains(self, x): 40 | return len(x) == self.num_discrete_space and (np.array(x) >= self.low).all() and (np.array(x) <= self.high).all() 41 | 42 | @property 43 | def shape(self): 44 | return self.num_discrete_space 45 | def __repr__(self): 46 | return "MultiDiscrete" + str(self.num_discrete_space) 47 | def __eq__(self, other): 48 | return np.array_equal(self.low, other.low) and np.array_equal(self.high, other.high) 49 | -------------------------------------------------------------------------------- /course3/utils/space.py: -------------------------------------------------------------------------------- 1 | from gym.utils import seeding 2 | 3 | 4 | class Space(object): 5 | """Defines the observation and action spaces, so you can write generic 6 | code that applies to any Env. For example, you can choose a random 7 | action. 8 | 9 | WARNING - Custom observation & action spaces can inherit from the `Space` 10 | class. 
However, most use-cases should be covered by the existing space 11 | classes (e.g. `Box`, `Discrete`, etc...), and container classes (`Tuple` & 12 | `Dict`). Note that parametrized probability distributions (through the 13 | `sample()` method), and batching functions (in `gym.vector.VectorEnv`), are 14 | only well-defined for instances of spaces provided in gym by default. 15 | Moreover, some implementations of Reinforcement Learning algorithms might 16 | not handle custom spaces properly. Use custom spaces with care. 17 | """ 18 | def __init__(self, shape=None, dtype=None): 19 | import numpy as np # takes about 300-400ms to import, so we load lazily 20 | self.shape = None if shape is None else tuple(shape) 21 | self.dtype = None if dtype is None else np.dtype(dtype) 22 | self._np_random = None 23 | 24 | @property 25 | def np_random(self): 26 | """Lazily seed the rng since this is expensive and only needed if 27 | sampling from this space. 28 | """ 29 | if self._np_random is None: 30 | self.seed() 31 | 32 | return self._np_random 33 | 34 | def sample(self): 35 | """Randomly sample an element of this space. Can be 36 | uniform or non-uniform sampling based on boundedness of space.""" 37 | raise NotImplementedError 38 | 39 | def seed(self, seed=None): 40 | """Seed the PRNG of this space. 
""" 41 | self._np_random, seed = seeding.np_random(seed) 42 | return [seed] 43 | 44 | def contains(self, x): 45 | """ 46 | Return boolean specifying if x is a valid 47 | member of this space 48 | """ 49 | raise NotImplementedError 50 | 51 | def __contains__(self, x): 52 | return self.contains(x) 53 | 54 | def to_jsonable(self, sample_n): 55 | """Convert a batch of samples from this space to a JSONable data type.""" 56 | # By default, assume identity is JSONable 57 | return sample_n 58 | 59 | def from_jsonable(self, sample_n): 60 | """Convert a JSONable data type to a batch of samples from this space.""" 61 | # By default, assume identity is JSONable 62 | return sample_n 63 | -------------------------------------------------------------------------------- /course4/README.md: -------------------------------------------------------------------------------- 1 | ## 实践课第四天 2 | 3 | ### 任务:经典棋牌 德州扑克 简化版 作业要求: 提交通过 4 | 5 | 6 | --- 7 | ### Env 👉请看 [chessandcard.py](env/chessandcard.py) 8 | 9 | ### Random 👉请看 [random/submission.py](examples/random/submission.py) 10 | 11 | ### 提交 👉请看 [submission.py](examples/random/submission.py) 12 | 13 | --- 14 | 15 | ### Install PettingZoo 16 | >pip install -i https://pypi.tuna.tsinghua.edu.cn/simple pettingzoo[classic]==1.10.0 17 | 18 | ### How to test submission 19 | 20 | Complete examples/submission/submission.py, and then set "policy_list" in line 176 of run_log.py 21 | >python run_log.py 22 | 23 | If no errors, your submission is ready to go~ 24 | 25 | ___ 26 | Have a good time~~~ -------------------------------------------------------------------------------- /course4/docs/rlchina_pbl.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jidiai/SummerCourse2022/d9cd874cd33276c7ac5244ec7f68943452a205a8/course4/docs/rlchina_pbl.zip -------------------------------------------------------------------------------- /course4/env/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .chessandcard import * 2 | -------------------------------------------------------------------------------- /course4/env/chooseenv.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # 作者:zruizhi 3 | # 创建时间: 2020/9/11 11:17 上午 4 | # 描述:选择运行环境,需要维护env/__ini__.py && config.json(存储环境默认参数) 5 | 6 | import json 7 | import env 8 | import os 9 | 10 | 11 | def make(env_type, conf=None): 12 | file_path = os.path.join(os.path.dirname(__file__), 'config.json') 13 | if not conf: 14 | with open(file_path) as f: 15 | conf = json.load(f)[env_type] 16 | class_literal = conf['class_literal'] 17 | return getattr(env, class_literal)(conf) 18 | 19 | 20 | if __name__ == "__main__": 21 | make("classic_MountainCar-v0") 22 | -------------------------------------------------------------------------------- /course4/env/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "chessandcard-leduc_holdem_v3": { 3 | "class_literal": "ChessAndCard", 4 | "n_player": 2, 5 | "max_step": 10000, 6 | "game_name": "leduc_holdem_v3", 7 | "is_obs_continuous": false, 8 | "is_act_continuous": false, 9 | "agent_nums": [1,1], 10 | "obs_type": ["dict", "dict"] 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /course4/env/obs_interfaces/observation.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # 作者:zruizhi 3 | # 创建时间: 2020/11/13 3:51 下午 4 | # 描述:observation的各种接口类 5 | obs_type = ["grid", "vector", "dict"] 6 | 7 | 8 | class GridObservation(object): 9 | def get_grid_observation(self, current_state, player_id, info_before): 10 | raise NotImplementedError 11 | 12 | def get_grid_many_observation(self, current_state, player_id_list, info_before=''): 13 | all_obs = [] 14 | for i in player_id_list: 15 | 
all_obs.append(self.get_grid_observation(current_state, i, info_before)) 16 | return all_obs 17 | 18 | 19 | class VectorObservation(object): 20 | def get_vector_observation(self, current_state, player_id, info_before): 21 | raise NotImplementedError 22 | 23 | def get_vector_many_observation(self, current_state, player_id_list, info_before=''): 24 | all_obs = [] 25 | for i in player_id_list: 26 | all_obs.append(self.get_vector_observation(current_state, i, info_before)) 27 | return all_obs 28 | 29 | 30 | class DictObservation(object): 31 | def get_dict_observation(self, current_state, player_id, info_before): 32 | raise NotImplementedError 33 | 34 | def get_dict_many_observation(self, current_state, player_id_list, info_before=''): 35 | all_obs = [] 36 | for i in player_id_list: 37 | all_obs.append(self.get_dict_observation(current_state, i, info_before)) 38 | return all_obs 39 | 40 | 41 | # todo: observation builder 42 | class CustomObservation(object): 43 | def get_custom_observation(self, current_state, player_id): 44 | raise NotImplementedError 45 | 46 | def get_custom_obs_space(self, player_id): 47 | raise NotImplementedError 48 | 49 | def get_custom_many_observation(self, current_state, player_id_list): 50 | all_obs = [] 51 | for i in player_id_list: 52 | all_obs.append(self.get_custom_observation(current_state, i)) 53 | return all_obs 54 | 55 | def get_custom_many_obs_space(self, player_id_list): 56 | all_obs_space = [] 57 | for i in player_id_list: 58 | all_obs_space.append(self.get_custom_obs_space(i)) 59 | return all_obs_space 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /course4/env/simulators/game.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # 作者:zruizhi 3 | # 创建时间: 2020/7/10 10:24 上午 4 | # 描述: 5 | from abc import ABC, abstractmethod 6 | 7 | 8 | class Game(ABC): 9 | def __init__(self, n_player, is_obs_continuous, 
is_act_continuous, game_name, agent_nums, obs_type): 10 | self.n_player = n_player 11 | self.current_state = None 12 | self.all_observes = None 13 | self.is_obs_continuous = is_obs_continuous 14 | self.is_act_continuous = is_act_continuous 15 | self.game_name = game_name 16 | self.agent_nums = agent_nums 17 | self.obs_type = obs_type 18 | 19 | def get_config(self, player_id): 20 | raise NotImplementedError 21 | 22 | def get_render_data(self, current_state): 23 | return current_state 24 | 25 | def set_current_state(self, current_state): 26 | raise NotImplementedError 27 | 28 | @abstractmethod 29 | def is_terminal(self): 30 | raise NotImplementedError 31 | 32 | def get_next_state(self, all_action): 33 | raise NotImplementedError 34 | 35 | def get_reward(self, all_action): 36 | raise NotImplementedError 37 | 38 | @abstractmethod 39 | def step(self, all_action): 40 | raise NotImplementedError 41 | 42 | @abstractmethod 43 | def reset(self): 44 | raise NotImplementedError 45 | 46 | def set_action_space(self): 47 | raise NotImplementedError 48 | 49 | -------------------------------------------------------------------------------- /course4/examples/random/submission.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Time : 2022/8/10 下午4:14 3 | # Author: Yahui Cui 4 | 5 | """ 6 | # =================================== Important ========================================= 7 | Notes: 8 | 1. this agents is random agents , which can fit any env in Jidi platform. 9 | 2. 
if you want to load .pth file, please follow the instruction here: 10 | https://github.com/jidiai/ai_lib/blob/master/examples/demo 11 | """ 12 | 13 | 14 | def my_controller(observation, action_space, is_act_continuous=True): 15 | agent_action = [] 16 | for i in range(len(action_space)): 17 | action_ = sample_single_dim(action_space[i], is_act_continuous) 18 | agent_action.append(action_) 19 | return agent_action 20 | 21 | 22 | def sample_single_dim(action_space_list_each, is_act_continuous): 23 | each = [] 24 | if is_act_continuous: 25 | each = action_space_list_each.sample() 26 | else: 27 | if action_space_list_each.__class__.__name__ == "Discrete": 28 | each = [0] * action_space_list_each.n 29 | idx = action_space_list_each.sample() 30 | each[idx] = 1 31 | elif action_space_list_each.__class__.__name__ == "MultiDiscreteParticle": 32 | each = [] 33 | nvec = action_space_list_each.high - action_space_list_each.low + 1 34 | sample_indexes = action_space_list_each.sample() 35 | 36 | for i in range(len(nvec)): 37 | dim = nvec[i] 38 | new_action = [0] * dim 39 | index = sample_indexes[i] 40 | new_action[index] = 1 41 | each.extend(new_action) 42 | elif action_space_list_each.__class__.__name__ == "Discrete_SC2": 43 | each = action_space_list_each.sample() 44 | elif action_space_list_each.__class__.__name__ == "Box": 45 | each = action_space_list_each.sample() 46 | return each 47 | -------------------------------------------------------------------------------- /course4/examples/submission/submission.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Time : 2022/8/10 下午4:14 3 | # Author: Yahui Cui 4 | 5 | """ 6 | # =================================== Important ========================================= 7 | Notes: 8 | 1. this agents is random agents , which can fit any env in Jidi platform. 9 | 2. 
if you want to load .pth file, please follow the instruction here: 10 | https://github.com/jidiai/ai_lib/blob/master/examples/demo 11 | """ 12 | 13 | 14 | def my_controller(observation, action_space, is_act_continuous=True): 15 | pass 16 | -------------------------------------------------------------------------------- /course4/utils/box.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .space import Space 4 | from gym import logger 5 | 6 | 7 | class Box(Space): 8 | """ 9 | A (possibly unbounded) box in R^n. Specifically, a Box represents the 10 | Cartesian product of n closed intervals. Each interval has the form of one 11 | of [a, b], (-oo, b], [a, oo), or (-oo, oo). 12 | 13 | There are two common use cases: 14 | 15 | * Identical bound for each dimension:: 16 | >>> Box(low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) 17 | Box(3, 4) 18 | 19 | * Independent bound for each dimension:: 20 | >>> Box(low=np.array([-1.0, -2.0]), high=np.array([2.0, 4.0]), dtype=np.float32) 21 | Box(2,) 22 | 23 | """ 24 | def __init__(self, low, high, shape=None, dtype=np.float32): 25 | assert dtype is not None, 'dtype must be explicitly provided. 
' 26 | self.dtype = np.dtype(dtype) 27 | 28 | # determine shape if it isn't provided directly 29 | if shape is not None: 30 | shape = tuple(shape) 31 | assert np.isscalar(low) or low.shape == shape, "low.shape doesn't match provided shape" 32 | assert np.isscalar(high) or high.shape == shape, "high.shape doesn't match provided shape" 33 | elif not np.isscalar(low): 34 | shape = low.shape 35 | assert np.isscalar(high) or high.shape == shape, "high.shape doesn't match low.shape" 36 | elif not np.isscalar(high): 37 | shape = high.shape 38 | assert np.isscalar(low) or low.shape == shape, "low.shape doesn't match high.shape" 39 | else: 40 | raise ValueError("shape must be provided or inferred from the shapes of low or high") 41 | 42 | if np.isscalar(low): 43 | low = np.full(shape, low, dtype=dtype) 44 | 45 | if np.isscalar(high): 46 | high = np.full(shape, high, dtype=dtype) 47 | 48 | self.shape = shape 49 | self.low = low 50 | self.high = high 51 | 52 | def _get_precision(dtype): 53 | if np.issubdtype(dtype, np.floating): 54 | return np.finfo(dtype).precision 55 | else: 56 | return np.inf 57 | low_precision = _get_precision(self.low.dtype) 58 | high_precision = _get_precision(self.high.dtype) 59 | dtype_precision = _get_precision(self.dtype) 60 | if min(low_precision, high_precision) > dtype_precision: 61 | logger.warn("Box bound precision lowered by casting to {}".format(self.dtype)) 62 | self.low = self.low.astype(self.dtype) 63 | self.high = self.high.astype(self.dtype) 64 | 65 | # Boolean arrays which indicate the interval type for each coordinate 66 | self.bounded_below = -np.inf < self.low 67 | self.bounded_above = np.inf > self.high 68 | 69 | super(Box, self).__init__(self.shape, self.dtype) 70 | 71 | def is_bounded(self, manner="both"): 72 | below = np.all(self.bounded_below) 73 | above = np.all(self.bounded_above) 74 | if manner == "both": 75 | return below and above 76 | elif manner == "below": 77 | return below 78 | elif manner == "above": 79 | return above 
80 | else: 81 | raise ValueError("manner is not in {'below', 'above', 'both'}") 82 | 83 | def sample(self): 84 | """ 85 | Generates a single random sample inside of the Box. 86 | 87 | In creating a sample of the box, each coordinate is sampled according to 88 | the form of the interval: 89 | 90 | * [a, b] : uniform distribution 91 | * [a, oo) : shifted exponential distribution 92 | * (-oo, b] : shifted negative exponential distribution 93 | * (-oo, oo) : normal distribution 94 | """ 95 | high = self.high if self.dtype.kind == 'f' \ 96 | else self.high.astype('int64') + 1 97 | sample = np.empty(self.shape) 98 | 99 | # Masking arrays which classify the coordinates according to interval 100 | # type 101 | unbounded = ~self.bounded_below & ~self.bounded_above 102 | upp_bounded = ~self.bounded_below & self.bounded_above 103 | low_bounded = self.bounded_below & ~self.bounded_above 104 | bounded = self.bounded_below & self.bounded_above 105 | 106 | 107 | # Vectorized sampling by interval type 108 | sample[unbounded] = self.np_random.normal( 109 | size=unbounded[unbounded].shape) 110 | 111 | sample[low_bounded] = self.np_random.exponential( 112 | size=low_bounded[low_bounded].shape) + self.low[low_bounded] 113 | 114 | sample[upp_bounded] = -self.np_random.exponential( 115 | size=upp_bounded[upp_bounded].shape) + self.high[upp_bounded] 116 | 117 | sample[bounded] = self.np_random.uniform(low=self.low[bounded], 118 | high=high[bounded], 119 | size=bounded[bounded].shape) 120 | if self.dtype.kind == 'i': 121 | sample = np.floor(sample) 122 | 123 | return sample.astype(self.dtype) 124 | 125 | def contains(self, x): 126 | if isinstance(x, list): 127 | x = np.array(x) # Promote list to array for contains check 128 | return x.shape == self.shape and np.all(x >= self.low) and np.all(x <= self.high) 129 | 130 | def to_jsonable(self, sample_n): 131 | return np.array(sample_n).tolist() 132 | 133 | def from_jsonable(self, sample_n): 134 | return [np.asarray(sample) for sample in 
sample_n] 135 | 136 | def __repr__(self): 137 | return "Box({}, {}, {}, {})".format(self.low.min(), self.high.max(), self.shape, self.dtype) 138 | 139 | def __eq__(self, other): 140 | return isinstance(other, Box) and (self.shape == other.shape) and np.allclose(self.low, other.low) and np.allclose(self.high, other.high) 141 | -------------------------------------------------------------------------------- /course4/utils/discrete.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .space import Space 3 | 4 | 5 | class Discrete(Space): 6 | r"""A discrete space in :math:`\{ 0, 1, \\dots, n-1 \}`. 7 | 8 | Example:: 9 | 10 | >>> Discrete(2) 11 | 12 | """ 13 | def __init__(self, n): 14 | assert n >= 0 15 | self.n = n 16 | super(Discrete, self).__init__((), np.int64) 17 | 18 | def sample(self): 19 | return self.np_random.randint(self.n) 20 | 21 | def contains(self, x): 22 | if isinstance(x, int): 23 | as_int = x 24 | elif isinstance(x, (np.generic, np.ndarray)) and (x.dtype.char in np.typecodes['AllInteger'] and x.shape == ()): 25 | as_int = int(x) 26 | else: 27 | return False 28 | return as_int >= 0 and as_int < self.n 29 | 30 | def __repr__(self): 31 | return "Discrete(%d)" % self.n 32 | 33 | def __eq__(self, other): 34 | return isinstance(other, Discrete) and self.n == other.n 35 | -------------------------------------------------------------------------------- /course4/utils/get_logger.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import logging 3 | import time 4 | import os 5 | 6 | 7 | def get_logger(log_path, name, save_file=False, console_out=False, json_file=False): 8 | if not os.path.exists(log_path): 9 | os.mkdir(log_path) 10 | 11 | logger = logging.getLogger() 12 | logger.setLevel(logging.INFO) 13 | # 每分钟建一个文件 14 | rq = time.strftime('%Y%m%d%H%M', time.localtime(time.time())) 15 | log_name = log_path + rq + '_' + name+ '.log' 16 
| json_log_name = log_path + rq + '_' + name + '.json' 17 | logfile = log_name 18 | if save_file: 19 | fh = logging.FileHandler(logfile, mode='a') 20 | fh.setLevel(logging.DEBUG) 21 | formatter = logging.Formatter("%(message)s") 22 | fh.setFormatter(formatter) 23 | logger.addHandler(fh) 24 | # 输出到控制台 25 | if console_out: 26 | console = logging.StreamHandler() 27 | console.setLevel(logging.INFO) 28 | logger.addHandler(console) 29 | 30 | # 输出到json 31 | if json_file: 32 | fh_json = logging.FileHandler(json_log_name, mode='a') 33 | fh_json.setLevel(logging.DEBUG) 34 | formatter_json = logging.Formatter("%(message)s") 35 | fh_json.setFormatter(formatter_json) 36 | logger.addHandler(fh_json) 37 | 38 | return logger -------------------------------------------------------------------------------- /course4/utils/mutli_discrete_particle.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Time : 2021/4/8 下午2:42 3 | # Author: Yahui Cui 4 | 5 | # An old version of OpenAI Gym's multi_discrete.py. (Was getting affected by Gym updates) 6 | # (https://github.com/openai/gym/blob/1fb81d4e3fb780ccf77fec731287ba07da35eb84/gym/spaces/multi_discrete.py) 7 | 8 | import numpy as np 9 | 10 | import gym 11 | 12 | class MultiDiscreteParticle(gym.Space): 13 | """ 14 | - The multi-discrete action space consists of a series of discrete action spaces with different parameters 15 | - It can be adapted to both a Discrete action space or a continuous (Box) action space 16 | - It is useful to represent game controllers or keyboards where each key can be represented as a discrete action space 17 | - It is parametrized by passing an array of arrays containing [min, max] for each discrete action space 18 | where the discrete action space can take any integers from `min` to `max` (both inclusive) 19 | Note: A value of 0 always need to represent the NOOP action. 20 | e.g. 
Nintendo Game Controller 21 | - Can be conceptualized as 3 discrete action spaces: 22 | 1) Arrow Keys: Discrete 5 - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4] - params: min: 0, max: 4 23 | 2) Button A: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1 24 | 3) Button B: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1 25 | - Can be initialized as 26 | MultiDiscrete([ [0,4], [0,1], [0,1] ]) 27 | """ 28 | def __init__(self, array_of_param_array): 29 | self.low = np.array([x[0] for x in array_of_param_array]) 30 | self.high = np.array([x[1] for x in array_of_param_array]) 31 | self.num_discrete_space = self.low.shape[0] 32 | 33 | def sample(self): 34 | """ Returns a array with one sample from each discrete action space """ 35 | # For each row: round(random .* (max - min) + min, 0) 36 | np_random = np.random.RandomState() 37 | random_array = np_random.rand(self.num_discrete_space) 38 | return [int(x) for x in np.floor(np.multiply((self.high - self.low + 1.), random_array) + self.low)] 39 | def contains(self, x): 40 | return len(x) == self.num_discrete_space and (np.array(x) >= self.low).all() and (np.array(x) <= self.high).all() 41 | 42 | @property 43 | def shape(self): 44 | return self.num_discrete_space 45 | def __repr__(self): 46 | return "MultiDiscrete" + str(self.num_discrete_space) 47 | def __eq__(self, other): 48 | return np.array_equal(self.low, other.low) and np.array_equal(self.high, other.high) 49 | -------------------------------------------------------------------------------- /course4/utils/space.py: -------------------------------------------------------------------------------- 1 | from gym.utils import seeding 2 | 3 | 4 | class Space(object): 5 | """Defines the observation and action spaces, so you can write generic 6 | code that applies to any Env. For example, you can choose a random 7 | action. 8 | 9 | WARNING - Custom observation & action spaces can inherit from the `Space` 10 | class. 
However, most use-cases should be covered by the existing space 11 | classes (e.g. `Box`, `Discrete`, etc...), and container classes (`Tuple` & 12 | `Dict`). Note that parametrized probability distributions (through the 13 | `sample()` method), and batching functions (in `gym.vector.VectorEnv`), are 14 | only well-defined for instances of spaces provided in gym by default. 15 | Moreover, some implementations of Reinforcement Learning algorithms might 16 | not handle custom spaces properly. Use custom spaces with care. 17 | """ 18 | def __init__(self, shape=None, dtype=None): 19 | import numpy as np # takes about 300-400ms to import, so we load lazily 20 | self.shape = None if shape is None else tuple(shape) 21 | self.dtype = None if dtype is None else np.dtype(dtype) 22 | self._np_random = None 23 | 24 | @property 25 | def np_random(self): 26 | """Lazily seed the rng since this is expensive and only needed if 27 | sampling from this space. 28 | """ 29 | if self._np_random is None: 30 | self.seed() 31 | 32 | return self._np_random 33 | 34 | def sample(self): 35 | """Randomly sample an element of this space. Can be 36 | uniform or non-uniform sampling based on boundedness of space.""" 37 | raise NotImplementedError 38 | 39 | def seed(self, seed=None): 40 | """Seed the PRNG of this space. 
""" 41 | self._np_random, seed = seeding.np_random(seed) 42 | return [seed] 43 | 44 | def contains(self, x): 45 | """ 46 | Return boolean specifying if x is a valid 47 | member of this space 48 | """ 49 | raise NotImplementedError 50 | 51 | def __contains__(self, x): 52 | return self.contains(x) 53 | 54 | def to_jsonable(self, sample_n): 55 | """Convert a batch of samples from this space to a JSONable data type.""" 56 | # By default, assume identity is JSONable 57 | return sample_n 58 | 59 | def from_jsonable(self, sample_n): 60 | """Convert a JSONable data type to a batch of samples from this space.""" 61 | # By default, assume identity is JSONable 62 | return sample_n 63 | -------------------------------------------------------------------------------- /course5/README.md: -------------------------------------------------------------------------------- 1 | ## 实践课第五天 2 | 3 | ### 任务:入门 REVIVE 冰箱控温 作业要求: 提交通过并且在金榜的排名高于Jidi_random 4 | 5 | 6 | --- 7 | ### Env 👉请看 [revive_refrigerator.py](env/revive_refrigerator.py) 8 | 9 | ### Random 👉请看 [random/submission.py](examples/random/submission.py) 10 | 11 | ### 提交 👉请看 [submission.py](examples/random/submission.py) 12 | 13 | --- 14 | 15 | ### Install REVIVE SDK (if training with REVIVE SDK) 16 | >https://www.revive.cn/help/polixir-revive-sdk/text/introduction.html 17 | 18 | ### How to test submission 19 | 20 | Complete examples/submission/submission.py, and then set "policy_list" in line 176 of run_log.py 21 | >python run_log.py 22 | 23 | If no errors, your submission is ready to go~ 24 | 25 | 26 | ### Ready to submit 27 | > random: [random/submission.py](examples/random/submission.py) 28 | 29 | > REVIVE Example: [revive_example/submission.py](examples/revive_example/submission.py) 30 | > and [revive_example/revive_policy.pkl](examples/revive_example/revive_policy.pkl) 31 | 32 | 33 | ___ 34 | Have a good time~~~ -------------------------------------------------------------------------------- /course5/env/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .revive_refrigerator import * 2 | -------------------------------------------------------------------------------- /course5/env/chooseenv.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # 作者:zruizhi 3 | # 创建时间: 2020/9/11 11:17 上午 4 | # 描述:选择运行环境,需要维护env/__ini__.py && config.json(存储环境默认参数) 5 | 6 | import json 7 | import env 8 | import os 9 | 10 | 11 | def make(env_type, conf=None): 12 | file_path = os.path.join(os.path.dirname(__file__), 'config.json') 13 | if not conf: 14 | with open(file_path) as f: 15 | conf = json.load(f)[env_type] 16 | class_literal = conf['class_literal'] 17 | return getattr(env, class_literal)(conf) 18 | 19 | 20 | if __name__ == "__main__": 21 | make("classic_MountainCar-v0") 22 | -------------------------------------------------------------------------------- /course5/env/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "revive-refrigerator": { 3 | "class_literal": "Refrigerator", 4 | "n_player": 1, 5 | "max_step": 2000, 6 | "game_name": "refrigerator", 7 | "is_obs_continuous": true, 8 | "is_act_continuous": true, 9 | "agent_nums": [1], 10 | "obs_type": ["dict"] 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /course5/env/obs_interfaces/observation.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # 作者:zruizhi 3 | # 创建时间: 2020/11/13 3:51 下午 4 | # 描述:observation的各种接口类 5 | obs_type = ["grid", "vector", "dict"] 6 | 7 | 8 | class GridObservation(object): 9 | def get_grid_observation(self, current_state, player_id, info_before): 10 | raise NotImplementedError 11 | 12 | def get_grid_many_observation(self, current_state, player_id_list, info_before=''): 13 | all_obs = [] 14 | for i in player_id_list: 15 | 
class VectorObservation(object):
    """Interface for games that expose per-player vector observations."""

    def get_vector_observation(self, current_state, player_id, info_before):
        """Return the vector observation for one player; must be overridden."""
        raise NotImplementedError

    def get_vector_many_observation(self, current_state, player_id_list, info_before=''):
        """Return one observation per player id, in list order."""
        return [self.get_vector_observation(current_state, i, info_before)
                for i in player_id_list]


class DictObservation(object):
    """Interface for games that expose per-player dict observations."""

    def get_dict_observation(self, current_state, player_id, info_before):
        """Return the dict observation for one player; must be overridden."""
        raise NotImplementedError

    def get_dict_many_observation(self, current_state, player_id_list, info_before=''):
        """Return one observation per player id, in list order."""
        return [self.get_dict_observation(current_state, i, info_before)
                for i in player_id_list]


# todo: observation builder
class CustomObservation(object):
    """Interface for games with fully custom observations and obs spaces."""

    def get_custom_observation(self, current_state, player_id):
        """Return the custom observation for one player; must be overridden."""
        raise NotImplementedError

    def get_custom_obs_space(self, player_id):
        """Return the observation space for one player; must be overridden."""
        raise NotImplementedError

    def get_custom_many_observation(self, current_state, player_id_list):
        """Return one observation per player id, in list order."""
        return [self.get_custom_observation(current_state, i)
                for i in player_id_list]

    def get_custom_many_obs_space(self, player_id_list):
        """Return one observation space per player id, in list order."""
        return [self.get_custom_obs_space(i) for i in player_id_list]
class DoorOpen():
    """Scripted agent that keeps the refrigerator door open for a fixed
    number of consecutive steps, then closes it."""

    def __init__(self, door_open_time=10):
        # remaining steps the door stays open; restored by reset()
        self.door_open_time = door_open_time
        self.init_door_open_time = door_open_time
        self.door_open = False

    def act(self):
        """Consume one open-step and report whether the door is still open."""
        self.door_open_time -= 1
        self.door_open = self.door_open_time >= 0
        return self.door_open

    def reset(self):
        """Close the door and refill the open-time budget."""
        self.door_open = False
        self.door_open_time = self.init_door_open_time


class Simulator:
    """Refrigerator temperature-control simulator: cooling power pulls the
    temperature down, while heat leaks in toward the ambient temperature
    (faster when the door is open)."""

    def __init__(self, init_temperature=10):
        self.outdoor_temperature = 15   # ambient temperature the box leaks toward
        self.temp = init_temperature
        self.door_state = False

    def update(self, power, dt, door_open=False):
        """Advance the simulation by *dt* with the given cooling *power*.

        Returns the (noisy) temperature reading after the step.
        """
        self.door_state = door_open

        if power > 0:
            self.temp -= power * dt  # cooling proportional to applied power
        # Heat leak toward ambient; the coefficient quadruples with the door open.
        leak_rate = 0.02 if self.door_state == False else 0.08
        self.temp = self.temp - (self.temp - self.outdoor_temperature) * leak_rate * dt
        return self.get_temperature()

    def get_temperature(self):
        """Noisy sensor reading: true temperature plus N(0, 0.1) noise."""
        return self.temp + np.random.normal(0, 0.1)

    def get_door_state(self):
        """Whether the door was open during the last update."""
        return self.door_state

    def reset(self, init_temperature):
        """Restore the initial temperature and reseed both RNGs to 0 so
        episodes are reproducible (preserved from the original)."""
        self.temp = init_temperature
        self.outdoor_temperature = 15
        random.seed(0)
        np.random.seed(0)
DoorOpen, Simulator 16 | from course5.utils.box import Box 17 | 18 | 19 | class Refrigerator(Game): 20 | def __init__(self, conf, seed=0): 21 | super(Refrigerator, self).__init__(conf['n_player'], conf['is_obs_continuous'], conf['is_act_continuous'], 22 | conf['game_name'], conf['agent_nums'], conf['obs_type']) 23 | self.max_step = int(conf["max_step"]) 24 | self.done = False 25 | self.seed = None 26 | self.set_seed(seed) 27 | self.won = {} 28 | self.n_return = [0] * self.n_player 29 | 30 | self.step_cnt = 0 31 | self.open_interval = 200 32 | self.open_door = False 33 | self.door_open_after_step = self.open_interval + 1 34 | self.init_temperature = 10 35 | self.target_temperature = -2 36 | self.sampling_time = 1 37 | self.door_open_agent = DoorOpen(door_open_time=20) 38 | self.cool_simulator = Simulator(self.init_temperature) 39 | self.cool_simulator.reset(init_temperature=self.init_temperature) 40 | self.traj = [] 41 | 42 | self.joint_action_space = self.set_action_space() 43 | self.current_state = self.cool_simulator.get_temperature() 44 | self.all_observes = self.get_all_observes() 45 | self.init_info = self.get_info_after(False) 46 | 47 | def reset(self): 48 | self.won = {} 49 | self.n_return = [0] * self.n_player 50 | self.step_cnt = 0 51 | self.open_interval = 200 52 | self.open_door = False 53 | self.door_open_after_step = self.open_interval + 1 54 | self.init_temperature = 10 55 | self.target_temperature = -2 56 | self.sampling_time = 1 57 | self.door_open_agent = DoorOpen(door_open_time=20) 58 | self.cool_simulator = Simulator(self.init_temperature) 59 | self.cool_simulator.reset(init_temperature=self.init_temperature) 60 | self.traj = [] 61 | 62 | self.current_state = self.cool_simulator.get_temperature() 63 | self.all_observes = self.get_all_observes() 64 | self.init_info = self.get_info_after(False) 65 | return self.all_observes 66 | 67 | def step(self, joint_action): 68 | self.is_valid_action(joint_action) 69 | if self.step_cnt % self.open_interval == 
0: 70 | if random.random() < 0.5: 71 | self.open_door = True 72 | self.door_open_agent.reset() 73 | self.door_open_after_step = random.randint(0, self.open_interval - self.door_open_agent.door_open_time) 74 | action = joint_action[0][0] 75 | if self.open_door and self.step_cnt % self.open_interval >= self.door_open_after_step: 76 | door_open = self.door_open_agent.act() 77 | self.cool_simulator.update(power=action, dt=self.sampling_time, door_open=door_open) 78 | else: 79 | door_open = False 80 | self.cool_simulator.update(power=action, dt=self.sampling_time, door_open=door_open) 81 | self.current_state = self.cool_simulator.get_temperature() 82 | self.all_observes = self.get_all_observes() 83 | self.traj.append(copy.deepcopy(self.current_state)) 84 | reward = -(abs(self.current_state - self.init_temperature)) 85 | self.step_cnt += 1 86 | done = self.is_terminal() 87 | if done: 88 | self.set_n_return() 89 | info_after = self.get_info_after(door_open) 90 | 91 | return self.all_observes, reward, done, '', info_after 92 | 93 | def is_valid_action(self, joint_action): 94 | 95 | if np.isscalar(joint_action): 96 | raise Exception("Input joint action dimension should be (1,)") 97 | 98 | if len(joint_action) != self.n_player: 99 | raise Exception("Input joint action dimension should be (1,)") 100 | 101 | if np.isscalar(joint_action[0]): 102 | raise Exception("Input joint action dimension should be (1,)") 103 | 104 | if len(joint_action[0]) != 1: 105 | raise Exception("Input joint action dimension should be (1,)") 106 | 107 | if isinstance(joint_action[0][0], np.ndarray): 108 | joint_action[0][0] = joint_action[0][0][0] 109 | 110 | if not np.isscalar(joint_action[0][0]): 111 | raise Exception("Value in the action should be a scalar") 112 | 113 | if joint_action[0][0] < 0 or joint_action[0][0] > 10: 114 | raise Exception("Value of action should between 0 and 10") 115 | 116 | def set_action_space(self): 117 | return [[Box(low=0, high=10, shape=(1,))]] 118 | 119 | def 
get_all_observes(self): 120 | return [{"obs": copy.deepcopy(self.current_state), "controlled_player_index": 0}] 121 | 122 | def get_single_action_space(self, player_id): 123 | return self.joint_action_space[player_id] 124 | 125 | def is_terminal(self): 126 | if self.step_cnt >= self.max_step: 127 | self.done = True 128 | 129 | return self.done 130 | 131 | def set_seed(self, seed): 132 | if seed is not None: 133 | self.seed = seed 134 | random.seed(self.seed) 135 | 136 | def get_info_after(self, door_open): 137 | return {"temperature": copy.deepcopy(self.current_state), "controlled_player_index": 0, "door_open": door_open} 138 | 139 | def set_n_return(self): 140 | self.n_return[0] = -np.mean(np.abs(np.array(self.traj) - self.target_temperature)) 141 | 142 | def check_win(self): 143 | return self.won 144 | 145 | -------------------------------------------------------------------------------- /course5/env/simulators/game.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # 作者:zruizhi 3 | # 创建时间: 2020/7/10 10:24 上午 4 | # 描述: 5 | from abc import ABC, abstractmethod 6 | 7 | 8 | class Game(ABC): 9 | def __init__(self, n_player, is_obs_continuous, is_act_continuous, game_name, agent_nums, obs_type): 10 | self.n_player = n_player 11 | self.current_state = None 12 | self.all_observes = None 13 | self.is_obs_continuous = is_obs_continuous 14 | self.is_act_continuous = is_act_continuous 15 | self.game_name = game_name 16 | self.agent_nums = agent_nums 17 | self.obs_type = obs_type 18 | 19 | def get_config(self, player_id): 20 | raise NotImplementedError 21 | 22 | def get_render_data(self, current_state): 23 | return current_state 24 | 25 | def set_current_state(self, current_state): 26 | raise NotImplementedError 27 | 28 | @abstractmethod 29 | def is_terminal(self): 30 | raise NotImplementedError 31 | 32 | def get_next_state(self, all_action): 33 | raise NotImplementedError 34 | 35 | def get_reward(self, 
def my_controller(observation, action_space, is_act_continuous=True):
    """Random policy: draw one action per sub-space in *action_space*.

    Args:
        observation: current observation (unused by the random policy).
        action_space: list of sub-spaces for this agent.
        is_act_continuous: if True, every sub-space is sampled directly.

    Returns:
        A list with one sampled action per sub-space.
    """
    return [sample_single_dim(space, is_act_continuous) for space in action_space]


def sample_single_dim(action_space_list_each, is_act_continuous):
    """Sample one action from a single sub-space.

    Discrete actions are returned one-hot encoded; continuous (Box) actions
    are returned as the space's raw sample. An unrecognized discrete space
    yields an empty list, matching the original fall-through behavior.
    """
    if is_act_continuous:
        return action_space_list_each.sample()

    name = action_space_list_each.__class__.__name__
    if name == "Discrete":
        each = [0] * action_space_list_each.n
        each[action_space_list_each.sample()] = 1
        return each
    if name == "MultiDiscreteParticle":
        each = []
        # One one-hot segment per discrete dimension, concatenated.
        nvec = action_space_list_each.high - action_space_list_each.low + 1
        sample_indexes = action_space_list_each.sample()
        for dim, index in zip(nvec, sample_indexes):
            one_hot = [0] * dim
            one_hot[index] = 1
            each.extend(one_hot)
        return each
    if name in ("Discrete_SC2", "Box"):
        return action_space_list_each.sample()
    return []
# -*- coding:utf-8 -*- 2 | # Time : 2022/8/10 下午4:14 3 | # Author: Yahui Cui 4 | 5 | """ 6 | # =================================== Important ========================================= 7 | Notes: 8 | 1. this agents is random agents , which can fit any env in Jidi platform. 9 | 2. if you want to load .pth file, please follow the instruction here: 10 | https://github.com/jidiai/ai_lib/blob/master/examples/demo 11 | """ 12 | 13 | 14 | def my_controller(observation, action_space, is_act_continuous=True): 15 | pass 16 | -------------------------------------------------------------------------------- /course5/utils/box.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .space import Space 4 | from gym import logger 5 | 6 | 7 | class Box(Space): 8 | """ 9 | A (possibly unbounded) box in R^n. Specifically, a Box represents the 10 | Cartesian product of n closed intervals. Each interval has the form of one 11 | of [a, b], (-oo, b], [a, oo), or (-oo, oo). 12 | 13 | There are two common use cases: 14 | 15 | * Identical bound for each dimension:: 16 | >>> Box(low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) 17 | Box(3, 4) 18 | 19 | * Independent bound for each dimension:: 20 | >>> Box(low=np.array([-1.0, -2.0]), high=np.array([2.0, 4.0]), dtype=np.float32) 21 | Box(2,) 22 | 23 | """ 24 | def __init__(self, low, high, shape=None, dtype=np.float32): 25 | assert dtype is not None, 'dtype must be explicitly provided. 
    def __init__(self, low, high, shape=None, dtype=np.float32):
        """Create a Box with bounds *low*/*high*.

        Either pass an explicit *shape* with scalar (or matching-shape array)
        bounds, or pass array bounds from which the shape is inferred; scalar
        bounds are broadcast to the full shape.
        """
        assert dtype is not None, 'dtype must be explicitly provided. '
        self.dtype = np.dtype(dtype)

        # determine shape if it isn't provided directly
        if shape is not None:
            shape = tuple(shape)
            assert np.isscalar(low) or low.shape == shape, "low.shape doesn't match provided shape"
            assert np.isscalar(high) or high.shape == shape, "high.shape doesn't match provided shape"
        elif not np.isscalar(low):
            shape = low.shape
            assert np.isscalar(high) or high.shape == shape, "high.shape doesn't match low.shape"
        elif not np.isscalar(high):
            shape = high.shape
            assert np.isscalar(low) or low.shape == shape, "low.shape doesn't match high.shape"
        else:
            raise ValueError("shape must be provided or inferred from the shapes of low or high")

        # Broadcast scalar bounds to full arrays of the resolved shape.
        if np.isscalar(low):
            low = np.full(shape, low, dtype=dtype)

        if np.isscalar(high):
            high = np.full(shape, high, dtype=dtype)

        self.shape = shape
        self.low = low
        self.high = high

        def _get_precision(dtype):
            # Floating dtypes have finite decimal precision; everything else
            # (ints) is treated as exact, i.e. infinite precision.
            if np.issubdtype(dtype, np.floating):
                return np.finfo(dtype).precision
            else:
                return np.inf
        low_precision = _get_precision(self.low.dtype)
        high_precision = _get_precision(self.high.dtype)
        dtype_precision = _get_precision(self.dtype)
        # Warn when casting the bounds to self.dtype would lose precision.
        if min(low_precision, high_precision) > dtype_precision:
            logger.warn("Box bound precision lowered by casting to {}".format(self.dtype))
        self.low = self.low.astype(self.dtype)
        self.high = self.high.astype(self.dtype)

        # Boolean arrays which indicate the interval type for each coordinate
        self.bounded_below = -np.inf < self.low
        self.bounded_above = np.inf > self.high

        super(Box, self).__init__(self.shape, self.dtype)
    def sample(self):
        """
        Generates a single random sample inside of the Box.

        In creating a sample of the box, each coordinate is sampled according to
        the form of the interval:

        * [a, b] : uniform distribution
        * [a, oo) : shifted exponential distribution
        * (-oo, b] : shifted negative exponential distribution
        * (-oo, oo) : normal distribution
        """
        # Integer boxes sample uniformly over [low, high] inclusive, hence
        # the +1 on the (exclusive) upper bound handed to uniform below.
        high = self.high if self.dtype.kind == 'f' \
                else self.high.astype('int64') + 1
        sample = np.empty(self.shape)

        # Masking arrays which classify the coordinates according to interval
        # type
        unbounded = ~self.bounded_below & ~self.bounded_above
        upp_bounded = ~self.bounded_below & self.bounded_above
        low_bounded = self.bounded_below & ~self.bounded_above
        bounded = self.bounded_below & self.bounded_above


        # Vectorized sampling by interval type
        sample[unbounded] = self.np_random.normal(
            size=unbounded[unbounded].shape)

        sample[low_bounded] = self.np_random.exponential(
            size=low_bounded[low_bounded].shape) + self.low[low_bounded]

        sample[upp_bounded] = -self.np_random.exponential(
            size=upp_bounded[upp_bounded].shape) + self.high[upp_bounded]

        sample[bounded] = self.np_random.uniform(low=self.low[bounded],
                                                 high=high[bounded],
                                                 size=bounded[bounded].shape)
        # Floor (not round) so integer samples stay uniform after the +1 shift.
        if self.dtype.kind == 'i':
            sample = np.floor(sample)

        return sample.astype(self.dtype)

    def contains(self, x):
        """Return True if x has this Box's shape and lies within its bounds."""
        if isinstance(x, list):
            x = np.array(x)  # Promote list to array for contains check
        return x.shape == self.shape and np.all(x >= self.low) and np.all(x <= self.high)

    def to_jsonable(self, sample_n):
        """Serialize a batch of samples to nested plain Python lists."""
        return np.array(sample_n).tolist()
class Discrete(Space):
    r"""A discrete space in :math:`\{ 0, 1, \\dots, n-1 \}`.

    Example::

        >>> Discrete(2)

    """
    def __init__(self, n):
        assert n >= 0
        self.n = n
        super(Discrete, self).__init__((), np.int64)

    def sample(self):
        """Draw a uniform random integer in [0, n)."""
        return self.np_random.randint(self.n)

    def contains(self, x):
        """True for plain ints, or 0-d integer numpy scalars/arrays, in [0, n)."""
        if isinstance(x, int):
            value = x
        elif isinstance(x, (np.generic, np.ndarray)) and (x.dtype.char in np.typecodes['AllInteger'] and x.shape == ()):
            value = int(x)
        else:
            return False
        return 0 <= value < self.n

    def __repr__(self):
        return "Discrete(%d)" % self.n

    def __eq__(self, other):
        return isinstance(other, Discrete) and self.n == other.n
def get_logger(log_path, name, save_file=False, console_out=False, json_file=False):
    """Configure and return the root logger used for game-result logging.

    Args:
        log_path: directory for the log files (created if missing).
        name: suffix appended to the minute-stamped file names.
        save_file: also write records to a ``.log`` file.
        console_out: also echo records to the console.
        json_file: also write records to a ``.json`` file.

    Returns:
        The configured ``logging`` root logger.
    """
    # Create the directory including parents — the old os.mkdir raised
    # FileNotFoundError when an intermediate directory was missing.
    os.makedirs(log_path, exist_ok=True)

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # One file per minute. os.path.join also works when log_path has no
    # trailing separator; the old string concatenation silently produced
    # names like "logs202201011200_x.log" fused onto the directory name.
    stamp = time.strftime('%Y%m%d%H%M', time.localtime(time.time()))
    log_name = os.path.join(log_path, stamp + '_' + name + '.log')
    json_log_name = os.path.join(log_path, stamp + '_' + name + '.json')

    formatter = logging.Formatter("%(message)s")

    if save_file:
        fh = logging.FileHandler(log_name, mode='a')
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(formatter)
        logger.addHandler(fh)

    # Echo to console (no formatter, as in the original).
    if console_out:
        console = logging.StreamHandler()
        console.setLevel(logging.INFO)
        logger.addHandler(console)

    # Mirror records into a .json file.
    if json_file:
        fh_json = logging.FileHandler(json_log_name, mode='a')
        fh_json.setLevel(logging.DEBUG)
        fh_json.setFormatter(formatter)
        logger.addHandler(fh_json)

    return logger
class MultiDiscreteParticle(gym.Space):
    """
    A series of discrete action sub-spaces with per-dimension bounds.

    - Adapts to both Discrete and continuous (Box) action layouts and is
      handy for controllers/keyboards where each key is its own discrete space.
    - Parametrized by an array of [min, max] pairs, one per dimension; each
      dimension takes any integer from min to max inclusive.
    Note: a value of 0 always needs to represent the NOOP action.
    e.g. a Nintendo game controller:
    - Conceptually 3 discrete sub-spaces:
        1) Arrow keys: Discrete 5 - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4] - min: 0, max: 4
        2) Button A: Discrete 2 - NOOP[0], Pressed[1] - min: 0, max: 1
        3) Button B: Discrete 2 - NOOP[0], Pressed[1] - min: 0, max: 1
    - Initialized as MultiDiscrete([ [0,4], [0,1], [0,1] ])
    """
    def __init__(self, array_of_param_array):
        self.low = np.array([pair[0] for pair in array_of_param_array])
        self.high = np.array([pair[1] for pair in array_of_param_array])
        self.num_discrete_space = self.low.shape[0]

    def sample(self):
        """Return a list with one uniform sample from each discrete sub-space."""
        # NOTE: a fresh RandomState is created per call, so sampling ignores
        # any external seeding — preserved from the original implementation.
        rng = np.random.RandomState()
        random_array = rng.rand(self.num_discrete_space)
        scaled = np.floor(np.multiply((self.high - self.low + 1.), random_array) + self.low)
        return [int(v) for v in scaled]

    def contains(self, x):
        """True when x has one in-bounds value per sub-space."""
        return len(x) == self.num_discrete_space and (np.array(x) >= self.low).all() and (np.array(x) <= self.high).all()

    @property
    def shape(self):
        # Returns the dimension count as an int (not a tuple) — a quirk
        # preserved from the original.
        return self.num_discrete_space

    def __repr__(self):
        return "MultiDiscrete" + str(self.num_discrete_space)

    def __eq__(self, other):
        return np.array_equal(self.low, other.low) and np.array_equal(self.high, other.high)
class Space(object):
    """Defines the observation and action spaces, so you can write generic
    code that applies to any Env. For example, you can choose a random
    action.

    WARNING - custom observation & action spaces may inherit from `Space`,
    but most use-cases are covered by the existing space classes (`Box`,
    `Discrete`, etc...) and the container classes (`Tuple` & `Dict`).
    Parametrized probability distributions (via `sample()`) and batching
    functions (in `gym.vector.VectorEnv`) are only well-defined for the
    spaces gym provides by default, and some RL algorithm implementations
    might not handle custom spaces properly. Use custom spaces with care.
    """
    def __init__(self, shape=None, dtype=None):
        # numpy takes about 300-400ms to import, so we load it lazily
        import numpy as np
        self.shape = None if shape is None else tuple(shape)
        self.dtype = None if dtype is None else np.dtype(dtype)
        self._np_random = None

    @property
    def np_random(self):
        """This space's RNG, seeded lazily on first access since seeding is
        expensive and only needed when actually sampling."""
        if self._np_random is None:
            self.seed()
        return self._np_random

    def sample(self):
        """Randomly sample an element of this space. Can be uniform or
        non-uniform depending on the boundedness of the space."""
        raise NotImplementedError

    def seed(self, seed=None):
        """Seed the PRNG of this space and return the seed list."""
        self._np_random, seed = seeding.np_random(seed)
        return [seed]

    def contains(self, x):
        """Return a boolean specifying whether x is a valid member of this
        space."""
        raise NotImplementedError

    def __contains__(self, x):
        return self.contains(x)

    def to_jsonable(self, sample_n):
        """Convert a batch of samples from this space to a JSONable data type."""
        # By default, assume identity is JSONable
        return sample_n

    def from_jsonable(self, sample_n):
        """Convert a JSONable data type to a batch of samples from this space."""
        # By default, assume identity is JSONable
        return sample_n