├── Miner-Testing-CodeSample
│   ├── src
│   │   └── build.sh
│   └── build
│       ├── RLModelSample.h5
│       ├── run.sh
│       ├── GAME_SOCKET.py
│       ├── RLModelSample.json
│       ├── TestingAgent.py
│       ├── MinerEnv.py
│       └── MINER_STATE.py
├── Miner-Training-Local-CodeSample
│   ├── Data
│   │   └── .gitkeep
│   ├── TrainedModels
│   │   └── .gitkeep
│   ├── Maps
│   │   ├── map6
│   │   ├── map4
│   │   ├── map12
│   │   ├── map2
│   │   ├── map5
│   │   ├── map11
│   │   ├── map7
│   │   ├── map3
│   │   ├── map9
│   │   ├── map1
│   │   ├── map10
│   │   └── map8
│   ├── bot2.py
│   ├── bot1.py
│   ├── bot3.py
│   ├── Memory.py
│   ├── MinerEnv.py
│   ├── DQNModel.py
│   ├── MINER_STATE.py
│   ├── TrainingClient.py
│   └── GAME_SOCKET_DUMMY.py
├── Maps
│   ├── map1.png
│   ├── map2.png
│   ├── map3.png
│   ├── map4.png
│   ├── map5.png
│   ├── image10.png
│   ├── image11.png
│   ├── image12.png
│   ├── image6.png
│   ├── image7.png
│   ├── image8.png
│   └── image9.png
├── Change logs.docx
├── MinerAI - CodeAISample.pdf
├── image
│   ├── codeAI
│   │   ├── Picture1.png
│   │   ├── Picture2.png
│   │   └── Picture3.png
│   └── minerEnv
│       ├── Picture1.png
│       ├── Picture10.png
│       ├── Picture11.png
│       ├── Picture2.png
│       ├── Picture3.png
│       ├── Picture4.png
│       ├── Picture5.png
│       ├── Picture6.png
│       ├── Picture7.png
│       ├── Picture8.png
│       └── Picture9.png
├── MinerAI - CodeAISample.docx
├── MinerAI - CodeAISample_en.pdf
├── MInerAI-Set-up-Environment.pdf
├── MinerAI - CodeAISample_en.docx
├── MInerAI - Cài đặt môi trường.docx
├── MinerAI - Cài đặt môi trường.pdf
├── Miner-Testing-Server
│   ├── README.md
│   └── DUMMY_SERVER.py
├── .gitignore
├── README.md
└── Miner-Colab-CodeSample
    └── Miner_Training_Colab_CodeSample.ipynb
/Miner-Testing-CodeSample/src/build.sh:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/Miner-Training-Local-CodeSample/Data/.gitkeep:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/Miner-Training-Local-CodeSample/TrainedModels/.gitkeep:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/Maps/map1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/Maps/map1.png
--------------------------------------------------------------------------------
/Maps/map2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/Maps/map2.png
--------------------------------------------------------------------------------
/Maps/map3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/Maps/map3.png
--------------------------------------------------------------------------------
/Maps/map4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/Maps/map4.png
--------------------------------------------------------------------------------
/Maps/map5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/Maps/map5.png
--------------------------------------------------------------------------------
/Change logs.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/Change logs.docx -------------------------------------------------------------------------------- /Maps/image10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/Maps/image10.png -------------------------------------------------------------------------------- /Maps/image11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/Maps/image11.png -------------------------------------------------------------------------------- /Maps/image12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/Maps/image12.png -------------------------------------------------------------------------------- /Maps/image6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/Maps/image6.png -------------------------------------------------------------------------------- /Maps/image7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/Maps/image7.png -------------------------------------------------------------------------------- /Maps/image8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/Maps/image8.png -------------------------------------------------------------------------------- /Maps/image9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/Maps/image9.png -------------------------------------------------------------------------------- /MinerAI - CodeAISample.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/MinerAI - CodeAISample.pdf -------------------------------------------------------------------------------- /image/codeAI/Picture1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/image/codeAI/Picture1.png -------------------------------------------------------------------------------- /image/codeAI/Picture2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/image/codeAI/Picture2.png -------------------------------------------------------------------------------- /image/codeAI/Picture3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/image/codeAI/Picture3.png -------------------------------------------------------------------------------- /MinerAI - CodeAISample.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/MinerAI - CodeAISample.docx -------------------------------------------------------------------------------- /MinerAI - CodeAISample_en.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/MinerAI - CodeAISample_en.pdf 
-------------------------------------------------------------------------------- /image/minerEnv/Picture1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/image/minerEnv/Picture1.png -------------------------------------------------------------------------------- /image/minerEnv/Picture10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/image/minerEnv/Picture10.png -------------------------------------------------------------------------------- /image/minerEnv/Picture11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/image/minerEnv/Picture11.png -------------------------------------------------------------------------------- /image/minerEnv/Picture2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/image/minerEnv/Picture2.png -------------------------------------------------------------------------------- /image/minerEnv/Picture3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/image/minerEnv/Picture3.png -------------------------------------------------------------------------------- /image/minerEnv/Picture4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/image/minerEnv/Picture4.png -------------------------------------------------------------------------------- /image/minerEnv/Picture5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/image/minerEnv/Picture5.png -------------------------------------------------------------------------------- /image/minerEnv/Picture6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/image/minerEnv/Picture6.png -------------------------------------------------------------------------------- /image/minerEnv/Picture7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/image/minerEnv/Picture7.png -------------------------------------------------------------------------------- /image/minerEnv/Picture8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/image/minerEnv/Picture8.png -------------------------------------------------------------------------------- /image/minerEnv/Picture9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/image/minerEnv/Picture9.png -------------------------------------------------------------------------------- /MInerAI-Set-up-Environment.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/MInerAI-Set-up-Environment.pdf -------------------------------------------------------------------------------- /MinerAI - CodeAISample_en.docx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/MinerAI - CodeAISample_en.docx -------------------------------------------------------------------------------- /MInerAI - Cài đặt môi trường.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/MInerAI - Cài đặt môi trường.docx -------------------------------------------------------------------------------- /MinerAI - Cài đặt môi trường.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/MinerAI - Cài đặt môi trường.pdf -------------------------------------------------------------------------------- /Miner-Testing-CodeSample/build/RLModelSample.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/Miner-Testing-CodeSample/build/RLModelSample.h5 -------------------------------------------------------------------------------- /Miner-Testing-CodeSample/build/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #workingDir=${PWD} 3 | #cd /tf2/bin 4 | #source ./activate 5 | #cd $workingDir 6 | python3 TestingAgent.py $1 $2 7 | -------------------------------------------------------------------------------- /Miner-Training-Local-CodeSample/Maps/map6: -------------------------------------------------------------------------------- 1 | [[-3,0,0,0,0,0,50,-3,50,0,-2,-2,-3,0,0,0,-3,-1,-2,-1,50],[0,0,-2,-1,0,-1,0,0,0,-2,-1,-3,50,-1,-1,-2,50,-2,-1,-2,50],[-3,-2,0,50,0,0,-3,-1,0,0,0,-3,0,-2,0,-1,-1,-1,0,-2,-1],[-3,-3,-2,0,-3,-3,-2,-1,0,0,-2,-3,0,0,0,0,-2,-3,-2,0,50],[50,-2,-3,0,0,0,-2,-2,0,0,-3,0,0,-3,-3,0,0,-3,-1,-3,0],[-2,-1,50,-2,-3,-3,-2,-1,-1,-3,50,-2,0,0,-1,-3,-1,-2,50,-2,0],[-2,-3,-1,-2,0,50,50,-1,-2,-2,-3,0,-2,-3,0,0,-1,50,-3,0,0],[-2,0,-2,0,-1,-1,0,-1,0,-1,0,-3,0,-1,0,-1,-2,0,-2,-3,-1],[-3,50,-2,-2,50,50,-1,-3,-2,-2,0,0,0,-3,-3,-1,-1,0,-1,50,0]] -------------------------------------------------------------------------------- /Miner-Training-Local-CodeSample/Maps/map4: -------------------------------------------------------------------------------- 1 | [[0,-1,0,0,0,0,0,-3,0,-1,0,-1,-3,150,500,200,-1,0,-2,-1,0],[-3,500,-3,-2,350,-1,0,0,0,0,0,0,-1,-1,50,-1,0,0,-2,350,-3],[-1,-3,-2,0,-1,-2,0,-1,0,0,0,0,0,0,-2,-1,0,0,-2,-1,-3],[0,-3,0,0,0,0,-2,600,-3,0,0,-2,-2,0,-2,0,0,0,0,0,0],[0,0,0,0,-1,-1,0,-3,0,-1,400,-3,-2,0,0,0,-1,-2,-1,-1,-1],[-1,0,0,-1,-3,-3,-1,-1,0,0,-3,-2,0,0,0,0,-1,700,-1,-1,-3],[350,-2,-1,-3,-2,-3,-3,-1,0,-3,0,0,0,0,-3,0,-1,-1,-1,-3,200],[0,-1,-3,-2,250,-2,-3,-1,0,0,0,0,-2,0,0,0,0,-1,-3,300,0],[0,-3,-2,300,1000,-2,-3,-1,0,-1,0,-2,100,-2,-1,0,-1,-3,400,0,800]] -------------------------------------------------------------------------------- /Miner-Training-Local-CodeSample/Maps/map12: -------------------------------------------------------------------------------- 1 | [[50,-2,-3,0,0,0,50,-3,-1,-1,-3,-1,0,-2,-3,-3,50,-3,0,-3,-3],[0,-1,-1,50,-2,-3,50,-3,0,-2,-3,0,0,0,-2,-2,0,0,0,-3,50],[-2,-1,0,50,0,-2,-3,-2,-1,-1,-2,50,-3,50,0,-3,-2,-1,0,-1,0],[0,0,-1,-3,0,0,-3,50,-2,-2,-1,-2,-3,0,0,-3,-1,-2,0,-3,-1],[50,0,-3,-2,-1,-2,-1,-2,-3,50,0,0,-2,0,-3,50,-1,-3,-3,50,-1],[-1,-3,-1,-2,-2,-3,-2,0,-2,-3,0,0,0,-2,-2,0,0,0,-2,-1,50],[50,-3,50,-3,-1,-3,-2,0,-3,-3,50,-1,-2,-1,-2,-2,0,0,-3,0,0],[0,-2,0,-1,0,-3,-2,-2,-1,-2,0,0,-3,-2,50,0,-3,-2,-3,-2,-3],[-1,-3,-2,-3,0,-3,50,-1,0,0,-2,-3,50,0,-2,0,0,0,50,-1,0]] 
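Each file in Miner-Training-Local-CodeSample/Maps, such as map12 above, is a plain JSON 2D array (9 rows by 21 columns, matching MAP_MAX_Y and MAP_MAX_X in TrainingClient.py). A cell value of 0 is empty ground, -1/-2/-3 mark tree, trap, and swamp cells (the negated TreeID/TrapID/SwampID constants used in MinerEnv.py), and a positive value is the amount of gold minable at that cell. A minimal sketch for inspecting a map offline, assuming it is run from the repository root (the chosen path is just an example):

```python
import json

# Load one training map and summarize it.
with open("Miner-Training-Local-CodeSample/Maps/map4") as f:
    grid = json.load(f)

print(len(grid), "rows x", len(grid[0]), "columns")  # expected: 9 rows x 21 columns
gold = [(x, y, v) for y, row in enumerate(grid) for x, v in enumerate(row) if v > 0]
print("gold cells:", len(gold), "total gold:", sum(v for _, _, v in gold))
```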
-------------------------------------------------------------------------------- /Miner-Training-Local-CodeSample/Maps/map2: -------------------------------------------------------------------------------- 1 | [[550,-1,-1,-2,-2,-2,-2,150,0,0,0,-3,-2,400,-3,650,-2,-1,0,0,0],[350,-1,-1,-2,0,0,-2,-2,-2,-1,0,-3,-3,-2,300,-2,-1,650,-2,-1,0],[-1,-1,450,-2,0,0,0,0,0,0,0,0,0,-3,-3,-3,-1,-1,-3,0,0],[-1,-1,-2,-2,0,-3,-2,0,0,-3,-3,-3,0,0,0,0,0,0,-3,0,150],[0,0,-2,0,-3,-3,-3,0,-1,-2,400,-3,-3,0,-2,0,-1,0,-3,0,0],[0,200,-2,0,-3,250,-1,0,0,-1,-2,-3,-2,0,-1,300,-1,0,-3,-1,0],[-3,-3,-2,0,-3,-3,-3,0,0,0,0,-3,-3,-3,-2,-2,-2,0,-2,-2,0],[-1,-3,-2,0,0,-2,0,0,-3,-3,-3,-3,150,-3,0,0,0,0,-2,200,0],[800,-3,-2,-3,450,-2,0,-3,-3,200,-1,250,-1,-3,0,-1,-1,0,0,0,0]] -------------------------------------------------------------------------------- /Miner-Training-Local-CodeSample/Maps/map5: -------------------------------------------------------------------------------- 1 | [[0,0,0,0,0,0,0,-2,0,0,0,0,-2,0,0,0,0,-1,-1,800,-1],[100,-1,-3,-2,0,0,-1,200,-1,-2,250,-2,-2,250,-1,0,0,-1,-2,-1,-2],[-1,700,-1,-3,150,-1,-1,-3,-3,-2,-2,-1,-3,-2,-1,0,0,-3,0,-1,0],[0,-1,-3,0,-1,0,-3,-3,-1,-3,-1,600,-1,-3,-2,-1,-3,500,-1,0,0],[0,-3,0,0,0,0,-2,-1,350,-1,50,-1,-3,50,0,0,0,-1,0,0,-2],[0,0,0,-3,250,-3,-3,-3,-1,500,450,-1,-3,0,-1,0,0,0,0,-2,100],[0,-3,0,0,-3,0,0,0,-3,-1,-1,-3,0,0,-2,300,-2,-1,0,0,-2],[0,-1,-3,-2,0,0,0,-1,0,-3,-3,-3,-1,-2,-2,-1,-2,0,-1,0,0],[-1,500,-1,-3,-2,0,-1,450,-1,0,0,-1,500,-1,-2,-1,-2,0,50,0,0]] -------------------------------------------------------------------------------- /Miner-Training-Local-CodeSample/Maps/map11: -------------------------------------------------------------------------------- 1 | [[50,0,-2,-3,-2,0,-3,-2,-2,0,50,-3,-1,-1,0,-1,50,0,0,-1,-3],[-2,-3,-2,-3,0,0,-3,-1,-3,-3,0,-3,0,0,0,0,-1,0,-3,50,0],[0,-2,50,-2,50,0,-1,0,-1,50,0,-2,-3,0,-1,50,0,-2,-3,0,-2],[-2,-1,0,-2,0,-3,50,-3,-3,-3,-1,-2,-3,-2,-3,-2,-2,50,0,-2,0],[-1,50,0,-3,-3,-2,-3,50,-2,-1,-3,-3,-1,-1,-1,-2,-3,50,-1,-3,-2],[0,-1,-1,-2,-2,-3,-3,-3,0,-2,0,-2,-2,50,-2,-3,-1,-3,-3,0,0],[-3,-2,-1,0,0,-2,-3,-2,0,-1,-1,-3,0,50,-2,0,0,50,-1,0,50],[-1,-3,0,0,-3,50,-3,-1,-1,-2,-3,-1,-1,0,-2,-1,-1,0,0,-2,-3],[-2,50,-3,-1,50,-1,-2,-2,0,0,-3,-2,0,-2,50,-2,-2,-2,-3,-3,50]] -------------------------------------------------------------------------------- /Miner-Training-Local-CodeSample/Maps/map7: -------------------------------------------------------------------------------- 1 | [[-1,-3,-1,50,-1,-1,-1,-2,0,0,-1,-3,0,-2,0,-3,0,-3,-2,-2,50],[0,0,-1,-3,-1,-2,-1,50,-1,-2,-2,-2,-3,0,0,-3,-2,-3,-1,-1,0],[50,0,-2,-2,-3,-3,0,50,-1,-3,0,0,-2,-1,-1,50,0,-2,-2,0,-1],[0,-1,-1,0,-2,0,0,-3,0,-3,0,-3,-1,-3,-2,0,-2,0,0,-2,-2],[-2,-3,-2,-2,-2,-3,0,-1,0,-1,0,50,-1,-1,0,-1,-2,50,-2,-3,-1],[0,-3,-1,-2,-2,50,50,-1,-2,50,-2,-1,-1,-2,-2,-2,0,0,-2,-1,-2],[-1,0,0,-3,-3,-1,-3,-3,-3,50,-3,-1,-2,-3,-1,-2,-1,-3,-1,-2,0],[-3,-2,-3,-1,50,0,-2,0,-3,-3,-2,0,-3,-3,-2,50,-1,-2,0,-3,50],[0,50,-2,-1,-1,-2,-2,0,50,-3,0,-2,-2,0,-3,0,-3,-1,-1,-1,-2]] -------------------------------------------------------------------------------- /Miner-Training-Local-CodeSample/Maps/map3: -------------------------------------------------------------------------------- 1 | 
[[200,-2,-2,250,-2,0,-2,-1,0,-3,500,-3,-3,0,0,0,0,0,-3,450,-3],[-2,-2,-1,-2,-1,0,-3,200,-2,0,-3,0,150,0,-2,-1,0,0,0,-3,0],[-3,-2,-1,-3,-1,0,0,-3,-2,0,0,0,-1,-2,450,-2,0,-2,150,-2,0],[300,-3,-3,300,-2,-2,-2,-2,300,-2,-2,0,0,0,-2,0,-3,-3,-3,-2,0],[-3,-3,0,-3,-1,350,-1,0,-2,-2,350,-2,0,-3,0,0,-3,300,-3,-2,250],[-3,0,0,0,-1,-1,-1,-1,-3,0,-2,0,-3,400,-3,0,-3,-3,-3,0,0],[450,-3,0,0,0,0,-1,400,-3,0,0,0,-2,-3,-2,0,0,0,0,0,0],[-3,0,-1,0,-1,0,-3,-3,-3,0,-1,0,0,0,250,-3,-3,-3,-1,-2,-2],[0,0,-1,200,-1,-3,500,-3,0,-1,200,-1,0,0,-2,-2,-2,-1,400,-1,-2]] -------------------------------------------------------------------------------- /Miner-Training-Local-CodeSample/Maps/map9: -------------------------------------------------------------------------------- 1 | [[50,-2,-2,-2,-2,-2,-1,0,50,-2,-3,-1,-2,0,-3,0,-1,-3,-2,50,-2],[-1,-1,50,-1,-2,50,-2,0,-1,-1,50,-1,-3,-3,-1,-1,-1,0,-2,-3,-3],[-1,0,0,-1,-1,-2,0,-2,-2,-1,-1,0,-3,-2,50,-3,-2,-1,-2,-1,-1],[-1,0,-2,50,-1,0,-2,-1,-3,-1,-3,-2,50,0,-3,0,50,-1,50,0,-2],[-1,-3,-3,-3,-3,50,-1,-1,50,0,-3,-3,-3,-3,-1,0,0,-2,-2,-3,-2],[-3,-3,-3,0,0,-1,-1,-2,0,-2,-2,0,-2,-3,-1,-3,0,-3,0,-2,0],[-1,-3,0,50,0,-1,0,-3,0,-2,0,50,-2,-2,-3,-1,0,-3,-2,0,-1],[-1,-2,50,-1,-3,50,-2,-2,-2,-3,0,-3,-2,50,0,0,0,0,0,-3,50],[50,0,-1,50,-1,-3,-3,0,-1,-3,-3,-1,-1,-3,-3,-3,-3,50,-1,-3,-1]] -------------------------------------------------------------------------------- /Miner-Training-Local-CodeSample/Maps/map1: -------------------------------------------------------------------------------- 1 | [[450,-2,0,-2,150,-1,0,0,0,0,-1,-2,-2,-2,0,0,0,0,150,-2,350],[-2,-2,-2,-2,-1,0,-1,-1,-1,-1,-3,50,-2,-2,-2,-2,-3,-3,50,-2,-1],[-2,-2,200,-2,0,-2,0,-2,-3,-3,-2,0,-3,-2,-2,150,-3,-3,0,0,50],[0,-3,-3,-2,0,0,-1,0,550,-3,-2,0,0,0,-1,0,0,-1,-1,-1,-2],[-2,0,0,0,-1,0,-1,50,300,-3,-2,0,-3,0,0,0,-1,-3,-3,-2,-1],[-1,-3,-1,-3,0,-2,0,0,-2,-1,100,-3,0,-2,300,-3,0,-2,-3,-2,0],[-2,-3,-1,-3,-1,500,-1,-3,-2,-1,0,-1,0,-1,0,-1,0,-2,-3,-3,-1],[0,-3,-1,-3,0,-2,-3,-3,0,0,0,0,-2,0,-2,-3,-3,-3,-3,200,-1],[1200,-3,-1,-3,-1,-1,-2,-2,0,-1,150,-2,0,-2,0,0,-2,-3,-3,1500,50]] -------------------------------------------------------------------------------- /Miner-Training-Local-CodeSample/Maps/map10: -------------------------------------------------------------------------------- 1 | [[50,0,-2,-2,-3,-2,-3,-1,-2,-3,-3,0,0,0,-1,0,-2,-2,50,-1,-2],[-1,-1,-3,-1,50,-2,-1,-3,-2,-1,-1,0,50,0,-2,-3,50,-2,-1,-1,-1],[-3,-1,-3,-1,-3,-2,-3,0,-1,0,-2,50,-2,-3,0,0,-2,-3,50,0,0],[-3,-1,-3,50,-1,-1,50,-1,0,-2,-3,-2,-1,50,0,-3,50,0,50,-3,-2],[-2,-1,-1,0,-3,-1,50,50,0,-3,-3,0,-1,50,-1,-3,0,0,0,-2,0],[0,-2,-2,-1,-3,50,-3,-3,-3,-2,0,0,0,-1,-2,-3,0,-3,0,-2,-2],[-3,-1,-3,-1,-3,-2,-1,-2,-1,-3,0,-1,-3,-2,0,-2,-1,50,-3,-2,50],[-3,50,-1,0,-2,0,-2,-3,-3,-3,0,-3,-1,0,-1,-1,-3,-2,-1,-3,-3],[-3,-3,-3,50,0,-2,-2,-3,50,-3,-2,-3,-1,0,-3,-1,50,-2,-2,-3,0]] -------------------------------------------------------------------------------- /Miner-Training-Local-CodeSample/Maps/map8: -------------------------------------------------------------------------------- 1 | 
[[50,-1,0,0,-2,50,-3,-1,50,-2,0,-2,-2,-1,-1,-3,0,-1,-2,-2,-2],[-2,0,0,0,-3,-3,50,0,50,-3,50,-2,-2,0,-1,-3,-2,-2,0,-1,50],[-1,-2,-2,-2,0,-1,-2,-2,0,-3,50,-1,-3,0,-2,50,-2,50,0,-2,-1],[-3,-3,-3,-3,-1,-3,-1,-3,0,-1,0,-2,0,-1,-1,-2,-2,-3,0,0,-3],[-2,-2,-2,-2,-1,-3,-3,50,-1,-1,-3,-1,-1,-3,50,-1,-2,50,-3,-3,-2],[50,-2,-1,-3,-2,-1,-1,-3,-3,-3,0,-3,-2,-1,0,0,-2,50,-3,0,50],[-3,-3,50,-3,0,-1,-1,-3,0,-2,-2,-1,50,0,50,0,0,-1,-1,-3,-2],[-3,-2,-2,-2,50,-2,-3,-1,-2,0,50,-1,0,0,-1,-2,50,-3,50,-3,-2],[50,0,-1,-3,-1,0,0,-3,-2,-2,0,0,-2,-2,-2,-3,0,-1,-1,0,-3]]
--------------------------------------------------------------------------------
/Miner-Testing-CodeSample/build/GAME_SOCKET.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import socket
3 | import json
4 | 
5 | class GameSocket:
6 |     def __init__(self, host, port):
7 |         self.host = host
8 |         self.port = port
9 |         self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
10 | 
11 |     def connect(self):
12 |         try:
13 |             self.socket.connect((self.host, self.port))
14 |             print("Connected to server.")
15 |         except Exception as e:
16 |             import traceback
17 |             traceback.print_exc()
18 |             print("Cannot connect.")
19 | 
20 |     def receive(self):
21 |         buff_size = 4096
22 |         recv_data = b""
23 |         while True:
24 |             part = self.socket.recv(buff_size)
25 |             recv_data += part
26 |             if len(part) < buff_size:  # a short read is taken as the end of one message
27 |                 break
28 |         message = recv_data.decode("utf-8")
29 |         return message
30 | 
31 |     def send(self, message):
32 |         self.socket.send(message.encode("utf-8"))
33 | 
34 |     def close(self):
35 |         self.socket.shutdown(1)
36 |         self.socket.close()
37 |         print("Close socket.")
38 | 
--------------------------------------------------------------------------------
/Miner-Testing-Server/README.md:
--------------------------------------------------------------------------------
1 | # DUMMY TESTING SERVER
2 | This is a simulated server that lets players check how their agents run.
3 | 
4 | ## Description
5 | The server simulates the map named Verify, the map used to verify a player's code when the source code is uploaded to the system.
6 | 
7 | ## How to Run
8 | 
9 | Start the server:
10 | **python3 DUMMY_SERVER.py {port}**
11 | 
12 | Example: **python3 DUMMY_SERVER.py 1234**
13 | 
14 | Run the client:
15 | **./run.sh localhost {port}**
16 | 
17 | Example: **./run.sh localhost 1234**
18 | 
19 | ## Note
20 | This is only a simulated server that helps players quickly check an agent's quality; it does not guarantee results that match the real competition environment.
21 | 
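As a quick smoke test, the client side of the handshake can also be driven directly from Python instead of `run.sh`. The sketch below mirrors the flow in `TestingAgent.py` (connect, reset, step until a terminal status) but sends a fixed `ACTION_FREE` instead of a model prediction, so it runs without the Keras model files; it assumes `DUMMY_SERVER.py` is already listening on port 1234 and that it is executed from `Miner-Testing-CodeSample/build`:

```python
from MinerEnv import MinerEnv  # wraps GAME_SOCKET.GameSocket and MINER_STATE.State

env = MinerEnv("localhost", 1234)
env.start()                    # open the TCP connection
env.reset()                    # receive and parse the initial game info
while not env.check_terminate():
    env.step("4")              # ACTION_FREE: rest every turn
    print("score:", env.state.score, "energy:", env.state.energy)
env.end()                      # close the socket
```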
--------------------------------------------------------------------------------
/Miner-Training-Local-CodeSample/bot2.py:
--------------------------------------------------------------------------------
1 | from MINER_STATE import State
2 | import numpy as np
3 | 
4 | 
5 | class PlayerInfo:
6 |     def __init__(self, id):
7 |         self.playerId = id
8 |         self.score = 0
9 |         self.energy = 0
10 |         self.posx = 0
11 |         self.posy = 0
12 |         self.lastAction = -1
13 |         self.status = 0
14 |         self.freeCount = 0
15 | 
16 | 
17 | class Bot2:
18 |     ACTION_GO_LEFT = 0
19 |     ACTION_GO_RIGHT = 1
20 |     ACTION_GO_UP = 2
21 |     ACTION_GO_DOWN = 3
22 |     ACTION_FREE = 4
23 |     ACTION_CRAFT = 5
24 | 
25 |     def __init__(self, id):
26 |         self.state = State()
27 |         self.info = PlayerInfo(id)
28 | 
29 |     def next_action(self):
30 |         if self.state.mapInfo.gold_amount(self.info.posx, self.info.posy) > 0:
31 |             if self.info.energy >= 6:
32 |                 return self.ACTION_CRAFT
33 |             else:
34 |                 return self.ACTION_FREE
35 |         if self.info.energy < 5:
36 |             return self.ACTION_FREE
37 |         else:
38 |             action = np.random.randint(0, 4)
39 |             return action
40 | 
41 |     def new_game(self, data):
42 |         try:
43 |             self.state.init_state(data)
44 |         except Exception as e:
45 |             import traceback
46 |             traceback.print_exc()
47 | 
48 |     def new_state(self, data):
49 |         # action = self.next_action();
50 |         # self.socket.send(action)
51 |         try:
52 |             self.state.update_state(data)
53 |         except Exception as e:
54 |             import traceback
55 |             traceback.print_exc()
--------------------------------------------------------------------------------
/Miner-Testing-CodeSample/build/RLModelSample.json:
--------------------------------------------------------------------------------
1 | {"class_name": "Sequential", "config": {"name": "sequential_1", "layers": [{"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "batch_input_shape": [null, 198], "dtype": "float32", "units": 300, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation", "config": {"name": "activation_1", "trainable": true, "dtype": "float32", "activation": "relu"}}, {"class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "dtype": "float32", "units": 300, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation", "config": {"name": "activation_2", "trainable": true, "dtype": "float32", "activation": "relu"}}, {"class_name": "Dense", "config": {"name": "dense_3", "trainable": true, "dtype": "float32", "units": 6, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation",
"config": {"name": "activation_3", "trainable": true, "dtype": "float32", "activation": "linear"}}]}, "keras_version": "2.3.1", "backend": "tensorflow"} -------------------------------------------------------------------------------- /Miner-Training-Local-CodeSample/bot1.py: -------------------------------------------------------------------------------- 1 | from MINER_STATE import State 2 | import numpy as np 3 | 4 | 5 | class PlayerInfo: 6 | def __init__(self, id): 7 | self.playerId = id 8 | self.score = 0 9 | self.energy = 0 10 | self.posx = 0 11 | self.posy = 0 12 | self.lastAction = -1 13 | self.status = 0 14 | self.freeCount = 0 15 | 16 | 17 | class Bot1: 18 | ACTION_GO_LEFT = 0 19 | ACTION_GO_RIGHT = 1 20 | ACTION_GO_UP = 2 21 | ACTION_GO_DOWN = 3 22 | ACTION_FREE = 4 23 | ACTION_CRAFT = 5 24 | 25 | def __init__(self, id): 26 | self.state = State() 27 | self.info = PlayerInfo(id) 28 | 29 | def next_action(self): 30 | if self.state.mapInfo.gold_amount(self.info.posx, self.info.posy) > 0: 31 | if self.info.energy >= 6: 32 | return self.ACTION_CRAFT 33 | else: 34 | return self.ACTION_FREE 35 | if self.info.energy < 5: 36 | return self.ACTION_FREE 37 | else: 38 | action = self.ACTION_GO_UP 39 | if self.info.posy % 2 == 0: 40 | if self.info.posx < self.state.mapInfo.max_x: 41 | action = self.ACTION_GO_RIGHT 42 | else: 43 | if self.info.posx > 0: 44 | action = self.ACTION_GO_LEFT 45 | else: 46 | action = self.ACTION_GO_DOWN 47 | return action 48 | 49 | def new_game(self, data): 50 | try: 51 | self.state.init_state(data) 52 | except Exception as e: 53 | import traceback 54 | traceback.print_exc() 55 | 56 | def new_state(self, data): 57 | # action = self.next_action(); 58 | # self.socket.send(action) 59 | try: 60 | self.state.update_state(data) 61 | except Exception as e: 62 | import traceback 63 | traceback.print_exc() -------------------------------------------------------------------------------- /Miner-Training-Local-CodeSample/bot3.py: -------------------------------------------------------------------------------- 1 | from MINER_STATE import State 2 | import numpy as np 3 | 4 | 5 | class PlayerInfo: 6 | def __init__(self, id): 7 | self.playerId = id 8 | self.score = 0 9 | self.energy = 0 10 | self.posx = 0 11 | self.posy = 0 12 | self.lastAction = -1 13 | self.status = 0 14 | self.freeCount = 0 15 | 16 | 17 | class Bot3: 18 | ACTION_GO_LEFT = 0 19 | ACTION_GO_RIGHT = 1 20 | ACTION_GO_UP = 2 21 | ACTION_GO_DOWN = 3 22 | ACTION_FREE = 4 23 | ACTION_CRAFT = 5 24 | 25 | def __init__(self, id): 26 | self.state = State() 27 | self.info = PlayerInfo(id) 28 | 29 | def next_action(self): 30 | if self.state.mapInfo.gold_amount(self.info.posx, self.info.posy) > 0: 31 | if self.info.energy >= 6: 32 | return self.ACTION_CRAFT 33 | else: 34 | return self.ACTION_FREE 35 | if self.info.energy < 5: 36 | return self.ACTION_FREE 37 | else: 38 | action = self.ACTION_GO_LEFT 39 | if self.info.posx % 2 == 0: 40 | if self.info.posy < self.state.mapInfo.max_y: 41 | action = self.ACTION_GO_DOWN 42 | else: 43 | if self.info.posy > 0: 44 | action = self.ACTION_GO_UP 45 | else: 46 | action = self.ACTION_GO_RIGHT 47 | return action 48 | 49 | def new_game(self, data): 50 | try: 51 | self.state.init_state(data) 52 | except Exception as e: 53 | import traceback 54 | traceback.print_exc() 55 | 56 | def new_state(self, data): 57 | # action = self.next_action(); 58 | # self.socket.send(action) 59 | try: 60 | self.state.update_state(data) 61 | except Exception as e: 62 | import traceback 63 | traceback.print_exc() 64 | 
-------------------------------------------------------------------------------- /Miner-Testing-CodeSample/build/TestingAgent.py: -------------------------------------------------------------------------------- 1 | from warnings import simplefilter 2 | simplefilter(action='ignore', category=FutureWarning) 3 | 4 | import sys 5 | from keras.models import model_from_json 6 | from MinerEnv import MinerEnv 7 | import numpy as np 8 | 9 | ACTION_GO_LEFT = 0 10 | ACTION_GO_RIGHT = 1 11 | ACTION_GO_UP = 2 12 | ACTION_GO_DOWN = 3 13 | ACTION_FREE = 4 14 | ACTION_CRAFT = 5 15 | 16 | HOST = "localhost" 17 | PORT = 1111 18 | if len(sys.argv) == 3: 19 | HOST = str(sys.argv[1]) 20 | PORT = int(sys.argv[2]) 21 | 22 | # load json and create model 23 | json_file = open('RLModelSample.json', 'r') 24 | loaded_model_json = json_file.read() 25 | json_file.close() 26 | DQNAgent = model_from_json(loaded_model_json) 27 | # load weights into new model 28 | DQNAgent.load_weights("RLModelSample.h5") 29 | print("Loaded model from disk") 30 | status_map = {0: "STATUS_PLAYING", 1: "STATUS_ELIMINATED_WENT_OUT_MAP", 2: "STATUS_ELIMINATED_OUT_OF_ENERGY", 31 | 3: "STATUS_ELIMINATED_INVALID_ACTION", 4: "STATUS_STOP_EMPTY_GOLD", 5: "STATUS_STOP_END_STEP"} 32 | try: 33 | # Initialize environment 34 | minerEnv = MinerEnv(HOST, PORT) 35 | minerEnv.start() # Connect to the game 36 | minerEnv.reset() 37 | s = minerEnv.get_state() ##Getting an initial state 38 | while not minerEnv.check_terminate(): 39 | try: 40 | action = np.argmax(DQNAgent.predict(s.reshape(1, len(s)))) # Getting an action from the trained model 41 | print("next action = ", action) 42 | minerEnv.step(str(action)) # Performing the action in order to obtain the new state 43 | s_next = minerEnv.get_state() # Getting a new state 44 | s = s_next 45 | except Exception as e: 46 | import traceback 47 | traceback.print_exc() 48 | print("Finished.") 49 | break 50 | print(status_map[minerEnv.state.status]) 51 | except Exception as e: 52 | import traceback 53 | traceback.print_exc() 54 | print("End game.") 55 | -------------------------------------------------------------------------------- /Miner-Training-Local-CodeSample/Memory.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | class Memory: 5 | 6 | capacity = None 7 | 8 | 9 | def __init__( 10 | self, 11 | capacity, 12 | length = None, 13 | states = None, 14 | actions = None, 15 | rewards = None, 16 | dones = None, 17 | states2 = None, 18 | ): 19 | self.capacity = capacity 20 | self.length = 0 21 | self.states = states 22 | self.actions = actions 23 | self.rewards = rewards 24 | self.dones = dones 25 | self.states2 = states2 26 | 27 | def push(self, s, a, r, done, s2): 28 | if self.states is None: 29 | self.states = s 30 | self.actions = a 31 | self.rewards = r 32 | self.dones = done 33 | self.states2 = s2 34 | else: 35 | self.states = np.vstack((self.states,s)) 36 | self.actions = np.vstack((self.actions,a)) 37 | self.rewards = np.vstack((self.rewards, r)) 38 | self.dones = np.vstack((self.dones, done)) 39 | self.states2 = np.vstack((self.states2,s2)) 40 | 41 | self.length = self.length + 1 42 | 43 | if (self.length > self.capacity): 44 | self.states = np.delete(self.states,(0), axis = 0) 45 | self.actions = np.delete(self.actions,(0), axis = 0) 46 | self.rewards = np.delete(self.rewards,(0), axis = 0) 47 | self.dones = np.delete(self.dones,(0), axis = 0) 48 | self.states2 = np.delete(self.states2,(0), axis = 0) 49 | self.length = 
self.length - 1 50 | 51 | 52 | def sample(self,batch_size): 53 | if (self.length >= batch_size): 54 | idx = random.sample(range(0,self.length),batch_size) 55 | s = self.states[idx,:] 56 | a = self.actions[idx,:] 57 | r = self.rewards[idx,:] 58 | d = self.dones[idx,:] 59 | s2 = self.states2[idx,:] 60 | 61 | return list([s,a,r,s2,d]) 62 | 63 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Python template 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | pip-wheel-metadata/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | cover/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | .pybuilder/ 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | # For a library or package, you might want to ignore these files since the code is 90 | # intended to run in multiple environments; otherwise, check them in: 91 | # .python-version 92 | 93 | # pipenv 94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 97 | # install all needed dependencies. 98 | #Pipfile.lock 99 | 100 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 101 | __pypackages__/ 102 | 103 | # Celery stuff 104 | celerybeat-schedule 105 | celerybeat.pid 106 | 107 | # SageMath parsed files 108 | *.sage.py 109 | 110 | # Environments 111 | .env 112 | .venv 113 | env/ 114 | venv/ 115 | ENV/ 116 | env.bak/ 117 | venv.bak/ 118 | 119 | # Spyder project settings 120 | .spyderproject 121 | .spyproject 122 | 123 | # Rope project settings 124 | .ropeproject 125 | 126 | # mkdocs documentation 127 | /site 128 | 129 | # mypy 130 | .mypy_cache/ 131 | .dmypy.json 132 | dmypy.json 133 | 134 | # Pyre type checker 135 | .pyre/ 136 | 137 | # pytype static type analyzer 138 | .pytype/ 139 | 140 | # Cython debug symbols 141 | cython_debug/ 142 | 143 | .idea/ 144 | 145 | .DS_Store -------------------------------------------------------------------------------- /Miner-Testing-CodeSample/build/MinerEnv.py: -------------------------------------------------------------------------------- 1 | from warnings import simplefilter 2 | simplefilter(action='ignore', category=FutureWarning) 3 | 4 | import sys 5 | import numpy as np 6 | from GAME_SOCKET import GameSocket #in testing version, please use GameSocket instead of GameSocketDummy 7 | from MINER_STATE import State 8 | 9 | TreeID = 1 10 | TrapID = 2 11 | SwampID = 3 12 | 13 | 14 | class MinerEnv: 15 | def __init__(self, host, port): 16 | self.socket = GameSocket(host, port) 17 | self.state = State() 18 | 19 | self.score_pre = self.state.score#Storing the last score for designing the reward function 20 | 21 | def start(self): #connect to server 22 | self.socket.connect() 23 | 24 | def end(self): #disconnect server 25 | self.socket.close() 26 | 27 | def send_map_info(self, request):#tell server which map to run 28 | self.socket.send(request) 29 | 30 | def reset(self): #start new game 31 | try: 32 | message = self.socket.receive() #receive game info from server 33 | print(message) 34 | self.state.init_state(message) #init state 35 | except Exception as e: 36 | import traceback 37 | traceback.print_exc() 38 | 39 | def step(self, action): #step process 40 | self.socket.send(action) #send action to server 41 | try: 42 | message = self.socket.receive() #receive new state from server 43 | #print("New state: ", message) 44 | self.state.update_state(message) #update to local state 45 | except Exception as e: 46 | import traceback 47 | traceback.print_exc() 48 | 49 | # Functions are customized by client 50 | def get_state(self): 51 | # Building the map 52 | view = np.zeros([self.state.mapInfo.max_x + 1, self.state.mapInfo.max_y + 1], dtype=int) 53 | for i in range(self.state.mapInfo.max_x + 1): 54 | for j in range(self.state.mapInfo.max_y + 1): 55 | if self.state.mapInfo.get_obstacle(i, j) == TreeID: # Tree 56 | view[i, j] = -TreeID 57 | if self.state.mapInfo.get_obstacle(i, j) == TrapID: # Trap 58 | view[i, j] = -TrapID 59 | if self.state.mapInfo.get_obstacle(i, j) == SwampID: # Swamp 60 | view[i, j] = -SwampID 61 | if self.state.mapInfo.gold_amount(i, j) > 0: 62 | view[i, j] = self.state.mapInfo.gold_amount(i, j) 63 | 64 | DQNState = view.flatten().tolist()#Flattening the map matrix to a vector 65 | 66 | # Add position and energy of agent to the DQNState 67 | DQNState.append(self.state.x) 68 | DQNState.append(self.state.y) 69 | DQNState.append(self.state.energy) 70 | 71 | #Add position of bots 72 | for player in self.state.players: 73 | if player["playerId"] != self.state.id: 74 | DQNState.append(player["posx"]) 75 | DQNState.append(player["posy"]) 76 | 77 | #Convert the DQNState from list to array 
78 |         DQNState = np.array(DQNState)
79 | 
80 |         return DQNState
81 | 
82 |     def check_terminate(self):
83 |         return self.state.status != State.STATUS_PLAYING
84 | 
--------------------------------------------------------------------------------
/Miner-Training-Local-CodeSample/MinerEnv.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import numpy as np
3 | from GAME_SOCKET_DUMMY import GameSocket #in testing version, please use GameSocket instead of GAME_SOCKET_DUMMY
4 | from MINER_STATE import State
5 | 
6 | 
7 | TreeID = 1
8 | TrapID = 2
9 | SwampID = 3
10 | class MinerEnv:
11 |     def __init__(self, host, port):
12 |         self.socket = GameSocket(host, port)
13 |         self.state = State()
14 | 
15 |         self.score_pre = self.state.score#Storing the last score for designing the reward function
16 | 
17 |     def start(self): #connect to server
18 |         self.socket.connect()
19 | 
20 |     def end(self): #disconnect server
21 |         self.socket.close()
22 | 
23 |     def send_map_info(self, request):#tell server which map to run
24 |         self.socket.send(request)
25 | 
26 |     def reset(self): #start new game
27 |         try:
28 |             message = self.socket.receive() #receive game info from server
29 |             self.state.init_state(message) #init state
30 |         except Exception as e:
31 |             import traceback
32 |             traceback.print_exc()
33 | 
34 |     def step(self, action): #step process
35 |         self.socket.send(action) #send action to server
36 |         try:
37 |             message = self.socket.receive() #receive new state from server
38 |             self.state.update_state(message) #update to local state
39 |         except Exception as e:
40 |             import traceback
41 |             traceback.print_exc()
42 | 
43 |     # Functions are customized by client
44 |     def get_state(self):
45 |         # Building the map
46 |         view = np.zeros([self.state.mapInfo.max_x + 1, self.state.mapInfo.max_y + 1], dtype=int)
47 |         for i in range(self.state.mapInfo.max_x + 1):
48 |             for j in range(self.state.mapInfo.max_y + 1):
49 |                 if self.state.mapInfo.get_obstacle(i, j) == TreeID:  # Tree
50 |                     view[i, j] = -TreeID
51 |                 if self.state.mapInfo.get_obstacle(i, j) == TrapID:  # Trap
52 |                     view[i, j] = -TrapID
53 |                 if self.state.mapInfo.get_obstacle(i, j) == SwampID:  # Swamp
54 |                     view[i, j] = -SwampID
55 |                 if self.state.mapInfo.gold_amount(i, j) > 0:
56 |                     view[i, j] = self.state.mapInfo.gold_amount(i, j)
57 | 
58 |         DQNState = view.flatten().tolist() #Flattening the map matrix to a vector
59 | 
60 |         # Add position and energy of agent to the DQNState
61 |         DQNState.append(self.state.x)
62 |         DQNState.append(self.state.y)
63 |         DQNState.append(self.state.energy)
64 |         #Add position of bots
65 |         for player in self.state.players:
66 |             if player["playerId"] != self.state.id:
67 |                 DQNState.append(player["posx"])
68 |                 DQNState.append(player["posy"])
69 | 
70 |         #Convert the DQNState from list to array for training
71 |         DQNState = np.array(DQNState)
72 | 
73 |         return DQNState
74 | 
75 |     def get_reward(self):
76 |         # Calculate reward
77 |         reward = 0
78 |         score_action = self.state.score - self.score_pre
79 |         self.score_pre = self.state.score
80 |         if score_action > 0:
81 |             #If the DQN agent crafts gold, then it should obtain a positive reward (equal to score_action)
82 |             reward += score_action
83 | 
84 |         #If the DQN agent crashes into obstacles (Tree, Trap, Swamp), then it should be punished by a negative reward
85 |         if self.state.mapInfo.get_obstacle(self.state.x, self.state.y) == TreeID:  # Tree
86 |             reward -= TreeID
87 |         if self.state.mapInfo.get_obstacle(self.state.x, self.state.y) == TrapID:  # Trap
88 |             reward -= TrapID
89 |         if self.state.mapInfo.get_obstacle(self.state.x, self.state.y) == SwampID:  # Swamp
90 |             reward -= SwampID
91 | 
92 |         # If out of the map, then the DQN agent should be punished by a larger negative reward.
93 |         if self.state.status == State.STATUS_ELIMINATED_WENT_OUT_MAP:
94 |             reward += -10
95 | 
96 |         #Run out of energy, then the DQN agent should be punished by a larger negative reward.
97 |         if self.state.status == State.STATUS_ELIMINATED_OUT_OF_ENERGY:
98 |             reward += -10
99 |         # print ("reward",reward)
100 |         return reward
101 | 
102 |     def check_terminate(self):
103 |         #Checking the status of the game
104 |         #it indicates the game ends or is playing
105 |         return self.state.status != State.STATUS_PLAYING
106 | 
--------------------------------------------------------------------------------
/Miner-Testing-CodeSample/build/MINER_STATE.py:
--------------------------------------------------------------------------------
1 | import json
2 | 
3 | 
4 | def str_2_json(str):
5 |     return json.loads(str, encoding="utf-8")
6 | 
7 | 
8 | class MapInfo:
9 |     def __init__(self):
10 |         self.max_x = 0
11 |         self.max_y = 0
12 |         self.golds = []
13 |         self.obstacles = []
14 |         self.numberOfPlayers = 0
15 |         self.maxStep = 0
16 | 
17 |     def init_map(self, gameInfo):
18 |         self.max_x = gameInfo["width"] - 1
19 |         self.max_y = gameInfo["height"] - 1
20 |         self.golds = gameInfo["golds"]
21 |         self.obstacles = gameInfo["obstacles"]
22 |         self.maxStep = gameInfo["steps"]
23 |         self.numberOfPlayers = gameInfo["numberOfPlayers"]
24 | 
25 |     def update(self, golds, changedObstacles):
26 |         self.golds = golds
27 |         for cob in changedObstacles:
28 |             newOb = True
29 |             for ob in self.obstacles:
30 |                 if cob["posx"] == ob["posx"] and cob["posy"] == ob["posy"]:
31 |                     newOb = False
32 |                     #print("cell(", cob["posx"], ",", cob["posy"], ") change type from: ", ob["type"], " -> ",
33 |                     #      cob["type"], " / value: ", ob["value"], " -> ", cob["value"])
34 |                     ob["type"] = cob["type"]
35 |                     ob["value"] = cob["value"]
36 |                     break
37 |             if newOb:
38 |                 self.obstacles.append(cob)
39 |                 #print("new obstacle: ", cob["posx"], ",", cob["posy"], ", type = ", cob["type"], ", value = ",
40 |                 #      cob["value"])
41 | 
42 |     def get_min_x(self):
43 |         return min([cell["posx"] for cell in self.golds])
44 | 
45 |     def get_max_x(self):
46 |         return max([cell["posx"] for cell in self.golds])
47 | 
48 |     def get_min_y(self):
49 |         return min([cell["posy"] for cell in self.golds])
50 | 
51 |     def get_max_y(self):
52 |         return max([cell["posy"] for cell in self.golds])
53 | 
54 |     def is_row_has_gold(self, y):
55 |         return y in [cell["posy"] for cell in self.golds]
56 | 
57 |     def is_column_has_gold(self, x):
58 |         return x in [cell["posx"] for cell in self.golds]
59 | 
60 |     def gold_amount(self, x, y):
61 |         for cell in self.golds:
62 |             if x == cell["posx"] and y == cell["posy"]:
63 |                 return cell["amount"]
64 |         return 0
65 | 
66 |     def get_obstacle(self, x, y):  # Getting the kind of the obstacle at cell(x,y)
67 |         for cell in self.obstacles:
68 |             if x == cell["posx"] and y == cell["posy"]:
69 |                 return cell["type"]
70 |         return -1  # No obstacle at the cell (x,y)
71 | 
72 | 
73 | class State:
74 |     STATUS_PLAYING = 0
75 |     STATUS_ELIMINATED_WENT_OUT_MAP = 1
76 |     STATUS_ELIMINATED_OUT_OF_ENERGY = 2
77 |     STATUS_ELIMINATED_INVALID_ACTION = 3
78 |     STATUS_STOP_EMPTY_GOLD = 4
79 |     STATUS_STOP_END_STEP = 5
80 | 
81 |     def __init__(self):
82 |         self.end = False
83 |         self.score = 0
84 |         self.lastAction = None
85 |         self.id = 0
86 |         self.x = 0
87 |         self.y = 0
88 |         self.energy = 0
89 |         self.mapInfo = MapInfo()
90 |         self.players = []
91 |         self.stepCount = 0
92 |         self.status = State.STATUS_PLAYING
93 | 
94 |     def init_state(self, data): #parse data from server into object
95 |         game_info = str_2_json(data)
96 |         self.end = False
97 |         self.score = 0
98 |         self.lastAction = None
99 |         self.id = game_info["playerId"]
100 |         self.x = game_info["posx"]
101 |         self.y = game_info["posy"]
102 |         self.energy = game_info["energy"]
103 |         self.mapInfo.init_map(game_info["gameinfo"])
104 |         self.stepCount = 0
105 |         self.status = State.STATUS_PLAYING
106 |         self.players = [{"playerId": 2, "posx": self.x, "posy": self.y},
107 |                         {"playerId": 3, "posx": self.x, "posy": self.y},
108 |                         {"playerId": 4, "posx": self.x, "posy": self.y}]
109 | 
110 |     def update_state(self, data):
111 |         new_state = str_2_json(data)
112 |         for player in new_state["players"]:
113 |             if player["playerId"] == self.id:
114 |                 self.x = player["posx"]
115 |                 self.y = player["posy"]
116 |                 self.energy = player["energy"]
117 |                 self.score = player["score"]
118 |                 self.lastAction = player["lastAction"]
119 |                 self.status = player["status"]
120 | 
121 |         self.mapInfo.update(new_state["golds"], new_state["changedObstacles"])
122 |         self.players = new_state["players"]
123 |         for i in range(len(self.players) + 1, 5, 1):
124 |             self.players.append({"playerId": i, "posx": self.x, "posy": self.y})
125 |         self.stepCount = self.stepCount + 1
126 | 
--------------------------------------------------------------------------------
/Miner-Training-Local-CodeSample/DQNModel.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from warnings import simplefilter
3 | simplefilter(action='ignore', category=FutureWarning)
4 | 
5 | import numpy as np
6 | from keras.models import Sequential
7 | from keras.models import model_from_json
8 | from keras.layers import Dense, Activation
9 | from keras import optimizers
10 | from keras import backend as K
11 | import tensorflow as tf
12 | from random import random, randrange
13 | 
14 | 
15 | # Deep Q Network off-policy
16 | class DQN:
17 | 
18 |     def __init__(
19 |         self,
20 |         input_dim, #The number of inputs for the DQN network
21 |         action_space, #The number of actions for the DQN network
22 |         gamma = 0.99, #The discount factor
23 |         epsilon = 1, #Epsilon - the exploration factor
24 |         epsilon_min = 0.01, #The minimum epsilon
25 |         epsilon_decay = 0.999, #The epsilon decay applied on each update_epsilon call
26 |         learning_rate = 0.00025, #The learning rate for the DQN network
27 |         tau = 0.125, #The factor for updating the DQN target network from the DQN network
28 |         model = None, #The DQN model
29 |         target_model = None, #The DQN target model
30 |         sess=None
31 | 
32 |     ):
33 |         self.input_dim = input_dim
34 |         self.action_space = action_space
35 |         self.gamma = gamma
36 |         self.epsilon = epsilon
37 |         self.epsilon_min = epsilon_min
38 |         self.epsilon_decay = epsilon_decay
39 |         self.learning_rate = learning_rate
40 |         self.tau = tau
41 | 
42 |         #Creating networks
43 |         self.model = self.create_model() #Creating the DQN model
44 |         self.target_model = self.create_model() #Creating the DQN target model
45 | 
46 |         #Tensorflow GPU optimization
47 |         config = tf.compat.v1.ConfigProto()
48 |         config.gpu_options.allow_growth = True
49 |         self.sess = tf.compat.v1.Session(config=config)
50 |         K.set_session(self.sess) #Register the session created above with the Keras backend
51 |         self.sess.run( tf.compat.v1.global_variables_initializer())
52 | 
53 |     def create_model(self):
54 |         #Creating the network
55 |         #Two hidden layers (300,300), their activation is ReLu
56 |         #One output layer with action_space of nodes, activation is
linear.
57 |         model = Sequential()
58 |         model.add(Dense(300, input_dim=self.input_dim))
59 |         model.add(Activation('relu'))
60 |         model.add(Dense(300))
61 |         model.add(Activation('relu'))
62 |         model.add(Dense(self.action_space))
63 |         model.add(Activation('linear'))
64 |         #adam = optimizers.adam(lr=self.learning_rate)
65 |         sgd = optimizers.SGD(lr=self.learning_rate, decay=1e-6, momentum=0.95)
66 |         model.compile(optimizer = sgd,
67 |                       loss='mse')
68 |         return model
69 | 
70 | 
71 |     def act(self,state):
72 |         #Get the index of the maximum Q values
73 |         a_max = np.argmax(self.model.predict(state.reshape(1,len(state))))
74 |         if (random() < self.epsilon):
75 |             a_chosen = randrange(self.action_space)
76 |         else:
77 |             a_chosen = a_max
78 |         return a_chosen
79 | 
80 | 
81 |     def replay(self,samples,batch_size):
82 |         inputs = np.zeros((batch_size, self.input_dim))
83 |         targets = np.zeros((batch_size, self.action_space))
84 | 
85 |         for i in range(0,batch_size):
86 |             state = samples[0][i,:]
87 |             action = samples[1][i]
88 |             reward = samples[2][i]
89 |             new_state = samples[3][i,:]
90 |             done= samples[4][i]
91 | 
92 |             inputs[i,:] = state
93 |             targets[i,:] = self.target_model.predict(state.reshape(1,len(state)))
94 |             if done:
95 |                 targets[i,action] = reward # if terminated, only equals reward
96 |             else:
97 |                 Q_future = np.max(self.target_model.predict(new_state.reshape(1,len(new_state))))
98 |                 targets[i,action] = reward + Q_future * self.gamma
99 |         #Training
100 |         loss = self.model.train_on_batch(inputs, targets)
101 | 
102 |     def target_train(self):
103 |         weights = self.model.get_weights()
104 |         target_weights = self.target_model.get_weights()
105 |         for i in range(0, len(target_weights)):
106 |             target_weights[i] = weights[i] * self.tau + target_weights[i] * (1 - self.tau)
107 | 
108 |         self.target_model.set_weights(target_weights)
109 | 
110 | 
111 |     def update_epsilon(self):
112 |         self.epsilon = self.epsilon*self.epsilon_decay
113 |         self.epsilon = max(self.epsilon_min, self.epsilon)
114 | 
115 | 
116 |     def save_model(self,path, model_name):
117 |         # serialize model to JSON
118 |         model_json = self.model.to_json()
119 |         with open(path + model_name + ".json", "w") as json_file:
120 |             json_file.write(model_json)
121 |         # serialize weights to HDF5
122 |         self.model.save_weights(path + model_name + ".h5")
123 |         print("Saved model to disk")
124 | 
125 | 
126 | 
--------------------------------------------------------------------------------
/Miner-Training-Local-CodeSample/MINER_STATE.py:
--------------------------------------------------------------------------------
1 | import json
2 | 
3 | 
4 | def str_2_json(str):
5 |     return json.loads(str, encoding="utf-8")
6 | 
7 | 
8 | class MapInfo:
9 |     def __init__(self):
10 |         self.max_x = 0 #Width of the map
11 |         self.max_y = 0 #Height of the map
12 |         self.golds = [] #List of the golds in the map
13 |         self.obstacles = []
14 |         self.numberOfPlayers = 0
15 |         self.maxStep = 0 #The maximum number of steps set for this map
16 | 
17 |     def init_map(self, gameInfo):
18 |         #Initialize the map at the beginning of each episode
19 |         self.max_x = gameInfo["width"] - 1
20 |         self.max_y = gameInfo["height"] - 1
21 |         self.golds = gameInfo["golds"]
22 |         self.obstacles = gameInfo["obstacles"]
23 |         self.maxStep = gameInfo["steps"]
24 |         self.numberOfPlayers = gameInfo["numberOfPlayers"]
25 | 
26 |     def update(self, golds, changedObstacles):
27 |         #Update the map after every step
28 |         self.golds = golds
29 |         for cob in changedObstacles:
30 |             newOb = True
31 |             for ob in self.obstacles:
32 |                 if cob["posx"] == ob["posx"] and
cob["posy"] == ob["posy"]: 33 | newOb = False 34 | #print("cell(", cob["posx"], ",", cob["posy"], ") change type from: ", ob["type"], " -> ", 35 | # cob["type"], " / value: ", ob["value"], " -> ", cob["value"]) 36 | ob["type"] = cob["type"] 37 | ob["value"] = cob["value"] 38 | break 39 | if newOb: 40 | self.obstacles.append(cob) 41 | #print("new obstacle: ", cob["posx"], ",", cob["posy"], ", type = ", cob["type"], ", value = ", 42 | # cob["value"]) 43 | 44 | def get_min_x(self): 45 | return min([cell["posx"] for cell in self.golds]) 46 | 47 | def get_max_x(self): 48 | return max([cell["posx"] for cell in self.golds]) 49 | 50 | def get_min_y(self): 51 | return min([cell["posy"] for cell in self.golds]) 52 | 53 | def get_max_y(self): 54 | return max([cell["posy"] for cell in self.golds]) 55 | 56 | def is_row_has_gold(self, y): 57 | return y in [cell["posy"] for cell in self.golds] 58 | 59 | def is_column_has_gold(self, x): 60 | return x in [cell["posx"] for cell in self.golds] 61 | 62 | def gold_amount(self, x, y): #Get the amount of golds at cell (x,y) 63 | for cell in self.golds: 64 | if x == cell["posx"] and y == cell["posy"]: 65 | return cell["amount"] 66 | return 0 67 | 68 | def get_obstacle(self, x, y): # Get the kind of the obstacle at cell(x,y) 69 | for cell in self.obstacles: 70 | if x == cell["posx"] and y == cell["posy"]: 71 | return cell["type"] 72 | return -1 # No obstacle at the cell (x,y) 73 | 74 | 75 | class State: 76 | STATUS_PLAYING = 0 77 | STATUS_ELIMINATED_WENT_OUT_MAP = 1 78 | STATUS_ELIMINATED_OUT_OF_ENERGY = 2 79 | STATUS_ELIMINATED_INVALID_ACTION = 3 80 | STATUS_STOP_EMPTY_GOLD = 4 81 | STATUS_STOP_END_STEP = 5 82 | 83 | def __init__(self): 84 | self.end = False 85 | self.score = 0 86 | self.lastAction = None 87 | self.id = 0 88 | self.x = 0 89 | self.y = 0 90 | self.energy = 0 91 | self.mapInfo = MapInfo() 92 | self.players = [] 93 | self.stepCount = 0 94 | self.status = State.STATUS_PLAYING 95 | 96 | def init_state(self, data): #parse data from server into object 97 | game_info = str_2_json(data) 98 | self.end = False 99 | self.score = 0 100 | self.lastAction = None 101 | self.id = game_info["playerId"] 102 | self.x = game_info["posx"] 103 | self.y = game_info["posy"] 104 | self.energy = game_info["energy"] 105 | self.mapInfo.init_map(game_info["gameinfo"]) 106 | self.stepCount = 0 107 | self.status = State.STATUS_PLAYING 108 | self.players = [{"playerId": 2, "posx": self.x, "posy": self.y}, 109 | {"playerId": 3, "posx": self.x, "posy": self.y}, 110 | {"playerId": 4, "posx": self.x, "posy": self.y}] 111 | 112 | def update_state(self, data): 113 | new_state = str_2_json(data) 114 | for player in new_state["players"]: 115 | if player["playerId"] == self.id: 116 | self.x = player["posx"] 117 | self.y = player["posy"] 118 | self.energy = player["energy"] 119 | self.score = player["score"] 120 | self.lastAction = player["lastAction"] 121 | self.status = player["status"] 122 | 123 | self.mapInfo.update(new_state["golds"], new_state["changedObstacles"]) 124 | self.players = new_state["players"] 125 | for i in range(len(self.players) + 1, 5, 1): 126 | self.players.append({"playerId": i, "posx": self.x, "posy": self.y}) 127 | self.stepCount = self.stepCount + 1 128 | -------------------------------------------------------------------------------- /Miner-Training-Local-CodeSample/TrainingClient.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from DQNModel import DQN # A class of creating a deep q-learning model 3 | from 
MinerEnv import MinerEnv # A class for creating a communication environment between the DQN model and the GameMiner environment (GAME_SOCKET_DUMMY.py)
4 | from Memory import Memory # A class for creating a replay memory that stores experiences for the training process
5 | 
6 | import pandas as pd
7 | import datetime
8 | import numpy as np
9 | 
10 | 
11 | HOST = "localhost"
12 | PORT = 1111
13 | if len(sys.argv) == 3:
14 |     HOST = str(sys.argv[1])
15 |     PORT = int(sys.argv[2])
16 | 
17 | # Create the header for the file that logs DQN training progress
18 | now = datetime.datetime.now() # Getting the latest datetime
19 | header = ["Ep", "Step", "Reward", "Total_reward", "Action", "Epsilon", "Done", "Termination_Code"] # Defining the header for the save file
20 | filename = "Data/data_" + now.strftime("%Y%m%d-%H%M") + ".csv"
21 | with open(filename, 'w') as f:
22 |     pd.DataFrame(columns=header).to_csv(f, encoding='utf-8', index=False, header=True)
23 | 
24 | # Parameters for training a DQN model
25 | N_EPISODE = 10000 # The number of episodes for training
26 | MAX_STEP = 1000 # The maximum number of steps in each episode
27 | BATCH_SIZE = 32 # The number of experiences used in each replay
28 | MEMORY_SIZE = 100000 # The capacity of the memory for storing experiences
29 | SAVE_NETWORK = 100 # After this number of episodes, the DQN model is saved for testing later
30 | INITIAL_REPLAY_SIZE = 1000 # The number of experiences that must be stored in the memory before replaying starts
31 | INPUTNUM = 198 # The number of input values for the DQN model
32 | ACTIONNUM = 6 # The number of actions output from the DQN model
33 | MAP_MAX_X = 21 # Width of the map
34 | MAP_MAX_Y = 9 # Height of the map
35 | 
36 | # Initialize a DQN model and a memory batch for storing experiences
37 | DQNAgent = DQN(INPUTNUM, ACTIONNUM)
38 | memory = Memory(MEMORY_SIZE)
39 | 
40 | # Initialize environment
41 | minerEnv = MinerEnv(HOST, PORT) # Creating a communication environment between the DQN model and the game environment (GAME_SOCKET_DUMMY.py)
42 | minerEnv.start() # Connect to the game
43 | 
44 | train = False # Indicates that replay has started and that epsilon starts to decrease
45 | # Training process:
46 | # the main loop of the deep Q-learning algorithm
47 | for episode_i in range(0, N_EPISODE):
48 |     try:
49 |         # Choosing a map in the list
50 |         mapID = np.random.randint(1, 6) # Choosing a map ID randomly from the 5 maps in the Maps folder
51 |         posID_x = np.random.randint(MAP_MAX_X) # Choosing an initial position of the DQN agent on the X-axis randomly
52 |         posID_y = np.random.randint(MAP_MAX_Y) # Choosing an initial position of the DQN agent on the Y-axis randomly
53 |         # Creating a request that initializes the map, the initial position, the initial energy, and the maximum number of steps of the DQN agent
54 |         request = ("map" + str(mapID) + "," + str(posID_x) + "," + str(posID_y) + ",50,100")
55 |         # Send the request to the game environment (GAME_SOCKET_DUMMY.py)
56 |         minerEnv.send_map_info(request)
57 | 
58 |         # Getting the initial state
59 |         minerEnv.reset() # Initialize the game environment
60 |         s = minerEnv.get_state() # Get the state after resetting.
61 |         # This function (get_state()) is an example of creating a state for the DQN model
62 |         total_reward = 0 # The accumulated reward for the entire episode
63 |         terminate = False # Indicates whether the episode has ended
64 |         maxStep = minerEnv.state.mapInfo.maxStep # Get the maximum number of steps for each episode in training
65 |         # Start an episode for training
66 |         for step in range(0, maxStep):
67 |             action = DQNAgent.act(s)  # Getting an action from the DQN model for the state (s)
68 |             minerEnv.step(str(action))  # Performing the action in order to obtain the new state
69 |             s_next = minerEnv.get_state()  # Getting the new state
70 |             reward = minerEnv.get_reward()  # Getting the reward
71 |             terminate = minerEnv.check_terminate()  # Checking the end status of the episode
72 | 
73 |             # Add this transition to the memory batch
74 |             memory.push(s, action, reward, terminate, s_next)
75 | 
76 |             # Sample batch memory to train network
77 |             if (memory.length > INITIAL_REPLAY_SIZE):
78 |                 # If there are more than INITIAL_REPLAY_SIZE experiences in the memory batch,
79 |                 # then start replaying
80 |                 batch = memory.sample(BATCH_SIZE)  # Get BATCH_SIZE experiences for replaying
81 |                 DQNAgent.replay(batch, BATCH_SIZE)  # Do the replay
82 |                 train = True  # Indicate that training has started
83 |             total_reward = total_reward + reward  # Add the reward to the total reward of the episode
84 |             s = s_next  # Assign the next state for the next step.
85 | 
86 |             # Saving data to file; the agent's status code fills the Termination_Code
87 |             # column, so each row has the same 8 columns as the header defined above
88 |             save_data = np.hstack([episode_i + 1, step + 1, reward, total_reward, action, DQNAgent.epsilon, terminate, minerEnv.state.status]).reshape(1, 8)
89 |             with open(filename, 'a') as f:
90 |                 pd.DataFrame(save_data).to_csv(f, encoding='utf-8', index=False, header=False)
91 | 
92 |             if terminate == True:
93 |                 # If the episode ends, then go to the next episode
94 |                 break
95 | 
96 |         # Periodically save the network architecture and weights
97 |         if (np.mod(episode_i + 1, SAVE_NETWORK) == 0 and train == True):
98 |             DQNAgent.target_train()  # Replace the learning weights of the target model with soft replacement
99 |             # Save the DQN model
100 |             now = datetime.datetime.now()  # Get the latest datetime
101 |             DQNAgent.save_model("TrainedModels/",
102 |                                 "DQNmodel_" + now.strftime("%Y%m%d-%H%M") + "_ep" + str(episode_i + 1))
103 | 
104 | 
105 |         # Print the training information after the episode
106 |         print('Episode %d ends. Number of steps is: %d. Accumulated Reward = %.2f.
Epsilon = %.2f .Termination code: %d' % ( 107 | episode_i + 1, step + 1, total_reward, DQNAgent.epsilon, terminate)) 108 | 109 | #Decreasing the epsilon if the replay starts 110 | if train == True: 111 | DQNAgent.update_epsilon() 112 | 113 | except Exception as e: 114 | import traceback 115 | 116 | traceback.print_exc() 117 | # print("Finished.") 118 | break 119 | -------------------------------------------------------------------------------- /Miner-Testing-Server/DUMMY_SERVER.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import json 3 | import math 4 | from random import randrange 5 | import socket 6 | 7 | MAP = "[[0,0,-2,100,0,0,-1,-1,-3,0,0,0,-1,-1,0,0,-3,0,-1,-1,0],[-1,-1,-2,0,0,0,-3,-1,0,-2,0,0,0,-1,0,-1,0,-2,-1,0,0],[0,0,-1,0,0,0,0,-1,-1,-1,0,0,100,0,0,0,0,50,-2,0,0],[0,0,0,0,-2,0,0,0,0,0,0,0,-1,50,-2,0,0,-1,-1,0,0],[-2,0,200,-2,-2,300,0,0,-2,-2,0,0,-3,0,-1,0,0,-3,-1,0,0],[0,-1,0,0,0,0,0,-3,0,0,-1,-1,0,0,0,0,0,0,-2,0,0],[0,-1,-1,0,0,-1,-1,0,0,700,-1,0,0,0,-2,-1,-1,0,0,0,100],[0,0,0,500,0,0,-1,0,-2,-2,-1,-1,0,0,-2,0,-3,0,0,-1,0],[-1,-1,0,-2,0,-1,-2,0,400,-2,-1,-1,500,0,-2,0,-3,100,0,0,0]]" 8 | POS_X = 0 9 | POS_Y = 0 10 | E = 50 11 | MAX_STEP = 50 12 | W = 21 13 | H = 9 14 | 15 | class ObstacleInfo: 16 | # initial energy for obstacles: Land (key = 0): -1, Forest(key = -1): 0 (random), Trap(key = -2): -10, Swamp (key = -3): -5 17 | types = {0: -1, -1: 0, -2: -10, -3: -5} 18 | 19 | def __init__(self): 20 | self.type = 0 21 | self.posx = 0 22 | self.posy = 0 23 | self.value = 0 24 | 25 | 26 | class GoldInfo: 27 | def __init__(self): 28 | self.posx = 0 29 | self.posy = 0 30 | self.amount = 0 31 | 32 | def loads(self, data): 33 | golds = [] 34 | for gd in data: 35 | g = GoldInfo() 36 | g.posx = gd["posx"] 37 | g.posy = gd["posy"] 38 | g.amount = gd["amount"] 39 | golds.append(g) 40 | return golds 41 | 42 | 43 | class PlayerInfo: 44 | STATUS_PLAYING = 0 45 | STATUS_ELIMINATED_WENT_OUT_MAP = 1 46 | STATUS_ELIMINATED_OUT_OF_ENERGY = 2 47 | STATUS_ELIMINATED_INVALID_ACTION = 3 48 | STATUS_STOP_EMPTY_GOLD = 4 49 | STATUS_STOP_END_STEP = 5 50 | 51 | def __init__(self, id): 52 | self.playerId = id 53 | self.score = 0 54 | self.energy = E 55 | self.posx = POS_X 56 | self.posy = POS_Y 57 | self.lastAction = 0 58 | self.status = PlayerInfo.STATUS_PLAYING 59 | self.freeCount = 0 60 | 61 | 62 | class GameInfo: 63 | def __init__(self): 64 | self.numberOfPlayers = 1 65 | self.width = W 66 | self.height = H 67 | self.steps = MAX_STEP 68 | self.golds = [] 69 | self.obstacles = [] 70 | 71 | def loads(self, data): 72 | m = GameInfo() 73 | m.width = data["width"] 74 | m.height = data["height"] 75 | m.golds = GoldInfo().loads(data["golds"]) 76 | m.obstacles = data["obstacles"] 77 | m.numberOfPlayers = data["numberOfPlayers"] 78 | m.steps = data["steps"] 79 | return m 80 | 81 | 82 | class UserMatch: 83 | def __init__(self): 84 | self.playerId = 1 85 | self.posx = POS_X 86 | self.posy = POS_Y 87 | self.energy = E 88 | self.gameinfo = GameInfo() 89 | 90 | def to_json(self): 91 | return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4) 92 | 93 | 94 | class StepState: 95 | def __init__(self): 96 | self.players = [] 97 | self.golds = [] 98 | self.changedObstacles = [] 99 | 100 | def to_json(self): 101 | return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4) 102 | 103 | 104 | class GameSocket: 105 | bog_energy_chain = {-5: -20, -20: -40, -40: -100, -100: -100} 106 | 107 | def __init__(self): 108 | 
self.stepCount = 0 109 | self.maxStep = MAX_STEP 110 | self.userMatch = UserMatch() 111 | self.user = PlayerInfo(1) 112 | self.stepState = StepState() 113 | self.map = json.loads(MAP) # running map info: 0->Land, -1->Forest, -2->Trap, -3:Swamp, >0:Gold 114 | self.energyOnMap = json.loads(MAP) # self.energyOnMap[x][y]: <0, amount of energy which player will consume if it move into (x,y) 115 | self.E = E 116 | self.stepCount = 0 117 | self.craftUsers = [] # players that craft at current step - for calculating amount of gold 118 | self.craftMap = {} # cells that players craft at current step, key: x_y, value: number of players that craft at (x,y) 119 | 120 | def setup(self): 121 | self.init_map() 122 | self.maxStep = self.userMatch.gameinfo.steps 123 | 124 | # init data for players 125 | self.stepState.players = [self.user] 126 | self.E = self.userMatch.energy 127 | 128 | def init_map(self): # load map info 129 | i = 0 130 | while i < len(self.map): 131 | j = 0 132 | while j < len(self.map[i]): 133 | if self.map[i][j] > 0: # gold 134 | g = GoldInfo() 135 | g.posx = j 136 | g.posy = i 137 | g.amount = self.map[i][j] 138 | self.userMatch.gameinfo.golds.append(g) 139 | else: # obstacles 140 | o = ObstacleInfo() 141 | o.posx = j 142 | o.posy = i 143 | o.type = -self.map[i][j] 144 | o.value = ObstacleInfo.types[self.map[i][j]] 145 | self.userMatch.gameinfo.obstacles.append(o) 146 | j += 1 147 | i += 1 148 | self.stepState.golds = self.userMatch.gameinfo.golds 149 | for x in range(len(self.map)): 150 | for y in range(len(self.map[x])): 151 | if self.map[x][y] > 0: # gold 152 | self.energyOnMap[x][y] = -4 153 | else: # obstacles 154 | self.energyOnMap[x][y] = ObstacleInfo.types[self.map[x][y]] 155 | 156 | def get_game_info(self): 157 | data = self.userMatch.to_json() 158 | print("send: ", data) 159 | return data 160 | 161 | def get_step(self): # send data to player (simulate player's receive request) 162 | self.stepCount = self.stepCount + 1 163 | if self.stepCount >= self.maxStep: 164 | for player in self.stepState.players: 165 | player.status = PlayerInfo.STATUS_STOP_END_STEP 166 | data = self.stepState.to_json() 167 | return data 168 | 169 | def receive(self, message): # receive message from player (simulate send request from player) 170 | self.stepState.changedObstacles = [] 171 | action = int(message) 172 | # print("Action = ", action) 173 | self.user.lastAction = action 174 | self.craftUsers = [] 175 | self.step_action(self.user, action) 176 | self.action_5_craft() 177 | for c in self.stepState.changedObstacles: 178 | self.map[c["posy"]][c["posx"]] = -c["type"] 179 | self.energyOnMap[c["posy"]][c["posx"]] = c["value"] 180 | 181 | def step_action(self, user, action): 182 | switcher = { 183 | 0: self.action_0_left, 184 | 1: self.action_1_right, 185 | 2: self.action_2_up, 186 | 3: self.action_3_down, 187 | 4: self.action_4_free, 188 | 5: self.action_5_craft_pre 189 | } 190 | func = switcher.get(action, self.invalid_action) 191 | func(user) 192 | 193 | def action_5_craft_pre(self, user): # collect players who craft at current step 194 | user.freeCount = 0 195 | if self.map[user.posy][user.posx] <= 0: # craft at the non-gold cell 196 | user.energy -= 10 197 | if user.energy <= 0: 198 | user.status = PlayerInfo.STATUS_ELIMINATED_OUT_OF_ENERGY 199 | user.lastAction = 6 #eliminated 200 | else: 201 | user.energy -= 5 202 | if user.energy > 0: 203 | self.craftUsers.append(user) 204 | key = str(user.posx) + "_" + str(user.posy) 205 | if key in self.craftMap: 206 | count = self.craftMap[key] 207 | 
self.craftMap[key] = count + 1
208 |                 else:
209 |                     self.craftMap[key] = 1
210 |             else:
211 |                 user.status = PlayerInfo.STATUS_ELIMINATED_OUT_OF_ENERGY
212 |                 user.lastAction = 6  #eliminated
213 | 
214 |     def action_0_left(self, user):  # user go left
215 |         user.freeCount = 0
216 |         user.posx = user.posx - 1
217 |         if user.posx < 0:
218 |             user.status = PlayerInfo.STATUS_ELIMINATED_WENT_OUT_MAP
219 |             user.lastAction = 6  #eliminated
220 |         else:
221 |             self.go_to_pos(user)
222 | 
223 |     def action_1_right(self, user):  # user go right
224 |         user.freeCount = 0
225 |         user.posx = user.posx + 1
226 |         if user.posx >= self.userMatch.gameinfo.width:
227 |             user.status = PlayerInfo.STATUS_ELIMINATED_WENT_OUT_MAP
228 |             user.lastAction = 6  #eliminated
229 |         else:
230 |             self.go_to_pos(user)
231 | 
232 |     def action_2_up(self, user):  # user go up
233 |         user.freeCount = 0
234 |         user.posy = user.posy - 1
235 |         if user.posy < 0:
236 |             user.status = PlayerInfo.STATUS_ELIMINATED_WENT_OUT_MAP
237 |             user.lastAction = 6  #eliminated
238 |         else:
239 |             self.go_to_pos(user)
240 | 
241 |     def action_3_down(self, user):  # user go down
242 |         user.freeCount = 0
243 |         user.posy = user.posy + 1
244 |         if user.posy >= self.userMatch.gameinfo.height:
245 |             user.status = PlayerInfo.STATUS_ELIMINATED_WENT_OUT_MAP
246 |             user.lastAction = 6  #eliminated
247 |         else:
248 |             self.go_to_pos(user)
249 | 
250 |     def action_4_free(self, user):  # user rests to recover energy
251 |         user.freeCount += 1
252 |         if user.freeCount == 1:
253 |             user.energy += int(self.E / 4)
254 |         elif user.freeCount == 2:
255 |             user.energy += int(self.E / 3)
256 |         elif user.freeCount == 3:
257 |             user.energy += int(self.E / 2)
258 |         else:
259 |             user.energy = self.E
260 |         if user.energy > self.E:
261 |             user.energy = self.E
262 | 
263 |     def action_5_craft(self):
264 |         craftCount = len(self.craftUsers)
265 |         # print ("craftCount",craftCount)
266 |         if (craftCount > 0):
267 |             for user in self.craftUsers:
268 |                 x = user.posx
269 |                 y = user.posy
270 |                 key = str(user.posx) + "_" + str(user.posy)
271 |                 c = self.craftMap[key]
272 |                 m = min(math.ceil(self.map[y][x] / c), 50)
273 |                 user.score += m
274 |                 # print ("user", user.playerId, m)
275 |             for user in self.craftUsers:
276 |                 x = user.posx
277 |                 y = user.posy
278 |                 key = str(user.posx) + "_" + str(user.posy)
279 |                 if key in self.craftMap:
280 |                     c = self.craftMap[key]
281 |                     del self.craftMap[key]
282 |                     m = min(math.ceil(self.map[y][x] / c), 50)
283 |                     self.map[y][x] -= m * c
284 |                     if self.map[y][x] < 0:
285 |                         self.map[y][x] = 0
286 |                         self.energyOnMap[y][x] = ObstacleInfo.types[0]
287 |                     for g in self.stepState.golds:
288 |                         if g.posx == x and g.posy == y:
289 |                             g.amount = self.map[y][x]
290 |                             if g.amount == 0:
291 |                                 self.stepState.golds.remove(g)
292 |                                 self.add_changed_obstacle(x, y, 0, ObstacleInfo.types[0])
293 |                                 if len(self.stepState.golds) == 0:
294 |                                     for player in self.stepState.players:
295 |                                         player.status = PlayerInfo.STATUS_STOP_EMPTY_GOLD
296 |                             break
297 |         self.craftMap = {}
298 | 
299 |     def invalid_action(self, user):
300 |         user.status = PlayerInfo.STATUS_ELIMINATED_INVALID_ACTION
301 |         user.lastAction = 6  #eliminated
302 | 
303 |     def go_to_pos(self, user):  # player moves to cell (x, y)
304 |         if self.map[user.posy][user.posx] == -1:
305 |             user.energy -= randrange(16) + 5
306 |         elif self.map[user.posy][user.posx] == 0:
307 |             user.energy += self.energyOnMap[user.posy][user.posx]
308 |         elif self.map[user.posy][user.posx] == -2:
309 |             user.energy += self.energyOnMap[user.posy][user.posx]
310 |             self.add_changed_obstacle(user.posx, user.posy, 0, ObstacleInfo.types[0])
311 | elif self.map[user.posy][user.posx] == -3: 312 | user.energy += self.energyOnMap[user.posy][user.posx] 313 | self.add_changed_obstacle(user.posx, user.posy, 3, 314 | self.bog_energy_chain[self.energyOnMap[user.posy][user.posx]]) 315 | else: 316 | user.energy -= 4 317 | if user.energy <= 0: 318 | user.status = PlayerInfo.STATUS_ELIMINATED_OUT_OF_ENERGY 319 | user.lastAction = 6 #eliminated 320 | 321 | def add_changed_obstacle(self, x, y, t, v): 322 | added = False 323 | for o in self.stepState.changedObstacles: 324 | if o["posx"] == x and o["posy"] == y: 325 | added = True 326 | break 327 | if not added: 328 | o = {} 329 | o["posx"] = x 330 | o["posy"] = y 331 | o["type"] = t 332 | o["value"] = v 333 | self.stepState.changedObstacles.append(o) 334 | 335 | 336 | if __name__ == "__main__": 337 | 338 | HOST = "localhost" 339 | PORT = int(sys.argv[1]) 340 | 341 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 342 | print('# Socket created') 343 | 344 | try: 345 | s.bind((HOST, PORT)) 346 | except socket.error as msg: 347 | print('# Bind failed. ') 348 | sys.exit() 349 | s.listen(10) 350 | conn, addr = s.accept() 351 | print('# Connected to ' + addr[0] + ':' + str(addr[1])) 352 | 353 | game = GameSocket() 354 | game.setup() 355 | conn.send(bytes(game.get_game_info(),"utf-8")) 356 | while game.user.status == 0: 357 | data = conn.recv(1024) 358 | game.receive(data) 359 | conn.send(bytes(game.get_step(), "utf-8")) 360 | 361 | s.close() 362 | print("Player score: ", game.user.score) -------------------------------------------------------------------------------- /Miner-Training-Local-CodeSample/GAME_SOCKET_DUMMY.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from array import * 3 | import json 4 | import os 5 | import math 6 | from bot1 import Bot1 7 | from bot2 import Bot2 8 | from bot3 import Bot3 9 | from random import randrange 10 | 11 | 12 | class ObstacleInfo: 13 | # initial energy for obstacles: Land (key = 0): -1, Forest(key = -1): 0 (random), Trap(key = -2): -10, Swamp (key = -3): -5 14 | types = {0: -1, -1: 0, -2: -10, -3: -5} 15 | 16 | def __init__(self): 17 | self.type = 0 18 | self.posx = 0 19 | self.posy = 0 20 | self.value = 0 21 | 22 | 23 | class GoldInfo: 24 | def __init__(self): 25 | self.posx = 0 26 | self.posy = 0 27 | self.amount = 0 28 | 29 | def loads(self, data): 30 | golds = [] 31 | for gd in data: 32 | g = GoldInfo() 33 | g.posx = gd["posx"] 34 | g.posy = gd["posy"] 35 | g.amount = gd["amount"] 36 | golds.append(g) 37 | return golds 38 | 39 | 40 | class PlayerInfo: 41 | STATUS_PLAYING = 0 42 | STATUS_ELIMINATED_WENT_OUT_MAP = 1 43 | STATUS_ELIMINATED_OUT_OF_ENERGY = 2 44 | STATUS_ELIMINATED_INVALID_ACTION = 3 45 | STATUS_STOP_EMPTY_GOLD = 4 46 | STATUS_STOP_END_STEP = 5 47 | 48 | def __init__(self, id): 49 | self.playerId = id 50 | self.score = 0 51 | self.energy = 0 52 | self.posx = 0 53 | self.posy = 0 54 | self.lastAction = -1 55 | self.status = PlayerInfo.STATUS_PLAYING 56 | self.freeCount = 0 57 | 58 | 59 | class GameInfo: 60 | def __init__(self): 61 | self.numberOfPlayers = 1 62 | self.width = 0 63 | self.height = 0 64 | self.steps = 100 65 | self.golds = [] 66 | self.obstacles = [] 67 | 68 | def loads(self, data): 69 | m = GameInfo() 70 | m.width = data["width"] 71 | m.height = data["height"] 72 | m.golds = GoldInfo().loads(data["golds"]) 73 | m.obstacles = data["obstacles"] 74 | m.numberOfPlayers = data["numberOfPlayers"] 75 | m.steps = data["steps"] 76 | return m 77 | 78 | 79 | class UserMatch: 
80 | def __init__(self): 81 | self.playerId = 1 82 | self.posx = 0 83 | self.posy = 0 84 | self.energy = 50 85 | self.gameinfo = GameInfo() 86 | 87 | def to_json(self): 88 | return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4) 89 | 90 | 91 | class StepState: 92 | def __init__(self): 93 | self.players = [] 94 | self.golds = [] 95 | self.changedObstacles = [] 96 | 97 | def to_json(self): 98 | return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4) 99 | 100 | 101 | class GameSocket: 102 | bog_energy_chain = {-5: -20, -20: -40, -40: -100, -100: -100} 103 | 104 | def __init__(self, host, port): 105 | self.stepCount = 0 106 | self.maxStep = 0 107 | self.mapdir = "Maps" # where to load all pre-defined maps 108 | self.mapid = "" 109 | self.userMatch = UserMatch() 110 | self.user = PlayerInfo(1) 111 | self.stepState = StepState() 112 | self.maps = {} # key: map file name, value: file content 113 | self.map = [] # running map info: 0->Land, -1->Forest, -2->Trap, -3:Swamp, >0:Gold 114 | self.energyOnMap = [] # self.energyOnMap[x][y]: <0, amount of energy which player will consume if it move into (x,y) 115 | self.E = 50 116 | self.resetFlag = True 117 | self.craftUsers = [] # players that craft at current step - for calculating amount of gold 118 | self.bots = [] 119 | self.craftMap = {} # cells that players craft at current step, key: x_y, value: number of players that craft at (x,y) 120 | 121 | def init_bots(self): 122 | self.bots = [Bot1(2), Bot2(3), Bot3(4)] # use bot1(id=2), bot2(id=3), bot3(id=4) 123 | for (bot) in self.bots: # at the beginning, all bots will have same position, energy as player 124 | bot.info.posx = self.user.posx 125 | bot.info.posy = self.user.posy 126 | bot.info.energy = self.user.energy 127 | bot.info.lastAction = -1 128 | bot.info.status = PlayerInfo.STATUS_PLAYING 129 | bot.info.score = 0 130 | self.stepState.players.append(bot.info) 131 | self.userMatch.gameinfo.numberOfPlayers = len(self.stepState.players) 132 | print("numberOfPlayers: ", self.userMatch.gameinfo.numberOfPlayers) 133 | 134 | def reset(self, requests): # load new game by given request: [map id (filename), posx, posy, initial energy] 135 | # load new map 136 | self.reset_map(requests[0]) 137 | self.userMatch.posx = int(requests[1]) 138 | self.userMatch.posy = int(requests[2]) 139 | self.userMatch.energy = int(requests[3]) 140 | self.userMatch.gameinfo.steps = int(requests[4]) 141 | self.maxStep = self.userMatch.gameinfo.steps 142 | 143 | # init data for players 144 | self.user.posx = self.userMatch.posx # in 145 | self.user.posy = self.userMatch.posy 146 | self.user.energy = self.userMatch.energy 147 | self.user.status = PlayerInfo.STATUS_PLAYING 148 | self.user.score = 0 149 | self.stepState.players = [self.user] 150 | self.E = self.userMatch.energy 151 | self.resetFlag = True 152 | self.init_bots() 153 | self.stepCount = 0 154 | 155 | def reset_map(self, id): # load map info 156 | self.mapId = id 157 | self.map = json.loads(self.maps[self.mapId]) 158 | self.userMatch = self.map_info(self.map) 159 | #print(self.map) 160 | self.stepState.golds = self.userMatch.gameinfo.golds 161 | #self.map = json.loads(self.maps[self.mapId]) 162 | self.energyOnMap = json.loads(self.maps[self.mapId]) 163 | for x in range(len(self.map)): 164 | for y in range(len(self.map[x])): 165 | if self.map[x][y] > 0: # gold 166 | self.energyOnMap[x][y] = -4 167 | else: # obstacles 168 | self.energyOnMap[x][y] = ObstacleInfo.types[self.map[x][y]] 169 | 170 | def connect(self): # 
simulate player's connect request 171 | print("Connected to server.") 172 | # load all pre-defined maps from mapDir 173 | for filename in os.listdir(self.mapdir): 174 | print("Found: " + filename) 175 | with open(os.path.join(self.mapdir, filename), 'r') as f: 176 | self.maps[filename] = f.read() 177 | 178 | def map_info(self, map): # get map info 179 | # print(map) 180 | userMatch = UserMatch() 181 | userMatch.gameinfo.height = len(map) 182 | userMatch.gameinfo.width = len(map[0]) 183 | i = 0 184 | max_gold_num = 10000 185 | while i < len(map): 186 | j = 0 187 | while j < len(map[i]): 188 | if map[i][j] > 0: # gold 189 | g = GoldInfo() 190 | g.posx = j 191 | g.posy = i 192 | g.amount = (randrange(min(25, max(int(max_gold_num/50), 1))) + 1) * 50 193 | self.map[i][j] = g.amount 194 | max_gold_num -= g.amount 195 | userMatch.gameinfo.golds.append(g) 196 | else: # obstacles 197 | o = ObstacleInfo() 198 | o.posx = j 199 | o.posy = i 200 | o.type = -map[i][j] 201 | o.value = ObstacleInfo.types[map[i][j]] 202 | userMatch.gameinfo.obstacles.append(o) 203 | j += 1 204 | i += 1 205 | #print("max_gold_num=",max_gold_num) 206 | return userMatch 207 | 208 | def receive(self): # send data to player (simulate player's receive request) 209 | if self.resetFlag: # for the first time -> send game info 210 | self.resetFlag = False 211 | data = self.userMatch.to_json() 212 | for (bot) in self.bots: 213 | bot.new_game(data) 214 | #print(data) 215 | return data 216 | else: # send step state 217 | self.stepCount = self.stepCount + 1 218 | if self.stepCount >= self.maxStep: 219 | for player in self.stepState.players: 220 | player.status = PlayerInfo.STATUS_STOP_END_STEP 221 | data = self.stepState.to_json() 222 | for (bot) in self.bots: # update bots' state 223 | bot.new_state(data) 224 | # print(data) 225 | return data 226 | 227 | def send(self, message): # receive message from player (simulate send request from player) 228 | if message.isnumeric(): # player send action 229 | self.resetFlag = False 230 | self.stepState.changedObstacles = [] 231 | action = int(message) 232 | # print("Action = ", action) 233 | self.user.lastAction = action 234 | self.craftUsers = [] 235 | self.step_action(self.user, action) 236 | for bot in self.bots: 237 | if bot.info.status == PlayerInfo.STATUS_PLAYING: 238 | action = bot.next_action() 239 | bot.info.lastAction = action 240 | # print("Bot Action: ", action) 241 | self.step_action(bot.info, action) 242 | self.action_5_craft() 243 | for c in self.stepState.changedObstacles: 244 | self.map[c["posy"]][c["posx"]] = -c["type"] 245 | self.energyOnMap[c["posy"]][c["posx"]] = c["value"] 246 | 247 | else: # reset game 248 | requests = message.split(",") 249 | print("Reset game: ", requests) 250 | self.reset(requests) 251 | 252 | def step_action(self, user, action): 253 | switcher = { 254 | 0: self.action_0_left, 255 | 1: self.action_1_right, 256 | 2: self.action_2_up, 257 | 3: self.action_3_down, 258 | 4: self.action_4_free, 259 | 5: self.action_5_craft_pre 260 | } 261 | func = switcher.get(action, self.invalidAction) 262 | func(user) 263 | 264 | def action_5_craft_pre(self, user): # collect players who craft at current step 265 | user.freeCount = 0 266 | if self.map[user.posy][user.posx] <= 0: # craft at the non-gold cell 267 | user.energy -= 10 268 | if user.energy <= 0: 269 | user.status = PlayerInfo.STATUS_ELIMINATED_OUT_OF_ENERGY 270 | user.lastAction = 6 #eliminated 271 | else: 272 | user.energy -= 5 273 | if user.energy > 0: 274 | self.craftUsers.append(user) 275 | key = 
str(user.posx) + "_" + str(user.posy)
276 |                 if key in self.craftMap:
277 |                     count = self.craftMap[key]
278 |                     self.craftMap[key] = count + 1
279 |                 else:
280 |                     self.craftMap[key] = 1
281 |             else:
282 |                 user.status = PlayerInfo.STATUS_ELIMINATED_OUT_OF_ENERGY
283 |                 user.lastAction = 6  #eliminated
284 | 
285 |     def action_0_left(self, user):  # user go left
286 |         user.freeCount = 0
287 |         user.posx = user.posx - 1
288 |         if user.posx < 0:
289 |             user.status = PlayerInfo.STATUS_ELIMINATED_WENT_OUT_MAP
290 |             user.lastAction = 6  #eliminated
291 |         else:
292 |             self.go_to_pos(user)
293 | 
294 |     def action_1_right(self, user):  # user go right
295 |         user.freeCount = 0
296 |         user.posx = user.posx + 1
297 |         if user.posx >= self.userMatch.gameinfo.width:
298 |             user.status = PlayerInfo.STATUS_ELIMINATED_WENT_OUT_MAP
299 |             user.lastAction = 6  #eliminated
300 |         else:
301 |             self.go_to_pos(user)
302 | 
303 |     def action_2_up(self, user):  # user go up
304 |         user.freeCount = 0
305 |         user.posy = user.posy - 1
306 |         if user.posy < 0:
307 |             user.status = PlayerInfo.STATUS_ELIMINATED_WENT_OUT_MAP
308 |             user.lastAction = 6  #eliminated
309 |         else:
310 |             self.go_to_pos(user)
311 | 
312 |     def action_3_down(self, user):  # user go down
313 |         user.freeCount = 0
314 |         user.posy = user.posy + 1
315 |         if user.posy >= self.userMatch.gameinfo.height:
316 |             user.status = PlayerInfo.STATUS_ELIMINATED_WENT_OUT_MAP
317 |             user.lastAction = 6  #eliminated
318 |         else:
319 |             self.go_to_pos(user)
320 | 
321 |     def action_4_free(self, user):  # user rests to recover energy
322 |         user.freeCount += 1
323 |         if user.freeCount == 1:
324 |             user.energy += int(self.E / 4)
325 |         elif user.freeCount == 2:
326 |             user.energy += int(self.E / 3)
327 |         elif user.freeCount == 3:
328 |             user.energy += int(self.E / 2)
329 |         else:
330 |             user.energy = self.E
331 |         if user.energy > self.E:
332 |             user.energy = self.E
333 | 
334 |     def action_5_craft(self):
335 |         craftCount = len(self.craftUsers)
336 |         # print ("craftCount",craftCount)
337 |         if (craftCount > 0):
338 |             for user in self.craftUsers:
339 |                 x = user.posx
340 |                 y = user.posy
341 |                 key = str(user.posx) + "_" + str(user.posy)
342 |                 c = self.craftMap[key]
343 |                 m = min(math.ceil(self.map[y][x] / c), 50)
344 |                 user.score += m
345 |                 # print ("user", user.playerId, m)
346 |             for user in self.craftUsers:
347 |                 x = user.posx
348 |                 y = user.posy
349 |                 key = str(user.posx) + "_" + str(user.posy)
350 |                 if key in self.craftMap:
351 |                     c = self.craftMap[key]
352 |                     del self.craftMap[key]
353 |                     m = min(math.ceil(self.map[y][x] / c), 50)
354 |                     self.map[y][x] -= m * c
355 |                     if self.map[y][x] < 0:
356 |                         self.map[y][x] = 0
357 |                         self.energyOnMap[y][x] = ObstacleInfo.types[0]
358 |                     for g in self.stepState.golds:
359 |                         if g.posx == x and g.posy == y:
360 |                             g.amount = self.map[y][x]
361 |                             if g.amount == 0:
362 |                                 self.stepState.golds.remove(g)
363 |                                 self.add_changed_obstacle(x, y, 0, ObstacleInfo.types[0])
364 |                                 if len(self.stepState.golds) == 0:
365 |                                     for player in self.stepState.players:
366 |                                         player.status = PlayerInfo.STATUS_STOP_EMPTY_GOLD
367 |                             break
368 |         self.craftMap = {}
369 | 
370 |     def invalidAction(self, user):
371 |         user.status = PlayerInfo.STATUS_ELIMINATED_INVALID_ACTION
372 |         user.lastAction = 6  #eliminated
373 | 
374 |     def go_to_pos(self, user):  # player moves to cell (x, y)
375 |         if self.map[user.posy][user.posx] == -1:
376 |             user.energy -= randrange(16) + 5
377 |         elif self.map[user.posy][user.posx] == 0:
378 |             user.energy += self.energyOnMap[user.posy][user.posx]
379 |         elif self.map[user.posy][user.posx] == -2:
380 |             user.energy += self.energyOnMap[user.posy][user.posx]
381 |             self.add_changed_obstacle(user.posx, user.posy, 0, ObstacleInfo.types[0])
382 |         elif self.map[user.posy][user.posx] == -3:
383 |             user.energy += self.energyOnMap[user.posy][user.posx]
384 |             self.add_changed_obstacle(user.posx, user.posy, 3,
385 |                                       self.bog_energy_chain[self.energyOnMap[user.posy][user.posx]])
386 |         else:
387 |             user.energy -= 4
388 |         if user.energy <= 0:
389 |             user.status = PlayerInfo.STATUS_ELIMINATED_OUT_OF_ENERGY
390 |             user.lastAction = 6  #eliminated
391 | 
392 |     def add_changed_obstacle(self, x, y, t, v):
393 |         added = False
394 |         for o in self.stepState.changedObstacles:
395 |             if o["posx"] == x and o["posy"] == y:
396 |                 added = True
397 |                 break
398 |         if not added:
399 |             o = {}
400 |             o["posx"] = x
401 |             o["posy"] = y
402 |             o["type"] = t
403 |             o["value"] = v
404 |             self.stepState.changedObstacles.append(o)
405 | 
406 |     def close(self):
407 |         print("Close socket.")
408 | 
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
# rlcomp2020
This is the sample source code for the Reinforcement Learning Competition hosted by FPT-Software (Hanoi, Vietnam). The game is Gold Miner.



***
***(English version; translated Vietnamese version below)***

## Sample source code description: training and competition

During the competition, the following State information will be returned after an Action is performed:

- Information about competing Agents.

```json
{
    "playerId": Agent's ID, integer;
    "posx": Agent's X position, integer;
    "posy": Agent's Y position, integer;
    "score": Agent's amount of gold mined, integer;
    "energy": Agent's amount of remaining energy, integer;
    "lastAction": the last action, integer;
    "status": Agent's status - playing or eliminated, integer
}
```
- Information about the remaining obstacles on the map (their position and the amount of energy that will be subtracted when an Agent passes by).
- Information about the remaining gold mines on the map (their position and the amount of gold).
- Map size (height and width)

Based on the returned State information, teams can decide their own training strategies, such as designing a Reward Function and defining a State Space. In the two sample source code packages (Miner-Training-Local-CodeSample and Miner-Testing-CodeSample) provided to teams (described below), we give an example of defining a State Space and designing a Reward Function using the 02 functions get_state() and get_reward(), respectively. Below is an overview of the two sample source code packages provided for training and competition:
## A. Source code for training - Miner-Training-Local-CodeSample
This is the sample source code used for training. The source code contains 02 major parts: the Miner Game Environment and the deep reinforcement learning algorithm (Deep Q-learning - DQN).

![alt text](image/codeAI/Picture1.png)
*Figure 1: The information flow between programs in the sample source code used for training*

Details of the two parts are as follows:
### 1. Miner Game Environment
The source code of the Miner Game Environment is derived from the original source code of the Miner Game on the Codelearn system. It includes GAME_SOCKET_DUMMY.py, MINER_STATE.py, MinerEnv.py, Maps, and 03 Bots (bot1.py, bot2.py, bot3.py). Figure 2 illustrates the exchange of map information and the Agent's State information between MinerEnv.py and GAME_SOCKET_DUMMY.py. The details of the programs are described below.

![alt text](image/codeAI/Picture2.png)
*Figure 2: The information flow between MinerEnv.py and GAME_SOCKET_DUMMY.py during training (simulating the information flow between client and server)*

a) MinerEnv.py: A program designed based on the general structure of a reinforcement learning environment to help teams access the main program (GAME_SOCKET_DUMMY.py) in a simple and convenient way. Some of the main functions in the program are as follows:

- **start()**: a single-use function used to simulate the process of connecting to the server to start playing. During training, this function calls the **connect()** function in GAME_SOCKET_DUMMY.py to read the 05 maps in the Maps folder.

- **send_map_info()**: a function used to select maps to train Agents.

- **reset()**: a function used to initialize a map and a State for the Agent. This function calls the **receive()** function in GAME_SOCKET_DUMMY.py to get the map's initial information saved in a json message, as well as the **init_state(message)** function in MINER_STATE.py to update the Agent's State with the map's initial information.

- **get_state()**: a function provided as an example of defining a State of the Agent during training. Teams can overwrite this function to define a State that suits their training strategies.

- **step()**: a function used to send an action to GAME_SOCKET_DUMMY.py and receive the resulting changes to the map information and the Agent's State.

- **get_reward()**: a function provided as an example of defining a reward function of the Agent during training. Teams can overwrite this function to define a reward function that suits their training strategies.
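As an illustration of the last bullet, a rewritten reward function might look like the minimal sketch below. It assumes a hypothetical `score_pre` attribute on the environment that caches the score from the previous step (it would be initialized in reset()); the penalty values are illustrative only, not the sample's actual reward.

```python
# A minimal sketch of a custom MinerEnv.get_reward(), assuming the environment
# caches the previous score in self.score_pre (a hypothetical attribute set in
# reset()). The penalty values are illustrative, not the sample's actual reward.
from MINER_STATE import State

def get_reward(self):
    # Reward the gold mined since the previous step
    reward = self.state.score - self.score_pre
    self.score_pre = self.state.score
    # Penalize terminal states in which the agent was eliminated
    if self.state.status == State.STATUS_ELIMINATED_WENT_OUT_MAP:
        reward -= 10
    if self.state.status == State.STATUS_ELIMINATED_OUT_OF_ENERGY:
        reward -= 10
    return reward
```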
b) MINER_STATE.py (teams should not change the source code in this program): this program is sample source code for saving the map information and the Agent's State received from GAME_SOCKET_DUMMY.py (which will be sent from the server in the actual competition). This program is designed to help teams manage their State information easily during training. The Map and State classes, along with some main functions in these two classes, are as follows:

- **MapInfo** (Class): a class used to store all map information. This class includes max_x, max_y, maxStep, numberOfPlayer, golds (the current amount of remaining gold on the map), and obstacles (the information about current obstacles on the map).

+ **update(golds, changedObstacles)**: update the map information after each step.

- **State** (Class): a class that contains the States of the game (including the player's State and the map).

+ **init_state(data)**: a function used to initialize the map information and the Agent's State at the beginning of an episode in training (or a match in the actual competition).
+ **update_state(data)**: a function used to update the State of the game after each step. The transferred data includes the map information and the Agent's State.
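MapInfo also exposes small lookup helpers such as gold_amount(x, y) and get_obstacle(x, y) (see MINER_STATE.py). A custom get_state() can combine them into a feature vector; the local-view encoding below is a hypothetical example, shown only to make the idea concrete.

```python
# A hypothetical state feature built from the MINER_STATE accessors:
# a (2*radius+1) x (2*radius+1) window of the map centered on the agent.
import numpy as np

def local_view(state, radius=2):
    view = np.zeros((2 * radius + 1, 2 * radius + 1))
    for dy in range(-radius, radius + 1):
        for dx in range(-radius, radius + 1):
            x, y = state.x + dx, state.y + dy
            gold = state.mapInfo.gold_amount(x, y)
            if gold > 0:
                view[dy + radius][dx + radius] = gold
            else:
                # get_obstacle() returns the obstacle type, or -1 when there
                # is no obstacle at (x, y); off-map cells also fall through here
                view[dy + radius][dx + radius] = state.mapInfo.get_obstacle(x, y)
    return view.flatten()
```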
c) GAME_SOCKET_DUMMY.py (teams should not change the source code in this program): a program used to simulate the gold miner game, including the process of transferring data (messages) to the server. This program contains 07 classes: ObstacleInfo, GoldInfo, PlayerInfo, GameInfo, UserMatch, StepState and GameSocket. GameSocket is the main class and contains the following main functions:

- **__init__(host, port)**: a function used to initialize the environment. In this function, the purpose of the host and port initialization is to simulate the connection to the server in the actual competition.

- **init_bots()**: a function designed to assist the players to train the Agent with bots. To specify a bot to participate in training, use the following command: **self.bots = [Bot1(2), Bot2(3), Bot3(4)]**

- **connect()**: a function used to simulate the connection from client to server. In training, the function will load the maps from the Maps folder into the environment.

- **receive()**: a function used to simulate the action in which the client receives messages from the server. During training, if this function is called for the first time, it will return the map's initial information and the Agent's initial State. In other cases, it will return the current map information and the Agent's current State.

- **send()**: a function used to simulate the action in which the client sends messages to the server. During training, there are 2 types of messages from the client:

+ Action: an action for the next step; the data type is numeric.

+ Request: a request for the parameters used to initialize the game environment. The parameters include: map, init_x, init_y, init_energy, max_step. For example, request = "map1,1,2,100,150" means that the server will use the map information (gold, obstacles) from map1 in the Maps folder, the players will start from position (x = 1, y = 2) with an initial energy of 100, and the game will have a maximum of 150 steps.
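Put together, a hypothetical training client drives these two message types through the MinerEnv wrappers described in a):

```python
# A hypothetical reset-then-step exchange through the MinerEnv wrappers.
from MinerEnv import MinerEnv

minerEnv = MinerEnv("localhost", 1111)      # host/port are only simulated during training
minerEnv.start()                            # connect (reads the maps during training)
minerEnv.send_map_info("map1,1,2,100,150")  # Request: map1, start at (1, 2), energy 100, max 150 steps
minerEnv.reset()                            # consume the initial map/State message
s = minerEnv.get_state()
minerEnv.step("5")                          # Action: 5 = craft (mine gold at the current cell)
```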
d) Maps: the Maps folder. It contains 05 sample maps for training. The trap information will be the same in these 05 maps; only the positions of the gold mines and the amount of gold will change in the preliminary round. Teams may redesign these maps to suit their own training strategies. Teams need to pay attention to the following when working with Maps:

- Each file in the Maps folder is considered a map; the filename is the map name.

- Each map is a matrix of integers with the following meanings:

| ID | Type |
| ---| ---|
| 0 | Land |
| -1 | Woods |
| -2 | Trap |
| -3 | Swamp |
| >0 | Gold |

- Select a training map as follows:

+ Function to select a map in the MinerEnv.py file: send_map_info(request)

+ Request structure: {map_name},{init_x},{init_y},{init_energy},{max_step}

+ For example, request = "map2,4,5,1000,150" means that map2 will be used for the match, the players will start from position (x = 4, y = 5) with an initial energy of 1000, and the match will have a maximum of 150 steps.

e) Bots (not the bots used in the preliminary round): 3 sample bots (bot1.py, bot2.py, and bot3.py) are provided to teams. Teams can create bots to suit their training strategies; a minimal example following the same interface is sketched below. The bots are put into play in the game environment via GAME_SOCKET_DUMMY.py. You will need to declare the bots (import Botx) and initialize them (init_bots()). Some of the main functions in the bot source code are as follows:

- **new_game(data)**: a function used to initialize the game environment (including the initial map information and the initial state of the bots).
- **new_state(data)**: a function used to update the State received from the server.
- **next_action()**: a function used to return an Action for the next step.
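The sketch below is a minimal additional bot, assuming the interface the environment expects: an `info` record that GAME_SOCKET_DUMMY.py updates in place, plus the three functions above. The PlayerInfo fields mirror the ones the environment touches, and the "craft when standing on gold, otherwise move randomly" policy is illustrative only.

```python
# A minimal custom bot sketch. PlayerInfo here is a stand-in record holding the
# fields that GAME_SOCKET_DUMMY.py reads and writes; the policy is illustrative.
from random import randrange
from MINER_STATE import State


class PlayerInfo:
    def __init__(self, id):
        self.playerId = id
        self.score = 0
        self.energy = 0
        self.posx = 0
        self.posy = 0
        self.lastAction = -1
        self.status = 0  # STATUS_PLAYING
        self.freeCount = 0


class Bot4:
    ACTION_CRAFT = 5

    def __init__(self, id):
        self.state = State()
        self.info = PlayerInfo(id)

    def new_game(self, data):
        self.state.init_state(data)    # parse the initial map/State message

    def new_state(self, data):
        self.state.update_state(data)  # parse the per-step State message

    def next_action(self):
        # Craft if standing on a gold cell, otherwise move in a random direction (0-3)
        if self.state.mapInfo.gold_amount(self.info.posx, self.info.posy) > 0:
            return self.ACTION_CRAFT
        return randrange(4)
```

It could then be registered for training via **self.bots = [Bot1(2), Bot2(3), Bot4(4)]** in init_bots().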
### 2. Deep reinforcement learning algorithm (Deep Q-learning)
In this section, the source code is written based on the deep reinforcement learning algorithm Deep Q-learning (DQN). The DQN algorithm was introduced in the work of Mnih et al. ("Human-level control through deep reinforcement learning." Nature 518.7540 (2015): 529-533). The source code contains the following program files: TrainingClient.py, DQNModel.py, and Memory.py.

a) TrainingClient.py: this program handles communication with the game environment. Some main points in this program are as follows:

- Initialize parameters for the algorithm:

		N_EPISODE = 10000 #The number of episodes for training

		MAX_STEP = 1000 #The number of steps for each episode

		BATCH_SIZE = 32 #The number of experiences used in each training session

		MEMORY_SIZE = 100000 #The memory capacity to save experiences

		SAVE_NETWORK = 100 #The number of episodes after which the DQN network will be saved

		INITIAL_REPLAY_SIZE = 1000 #The number of experiences required to start training

		INPUTNUM = 198 #The number of inputs for the DQN network

		ACTIONNUM = 6 #The number of actions, equal to the number of outputs of the DQN network

- Initialize the game environment:

		minerEnv = MinerEnv(HOST, PORT)

		minerEnv.start()

- Acquire the initial State of the Agent:

		minerEnv.reset()

		s = minerEnv.get_state()

- Perform an Action:

		action = DQNAgent.act(s)

		minerEnv.step(str(action))

- Acquire the current State of the Agent and the reward for the last Action, and check the conditions for terminating the episode:

		s_next = minerEnv.get_state()

		reward = minerEnv.get_reward()

		terminate = minerEnv.check_terminate()

- Train the DQN network (feed some experiences from Memory to DQNAgent to start training):

		batch = memory.sample(BATCH_SIZE)

		DQNAgent.replay(batch, BATCH_SIZE)

b) DQNModel.py: this source code is designed to allow the creation of deep learning models and model training functions. Some main points in this program are as follows:

- **Initialize numeric parameters**:

		gamma = 0.99, #The discount factor

		epsilon = 1, #Epsilon - the exploration factor

		epsilon_min = 0.01, #The minimum epsilon

		epsilon_decay = 0.999, #The epsilon decay applied at each update_epsilon() call

		learning_rate = 0.00025, #The learning rate for the DQN network

- **create_model()**: a function used to create a deep network (sketched below). The network contains 02 hidden layers (each layer has 300 nodes; the activation function of these 02 hidden layers is ReLU) and an output layer (06 nodes corresponding to the 06 Q-values of the 06 actions; the activation function is Linear).

- **act(state)**: a function used to return an Action for the Agent at the given State.

- **replay(samples, batch_size)**: a function used to train the deep network with experiences sampled from Memory.py.

- **update_epsilon()**: a function used to reduce epsilon (the exploration factor).

c) Memory.py: this source code is used to store data (experiences) for training.

**Note**: As the above source code is used for training, the game ends only when the map runs out of gold or the players are eliminated.
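To make the create_model() and act(state) bullets concrete, here is a minimal sketch using Keras (the library version listed in the installation section). The RMSprop optimizer and mean-squared-error loss are assumptions for illustration; DQNModel.py may differ in detail.

```python
# A sketch of the network described above plus epsilon-greedy action selection.
# Optimizer and loss are illustrative assumptions, not necessarily DQNModel.py's.
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import RMSprop


def create_model(input_num=198, action_num=6, learning_rate=0.00025):
    model = Sequential()
    model.add(Dense(300, input_dim=input_num, activation='relu'))  # hidden layer 1
    model.add(Dense(300, activation='relu'))                       # hidden layer 2
    model.add(Dense(action_num, activation='linear'))              # one Q-value per action
    model.compile(optimizer=RMSprop(lr=learning_rate), loss='mse')
    return model


def act(model, state, epsilon, action_num=6):
    # Explore with probability epsilon, otherwise pick the greedy action
    if np.random.rand() < epsilon:
        return np.random.randint(action_num)
    q_values = model.predict(state.reshape(1, -1))
    return int(np.argmax(q_values[0]))
```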
### B. Source code for competition - Miner-Testing-CodeSample

![alt text](image/codeAI/Picture3.png)
*Figure 3: The information flow between programs in the sample source code used in the competition*
- A source code package designed for teams to use in official competitions.

- The difference between this and the source code provided for training (Miner-Training-Local-CodeSample) is that this source code uses GAME_SOCKET.py instead of GAME_SOCKET_DUMMY.py. GAME_SOCKET.py allows data transfer to the server.

- Information on the HOST and PORT of the server is taken from the environment variables when TestingAgent.py is executed.

- The other source code (MINER_STATE.py and MinerEnv.py) is similar to that provided for training (Miner-Training-Local-CodeSample).

- In the source code, a trained DQN model (RLModelSample.json, RLModelSample.h5) is provided as an example for uploading a model in the competition (note: the model has not been trained enough to be competitive). In particular, the json file stores the network architecture and the h5 file stores the network weights.
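Loading such a json/h5 pair is typically done as in the sketch below (how TestingAgent.py does it may differ in detail):

```python
# A sketch of loading the provided sample model: the json file restores the
# architecture, the h5 file restores the weights.
import numpy as np
from keras.models import model_from_json

with open("RLModelSample.json", "r") as f:
    model = model_from_json(f.read())
model.load_weights("RLModelSample.h5")

# At test time, the loaded model maps a state vector to Q-values, e.g.:
# action = int(np.argmax(model.predict(state.reshape(1, -1))[0]))
```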
## MinerAI environment installation

This guideline helps users install a Python environment with the libraries used on the server, facilitating the running of the test code.

The environment can be installed in two ways:

- Install the environment directly on your PC:
	- Advantages: easy and familiar to those who have worked with Python
	- Disadvantages: there may be differences from the actual running environment due to a different OS (the actual environment in which the server runs your code is Ubuntu Server 18.04)

- Use Docker to install:
	- Advantages: the code environment is similar to the actual running environment
	- Disadvantages: installing Docker may be difficult on some older OSes

Installation instructions:
- Python 3.6.9 (Ubuntu) - Python 3.7.4 (Windows)
- Tensorflow 1.14.0 or 2.2.0
- Keras 2.3.1
- Numpy 1.18.4
- Pandas 1.0.4
- PyTorch 1.5.0
- joblib 0.16.0
- ray 0.8.6 (ray[rllib], ray[tune])
- requests 2.24.0
- semver 2.10.2
- tf-agents 0.3.0 (0.5.0 on Tensorflow 2.2.0)
- Pyqlearning 1.2.4
- Mushroom-RL 1.4.0
- gym 0.17.2
- opencv-python 4.2.0.34
- prettytable 0.7.2
- yacs 0.1.7

1. Installing directly
    1. Windows
        1. Install Python 3.7.4:

           Download the executable installer relevant to the OS on your PC at: https://www.python.org/downloads/release/python-374/

           (The download link is located in the Files section at the bottom of the page)

           Install, then add the Windows environment variables PYTHON_HOME/ and PYTHON_HOME/Scripts to the PATH

           ![alt text](image/minerEnv/Picture1.png)
           ![alt text](image/minerEnv/Picture2.png)

           Verify the installation and make sure the version is 3.7.4

           ![alt text](image/minerEnv/Picture3.png)
        2. Install pip3:

           Run the following command to install:
           ```
           python37 -m pip install --upgrade pip
           ```
           Verify the installation and make sure pip3 is installed in Python37

           ![alt text](image/minerEnv/Picture4.png)
        3. Install virtualenv:

           Run the following command to install:
           ```
           pip3 install virtualenv
           ```

           Verify the installation and make sure virtualenv is installed in Python37

           ![alt text](image/minerEnv/Picture5.png)
        4. Install libraries:

           In order not to affect your normal Python environment, the installation will be done in a virtual environment.

           - Change the current directory to the directory where you want to install, and create a virtual environment:
           ```
           virtualenv -p python37 {env_name}
           ```
           **{env_name}**: an environment name of your choice.

           For example, if you want to name the environment miner, the command will be:
           ```
           virtualenv -p python37 miner
           ```
           - Activate the virtual environment:
           ```
           .\{env_name}\Scripts\activate
           ```
           - Install the libraries:
           ```
           pip3 install numpy==1.18.4
           pip3 install keras==2.3.1
           pip3 install pandas==1.0.4
           pip3 install tensorflow==1.14.0
           pip3 install torch==1.5.0+cpu torchvision==0.6.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
           pip3 install joblib==0.16.0
           pip3 install ray==0.8.6
           pip3 install ray[rllib]
           pip3 install ray[tune]
           pip3 install requests==2.24.0
           pip3 install semver==2.10.2
           pip3 install tf-agents==0.3.0
           pip3 install Pyqlearning==1.2.4
           pip3 install Mushroom-RL==1.4.0
           pip3 install gym==0.17.2
           pip3 install opencv-python==4.2.0.34
           pip3 install prettytable==0.7.2
           pip3 install yacs==0.1.7
           ```
           - Run code: the code is run in the virtual environment set up in the previous steps; therefore, make sure to activate the virtual environment in advance.
    2. Ubuntu 18.04
        1. Install Python 3.6.9:

           Installation commands:
           ```
           apt-get update
           apt-get install python3
           ```
           (See here for how to install on other OS versions: https://askubuntu.com/questions/865554/how-do-i-install-python-3-6-using-apt-get)

           Verify that the right version is installed:

           ![alt text](image/minerEnv/Picture6.png)
        2. Install pip:

           Run the following command to install:
           ```
           python3 -m pip install --upgrade pip
           ```
           Or:
           ```
           sudo apt install python3-pip
           ```
        3. Install virtualenv:

           Run the following command to install:
           ```
           pip3 install virtualenv
           ```
        4. Install libraries:

           In order not to affect your normal Python environment, the installation will be done in a virtual environment.

           - Change the current directory to the directory where you want to install, and create a virtual environment:
           ```
           virtualenv -p python3 {env_name}
           ```
           **{env_name}**: an environment name of your choice.

           For example, if you want to name the environment miner, the command will be:
           ```
           virtualenv -p python3 miner
           ```
           - Activate the virtual environment:
           ```
           cd {env_name}/bin
           source ./activate
           ```
           - Install the libraries:
           ```
           pip3 install numpy==1.18.4
           pip3 install keras==2.3.1
           pip3 install pandas==1.0.4
           pip3 install tensorflow==1.14.0
           pip3 install torch==1.5.0+cpu torchvision==0.6.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
           pip3 install joblib==0.16.0
           pip3 install ray==0.8.6
           pip3 install ray[rllib]
           pip3 install ray[tune]
           pip3 install requests==2.24.0
           pip3 install semver==2.10.2
           pip3 install tf-agents==0.3.0
           pip3 install Pyqlearning==1.2.4
           pip3 install Mushroom-RL==1.4.0
           pip3 install gym==0.17.2
           pip3 install opencv-python==4.2.0.34
           pip3 install prettytable==0.7.2
           pip3 install yacs==0.1.7
           ```
           - Run code: the code is run in the virtual environment set up in the previous steps; therefore, make sure to activate the virtual environment in advance.
2. Using Docker

   We provide a Docker image with all the environments installed, similar to the actual server environment.

   This section shows how to install Docker and how to use the image we provide.

    1. Install Docker

       You can easily find comprehensive and detailed instructions for installing Docker on the internet. Below are just some examples.

        1. Windows:

           - Windows 10: Visit the link below to download the installer and install it on your PC:
             https://hub.docker.com/editions/community/docker-ce-desktop-windows/
           - Windows 7: Follow the instructions at: https://webme.ie/how-to-install-docker-on-windows-7/
        2. Ubuntu:
           - Ubuntu 18.04: Follow the instructions at:
             https://www.digitalocean.com/community/tutorials/how-to-install-and-use-docker-on-ubuntu-18-04
           - Ubuntu 16.04: Follow the instructions at: https://docs.docker.com/engine/install/ubuntu/
    2. Use the Docker image
        1. Pull the image:

           Execute the following command to pull the image:
           ```
           docker pull codelearnio/miner-ai:training-v5
           ```

           Verify that the image has been successfully pulled with the following command:
           ```
           docker images -a
           ```
           The displayed information will include the Docker image with the following details:
           ![alt text](image/minerEnv/Picture7.png)

        2. Use the image:

           This section covers some basic commands for working with a Docker container.

           For other commands, refer to the following link:
           https://docs.docker.com/engine/reference/commandline/docker/

           - Create and run a Docker container from the existing image:

             Create and run a Docker container from the provided Docker image with the following command:
             ```
             docker run -it -v {WORKING_DIR}:/v b71e2ea7dec6
             ```
             Note: **{WORKING_DIR}** is the path to the directory where your source code is located.

             For example, if you put the source code in the **D:\MinerAI** directory, the run command will be:
             ```
             docker run -it -v D:\MinerAI:/v b71e2ea7dec6
             ```
             You can name the container by adding the parameter: **--name={name}**

             Change the current directory to the mounted directory: **cd /v**

             Check the files mounted into the container: **ls**
             ![alt text](image/minerEnv/Picture8.png)
             Then run the **python3** command with your source code without any additional installation.

             For example:
             ```
             python3 TrainingClient.py
             ```
           - Check the existing containers:
             ```
             docker container ls -a
             ```
             ![alt text](image/minerEnv/Picture9.png)
             As shown above, there are 2 containers initialized from the image **b71e2ea7dec6**: container **2f3d9797c028** is up and running, while container **f39ab8375c62** has stopped.

           - Attach to a running Docker container:
             ```
             docker attach {container_id}
             ```
             ![alt text](image/minerEnv/Picture10.png)

             Note: You can replace ***{container_id}*** with ***{container_name}***
           - Start a stopped Docker container:
             ```
             docker start -a {container_id}
             ```
             ![alt text](image/minerEnv/Picture11.png)
           - Stop a running container:
             ```
             docker stop {container_id}
             ```
           - Remove a Docker container:
             ```
             docker rm {container_id}
             ```


***

***Vietnamese version (translated)***
## Sample Source Code Description: Training and Competition

During the competition, the State information returned after an Action is performed includes:

- Information about the competing agents

```json
{
    "playerId": the agent's identifier, integer;
    "posx": the agent's X position, integer;
    "posy": the agent's Y position, integer;
    "score": the amount of gold the agent has mined, integer;
    "energy": the agent's remaining energy, integer;
    "lastAction": the action just performed, integer;
    "status": the agent's status - playing or eliminated, integer
}
```
- Information about the remaining obstacles on the map (their position and the amount of energy deducted when an agent passes through).
- Information about the remaining gold fields on the map (their position and the amount of gold).
- The size of the map (height and width).

From the State information returned above, each team decides its own training strategy, such as designing a reward function (Reward Function) and defining a state space (State Space). In the two sample source code packages **(Miner-Training-Local-CodeSample and Miner-Testing-CodeSample)** provided to teams (described below), we give an example of defining a State Space and designing a Reward Function in the two functions *get_state()* and *get_reward()*, respectively. Below is an overview of the two source code packages provided for training and competition:
## A. Source code for training - Miner-Training-Local-CodeSample
Below is an overview of the two code bases provided for training and competing:
## A. Training code - Miner-Training-Local-CodeSample
This is the sample source code used for training on the teams' local machines. It consists of 02 main parts: the Miner Game environment and the reinforcement learning algorithm (Deep Q-learning - DQN). Figure 1 gives a visual overview of the information flow between the programs.

![alt text](image/codeAI/Picture1.png)
*Figure 1: Information flow between the programs in the sample source code
used for training*


The two parts in detail:
### 1. The Miner Game environment.
The environment source code is taken from the original source of the Miner Game on the Codelearn system. It consists of: GAME_SOCKET_DUMMY.py, MINER_STATE.py, MinerEnv.py, Maps, and 03 bots (bot1.py, bot2.py, bot3.py). Figure 2 shows how map information and agent state are exchanged between MinerEnv.py and GAME_SOCKET_DUMMY.py. The programs are described in detail below:

![alt text](image/codeAI/Picture2.png)
*Figure 2: Information flow between MinerEnv.py and GAME_SOCKET_DUMMY.py, simulating the client and server during training*

a) MinerEnv.py: The program follows the common structure of a reinforcement learning environment, giving teams simple and convenient access to the main program (GAME_SOCKET_DUMMY.py). Its main functions are as follows (a usage sketch follows this list):


- **start()**: called exactly once, to simulate connecting to the server to start playing. In training, this function calls **connect()** in GAME_SOCKET_DUMMY.py to read the 05 maps in the Maps folder.

- **send_map_info()**: used to select the map for training the agent.

- **reset()**: used to initialize the map and the agent's state. It calls **receive()** in GAME_SOCKET_DUMMY.py to get the initial map information, stored in a JSON-formatted message, and **init_state(message)** in MINER_STATE.py to copy that initial map information into the agent's state.

- **step()**: used to send an action to GAME_SOCKET_DUMMY.py and receive the changed map information and agent state.

- **get_state()**: provided as an example of defining the agent's state for training. Depending on its own training strategy, each team can rewrite this function to produce a more suitable agent state.

- **get_reward()**: provided as an example of defining the agent's reward function for training. Depending on its own training strategy, each team can rewrite this function to produce a more suitable reward function.
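
Put together, one episode of interaction with the training environment looks roughly like this (a sketch assuming the five-field request format that GAME_SOCKET_DUMMY.py's send() expects, described below; the fixed action 4 simply rests every step and stands in for a real policy):

```python
# Sketch: one episode against the dummy environment, no learning involved.
from MinerEnv import MinerEnv

env = MinerEnv("localhost", 1111)       # placeholders; the dummy socket ignores them
env.start()                             # reads the 05 maps once
env.send_map_info("map1,1,2,100,150")   # map, init_x, init_y, init_energy, max_step
env.reset()
s = env.get_state()
while not env.check_terminate():
    env.step("4")                       # action 4 = rest; actions are sent as strings
    s = env.get_state()
    r = env.get_reward()
```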
b) MINER_STATE.py ***(teams should not modify this program's source code)***: This program is the sample source for storing the map information and the agent state received from GAME_SOCKET_DUMMY.py (in competition, received from the server). It is designed to help teams manage the state easily during training. The two classes, MapInfo and State, and their main functions are as follows:

- **MapInfo** (class): stores all of the map information, including: max_x, max_y, maxStep, numberOfPlayer, golds (the gold remaining on the map at the current time), and obstacles (the obstacles currently on the map).
+ **update(golds, changedObstacles)**: updates the map information after every step.

- **State** (class): holds the game state (the player's state and the map).

+ **init_state(data)**: initializes the map information and the agent state at the start of an episode in training (or of a match in competition).
+ **update_state(data)**: updates the game state after every step. The data passed in includes the map information and the agent state.

c) GAME_SOCKET_DUMMY.py ***(teams should not modify this program's source code)***: This program simulates the Miner Game, including the exchange of messages with the server. It consists of 07 classes: ObstacleInfo, GoldInfo, PlayerInfo, GameInfo, UserMatch, StepState, and GameSocket. GameSocket is the main class, with the following main functions:

- **__init__(host, port)**: a function used to initialize the environment. Here, the host and port are only used to simulate the connection to the server in the actual competition.

- **init_bots()**: designed to let players train their agent against bots. To specify the bots taking part in training, use the following line: **self.bots = [Bot1(2), Bot2(3), Bot3(4)]**.

- **connect()**: simulates the client's connect action to the server. In training, it loads the maps from the Maps folder into the environment.

- **receive()**: simulates the client receiving a message from the server. In training, it returns the initial map information and the agent's initial state the first time it is called, and the current map information and the agent's current state afterwards.

- **send()**: simulates the client sending a message to the server. In training, there are 2 kinds of message from the client:

+ Action: the action for the next step, a number.

+ Request: the parameters for initializing the game environment: (map, init_x, init_y, init_energy, max_step). For example, request = "map1,1,2,100,150" means the server will use the map information (gold, obstacles) of map1 in the Maps folder, place the player at cell (x = 1, y = 2) with an initial energy of 100, and the match lasts at most 150 steps.
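
To make the two message kinds concrete, here is a tiny illustrative helper (hypothetical, not part of the samples) that builds a reset request in exactly this format:

```python
# Sketch: build a reset request in the format GameSocket.send() expects.
# GameSocket treats a numeric message as an action and anything else as a reset request.
def make_request(map_name, init_x, init_y, init_energy, max_step):
    return f"{map_name},{init_x},{init_y},{init_energy},{max_step}"

assert make_request("map1", 1, 2, 100, 150) == "map1,1,2,100,150"
```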
d) Maps: The Maps folder contains 05 sample maps for training. The trap information in these 05 maps is kept unchanged from the qualifying round; only the positions and amounts of gold differ. Teams can redesign these maps to suit their own training strategies. Some points to note when working with Maps:

- Each file in the Maps folder is one map; the filename is the map name.

- Each map is a matrix of integers with the following meaning:

| ID | Type |
| ---| ---|
| 0 | Land |
| -1 | Forest |
| -2 | Trap |
| -3 | Swamp |
| >0 | Gold |


- Selecting a map in training:

+ The map selection function in MinerEnv.py: send_map_info(request)

+ Request structure: {map_name},{init_x},{init_y},{init_energy}

+ For example: request = "map2,4,5,1000" means the match uses map2, and the players start from coordinates x = 4, y = 5 with an initial energy of 1000.

e) Bots ***(not the bots used in the qualifying round)***: 03 sample bots (bot1.py, bot2.py, and bot3.py) are provided to the teams. Teams can create bots that fit their own training strategies. Bots are added to the game environment in GAME_SOCKET_DUMMY.py in 02 steps: declaring the bots (import Botx) and initializing them (init_bots()). The main functions in the bot sources are:

- **new_game(data)**: initializes the game environment (the map information and the bot's initial state).
- **new_state(data)**: updates the state received from the server.
- **next_action()**: returns the action for the next step.

### 2. The reinforcement learning algorithm (Deep Q-learning)
This part of the source code implements the deep Q-learning (DQN) algorithm introduced by Mnih et al. *("Human-level control through deep reinforcement learning." Nature 518.7540 (2015): 529-533)*. It consists of the following files: TrainingClient.py, DQNModel.py, and Memory.py.

a) TrainingClient.py: the source of the DQN training client that communicates with the game environment. The parts to note are as follows (the sketch after this list assembles them into the full loop):

- Initializing the algorithm's parameters:

N_EPISODE = 10000 #The number of episodes for training
MAX_STEP = 1000 #The number of steps per episode
BATCH_SIZE = 32 #The number of experiences used per training update
MEMORY_SIZE = 100000 #The size of the experience replay memory
SAVE_NETWORK = 100 #Save the DQN network after this many episodes
INITIAL_REPLAY_SIZE = 1000 #The number of experiences required in memory before training starts
INPUTNUM = 198 #The number of inputs of the DQN network
ACTIONNUM = 6 #The number of actions, i.e. the number of outputs of the DQN network

- Initializing the game environment:

minerEnv = MinerEnv(HOST, PORT)

minerEnv.start()

- Getting the agent's initial state:

minerEnv.reset()

s = minerEnv.get_state()

- Performing an action:

action = DQNAgent.act(s)

minerEnv.step(str(action))

- Getting the agent's new state, the reward for the action just performed, and checking the episode-termination condition:

s_next = minerEnv.get_state()

reward = minerEnv.get_reward()

terminate = minerEnv.check_terminate()

- Training the DQN network (sampling a batch of experiences from Memory and feeding it to DQNAgent for training):

batch = memory.sample(BATCH_SIZE)

DQNAgent.replay(batch, BATCH_SIZE)
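
The sketch below condenses how these pieces fit together into the training loop (an outline under the assumption that `DQNAgent` is the DQN instance and `memory` the Memory instance described next; the real TrainingClient.py additionally varies maps and start positions across episodes):

```python
# Sketch: the core training loop of TrainingClient.py, condensed.
for episode in range(N_EPISODE):
    minerEnv.send_map_info("map1,1,2,100,150")  # choose a map for this episode
    minerEnv.reset()
    s = minerEnv.get_state()
    for step in range(MAX_STEP):
        action = DQNAgent.act(s)                # epsilon-greedy action
        minerEnv.step(str(action))
        s_next = minerEnv.get_state()
        reward = minerEnv.get_reward()
        terminate = minerEnv.check_terminate()
        memory.push(s, action, reward, terminate, s_next)  # store the experience
        if memory.length > INITIAL_REPLAY_SIZE:
            DQNAgent.replay(memory.sample(BATCH_SIZE), BATCH_SIZE)
            DQNAgent.update_epsilon()           # decay exploration over time
        s = s_next
        if terminate:
            break
    if episode % SAVE_NETWORK == 0:
        DQNAgent.save_model("TrainedModels/DQNmodel")  # path is illustrative
```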
b) DQNModel.py: this source file creates the deep learning models and the functions to train them. The parts to note are as follows:

- Initializing the learning parameters:

gamma = 0.99, #The discount factor

epsilon = 1, #Epsilon - the exploration factor

epsilon_min = 0.01, #The minimum epsilon

epsilon_decay = 0.999, #The epsilon decay applied on each update_epsilon call

learning_rate = 0.00025, #The learning rate for the DQN network

- create_model(): creates a deep network with 02 hidden layers of 300 nodes each, using the ReLU activation, and one output layer of 06 nodes corresponding to the 06 Q-values of the 06 actions, using the linear activation.

- act(state): returns the agent's action at the given state.

- replay(samples, batch_size): trains the deep network on experiences sampled from the Memory.

- update_epsilon(): decays epsilon, i.e. reduces exploration.

c) Memory.py: stores the data (experiences) used for training.

**Note**: Since the source code above is used for training, a match only ends when the map runs out of gold or the player is eliminated (by running out of energy or moving off the map).

## B. Testing code - Miner-Testing-CodeSample

![alt text](image/codeAI/Picture3.png)
*Figure 3: Information flow between the programs in the sample source code used in competition.*
- This source code is designed for the teams to use in the official competition.
- The difference from the code provided for training (Miner-Training-Local-CodeSample) is that GAME_SOCKET.py is used instead of GAME_SOCKET_DUMMY.py. GAME_SOCKET.py exchanges data with the real server.
- The server's HOST and PORT are taken from environment variables when TestingAgent.py runs.
- The remaining sources (MINER_STATE.py and MinerEnv.py) are kept the same as in the code provided for training (Miner-Training-Local-CodeSample).
- A trained DQN model (RLModelSample.json, RLModelSample.h5) is included as an example of loading a model for competition (note: this model has not been trained enough to actually compete). The json file stores the network configuration and the h5 file stores the network weights.
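
Loading such a json/h5 pair follows the standard Keras pattern; a minimal sketch (the zero state vector is only a placeholder for a real get_state() output):

```python
# Sketch: load the provided sample model and pick a greedy action.
import numpy as np
from keras.models import model_from_json

with open("RLModelSample.json", "r") as f:
    model = model_from_json(f.read())   # rebuild the network configuration
model.load_weights("RLModelSample.h5")  # restore the trained weights

state = np.zeros(198)                   # an INPUTNUM-sized state vector
action = int(np.argmax(model.predict(state.reshape(1, -1))))
print(action)                           # one of the 06 actions (0..5)
```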
## Setting up the MinerAI environment

This section guides you through installing a Python environment with the libraries used on the server, so that your local test runs match the server more closely.

We cover 2 ways to set up the environment:

- Installing directly on your machine:
  - Pros: easy to do, familiar if you have coded in and installed Python before
  - Cons: may differ from the actual runtime environment because of OS differences (the actual environment in which the server runs your code is Ubuntu Server 18.04)

- Installing with Docker:
  - Pros: replicates the coding environment of the actual runtime environment
  - Cons: installing Docker can be difficult on some older operating systems

What gets installed:
- Python 3.6.9 (Ubuntu) - Python 3.7.4 (Windows)
- Tensorflow 1.14.0 or 2.2.0
- Keras 2.3.1
- Numpy 1.18.4
- Pandas 0.15
- PyTorch 1.5.0
- joblib 0.16.0
- ray 0.8.6 (ray[rllib], ray[tune])
- requests 2.24.0
- semver 2.10.2
- tf-agents 0.3.0 (0.5.0 with Tensorflow 2.2.0)
- Pyqlearning v1.2.4
- Mushroom-RL v1.4.0
- gym 0.17.2
- opencv-python 4.2.0.34
- prettytable 0.7.2
- yacs 0.1.7

1. Running directly
1. Windows
1. Install Python 3.7.4:

Download the executable installer for your OS from: https://www.python.org/downloads/release/python-374/

(The download links are in the Files section at the bottom of the page)

Run the installer and point the Windows environment variables to PYTHON_HOME/ and PYTHON_HOME/Scripts

![alt text](image/minerEnv/Picture1.png)
![alt text](image/minerEnv/Picture2.png)

Check the installation; make sure the version is 3.7.4

![alt text](image/minerEnv/Picture3.png)
2. Install pip3:

Run the following command to install:
```
python37 -m pip install --upgrade pip
```
Check the installation; make sure pip3 is installed under Python37

![alt text](image/minerEnv/Picture4.png)
3. Install virtualenv:

Run the following command to install:
```
pip3 install virtualenv
```

Check the installation; make sure virtualenv is installed under Python37

![alt text](image/minerEnv/Picture5.png)
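
Before creating the virtual environment in step 4 below, you can double-check which interpreter the `python37` launcher resolves to with a short illustrative script (not part of the competition code):

```python
# check_interpreter.py - confirm the interpreter matches the version this guide expects.
import sys

expected = (3, 7, 4)  # the Windows version used in this guide
actual = sys.version_info[:3]
print("Python", ".".join(map(str, actual)))
if actual != expected:
    raise SystemExit(f"Expected Python {expected}, found {actual}")
```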
4. Install the libraries:

To avoid affecting your global Python environment, installation is done inside a virtual environment.

- Change to the directory where you want to set things up and create the virtual environment:
```
virtualenv -p python37 {env_name}
```
{env_name}: the environment name of your choosing.

For example, if you want to name the environment miner, the command will be:
```
virtualenv -p python37 miner
```
- Activate the virtual environment:
```
.\{env_name}\Scripts\activate
```
- Install the libraries:
```
pip3 install numpy==1.18.4
pip3 install keras==2.3.1
pip3 install pandas==1.0.4
pip3 install tensorflow==1.14.0
pip3 install torch==1.5.0+cpu torchvision==0.6.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
pip3 install joblib==0.16.0
pip3 install ray==0.8.6
pip3 install ray[rllib]
pip3 install ray[tune]
pip3 install requests==2.24.0
pip3 install semver==2.10.2
pip3 install tf-agents==0.3.0
pip3 install Pyqlearning==v1.2.4
pip3 install Mushroom-RL==v1.4.0
pip3 install gym==0.17.2
pip3 install opencv-python==4.2.0.34
pip3 install prettytable==0.7.2
pip3 install yacs==0.1.7
```
- Run code: your code runs inside the virtual environment set up in the previous step, so don't forget to activate it before running!
2. Ubuntu 18.04
1. Install Python 3.6.9:

Installation commands:
```
apt-get update
apt-get install python3
```
(See https://askubuntu.com/questions/865554/how-do-i-install-python-3-6-using-apt-get for installation on other OS versions)

Check that the correct version is installed:

![alt text](image/minerEnv/Picture6.png)
2. Install pip:

Run the following command to install:
```
python3 -m pip install --upgrade pip
```
Or
```
sudo apt install python3-pip
```
3. Install virtualenv:

Run the following command to install:
```
pip3 install virtualenv
```
4. Install the libraries:

To avoid affecting your global Python environment, installation is done inside a virtual environment.

- Change to the directory where you want to set things up and create the virtual environment:
```
virtualenv -p python3 {env_name}
```
{env_name}: the environment name of your choosing.

For example, if you want to name the environment miner, the command will be:
```
virtualenv -p python3 miner
```
- Activate the virtual environment:
```
cd {env_name}/bin
source ./activate
```
- Install the libraries:
```
pip3 install numpy==1.18.4
pip3 install keras==2.3.1
pip3 install pandas==0.15
pip3 install tensorflow==1.14.0
pip3 install torch==1.5.0+cpu torchvision==0.6.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
pip3 install joblib==0.16.0
pip3 install ray==0.8.6
pip3 install ray[rllib]
pip3 install ray[tune]
pip3 install requests==2.24.0
pip3 install semver==2.10.2
pip3 install tf-agents==0.3.0
pip3 install Pyqlearning==v1.2.4
pip3 install Mushroom-RL==v1.4.0
pip3 install gym==0.17.2
pip3 install opencv-python==4.2.0.34
pip3 install prettytable==0.7.2
pip3 install yacs==0.1.7
```
- Run code: your code runs inside the virtual environment set up in the previous step, so don't forget to activate it before running!
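
With either OS, once the libraries are installed you can verify the activated virtual environment with a quick illustrative check (the expected versions follow the list at the top of this section):

```python
# Sketch: verify the key libraries are importable inside the activated virtualenv.
import numpy, pandas, tensorflow, keras, gym

print("numpy     ", numpy.__version__)       # expected 1.18.4
print("pandas    ", pandas.__version__)
print("tensorflow", tensorflow.__version__)  # expected 1.14.0 (or 2.2.0)
print("keras     ", keras.__version__)       # expected 2.3.1
print("gym       ", gym.__version__)         # expected 0.17.2
```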
2. Using Docker

We provide a Docker image with all the environments preinstalled, matching the actual server environment.

This section shows how to install Docker and how to use the image we provide.

1. Install Docker

Docker installation is documented thoroughly and in detail in many places on the internet, so we only provide the links we have followed and tested successfully.

1. Windows:

- Windows 10: visit the link below to download the installer and install it on your PC:
https://hub.docker.com/editions/community/docker-ce-desktop-windows/
- Windows 7: follow the instructions at: https://webme.ie/how-to-install-docker-on-windows-7/
2. Ubuntu:
- Ubuntu 18.04: follow the instructions at:
https://www.digitalocean.com/community/tutorials/how-to-install-and-use-docker-on-ubuntu-18-04
- Ubuntu 16.04: follow the instructions at: https://docs.docker.com/engine/install/ubuntu/
2. Use Docker image
1. Pull image:

Run the following command to pull the image:
```
docker pull codelearnio/miner-ai:training-v5
```

Check that the image was pulled successfully with the command:
```
docker images -a
```
The displayed information will include the Docker image with the following details:
![alt text](image/minerEnv/Picture7.png)

2. Use image:

This section covers some basic commands for working with the Docker container.

For other commands, refer to the following link:
https://docs.docker.com/engine/reference/commandline/docker/

- Create and run a Docker container from the existing image:

Create and run a Docker container from the provided image with the following command:
```
docker run -it -v {WORKING_DIR}:/v b71e2ea7dec6
```
Note: **{WORKING_DIR}** is the path to the directory where your source code is located.

For example, if you put the source code in the **D:\MinerAI** directory, the run command will be:
```
docker run -it -v D:\MinerAI:/v b71e2ea7dec6
```
You can name the container by adding the parameter: **--name={name}**

Change the current directory to the bound directory: **cd /v**

Check the files bound into the container: **ls**
![alt text](image/minerEnv/Picture8.png)
Here you can run the **python3** command with your source code without installing anything else.
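
A quick illustrative way to confirm that both the bind mount and the preinstalled environment work is to run a small check inside the container first:

```python
# Sketch: run inside the container to confirm the mount and the interpreter.
import os
import sys

print(sys.version)               # should report the image's Python build
print(sorted(os.listdir("/v")))  # should list the source files you mounted
```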
For example, to start the training client:
```
python3 TrainingClient.py
```
- Check the existing containers:
```
docker container ls -a
```
![alt text](image/minerEnv/Picture9.png)
As shown above, there are 2 containers initialized from the image **b71e2ea7dec6**: container **2f3d9797c028** is running, while container **f39ab8375c62** has stopped.

- Attach to a running Docker container:
```
docker attach {container_id}
```
![alt text](image/minerEnv/Picture10.png)

Note: you can replace ***{container_id}*** with ***{container_name}***
- Start a stopped Docker container:
```
docker start -a {container_id}
```
![alt text](image/minerEnv/Picture11.png)
- Stop a running container:
```
docker stop {container_id}
```
- Remove a Docker container:
```
docker rm {container_id}
```
--------------------------------------------------------------------------------
/Miner-Colab-CodeSample/Miner_Training_Colab_CodeSample.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "code",
5 |    "execution_count": 2,
6 |    "metadata": {
7 |     "colab": {
8 |      "base_uri": "https://localhost:8080/",
9 |      "height": 105
10 |     },
11 |     "colab_type": "code",
12 |     "id": "qHcugJLenzKZ",
13 |     "outputId": "dcbaa3c4-e70c-4cb7-fb12-6221a3bf016a"
14 |    },
15 |    "outputs": [
16 |     {
17 |      "name": "stderr",
18 |      "output_type": "stream",
19 |      "text": [
20 |       "Using TensorFlow backend.\n"
21 |      ]
22 |     },
23 |     {
24 |      "name": "stdout",
25 |      "output_type": "stream",
26 |      "text": [
27 |       "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/compat/v2_compat.py:96: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.\n",
28 |       "Instructions for updating:\n",
29 |       "non-resource variables are not supported in the long term\n"
30 |      ]
31 |     }
32 |    ],
33 |    "source": [
34 |     "import sys\n",
35 |     "import numpy as np\n",
36 |     "import pandas as pd\n",
37 |     "import datetime\n",
38 |     "import json\n",
39 |     "from array import *\n",
40 |     "import os\n",
41 |     "import math\n",
42 |     "from random import randrange\n",
43 |     "import random\n",
44 |     "\n",
45 |     "from keras.models import Sequential\n",
46 |     "from keras.models import model_from_json\n",
47 |     "from keras.layers import Dense, Activation\n",
48 |     "from keras import optimizers\n",
49 |     "\n",
50 |     "import tensorflow.compat.v1 as tf\n",
51 |     "from tensorflow.compat.v1.keras import backend as K\n",
52 |     "tf.disable_v2_behavior()"
53 |    ]
54 |   },
55 |   {
56 |    "cell_type": "code",
57 |    "execution_count": 3,
58 |    "metadata": {
59 |     "colab": {},
60 |     "colab_type": "code",
61 |     "id": "e3tvcApSyW1g"
62 |    },
63 |    "outputs": [],
64 |    "source": [
65 |     "#Classes in GAME_SOCKET_DUMMY.py\n",
66 |     "class ObstacleInfo:\n",
67 |     "    # initial energy for obstacles: Land (key = 0): -1, Forest(key = -1): 0 (random), Trap(key = -2): -10, Swamp (key = -3): -5\n",
68 |     "    types = {0: -1, -1: 0, -2: -10, -3: -5}\n",
69 |     "\n",
70 |     "    def __init__(self):\n",
71 |     "        self.type = 0\n",
72 |     "        self.posx = 0\n",
73 |     "        self.posy = 0\n",
74 |     "        self.value = 0\n",
75 |     "    \n",
76 |     "class GoldInfo:\n",
77 |     "    def __init__(self):\n",
78 |     "        self.posx = 0\n",
79 |     "        self.posy = 0\n",
80 |     "        self.amount = 0\n",
81 |     "\n",
82 |     "    def loads(self, data):\n",
83 |     "        golds = []\n",
84 |     "        for gd in data:\n",
85 |     "            g = GoldInfo()\n",
86 |     "
g.posx = gd[\"posx\"]\n", 87 | " g.posy = gd[\"posy\"]\n", 88 | " g.amount = gd[\"amount\"]\n", 89 | " golds.append(g)\n", 90 | " return golds\n", 91 | "\n", 92 | "class PlayerInfo:\n", 93 | " STATUS_PLAYING = 0\n", 94 | " STATUS_ELIMINATED_WENT_OUT_MAP = 1\n", 95 | " STATUS_ELIMINATED_OUT_OF_ENERGY = 2\n", 96 | " STATUS_ELIMINATED_INVALID_ACTION = 3\n", 97 | " STATUS_STOP_EMPTY_GOLD = 4\n", 98 | " STATUS_STOP_END_STEP = 5\n", 99 | "\n", 100 | " def __init__(self, id):\n", 101 | " self.playerId = id\n", 102 | " self.score = 0\n", 103 | " self.energy = 0\n", 104 | " self.posx = 0\n", 105 | " self.posy = 0\n", 106 | " self.lastAction = -1\n", 107 | " self.status = PlayerInfo.STATUS_PLAYING\n", 108 | " self.freeCount = 0\n", 109 | "\n", 110 | "class GameInfo:\n", 111 | " def __init__(self):\n", 112 | " self.numberOfPlayers = 1\n", 113 | " self.width = 0\n", 114 | " self.height = 0\n", 115 | " self.steps = 100\n", 116 | " self.golds = []\n", 117 | " self.obstacles = []\n", 118 | "\n", 119 | " def loads(self, data):\n", 120 | " m = GameInfo()\n", 121 | " m.width = data[\"width\"]\n", 122 | " m.height = data[\"height\"]\n", 123 | " m.golds = GoldInfo().loads(data[\"golds\"])\n", 124 | " m.obstacles = data[\"obstacles\"]\n", 125 | " m.numberOfPlayers = data[\"numberOfPlayers\"]\n", 126 | " m.steps = data[\"steps\"]\n", 127 | " return m\n", 128 | "\n", 129 | "class UserMatch:\n", 130 | " def __init__(self):\n", 131 | " self.playerId = 1\n", 132 | " self.posx = 0\n", 133 | " self.posy = 0\n", 134 | " self.energy = 50\n", 135 | " self.gameinfo = GameInfo()\n", 136 | "\n", 137 | " def to_json(self):\n", 138 | " return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4)\n", 139 | "\n", 140 | "class StepState:\n", 141 | " def __init__(self):\n", 142 | " self.players = []\n", 143 | " self.golds = []\n", 144 | " self.changedObstacles = []\n", 145 | "\n", 146 | " def to_json(self):\n", 147 | " return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4)" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 19, 153 | "metadata": { 154 | "colab": {}, 155 | "colab_type": "code", 156 | "id": "Madmz8hE1op6" 157 | }, 158 | "outputs": [], 159 | "source": [ 160 | "#Main class in GAME_SOCKET_DUMMY.py\n", 161 | "class GameSocket:\n", 162 | " bog_energy_chain = {-5: -20, -20: -40, -40: -100, -100: -100}\n", 163 | "\n", 164 | " def __init__(self):\n", 165 | " self.stepCount = 0\n", 166 | " self.maxStep = 0\n", 167 | " self.mapdir = \"Maps\" # where to load all pre-defined maps\n", 168 | " self.mapid = \"\"\n", 169 | " self.userMatch = UserMatch()\n", 170 | " self.user = PlayerInfo(1)\n", 171 | " self.stepState = StepState()\n", 172 | " self.maps = {} # key: map file name, value: file content\n", 173 | " self.map = [] # running map info: 0->Land, -1->Forest, -2->Trap, -3:Swamp, >0:Gold\n", 174 | " self.energyOnMap = [] # self.energyOnMap[x][y]: <0, amount of energy which player will consume if it move into (x,y)\n", 175 | " self.E = 50\n", 176 | " self.resetFlag = True\n", 177 | " self.craftUsers = [] # players that craft at current step - for calculating amount of gold\n", 178 | " self.bots = []\n", 179 | " self.craftMap = {} # cells that players craft at current step, key: x_y, value: number of players that craft at (x,y)\n", 180 | "\n", 181 | " def init_bots(self):\n", 182 | " self.bots = [Bot1(2), Bot2(3), Bot3(4)] # use bot1(id=2), bot2(id=3), bot3(id=4)\n", 183 | " for (bot) in self.bots: # at the beginning, all bots will have same position, energy 
as player\n", 184 | " bot.info.posx = self.user.posx\n", 185 | " bot.info.posy = self.user.posy\n", 186 | " bot.info.energy = self.user.energy\n", 187 | " bot.info.lastAction = -1\n", 188 | " bot.info.status = PlayerInfo.STATUS_PLAYING\n", 189 | " bot.info.score = 0\n", 190 | " self.stepState.players.append(bot.info)\n", 191 | " self.userMatch.gameinfo.numberOfPlayers = len(self.stepState.players)\n", 192 | " print(\"numberOfPlayers: \", self.userMatch.gameinfo.numberOfPlayers)\n", 193 | "\n", 194 | " def reset(self, requests): # load new game by given request: [map id (filename), posx, posy, initial energy]\n", 195 | " # load new map\n", 196 | " self.reset_map(requests[0])\n", 197 | " self.userMatch.posx = int(requests[1])\n", 198 | " self.userMatch.posy = int(requests[2])\n", 199 | " self.userMatch.energy = int(requests[3])\n", 200 | " self.userMatch.gameinfo.steps = int(requests[4])\n", 201 | " self.maxStep = self.userMatch.gameinfo.steps\n", 202 | "\n", 203 | " # init data for players\n", 204 | " self.user.posx = self.userMatch.posx # in\n", 205 | " self.user.posy = self.userMatch.posy\n", 206 | " self.user.energy = self.userMatch.energy\n", 207 | " self.user.status = PlayerInfo.STATUS_PLAYING\n", 208 | " self.user.score = 0\n", 209 | " self.stepState.players = [self.user]\n", 210 | " self.E = self.userMatch.energy\n", 211 | " self.resetFlag = True\n", 212 | " self.init_bots()\n", 213 | " self.stepCount = 0\n", 214 | "\n", 215 | " def reset_map(self, id): # load map info\n", 216 | " self.mapId = id\n", 217 | " self.map = json.loads(self.maps[self.mapId])\n", 218 | " self.userMatch = self.map_info(self.map)\n", 219 | " self.stepState.golds = self.userMatch.gameinfo.golds\n", 220 | " self.map = json.loads(self.maps[self.mapId])\n", 221 | " self.energyOnMap = json.loads(self.maps[self.mapId])\n", 222 | " for x in range(len(self.map)):\n", 223 | " for y in range(len(self.map[x])):\n", 224 | " if self.map[x][y] > 0: # gold\n", 225 | " self.energyOnMap[x][y] = -4\n", 226 | " else: # obstacles\n", 227 | " self.energyOnMap[x][y] = ObstacleInfo.types[self.map[x][y]]\n", 228 | "\n", 229 | " def connect(self): # simulate player's connect request\n", 230 | " print(\"Connected to server.\")\n", 231 | " for mapid in range(len(Maps)):\n", 232 | " filename = \"map\" + str(mapid)\n", 233 | " print(\"Found: \" + filename)\n", 234 | " self.maps[filename] = str(Maps[mapid])\n", 235 | "\n", 236 | " def map_info(self, map): # get map info\n", 237 | " # print(map)\n", 238 | " userMatch = UserMatch()\n", 239 | " userMatch.gameinfo.height = len(map)\n", 240 | " userMatch.gameinfo.width = len(map[0])\n", 241 | " i = 0\n", 242 | " while i < len(map):\n", 243 | " j = 0\n", 244 | " while j < len(map[i]):\n", 245 | " if map[i][j] > 0: # gold\n", 246 | " g = GoldInfo()\n", 247 | " g.posx = j\n", 248 | " g.posy = i\n", 249 | " g.amount = map[i][j]\n", 250 | " userMatch.gameinfo.golds.append(g)\n", 251 | " else: # obstacles\n", 252 | " o = ObstacleInfo()\n", 253 | " o.posx = j\n", 254 | " o.posy = i\n", 255 | " o.type = -map[i][j]\n", 256 | " o.value = ObstacleInfo.types[map[i][j]]\n", 257 | " userMatch.gameinfo.obstacles.append(o)\n", 258 | " j += 1\n", 259 | " i += 1\n", 260 | " return userMatch\n", 261 | "\n", 262 | " def receive(self): # send data to player (simulate player's receive request)\n", 263 | " if self.resetFlag: # for the first time -> send game info\n", 264 | " self.resetFlag = False\n", 265 | " data = self.userMatch.to_json()\n", 266 | " for (bot) in self.bots:\n", 267 | " bot.new_game(data)\n", 268 | 
" # print(data)\n", 269 | " return data\n", 270 | " else: # send step state\n", 271 | " self.stepCount = self.stepCount + 1\n", 272 | " if self.stepCount >= self.maxStep:\n", 273 | " for player in self.stepState.players:\n", 274 | " player.status = PlayerInfo.STATUS_STOP_END_STEP\n", 275 | " data = self.stepState.to_json()\n", 276 | " for (bot) in self.bots: # update bots' state\n", 277 | " bot.new_state(data)\n", 278 | " # print(data)\n", 279 | " return data\n", 280 | "\n", 281 | " def send(self, message): # receive message from player (simulate send request from player)\n", 282 | " if message.isnumeric(): # player send action\n", 283 | " self.resetFlag = False\n", 284 | " self.stepState.changedObstacles = []\n", 285 | " action = int(message)\n", 286 | " # print(\"Action = \", action)\n", 287 | " self.user.lastAction = action\n", 288 | " self.craftUsers = []\n", 289 | " self.step_action(self.user, action)\n", 290 | " for bot in self.bots:\n", 291 | " if bot.info.status == PlayerInfo.STATUS_PLAYING:\n", 292 | " action = bot.next_action()\n", 293 | " bot.info.lastAction = action\n", 294 | " # print(\"Bot Action: \", action)\n", 295 | " self.step_action(bot.info, action)\n", 296 | " self.action_5_craft()\n", 297 | " for c in self.stepState.changedObstacles:\n", 298 | " self.map[c[\"posy\"]][c[\"posx\"]] = -c[\"type\"]\n", 299 | " self.energyOnMap[c[\"posy\"]][c[\"posx\"]] = c[\"value\"]\n", 300 | "\n", 301 | " else: # reset game\n", 302 | " requests = message.split(\",\")\n", 303 | " print(\"Reset game: \", requests)\n", 304 | " self.reset(requests)\n", 305 | "\n", 306 | " def step_action(self, user, action):\n", 307 | " switcher = {\n", 308 | " 0: self.action_0_left,\n", 309 | " 1: self.action_1_right,\n", 310 | " 2: self.action_2_up,\n", 311 | " 3: self.action_3_down,\n", 312 | " 4: self.action_4_free,\n", 313 | " 5: self.action_5_craft_pre\n", 314 | " }\n", 315 | " func = switcher.get(action, self.invalidAction)\n", 316 | " func(user)\n", 317 | "\n", 318 | " def action_5_craft_pre(self, user): # collect players who craft at current step\n", 319 | " user.freeCount = 0\n", 320 | " if self.map[user.posy][user.posx] <= 0: # craft at the non-gold cell\n", 321 | " user.energy -= 10\n", 322 | " if user.energy <= 0:\n", 323 | " user.status = PlayerInfo.STATUS_ELIMINATED_OUT_OF_ENERGY\n", 324 | " user.lastAction = 6 #eliminated\n", 325 | " else:\n", 326 | " user.energy -= 5\n", 327 | " if user.energy > 0:\n", 328 | " self.craftUsers.append(user)\n", 329 | " key = str(user.posx) + \"_\" + str(user.posy)\n", 330 | " if key in self.craftMap:\n", 331 | " count = self.craftMap[key]\n", 332 | " self.craftMap[key] = count + 1\n", 333 | " else:\n", 334 | " self.craftMap[key] = 1\n", 335 | " else:\n", 336 | " user.status = PlayerInfo.STATUS_ELIMINATED_OUT_OF_ENERGY\n", 337 | " user.lastAction = 6 #eliminated\n", 338 | "\n", 339 | " def action_0_left(self, user): # user go left\n", 340 | " user.freeCount = 0\n", 341 | " user.posx = user.posx - 1\n", 342 | " if user.posx < 0:\n", 343 | " user.status = PlayerInfo.STATUS_ELIMINATED_WENT_OUT_MAP\n", 344 | " user.lastAction = 6 #eliminated\n", 345 | " else:\n", 346 | " self.go_to_pos(user)\n", 347 | "\n", 348 | " def action_1_right(self, user): # user go right\n", 349 | " user.freeCount = 0\n", 350 | " user.posx = user.posx + 1\n", 351 | " if user.posx >= self.userMatch.gameinfo.width:\n", 352 | " user.status = PlayerInfo.STATUS_ELIMINATED_WENT_OUT_MAP\n", 353 | " user.lastAction = 6 #eliminated\n", 354 | " else:\n", 355 | " self.go_to_pos(user)\n", 356 | "\n", 
357 | " def action_2_up(self, user): # user go up\n", 358 | " user.freeCount = 0\n", 359 | " user.posy = user.posy - 1\n", 360 | " if user.posy < 0:\n", 361 | " user.status = PlayerInfo.STATUS_ELIMINATED_WENT_OUT_MAP\n", 362 | " user.lastAction = 6 #eliminated\n", 363 | " else:\n", 364 | " self.go_to_pos(user)\n", 365 | "\n", 366 | " def action_3_down(self, user): # user go right\n", 367 | " user.freeCount = 0\n", 368 | " user.posy = user.posy + 1\n", 369 | " if user.posy >= self.userMatch.gameinfo.height:\n", 370 | " user.status = PlayerInfo.STATUS_ELIMINATED_WENT_OUT_MAP\n", 371 | " user.lastAction = 6 #eliminated\n", 372 | " else:\n", 373 | " self.go_to_pos(user)\n", 374 | "\n", 375 | " def action_4_free(self, user): # user free\n", 376 | " user.freeCount += 1\n", 377 | " if user.freeCount == 1:\n", 378 | " user.energy += int(self.E / 4)\n", 379 | " elif user.freeCount == 2:\n", 380 | " user.energy += int(self.E / 3)\n", 381 | " elif user.freeCount == 3:\n", 382 | " user.energy += int(self.E / 2)\n", 383 | " else:\n", 384 | " user.energy = self.E\n", 385 | " if user.energy > self.E:\n", 386 | " user.energy = self.E\n", 387 | "\n", 388 | " def action_5_craft(self):\n", 389 | " craftCount = len(self.craftUsers)\n", 390 | " # print (\"craftCount\",craftCount)\n", 391 | " if (craftCount > 0):\n", 392 | " for user in self.craftUsers:\n", 393 | " x = user.posx\n", 394 | " y = user.posy\n", 395 | " key = str(user.posx) + \"_\" + str(user.posy)\n", 396 | " c = self.craftMap[key]\n", 397 | " m = min(math.ceil(self.map[y][x] / c), 50)\n", 398 | " user.score += m\n", 399 | " # print (\"user\", user.playerId, m)\n", 400 | " for user in self.craftUsers:\n", 401 | " x = user.posx\n", 402 | " y = user.posy\n", 403 | " key = str(user.posx) + \"_\" + str(user.posy)\n", 404 | " if key in self.craftMap:\n", 405 | " c = self.craftMap[key]\n", 406 | " del self.craftMap[key]\n", 407 | " m = min(math.ceil(self.map[y][x] / c), 50)\n", 408 | " self.map[y][x] -= m * c\n", 409 | " if self.map[y][x] < 0:\n", 410 | " self.map[y][x] = 0\n", 411 | " self.energyOnMap[y][x] = ObstacleInfo.types[0]\n", 412 | " for g in self.stepState.golds:\n", 413 | " if g.posx == x and g.posy == y:\n", 414 | " g.amount = self.map[y][x]\n", 415 | " if g.amount == 0:\n", 416 | " self.stepState.golds.remove(g)\n", 417 | " self.add_changed_obstacle(x, y, 0, ObstacleInfo.types[0])\n", 418 | " if len(self.stepState.golds) == 0:\n", 419 | " for player in self.stepState.players:\n", 420 | " player.status = PlayerInfo.STATUS_STOP_EMPTY_GOLD\n", 421 | " break;\n", 422 | " self.craftMap = {}\n", 423 | "\n", 424 | " def invalidAction(self, user):\n", 425 | " user.status = PlayerInfo.STATUS_ELIMINATED_INVALID_ACTION\n", 426 | " user.lastAction = 6 #eliminated\n", 427 | "\n", 428 | " def go_to_pos(self, user): # player move to cell(x,y)\n", 429 | " if self.map[user.posy][user.posx] == -1:\n", 430 | " user.energy -= randrange(16) + 5\n", 431 | " elif self.map[user.posy][user.posx] == 0:\n", 432 | " user.energy += self.energyOnMap[user.posy][user.posx]\n", 433 | " elif self.map[user.posy][user.posx] == -2:\n", 434 | " user.energy += self.energyOnMap[user.posy][user.posx]\n", 435 | " self.add_changed_obstacle(user.posx, user.posy, 0, ObstacleInfo.types[0])\n", 436 | " elif self.map[user.posy][user.posx] == -3:\n", 437 | " user.energy += self.energyOnMap[user.posy][user.posx]\n", 438 | " self.add_changed_obstacle(user.posx, user.posy, 3,\n", 439 | " self.bog_energy_chain[self.energyOnMap[user.posy][user.posx]])\n", 440 | " else:\n", 441 | " user.energy 
-= 4\n", 442 | " if user.energy <= 0:\n", 443 | " user.status = PlayerInfo.STATUS_ELIMINATED_OUT_OF_ENERGY\n", 444 | " user.lastAction = 6 #eliminated\n", 445 | "\n", 446 | " def add_changed_obstacle(self, x, y, t, v):\n", 447 | " added = False\n", 448 | " for o in self.stepState.changedObstacles:\n", 449 | " if o[\"posx\"] == x and o[\"posy\"] == y:\n", 450 | " added = True\n", 451 | " break\n", 452 | " if added == False:\n", 453 | " o = {}\n", 454 | " o[\"posx\"] = x\n", 455 | " o[\"posy\"] = y\n", 456 | " o[\"type\"] = t\n", 457 | " o[\"value\"] = v\n", 458 | " self.stepState.changedObstacles.append(o)\n", 459 | "\n", 460 | " def close(self):\n", 461 | " print(\"Close socket.\")" 462 | ] 463 | }, 464 | { 465 | "cell_type": "code", 466 | "execution_count": 5, 467 | "metadata": { 468 | "colab": {}, 469 | "colab_type": "code", 470 | "id": "ZEExD0BCyePu" 471 | }, 472 | "outputs": [], 473 | "source": [ 474 | "#Bots :bot1\n", 475 | "class Bot1:\n", 476 | " ACTION_GO_LEFT = 0\n", 477 | " ACTION_GO_RIGHT = 1\n", 478 | " ACTION_GO_UP = 2\n", 479 | " ACTION_GO_DOWN = 3\n", 480 | " ACTION_FREE = 4\n", 481 | " ACTION_CRAFT = 5\n", 482 | "\n", 483 | " def __init__(self, id):\n", 484 | " self.state = State()\n", 485 | " self.info = PlayerInfo(id)\n", 486 | "\n", 487 | " def next_action(self):\n", 488 | " if self.state.mapInfo.gold_amount(self.info.posx, self.info.posy) > 0:\n", 489 | " if self.info.energy >= 6:\n", 490 | " return self.ACTION_CRAFT\n", 491 | " else:\n", 492 | " return self.ACTION_FREE\n", 493 | " if self.info.energy < 5:\n", 494 | " return self.ACTION_FREE\n", 495 | " else:\n", 496 | " action = self.ACTION_GO_UP\n", 497 | " if self.info.posy % 2 == 0:\n", 498 | " if self.info.posx < self.state.mapInfo.max_x:\n", 499 | " action = self.ACTION_GO_RIGHT\n", 500 | " else:\n", 501 | " if self.info.posx > 0:\n", 502 | " action = self.ACTION_GO_LEFT\n", 503 | " else:\n", 504 | " action = self.ACTION_GO_DOWN\n", 505 | " return action\n", 506 | "\n", 507 | " def new_game(self, data):\n", 508 | " try:\n", 509 | " self.state.init_state(data)\n", 510 | " except Exception as e:\n", 511 | " import traceback\n", 512 | " traceback.print_exc()\n", 513 | "\n", 514 | " def new_state(self, data):\n", 515 | " # action = self.next_action();\n", 516 | " # self.socket.send(action)\n", 517 | " try:\n", 518 | " self.state.update_state(data)\n", 519 | " except Exception as e:\n", 520 | " import traceback\n", 521 | " traceback.print_exc()\n" 522 | ] 523 | }, 524 | { 525 | "cell_type": "code", 526 | "execution_count": 6, 527 | "metadata": { 528 | "colab": {}, 529 | "colab_type": "code", 530 | "id": "UYHsBcVEyiCm" 531 | }, 532 | "outputs": [], 533 | "source": [ 534 | "#Bots :bot2\n", 535 | "class Bot2:\n", 536 | " ACTION_GO_LEFT = 0\n", 537 | " ACTION_GO_RIGHT = 1\n", 538 | " ACTION_GO_UP = 2\n", 539 | " ACTION_GO_DOWN = 3\n", 540 | " ACTION_FREE = 4\n", 541 | " ACTION_CRAFT = 5\n", 542 | "\n", 543 | " def __init__(self, id):\n", 544 | " self.state = State()\n", 545 | " self.info = PlayerInfo(id)\n", 546 | "\n", 547 | " def next_action(self):\n", 548 | " if self.state.mapInfo.gold_amount(self.info.posx, self.info.posy) > 0:\n", 549 | " if self.info.energy >= 6:\n", 550 | " return self.ACTION_CRAFT\n", 551 | " else:\n", 552 | " return self.ACTION_FREE\n", 553 | " if self.info.energy < 5:\n", 554 | " return self.ACTION_FREE\n", 555 | " else:\n", 556 | " action = np.random.randint(0, 4) \n", 557 | " return action\n", 558 | "\n", 559 | " def new_game(self, data):\n", 560 | " try:\n", 561 | " 
self.state.init_state(data)\n", 562 | " except Exception as e:\n", 563 | " import traceback\n", 564 | " traceback.print_exc()\n", 565 | "\n", 566 | " def new_state(self, data):\n", 567 | " # action = self.next_action();\n", 568 | " # self.socket.send(action)\n", 569 | " try:\n", 570 | " self.state.update_state(data)\n", 571 | " except Exception as e:\n", 572 | " import traceback\n", 573 | " traceback.print_exc()" 574 | ] 575 | }, 576 | { 577 | "cell_type": "code", 578 | "execution_count": 7, 579 | "metadata": { 580 | "colab": {}, 581 | "colab_type": "code", 582 | "id": "CCQo94-0ykm6" 583 | }, 584 | "outputs": [], 585 | "source": [ 586 | "#Bots :bot3\n", 587 | "class Bot3:\n", 588 | " ACTION_GO_LEFT = 0\n", 589 | " ACTION_GO_RIGHT = 1\n", 590 | " ACTION_GO_UP = 2\n", 591 | " ACTION_GO_DOWN = 3\n", 592 | " ACTION_FREE = 4\n", 593 | " ACTION_CRAFT = 5\n", 594 | "\n", 595 | " def __init__(self, id):\n", 596 | " self.state = State()\n", 597 | " self.info = PlayerInfo(id)\n", 598 | "\n", 599 | " def next_action(self):\n", 600 | " if self.state.mapInfo.gold_amount(self.info.posx, self.info.posy) > 0:\n", 601 | " if self.info.energy >= 6:\n", 602 | " return self.ACTION_CRAFT\n", 603 | " else:\n", 604 | " return self.ACTION_FREE\n", 605 | " if self.info.energy < 5:\n", 606 | " return self.ACTION_FREE\n", 607 | " else:\n", 608 | " action = self.ACTION_GO_LEFT\n", 609 | " if self.info.posx % 2 == 0:\n", 610 | " if self.info.posy < self.state.mapInfo.max_y:\n", 611 | " action = self.ACTION_GO_DOWN\n", 612 | " else:\n", 613 | " if self.info.posy > 0:\n", 614 | " action = self.ACTION_GO_UP\n", 615 | " else:\n", 616 | " action = self.ACTION_GO_RIGHT \n", 617 | " return action\n", 618 | "\n", 619 | " def new_game(self, data):\n", 620 | " try:\n", 621 | " self.state.init_state(data)\n", 622 | " except Exception as e:\n", 623 | " import traceback\n", 624 | " traceback.print_exc()\n", 625 | "\n", 626 | " def new_state(self, data):\n", 627 | " # action = self.next_action();\n", 628 | " # self.socket.send(action)\n", 629 | " try:\n", 630 | " self.state.update_state(data)\n", 631 | " except Exception as e:\n", 632 | " import traceback\n", 633 | " traceback.print_exc()" 634 | ] 635 | }, 636 | { 637 | "cell_type": "code", 638 | "execution_count": 8, 639 | "metadata": { 640 | "colab": {}, 641 | "colab_type": "code", 642 | "id": "agO3td72yvaS" 643 | }, 644 | "outputs": [], 645 | "source": [ 646 | "#MinerState.py\n", 647 | "def str_2_json(str):\n", 648 | " return json.loads(str, encoding=\"utf-8\")\n", 649 | "\n", 650 | "\n", 651 | "class MapInfo:\n", 652 | " def __init__(self):\n", 653 | " self.max_x = 0 #Width of the map\n", 654 | " self.max_y = 0 #Height of the map\n", 655 | " self.golds = [] #List of the golds in the map\n", 656 | " self.obstacles = []\n", 657 | " self.numberOfPlayers = 0\n", 658 | " self.maxStep = 0 #The maximum number of step is set for this map\n", 659 | "\n", 660 | " def init_map(self, gameInfo):\n", 661 | " #Initialize the map at the begining of each episode\n", 662 | " self.max_x = gameInfo[\"width\"] - 1\n", 663 | " self.max_y = gameInfo[\"height\"] - 1\n", 664 | " self.golds = gameInfo[\"golds\"]\n", 665 | " self.obstacles = gameInfo[\"obstacles\"]\n", 666 | " self.maxStep = gameInfo[\"steps\"]\n", 667 | " self.numberOfPlayers = gameInfo[\"numberOfPlayers\"]\n", 668 | "\n", 669 | " def update(self, golds, changedObstacles):\n", 670 | " #Update the map after every step\n", 671 | " self.golds = golds\n", 672 | " for cob in changedObstacles:\n", 673 | " newOb = True\n", 674 | " for ob in 
self.obstacles:\n", 675 | " if cob[\"posx\"] == ob[\"posx\"] and cob[\"posy\"] == ob[\"posy\"]:\n", 676 | " newOb = False\n", 677 | " #print(\"cell(\", cob[\"posx\"], \",\", cob[\"posy\"], \") change type from: \", ob[\"type\"], \" -> \",\n", 678 | " # cob[\"type\"], \" / value: \", ob[\"value\"], \" -> \", cob[\"value\"])\n", 679 | " ob[\"type\"] = cob[\"type\"]\n", 680 | " ob[\"value\"] = cob[\"value\"]\n", 681 | " break\n", 682 | " if newOb:\n", 683 | " self.obstacles.append(cob)\n", 684 | " #print(\"new obstacle: \", cob[\"posx\"], \",\", cob[\"posy\"], \", type = \", cob[\"type\"], \", value = \",\n", 685 | " # cob[\"value\"])\n", 686 | "\n", 687 | " def get_min_x(self):\n", 688 | " return min([cell[\"posx\"] for cell in self.golds])\n", 689 | "\n", 690 | " def get_max_x(self):\n", 691 | " return max([cell[\"posx\"] for cell in self.golds])\n", 692 | "\n", 693 | " def get_min_y(self):\n", 694 | " return min([cell[\"posy\"] for cell in self.golds])\n", 695 | "\n", 696 | " def get_max_y(self):\n", 697 | " return max([cell[\"posy\"] for cell in self.golds])\n", 698 | "\n", 699 | " def is_row_has_gold(self, y):\n", 700 | " return y in [cell[\"posy\"] for cell in self.golds]\n", 701 | "\n", 702 | " def is_column_has_gold(self, x):\n", 703 | " return x in [cell[\"posx\"] for cell in self.golds]\n", 704 | "\n", 705 | " def gold_amount(self, x, y): #Get the amount of golds at cell (x,y)\n", 706 | " for cell in self.golds:\n", 707 | " if x == cell[\"posx\"] and y == cell[\"posy\"]:\n", 708 | " return cell[\"amount\"]\n", 709 | " return 0 \n", 710 | "\n", 711 | " def get_obstacle(self, x, y): # Get the kind of the obstacle at cell(x,y)\n", 712 | " for cell in self.obstacles:\n", 713 | " if x == cell[\"posx\"] and y == cell[\"posy\"]:\n", 714 | " return cell[\"type\"]\n", 715 | " return -1 # No obstacle at the cell (x,y)\n", 716 | "\n", 717 | "\n", 718 | "class State:\n", 719 | " STATUS_PLAYING = 0\n", 720 | " STATUS_ELIMINATED_WENT_OUT_MAP = 1\n", 721 | " STATUS_ELIMINATED_OUT_OF_ENERGY = 2\n", 722 | " STATUS_ELIMINATED_INVALID_ACTION = 3\n", 723 | " STATUS_STOP_EMPTY_GOLD = 4\n", 724 | " STATUS_STOP_END_STEP = 5\n", 725 | "\n", 726 | " def __init__(self):\n", 727 | " self.end = False\n", 728 | " self.score = 0\n", 729 | " self.lastAction = None\n", 730 | " self.id = 0\n", 731 | " self.x = 0\n", 732 | " self.y = 0\n", 733 | " self.energy = 0\n", 734 | " self.mapInfo = MapInfo()\n", 735 | " self.players = []\n", 736 | " self.stepCount = 0\n", 737 | " self.status = State.STATUS_PLAYING\n", 738 | "\n", 739 | " def init_state(self, data): #parse data from server into object\n", 740 | " game_info = str_2_json(data)\n", 741 | " self.end = False\n", 742 | " self.score = 0\n", 743 | " self.lastAction = None\n", 744 | " self.id = game_info[\"playerId\"]\n", 745 | " self.x = game_info[\"posx\"]\n", 746 | " self.y = game_info[\"posy\"]\n", 747 | " self.energy = game_info[\"energy\"]\n", 748 | " self.mapInfo.init_map(game_info[\"gameinfo\"])\n", 749 | " self.stepCount = 0\n", 750 | " self.status = State.STATUS_PLAYING\n", 751 | " self.players = [{\"playerId\": 2, \"posx\": self.x, \"posy\": self.y},\n", 752 | " {\"playerId\": 3, \"posx\": self.x, \"posy\": self.y},\n", 753 | " {\"playerId\": 4, \"posx\": self.x, \"posy\": self.y}]\n", 754 | "\n", 755 | " def update_state(self, data):\n", 756 | " new_state = str_2_json(data)\n", 757 | " for player in new_state[\"players\"]:\n", 758 | " if player[\"playerId\"] == self.id:\n", 759 | " self.x = player[\"posx\"]\n", 760 | " self.y = player[\"posy\"]\n", 761 | " 
self.energy = player[\"energy\"]\n", 762 | " self.score = player[\"score\"]\n", 763 | " self.lastAction = player[\"lastAction\"]\n", 764 | " self.status = player[\"status\"]\n", 765 | "\n", 766 | " self.mapInfo.update(new_state[\"golds\"], new_state[\"changedObstacles\"])\n", 767 | " self.players = new_state[\"players\"]\n", 768 | " for i in range(len(self.players) + 1, 5, 1):\n", 769 | " self.players.append({\"playerId\": i, \"posx\": self.x, \"posy\": self.y})\n", 770 | " self.stepCount = self.stepCount + 1" 771 | ] 772 | }, 773 | { 774 | "cell_type": "code", 775 | "execution_count": 9, 776 | "metadata": { 777 | "colab": {}, 778 | "colab_type": "code", 779 | "id": "QHa-DcAcyyMc" 780 | }, 781 | "outputs": [], 782 | "source": [ 783 | "#MinerEnv.py\n", 784 | "TreeID = 1\n", 785 | "TrapID = 2\n", 786 | "SwampID = 3\n", 787 | "class MinerEnv:\n", 788 | " def __init__(self):\n", 789 | " self.socket = GameSocket()\n", 790 | " self.state = State()\n", 791 | " \n", 792 | " self.score_pre = self.state.score#Storing the last score for designing the reward function\n", 793 | "\n", 794 | " def start(self): #connect to server\n", 795 | " self.socket.connect()\n", 796 | "\n", 797 | " def end(self): #disconnect server\n", 798 | " self.socket.close()\n", 799 | "\n", 800 | " def send_map_info(self, request):#tell server which map to run\n", 801 | " self.socket.send(request)\n", 802 | "\n", 803 | " def reset(self): #start new game\n", 804 | " try:\n", 805 | " message = self.socket.receive() #receive game info from server\n", 806 | " self.state.init_state(message) #init state\n", 807 | " except Exception as e:\n", 808 | " import traceback\n", 809 | " traceback.print_exc()\n", 810 | "\n", 811 | " def step(self, action): #step process\n", 812 | " self.socket.send(action) #send action to server\n", 813 | " try:\n", 814 | " message = self.socket.receive() #receive new state from server\n", 815 | " self.state.update_state(message) #update to local state\n", 816 | " except Exception as e:\n", 817 | " import traceback\n", 818 | " traceback.print_exc()\n", 819 | "\n", 820 | " # Functions are customized by client\n", 821 | " def get_state(self):\n", 822 | " # Building the map\n", 823 | " view = np.zeros([self.state.mapInfo.max_x + 1, self.state.mapInfo.max_y + 1], dtype=int)\n", 824 | " for i in range(self.state.mapInfo.max_x + 1):\n", 825 | " for j in range(self.state.mapInfo.max_y + 1):\n", 826 | " if self.state.mapInfo.get_obstacle(i, j) == TreeID: # Tree\n", 827 | " view[i, j] = -TreeID\n", 828 | " if self.state.mapInfo.get_obstacle(i, j) == TrapID: # Trap\n", 829 | " view[i, j] = -TrapID\n", 830 | " if self.state.mapInfo.get_obstacle(i, j) == SwampID: # Swamp\n", 831 | " view[i, j] = -SwampID\n", 832 | " if self.state.mapInfo.gold_amount(i, j) > 0:\n", 833 | " view[i, j] = self.state.mapInfo.gold_amount(i, j)\n", 834 | "\n", 835 | " DQNState = view.flatten().tolist() #Flattening the map matrix to a vector\n", 836 | " \n", 837 | " # Add position and energy of agent to the DQNState\n", 838 | " DQNState.append(self.state.x)\n", 839 | " DQNState.append(self.state.y)\n", 840 | " DQNState.append(self.state.energy)\n", 841 | " #Add position of bots \n", 842 | " for player in self.state.players:\n", 843 | " if player[\"playerId\"] != self.state.id:\n", 844 | " DQNState.append(player[\"posx\"])\n", 845 | " DQNState.append(player[\"posy\"])\n", 846 | " \n", 847 | " #Convert the DQNState from list to array for training\n", 848 | " DQNState = np.array(DQNState)\n", 849 | "\n", 850 | " return DQNState\n", 851 | "\n", 852 | 
" def get_reward(self):\n", 853 | " # Calculate reward\n", 854 | " reward = 0\n", 855 | " score_action = self.state.score - self.score_pre\n", 856 | " self.score_pre = self.state.score\n", 857 | " if score_action > 0:\n", 858 | " #If the DQN agent crafts golds, then it should obtain a positive reward (equal score_action)\n", 859 | " reward += score_action\n", 860 | " \n", 861 | " #If the DQN agent crashs into obstacels (Tree, Trap, Swamp), then it should be punished by a negative reward\n", 862 | " if self.state.mapInfo.get_obstacle(self.state.x, self.state.y) == TreeID: # Tree\n", 863 | " reward -= TreeID\n", 864 | " if self.state.mapInfo.get_obstacle(self.state.x, self.state.y) == TrapID: # Trap\n", 865 | " reward -= TrapID\n", 866 | " if self.state.mapInfo.get_obstacle(self.state.x, self.state.y) == SwampID: # Swamp\n", 867 | " reward -= SwampID\n", 868 | "\n", 869 | " # If out of the map, then the DQN agent should be punished by a larger nagative reward.\n", 870 | " if self.state.status == State.STATUS_ELIMINATED_WENT_OUT_MAP:\n", 871 | " reward += -10\n", 872 | " \n", 873 | " #Run out of energy, then the DQN agent should be punished by a larger nagative reward.\n", 874 | " if self.state.status == State.STATUS_ELIMINATED_OUT_OF_ENERGY:\n", 875 | " reward += -10\n", 876 | " # print (\"reward\",reward)\n", 877 | " return reward\n", 878 | "\n", 879 | " def check_terminate(self):\n", 880 | " #Checking the status of the game\n", 881 | " #it indicates the game ends or is playing\n", 882 | " return self.state.status != State.STATUS_PLAYING" 883 | ] 884 | }, 885 | { 886 | "cell_type": "code", 887 | "execution_count": 10, 888 | "metadata": { 889 | "colab": {}, 890 | "colab_type": "code", 891 | "id": "Qf2sasVey0sm" 892 | }, 893 | "outputs": [], 894 | "source": [ 895 | "#DQNModel.py\n", 896 | "class DQN: \n", 897 | " \n", 898 | " def __init__(\n", 899 | " self,\n", 900 | " input_dim, #The number of inputs for the DQN network\n", 901 | " action_space, #The number of actions for the DQN network\n", 902 | " gamma = 0.99, #The discount factor\n", 903 | " epsilon = 1, #Epsilon - the exploration factor\n", 904 | " epsilon_min = 0.01, #The minimum epsilon \n", 905 | " epsilon_decay = 0.999,#The decay epislon for each update_epsilon time\n", 906 | " learning_rate = 0.00025, #The learning rate for the DQN network\n", 907 | " tau = 0.125, #The factor for updating the DQN target network from the DQN network\n", 908 | " model = None, #The DQN model\n", 909 | " target_model = None, #The DQN target model \n", 910 | " sess=None\n", 911 | " \n", 912 | " ):\n", 913 | " self.input_dim = input_dim\n", 914 | " self.action_space = action_space\n", 915 | " self.gamma = gamma\n", 916 | " self.epsilon = epsilon\n", 917 | " self.epsilon_min = epsilon_min\n", 918 | " self.epsilon_decay = epsilon_decay\n", 919 | " self.learning_rate = learning_rate\n", 920 | " self.tau = tau\n", 921 | " \n", 922 | " #Creating networks\n", 923 | " self.model = self.create_model() #Creating the DQN model\n", 924 | " self.target_model = self.create_model() #Creating the DQN target model\n", 925 | " \n", 926 | " #Tensorflow GPU optimization\n", 927 | " config = tf.ConfigProto()\n", 928 | " config.gpu_options.allow_growth = True\n", 929 | " self.sess = tf.Session(config=config)\n", 930 | " K.set_session(sess)\n", 931 | " self.sess.run( tf.global_variables_initializer()) \n", 932 | " \n", 933 | " def create_model(self):\n", 934 | " #Creating the network\n", 935 | " #Two hidden layers (300,300), their activation is ReLu\n", 936 | " #One output 
layer with action_space of nodes, activation is linear.\n", 937 | " model = Sequential()\n", 938 | " model.add(Dense(300, input_dim=self.input_dim))\n", 939 | " model.add(Activation('relu'))\n", 940 | " model.add(Dense(300))\n", 941 | " model.add(Activation('relu'))\n", 942 | " model.add(Dense(self.action_space))\n", 943 | " model.add(Activation('linear')) \n", 944 | " #adam = optimizers.adam(lr=self.learning_rate)\n", 945 | " sgd = optimizers.SGD(lr=self.learning_rate, decay=1e-6, momentum=0.95)\n", 946 | " model.compile(optimizer = sgd,\n", 947 | " loss='mse')\n", 948 | " return model\n", 949 | " \n", 950 | " \n", 951 | " def act(self,state):\n", 952 | " #Get the index of the maximum Q values \n", 953 | " a_max = np.argmax(self.model.predict(state.reshape(1,len(state)))) \n", 954 | " if (random.random() < self.epsilon):\n", 955 | " a_chosen = randrange(self.action_space)\n", 956 | " else:\n", 957 | " a_chosen = a_max \n", 958 | " return a_chosen\n", 959 | " \n", 960 | " \n", 961 | " def replay(self,samples,batch_size):\n", 962 | " inputs = np.zeros((batch_size, self.input_dim))\n", 963 | " targets = np.zeros((batch_size, self.action_space))\n", 964 | " \n", 965 | " for i in range(0,batch_size):\n", 966 | " state = samples[0][i,:]\n", 967 | " action = samples[1][i]\n", 968 | " reward = samples[2][i]\n", 969 | " new_state = samples[3][i,:]\n", 970 | " done= samples[4][i]\n", 971 | " \n", 972 | " inputs[i,:] = state\n", 973 | " targets[i,:] = self.target_model.predict(state.reshape(1,len(state))) \n", 974 | " if done:\n", 975 | " targets[i,action] = reward # if terminated, only equals reward\n", 976 | " else:\n", 977 | " Q_future = np.max(self.target_model.predict(new_state.reshape(1,len(new_state))))\n", 978 | " targets[i,action] = reward + Q_future * self.gamma\n", 979 | " #Training\n", 980 | " loss = self.model.train_on_batch(inputs, targets) \n", 981 | " \n", 982 | " def target_train(self): \n", 983 | " weights = self.model.get_weights()\n", 984 | " target_weights = self.target_model.get_weights()\n", 985 | " for i in range(0, len(target_weights)):\n", 986 | " target_weights[i] = weights[i] * self.tau + target_weights[i] * (1 - self.tau)\n", 987 | " \n", 988 | " self.target_model.set_weights(target_weights) \n", 989 | " \n", 990 | " \n", 991 | " def update_epsilon(self):\n", 992 | " self.epsilon = self.epsilon*self.epsilon_decay\n", 993 | " self.epsilon = max(self.epsilon_min, self.epsilon)\n", 994 | " \n", 995 | " \n", 996 | " def save_model(self, model_name):\n", 997 | " # serialize model to JSON\n", 998 | " model_json = self.model.to_json()\n", 999 | " with open(model_name + \".json\", \"w\") as json_file:\n", 1000 | " json_file.write(model_json)\n", 1001 | " # serialize weights to HDF5\n", 1002 | " self.model.save_weights( model_name + \".h5\")\n", 1003 | " print(\"Saved model to disk\")" 1004 | ] 1005 | }, 1006 | { 1007 | "cell_type": "code", 1008 | "execution_count": 11, 1009 | "metadata": { 1010 | "colab": {}, 1011 | "colab_type": "code", 1012 | "id": "MARetIYHy4qp" 1013 | }, 1014 | "outputs": [], 1015 | "source": [ 1016 | "#Memory.py\n", 1017 | "class Memory: \n", 1018 | " capacity = None \n", 1019 | " \n", 1020 | " def __init__(\n", 1021 | " self,\n", 1022 | " capacity,\n", 1023 | " length = None,\n", 1024 | " states = None,\n", 1025 | " actions = None,\n", 1026 | " rewards = None,\n", 1027 | " dones = None,\n", 1028 | " states2 = None, \n", 1029 | " ):\n", 1030 | " self.capacity = capacity\n", 1031 | " self.length = 0\n", 1032 | " self.states = states\n", 1033 | " self.actions 
= actions\n", 1034 | " self.rewards = rewards\n", 1035 | " self.dones = dones\n", 1036 | " self.states2 = states2\n", 1037 | "\n", 1038 | " def push(self, s, a, r, done, s2):\n", 1039 | " if self.states is None:\n", 1040 | " self.states = s\n", 1041 | " self.actions = a\n", 1042 | " self.rewards = r\n", 1043 | " self.dones = done\n", 1044 | " self.states2 = s2\n", 1045 | " else:\n", 1046 | " self.states = np.vstack((self.states,s))\n", 1047 | " self.actions = np.vstack((self.actions,a))\n", 1048 | " self.rewards = np.vstack((self.rewards, r))\n", 1049 | " self.dones = np.vstack((self.dones, done))\n", 1050 | " self.states2 = np.vstack((self.states2,s2))\n", 1051 | " \n", 1052 | " self.length = self.length + 1\n", 1053 | " \n", 1054 | " if (self.length > self.capacity): \n", 1055 | " self.states = np.delete(self.states,(0), axis = 0)\n", 1056 | " self.actions = np.delete(self.actions,(0), axis = 0)\n", 1057 | " self.rewards = np.delete(self.rewards,(0), axis = 0)\n", 1058 | " self.dones = np.delete(self.dones,(0), axis = 0)\n", 1059 | " self.states2 = np.delete(self.states2,(0), axis = 0) \n", 1060 | " self.length = self.length - 1\n", 1061 | " \n", 1062 | " \n", 1063 | " def sample(self,batch_size):\n", 1064 | " if (self.length >= batch_size):\n", 1065 | " idx = random.sample(range(0,self.length),batch_size)\n", 1066 | " s = self.states[idx,:]\n", 1067 | " a = self.actions[idx,:]\n", 1068 | " r = self.rewards[idx,:]\n", 1069 | " d = self.dones[idx,:]\n", 1070 | " s2 = self.states2[idx,:]\n", 1071 | " \n", 1072 | " return list([s,a,r,s2,d])" 1073 | ] 1074 | }, 1075 | { 1076 | "cell_type": "code", 1077 | "execution_count": 12, 1078 | "metadata": { 1079 | "colab": {}, 1080 | "colab_type": "code", 1081 | "id": "1ZqwV8edy7eX" 1082 | }, 1083 | "outputs": [], 1084 | "source": [ 1085 | "#Creating Maps\n", 1086 | "#This function is used to create 05 maps instead of loading them from Maps folder in the local\n", 1087 | "def CreateMaps():\n", 1088 | " map1 = [\n", 1089 | " [0, 0, -2, 100, 0, 0, -1, -1, -3, 0, 0, 0, -1, -1, 0, 0, -3, 0, -1, -1,0],\n", 1090 | " [-1,-1, -2, 0, 0, 0, -3, -1, 0, -2, 0, 0, 0, -1, 0, -1, 0, -2, -1, 0,0],\n", 1091 | " [0, 0, -1, 0, 0, 0, 0, -1, -1, -1, 0, 0, 100, 0, 0, 0, 0, 50, -2, 0,0],\n", 1092 | " [0, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, -1, 50, -2, 0, 0, -1, -1, 0,0],\n", 1093 | " [-2, 0, 200, -2, -2, 300, 0, 0, -2, -2, 0, 0, -3, 0, -1, 0, 0, -3, -1, 0,0],\n", 1094 | " [0, -1, 0, 0, 0, 0, 0, -3, 0, 0, -1, -1, 0, 0, 0, 0, 0, 0, -2, 0,0],\n", 1095 | " [0, -1, -1, 0, 0, -1, -1, 0, 0, 700, -1, 0, 0, 0, -2, -1, -1, 0, 0, 0,100],\n", 1096 | " [0, 0, 0, 500, 0, 0, -1, 0, -2, -2, -1, -1, 0, 0, -2, 0, -3, 0, 0, -1,0],\n", 1097 | " [-1, -1, 0,-2 , 0, -1, -2, 0, 400, -2, -1, -1, 500, 0, -2, 0, -3, 100, 0, 0,0]\n", 1098 | " ]\n", 1099 | " map2 = [\n", 1100 | " [0, 0, -2, 0, 0, 0, -1, -1, -3, 0, 0, 0, -1, -1, 0, 0, -3, 0, -1, -1,0],\n", 1101 | " [-1,-1, -2, 100, 0, 0, -3, -1, 0, -2, 100, 0, 0, -1, 0, -1, 0, -2, -1, 0,0],\n", 1102 | " [0, 0, -1, 0, 0, 0, 0, -1, -1, -1, 0, 0, 0, 0, 0, 0, 50, 0, -2, 0,0],\n", 1103 | " [0, 200, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, -1, 50, -2, 0, 0, -1, -1, 0,0],\n", 1104 | " [-2, 0, 0, -2, -2, 0, 0, 0, -2, -2, 0, 0, -3, 0, -1, 0, 0, -3, -1, 0,0],\n", 1105 | " [0, -1, 0, 0, 300, 0, 0, -3, 0, 0, -1, -1, 0, 0, 0, 0, 0, 0, -2, 0,0],\n", 1106 | " [500, -1, -1, 0, 0, -1, -1, 0, 700, 0, -1, 0, 0, 0, -2, -1, -1, 0, 0, 0,0],\n", 1107 | " [0, 0, 0, 0, 0, 0, -1, 0, -2, -2, -1, -1, 0, 0, -2, 0, -3, 100, 0, -1,0],\n", 1108 | " [-1, -1, 0,-2 , 0, -1, -2, 400, 0, -2, -1, -1, 
0, 500, -2, 0, -3, 0, 0, 100,0]\n", 1109 | " ]\n", 1110 | " map3= [\n", 1111 | " [0, 0, -2, 0, 0, 0, -1, -1, -3, 0, 100, 0, -1, -1, 0, 0, -3, 0, -1, -1,0],\n", 1112 | " [-1,-1, -2, 0, 0, 0, -3, -1, 0, -2, 0, 0, 0, -1, 0, -1, 0, -2, -1, 0,0 ],\n", 1113 | " [0, 0, -1, 0, 0, 0, 100, -1, -1, -1, 0, 0, 50, 0, 0, 0, 50, 0, -2, 0,0],\n", 1114 | " [0, 200, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, -1, 0, -2, 0, 0, -1, -1, 0,0],\n", 1115 | " [-2, 0, 0, -2, -2, 0, 0, 0, -2, -2, 0, 0, -3, 0, -1, 0, 0, -3, -1, 0,0],\n", 1116 | " [0, -1, 0, 300, 0, 0, 0, -3, 0, 0, -1, -1, 0, 0, 0, 0, 0, 0, -2, 0,0],\n", 1117 | " [0, -1, -1, 0, 0, -1, -1, 700, 0, 0, -1, 0, 0, 0, -2, -1, -1, 0, 0, 0,0],\n", 1118 | " [0, 0, 0, 0, 0, 500, -1, 0, -2, -2, -1, -1, 0, 0, -2, 0, -3, 0, 700, -1,0],\n", 1119 | " [-1, -1, 0,-2 , 0, -1, -2, 400, 0, -2, -1, -1, 0, 500, -2, 0, -3, 0, 0, 100,0]\n", 1120 | " ]\n", 1121 | " map4=[\n", 1122 | " [0, 0, -2, 0, 0, 0, -1, -1, -3, 0, 0, 0, -1, -1, 0, 0, -3, 0, -1, -1,0],\n", 1123 | " [-1,-1, -2, 0, 0, 0, -3, -1, 0, -2, 0, 0, 100, -1, 0, -1, 0, -2, -1, 0,0],\n", 1124 | " [0, 0, -1, 0, 100, 0, 0, -1, -1, -1, 0, 0, 0, 0, 50, 0, 50, 0, -2, 0,0],\n", 1125 | " [0, 200, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, -1, 0, -2, 0, 0, -1, -1, 0,0],\n", 1126 | " [-2, 0, 0, -2, -2, 0, 0, 0, -2, -2, 0, 0, -3, 0, -1, 0, 0, -3, -1, 0,0],\n", 1127 | " [0, -1, 0, 0, 0, 0, 300, -3, 0, 700, -1, -1, 0, 0, 0, 0, 0, 0, -2, 0,0],\n", 1128 | " [0, -1, -1, 0, 0, -1, -1, 0, 0, 0, -1, 0, 0, 0, -2, -1, -1, 0, 0, 100,0],\n", 1129 | " [500, 0, 0, 0, 0, 0, -1, 0, -2, -2, -1, -1, 0, 0, -2, 0, -3, 0, 0, -1,0],\n", 1130 | " [-1, -1, 0,-2 , 0, -1, -2, 400, 0, -2, -1, -1, 0, 500, -2, 0, -3, 0, 0, 100,0]\n", 1131 | "\n", 1132 | " ]\n", 1133 | " map5=[\n", 1134 | " [0, 0, -2, 0, 100, 0, -1, -1, -3, 0, 0, 0, -1, -1, 0, 0, -3, 0, -1, -1,0],\n", 1135 | " [-1,-1, -2, 0, 0, 0, -3, -1, 0, -2, 100, 0, 0, -1, 0, -1, 0, -2, -1, 0,0],\n", 1136 | " [0, 0, -1, 0, 0, 0, 0, -1, -1, -1, 0, 0, 0, 0, 50, 0, 0, 0, -2, 0,0],\n", 1137 | " [0, 200, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, -1, 0, -2, 0, 50, -1, -1, 0,0],\n", 1138 | " [-2, 0, 0, -2, -2, 0, 0, 0, -2, -2, 0, 0, -3, 0, -1, 0, 0, -3, -1, 0,0],\n", 1139 | " [0, -1, 0, 0, 300, 0, 0, -3, 0, 0, -1, -1, 0, 0, 0, 0, 0, 0, -2, 0,0],\n", 1140 | " [500, -1, -1, 0, 0, -1, -1, 0, 0, 700, -1, 0, 0, 0, -2, -1, -1, 0, 0, 100,0],\n", 1141 | " [0, 0, 0, 0, 0, 0, -1, 0, -2, -2, -1, -1, 0, 0, -2, 0, -3, 0, 0, -1,0],\n", 1142 | " [-1, -1, 0,-2 , 0, -1, -2, 400, 0, -2, -1, -1, 0, 500, -2, 0, -3, 0, 0, 100,0]\n", 1143 | " ]\n", 1144 | " Maps = (map1,map2,map3,map4,map5)\n", 1145 | " return Maps \n", 1146 | "\n" 1147 | ] 1148 | }, 1149 | { 1150 | "cell_type": "code", 1151 | "execution_count": null, 1152 | "metadata": { 1153 | "colab": { 1154 | "base_uri": "https://localhost:8080/", 1155 | "height": 1000 1156 | }, 1157 | "colab_type": "code", 1158 | "id": "SKN2xerQy-p8", 1159 | "outputId": "7ef7de46-4787-456c-e8f2-228058511229" 1160 | }, 1161 | "outputs": [ 1162 | { 1163 | "name": "stdout", 1164 | "output_type": "stream", 1165 | "text": [ 1166 | "Connected to server.\n", 1167 | "Found: map0\n", 1168 | "Found: map1\n", 1169 | "Found: map2\n", 1170 | "Found: map3\n", 1171 | "Found: map4\n", 1172 | "Reset game: ['map0', '5', '4', '50', '100']\n", 1173 | "numberOfPlayers: 4\n", 1174 | "Episode 1 ends. Number of steps is: 7. Accumlated Reward = -13.00. Epsilon = 1.00 .Termination code: 1\n", 1175 | "Reset game: ['map4', '12', '8', '50', '100']\n", 1176 | "numberOfPlayers: 4\n", 1177 | "Episode 2 ends. Number of steps is: 1. Accumlated Reward = -10.00. 
Epsilon = 1.00 .Termination code: 1\n", 1178 | "Reset game: ['map2', '4', '4', '50', '100']\n", 1179 | "numberOfPlayers: 4\n", 1180 | "Episode 3 ends. Number of steps is: 28. Accumlated Reward = -15.00. Epsilon = 1.00 .Termination code: 1\n", 1181 | "Reset game: ['map2', '10', '5', '50', '100']\n", 1182 | "numberOfPlayers: 4\n", 1183 | "Episode 4 ends. Number of steps is: 6. Accumlated Reward = -14.00. Epsilon = 1.00 .Termination code: 1\n", 1184 | "Reset game: ['map1', '4', '3', '50', '100']\n", 1185 | "numberOfPlayers: 4\n", 1186 | "Episode 5 ends. Number of steps is: 17. Accumlated Reward = 33.00. Epsilon = 1.00 .Termination code: 1\n", 1187 | "Reset game: ['map1', '12', '3', '50', '100']\n", 1188 | "numberOfPlayers: 4\n", 1189 | "Episode 6 ends. Number of steps is: 13. Accumlated Reward = -13.00. Epsilon = 1.00 .Termination code: 1\n", 1190 | "Reset game: ['map1', '1', '5', '50', '100']\n", 1191 | "numberOfPlayers: 4\n", 1192 | "Episode 7 ends. Number of steps is: 10. Accumlated Reward = -15.00. Epsilon = 1.00 .Termination code: 1\n", 1193 | "Reset game: ['map0', '1', '4', '50', '100']\n", 1194 | "numberOfPlayers: 4\n", 1195 | "Episode 8 ends. Number of steps is: 3. Accumlated Reward = -10.00. Epsilon = 1.00 .Termination code: 1\n", 1196 | "Reset game: ['map4', '18', '7', '50', '100']\n", 1197 | "numberOfPlayers: 4\n", 1198 | "Episode 9 ends. Number of steps is: 15. Accumlated Reward = -17.00. Epsilon = 1.00 .Termination code: 1\n", 1199 | "Reset game: ['map0', '0', '1', '50', '100']\n", 1200 | "numberOfPlayers: 4\n", 1201 | "Episode 10 ends. Number of steps is: 5. Accumlated Reward = -11.00. Epsilon = 1.00 .Termination code: 1\n", 1202 | "Reset game: ['map3', '14', '2', '50', '100']\n", 1203 | "numberOfPlayers: 4\n", 1204 | "Episode 11 ends. Number of steps is: 50. Accumlated Reward = -19.00. Epsilon = 1.00 .Termination code: 1\n", 1205 | "Reset game: ['map4', '19', '8', '50', '100']\n", 1206 | "numberOfPlayers: 4\n", 1207 | "Episode 12 ends. Number of steps is: 5. Accumlated Reward = -11.00. Epsilon = 1.00 .Termination code: 1\n", 1208 | "Reset game: ['map1', '1', '4', '50', '100']\n", 1209 | "numberOfPlayers: 4\n", 1210 | "Episode 13 ends. Number of steps is: 17. Accumlated Reward = -12.00. Epsilon = 1.00 .Termination code: 1\n", 1211 | "Reset game: ['map4', '18', '4', '50', '100']\n", 1212 | "numberOfPlayers: 4\n", 1213 | "Episode 14 ends. Number of steps is: 5. Accumlated Reward = -18.00. Epsilon = 1.00 .Termination code: 1\n", 1214 | "Reset game: ['map1', '2', '5', '50', '100']\n", 1215 | "numberOfPlayers: 4\n", 1216 | "Episode 15 ends. Number of steps is: 4. Accumlated Reward = -14.00. Epsilon = 1.00 .Termination code: 1\n", 1217 | "Reset game: ['map2', '19', '2', '50', '100']\n", 1218 | "numberOfPlayers: 4\n", 1219 | "Episode 16 ends. Number of steps is: 7. Accumlated Reward = -13.00. Epsilon = 1.00 .Termination code: 1\n", 1220 | "Reset game: ['map1', '10', '5', '50', '100']\n", 1221 | "numberOfPlayers: 4\n", 1222 | "Episode 17 ends. Number of steps is: 32. Accumlated Reward = -24.00. Epsilon = 1.00 .Termination code: 1\n", 1223 | "Reset game: ['map1', '3', '1', '50', '100']\n", 1224 | "numberOfPlayers: 4\n", 1225 | "Episode 18 ends. Number of steps is: 20. Accumlated Reward = -11.00. Epsilon = 1.00 .Termination code: 1\n", 1226 | "Reset game: ['map1', '20', '7', '50', '100']\n", 1227 | "numberOfPlayers: 4\n", 1228 | "Episode 19 ends. Number of steps is: 2. Accumlated Reward = -10.00. 
Epsilon = 1.00 .Termination code: 1\n", 1229 | "Reset game: ['map0', '5', '1', '50', '100']\n", 1230 | "numberOfPlayers: 4\n", 1231 | "Episode 20 ends. Number of steps is: 13. Accumlated Reward = -16.00. Epsilon = 1.00 .Termination code: 1\n", 1232 | "Reset game: ['map2', '3', '2', '50', '100']\n", 1233 | "numberOfPlayers: 4\n", 1234 | "Episode 21 ends. Number of steps is: 8. Accumlated Reward = -11.00. Epsilon = 1.00 .Termination code: 1\n", 1235 | "Reset game: ['map3', '9', '2', '50', '100']\n", 1236 | "numberOfPlayers: 4\n", 1237 | "Episode 22 ends. Number of steps is: 14. Accumlated Reward = -13.00. Epsilon = 1.00 .Termination code: 1\n", 1238 | "Reset game: ['map2', '19', '2', '50', '100']\n", 1239 | "numberOfPlayers: 4\n", 1240 | "Episode 23 ends. Number of steps is: 8. Accumlated Reward = -10.00. Epsilon = 1.00 .Termination code: 1\n", 1241 | "Reset game: ['map0', '10', '4', '50', '100']\n", 1242 | "numberOfPlayers: 4\n", 1243 | "Episode 24 ends. Number of steps is: 10. Accumlated Reward = -12.00. Epsilon = 1.00 .Termination code: 1\n", 1244 | "Reset game: ['map4', '11', '1', '50', '100']\n", 1245 | "numberOfPlayers: 4\n", 1246 | "Episode 25 ends. Number of steps is: 3. Accumlated Reward = -11.00. Epsilon = 1.00 .Termination code: 1\n", 1247 | "Reset game: ['map0', '15', '2', '50', '100']\n", 1248 | "numberOfPlayers: 4\n", 1249 | "Episode 26 ends. Number of steps is: 23. Accumlated Reward = -27.00. Epsilon = 1.00 .Termination code: 1\n", 1250 | "Reset game: ['map2', '17', '4', '50', '100']\n", 1251 | "numberOfPlayers: 4\n", 1252 | "Episode 27 ends. Number of steps is: 4. Accumlated Reward = -16.00. Epsilon = 1.00 .Termination code: 1\n", 1253 | "Reset game: ['map0', '6', '4', '50', '100']\n", 1254 | "numberOfPlayers: 4\n", 1255 | "Episode 28 ends. Number of steps is: 14. Accumlated Reward = -10.00. Epsilon = 1.00 .Termination code: 1\n", 1256 | "Reset game: ['map1', '14', '1', '50', '100']\n", 1257 | "numberOfPlayers: 4\n", 1258 | "Episode 29 ends. Number of steps is: 10. Accumlated Reward = -14.00. Epsilon = 1.00 .Termination code: 1\n", 1259 | "Reset game: ['map4', '20', '1', '50', '100']\n", 1260 | "numberOfPlayers: 4\n", 1261 | "Episode 30 ends. Number of steps is: 5. Accumlated Reward = -12.00. Epsilon = 1.00 .Termination code: 1\n", 1262 | "Reset game: ['map3', '19', '0', '50', '100']\n", 1263 | "numberOfPlayers: 4\n", 1264 | "Episode 31 ends. Number of steps is: 5. Accumlated Reward = -14.00. Epsilon = 1.00 .Termination code: 1\n", 1265 | "Reset game: ['map0', '1', '1', '50', '100']\n", 1266 | "numberOfPlayers: 4\n", 1267 | "Episode 32 ends. Number of steps is: 4. Accumlated Reward = -10.00. Epsilon = 1.00 .Termination code: 1\n", 1268 | "Reset game: ['map0', '18', '6', '50', '100']\n", 1269 | "numberOfPlayers: 4\n", 1270 | "Episode 33 ends. Number of steps is: 13. Accumlated Reward = -18.00. Epsilon = 1.00 .Termination code: 1\n", 1271 | "Reset game: ['map4', '16', '1', '50', '100']\n", 1272 | "numberOfPlayers: 4\n", 1273 | "Episode 34 ends. Number of steps is: 6. Accumlated Reward = -11.00. Epsilon = 1.00 .Termination code: 1\n", 1274 | "Reset game: ['map1', '0', '0', '50', '100']\n", 1275 | "numberOfPlayers: 4\n", 1276 | "Episode 35 ends. Number of steps is: 2. Accumlated Reward = -10.00. Epsilon = 1.00 .Termination code: 1\n", 1277 | "Reset game: ['map3', '0', '4', '50', '100']\n", 1278 | "numberOfPlayers: 4\n", 1279 | "Episode 36 ends. Number of steps is: 26. Accumlated Reward = -16.00. 
Epsilon = 1.00 .Termination code: 1\n", 1280 | "Reset game: ['map4', '9', '4', '50', '100']\n", 1281 | "numberOfPlayers: 4\n", 1282 | "Episode 37 ends. Number of steps is: 19. Accumlated Reward = 90.00. Epsilon = 1.00 .Termination code: 1\n", 1283 | "Reset game: ['map1', '18', '0', '50', '100']\n", 1284 | "numberOfPlayers: 4\n", 1285 | "Episode 38 ends. Number of steps is: 2. Accumlated Reward = -10.00. Epsilon = 1.00 .Termination code: 1\n", 1286 | "Reset game: ['map2', '0', '0', '50', '100']\n", 1287 | "numberOfPlayers: 4\n", 1288 | "Episode 39 ends. Number of steps is: 11. Accumlated Reward = -13.00. Epsilon = 1.00 .Termination code: 1\n", 1289 | "Reset game: ['map0', '6', '1', '50', '100']\n", 1290 | "numberOfPlayers: 4\n", 1291 | "Episode 40 ends. Number of steps is: 14. Accumlated Reward = -10.00. Epsilon = 1.00 .Termination code: 1\n", 1292 | "Reset game: ['map2', '15', '5', '50', '100']\n", 1293 | "numberOfPlayers: 4\n", 1294 | "Episode 41 ends. Number of steps is: 14. Accumlated Reward = -13.00. Epsilon = 1.00 .Termination code: 1\n", 1295 | "Reset game: ['map3', '18', '6', '50', '100']\n", 1296 | "numberOfPlayers: 4\n", 1297 | "Episode 42 ends. Number of steps is: 14. Accumlated Reward = -14.00. Epsilon = 1.00 .Termination code: 1\n", 1298 | "Reset game: ['map2', '12', '6', '50', '100']\n", 1299 | "numberOfPlayers: 4\n", 1300 | "Episode 43 ends. Number of steps is: 13. Accumlated Reward = -13.00. Epsilon = 1.00 .Termination code: 1\n", 1301 | "Reset game: ['map1', '2', '7', '50', '100']\n", 1302 | "numberOfPlayers: 4\n", 1303 | "Episode 44 ends. Number of steps is: 8. Accumlated Reward = -11.00. Epsilon = 1.00 .Termination code: 1\n", 1304 | "Reset game: ['map0', '7', '4', '50', '100']\n", 1305 | "numberOfPlayers: 4\n", 1306 | "Episode 45 ends. Number of steps is: 10. Accumlated Reward = -20.00. Epsilon = 1.00 .Termination code: 1\n", 1307 | "Reset game: ['map1', '3', '8', '50', '100']\n", 1308 | "numberOfPlayers: 4\n", 1309 | "Episode 46 ends. Number of steps is: 3. Accumlated Reward = -11.00. Epsilon = 1.00 .Termination code: 1\n", 1310 | "Reset game: ['map1', '3', '0', '50', '100']\n", 1311 | "numberOfPlayers: 4\n", 1312 | "Episode 47 ends. Number of steps is: 9. Accumlated Reward = -13.00. Epsilon = 1.00 .Termination code: 1\n", 1313 | "Reset game: ['map0', '16', '2', '50', '100']\n", 1314 | "numberOfPlayers: 4\n", 1315 | "Episode 48 ends. Number of steps is: 12. Accumlated Reward = -12.00. Epsilon = 1.00 .Termination code: 1\n", 1316 | "Reset game: ['map4', '15', '8', '50', '100']\n", 1317 | "numberOfPlayers: 4\n", 1318 | "Episode 49 ends. Number of steps is: 3. Accumlated Reward = -10.00. Epsilon = 1.00 .Termination code: 1\n", 1319 | "Reset game: ['map0', '8', '3', '50', '100']\n", 1320 | "numberOfPlayers: 4\n", 1321 | "Episode 50 ends. Number of steps is: 33. Accumlated Reward = 38.00. Epsilon = 1.00 .Termination code: 1\n", 1322 | "Reset game: ['map3', '4', '6', '50', '100']\n", 1323 | "numberOfPlayers: 4\n", 1324 | "Episode 51 ends. Number of steps is: 16. Accumlated Reward = -23.00. Epsilon = 1.00 .Termination code: 1\n", 1325 | "Reset game: ['map1', '15', '4', '50', '100']\n", 1326 | "numberOfPlayers: 4\n", 1327 | "Episode 52 ends. Number of steps is: 13. Accumlated Reward = -11.00. Epsilon = 1.00 .Termination code: 1\n", 1328 | "Reset game: ['map4', '20', '3', '50', '100']\n", 1329 | "numberOfPlayers: 4\n", 1330 | "Episode 53 ends. Number of steps is: 4. Accumlated Reward = -10.00. 
Epsilon = 1.00 .Termination code: 1\n", 1331 | "Reset game: ['map4', '9', '3', '50', '100']\n", 1332 | "numberOfPlayers: 4\n", 1333 | "Episode 54 ends. Number of steps is: 11. Accumlated Reward = -14.00. Epsilon = 1.00 .Termination code: 1\n", 1334 | "Reset game: ['map4', '13', '8', '50', '100']\n", 1335 | "numberOfPlayers: 4\n", 1336 | "Episode 55 ends. Number of steps is: 7. Accumlated Reward = -10.00. Epsilon = 1.00 .Termination code: 1\n", 1337 | "Reset game: ['map3', '9', '7', '50', '100']\n", 1338 | "numberOfPlayers: 4\n", 1339 | "Episode 56 ends. Number of steps is: 5. Accumlated Reward = -16.00. Epsilon = 1.00 .Termination code: 1\n", 1340 | "Reset game: ['map2', '1', '2', '50', '100']\n", 1341 | "numberOfPlayers: 4\n", 1342 | "Episode 57 ends. Number of steps is: 5. Accumlated Reward = -12.00. Epsilon = 1.00 .Termination code: 1\n", 1343 | "Reset game: ['map1', '15', '6', '50', '100']\n", 1344 | "numberOfPlayers: 4\n", 1345 | "Episode 58 ends. Number of steps is: 13. Accumlated Reward = -18.00. Epsilon = 1.00 .Termination code: 1\n", 1346 | "Reset game: ['map2', '5', '3', '50', '100']\n", 1347 | "numberOfPlayers: 4\n", 1348 | "Episode 59 ends. Number of steps is: 8. Accumlated Reward = -10.00. Epsilon = 1.00 .Termination code: 1\n", 1349 | "Reset game: ['map3', '8', '8', '50', '100']\n", 1350 | "numberOfPlayers: 4\n", 1351 | "Episode 60 ends. Number of steps is: 17. Accumlated Reward = -16.00. Epsilon = 1.00 .Termination code: 1\n", 1352 | "Reset game: ['map0', '14', '7', '50', '100']\n", 1353 | "numberOfPlayers: 4\n", 1354 | "Episode 61 ends. Number of steps is: 6. Accumlated Reward = -12.00. Epsilon = 1.00 .Termination code: 1\n", 1355 | "Reset game: ['map2', '20', '1', '50', '100']\n", 1356 | "numberOfPlayers: 4\n", 1357 | "Episode 62 ends. Number of steps is: 10. Accumlated Reward = -12.00. Epsilon = 1.00 .Termination code: 1\n", 1358 | "Reset game: ['map2', '3', '7', '50', '100']\n", 1359 | "numberOfPlayers: 4\n", 1360 | "Episode 63 ends. Number of steps is: 23. Accumlated Reward = -12.00. Epsilon = 1.00 .Termination code: 1\n", 1361 | "Reset game: ['map2', '0', '0', '50', '100']\n", 1362 | "numberOfPlayers: 4\n", 1363 | "Episode 64 ends. Number of steps is: 1. Accumlated Reward = -10.00. Epsilon = 1.00 .Termination code: 1\n", 1364 | "Reset game: ['map3', '8', '5', '50', '100']\n", 1365 | "numberOfPlayers: 4\n", 1366 | "Episode 65 ends. Number of steps is: 8. Accumlated Reward = -16.00. Epsilon = 1.00 .Termination code: 1\n", 1367 | "Reset game: ['map2', '13', '6', '50', '100']\n", 1368 | "numberOfPlayers: 4\n", 1369 | "Episode 66 ends. Number of steps is: 19. Accumlated Reward = -13.00. Epsilon = 1.00 .Termination code: 1\n", 1370 | "Reset game: ['map4', '18', '3', '50', '100']\n", 1371 | "numberOfPlayers: 4\n", 1372 | "Episode 67 ends. Number of steps is: 6. Accumlated Reward = -22.00. Epsilon = 1.00 .Termination code: 1\n", 1373 | "Reset game: ['map3', '15', '1', '50', '100']\n", 1374 | "numberOfPlayers: 4\n", 1375 | "Episode 68 ends. Number of steps is: 6. Accumlated Reward = -19.00. Epsilon = 1.00 .Termination code: 1\n", 1376 | "Reset game: ['map4', '17', '1', '50', '100']\n", 1377 | "numberOfPlayers: 4\n", 1378 | "Episode 69 ends. Number of steps is: 40. Accumlated Reward = -25.00. Epsilon = 1.00 .Termination code: 1\n", 1379 | "Reset game: ['map1', '7', '3', '50', '100']\n", 1380 | "numberOfPlayers: 4\n", 1381 | "Episode 70 ends. Number of steps is: 51. Accumlated Reward = -13.00. 
Epsilon = 1.00 .Termination code: 1\n", 1382 | "Reset game: ['map3', '19', '1', '50', '100']\n", 1383 | "numberOfPlayers: 4\n", 1384 | "Episode 71 ends. Number of steps is: 10. Accumlated Reward = -16.00. Epsilon = 1.00 .Termination code: 1\n", 1385 | "Reset game: ['map1', '17', '6', '50', '100']\n", 1386 | "numberOfPlayers: 4\n", 1387 | "Episode 72 ends. Number of steps is: 12. Accumlated Reward = -24.00. Epsilon = 1.00 .Termination code: 1\n", 1388 | "Reset game: ['map1', '7', '5', '50', '100']\n", 1389 | "numberOfPlayers: 4\n", 1390 | "Episode 73 ends. Number of steps is: 27. Accumlated Reward = -12.00. Epsilon = 1.00 .Termination code: 1\n", 1391 | "Reset game: ['map3', '1', '5', '50', '100']\n", 1392 | "numberOfPlayers: 4\n", 1393 | "Episode 74 ends. Number of steps is: 11. Accumlated Reward = -12.00. Epsilon = 1.00 .Termination code: 1\n", 1394 | "Reset game: ['map1', '13', '5', '50', '100']\n", 1395 | "numberOfPlayers: 4\n", 1396 | "Episode 75 ends. Number of steps is: 22. Accumlated Reward = 10.00. Epsilon = 1.00 .Termination code: 1\n", 1397 | "Reset game: ['map2', '17', '8', '50', '100']\n", 1398 | "numberOfPlayers: 4\n", 1399 | "Episode 76 ends. Number of steps is: 1. Accumlated Reward = -10.00. Epsilon = 1.00 .Termination code: 1\n", 1400 | "Reset game: ['map3', '9', '3', '50', '100']\n", 1401 | "numberOfPlayers: 4\n", 1402 | "Episode 77 ends. Number of steps is: 26. Accumlated Reward = -18.00. Epsilon = 1.00 .Termination code: 1\n", 1403 | "Reset game: ['map2', '8', '3', '50', '100']\n", 1404 | "numberOfPlayers: 4\n", 1405 | "Episode 78 ends. Number of steps is: 13. Accumlated Reward = -10.00. Epsilon = 1.00 .Termination code: 1\n", 1406 | "Reset game: ['map2', '6', '3', '50', '100']\n", 1407 | "numberOfPlayers: 4\n", 1408 | "Episode 79 ends. Number of steps is: 18. Accumlated Reward = -11.00. Epsilon = 1.00 .Termination code: 1\n", 1409 | "Reset game: ['map4', '11', '5', '50', '100']\n", 1410 | "numberOfPlayers: 4\n", 1411 | "Episode 80 ends. Number of steps is: 11. Accumlated Reward = -12.00. Epsilon = 1.00 .Termination code: 1\n", 1412 | "Reset game: ['map0', '13', '8', '50', '100']\n", 1413 | "numberOfPlayers: 4\n", 1414 | "Episode 81 ends. Number of steps is: 4. Accumlated Reward = 40.00. Epsilon = 1.00 .Termination code: 1\n", 1415 | "Reset game: ['map4', '5', '2', '50', '100']\n", 1416 | "numberOfPlayers: 4\n", 1417 | "Episode 82 ends. Number of steps is: 8. Accumlated Reward = -15.00. Epsilon = 1.00 .Termination code: 1\n", 1418 | "Reset game: ['map4', '16', '3', '50', '100']\n", 1419 | "numberOfPlayers: 4\n", 1420 | "Episode 83 ends. Number of steps is: 7. Accumlated Reward = -5.00. Epsilon = 1.00 .Termination code: 1\n", 1421 | "Reset game: ['map0', '19', '3', '50', '100']\n", 1422 | "numberOfPlayers: 4\n", 1423 | "Episode 84 ends. Number of steps is: 11. Accumlated Reward = -12.00. Epsilon = 1.00 .Termination code: 1\n", 1424 | "Reset game: ['map0', '10', '1', '50', '100']\n", 1425 | "numberOfPlayers: 4\n", 1426 | "Episode 85 ends. Number of steps is: 12. Accumlated Reward = -11.00. 
Epsilon = 1.00 .Termination code: 1\n", 1427 | "Reset game: ['map0', '3', '5', '50', '100']\n", 1428 | "numberOfPlayers: 4\n" 1429 | ] 1430 | } 1431 | ], 1432 | "source": [ 1433 | "#DQN Algorithm-Main\n", 1434 | "#Create the header of the CSV file for logging DQN training data\n", 1435 | "'''now = datetime.datetime.now()\n", 1436 | "header = [\"Ep\",\"Step\", \"Reward\",\"Total_reward\",\"Action\",\"Epsilon\",\"Done\",\"Termination_Code\"]\n", 1437 | "filename = \"Data/data_\" + now.strftime(\"%Y%m%d-%H%M\") + \".csv\"\n", 1438 | "with open(filename, 'w') as f:\n", 1439 | " pd.DataFrame(columns = header).to_csv(f,encoding='utf-8', index=False, header = True)'''\n", 1440 | "\n", 1441 | "# Parameters for training a DQN model\n", 1442 | "N_EPISODE = 10000 #The number of episodes for training\n", 1443 | "MAX_STEP = 1000 #The maximum number of steps for each episode\n", 1444 | "BATCH_SIZE = 32 #The number of experiences for each replay \n", 1445 | "MEMORY_SIZE = 100000 #The maximum number of experiences stored in the replay memory\n", 1446 | "SAVE_NETWORK = 100 # After this number of episodes, the DQN model is saved for testing later. \n", 1447 | "INITIAL_REPLAY_SIZE = 1000 #The number of experiences stored in the memory before replaying starts\n", 1448 | "INPUTNUM = 198 #The number of input values for the DQN model\n", 1449 | "ACTIONNUM = 6 #The number of actions output from the DQN model\n", 1450 | "MAP_MAX_X = 21 #Width of the Map\n", 1451 | "MAP_MAX_Y = 9 #Height of the Map\n", 1452 | "\n", 1453 | "# Initialize network and memory\n", 1454 | "DQNAgent = DQN(INPUTNUM,ACTIONNUM)\n", 1455 | "memory = Memory(MEMORY_SIZE)\n", 1456 | "\n", 1457 | "# Initialize environment\n", 1458 | "Maps = CreateMaps()#Creating 05 maps\n", 1459 | "minerEnv = MinerEnv()#Creating a communication environment between the DQN model and the game environment\n", 1460 | "minerEnv.start() #Connect to the game\n", 1461 | "\n", 1462 | "train = False #Indicates that replaying has started and that epsilon starts to decrease.\n", 1463 | "#Training Process\n", 1464 | "#The main part of the deep Q-learning algorithm\n", 1465 | "for episode_i in range(0,N_EPISODE):\n", 1466 | " try:\n", 1467 | " # Choosing a map in the list\n", 1468 | " mapID = np.random.randint(0, 5) #Choosing a map ID randomly from the 5 maps in the Maps folder\n", 1469 | " posID_x = np.random.randint(MAP_MAX_X) #Choosing an initial position of the DQN agent on the X-axis randomly\n", 1470 | " posID_y = np.random.randint(MAP_MAX_Y) #Choosing an initial position of the DQN agent on the Y-axis randomly\n", 1471 | " #Creating a request for initializing a map, initial position, the initial energy, and the maximum number of steps of the DQN agent\n", 1472 | " request = (\"map\" + str(mapID) + \",\" + str(posID_x) + \",\" + str(posID_y) + \",50,100\") \n", 1473 | " #Send the request to the game environment\n", 1474 | " minerEnv.send_map_info(request)\n", 1475 | "\n", 1476 | " # Getting the initial state\n", 1477 | " minerEnv.reset() #Initialize the game environment\n", 1478 | " s = minerEnv.get_state()#Get the state after resetting.
\n", 1479 | " #This function (get_state()) is an example of creating a state for the DQN model \n", 1480 | " total_reward = 0 #The amount of rewards for the entire episode\n", 1481 | " terminate = False #The variable indicates that the episode ends\n", 1482 | " maxStep = minerEnv.state.mapInfo.maxStep #Get the maximum number of steps for each episode in training\n", 1483 | " #Start an episde for training\n", 1484 | " for step in range(0, maxStep):\n", 1485 | " action = DQNAgent.act(s) # Getting an action from the DQN model from the state (s)\n", 1486 | " minerEnv.step(str(action)) # Performing the action in order to obtain the new state\n", 1487 | " s_next = minerEnv.get_state() # Getting a new state\n", 1488 | " reward = minerEnv.get_reward() # Getting a reward\n", 1489 | " terminate = minerEnv.check_terminate() # Checking the end status of the episode\n", 1490 | " \n", 1491 | " # Add this transition to the memory batch\n", 1492 | " memory.push(s, action, reward, terminate, s_next)\n", 1493 | "\n", 1494 | " # Sample batch memory to train network\n", 1495 | " if (memory.length > INITIAL_REPLAY_SIZE):\n", 1496 | " #If there are INITIAL_REPLAY_SIZE experiences in the memory batch\n", 1497 | " #then start replaying\n", 1498 | " batch = memory.sample(BATCH_SIZE) #Get a BATCH_SIZE experiences for replaying\n", 1499 | " DQNAgent.replay(batch, BATCH_SIZE)#Do relaying\n", 1500 | " train = True #Indicate the training starts\n", 1501 | " total_reward = total_reward + reward #Plus the reward to the total rewad of the episode\n", 1502 | " s = s_next #Assign the next state for the next step.\n", 1503 | " \n", 1504 | " #Saving data to file\n", 1505 | " '''save_data = np.hstack([episode_i+1,step+1,reward,total_reward,action, DQNAgent.epsilon, terminate]).reshape(1,7)\n", 1506 | " with open(filename, 'a') as f:\n", 1507 | " pd.DataFrame(save_data).to_csv(f,encoding='utf-8', index=False, header = False)'''\n", 1508 | "\n", 1509 | " if terminate == True:\n", 1510 | " #If the episode ends, then go to the next episode\n", 1511 | " break\n", 1512 | " \n", 1513 | " # Iteration to save the network architecture and weights\n", 1514 | " if (np.mod(episode_i + 1, SAVE_NETWORK) == 0 and train == True):\n", 1515 | " DQNAgent.target_train() # Replace the learning weights for target model with soft replacement\n", 1516 | " #Save the DQN model\n", 1517 | " now = datetime.datetime.now() #Get the latest datetime \n", 1518 | " DQNAgent.save_model( \"DQNmodel_\" + now.strftime(\"%Y%m%d-%H%M\") + \"_ep\" + str(episode_i+1)) \n", 1519 | " \n", 1520 | " #Print the training information after the episode\n", 1521 | " print('Episode %d ends. Number of steps is: %d. Accumlated Reward = %.2f. 
Epsilon = %.2f. Termination code: %d' % (episode_i+1, step+1, total_reward, DQNAgent.epsilon, terminate))\n", 1522 | " #Decreasing the epsilon once replaying has started\n", 1523 | " if train == True:\n", 1524 | " DQNAgent.update_epsilon()\n", 1525 | " \n", 1526 | " except Exception as e:\n", 1527 | " import traceback\n", 1528 | " traceback.print_exc() \n", 1529 | " #print(\"Finished.\")\n", 1530 | " break" 1531 | ] 1532 | } 1533 | ], 1534 | "metadata": { 1535 | "colab": { 1536 | "collapsed_sections": [], 1537 | "name": "Miner_Training_Colab_CodeSample.ipynb", 1538 | "provenance": [] 1539 | }, 1540 | "kernelspec": { 1541 | "display_name": "Python 3", 1542 | "language": "python", 1543 | "name": "python3" 1544 | }, 1545 | "language_info": { 1546 | "codemirror_mode": { 1547 | "name": "ipython", 1548 | "version": 3 1549 | }, 1550 | "file_extension": ".py", 1551 | "mimetype": "text/x-python", 1552 | "name": "python", 1553 | "nbconvert_exporter": "python", 1554 | "pygments_lexer": "ipython3", 1555 | "version": "3.6.5" 1556 | } 1557 | }, 1558 | "nbformat": 4, 1559 | "nbformat_minor": 1 1560 | } 1561 | --------------------------------------------------------------------------------
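The two learning rules at the heart of the notebook's DQN class are easy to lose in the Keras plumbing: the Bellman target built in replay() and the soft target-network update in target_train(). The following is a minimal NumPy-only sketch of both; the tiny linear "model", the sample transition, and the gamma/tau constants are hypothetical stand-ins chosen for illustration, not the notebook's actual hyperparameters (those are set in the DQN constructor).

import numpy as np

gamma, tau = 0.99, 0.125           # assumed discount factor and soft-update rate
rng = np.random.default_rng(0)

# Toy "networks": a single weight matrix each, mapping a 4-dim state to 6 Q-values
weights = rng.normal(size=(4, 6))
target_weights = weights.copy()

def q_values(w, s):
    return s @ w                   # stand-in for model.predict(state)

# One hypothetical transition (s, a, r, s2, done)
s, a, r, s2, done = rng.normal(size=4), 2, -1.0, rng.normal(size=4), False

# Bellman target, mirroring replay(): start from the target network's prediction
# and overwrite only the entry of the action actually taken
target = q_values(target_weights, s).copy()
target[a] = r if done else r + gamma * np.max(q_values(target_weights, s2))

# Soft target update, mirroring target_train(): blend the online weights into
# the target weights so the bootstrap targets move slowly
target_weights = tau * weights + (1 - tau) * target_weights

Using the target network for the bootstrap term and only nudging it by tau per update is what keeps the regression targets from chasing the online network's own moving predictions.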
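Similarly, the Memory class implements a FIFO replay buffer by stacking rows with np.vstack and deleting row 0 once capacity is exceeded. The sketch below isolates just that eviction behavior; TinyMemory and its single states field are illustrative only, not the competition code.

import numpy as np

class TinyMemory:
    def __init__(self, capacity):
        self.capacity = capacity
        self.length = 0
        self.states = None

    def push(self, s):
        # First push stores the row; later pushes stack below it
        self.states = s if self.states is None else np.vstack((self.states, s))
        self.length += 1
        if self.length > self.capacity:   # over capacity: evict the oldest row
            self.states = np.delete(self.states, 0, axis=0)
            self.length -= 1

m = TinyMemory(capacity=3)
for v in range(5):
    m.push(np.full((1, 2), v))
print(m.states)   # rows 2, 3 and 4 remain; rows 0 and 1 were evicted

Note that np.vstack and np.delete copy the whole array on every push, which is O(n) per transition; a fixed-size preallocated ring buffer with a write index is the usual constant-time alternative for large MEMORY_SIZE values.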