├── Miner-Testing-CodeSample
│   ├── src
│   │   └── build.sh
│   └── build
│       ├── RLModelSample.h5
│       ├── run.sh
│       ├── GAME_SOCKET.py
│       ├── RLModelSample.json
│       ├── TestingAgent.py
│       ├── MinerEnv.py
│       └── MINER_STATE.py
├── Miner-Training-Local-CodeSample
│   ├── Data
│   │   └── .gitkeep
│   ├── TrainedModels
│   │   └── .gitkeep
│   ├── Maps
│   │   ├── map6
│   │   ├── map4
│   │   ├── map12
│   │   ├── map2
│   │   ├── map5
│   │   ├── map11
│   │   ├── map7
│   │   ├── map3
│   │   ├── map9
│   │   ├── map1
│   │   ├── map10
│   │   └── map8
│   ├── bot2.py
│   ├── bot1.py
│   ├── bot3.py
│   ├── Memory.py
│   ├── MinerEnv.py
│   ├── DQNModel.py
│   ├── MINER_STATE.py
│   ├── TrainingClient.py
│   └── GAME_SOCKET_DUMMY.py
├── Maps
│   ├── map1.png
│   ├── map2.png
│   ├── map3.png
│   ├── map4.png
│   ├── map5.png
│   ├── image10.png
│   ├── image11.png
│   ├── image12.png
│   ├── image6.png
│   ├── image7.png
│   ├── image8.png
│   └── image9.png
├── Change logs.docx
├── MinerAI - CodeAISample.pdf
├── image
│   ├── codeAI
│   │   ├── Picture1.png
│   │   ├── Picture2.png
│   │   └── Picture3.png
│   └── minerEnv
│       ├── Picture1.png
│       ├── Picture10.png
│       ├── Picture11.png
│       ├── Picture2.png
│       ├── Picture3.png
│       ├── Picture4.png
│       ├── Picture5.png
│       ├── Picture6.png
│       ├── Picture7.png
│       ├── Picture8.png
│       └── Picture9.png
├── MinerAI - CodeAISample.docx
├── MinerAI - CodeAISample_en.pdf
├── MInerAI-Set-up-Environment.pdf
├── MinerAI - CodeAISample_en.docx
├── MInerAI - Cài đặt môi trường.docx
├── MinerAI - Cài đặt môi trường.pdf
├── Miner-Testing-Server
│   ├── README.md
│   └── DUMMY_SERVER.py
├── .gitignore
├── README.md
└── Miner-Colab-CodeSample
    └── Miner_Training_Colab_CodeSample.ipynb
/Miner-Testing-CodeSample/src/build.sh:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/Miner-Training-Local-CodeSample/Data/.gitkeep:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/Miner-Training-Local-CodeSample/TrainedModels/.gitkeep:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/Maps/map1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/Maps/map1.png
--------------------------------------------------------------------------------
/Maps/map2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/Maps/map2.png
--------------------------------------------------------------------------------
/Maps/map3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/Maps/map3.png
--------------------------------------------------------------------------------
/Maps/map4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/Maps/map4.png
--------------------------------------------------------------------------------
/Maps/map5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/Maps/map5.png
--------------------------------------------------------------------------------
/Change logs.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/Change logs.docx -------------------------------------------------------------------------------- /Maps/image10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/Maps/image10.png -------------------------------------------------------------------------------- /Maps/image11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/Maps/image11.png -------------------------------------------------------------------------------- /Maps/image12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/Maps/image12.png -------------------------------------------------------------------------------- /Maps/image6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/Maps/image6.png -------------------------------------------------------------------------------- /Maps/image7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/Maps/image7.png -------------------------------------------------------------------------------- /Maps/image8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/Maps/image8.png -------------------------------------------------------------------------------- /Maps/image9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/Maps/image9.png -------------------------------------------------------------------------------- /MinerAI - CodeAISample.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/MinerAI - CodeAISample.pdf -------------------------------------------------------------------------------- /image/codeAI/Picture1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/image/codeAI/Picture1.png -------------------------------------------------------------------------------- /image/codeAI/Picture2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/image/codeAI/Picture2.png -------------------------------------------------------------------------------- /image/codeAI/Picture3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/image/codeAI/Picture3.png -------------------------------------------------------------------------------- /MinerAI - CodeAISample.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/MinerAI - CodeAISample.docx -------------------------------------------------------------------------------- /MinerAI - CodeAISample_en.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/MinerAI - CodeAISample_en.pdf 
-------------------------------------------------------------------------------- /image/minerEnv/Picture1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/image/minerEnv/Picture1.png -------------------------------------------------------------------------------- /image/minerEnv/Picture10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/image/minerEnv/Picture10.png -------------------------------------------------------------------------------- /image/minerEnv/Picture11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/image/minerEnv/Picture11.png -------------------------------------------------------------------------------- /image/minerEnv/Picture2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/image/minerEnv/Picture2.png -------------------------------------------------------------------------------- /image/minerEnv/Picture3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/image/minerEnv/Picture3.png -------------------------------------------------------------------------------- /image/minerEnv/Picture4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/image/minerEnv/Picture4.png -------------------------------------------------------------------------------- /image/minerEnv/Picture5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/image/minerEnv/Picture5.png -------------------------------------------------------------------------------- /image/minerEnv/Picture6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/image/minerEnv/Picture6.png -------------------------------------------------------------------------------- /image/minerEnv/Picture7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/image/minerEnv/Picture7.png -------------------------------------------------------------------------------- /image/minerEnv/Picture8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/image/minerEnv/Picture8.png -------------------------------------------------------------------------------- /image/minerEnv/Picture9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/image/minerEnv/Picture9.png -------------------------------------------------------------------------------- /MInerAI-Set-up-Environment.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/MInerAI-Set-up-Environment.pdf -------------------------------------------------------------------------------- /MinerAI - CodeAISample_en.docx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/MinerAI - CodeAISample_en.docx -------------------------------------------------------------------------------- /MInerAI - Cài đặt môi trường.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/MInerAI - Cài đặt môi trường.docx -------------------------------------------------------------------------------- /MinerAI - Cài đặt môi trường.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/MinerAI - Cài đặt môi trường.pdf -------------------------------------------------------------------------------- /Miner-Testing-CodeSample/build/RLModelSample.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xphongvn/rlcomp2020/HEAD/Miner-Testing-CodeSample/build/RLModelSample.h5 -------------------------------------------------------------------------------- /Miner-Testing-CodeSample/build/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #workingDir=${PWD} 3 | #cd /tf2/bin 4 | #source ./activate 5 | #cd $workingDir 6 | python3 TestingAgent.py $1 $2 7 | -------------------------------------------------------------------------------- /Miner-Training-Local-CodeSample/Maps/map6: -------------------------------------------------------------------------------- 1 | [[-3,0,0,0,0,0,50,-3,50,0,-2,-2,-3,0,0,0,-3,-1,-2,-1,50],[0,0,-2,-1,0,-1,0,0,0,-2,-1,-3,50,-1,-1,-2,50,-2,-1,-2,50],[-3,-2,0,50,0,0,-3,-1,0,0,0,-3,0,-2,0,-1,-1,-1,0,-2,-1],[-3,-3,-2,0,-3,-3,-2,-1,0,0,-2,-3,0,0,0,0,-2,-3,-2,0,50],[50,-2,-3,0,0,0,-2,-2,0,0,-3,0,0,-3,-3,0,0,-3,-1,-3,0],[-2,-1,50,-2,-3,-3,-2,-1,-1,-3,50,-2,0,0,-1,-3,-1,-2,50,-2,0],[-2,-3,-1,-2,0,50,50,-1,-2,-2,-3,0,-2,-3,0,0,-1,50,-3,0,0],[-2,0,-2,0,-1,-1,0,-1,0,-1,0,-3,0,-1,0,-1,-2,0,-2,-3,-1],[-3,50,-2,-2,50,50,-1,-3,-2,-2,0,0,0,-3,-3,-1,-1,0,-1,50,0]] -------------------------------------------------------------------------------- /Miner-Training-Local-CodeSample/Maps/map4: -------------------------------------------------------------------------------- 1 | [[0,-1,0,0,0,0,0,-3,0,-1,0,-1,-3,150,500,200,-1,0,-2,-1,0],[-3,500,-3,-2,350,-1,0,0,0,0,0,0,-1,-1,50,-1,0,0,-2,350,-3],[-1,-3,-2,0,-1,-2,0,-1,0,0,0,0,0,0,-2,-1,0,0,-2,-1,-3],[0,-3,0,0,0,0,-2,600,-3,0,0,-2,-2,0,-2,0,0,0,0,0,0],[0,0,0,0,-1,-1,0,-3,0,-1,400,-3,-2,0,0,0,-1,-2,-1,-1,-1],[-1,0,0,-1,-3,-3,-1,-1,0,0,-3,-2,0,0,0,0,-1,700,-1,-1,-3],[350,-2,-1,-3,-2,-3,-3,-1,0,-3,0,0,0,0,-3,0,-1,-1,-1,-3,200],[0,-1,-3,-2,250,-2,-3,-1,0,0,0,0,-2,0,0,0,0,-1,-3,300,0],[0,-3,-2,300,1000,-2,-3,-1,0,-1,0,-2,100,-2,-1,0,-1,-3,400,0,800]] -------------------------------------------------------------------------------- /Miner-Training-Local-CodeSample/Maps/map12: -------------------------------------------------------------------------------- 1 | [[50,-2,-3,0,0,0,50,-3,-1,-1,-3,-1,0,-2,-3,-3,50,-3,0,-3,-3],[0,-1,-1,50,-2,-3,50,-3,0,-2,-3,0,0,0,-2,-2,0,0,0,-3,50],[-2,-1,0,50,0,-2,-3,-2,-1,-1,-2,50,-3,50,0,-3,-2,-1,0,-1,0],[0,0,-1,-3,0,0,-3,50,-2,-2,-1,-2,-3,0,0,-3,-1,-2,0,-3,-1],[50,0,-3,-2,-1,-2,-1,-2,-3,50,0,0,-2,0,-3,50,-1,-3,-3,50,-1],[-1,-3,-1,-2,-2,-3,-2,0,-2,-3,0,0,0,-2,-2,0,0,0,-2,-1,50],[50,-3,50,-3,-1,-3,-2,0,-3,-3,50,-1,-2,-1,-2,-2,0,0,-3,0,0],[0,-2,0,-1,0,-3,-2,-2,-1,-2,0,0,-3,-2,50,0,-3,-2,-3,-2,-3],[-1,-3,-2,-3,0,-3,50,-1,0,0,-2,-3,50,0,-2,0,0,0,50,-1,0]] 
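Each file in Miner-Training-Local-CodeSample/Maps, such as map12 above, is a plain JSON 2D array (9 rows by 21 columns, matching MAP_MAX_Y and MAP_MAX_X in TrainingClient.py). A cell value of 0 is empty ground, -1/-2/-3 mark tree, trap, and swamp cells (the negated TreeID/TrapID/SwampID constants used in MinerEnv.py), and a positive value is the amount of gold minable at that cell. A minimal sketch for inspecting a map offline, assuming it is run from the repository root (the chosen path is just an example):

```python
import json

# Load one training map and summarize it.
with open("Miner-Training-Local-CodeSample/Maps/map4") as f:
    grid = json.load(f)

print(len(grid), "rows x", len(grid[0]), "columns")  # expected: 9 rows x 21 columns
gold = [(x, y, v) for y, row in enumerate(grid) for x, v in enumerate(row) if v > 0]
print("gold cells:", len(gold), "total gold:", sum(v for _, _, v in gold))
```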
-------------------------------------------------------------------------------- /Miner-Training-Local-CodeSample/Maps/map2: -------------------------------------------------------------------------------- 1 | [[550,-1,-1,-2,-2,-2,-2,150,0,0,0,-3,-2,400,-3,650,-2,-1,0,0,0],[350,-1,-1,-2,0,0,-2,-2,-2,-1,0,-3,-3,-2,300,-2,-1,650,-2,-1,0],[-1,-1,450,-2,0,0,0,0,0,0,0,0,0,-3,-3,-3,-1,-1,-3,0,0],[-1,-1,-2,-2,0,-3,-2,0,0,-3,-3,-3,0,0,0,0,0,0,-3,0,150],[0,0,-2,0,-3,-3,-3,0,-1,-2,400,-3,-3,0,-2,0,-1,0,-3,0,0],[0,200,-2,0,-3,250,-1,0,0,-1,-2,-3,-2,0,-1,300,-1,0,-3,-1,0],[-3,-3,-2,0,-3,-3,-3,0,0,0,0,-3,-3,-3,-2,-2,-2,0,-2,-2,0],[-1,-3,-2,0,0,-2,0,0,-3,-3,-3,-3,150,-3,0,0,0,0,-2,200,0],[800,-3,-2,-3,450,-2,0,-3,-3,200,-1,250,-1,-3,0,-1,-1,0,0,0,0]] -------------------------------------------------------------------------------- /Miner-Training-Local-CodeSample/Maps/map5: -------------------------------------------------------------------------------- 1 | [[0,0,0,0,0,0,0,-2,0,0,0,0,-2,0,0,0,0,-1,-1,800,-1],[100,-1,-3,-2,0,0,-1,200,-1,-2,250,-2,-2,250,-1,0,0,-1,-2,-1,-2],[-1,700,-1,-3,150,-1,-1,-3,-3,-2,-2,-1,-3,-2,-1,0,0,-3,0,-1,0],[0,-1,-3,0,-1,0,-3,-3,-1,-3,-1,600,-1,-3,-2,-1,-3,500,-1,0,0],[0,-3,0,0,0,0,-2,-1,350,-1,50,-1,-3,50,0,0,0,-1,0,0,-2],[0,0,0,-3,250,-3,-3,-3,-1,500,450,-1,-3,0,-1,0,0,0,0,-2,100],[0,-3,0,0,-3,0,0,0,-3,-1,-1,-3,0,0,-2,300,-2,-1,0,0,-2],[0,-1,-3,-2,0,0,0,-1,0,-3,-3,-3,-1,-2,-2,-1,-2,0,-1,0,0],[-1,500,-1,-3,-2,0,-1,450,-1,0,0,-1,500,-1,-2,-1,-2,0,50,0,0]] -------------------------------------------------------------------------------- /Miner-Training-Local-CodeSample/Maps/map11: -------------------------------------------------------------------------------- 1 | [[50,0,-2,-3,-2,0,-3,-2,-2,0,50,-3,-1,-1,0,-1,50,0,0,-1,-3],[-2,-3,-2,-3,0,0,-3,-1,-3,-3,0,-3,0,0,0,0,-1,0,-3,50,0],[0,-2,50,-2,50,0,-1,0,-1,50,0,-2,-3,0,-1,50,0,-2,-3,0,-2],[-2,-1,0,-2,0,-3,50,-3,-3,-3,-1,-2,-3,-2,-3,-2,-2,50,0,-2,0],[-1,50,0,-3,-3,-2,-3,50,-2,-1,-3,-3,-1,-1,-1,-2,-3,50,-1,-3,-2],[0,-1,-1,-2,-2,-3,-3,-3,0,-2,0,-2,-2,50,-2,-3,-1,-3,-3,0,0],[-3,-2,-1,0,0,-2,-3,-2,0,-1,-1,-3,0,50,-2,0,0,50,-1,0,50],[-1,-3,0,0,-3,50,-3,-1,-1,-2,-3,-1,-1,0,-2,-1,-1,0,0,-2,-3],[-2,50,-3,-1,50,-1,-2,-2,0,0,-3,-2,0,-2,50,-2,-2,-2,-3,-3,50]] -------------------------------------------------------------------------------- /Miner-Training-Local-CodeSample/Maps/map7: -------------------------------------------------------------------------------- 1 | [[-1,-3,-1,50,-1,-1,-1,-2,0,0,-1,-3,0,-2,0,-3,0,-3,-2,-2,50],[0,0,-1,-3,-1,-2,-1,50,-1,-2,-2,-2,-3,0,0,-3,-2,-3,-1,-1,0],[50,0,-2,-2,-3,-3,0,50,-1,-3,0,0,-2,-1,-1,50,0,-2,-2,0,-1],[0,-1,-1,0,-2,0,0,-3,0,-3,0,-3,-1,-3,-2,0,-2,0,0,-2,-2],[-2,-3,-2,-2,-2,-3,0,-1,0,-1,0,50,-1,-1,0,-1,-2,50,-2,-3,-1],[0,-3,-1,-2,-2,50,50,-1,-2,50,-2,-1,-1,-2,-2,-2,0,0,-2,-1,-2],[-1,0,0,-3,-3,-1,-3,-3,-3,50,-3,-1,-2,-3,-1,-2,-1,-3,-1,-2,0],[-3,-2,-3,-1,50,0,-2,0,-3,-3,-2,0,-3,-3,-2,50,-1,-2,0,-3,50],[0,50,-2,-1,-1,-2,-2,0,50,-3,0,-2,-2,0,-3,0,-3,-1,-1,-1,-2]] -------------------------------------------------------------------------------- /Miner-Training-Local-CodeSample/Maps/map3: -------------------------------------------------------------------------------- 1 | 
[[200,-2,-2,250,-2,0,-2,-1,0,-3,500,-3,-3,0,0,0,0,0,-3,450,-3],[-2,-2,-1,-2,-1,0,-3,200,-2,0,-3,0,150,0,-2,-1,0,0,0,-3,0],[-3,-2,-1,-3,-1,0,0,-3,-2,0,0,0,-1,-2,450,-2,0,-2,150,-2,0],[300,-3,-3,300,-2,-2,-2,-2,300,-2,-2,0,0,0,-2,0,-3,-3,-3,-2,0],[-3,-3,0,-3,-1,350,-1,0,-2,-2,350,-2,0,-3,0,0,-3,300,-3,-2,250],[-3,0,0,0,-1,-1,-1,-1,-3,0,-2,0,-3,400,-3,0,-3,-3,-3,0,0],[450,-3,0,0,0,0,-1,400,-3,0,0,0,-2,-3,-2,0,0,0,0,0,0],[-3,0,-1,0,-1,0,-3,-3,-3,0,-1,0,0,0,250,-3,-3,-3,-1,-2,-2],[0,0,-1,200,-1,-3,500,-3,0,-1,200,-1,0,0,-2,-2,-2,-1,400,-1,-2]] -------------------------------------------------------------------------------- /Miner-Training-Local-CodeSample/Maps/map9: -------------------------------------------------------------------------------- 1 | [[50,-2,-2,-2,-2,-2,-1,0,50,-2,-3,-1,-2,0,-3,0,-1,-3,-2,50,-2],[-1,-1,50,-1,-2,50,-2,0,-1,-1,50,-1,-3,-3,-1,-1,-1,0,-2,-3,-3],[-1,0,0,-1,-1,-2,0,-2,-2,-1,-1,0,-3,-2,50,-3,-2,-1,-2,-1,-1],[-1,0,-2,50,-1,0,-2,-1,-3,-1,-3,-2,50,0,-3,0,50,-1,50,0,-2],[-1,-3,-3,-3,-3,50,-1,-1,50,0,-3,-3,-3,-3,-1,0,0,-2,-2,-3,-2],[-3,-3,-3,0,0,-1,-1,-2,0,-2,-2,0,-2,-3,-1,-3,0,-3,0,-2,0],[-1,-3,0,50,0,-1,0,-3,0,-2,0,50,-2,-2,-3,-1,0,-3,-2,0,-1],[-1,-2,50,-1,-3,50,-2,-2,-2,-3,0,-3,-2,50,0,0,0,0,0,-3,50],[50,0,-1,50,-1,-3,-3,0,-1,-3,-3,-1,-1,-3,-3,-3,-3,50,-1,-3,-1]] -------------------------------------------------------------------------------- /Miner-Training-Local-CodeSample/Maps/map1: -------------------------------------------------------------------------------- 1 | [[450,-2,0,-2,150,-1,0,0,0,0,-1,-2,-2,-2,0,0,0,0,150,-2,350],[-2,-2,-2,-2,-1,0,-1,-1,-1,-1,-3,50,-2,-2,-2,-2,-3,-3,50,-2,-1],[-2,-2,200,-2,0,-2,0,-2,-3,-3,-2,0,-3,-2,-2,150,-3,-3,0,0,50],[0,-3,-3,-2,0,0,-1,0,550,-3,-2,0,0,0,-1,0,0,-1,-1,-1,-2],[-2,0,0,0,-1,0,-1,50,300,-3,-2,0,-3,0,0,0,-1,-3,-3,-2,-1],[-1,-3,-1,-3,0,-2,0,0,-2,-1,100,-3,0,-2,300,-3,0,-2,-3,-2,0],[-2,-3,-1,-3,-1,500,-1,-3,-2,-1,0,-1,0,-1,0,-1,0,-2,-3,-3,-1],[0,-3,-1,-3,0,-2,-3,-3,0,0,0,0,-2,0,-2,-3,-3,-3,-3,200,-1],[1200,-3,-1,-3,-1,-1,-2,-2,0,-1,150,-2,0,-2,0,0,-2,-3,-3,1500,50]] -------------------------------------------------------------------------------- /Miner-Training-Local-CodeSample/Maps/map10: -------------------------------------------------------------------------------- 1 | [[50,0,-2,-2,-3,-2,-3,-1,-2,-3,-3,0,0,0,-1,0,-2,-2,50,-1,-2],[-1,-1,-3,-1,50,-2,-1,-3,-2,-1,-1,0,50,0,-2,-3,50,-2,-1,-1,-1],[-3,-1,-3,-1,-3,-2,-3,0,-1,0,-2,50,-2,-3,0,0,-2,-3,50,0,0],[-3,-1,-3,50,-1,-1,50,-1,0,-2,-3,-2,-1,50,0,-3,50,0,50,-3,-2],[-2,-1,-1,0,-3,-1,50,50,0,-3,-3,0,-1,50,-1,-3,0,0,0,-2,0],[0,-2,-2,-1,-3,50,-3,-3,-3,-2,0,0,0,-1,-2,-3,0,-3,0,-2,-2],[-3,-1,-3,-1,-3,-2,-1,-2,-1,-3,0,-1,-3,-2,0,-2,-1,50,-3,-2,50],[-3,50,-1,0,-2,0,-2,-3,-3,-3,0,-3,-1,0,-1,-1,-3,-2,-1,-3,-3],[-3,-3,-3,50,0,-2,-2,-3,50,-3,-2,-3,-1,0,-3,-1,50,-2,-2,-3,0]] -------------------------------------------------------------------------------- /Miner-Training-Local-CodeSample/Maps/map8: -------------------------------------------------------------------------------- 1 | 
[[50,-1,0,0,-2,50,-3,-1,50,-2,0,-2,-2,-1,-1,-3,0,-1,-2,-2,-2],[-2,0,0,0,-3,-3,50,0,50,-3,50,-2,-2,0,-1,-3,-2,-2,0,-1,50],[-1,-2,-2,-2,0,-1,-2,-2,0,-3,50,-1,-3,0,-2,50,-2,50,0,-2,-1],[-3,-3,-3,-3,-1,-3,-1,-3,0,-1,0,-2,0,-1,-1,-2,-2,-3,0,0,-3],[-2,-2,-2,-2,-1,-3,-3,50,-1,-1,-3,-1,-1,-3,50,-1,-2,50,-3,-3,-2],[50,-2,-1,-3,-2,-1,-1,-3,-3,-3,0,-3,-2,-1,0,0,-2,50,-3,0,50],[-3,-3,50,-3,0,-1,-1,-3,0,-2,-2,-1,50,0,50,0,0,-1,-1,-3,-2],[-3,-2,-2,-2,50,-2,-3,-1,-2,0,50,-1,0,0,-1,-2,50,-3,50,-3,-2],[50,0,-1,-3,-1,0,0,-3,-2,-2,0,0,-2,-2,-2,-3,0,-1,-1,0,-3]]
--------------------------------------------------------------------------------
/Miner-Testing-CodeSample/build/GAME_SOCKET.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import socket
3 | import json
4 | 
5 | class GameSocket:
6 |     def __init__(self, host, port):
7 |         self.host = host
8 |         self.port = port
9 |         self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
10 | 
11 |     def connect(self):
12 |         try:
13 |             self.socket.connect((self.host, self.port))
14 |             print("Connected to server.")
15 |         except Exception as e:
16 |             import traceback
17 |             traceback.print_exc()
18 |             print("Cannot connect.")
19 | 
20 |     def receive(self):
21 |         buff_size = 4096
22 |         recv_data = b""
23 |         while True:
24 |             part = self.socket.recv(buff_size)
25 |             recv_data += part
26 |             if len(part) < buff_size:  # a short read is taken as the end of one message
27 |                 break
28 |         message = recv_data.decode("utf-8")
29 |         return message
30 | 
31 |     def send(self, message):
32 |         self.socket.send(message.encode("utf-8"))
33 | 
34 |     def close(self):
35 |         self.socket.shutdown(1)
36 |         self.socket.close()
37 |         print("Close socket.")
38 | 
--------------------------------------------------------------------------------
/Miner-Testing-Server/README.md:
--------------------------------------------------------------------------------
1 | # DUMMY TESTING SERVER
2 | This is a simulated server that lets players check how their agents run.
3 | 
4 | ## Description
5 | The server simulates the map named Verify, the map used to verify a player's code when the source code is uploaded to the system.
6 | 
7 | ## How to Run
8 | 
9 | Start the server:
10 | **python3 DUMMY_SERVER.py {port}**
11 | 
12 | Example: **python3 DUMMY_SERVER.py 1234**
13 | 
14 | Run the client:
15 | **./run.sh localhost {port}**
16 | 
17 | Example: **./run.sh localhost 1234**
18 | 
19 | ## Note
20 | This is only a simulated server that helps players quickly check an agent's quality; it does not guarantee results that match the real competition environment.
21 | 
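As a quick smoke test, the client side of the handshake can also be driven directly from Python instead of `run.sh`. The sketch below mirrors the flow in `TestingAgent.py` (connect, reset, step until a terminal status) but sends a fixed `ACTION_FREE` instead of a model prediction, so it runs without the Keras model files; it assumes `DUMMY_SERVER.py` is already listening on port 1234 and that it is executed from `Miner-Testing-CodeSample/build`:

```python
from MinerEnv import MinerEnv  # wraps GAME_SOCKET.GameSocket and MINER_STATE.State

env = MinerEnv("localhost", 1234)
env.start()                    # open the TCP connection
env.reset()                    # receive and parse the initial game info
while not env.check_terminate():
    env.step("4")              # ACTION_FREE: rest every turn
    print("score:", env.state.score, "energy:", env.state.energy)
env.end()                      # close the socket
```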
--------------------------------------------------------------------------------
/Miner-Training-Local-CodeSample/bot2.py:
--------------------------------------------------------------------------------
1 | from MINER_STATE import State
2 | import numpy as np
3 | 
4 | 
5 | class PlayerInfo:
6 |     def __init__(self, id):
7 |         self.playerId = id
8 |         self.score = 0
9 |         self.energy = 0
10 |         self.posx = 0
11 |         self.posy = 0
12 |         self.lastAction = -1
13 |         self.status = 0
14 |         self.freeCount = 0
15 | 
16 | 
17 | class Bot2:
18 |     ACTION_GO_LEFT = 0
19 |     ACTION_GO_RIGHT = 1
20 |     ACTION_GO_UP = 2
21 |     ACTION_GO_DOWN = 3
22 |     ACTION_FREE = 4
23 |     ACTION_CRAFT = 5
24 | 
25 |     def __init__(self, id):
26 |         self.state = State()
27 |         self.info = PlayerInfo(id)
28 | 
29 |     def next_action(self):
30 |         if self.state.mapInfo.gold_amount(self.info.posx, self.info.posy) > 0:
31 |             if self.info.energy >= 6:
32 |                 return self.ACTION_CRAFT
33 |             else:
34 |                 return self.ACTION_FREE
35 |         if self.info.energy < 5:
36 |             return self.ACTION_FREE
37 |         else:
38 |             action = np.random.randint(0, 4)
39 |             return action
40 | 
41 |     def new_game(self, data):
42 |         try:
43 |             self.state.init_state(data)
44 |         except Exception as e:
45 |             import traceback
46 |             traceback.print_exc()
47 | 
48 |     def new_state(self, data):
49 |         # action = self.next_action();
50 |         # self.socket.send(action)
51 |         try:
52 |             self.state.update_state(data)
53 |         except Exception as e:
54 |             import traceback
55 |             traceback.print_exc()
--------------------------------------------------------------------------------
/Miner-Testing-CodeSample/build/RLModelSample.json:
--------------------------------------------------------------------------------
1 | {"class_name": "Sequential", "config": {"name": "sequential_1", "layers": [{"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "batch_input_shape": [null, 198], "dtype": "float32", "units": 300, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation", "config": {"name": "activation_1", "trainable": true, "dtype": "float32", "activation": "relu"}}, {"class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "dtype": "float32", "units": 300, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation", "config": {"name": "activation_2", "trainable": true, "dtype": "float32", "activation": "relu"}}, {"class_name": "Dense", "config": {"name": "dense_3", "trainable": true, "dtype": "float32", "units": 6, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation",
"config": {"name": "activation_3", "trainable": true, "dtype": "float32", "activation": "linear"}}]}, "keras_version": "2.3.1", "backend": "tensorflow"} -------------------------------------------------------------------------------- /Miner-Training-Local-CodeSample/bot1.py: -------------------------------------------------------------------------------- 1 | from MINER_STATE import State 2 | import numpy as np 3 | 4 | 5 | class PlayerInfo: 6 | def __init__(self, id): 7 | self.playerId = id 8 | self.score = 0 9 | self.energy = 0 10 | self.posx = 0 11 | self.posy = 0 12 | self.lastAction = -1 13 | self.status = 0 14 | self.freeCount = 0 15 | 16 | 17 | class Bot1: 18 | ACTION_GO_LEFT = 0 19 | ACTION_GO_RIGHT = 1 20 | ACTION_GO_UP = 2 21 | ACTION_GO_DOWN = 3 22 | ACTION_FREE = 4 23 | ACTION_CRAFT = 5 24 | 25 | def __init__(self, id): 26 | self.state = State() 27 | self.info = PlayerInfo(id) 28 | 29 | def next_action(self): 30 | if self.state.mapInfo.gold_amount(self.info.posx, self.info.posy) > 0: 31 | if self.info.energy >= 6: 32 | return self.ACTION_CRAFT 33 | else: 34 | return self.ACTION_FREE 35 | if self.info.energy < 5: 36 | return self.ACTION_FREE 37 | else: 38 | action = self.ACTION_GO_UP 39 | if self.info.posy % 2 == 0: 40 | if self.info.posx < self.state.mapInfo.max_x: 41 | action = self.ACTION_GO_RIGHT 42 | else: 43 | if self.info.posx > 0: 44 | action = self.ACTION_GO_LEFT 45 | else: 46 | action = self.ACTION_GO_DOWN 47 | return action 48 | 49 | def new_game(self, data): 50 | try: 51 | self.state.init_state(data) 52 | except Exception as e: 53 | import traceback 54 | traceback.print_exc() 55 | 56 | def new_state(self, data): 57 | # action = self.next_action(); 58 | # self.socket.send(action) 59 | try: 60 | self.state.update_state(data) 61 | except Exception as e: 62 | import traceback 63 | traceback.print_exc() -------------------------------------------------------------------------------- /Miner-Training-Local-CodeSample/bot3.py: -------------------------------------------------------------------------------- 1 | from MINER_STATE import State 2 | import numpy as np 3 | 4 | 5 | class PlayerInfo: 6 | def __init__(self, id): 7 | self.playerId = id 8 | self.score = 0 9 | self.energy = 0 10 | self.posx = 0 11 | self.posy = 0 12 | self.lastAction = -1 13 | self.status = 0 14 | self.freeCount = 0 15 | 16 | 17 | class Bot3: 18 | ACTION_GO_LEFT = 0 19 | ACTION_GO_RIGHT = 1 20 | ACTION_GO_UP = 2 21 | ACTION_GO_DOWN = 3 22 | ACTION_FREE = 4 23 | ACTION_CRAFT = 5 24 | 25 | def __init__(self, id): 26 | self.state = State() 27 | self.info = PlayerInfo(id) 28 | 29 | def next_action(self): 30 | if self.state.mapInfo.gold_amount(self.info.posx, self.info.posy) > 0: 31 | if self.info.energy >= 6: 32 | return self.ACTION_CRAFT 33 | else: 34 | return self.ACTION_FREE 35 | if self.info.energy < 5: 36 | return self.ACTION_FREE 37 | else: 38 | action = self.ACTION_GO_LEFT 39 | if self.info.posx % 2 == 0: 40 | if self.info.posy < self.state.mapInfo.max_y: 41 | action = self.ACTION_GO_DOWN 42 | else: 43 | if self.info.posy > 0: 44 | action = self.ACTION_GO_UP 45 | else: 46 | action = self.ACTION_GO_RIGHT 47 | return action 48 | 49 | def new_game(self, data): 50 | try: 51 | self.state.init_state(data) 52 | except Exception as e: 53 | import traceback 54 | traceback.print_exc() 55 | 56 | def new_state(self, data): 57 | # action = self.next_action(); 58 | # self.socket.send(action) 59 | try: 60 | self.state.update_state(data) 61 | except Exception as e: 62 | import traceback 63 | traceback.print_exc() 64 | 
-------------------------------------------------------------------------------- /Miner-Testing-CodeSample/build/TestingAgent.py: -------------------------------------------------------------------------------- 1 | from warnings import simplefilter 2 | simplefilter(action='ignore', category=FutureWarning) 3 | 4 | import sys 5 | from keras.models import model_from_json 6 | from MinerEnv import MinerEnv 7 | import numpy as np 8 | 9 | ACTION_GO_LEFT = 0 10 | ACTION_GO_RIGHT = 1 11 | ACTION_GO_UP = 2 12 | ACTION_GO_DOWN = 3 13 | ACTION_FREE = 4 14 | ACTION_CRAFT = 5 15 | 16 | HOST = "localhost" 17 | PORT = 1111 18 | if len(sys.argv) == 3: 19 | HOST = str(sys.argv[1]) 20 | PORT = int(sys.argv[2]) 21 | 22 | # load json and create model 23 | json_file = open('RLModelSample.json', 'r') 24 | loaded_model_json = json_file.read() 25 | json_file.close() 26 | DQNAgent = model_from_json(loaded_model_json) 27 | # load weights into new model 28 | DQNAgent.load_weights("RLModelSample.h5") 29 | print("Loaded model from disk") 30 | status_map = {0: "STATUS_PLAYING", 1: "STATUS_ELIMINATED_WENT_OUT_MAP", 2: "STATUS_ELIMINATED_OUT_OF_ENERGY", 31 | 3: "STATUS_ELIMINATED_INVALID_ACTION", 4: "STATUS_STOP_EMPTY_GOLD", 5: "STATUS_STOP_END_STEP"} 32 | try: 33 | # Initialize environment 34 | minerEnv = MinerEnv(HOST, PORT) 35 | minerEnv.start() # Connect to the game 36 | minerEnv.reset() 37 | s = minerEnv.get_state() ##Getting an initial state 38 | while not minerEnv.check_terminate(): 39 | try: 40 | action = np.argmax(DQNAgent.predict(s.reshape(1, len(s)))) # Getting an action from the trained model 41 | print("next action = ", action) 42 | minerEnv.step(str(action)) # Performing the action in order to obtain the new state 43 | s_next = minerEnv.get_state() # Getting a new state 44 | s = s_next 45 | except Exception as e: 46 | import traceback 47 | traceback.print_exc() 48 | print("Finished.") 49 | break 50 | print(status_map[minerEnv.state.status]) 51 | except Exception as e: 52 | import traceback 53 | traceback.print_exc() 54 | print("End game.") 55 | -------------------------------------------------------------------------------- /Miner-Training-Local-CodeSample/Memory.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | class Memory: 5 | 6 | capacity = None 7 | 8 | 9 | def __init__( 10 | self, 11 | capacity, 12 | length = None, 13 | states = None, 14 | actions = None, 15 | rewards = None, 16 | dones = None, 17 | states2 = None, 18 | ): 19 | self.capacity = capacity 20 | self.length = 0 21 | self.states = states 22 | self.actions = actions 23 | self.rewards = rewards 24 | self.dones = dones 25 | self.states2 = states2 26 | 27 | def push(self, s, a, r, done, s2): 28 | if self.states is None: 29 | self.states = s 30 | self.actions = a 31 | self.rewards = r 32 | self.dones = done 33 | self.states2 = s2 34 | else: 35 | self.states = np.vstack((self.states,s)) 36 | self.actions = np.vstack((self.actions,a)) 37 | self.rewards = np.vstack((self.rewards, r)) 38 | self.dones = np.vstack((self.dones, done)) 39 | self.states2 = np.vstack((self.states2,s2)) 40 | 41 | self.length = self.length + 1 42 | 43 | if (self.length > self.capacity): 44 | self.states = np.delete(self.states,(0), axis = 0) 45 | self.actions = np.delete(self.actions,(0), axis = 0) 46 | self.rewards = np.delete(self.rewards,(0), axis = 0) 47 | self.dones = np.delete(self.dones,(0), axis = 0) 48 | self.states2 = np.delete(self.states2,(0), axis = 0) 49 | self.length = 
self.length - 1 50 | 51 | 52 | def sample(self,batch_size): 53 | if (self.length >= batch_size): 54 | idx = random.sample(range(0,self.length),batch_size) 55 | s = self.states[idx,:] 56 | a = self.actions[idx,:] 57 | r = self.rewards[idx,:] 58 | d = self.dones[idx,:] 59 | s2 = self.states2[idx,:] 60 | 61 | return list([s,a,r,s2,d]) 62 | 63 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Python template 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | pip-wheel-metadata/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | cover/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | .pybuilder/ 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | # For a library or package, you might want to ignore these files since the code is 90 | # intended to run in multiple environments; otherwise, check them in: 91 | # .python-version 92 | 93 | # pipenv 94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 97 | # install all needed dependencies. 98 | #Pipfile.lock 99 | 100 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 101 | __pypackages__/ 102 | 103 | # Celery stuff 104 | celerybeat-schedule 105 | celerybeat.pid 106 | 107 | # SageMath parsed files 108 | *.sage.py 109 | 110 | # Environments 111 | .env 112 | .venv 113 | env/ 114 | venv/ 115 | ENV/ 116 | env.bak/ 117 | venv.bak/ 118 | 119 | # Spyder project settings 120 | .spyderproject 121 | .spyproject 122 | 123 | # Rope project settings 124 | .ropeproject 125 | 126 | # mkdocs documentation 127 | /site 128 | 129 | # mypy 130 | .mypy_cache/ 131 | .dmypy.json 132 | dmypy.json 133 | 134 | # Pyre type checker 135 | .pyre/ 136 | 137 | # pytype static type analyzer 138 | .pytype/ 139 | 140 | # Cython debug symbols 141 | cython_debug/ 142 | 143 | .idea/ 144 | 145 | .DS_Store -------------------------------------------------------------------------------- /Miner-Testing-CodeSample/build/MinerEnv.py: -------------------------------------------------------------------------------- 1 | from warnings import simplefilter 2 | simplefilter(action='ignore', category=FutureWarning) 3 | 4 | import sys 5 | import numpy as np 6 | from GAME_SOCKET import GameSocket #in testing version, please use GameSocket instead of GameSocketDummy 7 | from MINER_STATE import State 8 | 9 | TreeID = 1 10 | TrapID = 2 11 | SwampID = 3 12 | 13 | 14 | class MinerEnv: 15 | def __init__(self, host, port): 16 | self.socket = GameSocket(host, port) 17 | self.state = State() 18 | 19 | self.score_pre = self.state.score#Storing the last score for designing the reward function 20 | 21 | def start(self): #connect to server 22 | self.socket.connect() 23 | 24 | def end(self): #disconnect server 25 | self.socket.close() 26 | 27 | def send_map_info(self, request):#tell server which map to run 28 | self.socket.send(request) 29 | 30 | def reset(self): #start new game 31 | try: 32 | message = self.socket.receive() #receive game info from server 33 | print(message) 34 | self.state.init_state(message) #init state 35 | except Exception as e: 36 | import traceback 37 | traceback.print_exc() 38 | 39 | def step(self, action): #step process 40 | self.socket.send(action) #send action to server 41 | try: 42 | message = self.socket.receive() #receive new state from server 43 | #print("New state: ", message) 44 | self.state.update_state(message) #update to local state 45 | except Exception as e: 46 | import traceback 47 | traceback.print_exc() 48 | 49 | # Functions are customized by client 50 | def get_state(self): 51 | # Building the map 52 | view = np.zeros([self.state.mapInfo.max_x + 1, self.state.mapInfo.max_y + 1], dtype=int) 53 | for i in range(self.state.mapInfo.max_x + 1): 54 | for j in range(self.state.mapInfo.max_y + 1): 55 | if self.state.mapInfo.get_obstacle(i, j) == TreeID: # Tree 56 | view[i, j] = -TreeID 57 | if self.state.mapInfo.get_obstacle(i, j) == TrapID: # Trap 58 | view[i, j] = -TrapID 59 | if self.state.mapInfo.get_obstacle(i, j) == SwampID: # Swamp 60 | view[i, j] = -SwampID 61 | if self.state.mapInfo.gold_amount(i, j) > 0: 62 | view[i, j] = self.state.mapInfo.gold_amount(i, j) 63 | 64 | DQNState = view.flatten().tolist()#Flattening the map matrix to a vector 65 | 66 | # Add position and energy of agent to the DQNState 67 | DQNState.append(self.state.x) 68 | DQNState.append(self.state.y) 69 | DQNState.append(self.state.energy) 70 | 71 | #Add position of bots 72 | for player in self.state.players: 73 | if player["playerId"] != self.state.id: 74 | DQNState.append(player["posx"]) 75 | DQNState.append(player["posy"]) 76 | 77 | #Convert the DQNState from list to array 
78 |         DQNState = np.array(DQNState)
79 | 
80 |         return DQNState
81 | 
82 |     def check_terminate(self):
83 |         return self.state.status != State.STATUS_PLAYING
84 | 
--------------------------------------------------------------------------------
/Miner-Training-Local-CodeSample/MinerEnv.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import numpy as np
3 | from GAME_SOCKET_DUMMY import GameSocket #in testing version, please use GameSocket instead of GAME_SOCKET_DUMMY
4 | from MINER_STATE import State
5 | 
6 | 
7 | TreeID = 1
8 | TrapID = 2
9 | SwampID = 3
10 | class MinerEnv:
11 |     def __init__(self, host, port):
12 |         self.socket = GameSocket(host, port)
13 |         self.state = State()
14 | 
15 |         self.score_pre = self.state.score#Storing the last score for designing the reward function
16 | 
17 |     def start(self): #connect to server
18 |         self.socket.connect()
19 | 
20 |     def end(self): #disconnect server
21 |         self.socket.close()
22 | 
23 |     def send_map_info(self, request):#tell server which map to run
24 |         self.socket.send(request)
25 | 
26 |     def reset(self): #start new game
27 |         try:
28 |             message = self.socket.receive() #receive game info from server
29 |             self.state.init_state(message) #init state
30 |         except Exception as e:
31 |             import traceback
32 |             traceback.print_exc()
33 | 
34 |     def step(self, action): #step process
35 |         self.socket.send(action) #send action to server
36 |         try:
37 |             message = self.socket.receive() #receive new state from server
38 |             self.state.update_state(message) #update to local state
39 |         except Exception as e:
40 |             import traceback
41 |             traceback.print_exc()
42 | 
43 |     # Functions are customized by client
44 |     def get_state(self):
45 |         # Building the map
46 |         view = np.zeros([self.state.mapInfo.max_x + 1, self.state.mapInfo.max_y + 1], dtype=int)
47 |         for i in range(self.state.mapInfo.max_x + 1):
48 |             for j in range(self.state.mapInfo.max_y + 1):
49 |                 if self.state.mapInfo.get_obstacle(i, j) == TreeID:  # Tree
50 |                     view[i, j] = -TreeID
51 |                 if self.state.mapInfo.get_obstacle(i, j) == TrapID:  # Trap
52 |                     view[i, j] = -TrapID
53 |                 if self.state.mapInfo.get_obstacle(i, j) == SwampID:  # Swamp
54 |                     view[i, j] = -SwampID
55 |                 if self.state.mapInfo.gold_amount(i, j) > 0:
56 |                     view[i, j] = self.state.mapInfo.gold_amount(i, j)
57 | 
58 |         DQNState = view.flatten().tolist() #Flattening the map matrix to a vector
59 | 
60 |         # Add position and energy of agent to the DQNState
61 |         DQNState.append(self.state.x)
62 |         DQNState.append(self.state.y)
63 |         DQNState.append(self.state.energy)
64 |         #Add position of bots
65 |         for player in self.state.players:
66 |             if player["playerId"] != self.state.id:
67 |                 DQNState.append(player["posx"])
68 |                 DQNState.append(player["posy"])
69 | 
70 |         #Convert the DQNState from list to array for training
71 |         DQNState = np.array(DQNState)
72 | 
73 |         return DQNState
74 | 
75 |     def get_reward(self):
76 |         # Calculate reward
77 |         reward = 0
78 |         score_action = self.state.score - self.score_pre
79 |         self.score_pre = self.state.score
80 |         if score_action > 0:
81 |             #If the DQN agent crafts gold, then it should obtain a positive reward (equal to score_action)
82 |             reward += score_action
83 | 
84 |         #If the DQN agent crashes into obstacles (Tree, Trap, Swamp), then it should be punished by a negative reward
85 |         if self.state.mapInfo.get_obstacle(self.state.x, self.state.y) == TreeID:  # Tree
86 |             reward -= TreeID
87 |         if self.state.mapInfo.get_obstacle(self.state.x, self.state.y) == TrapID:  # Trap
88 |             reward -= TrapID
89 |         if self.state.mapInfo.get_obstacle(self.state.x, self.state.y) == SwampID:  # Swamp
90 |             reward -= SwampID
91 | 
92 |         # If out of the map, then the DQN agent should be punished by a larger negative reward.
93 |         if self.state.status == State.STATUS_ELIMINATED_WENT_OUT_MAP:
94 |             reward += -10
95 | 
96 |         #Run out of energy, then the DQN agent should be punished by a larger negative reward.
97 |         if self.state.status == State.STATUS_ELIMINATED_OUT_OF_ENERGY:
98 |             reward += -10
99 |         # print ("reward",reward)
100 |         return reward
101 | 
102 |     def check_terminate(self):
103 |         #Checking the status of the game
104 |         #it indicates the game ends or is playing
105 |         return self.state.status != State.STATUS_PLAYING
106 | 
--------------------------------------------------------------------------------
/Miner-Testing-CodeSample/build/MINER_STATE.py:
--------------------------------------------------------------------------------
1 | import json
2 | 
3 | 
4 | def str_2_json(str):
5 |     return json.loads(str, encoding="utf-8")
6 | 
7 | 
8 | class MapInfo:
9 |     def __init__(self):
10 |         self.max_x = 0
11 |         self.max_y = 0
12 |         self.golds = []
13 |         self.obstacles = []
14 |         self.numberOfPlayers = 0
15 |         self.maxStep = 0
16 | 
17 |     def init_map(self, gameInfo):
18 |         self.max_x = gameInfo["width"] - 1
19 |         self.max_y = gameInfo["height"] - 1
20 |         self.golds = gameInfo["golds"]
21 |         self.obstacles = gameInfo["obstacles"]
22 |         self.maxStep = gameInfo["steps"]
23 |         self.numberOfPlayers = gameInfo["numberOfPlayers"]
24 | 
25 |     def update(self, golds, changedObstacles):
26 |         self.golds = golds
27 |         for cob in changedObstacles:
28 |             newOb = True
29 |             for ob in self.obstacles:
30 |                 if cob["posx"] == ob["posx"] and cob["posy"] == ob["posy"]:
31 |                     newOb = False
32 |                     #print("cell(", cob["posx"], ",", cob["posy"], ") change type from: ", ob["type"], " -> ",
33 |                     #      cob["type"], " / value: ", ob["value"], " -> ", cob["value"])
34 |                     ob["type"] = cob["type"]
35 |                     ob["value"] = cob["value"]
36 |                     break
37 |             if newOb:
38 |                 self.obstacles.append(cob)
39 |                 #print("new obstacle: ", cob["posx"], ",", cob["posy"], ", type = ", cob["type"], ", value = ",
40 |                 #      cob["value"])
41 | 
42 |     def get_min_x(self):
43 |         return min([cell["posx"] for cell in self.golds])
44 | 
45 |     def get_max_x(self):
46 |         return max([cell["posx"] for cell in self.golds])
47 | 
48 |     def get_min_y(self):
49 |         return min([cell["posy"] for cell in self.golds])
50 | 
51 |     def get_max_y(self):
52 |         return max([cell["posy"] for cell in self.golds])
53 | 
54 |     def is_row_has_gold(self, y):
55 |         return y in [cell["posy"] for cell in self.golds]
56 | 
57 |     def is_column_has_gold(self, x):
58 |         return x in [cell["posx"] for cell in self.golds]
59 | 
60 |     def gold_amount(self, x, y):
61 |         for cell in self.golds:
62 |             if x == cell["posx"] and y == cell["posy"]:
63 |                 return cell["amount"]
64 |         return 0
65 | 
66 |     def get_obstacle(self, x, y):  # Getting the kind of the obstacle at cell(x,y)
67 |         for cell in self.obstacles:
68 |             if x == cell["posx"] and y == cell["posy"]:
69 |                 return cell["type"]
70 |         return -1  # No obstacle at the cell (x,y)
71 | 
72 | 
73 | class State:
74 |     STATUS_PLAYING = 0
75 |     STATUS_ELIMINATED_WENT_OUT_MAP = 1
76 |     STATUS_ELIMINATED_OUT_OF_ENERGY = 2
77 |     STATUS_ELIMINATED_INVALID_ACTION = 3
78 |     STATUS_STOP_EMPTY_GOLD = 4
79 |     STATUS_STOP_END_STEP = 5
80 | 
81 |     def __init__(self):
82 |         self.end = False
83 |         self.score = 0
84 |         self.lastAction = None
85 |         self.id = 0
86 |         self.x = 0
87 |         self.y = 0
88 |         self.energy = 0
89 |         self.mapInfo = MapInfo()
90 |         self.players = []
91 |         self.stepCount = 0
92 |         self.status = State.STATUS_PLAYING
93 | 
94 |     def init_state(self, data): #parse data from server into object
95 |         game_info = str_2_json(data)
96 |         self.end = False
97 |         self.score = 0
98 |         self.lastAction = None
99 |         self.id = game_info["playerId"]
100 |         self.x = game_info["posx"]
101 |         self.y = game_info["posy"]
102 |         self.energy = game_info["energy"]
103 |         self.mapInfo.init_map(game_info["gameinfo"])
104 |         self.stepCount = 0
105 |         self.status = State.STATUS_PLAYING
106 |         self.players = [{"playerId": 2, "posx": self.x, "posy": self.y},
107 |                         {"playerId": 3, "posx": self.x, "posy": self.y},
108 |                         {"playerId": 4, "posx": self.x, "posy": self.y}]
109 | 
110 |     def update_state(self, data):
111 |         new_state = str_2_json(data)
112 |         for player in new_state["players"]:
113 |             if player["playerId"] == self.id:
114 |                 self.x = player["posx"]
115 |                 self.y = player["posy"]
116 |                 self.energy = player["energy"]
117 |                 self.score = player["score"]
118 |                 self.lastAction = player["lastAction"]
119 |                 self.status = player["status"]
120 | 
121 |         self.mapInfo.update(new_state["golds"], new_state["changedObstacles"])
122 |         self.players = new_state["players"]
123 |         for i in range(len(self.players) + 1, 5, 1):
124 |             self.players.append({"playerId": i, "posx": self.x, "posy": self.y})
125 |         self.stepCount = self.stepCount + 1
126 | 
--------------------------------------------------------------------------------
/Miner-Training-Local-CodeSample/DQNModel.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from warnings import simplefilter
3 | simplefilter(action='ignore', category=FutureWarning)
4 | 
5 | import numpy as np
6 | from keras.models import Sequential
7 | from keras.models import model_from_json
8 | from keras.layers import Dense, Activation
9 | from keras import optimizers
10 | from keras import backend as K
11 | import tensorflow as tf
12 | from random import random, randrange
13 | 
14 | 
15 | # Deep Q Network off-policy
16 | class DQN:
17 | 
18 |     def __init__(
19 |         self,
20 |         input_dim, #The number of inputs for the DQN network
21 |         action_space, #The number of actions for the DQN network
22 |         gamma = 0.99, #The discount factor
23 |         epsilon = 1, #Epsilon - the exploration factor
24 |         epsilon_min = 0.01, #The minimum epsilon
25 |         epsilon_decay = 0.999, #The epsilon decay applied on each update_epsilon call
26 |         learning_rate = 0.00025, #The learning rate for the DQN network
27 |         tau = 0.125, #The factor for updating the DQN target network from the DQN network
28 |         model = None, #The DQN model
29 |         target_model = None, #The DQN target model
30 |         sess=None
31 | 
32 |     ):
33 |         self.input_dim = input_dim
34 |         self.action_space = action_space
35 |         self.gamma = gamma
36 |         self.epsilon = epsilon
37 |         self.epsilon_min = epsilon_min
38 |         self.epsilon_decay = epsilon_decay
39 |         self.learning_rate = learning_rate
40 |         self.tau = tau
41 | 
42 |         #Creating networks
43 |         self.model = self.create_model() #Creating the DQN model
44 |         self.target_model = self.create_model() #Creating the DQN target model
45 | 
46 |         #Tensorflow GPU optimization
47 |         config = tf.compat.v1.ConfigProto()
48 |         config.gpu_options.allow_growth = True
49 |         self.sess = tf.compat.v1.Session(config=config)
50 |         K.set_session(self.sess) #Register the session created above with the Keras backend
51 |         self.sess.run( tf.compat.v1.global_variables_initializer())
52 | 
53 |     def create_model(self):
54 |         #Creating the network
55 |         #Two hidden layers (300,300), their activation is ReLu
56 |         #One output layer with action_space of nodes, activation is
linear.
57 |         model = Sequential()
58 |         model.add(Dense(300, input_dim=self.input_dim))
59 |         model.add(Activation('relu'))
60 |         model.add(Dense(300))
61 |         model.add(Activation('relu'))
62 |         model.add(Dense(self.action_space))
63 |         model.add(Activation('linear'))
64 |         #adam = optimizers.adam(lr=self.learning_rate)
65 |         sgd = optimizers.SGD(lr=self.learning_rate, decay=1e-6, momentum=0.95)
66 |         model.compile(optimizer = sgd,
67 |                       loss='mse')
68 |         return model
69 | 
70 | 
71 |     def act(self,state):
72 |         #Get the index of the maximum Q values
73 |         a_max = np.argmax(self.model.predict(state.reshape(1,len(state))))
74 |         if (random() < self.epsilon):
75 |             a_chosen = randrange(self.action_space)
76 |         else:
77 |             a_chosen = a_max
78 |         return a_chosen
79 | 
80 | 
81 |     def replay(self,samples,batch_size):
82 |         inputs = np.zeros((batch_size, self.input_dim))
83 |         targets = np.zeros((batch_size, self.action_space))
84 | 
85 |         for i in range(0,batch_size):
86 |             state = samples[0][i,:]
87 |             action = samples[1][i]
88 |             reward = samples[2][i]
89 |             new_state = samples[3][i,:]
90 |             done= samples[4][i]
91 | 
92 |             inputs[i,:] = state
93 |             targets[i,:] = self.target_model.predict(state.reshape(1,len(state)))
94 |             if done:
95 |                 targets[i,action] = reward # if terminated, only equals reward
96 |             else:
97 |                 Q_future = np.max(self.target_model.predict(new_state.reshape(1,len(new_state))))
98 |                 targets[i,action] = reward + Q_future * self.gamma
99 |         #Training
100 |         loss = self.model.train_on_batch(inputs, targets)
101 | 
102 |     def target_train(self):
103 |         weights = self.model.get_weights()
104 |         target_weights = self.target_model.get_weights()
105 |         for i in range(0, len(target_weights)):
106 |             target_weights[i] = weights[i] * self.tau + target_weights[i] * (1 - self.tau)
107 | 
108 |         self.target_model.set_weights(target_weights)
109 | 
110 | 
111 |     def update_epsilon(self):
112 |         self.epsilon = self.epsilon*self.epsilon_decay
113 |         self.epsilon = max(self.epsilon_min, self.epsilon)
114 | 
115 | 
116 |     def save_model(self,path, model_name):
117 |         # serialize model to JSON
118 |         model_json = self.model.to_json()
119 |         with open(path + model_name + ".json", "w") as json_file:
120 |             json_file.write(model_json)
121 |         # serialize weights to HDF5
122 |         self.model.save_weights(path + model_name + ".h5")
123 |         print("Saved model to disk")
124 | 
125 | 
126 | 
--------------------------------------------------------------------------------
/Miner-Training-Local-CodeSample/MINER_STATE.py:
--------------------------------------------------------------------------------
1 | import json
2 | 
3 | 
4 | def str_2_json(str):
5 |     return json.loads(str, encoding="utf-8")
6 | 
7 | 
8 | class MapInfo:
9 |     def __init__(self):
10 |         self.max_x = 0 #Width of the map
11 |         self.max_y = 0 #Height of the map
12 |         self.golds = [] #List of the golds in the map
13 |         self.obstacles = []
14 |         self.numberOfPlayers = 0
15 |         self.maxStep = 0 #The maximum number of steps set for this map
16 | 
17 |     def init_map(self, gameInfo):
18 |         #Initialize the map at the beginning of each episode
19 |         self.max_x = gameInfo["width"] - 1
20 |         self.max_y = gameInfo["height"] - 1
21 |         self.golds = gameInfo["golds"]
22 |         self.obstacles = gameInfo["obstacles"]
23 |         self.maxStep = gameInfo["steps"]
24 |         self.numberOfPlayers = gameInfo["numberOfPlayers"]
25 | 
26 |     def update(self, golds, changedObstacles):
27 |         #Update the map after every step
28 |         self.golds = golds
29 |         for cob in changedObstacles:
30 |             newOb = True
31 |             for ob in self.obstacles:
32 |                 if cob["posx"] == ob["posx"] and
cob["posy"] == ob["posy"]: 33 | newOb = False 34 | #print("cell(", cob["posx"], ",", cob["posy"], ") change type from: ", ob["type"], " -> ", 35 | # cob["type"], " / value: ", ob["value"], " -> ", cob["value"]) 36 | ob["type"] = cob["type"] 37 | ob["value"] = cob["value"] 38 | break 39 | if newOb: 40 | self.obstacles.append(cob) 41 | #print("new obstacle: ", cob["posx"], ",", cob["posy"], ", type = ", cob["type"], ", value = ", 42 | # cob["value"]) 43 | 44 | def get_min_x(self): 45 | return min([cell["posx"] for cell in self.golds]) 46 | 47 | def get_max_x(self): 48 | return max([cell["posx"] for cell in self.golds]) 49 | 50 | def get_min_y(self): 51 | return min([cell["posy"] for cell in self.golds]) 52 | 53 | def get_max_y(self): 54 | return max([cell["posy"] for cell in self.golds]) 55 | 56 | def is_row_has_gold(self, y): 57 | return y in [cell["posy"] for cell in self.golds] 58 | 59 | def is_column_has_gold(self, x): 60 | return x in [cell["posx"] for cell in self.golds] 61 | 62 | def gold_amount(self, x, y): #Get the amount of golds at cell (x,y) 63 | for cell in self.golds: 64 | if x == cell["posx"] and y == cell["posy"]: 65 | return cell["amount"] 66 | return 0 67 | 68 | def get_obstacle(self, x, y): # Get the kind of the obstacle at cell(x,y) 69 | for cell in self.obstacles: 70 | if x == cell["posx"] and y == cell["posy"]: 71 | return cell["type"] 72 | return -1 # No obstacle at the cell (x,y) 73 | 74 | 75 | class State: 76 | STATUS_PLAYING = 0 77 | STATUS_ELIMINATED_WENT_OUT_MAP = 1 78 | STATUS_ELIMINATED_OUT_OF_ENERGY = 2 79 | STATUS_ELIMINATED_INVALID_ACTION = 3 80 | STATUS_STOP_EMPTY_GOLD = 4 81 | STATUS_STOP_END_STEP = 5 82 | 83 | def __init__(self): 84 | self.end = False 85 | self.score = 0 86 | self.lastAction = None 87 | self.id = 0 88 | self.x = 0 89 | self.y = 0 90 | self.energy = 0 91 | self.mapInfo = MapInfo() 92 | self.players = [] 93 | self.stepCount = 0 94 | self.status = State.STATUS_PLAYING 95 | 96 | def init_state(self, data): #parse data from server into object 97 | game_info = str_2_json(data) 98 | self.end = False 99 | self.score = 0 100 | self.lastAction = None 101 | self.id = game_info["playerId"] 102 | self.x = game_info["posx"] 103 | self.y = game_info["posy"] 104 | self.energy = game_info["energy"] 105 | self.mapInfo.init_map(game_info["gameinfo"]) 106 | self.stepCount = 0 107 | self.status = State.STATUS_PLAYING 108 | self.players = [{"playerId": 2, "posx": self.x, "posy": self.y}, 109 | {"playerId": 3, "posx": self.x, "posy": self.y}, 110 | {"playerId": 4, "posx": self.x, "posy": self.y}] 111 | 112 | def update_state(self, data): 113 | new_state = str_2_json(data) 114 | for player in new_state["players"]: 115 | if player["playerId"] == self.id: 116 | self.x = player["posx"] 117 | self.y = player["posy"] 118 | self.energy = player["energy"] 119 | self.score = player["score"] 120 | self.lastAction = player["lastAction"] 121 | self.status = player["status"] 122 | 123 | self.mapInfo.update(new_state["golds"], new_state["changedObstacles"]) 124 | self.players = new_state["players"] 125 | for i in range(len(self.players) + 1, 5, 1): 126 | self.players.append({"playerId": i, "posx": self.x, "posy": self.y}) 127 | self.stepCount = self.stepCount + 1 128 | -------------------------------------------------------------------------------- /Miner-Training-Local-CodeSample/TrainingClient.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from DQNModel import DQN # A class of creating a deep q-learning model 3 | from 
MinerEnv import MinerEnv # A class for creating a communication environment between the DQN model and the GameMiner environment (GAME_SOCKET_DUMMY.py)
4 | from Memory import Memory # A class for creating a replay memory that stores experiences for the training process
5 | 
6 | import pandas as pd
7 | import datetime
8 | import numpy as np
9 | 
10 | 
11 | HOST = "localhost"
12 | PORT = 1111
13 | if len(sys.argv) == 3:
14 |     HOST = str(sys.argv[1])
15 |     PORT = int(sys.argv[2])
16 | 
17 | # Create the header for the file that logs DQN training progress
18 | now = datetime.datetime.now() # Getting the latest datetime
19 | header = ["Ep", "Step", "Reward", "Total_reward", "Action", "Epsilon", "Done", "Termination_Code"] # Defining the header for the save file
20 | filename = "Data/data_" + now.strftime("%Y%m%d-%H%M") + ".csv"
21 | with open(filename, 'w') as f:
22 |     pd.DataFrame(columns=header).to_csv(f, encoding='utf-8', index=False, header=True)
23 | 
24 | # Parameters for training a DQN model
25 | N_EPISODE = 10000 # The number of episodes for training
26 | MAX_STEP = 1000 # The maximum number of steps in each episode
27 | BATCH_SIZE = 32 # The number of experiences used in each replay
28 | MEMORY_SIZE = 100000 # The capacity of the memory for storing experiences
29 | SAVE_NETWORK = 100 # After this number of episodes, the DQN model is saved for testing later
30 | INITIAL_REPLAY_SIZE = 1000 # The number of experiences that must be stored in the memory before replaying starts
31 | INPUTNUM = 198 # The number of input values for the DQN model
32 | ACTIONNUM = 6 # The number of actions output from the DQN model
33 | MAP_MAX_X = 21 # Width of the map
34 | MAP_MAX_Y = 9 # Height of the map
35 | 
36 | # Initialize a DQN model and a memory batch for storing experiences
37 | DQNAgent = DQN(INPUTNUM, ACTIONNUM)
38 | memory = Memory(MEMORY_SIZE)
39 | 
40 | # Initialize environment
41 | minerEnv = MinerEnv(HOST, PORT) # Creating a communication environment between the DQN model and the game environment (GAME_SOCKET_DUMMY.py)
42 | minerEnv.start() # Connect to the game
43 | 
44 | train = False # Indicates that replay has started and that epsilon starts to decrease
45 | # Training process:
46 | # the main loop of the deep Q-learning algorithm
47 | for episode_i in range(0, N_EPISODE):
48 |     try:
49 |         # Choosing a map in the list
50 |         mapID = np.random.randint(1, 6) # Choosing a map ID randomly from the 5 maps in the Maps folder
51 |         posID_x = np.random.randint(MAP_MAX_X) # Choosing an initial position of the DQN agent on the X-axis randomly
52 |         posID_y = np.random.randint(MAP_MAX_Y) # Choosing an initial position of the DQN agent on the Y-axis randomly
53 |         # Creating a request that initializes the map, the initial position, the initial energy, and the maximum number of steps of the DQN agent
54 |         request = ("map" + str(mapID) + "," + str(posID_x) + "," + str(posID_y) + ",50,100")
55 |         # Send the request to the game environment (GAME_SOCKET_DUMMY.py)
56 |         minerEnv.send_map_info(request)
57 | 
58 |         # Getting the initial state
59 |         minerEnv.reset() # Initialize the game environment
60 |         s = minerEnv.get_state() # Get the state after resetting.
61 |         # This function (get_state()) is an example of creating a state for the DQN model
62 |         total_reward = 0 # The accumulated reward for the entire episode
63 |         terminate = False # Indicates whether the episode has ended
64 |         maxStep = minerEnv.state.mapInfo.maxStep # Get the maximum number of steps for each episode in training
65 |         # Start an episode for training
66 |         for step in range(0, maxStep):
67 |             action = DQNAgent.act(s)  # Getting an action from the DQN model for the state (s)
68 |             minerEnv.step(str(action))  # Performing the action in order to obtain the new state
69 |             s_next = minerEnv.get_state()  # Getting the new state
70 |             reward = minerEnv.get_reward()  # Getting the reward
71 |             terminate = minerEnv.check_terminate()  # Checking the end status of the episode
72 | 
73 |             # Add this transition to the memory batch
74 |             memory.push(s, action, reward, terminate, s_next)
75 | 
76 |             # Sample batch memory to train network
77 |             if (memory.length > INITIAL_REPLAY_SIZE):
78 |                 # If there are more than INITIAL_REPLAY_SIZE experiences in the memory batch,
79 |                 # then start replaying
80 |                 batch = memory.sample(BATCH_SIZE)  # Get BATCH_SIZE experiences for replaying
81 |                 DQNAgent.replay(batch, BATCH_SIZE)  # Do the replay
82 |                 train = True  # Indicate that training has started
83 |             total_reward = total_reward + reward  # Add the reward to the total reward of the episode
84 |             s = s_next  # Assign the next state for the next step.
85 | 
86 |             # Saving data to file; the agent's status code fills the Termination_Code
87 |             # column, so each row has the same 8 columns as the header defined above
88 |             save_data = np.hstack([episode_i + 1, step + 1, reward, total_reward, action, DQNAgent.epsilon, terminate, minerEnv.state.status]).reshape(1, 8)
89 |             with open(filename, 'a') as f:
90 |                 pd.DataFrame(save_data).to_csv(f, encoding='utf-8', index=False, header=False)
91 | 
92 |             if terminate == True:
93 |                 # If the episode ends, then go to the next episode
94 |                 break
95 | 
96 |         # Periodically save the network architecture and weights
97 |         if (np.mod(episode_i + 1, SAVE_NETWORK) == 0 and train == True):
98 |             DQNAgent.target_train()  # Replace the learning weights of the target model with soft replacement
99 |             # Save the DQN model
100 |             now = datetime.datetime.now()  # Get the latest datetime
101 |             DQNAgent.save_model("TrainedModels/",
102 |                                 "DQNmodel_" + now.strftime("%Y%m%d-%H%M") + "_ep" + str(episode_i + 1))
103 | 
104 | 
105 |         # Print the training information after the episode
106 |         print('Episode %d ends. Number of steps is: %d. Accumulated Reward = %.2f.
Epsilon = %.2f .Termination code: %d' % ( 107 | episode_i + 1, step + 1, total_reward, DQNAgent.epsilon, terminate)) 108 | 109 | #Decreasing the epsilon if the replay starts 110 | if train == True: 111 | DQNAgent.update_epsilon() 112 | 113 | except Exception as e: 114 | import traceback 115 | 116 | traceback.print_exc() 117 | # print("Finished.") 118 | break 119 | -------------------------------------------------------------------------------- /Miner-Testing-Server/DUMMY_SERVER.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import json 3 | import math 4 | from random import randrange 5 | import socket 6 | 7 | MAP = "[[0,0,-2,100,0,0,-1,-1,-3,0,0,0,-1,-1,0,0,-3,0,-1,-1,0],[-1,-1,-2,0,0,0,-3,-1,0,-2,0,0,0,-1,0,-1,0,-2,-1,0,0],[0,0,-1,0,0,0,0,-1,-1,-1,0,0,100,0,0,0,0,50,-2,0,0],[0,0,0,0,-2,0,0,0,0,0,0,0,-1,50,-2,0,0,-1,-1,0,0],[-2,0,200,-2,-2,300,0,0,-2,-2,0,0,-3,0,-1,0,0,-3,-1,0,0],[0,-1,0,0,0,0,0,-3,0,0,-1,-1,0,0,0,0,0,0,-2,0,0],[0,-1,-1,0,0,-1,-1,0,0,700,-1,0,0,0,-2,-1,-1,0,0,0,100],[0,0,0,500,0,0,-1,0,-2,-2,-1,-1,0,0,-2,0,-3,0,0,-1,0],[-1,-1,0,-2,0,-1,-2,0,400,-2,-1,-1,500,0,-2,0,-3,100,0,0,0]]" 8 | POS_X = 0 9 | POS_Y = 0 10 | E = 50 11 | MAX_STEP = 50 12 | W = 21 13 | H = 9 14 | 15 | class ObstacleInfo: 16 | # initial energy for obstacles: Land (key = 0): -1, Forest(key = -1): 0 (random), Trap(key = -2): -10, Swamp (key = -3): -5 17 | types = {0: -1, -1: 0, -2: -10, -3: -5} 18 | 19 | def __init__(self): 20 | self.type = 0 21 | self.posx = 0 22 | self.posy = 0 23 | self.value = 0 24 | 25 | 26 | class GoldInfo: 27 | def __init__(self): 28 | self.posx = 0 29 | self.posy = 0 30 | self.amount = 0 31 | 32 | def loads(self, data): 33 | golds = [] 34 | for gd in data: 35 | g = GoldInfo() 36 | g.posx = gd["posx"] 37 | g.posy = gd["posy"] 38 | g.amount = gd["amount"] 39 | golds.append(g) 40 | return golds 41 | 42 | 43 | class PlayerInfo: 44 | STATUS_PLAYING = 0 45 | STATUS_ELIMINATED_WENT_OUT_MAP = 1 46 | STATUS_ELIMINATED_OUT_OF_ENERGY = 2 47 | STATUS_ELIMINATED_INVALID_ACTION = 3 48 | STATUS_STOP_EMPTY_GOLD = 4 49 | STATUS_STOP_END_STEP = 5 50 | 51 | def __init__(self, id): 52 | self.playerId = id 53 | self.score = 0 54 | self.energy = E 55 | self.posx = POS_X 56 | self.posy = POS_Y 57 | self.lastAction = 0 58 | self.status = PlayerInfo.STATUS_PLAYING 59 | self.freeCount = 0 60 | 61 | 62 | class GameInfo: 63 | def __init__(self): 64 | self.numberOfPlayers = 1 65 | self.width = W 66 | self.height = H 67 | self.steps = MAX_STEP 68 | self.golds = [] 69 | self.obstacles = [] 70 | 71 | def loads(self, data): 72 | m = GameInfo() 73 | m.width = data["width"] 74 | m.height = data["height"] 75 | m.golds = GoldInfo().loads(data["golds"]) 76 | m.obstacles = data["obstacles"] 77 | m.numberOfPlayers = data["numberOfPlayers"] 78 | m.steps = data["steps"] 79 | return m 80 | 81 | 82 | class UserMatch: 83 | def __init__(self): 84 | self.playerId = 1 85 | self.posx = POS_X 86 | self.posy = POS_Y 87 | self.energy = E 88 | self.gameinfo = GameInfo() 89 | 90 | def to_json(self): 91 | return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4) 92 | 93 | 94 | class StepState: 95 | def __init__(self): 96 | self.players = [] 97 | self.golds = [] 98 | self.changedObstacles = [] 99 | 100 | def to_json(self): 101 | return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4) 102 | 103 | 104 | class GameSocket: 105 | bog_energy_chain = {-5: -20, -20: -40, -40: -100, -100: -100} 106 | 107 | def __init__(self): 108 | 
self.stepCount = 0 109 | self.maxStep = MAX_STEP 110 | self.userMatch = UserMatch() 111 | self.user = PlayerInfo(1) 112 | self.stepState = StepState() 113 | self.map = json.loads(MAP) # running map info: 0->Land, -1->Forest, -2->Trap, -3:Swamp, >0:Gold 114 | self.energyOnMap = json.loads(MAP) # self.energyOnMap[x][y]: <0, amount of energy which player will consume if it move into (x,y) 115 | self.E = E 116 | self.stepCount = 0 117 | self.craftUsers = [] # players that craft at current step - for calculating amount of gold 118 | self.craftMap = {} # cells that players craft at current step, key: x_y, value: number of players that craft at (x,y) 119 | 120 | def setup(self): 121 | self.init_map() 122 | self.maxStep = self.userMatch.gameinfo.steps 123 | 124 | # init data for players 125 | self.stepState.players = [self.user] 126 | self.E = self.userMatch.energy 127 | 128 | def init_map(self): # load map info 129 | i = 0 130 | while i < len(self.map): 131 | j = 0 132 | while j < len(self.map[i]): 133 | if self.map[i][j] > 0: # gold 134 | g = GoldInfo() 135 | g.posx = j 136 | g.posy = i 137 | g.amount = self.map[i][j] 138 | self.userMatch.gameinfo.golds.append(g) 139 | else: # obstacles 140 | o = ObstacleInfo() 141 | o.posx = j 142 | o.posy = i 143 | o.type = -self.map[i][j] 144 | o.value = ObstacleInfo.types[self.map[i][j]] 145 | self.userMatch.gameinfo.obstacles.append(o) 146 | j += 1 147 | i += 1 148 | self.stepState.golds = self.userMatch.gameinfo.golds 149 | for x in range(len(self.map)): 150 | for y in range(len(self.map[x])): 151 | if self.map[x][y] > 0: # gold 152 | self.energyOnMap[x][y] = -4 153 | else: # obstacles 154 | self.energyOnMap[x][y] = ObstacleInfo.types[self.map[x][y]] 155 | 156 | def get_game_info(self): 157 | data = self.userMatch.to_json() 158 | print("send: ", data) 159 | return data 160 | 161 | def get_step(self): # send data to player (simulate player's receive request) 162 | self.stepCount = self.stepCount + 1 163 | if self.stepCount >= self.maxStep: 164 | for player in self.stepState.players: 165 | player.status = PlayerInfo.STATUS_STOP_END_STEP 166 | data = self.stepState.to_json() 167 | return data 168 | 169 | def receive(self, message): # receive message from player (simulate send request from player) 170 | self.stepState.changedObstacles = [] 171 | action = int(message) 172 | # print("Action = ", action) 173 | self.user.lastAction = action 174 | self.craftUsers = [] 175 | self.step_action(self.user, action) 176 | self.action_5_craft() 177 | for c in self.stepState.changedObstacles: 178 | self.map[c["posy"]][c["posx"]] = -c["type"] 179 | self.energyOnMap[c["posy"]][c["posx"]] = c["value"] 180 | 181 | def step_action(self, user, action): 182 | switcher = { 183 | 0: self.action_0_left, 184 | 1: self.action_1_right, 185 | 2: self.action_2_up, 186 | 3: self.action_3_down, 187 | 4: self.action_4_free, 188 | 5: self.action_5_craft_pre 189 | } 190 | func = switcher.get(action, self.invalid_action) 191 | func(user) 192 | 193 | def action_5_craft_pre(self, user): # collect players who craft at current step 194 | user.freeCount = 0 195 | if self.map[user.posy][user.posx] <= 0: # craft at the non-gold cell 196 | user.energy -= 10 197 | if user.energy <= 0: 198 | user.status = PlayerInfo.STATUS_ELIMINATED_OUT_OF_ENERGY 199 | user.lastAction = 6 #eliminated 200 | else: 201 | user.energy -= 5 202 | if user.energy > 0: 203 | self.craftUsers.append(user) 204 | key = str(user.posx) + "_" + str(user.posy) 205 | if key in self.craftMap: 206 | count = self.craftMap[key] 207 | 
self.craftMap[key] = count + 1
208 |                 else:
209 |                     self.craftMap[key] = 1
210 |             else:
211 |                 user.status = PlayerInfo.STATUS_ELIMINATED_OUT_OF_ENERGY
212 |                 user.lastAction = 6  #eliminated
213 | 
214 |     def action_0_left(self, user):  # user go left
215 |         user.freeCount = 0
216 |         user.posx = user.posx - 1
217 |         if user.posx < 0:
218 |             user.status = PlayerInfo.STATUS_ELIMINATED_WENT_OUT_MAP
219 |             user.lastAction = 6  #eliminated
220 |         else:
221 |             self.go_to_pos(user)
222 | 
223 |     def action_1_right(self, user):  # user go right
224 |         user.freeCount = 0
225 |         user.posx = user.posx + 1
226 |         if user.posx >= self.userMatch.gameinfo.width:
227 |             user.status = PlayerInfo.STATUS_ELIMINATED_WENT_OUT_MAP
228 |             user.lastAction = 6  #eliminated
229 |         else:
230 |             self.go_to_pos(user)
231 | 
232 |     def action_2_up(self, user):  # user go up
233 |         user.freeCount = 0
234 |         user.posy = user.posy - 1
235 |         if user.posy < 0:
236 |             user.status = PlayerInfo.STATUS_ELIMINATED_WENT_OUT_MAP
237 |             user.lastAction = 6  #eliminated
238 |         else:
239 |             self.go_to_pos(user)
240 | 
241 |     def action_3_down(self, user):  # user go down
242 |         user.freeCount = 0
243 |         user.posy = user.posy + 1
244 |         if user.posy >= self.userMatch.gameinfo.height:
245 |             user.status = PlayerInfo.STATUS_ELIMINATED_WENT_OUT_MAP
246 |             user.lastAction = 6  #eliminated
247 |         else:
248 |             self.go_to_pos(user)
249 | 
250 |     def action_4_free(self, user):  # user rests to recover energy
251 |         user.freeCount += 1
252 |         if user.freeCount == 1:
253 |             user.energy += int(self.E / 4)
254 |         elif user.freeCount == 2:
255 |             user.energy += int(self.E / 3)
256 |         elif user.freeCount == 3:
257 |             user.energy += int(self.E / 2)
258 |         else:
259 |             user.energy = self.E
260 |         if user.energy > self.E:
261 |             user.energy = self.E
262 | 
263 |     def action_5_craft(self):
264 |         craftCount = len(self.craftUsers)
265 |         # print ("craftCount",craftCount)
266 |         if (craftCount > 0):
267 |             for user in self.craftUsers:
268 |                 x = user.posx
269 |                 y = user.posy
270 |                 key = str(user.posx) + "_" + str(user.posy)
271 |                 c = self.craftMap[key]
272 |                 m = min(math.ceil(self.map[y][x] / c), 50)
273 |                 user.score += m
274 |                 # print ("user", user.playerId, m)
275 |             for user in self.craftUsers:
276 |                 x = user.posx
277 |                 y = user.posy
278 |                 key = str(user.posx) + "_" + str(user.posy)
279 |                 if key in self.craftMap:
280 |                     c = self.craftMap[key]
281 |                     del self.craftMap[key]
282 |                     m = min(math.ceil(self.map[y][x] / c), 50)
283 |                     self.map[y][x] -= m * c
284 |                     if self.map[y][x] < 0:
285 |                         self.map[y][x] = 0
286 |                         self.energyOnMap[y][x] = ObstacleInfo.types[0]
287 |                     for g in self.stepState.golds:
288 |                         if g.posx == x and g.posy == y:
289 |                             g.amount = self.map[y][x]
290 |                             if g.amount == 0:
291 |                                 self.stepState.golds.remove(g)
292 |                                 self.add_changed_obstacle(x, y, 0, ObstacleInfo.types[0])
293 |                                 if len(self.stepState.golds) == 0:
294 |                                     for player in self.stepState.players:
295 |                                         player.status = PlayerInfo.STATUS_STOP_EMPTY_GOLD
296 |                             break
297 |         self.craftMap = {}
298 | 
299 |     def invalid_action(self, user):
300 |         user.status = PlayerInfo.STATUS_ELIMINATED_INVALID_ACTION
301 |         user.lastAction = 6  #eliminated
302 | 
303 |     def go_to_pos(self, user):  # player moves to cell (x, y)
304 |         if self.map[user.posy][user.posx] == -1:
305 |             user.energy -= randrange(16) + 5
306 |         elif self.map[user.posy][user.posx] == 0:
307 |             user.energy += self.energyOnMap[user.posy][user.posx]
308 |         elif self.map[user.posy][user.posx] == -2:
309 |             user.energy += self.energyOnMap[user.posy][user.posx]
310 |             self.add_changed_obstacle(user.posx, user.posy, 0, ObstacleInfo.types[0])
311 | elif self.map[user.posy][user.posx] == -3: 312 | user.energy += self.energyOnMap[user.posy][user.posx] 313 | self.add_changed_obstacle(user.posx, user.posy, 3, 314 | self.bog_energy_chain[self.energyOnMap[user.posy][user.posx]]) 315 | else: 316 | user.energy -= 4 317 | if user.energy <= 0: 318 | user.status = PlayerInfo.STATUS_ELIMINATED_OUT_OF_ENERGY 319 | user.lastAction = 6 #eliminated 320 | 321 | def add_changed_obstacle(self, x, y, t, v): 322 | added = False 323 | for o in self.stepState.changedObstacles: 324 | if o["posx"] == x and o["posy"] == y: 325 | added = True 326 | break 327 | if not added: 328 | o = {} 329 | o["posx"] = x 330 | o["posy"] = y 331 | o["type"] = t 332 | o["value"] = v 333 | self.stepState.changedObstacles.append(o) 334 | 335 | 336 | if __name__ == "__main__": 337 | 338 | HOST = "localhost" 339 | PORT = int(sys.argv[1]) 340 | 341 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 342 | print('# Socket created') 343 | 344 | try: 345 | s.bind((HOST, PORT)) 346 | except socket.error as msg: 347 | print('# Bind failed. ') 348 | sys.exit() 349 | s.listen(10) 350 | conn, addr = s.accept() 351 | print('# Connected to ' + addr[0] + ':' + str(addr[1])) 352 | 353 | game = GameSocket() 354 | game.setup() 355 | conn.send(bytes(game.get_game_info(),"utf-8")) 356 | while game.user.status == 0: 357 | data = conn.recv(1024) 358 | game.receive(data) 359 | conn.send(bytes(game.get_step(), "utf-8")) 360 | 361 | s.close() 362 | print("Player score: ", game.user.score) -------------------------------------------------------------------------------- /Miner-Training-Local-CodeSample/GAME_SOCKET_DUMMY.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from array import * 3 | import json 4 | import os 5 | import math 6 | from bot1 import Bot1 7 | from bot2 import Bot2 8 | from bot3 import Bot3 9 | from random import randrange 10 | 11 | 12 | class ObstacleInfo: 13 | # initial energy for obstacles: Land (key = 0): -1, Forest(key = -1): 0 (random), Trap(key = -2): -10, Swamp (key = -3): -5 14 | types = {0: -1, -1: 0, -2: -10, -3: -5} 15 | 16 | def __init__(self): 17 | self.type = 0 18 | self.posx = 0 19 | self.posy = 0 20 | self.value = 0 21 | 22 | 23 | class GoldInfo: 24 | def __init__(self): 25 | self.posx = 0 26 | self.posy = 0 27 | self.amount = 0 28 | 29 | def loads(self, data): 30 | golds = [] 31 | for gd in data: 32 | g = GoldInfo() 33 | g.posx = gd["posx"] 34 | g.posy = gd["posy"] 35 | g.amount = gd["amount"] 36 | golds.append(g) 37 | return golds 38 | 39 | 40 | class PlayerInfo: 41 | STATUS_PLAYING = 0 42 | STATUS_ELIMINATED_WENT_OUT_MAP = 1 43 | STATUS_ELIMINATED_OUT_OF_ENERGY = 2 44 | STATUS_ELIMINATED_INVALID_ACTION = 3 45 | STATUS_STOP_EMPTY_GOLD = 4 46 | STATUS_STOP_END_STEP = 5 47 | 48 | def __init__(self, id): 49 | self.playerId = id 50 | self.score = 0 51 | self.energy = 0 52 | self.posx = 0 53 | self.posy = 0 54 | self.lastAction = -1 55 | self.status = PlayerInfo.STATUS_PLAYING 56 | self.freeCount = 0 57 | 58 | 59 | class GameInfo: 60 | def __init__(self): 61 | self.numberOfPlayers = 1 62 | self.width = 0 63 | self.height = 0 64 | self.steps = 100 65 | self.golds = [] 66 | self.obstacles = [] 67 | 68 | def loads(self, data): 69 | m = GameInfo() 70 | m.width = data["width"] 71 | m.height = data["height"] 72 | m.golds = GoldInfo().loads(data["golds"]) 73 | m.obstacles = data["obstacles"] 74 | m.numberOfPlayers = data["numberOfPlayers"] 75 | m.steps = data["steps"] 76 | return m 77 | 78 | 79 | class UserMatch: 
80 | def __init__(self): 81 | self.playerId = 1 82 | self.posx = 0 83 | self.posy = 0 84 | self.energy = 50 85 | self.gameinfo = GameInfo() 86 | 87 | def to_json(self): 88 | return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4) 89 | 90 | 91 | class StepState: 92 | def __init__(self): 93 | self.players = [] 94 | self.golds = [] 95 | self.changedObstacles = [] 96 | 97 | def to_json(self): 98 | return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4) 99 | 100 | 101 | class GameSocket: 102 | bog_energy_chain = {-5: -20, -20: -40, -40: -100, -100: -100} 103 | 104 | def __init__(self, host, port): 105 | self.stepCount = 0 106 | self.maxStep = 0 107 | self.mapdir = "Maps" # where to load all pre-defined maps 108 | self.mapid = "" 109 | self.userMatch = UserMatch() 110 | self.user = PlayerInfo(1) 111 | self.stepState = StepState() 112 | self.maps = {} # key: map file name, value: file content 113 | self.map = [] # running map info: 0->Land, -1->Forest, -2->Trap, -3:Swamp, >0:Gold 114 | self.energyOnMap = [] # self.energyOnMap[x][y]: <0, amount of energy which player will consume if it move into (x,y) 115 | self.E = 50 116 | self.resetFlag = True 117 | self.craftUsers = [] # players that craft at current step - for calculating amount of gold 118 | self.bots = [] 119 | self.craftMap = {} # cells that players craft at current step, key: x_y, value: number of players that craft at (x,y) 120 | 121 | def init_bots(self): 122 | self.bots = [Bot1(2), Bot2(3), Bot3(4)] # use bot1(id=2), bot2(id=3), bot3(id=4) 123 | for (bot) in self.bots: # at the beginning, all bots will have same position, energy as player 124 | bot.info.posx = self.user.posx 125 | bot.info.posy = self.user.posy 126 | bot.info.energy = self.user.energy 127 | bot.info.lastAction = -1 128 | bot.info.status = PlayerInfo.STATUS_PLAYING 129 | bot.info.score = 0 130 | self.stepState.players.append(bot.info) 131 | self.userMatch.gameinfo.numberOfPlayers = len(self.stepState.players) 132 | print("numberOfPlayers: ", self.userMatch.gameinfo.numberOfPlayers) 133 | 134 | def reset(self, requests): # load new game by given request: [map id (filename), posx, posy, initial energy] 135 | # load new map 136 | self.reset_map(requests[0]) 137 | self.userMatch.posx = int(requests[1]) 138 | self.userMatch.posy = int(requests[2]) 139 | self.userMatch.energy = int(requests[3]) 140 | self.userMatch.gameinfo.steps = int(requests[4]) 141 | self.maxStep = self.userMatch.gameinfo.steps 142 | 143 | # init data for players 144 | self.user.posx = self.userMatch.posx # in 145 | self.user.posy = self.userMatch.posy 146 | self.user.energy = self.userMatch.energy 147 | self.user.status = PlayerInfo.STATUS_PLAYING 148 | self.user.score = 0 149 | self.stepState.players = [self.user] 150 | self.E = self.userMatch.energy 151 | self.resetFlag = True 152 | self.init_bots() 153 | self.stepCount = 0 154 | 155 | def reset_map(self, id): # load map info 156 | self.mapId = id 157 | self.map = json.loads(self.maps[self.mapId]) 158 | self.userMatch = self.map_info(self.map) 159 | #print(self.map) 160 | self.stepState.golds = self.userMatch.gameinfo.golds 161 | #self.map = json.loads(self.maps[self.mapId]) 162 | self.energyOnMap = json.loads(self.maps[self.mapId]) 163 | for x in range(len(self.map)): 164 | for y in range(len(self.map[x])): 165 | if self.map[x][y] > 0: # gold 166 | self.energyOnMap[x][y] = -4 167 | else: # obstacles 168 | self.energyOnMap[x][y] = ObstacleInfo.types[self.map[x][y]] 169 | 170 | def connect(self): # 
simulate player's connect request 171 | print("Connected to server.") 172 | # load all pre-defined maps from mapDir 173 | for filename in os.listdir(self.mapdir): 174 | print("Found: " + filename) 175 | with open(os.path.join(self.mapdir, filename), 'r') as f: 176 | self.maps[filename] = f.read() 177 | 178 | def map_info(self, map): # get map info 179 | # print(map) 180 | userMatch = UserMatch() 181 | userMatch.gameinfo.height = len(map) 182 | userMatch.gameinfo.width = len(map[0]) 183 | i = 0 184 | max_gold_num = 10000 185 | while i < len(map): 186 | j = 0 187 | while j < len(map[i]): 188 | if map[i][j] > 0: # gold 189 | g = GoldInfo() 190 | g.posx = j 191 | g.posy = i 192 | g.amount = (randrange(min(25, max(int(max_gold_num/50), 1))) + 1) * 50 193 | self.map[i][j] = g.amount 194 | max_gold_num -= g.amount 195 | userMatch.gameinfo.golds.append(g) 196 | else: # obstacles 197 | o = ObstacleInfo() 198 | o.posx = j 199 | o.posy = i 200 | o.type = -map[i][j] 201 | o.value = ObstacleInfo.types[map[i][j]] 202 | userMatch.gameinfo.obstacles.append(o) 203 | j += 1 204 | i += 1 205 | #print("max_gold_num=",max_gold_num) 206 | return userMatch 207 | 208 | def receive(self): # send data to player (simulate player's receive request) 209 | if self.resetFlag: # for the first time -> send game info 210 | self.resetFlag = False 211 | data = self.userMatch.to_json() 212 | for (bot) in self.bots: 213 | bot.new_game(data) 214 | #print(data) 215 | return data 216 | else: # send step state 217 | self.stepCount = self.stepCount + 1 218 | if self.stepCount >= self.maxStep: 219 | for player in self.stepState.players: 220 | player.status = PlayerInfo.STATUS_STOP_END_STEP 221 | data = self.stepState.to_json() 222 | for (bot) in self.bots: # update bots' state 223 | bot.new_state(data) 224 | # print(data) 225 | return data 226 | 227 | def send(self, message): # receive message from player (simulate send request from player) 228 | if message.isnumeric(): # player send action 229 | self.resetFlag = False 230 | self.stepState.changedObstacles = [] 231 | action = int(message) 232 | # print("Action = ", action) 233 | self.user.lastAction = action 234 | self.craftUsers = [] 235 | self.step_action(self.user, action) 236 | for bot in self.bots: 237 | if bot.info.status == PlayerInfo.STATUS_PLAYING: 238 | action = bot.next_action() 239 | bot.info.lastAction = action 240 | # print("Bot Action: ", action) 241 | self.step_action(bot.info, action) 242 | self.action_5_craft() 243 | for c in self.stepState.changedObstacles: 244 | self.map[c["posy"]][c["posx"]] = -c["type"] 245 | self.energyOnMap[c["posy"]][c["posx"]] = c["value"] 246 | 247 | else: # reset game 248 | requests = message.split(",") 249 | print("Reset game: ", requests) 250 | self.reset(requests) 251 | 252 | def step_action(self, user, action): 253 | switcher = { 254 | 0: self.action_0_left, 255 | 1: self.action_1_right, 256 | 2: self.action_2_up, 257 | 3: self.action_3_down, 258 | 4: self.action_4_free, 259 | 5: self.action_5_craft_pre 260 | } 261 | func = switcher.get(action, self.invalidAction) 262 | func(user) 263 | 264 | def action_5_craft_pre(self, user): # collect players who craft at current step 265 | user.freeCount = 0 266 | if self.map[user.posy][user.posx] <= 0: # craft at the non-gold cell 267 | user.energy -= 10 268 | if user.energy <= 0: 269 | user.status = PlayerInfo.STATUS_ELIMINATED_OUT_OF_ENERGY 270 | user.lastAction = 6 #eliminated 271 | else: 272 | user.energy -= 5 273 | if user.energy > 0: 274 | self.craftUsers.append(user) 275 | key = 
str(user.posx) + "_" + str(user.posy)
276 |                 if key in self.craftMap:
277 |                     count = self.craftMap[key]
278 |                     self.craftMap[key] = count + 1
279 |                 else:
280 |                     self.craftMap[key] = 1
281 |             else:
282 |                 user.status = PlayerInfo.STATUS_ELIMINATED_OUT_OF_ENERGY
283 |                 user.lastAction = 6  #eliminated
284 | 
285 |     def action_0_left(self, user):  # user go left
286 |         user.freeCount = 0
287 |         user.posx = user.posx - 1
288 |         if user.posx < 0:
289 |             user.status = PlayerInfo.STATUS_ELIMINATED_WENT_OUT_MAP
290 |             user.lastAction = 6  #eliminated
291 |         else:
292 |             self.go_to_pos(user)
293 | 
294 |     def action_1_right(self, user):  # user go right
295 |         user.freeCount = 0
296 |         user.posx = user.posx + 1
297 |         if user.posx >= self.userMatch.gameinfo.width:
298 |             user.status = PlayerInfo.STATUS_ELIMINATED_WENT_OUT_MAP
299 |             user.lastAction = 6  #eliminated
300 |         else:
301 |             self.go_to_pos(user)
302 | 
303 |     def action_2_up(self, user):  # user go up
304 |         user.freeCount = 0
305 |         user.posy = user.posy - 1
306 |         if user.posy < 0:
307 |             user.status = PlayerInfo.STATUS_ELIMINATED_WENT_OUT_MAP
308 |             user.lastAction = 6  #eliminated
309 |         else:
310 |             self.go_to_pos(user)
311 | 
312 |     def action_3_down(self, user):  # user go down
313 |         user.freeCount = 0
314 |         user.posy = user.posy + 1
315 |         if user.posy >= self.userMatch.gameinfo.height:
316 |             user.status = PlayerInfo.STATUS_ELIMINATED_WENT_OUT_MAP
317 |             user.lastAction = 6  #eliminated
318 |         else:
319 |             self.go_to_pos(user)
320 | 
321 |     def action_4_free(self, user):  # user rests to recover energy
322 |         user.freeCount += 1
323 |         if user.freeCount == 1:
324 |             user.energy += int(self.E / 4)
325 |         elif user.freeCount == 2:
326 |             user.energy += int(self.E / 3)
327 |         elif user.freeCount == 3:
328 |             user.energy += int(self.E / 2)
329 |         else:
330 |             user.energy = self.E
331 |         if user.energy > self.E:
332 |             user.energy = self.E
333 | 
334 |     def action_5_craft(self):
335 |         craftCount = len(self.craftUsers)
336 |         # print ("craftCount",craftCount)
337 |         if (craftCount > 0):
338 |             for user in self.craftUsers:
339 |                 x = user.posx
340 |                 y = user.posy
341 |                 key = str(user.posx) + "_" + str(user.posy)
342 |                 c = self.craftMap[key]
343 |                 m = min(math.ceil(self.map[y][x] / c), 50)
344 |                 user.score += m
345 |                 # print ("user", user.playerId, m)
346 |             for user in self.craftUsers:
347 |                 x = user.posx
348 |                 y = user.posy
349 |                 key = str(user.posx) + "_" + str(user.posy)
350 |                 if key in self.craftMap:
351 |                     c = self.craftMap[key]
352 |                     del self.craftMap[key]
353 |                     m = min(math.ceil(self.map[y][x] / c), 50)
354 |                     self.map[y][x] -= m * c
355 |                     if self.map[y][x] < 0:
356 |                         self.map[y][x] = 0
357 |                         self.energyOnMap[y][x] = ObstacleInfo.types[0]
358 |                     for g in self.stepState.golds:
359 |                         if g.posx == x and g.posy == y:
360 |                             g.amount = self.map[y][x]
361 |                             if g.amount == 0:
362 |                                 self.stepState.golds.remove(g)
363 |                                 self.add_changed_obstacle(x, y, 0, ObstacleInfo.types[0])
364 |                                 if len(self.stepState.golds) == 0:
365 |                                     for player in self.stepState.players:
366 |                                         player.status = PlayerInfo.STATUS_STOP_EMPTY_GOLD
367 |                             break
368 |         self.craftMap = {}
369 | 
370 |     def invalidAction(self, user):
371 |         user.status = PlayerInfo.STATUS_ELIMINATED_INVALID_ACTION
372 |         user.lastAction = 6  #eliminated
373 | 
374 |     def go_to_pos(self, user):  # player moves to cell (x, y)
375 |         if self.map[user.posy][user.posx] == -1:
376 |             user.energy -= randrange(16) + 5
377 |         elif self.map[user.posy][user.posx] == 0:
378 |             user.energy += self.energyOnMap[user.posy][user.posx]
379 |         elif self.map[user.posy][user.posx] == -2:
380 |             user.energy += self.energyOnMap[user.posy][user.posx]
381 |             self.add_changed_obstacle(user.posx, user.posy, 0, ObstacleInfo.types[0])
382 |         elif self.map[user.posy][user.posx] == -3:
383 |             user.energy += self.energyOnMap[user.posy][user.posx]
384 |             self.add_changed_obstacle(user.posx, user.posy, 3,
385 |                                       self.bog_energy_chain[self.energyOnMap[user.posy][user.posx]])
386 |         else:
387 |             user.energy -= 4
388 |         if user.energy <= 0:
389 |             user.status = PlayerInfo.STATUS_ELIMINATED_OUT_OF_ENERGY
390 |             user.lastAction = 6  #eliminated
391 | 
392 |     def add_changed_obstacle(self, x, y, t, v):
393 |         added = False
394 |         for o in self.stepState.changedObstacles:
395 |             if o["posx"] == x and o["posy"] == y:
396 |                 added = True
397 |                 break
398 |         if not added:
399 |             o = {}
400 |             o["posx"] = x
401 |             o["posy"] = y
402 |             o["type"] = t
403 |             o["value"] = v
404 |             self.stepState.changedObstacles.append(o)
405 | 
406 |     def close(self):
407 |         print("Close socket.")
408 | 
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
# rlcomp2020
This is the sample source code for the Reinforcement Learning Competition hosted by FPT-Software (Hanoi, Vietnam). The game is Gold Miner.



***
***(English version; translated Vietnamese version below)***

## Sample source code description: training and competition

During the competition, the following State information will be returned after an Action is performed:

- Information about competing Agents.

```json
{
    "playerId": Agent's ID, integer;
    "posx": Agent's X position, integer;
    "posy": Agent's Y position, integer;
    "score": Agent's amount of gold mined, integer;
    "energy": Agent's amount of remaining energy, integer;
    "lastAction": the last action, integer;
    "status": Agent's status - playing or eliminated, integer
}
```
- Information about the remaining obstacles on the map (their position and the amount of energy that will be subtracted when an Agent passes by).
- Information about the remaining gold mines on the map (their position and the amount of gold).
- Map size (height and width)

Based on the returned State information, teams can decide their own training strategies, such as designing a Reward Function and defining a State Space. In the two sample source code packages (Miner-Training-Local-CodeSample and Miner-Testing-CodeSample) provided to teams (described below), we give an example of defining a State Space and designing a Reward Function using the 02 functions get_state() and get_reward(), respectively. Below is an overview of the two sample source code packages provided for training and competition:
## A. Source code for training - Miner-Training-Local-CodeSample
This is the sample source code used for training. The source code contains 02 major parts: the Miner Game Environment and the deep reinforcement learning algorithm (Deep Q-learning - DQN).

![alt text](image/codeAI/Picture1.png)
*Figure 1: The information flow between programs in the sample source code used for training*

Details of the two parts are as follows:
### 1. Miner Game Environment
The source code of the Miner Game Environment is derived from the original source code of the Miner Game on the Codelearn system. It includes GAME_SOCKET_DUMMY.py, MINER_STATE.py, MinerEnv.py, Maps, and 03 Bots (bot1.py, bot2.py, bot3.py). Figure 2 illustrates the exchange of map information and the Agent's State information between MinerEnv.py and GAME_SOCKET_DUMMY.py. The details of the programs are described below.

![alt text](image/codeAI/Picture2.png)
*Figure 2: The information flow between MinerEnv.py and GAME_SOCKET_DUMMY.py during training (simulating the information flow between client and server)*

a) MinerEnv.py: A program designed based on the general structure of a reinforcement learning environment to help teams access the main program (GAME_SOCKET_DUMMY.py) in a simple and convenient way. Some of the main functions in the program are as follows:

- **start()**: a single-use function used to simulate the process of connecting to the server to start playing. During training, this function calls the **connect()** function in GAME_SOCKET_DUMMY.py to read the 05 maps in the Maps folder.

- **send_map_info()**: a function used to select maps to train Agents.

- **reset()**: a function used to initialize a map and a State for the Agent. This function calls the **receive()** function in GAME_SOCKET_DUMMY.py to get the map's initial information saved in a json message, as well as the **init_state(message)** function in MINER_STATE.py to update the Agent's State with the map's initial information.

- **get_state()**: a function provided as an example of defining a State of the Agent during training. Teams can overwrite this function to define a State that suits their training strategies.

- **step()**: a function used to send an action to GAME_SOCKET_DUMMY.py and receive the resulting changes to the map information and the Agent's State.

- **get_reward()**: a function provided as an example of defining a reward function of the Agent during training. Teams can overwrite this function to define a reward function that suits their training strategies.
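As an illustration of the last bullet, a rewritten reward function might look like the minimal sketch below. It assumes a hypothetical `score_pre` attribute on the environment that caches the score from the previous step (it would be initialized in reset()); the penalty values are illustrative only, not the sample's actual reward.

```python
# A minimal sketch of a custom MinerEnv.get_reward(), assuming the environment
# caches the previous score in self.score_pre (a hypothetical attribute set in
# reset()). The penalty values are illustrative, not the sample's actual reward.
from MINER_STATE import State

def get_reward(self):
    # Reward the gold mined since the previous step
    reward = self.state.score - self.score_pre
    self.score_pre = self.state.score
    # Penalize terminal states in which the agent was eliminated
    if self.state.status == State.STATUS_ELIMINATED_WENT_OUT_MAP:
        reward -= 10
    if self.state.status == State.STATUS_ELIMINATED_OUT_OF_ENERGY:
        reward -= 10
    return reward
```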
b) MINER_STATE.py (teams should not change the source code in this program): this program is sample source code for saving the map information and the Agent's State received from GAME_SOCKET_DUMMY.py (which will be sent from the server in the actual competition). This program is designed to help teams manage their State information easily during training. The Map and State classes, along with some main functions in these two classes, are as follows:

- **MapInfo** (Class): a class used to store all map information. This class includes max_x, max_y, maxStep, numberOfPlayer, golds (the current amount of remaining gold on the map), and obstacles (the information about current obstacles on the map).

+ **update(golds, changedObstacles)**: update the map information after each step.

- **State** (Class): a class that contains the States of the game (including the player's State and the map).

+ **init_state(data)**: a function used to initialize the map information and the Agent's State at the beginning of an episode in training (or a match in the actual competition).
+ **update_state(data)**: a function used to update the State of the game after each step. The transferred data includes the map information and the Agent's State.
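MapInfo also exposes small lookup helpers such as gold_amount(x, y) and get_obstacle(x, y) (see MINER_STATE.py). A custom get_state() can combine them into a feature vector; the local-view encoding below is a hypothetical example, shown only to make the idea concrete.

```python
# A hypothetical state feature built from the MINER_STATE accessors:
# a (2*radius+1) x (2*radius+1) window of the map centered on the agent.
import numpy as np

def local_view(state, radius=2):
    view = np.zeros((2 * radius + 1, 2 * radius + 1))
    for dy in range(-radius, radius + 1):
        for dx in range(-radius, radius + 1):
            x, y = state.x + dx, state.y + dy
            gold = state.mapInfo.gold_amount(x, y)
            if gold > 0:
                view[dy + radius][dx + radius] = gold
            else:
                # get_obstacle() returns the obstacle type, or -1 when there
                # is no obstacle at (x, y); off-map cells also fall through here
                view[dy + radius][dx + radius] = state.mapInfo.get_obstacle(x, y)
    return view.flatten()
```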
c) GAME_SOCKET_DUMMY.py (teams should not change the source code in this program): a program used to simulate the gold miner game, including the process of transferring data (messages) to the server. This program contains 07 classes: ObstacleInfo, GoldInfo, PlayerInfo, GameInfo, UserMatch, StepState and GameSocket. GameSocket is the main class and contains the following main functions:

- **__init__(host, port)**: a function used to initialize the environment. In this function, the purpose of the host and port initialization is to simulate the connection to the server in the actual competition.

- **init_bots()**: a function designed to assist the players to train the Agent with bots. To specify a bot to participate in training, use the following command: **self.bots = [Bot1(2), Bot2(3), Bot3(4)]**

- **connect()**: a function used to simulate the connection from client to server. In training, the function will load the maps from the Maps folder into the environment.

- **receive()**: a function used to simulate the action in which the client receives messages from the server. During training, if this function is called for the first time, it will return the map's initial information and the Agent's initial State. In other cases, it will return the current map information and the Agent's current State.

- **send()**: a function used to simulate the action in which the client sends messages to the server. During training, there are 2 types of messages from the client:

+ Action: an action for the next step; the data type is numeric.

+ Request: a request for the parameters used to initialize the game environment. The parameters include: map, init_x, init_y, init_energy, max_step. For example, request = "map1,1,2,100,150" means that the server will use the map information (gold, obstacles) from map1 in the Maps folder, the players will start from position (x = 1, y = 2) with an initial energy of 100, and the game will have a maximum of 150 steps.
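Put together, a hypothetical training client drives these two message types through the MinerEnv wrappers described in a):

```python
# A hypothetical reset-then-step exchange through the MinerEnv wrappers.
from MinerEnv import MinerEnv

minerEnv = MinerEnv("localhost", 1111)      # host/port are only simulated during training
minerEnv.start()                            # connect (reads the maps during training)
minerEnv.send_map_info("map1,1,2,100,150")  # Request: map1, start at (1, 2), energy 100, max 150 steps
minerEnv.reset()                            # consume the initial map/State message
s = minerEnv.get_state()
minerEnv.step("5")                          # Action: 5 = craft (mine gold at the current cell)
```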
d) Maps: the Maps folder. It contains 05 sample maps for training. The trap information will be the same in these 05 maps; only the positions of the gold mines and the amount of gold will change in the preliminary round. Teams may redesign these maps to suit their own training strategies. Teams need to pay attention to the following when working with Maps:

- Each file in the Maps folder is considered a map; the filename is the map name.

- Each map is a matrix of integers with the following meanings:

| ID | Type |
| ---| ---|
| 0 | Land |
| -1 | Woods |
| -2 | Trap |
| -3 | Swamp |
| >0 | Gold |

- Select a training map as follows:

+ Function to select a map in the MinerEnv.py file: send_map_info(request)

+ Request structure: {map_name},{init_x},{init_y},{init_energy},{max_step}

+ For example, request = "map2,4,5,1000,150" means that map2 will be used for the match, the players will start from position (x = 4, y = 5) with an initial energy of 1000, and the match will have a maximum of 150 steps.

e) Bots (not the bots used in the preliminary round): 3 sample bots (bot1.py, bot2.py, and bot3.py) are provided to teams. Teams can create bots to suit their training strategies; a minimal example following the same interface is sketched below. The bots are put into play in the game environment via GAME_SOCKET_DUMMY.py. You will need to declare the bots (import Botx) and initialize them (init_bots()). Some of the main functions in the bot source code are as follows:

- **new_game(data)**: a function used to initialize the game environment (including the initial map information and the initial state of the bots).
- **new_state(data)**: a function used to update the State received from the server.
- **next_action()**: a function used to return an Action for the next step.
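The sketch below is a minimal additional bot, assuming the interface the environment expects: an `info` record that GAME_SOCKET_DUMMY.py updates in place, plus the three functions above. The PlayerInfo fields mirror the ones the environment touches, and the "craft when standing on gold, otherwise move randomly" policy is illustrative only.

```python
# A minimal custom bot sketch. PlayerInfo here is a stand-in record holding the
# fields that GAME_SOCKET_DUMMY.py reads and writes; the policy is illustrative.
from random import randrange
from MINER_STATE import State


class PlayerInfo:
    def __init__(self, id):
        self.playerId = id
        self.score = 0
        self.energy = 0
        self.posx = 0
        self.posy = 0
        self.lastAction = -1
        self.status = 0  # STATUS_PLAYING
        self.freeCount = 0


class Bot4:
    ACTION_CRAFT = 5

    def __init__(self, id):
        self.state = State()
        self.info = PlayerInfo(id)

    def new_game(self, data):
        self.state.init_state(data)    # parse the initial map/State message

    def new_state(self, data):
        self.state.update_state(data)  # parse the per-step State message

    def next_action(self):
        # Craft if standing on a gold cell, otherwise move in a random direction (0-3)
        if self.state.mapInfo.gold_amount(self.info.posx, self.info.posy) > 0:
            return self.ACTION_CRAFT
        return randrange(4)
```

It could then be registered for training via **self.bots = [Bot1(2), Bot2(3), Bot4(4)]** in init_bots().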
### 2. Deep reinforcement learning algorithm (Deep Q-learning)
In this section, the source code is written based on the deep reinforcement learning algorithm Deep Q-learning (DQN). The DQN algorithm was introduced in the work of Mnih et al. ("Human-level control through deep reinforcement learning." Nature 518.7540 (2015): 529-533). The source code contains the following program files: TrainingClient.py, DQNModel.py, and Memory.py.

a) TrainingClient.py: this program handles communication with the game environment. Some main points in this program are as follows:

- Initialize parameters for the algorithm:

		N_EPISODE = 10000 #The number of episodes for training

		MAX_STEP = 1000 #The number of steps for each episode

		BATCH_SIZE = 32 #The number of experiences used in each training session

		MEMORY_SIZE = 100000 #The memory capacity to save experiences

		SAVE_NETWORK = 100 #The number of episodes after which the DQN network will be saved

		INITIAL_REPLAY_SIZE = 1000 #The number of experiences required to start training

		INPUTNUM = 198 #The number of inputs for the DQN network

		ACTIONNUM = 6 #The number of actions, equal to the number of outputs of the DQN network

- Initialize the game environment:

		minerEnv = MinerEnv(HOST, PORT)

		minerEnv.start()

- Acquire the initial State of the Agent:

		minerEnv.reset()

		s = minerEnv.get_state()

- Perform an Action:

		action = DQNAgent.act(s)

		minerEnv.step(str(action))

- Acquire the current State of the Agent and the reward for the last Action, and check the conditions for terminating the episode:

		s_next = minerEnv.get_state()

		reward = minerEnv.get_reward()

		terminate = minerEnv.check_terminate()

- Train the DQN network (feed some experiences from Memory to DQNAgent to start training):

		batch = memory.sample(BATCH_SIZE)

		DQNAgent.replay(batch, BATCH_SIZE)

b) DQNModel.py: this source code is designed to allow the creation of deep learning models and model training functions. Some main points in this program are as follows:

- **Initialize numeric parameters**:

		gamma = 0.99, #The discount factor

		epsilon = 1, #Epsilon - the exploration factor

		epsilon_min = 0.01, #The minimum epsilon

		epsilon_decay = 0.999, #The epsilon decay applied at each update_epsilon() call

		learning_rate = 0.00025, #The learning rate for the DQN network

- **create_model()**: a function used to create a deep network (sketched below). The network contains 02 hidden layers (each layer has 300 nodes; the activation function of these 02 hidden layers is ReLU) and an output layer (06 nodes corresponding to the 06 Q-values of the 06 actions; the activation function is Linear).

- **act(state)**: a function used to return an Action for the Agent at the given State.

- **replay(samples, batch_size)**: a function used to train the deep network with experiences sampled from Memory.py.

- **update_epsilon()**: a function used to reduce epsilon (the exploration factor).

c) Memory.py: this source code is used to store data (experiences) for training.

**Note**: As the above source code is used for training, the game ends only when the map runs out of gold or the players are eliminated.
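To make the create_model() and act(state) bullets concrete, here is a minimal sketch using Keras (the library version listed in the installation section). The RMSprop optimizer and mean-squared-error loss are assumptions for illustration; DQNModel.py may differ in detail.

```python
# A sketch of the network described above plus epsilon-greedy action selection.
# Optimizer and loss are illustrative assumptions, not necessarily DQNModel.py's.
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import RMSprop


def create_model(input_num=198, action_num=6, learning_rate=0.00025):
    model = Sequential()
    model.add(Dense(300, input_dim=input_num, activation='relu'))  # hidden layer 1
    model.add(Dense(300, activation='relu'))                       # hidden layer 2
    model.add(Dense(action_num, activation='linear'))              # one Q-value per action
    model.compile(optimizer=RMSprop(lr=learning_rate), loss='mse')
    return model


def act(model, state, epsilon, action_num=6):
    # Explore with probability epsilon, otherwise pick the greedy action
    if np.random.rand() < epsilon:
        return np.random.randint(action_num)
    q_values = model.predict(state.reshape(1, -1))
    return int(np.argmax(q_values[0]))
```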
### B. Source code for competition - Miner-Testing-CodeSample

![alt text](image/codeAI/Picture3.png)
*Figure 3: The information flow between programs in the sample source code used in the competition*
- A source code package designed for teams to use in official competitions.

- The difference between this and the source code provided for training (Miner-Training-Local-CodeSample) is that this source code uses GAME_SOCKET.py instead of GAME_SOCKET_DUMMY.py. GAME_SOCKET.py allows data transfer to the server.

- Information on the HOST and PORT of the server is taken from the environment variables when TestingAgent.py is executed.

- The other source code (MINER_STATE.py and MinerEnv.py) is similar to that provided for training (Miner-Training-Local-CodeSample).

- In the source code, a trained DQN model (RLModelSample.json, RLModelSample.h5) is provided as an example for uploading a model in the competition (note: the model has not been trained enough to be competitive). In particular, the json file stores the network architecture and the h5 file stores the network weights.
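Loading such a json/h5 pair is typically done as in the sketch below (how TestingAgent.py does it may differ in detail):

```python
# A sketch of loading the provided sample model: the json file restores the
# architecture, the h5 file restores the weights.
import numpy as np
from keras.models import model_from_json

with open("RLModelSample.json", "r") as f:
    model = model_from_json(f.read())
model.load_weights("RLModelSample.h5")

# At test time, the loaded model maps a state vector to Q-values, e.g.:
# action = int(np.argmax(model.predict(state.reshape(1, -1))[0]))
```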
## MinerAI environment installation

This guideline helps users install a Python environment with the libraries used on the server, facilitating the running of the test code.

The environment can be installed in two ways:

- Install the environment directly on your PC:
	- Advantages: easy and familiar to those who have worked with Python
	- Disadvantages: there may be differences from the actual running environment due to a different OS (the actual environment in which the server runs your code is Ubuntu Server 18.04)

- Use Docker to install:
	- Advantages: the code environment is similar to the actual running environment
	- Disadvantages: installing Docker may be difficult on some older OSes

Installation instructions:
- Python 3.6.9 (Ubuntu) - Python 3.7.4 (Windows)
- Tensorflow 1.14.0 or 2.2.0
- Keras 2.3.1
- Numpy 1.18.4
- Pandas 1.0.4
- PyTorch 1.5.0
- joblib 0.16.0
- ray 0.8.6 (ray[rllib], ray[tune])
- requests 2.24.0
- semver 2.10.2
- tf-agents 0.3.0 (0.5.0 on Tensorflow 2.2.0)
- Pyqlearning 1.2.4
- Mushroom-RL 1.4.0
- gym 0.17.2
- opencv-python 4.2.0.34
- prettytable 0.7.2
- yacs 0.1.7

1. Installing directly
    1. Windows
        1. Install Python 3.7.4:

           Download the executable installer relevant to the OS on your PC at: https://www.python.org/downloads/release/python-374/

           (The download link is located in the Files section at the bottom of the page)

           Install, then add the Windows environment variables PYTHON_HOME/ and PYTHON_HOME/Scripts to the PATH

           ![alt text](image/minerEnv/Picture1.png)
           ![alt text](image/minerEnv/Picture2.png)

           Verify the installation and make sure the version is 3.7.4

           ![alt text](image/minerEnv/Picture3.png)
        2. Install pip3:

           Run the following command to install:
           ```
           python37 -m pip install --upgrade pip
           ```
           Verify the installation and make sure pip3 is installed in Python37

           ![alt text](image/minerEnv/Picture4.png)
        3. Install virtualenv:

           Run the following command to install:
           ```
           pip3 install virtualenv
           ```

           Verify the installation and make sure virtualenv is installed in Python37

           ![alt text](image/minerEnv/Picture5.png)
        4. Install libraries:

           In order not to affect your normal Python environment, the installation will be done in a virtual environment.

           - Change the current directory to the directory where you want to install, and create a virtual environment:
           ```
           virtualenv -p python37 {env_name}
           ```
           **{env_name}**: an environment name of your choice.

           For example, if you want to name the environment miner, the command will be:
           ```
           virtualenv -p python37 miner
           ```
           - Activate the virtual environment:
           ```
           .\{env_name}\Scripts\activate
           ```
           - Install the libraries:
           ```
           pip3 install numpy==1.18.4
           pip3 install keras==2.3.1
           pip3 install pandas==1.0.4
           pip3 install tensorflow==1.14.0
           pip3 install torch==1.5.0+cpu torchvision==0.6.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
           pip3 install joblib==0.16.0
           pip3 install ray==0.8.6
           pip3 install ray[rllib]
           pip3 install ray[tune]
           pip3 install requests==2.24.0
           pip3 install semver==2.10.2
           pip3 install tf-agents==0.3.0
           pip3 install Pyqlearning==1.2.4
           pip3 install Mushroom-RL==1.4.0
           pip3 install gym==0.17.2
           pip3 install opencv-python==4.2.0.34
           pip3 install prettytable==0.7.2
           pip3 install yacs==0.1.7
           ```
           - Run code: the code is run in the virtual environment set up in the previous steps; therefore, make sure to activate the virtual environment in advance.
    2. Ubuntu 18.04
        1. Install Python 3.6.9:

           Installation commands:
           ```
           apt-get update
           apt-get install python3
           ```
           (See here for how to install on other OS versions: https://askubuntu.com/questions/865554/how-do-i-install-python-3-6-using-apt-get)

           Verify that the right version is installed:

           ![alt text](image/minerEnv/Picture6.png)
        2. Install pip:

           Run the following command to install:
           ```
           python3 -m pip install --upgrade pip
           ```
           Or:
           ```
           sudo apt install python3-pip
           ```
        3. Install virtualenv:

           Run the following command to install:
           ```
           pip3 install virtualenv
           ```
        4. Install libraries:

           In order not to affect your normal Python environment, the installation will be done in a virtual environment.

           - Change the current directory to the directory where you want to install, and create a virtual environment:
           ```
           virtualenv -p python3 {env_name}
           ```
           **{env_name}**: an environment name of your choice.

           For example, if you want to name the environment miner, the command will be:
           ```
           virtualenv -p python3 miner
           ```
           - Activate the virtual environment:
           ```
           cd {env_name}/bin
           source ./activate
           ```
           - Install the libraries:
           ```
           pip3 install numpy==1.18.4
           pip3 install keras==2.3.1
           pip3 install pandas==1.0.4
           pip3 install tensorflow==1.14.0
           pip3 install torch==1.5.0+cpu torchvision==0.6.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
           pip3 install joblib==0.16.0
           pip3 install ray==0.8.6
           pip3 install ray[rllib]
           pip3 install ray[tune]
           pip3 install requests==2.24.0
           pip3 install semver==2.10.2
           pip3 install tf-agents==0.3.0
           pip3 install Pyqlearning==1.2.4
           pip3 install Mushroom-RL==1.4.0
           pip3 install gym==0.17.2
           pip3 install opencv-python==4.2.0.34
           pip3 install prettytable==0.7.2
           pip3 install yacs==0.1.7
           ```
           - Run code: the code is run in the virtual environment set up in the previous steps; therefore, make sure to activate the virtual environment in advance.
2. Using Docker

   We provide a Docker image with all the environments installed, similar to the actual server environment.

   This section shows how to install Docker and how to use the image we provide.

    1. Install Docker

       You can easily find comprehensive and detailed instructions for installing Docker on the internet. Below are just some examples.

        1. Windows:

           - Windows 10: Visit the link below to download the installer and install it on your PC:
             https://hub.docker.com/editions/community/docker-ce-desktop-windows/
           - Windows 7: Follow the instructions at: https://webme.ie/how-to-install-docker-on-windows-7/
        2. Ubuntu:
           - Ubuntu 18.04: Follow the instructions at:
             https://www.digitalocean.com/community/tutorials/how-to-install-and-use-docker-on-ubuntu-18-04
           - Ubuntu 16.04: Follow the instructions at: https://docs.docker.com/engine/install/ubuntu/
    2. Use the Docker image
        1. Pull the image:

           Execute the following command to pull the image:
           ```
           docker pull codelearnio/miner-ai:training-v5
           ```

           Verify that the image has been successfully pulled with the following command:
           ```
           docker images -a
           ```
           The displayed information will include the Docker image with the following details:
           ![alt text](image/minerEnv/Picture7.png)

        2. Use the image:

           This section covers some basic commands for working with a Docker container.

           For other commands, refer to the following link:
           https://docs.docker.com/engine/reference/commandline/docker/

           - Create and run a Docker container from the existing image:

             Create and run a Docker container from the provided Docker image with the following command:
             ```
             docker run -it -v {WORKING_DIR}:/v b71e2ea7dec6
             ```
             Note: **{WORKING_DIR}** is the path to the directory where your source code is located.

             For example, if you put the source code in the **D:\MinerAI** directory, the run command will be:
             ```
             docker run -it -v D:\MinerAI:/v b71e2ea7dec6
             ```
             You can name the container by adding the parameter: **--name={name}**

             Change the current directory to the mounted directory: **cd /v**

             Check the files mounted into the container: **ls**
             ![alt text](image/minerEnv/Picture8.png)
             Then run the **python3** command with your source code without any additional installation.

             For example:
             ```
             python3 TrainingClient.py
             ```
           - Check the existing containers:
             ```
             docker container ls -a
             ```
             ![alt text](image/minerEnv/Picture9.png)
             As shown above, there are 2 containers initialized from the image **b71e2ea7dec6**: container **2f3d9797c028** is up and running, while container **f39ab8375c62** has stopped.

           - Attach to a running Docker container:
             ```
             docker attach {container_id}
             ```
             ![alt text](image/minerEnv/Picture10.png)

             Note: You can replace ***{container_id}*** with ***{container_name}***
           - Start a stopped Docker container:
             ```
             docker start -a {container_id}
             ```
             ![alt text](image/minerEnv/Picture11.png)
           - Stop a running container:
             ```
             docker stop {container_id}
             ```
           - Remove a Docker container:
             ```
             docker rm {container_id}
             ```


***

***Vietnamese version (translated)***
## Sample Source Code Description: Training and Competition

During the competition, the State information returned after an Action is performed includes:

- Information about the competing agents

```json
{
    "playerId": the agent's identifier, integer;
    "posx": the agent's X position, integer;
    "posy": the agent's Y position, integer;
    "score": the amount of gold the agent has mined, integer;
    "energy": the agent's remaining energy, integer;
    "lastAction": the action just performed, integer;
    "status": the agent's status - playing or eliminated, integer
}
```
- Information about the remaining obstacles on the map (their position and the amount of energy deducted when an agent passes through).
- Information about the remaining gold fields on the map (their position and the amount of gold).
- The size of the map (height and width).

From the State information returned above, each team decides its own training strategy, such as designing a reward function (Reward Function) and defining a state space (State Space). In the two sample source code packages **(Miner-Training-Local-CodeSample and Miner-Testing-CodeSample)** provided to teams (described below), we give an example of defining a State Space and designing a Reward Function in the two functions *get_state()* and *get_reward()*, respectively. Below is an overview of the two source code packages provided for training and competition:
## A. Source code for training - Miner-Training-Local-CodeSample
Below is an overview of the two code bases provided for training and competing:
## A. Training code - Miner-Training-Local-CodeSample
This is the sample source code used for training on the teams' local machines. It consists of 02 main parts: the Miner Game environment and the reinforcement learning algorithm (Deep Q-learning - DQN). Figure 1 gives a visual overview of the information flow between the programs.

![alt text](image/codeAI/Picture1.png)
*Figure 1: Information flow between the programs in the sample source code
used for training*


The two parts in detail:
### 1. The Miner Game environment.
The environment source code is taken from the original source of the Miner Game on the Codelearn system. It consists of: GAME_SOCKET_DUMMY.py, MINER_STATE.py, MinerEnv.py, Maps, and 03 bots (bot1.py, bot2.py, bot3.py). Figure 2 shows how map information and agent state are exchanged between MinerEnv.py and GAME_SOCKET_DUMMY.py. The programs are described in detail below:

![alt text](image/codeAI/Picture2.png)
*Figure 2: Information flow between MinerEnv.py and GAME_SOCKET_DUMMY.py, simulating the client and server during training*

a) MinerEnv.py: The program follows the common structure of a reinforcement learning environment, giving teams simple and convenient access to the main program (GAME_SOCKET_DUMMY.py). Its main functions are as follows (a usage sketch follows this list):


- **start()**: called exactly once, to simulate connecting to the server to start playing. In training, this function calls **connect()** in GAME_SOCKET_DUMMY.py to read the 05 maps in the Maps folder.

- **send_map_info()**: used to select the map for training the agent.

- **reset()**: used to initialize the map and the agent's state. It calls **receive()** in GAME_SOCKET_DUMMY.py to get the initial map information, stored in a JSON-formatted message, and **init_state(message)** in MINER_STATE.py to copy that initial map information into the agent's state.

- **step()**: used to send an action to GAME_SOCKET_DUMMY.py and receive the changed map information and agent state.

- **get_state()**: provided as an example of defining the agent's state for training. Depending on its own training strategy, each team can rewrite this function to produce a more suitable agent state.

- **get_reward()**: provided as an example of defining the agent's reward function for training. Depending on its own training strategy, each team can rewrite this function to produce a more suitable reward function.
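
Put together, one episode of interaction with the training environment looks roughly like this (a sketch assuming the five-field request format that GAME_SOCKET_DUMMY.py's send() expects, described below; the fixed action 4 simply rests every step and stands in for a real policy):

```python
# Sketch: one episode against the dummy environment, no learning involved.
from MinerEnv import MinerEnv

env = MinerEnv("localhost", 1111)       # placeholders; the dummy socket ignores them
env.start()                             # reads the 05 maps once
env.send_map_info("map1,1,2,100,150")   # map, init_x, init_y, init_energy, max_step
env.reset()
s = env.get_state()
while not env.check_terminate():
    env.step("4")                       # action 4 = rest; actions are sent as strings
    s = env.get_state()
    r = env.get_reward()
```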
b) MINER_STATE.py ***(teams should not modify this program's source code)***: This program is the sample source for storing the map information and the agent state received from GAME_SOCKET_DUMMY.py (in competition, received from the server). It is designed to help teams manage the state easily during training. The two classes, MapInfo and State, and their main functions are as follows:

- **MapInfo** (class): stores all of the map information, including: max_x, max_y, maxStep, numberOfPlayer, golds (the gold remaining on the map at the current time), and obstacles (the obstacles currently on the map).
+ **update(golds, changedObstacles)**: updates the map information after every step.

- **State** (class): holds the game state (the player's state and the map).

+ **init_state(data)**: initializes the map information and the agent state at the start of an episode in training (or of a match in competition).
+ **update_state(data)**: updates the game state after every step. The data passed in includes the map information and the agent state.

c) GAME_SOCKET_DUMMY.py ***(teams should not modify this program's source code)***: This program simulates the Miner Game, including the exchange of messages with the server. It consists of 07 classes: ObstacleInfo, GoldInfo, PlayerInfo, GameInfo, UserMatch, StepState, and GameSocket. GameSocket is the main class, with the following main functions:

- **__init__(host, port)**: a function used to initialize the environment. Here, the host and port are only used to simulate the connection to the server in the actual competition.

- **init_bots()**: designed to let players train their agent against bots. To specify the bots taking part in training, use the following line: **self.bots = [Bot1(2), Bot2(3), Bot3(4)]**.

- **connect()**: simulates the client's connect action to the server. In training, it loads the maps from the Maps folder into the environment.

- **receive()**: simulates the client receiving a message from the server. In training, it returns the initial map information and the agent's initial state the first time it is called, and the current map information and the agent's current state afterwards.

- **send()**: simulates the client sending a message to the server. In training, there are 2 kinds of message from the client:

+ Action: the action for the next step, a number.

+ Request: the parameters for initializing the game environment: (map, init_x, init_y, init_energy, max_step). For example, request = "map1,1,2,100,150" means the server will use the map information (gold, obstacles) of map1 in the Maps folder, place the player at cell (x = 1, y = 2) with an initial energy of 100, and the match lasts at most 150 steps.
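
To make the two message kinds concrete, here is a tiny illustrative helper (hypothetical, not part of the samples) that builds a reset request in exactly this format:

```python
# Sketch: build a reset request in the format GameSocket.send() expects.
# GameSocket treats a numeric message as an action and anything else as a reset request.
def make_request(map_name, init_x, init_y, init_energy, max_step):
    return f"{map_name},{init_x},{init_y},{init_energy},{max_step}"

assert make_request("map1", 1, 2, 100, 150) == "map1,1,2,100,150"
```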
d) Maps: The Maps folder contains 05 sample maps for training. The trap information in these 05 maps is kept unchanged from the qualifying round; only the positions and amounts of gold differ. Teams can redesign these maps to suit their own training strategies. Some points to note when working with Maps:

- Each file in the Maps folder is one map; the filename is the map name.

- Each map is a matrix of integers with the following meaning:

| ID | Type |
| ---| ---|
| 0 | Land |
| -1 | Forest |
| -2 | Trap |
| -3 | Swamp |
| >0 | Gold |


- Selecting a map in training:

+ The map selection function in MinerEnv.py: send_map_info(request)

+ Request structure: {map_name},{init_x},{init_y},{init_energy}

+ For example: request = "map2,4,5,1000" means the match uses map2, and the players start from coordinates x = 4, y = 5 with an initial energy of 1000.

e) Bots ***(not the bots used in the qualifying round)***: 03 sample bots (bot1.py, bot2.py, and bot3.py) are provided to the teams. Teams can create bots that fit their own training strategies. Bots are added to the game environment in GAME_SOCKET_DUMMY.py in 02 steps: declaring the bots (import Botx) and initializing them (init_bots()). The main functions in the bot sources are:

- **new_game(data)**: initializes the game environment (the map information and the bot's initial state).
- **new_state(data)**: updates the state received from the server.
- **next_action()**: returns the action for the next step.

### 2. The reinforcement learning algorithm (Deep Q-learning)
This part of the source code implements the deep Q-learning (DQN) algorithm introduced by Mnih et al. *("Human-level control through deep reinforcement learning." Nature 518.7540 (2015): 529-533)*. It consists of the following files: TrainingClient.py, DQNModel.py, and Memory.py.

a) TrainingClient.py: the source of the DQN training client that communicates with the game environment. The parts to note are as follows (the sketch after this list assembles them into the full loop):

- Initializing the algorithm's parameters:

N_EPISODE = 10000 #The number of episodes for training
MAX_STEP = 1000 #The number of steps per episode
BATCH_SIZE = 32 #The number of experiences used per training update
MEMORY_SIZE = 100000 #The size of the experience replay memory
SAVE_NETWORK = 100 #Save the DQN network after this many episodes
INITIAL_REPLAY_SIZE = 1000 #The number of experiences required in memory before training starts
INPUTNUM = 198 #The number of inputs of the DQN network
ACTIONNUM = 6 #The number of actions, i.e. the number of outputs of the DQN network

- Initializing the game environment:

minerEnv = MinerEnv(HOST, PORT)

minerEnv.start()

- Getting the agent's initial state:

minerEnv.reset()

s = minerEnv.get_state()

- Performing an action:

action = DQNAgent.act(s)

minerEnv.step(str(action))

- Getting the agent's new state, the reward for the action just performed, and checking the episode-termination condition:

s_next = minerEnv.get_state()

reward = minerEnv.get_reward()

terminate = minerEnv.check_terminate()

- Training the DQN network (sampling a batch of experiences from Memory and feeding it to DQNAgent for training):

batch = memory.sample(BATCH_SIZE)

DQNAgent.replay(batch, BATCH_SIZE)
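
The sketch below condenses how these pieces fit together into the training loop (an outline under the assumption that `DQNAgent` is the DQN instance and `memory` the Memory instance described next; the real TrainingClient.py additionally varies maps and start positions across episodes):

```python
# Sketch: the core training loop of TrainingClient.py, condensed.
for episode in range(N_EPISODE):
    minerEnv.send_map_info("map1,1,2,100,150")  # choose a map for this episode
    minerEnv.reset()
    s = minerEnv.get_state()
    for step in range(MAX_STEP):
        action = DQNAgent.act(s)                # epsilon-greedy action
        minerEnv.step(str(action))
        s_next = minerEnv.get_state()
        reward = minerEnv.get_reward()
        terminate = minerEnv.check_terminate()
        memory.push(s, action, reward, terminate, s_next)  # store the experience
        if memory.length > INITIAL_REPLAY_SIZE:
            DQNAgent.replay(memory.sample(BATCH_SIZE), BATCH_SIZE)
            DQNAgent.update_epsilon()           # decay exploration over time
        s = s_next
        if terminate:
            break
    if episode % SAVE_NETWORK == 0:
        DQNAgent.save_model("TrainedModels/DQNmodel")  # path is illustrative
```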
b) DQNModel.py: this source file creates the deep learning models and the functions to train them. The parts to note are as follows:

- Initializing the learning parameters:

gamma = 0.99, #The discount factor

epsilon = 1, #Epsilon - the exploration factor

epsilon_min = 0.01, #The minimum epsilon

epsilon_decay = 0.999, #The epsilon decay applied on each update_epsilon call

learning_rate = 0.00025, #The learning rate for the DQN network

- create_model(): creates a deep network with 02 hidden layers of 300 nodes each, using the ReLU activation, and one output layer of 06 nodes corresponding to the 06 Q-values of the 06 actions, using the linear activation.

- act(state): returns the agent's action at the given state.

- replay(samples, batch_size): trains the deep network on experiences sampled from the Memory.

- update_epsilon(): decays epsilon, i.e. reduces exploration.

c) Memory.py: stores the data (experiences) used for training.

**Note**: Since the source code above is used for training, a match only ends when the map runs out of gold or the player is eliminated (by running out of energy or moving off the map).

## B. Testing code - Miner-Testing-CodeSample

![alt text](image/codeAI/Picture3.png)
*Figure 3: Information flow between the programs in the sample source code used in competition.*
- This source code is designed for the teams to use in the official competition.
- The difference from the code provided for training (Miner-Training-Local-CodeSample) is that GAME_SOCKET.py is used instead of GAME_SOCKET_DUMMY.py. GAME_SOCKET.py exchanges data with the real server.
- The server's HOST and PORT are taken from environment variables when TestingAgent.py runs.
- The remaining sources (MINER_STATE.py and MinerEnv.py) are kept the same as in the code provided for training (Miner-Training-Local-CodeSample).
- A trained DQN model (RLModelSample.json, RLModelSample.h5) is included as an example of loading a model for competition (note: this model has not been trained enough to actually compete). The json file stores the network configuration and the h5 file stores the network weights.
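
Loading such a json/h5 pair follows the standard Keras pattern; a minimal sketch (the zero state vector is only a placeholder for a real get_state() output):

```python
# Sketch: load the provided sample model and pick a greedy action.
import numpy as np
from keras.models import model_from_json

with open("RLModelSample.json", "r") as f:
    model = model_from_json(f.read())   # rebuild the network configuration
model.load_weights("RLModelSample.h5")  # restore the trained weights

state = np.zeros(198)                   # an INPUTNUM-sized state vector
action = int(np.argmax(model.predict(state.reshape(1, -1))))
print(action)                           # one of the 06 actions (0..5)
```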
## Setting up the MinerAI environment

This section guides you through installing a Python environment with the libraries used on the server, so that your local test runs match the server more closely.

We cover 2 ways to set up the environment:

- Installing directly on your machine:
  - Pros: easy to do, familiar if you have coded in and installed Python before
  - Cons: may differ from the actual runtime environment because of OS differences (the actual environment in which the server runs your code is Ubuntu Server 18.04)

- Installing with Docker:
  - Pros: replicates the coding environment of the actual runtime environment
  - Cons: installing Docker can be difficult on some older operating systems

What gets installed:
- Python 3.6.9 (Ubuntu) - Python 3.7.4 (Windows)
- Tensorflow 1.14.0 or 2.2.0
- Keras 2.3.1
- Numpy 1.18.4
- Pandas 0.15
- PyTorch 1.5.0
- joblib 0.16.0
- ray 0.8.6 (ray[rllib], ray[tune])
- requests 2.24.0
- semver 2.10.2
- tf-agents 0.3.0 (0.5.0 with Tensorflow 2.2.0)
- Pyqlearning v1.2.4
- Mushroom-RL v1.4.0
- gym 0.17.2
- opencv-python 4.2.0.34
- prettytable 0.7.2
- yacs 0.1.7

1. Running directly
1. Windows
1. Install Python 3.7.4:

Download the executable installer for your OS from: https://www.python.org/downloads/release/python-374/

(The download links are in the Files section at the bottom of the page)

Run the installer and point the Windows environment variables to PYTHON_HOME/ and PYTHON_HOME/Scripts

![alt text](image/minerEnv/Picture1.png)
![alt text](image/minerEnv/Picture2.png)

Check the installation; make sure the version is 3.7.4

![alt text](image/minerEnv/Picture3.png)
2. Install pip3:

Run the following command to install:
```
python37 -m pip install --upgrade pip
```
Check the installation; make sure pip3 is installed under Python37

![alt text](image/minerEnv/Picture4.png)
3. Install virtualenv:

Run the following command to install:
```
pip3 install virtualenv
```

Check the installation; make sure virtualenv is installed under Python37

![alt text](image/minerEnv/Picture5.png)
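
Before creating the virtual environment in step 4 below, you can double-check which interpreter the `python37` launcher resolves to with a short illustrative script (not part of the competition code):

```python
# check_interpreter.py - confirm the interpreter matches the version this guide expects.
import sys

expected = (3, 7, 4)  # the Windows version used in this guide
actual = sys.version_info[:3]
print("Python", ".".join(map(str, actual)))
if actual != expected:
    raise SystemExit(f"Expected Python {expected}, found {actual}")
```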
4. Install the libraries:

To avoid affecting your global Python environment, installation is done inside a virtual environment.

- Change to the directory where you want to set things up and create the virtual environment:
```
virtualenv -p python37 {env_name}
```
{env_name}: the environment name of your choosing.

For example, if you want to name the environment miner, the command will be:
```
virtualenv -p python37 miner
```
- Activate the virtual environment:
```
.\{env_name}\Scripts\activate
```
- Install the libraries:
```
pip3 install numpy==1.18.4
pip3 install keras==2.3.1
pip3 install pandas==1.0.4
pip3 install tensorflow==1.14.0
pip3 install torch==1.5.0+cpu torchvision==0.6.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
pip3 install joblib==0.16.0
pip3 install ray==0.8.6
pip3 install ray[rllib]
pip3 install ray[tune]
pip3 install requests==2.24.0
pip3 install semver==2.10.2
pip3 install tf-agents==0.3.0
pip3 install Pyqlearning==v1.2.4
pip3 install Mushroom-RL==v1.4.0
pip3 install gym==0.17.2
pip3 install opencv-python==4.2.0.34
pip3 install prettytable==0.7.2
pip3 install yacs==0.1.7
```
- Run code: your code runs inside the virtual environment set up in the previous step, so don't forget to activate it before running!
2. Ubuntu 18.04
1. Install Python 3.6.9:

Installation commands:
```
apt-get update
apt-get install python3
```
(See https://askubuntu.com/questions/865554/how-do-i-install-python-3-6-using-apt-get for installation on other OS versions)

Check that the correct version is installed:

![alt text](image/minerEnv/Picture6.png)
2. Install pip:

Run the following command to install:
```
python3 -m pip install --upgrade pip
```
Or
```
sudo apt install python3-pip
```
3. Install virtualenv:

Run the following command to install:
```
pip3 install virtualenv
```
4. Install the libraries:

To avoid affecting your global Python environment, installation is done inside a virtual environment.

- Change to the directory where you want to set things up and create the virtual environment:
```
virtualenv -p python3 {env_name}
```
{env_name}: the environment name of your choosing.

For example, if you want to name the environment miner, the command will be:
```
virtualenv -p python3 miner
```
- Activate the virtual environment:
```
cd {env_name}/bin
source ./activate
```
- Install the libraries:
```
pip3 install numpy==1.18.4
pip3 install keras==2.3.1
pip3 install pandas==0.15
pip3 install tensorflow==1.14.0
pip3 install torch==1.5.0+cpu torchvision==0.6.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
pip3 install joblib==0.16.0
pip3 install ray==0.8.6
pip3 install ray[rllib]
pip3 install ray[tune]
pip3 install requests==2.24.0
pip3 install semver==2.10.2
pip3 install tf-agents==0.3.0
pip3 install Pyqlearning==v1.2.4
pip3 install Mushroom-RL==v1.4.0
pip3 install gym==0.17.2
pip3 install opencv-python==4.2.0.34
pip3 install prettytable==0.7.2
pip3 install yacs==0.1.7
```
- Run code: your code runs inside the virtual environment set up in the previous step, so don't forget to activate it before running!
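
With either OS, once the libraries are installed you can verify the activated virtual environment with a quick illustrative check (the expected versions follow the list at the top of this section):

```python
# Sketch: verify the key libraries are importable inside the activated virtualenv.
import numpy, pandas, tensorflow, keras, gym

print("numpy     ", numpy.__version__)       # expected 1.18.4
print("pandas    ", pandas.__version__)
print("tensorflow", tensorflow.__version__)  # expected 1.14.0 (or 2.2.0)
print("keras     ", keras.__version__)       # expected 2.3.1
print("gym       ", gym.__version__)         # expected 0.17.2
```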
2. Using Docker

We provide a Docker image with all the environments preinstalled, matching the actual server environment.

This section shows how to install Docker and how to use the image we provide.

1. Install Docker

Docker installation is documented thoroughly and in detail in many places on the internet, so we only provide the links we have followed and tested successfully.

1. Windows:

- Windows 10: visit the link below to download the installer and install it on your PC:
https://hub.docker.com/editions/community/docker-ce-desktop-windows/
- Windows 7: follow the instructions at: https://webme.ie/how-to-install-docker-on-windows-7/
2. Ubuntu:
- Ubuntu 18.04: follow the instructions at:
https://www.digitalocean.com/community/tutorials/how-to-install-and-use-docker-on-ubuntu-18-04
- Ubuntu 16.04: follow the instructions at: https://docs.docker.com/engine/install/ubuntu/
2. Use Docker image
1. Pull image:

Run the following command to pull the image:
```
docker pull codelearnio/miner-ai:training-v5
```

Check that the image was pulled successfully with the command:
```
docker images -a
```
The displayed information will include the Docker image with the following details:
![alt text](image/minerEnv/Picture7.png)

2. Use image:

This section covers some basic commands for working with the Docker container.

For other commands, refer to the following link:
https://docs.docker.com/engine/reference/commandline/docker/

- Create and run a Docker container from the existing image:

Create and run a Docker container from the provided image with the following command:
```
docker run -it -v {WORKING_DIR}:/v b71e2ea7dec6
```
Note: **{WORKING_DIR}** is the path to the directory where your source code is located.

For example, if you put the source code in the **D:\MinerAI** directory, the run command will be:
```
docker run -it -v D:\MinerAI:/v b71e2ea7dec6
```
You can name the container by adding the parameter: **--name={name}**

Change the current directory to the bound directory: **cd /v**

Check the files bound into the container: **ls**
![alt text](image/minerEnv/Picture8.png)
Here you can run the **python3** command with your source code without installing anything else.
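
A quick illustrative way to confirm that both the bind mount and the preinstalled environment work is to run a small check inside the container first:

```python
# Sketch: run inside the container to confirm the mount and the interpreter.
import os
import sys

print(sys.version)               # should report the image's Python build
print(sorted(os.listdir("/v")))  # should list the source files you mounted
```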
For example, to start the training client:
```
python3 TrainingClient.py
```
- Check the existing containers:
```
docker container ls -a
```
![alt text](image/minerEnv/Picture9.png)
As shown above, there are 2 containers initialized from the image **b71e2ea7dec6**: container **2f3d9797c028** is running, while container **f39ab8375c62** has stopped.

- Attach to a running Docker container:
```
docker attach {container_id}
```
![alt text](image/minerEnv/Picture10.png)

Note: you can replace ***{container_id}*** with ***{container_name}***
- Start a stopped Docker container:
```
docker start -a {container_id}
```
![alt text](image/minerEnv/Picture11.png)
- Stop a running container:
```
docker stop {container_id}
```
- Remove a Docker container:
```
docker rm {container_id}
```
--------------------------------------------------------------------------------
/Miner-Colab-CodeSample/Miner_Training_Colab_CodeSample.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "code",
5 |    "execution_count": 2,
6 |    "metadata": {
7 |     "colab": {
8 |      "base_uri": "https://localhost:8080/",
9 |      "height": 105
10 |     },
11 |     "colab_type": "code",
12 |     "id": "qHcugJLenzKZ",
13 |     "outputId": "dcbaa3c4-e70c-4cb7-fb12-6221a3bf016a"
14 |    },
15 |    "outputs": [
16 |     {
17 |      "name": "stderr",
18 |      "output_type": "stream",
19 |      "text": [
20 |       "Using TensorFlow backend.\n"
21 |      ]
22 |     },
23 |     {
24 |      "name": "stdout",
25 |      "output_type": "stream",
26 |      "text": [
27 |       "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/compat/v2_compat.py:96: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.\n",
28 |       "Instructions for updating:\n",
29 |       "non-resource variables are not supported in the long term\n"
30 |      ]
31 |     }
32 |    ],
33 |    "source": [
34 |     "import sys\n",
35 |     "import numpy as np\n",
36 |     "import pandas as pd\n",
37 |     "import datetime\n",
38 |     "import json\n",
39 |     "from array import *\n",
40 |     "import os\n",
41 |     "import math\n",
42 |     "from random import randrange\n",
43 |     "import random\n",
44 |     "\n",
45 |     "from keras.models import Sequential\n",
46 |     "from keras.models import model_from_json\n",
47 |     "from keras.layers import Dense, Activation\n",
48 |     "from keras import optimizers\n",
49 |     "\n",
50 |     "import tensorflow.compat.v1 as tf\n",
51 |     "from tensorflow.compat.v1.keras import backend as K\n",
52 |     "tf.disable_v2_behavior()"
53 |    ]
54 |   },
55 |   {
56 |    "cell_type": "code",
57 |    "execution_count": 3,
58 |    "metadata": {
59 |     "colab": {},
60 |     "colab_type": "code",
61 |     "id": "e3tvcApSyW1g"
62 |    },
63 |    "outputs": [],
64 |    "source": [
65 |     "#Classes in GAME_SOCKET_DUMMY.py\n",
66 |     "class ObstacleInfo:\n",
67 |     "    # initial energy for obstacles: Land (key = 0): -1, Forest(key = -1): 0 (random), Trap(key = -2): -10, Swamp (key = -3): -5\n",
68 |     "    types = {0: -1, -1: 0, -2: -10, -3: -5}\n",
69 |     "\n",
70 |     "    def __init__(self):\n",
71 |     "        self.type = 0\n",
72 |     "        self.posx = 0\n",
73 |     "        self.posy = 0\n",
74 |     "        self.value = 0\n",
75 |     "    \n",
76 |     "class GoldInfo:\n",
77 |     "    def __init__(self):\n",
78 |     "        self.posx = 0\n",
79 |     "        self.posy = 0\n",
80 |     "        self.amount = 0\n",
81 |     "\n",
82 |     "    def loads(self, data):\n",
83 |     "        golds = []\n",
84 |     "        for gd in data:\n",
85 |     "            g = GoldInfo()\n",
86 |     "
g.posx = gd[\"posx\"]\n", 87 | " g.posy = gd[\"posy\"]\n", 88 | " g.amount = gd[\"amount\"]\n", 89 | " golds.append(g)\n", 90 | " return golds\n", 91 | "\n", 92 | "class PlayerInfo:\n", 93 | " STATUS_PLAYING = 0\n", 94 | " STATUS_ELIMINATED_WENT_OUT_MAP = 1\n", 95 | " STATUS_ELIMINATED_OUT_OF_ENERGY = 2\n", 96 | " STATUS_ELIMINATED_INVALID_ACTION = 3\n", 97 | " STATUS_STOP_EMPTY_GOLD = 4\n", 98 | " STATUS_STOP_END_STEP = 5\n", 99 | "\n", 100 | " def __init__(self, id):\n", 101 | " self.playerId = id\n", 102 | " self.score = 0\n", 103 | " self.energy = 0\n", 104 | " self.posx = 0\n", 105 | " self.posy = 0\n", 106 | " self.lastAction = -1\n", 107 | " self.status = PlayerInfo.STATUS_PLAYING\n", 108 | " self.freeCount = 0\n", 109 | "\n", 110 | "class GameInfo:\n", 111 | " def __init__(self):\n", 112 | " self.numberOfPlayers = 1\n", 113 | " self.width = 0\n", 114 | " self.height = 0\n", 115 | " self.steps = 100\n", 116 | " self.golds = []\n", 117 | " self.obstacles = []\n", 118 | "\n", 119 | " def loads(self, data):\n", 120 | " m = GameInfo()\n", 121 | " m.width = data[\"width\"]\n", 122 | " m.height = data[\"height\"]\n", 123 | " m.golds = GoldInfo().loads(data[\"golds\"])\n", 124 | " m.obstacles = data[\"obstacles\"]\n", 125 | " m.numberOfPlayers = data[\"numberOfPlayers\"]\n", 126 | " m.steps = data[\"steps\"]\n", 127 | " return m\n", 128 | "\n", 129 | "class UserMatch:\n", 130 | " def __init__(self):\n", 131 | " self.playerId = 1\n", 132 | " self.posx = 0\n", 133 | " self.posy = 0\n", 134 | " self.energy = 50\n", 135 | " self.gameinfo = GameInfo()\n", 136 | "\n", 137 | " def to_json(self):\n", 138 | " return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4)\n", 139 | "\n", 140 | "class StepState:\n", 141 | " def __init__(self):\n", 142 | " self.players = []\n", 143 | " self.golds = []\n", 144 | " self.changedObstacles = []\n", 145 | "\n", 146 | " def to_json(self):\n", 147 | " return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4)" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 19, 153 | "metadata": { 154 | "colab": {}, 155 | "colab_type": "code", 156 | "id": "Madmz8hE1op6" 157 | }, 158 | "outputs": [], 159 | "source": [ 160 | "#Main class in GAME_SOCKET_DUMMY.py\n", 161 | "class GameSocket:\n", 162 | " bog_energy_chain = {-5: -20, -20: -40, -40: -100, -100: -100}\n", 163 | "\n", 164 | " def __init__(self):\n", 165 | " self.stepCount = 0\n", 166 | " self.maxStep = 0\n", 167 | " self.mapdir = \"Maps\" # where to load all pre-defined maps\n", 168 | " self.mapid = \"\"\n", 169 | " self.userMatch = UserMatch()\n", 170 | " self.user = PlayerInfo(1)\n", 171 | " self.stepState = StepState()\n", 172 | " self.maps = {} # key: map file name, value: file content\n", 173 | " self.map = [] # running map info: 0->Land, -1->Forest, -2->Trap, -3:Swamp, >0:Gold\n", 174 | " self.energyOnMap = [] # self.energyOnMap[x][y]: <0, amount of energy which player will consume if it move into (x,y)\n", 175 | " self.E = 50\n", 176 | " self.resetFlag = True\n", 177 | " self.craftUsers = [] # players that craft at current step - for calculating amount of gold\n", 178 | " self.bots = []\n", 179 | " self.craftMap = {} # cells that players craft at current step, key: x_y, value: number of players that craft at (x,y)\n", 180 | "\n", 181 | " def init_bots(self):\n", 182 | " self.bots = [Bot1(2), Bot2(3), Bot3(4)] # use bot1(id=2), bot2(id=3), bot3(id=4)\n", 183 | " for (bot) in self.bots: # at the beginning, all bots will have same position, energy 
as player\n", 184 | " bot.info.posx = self.user.posx\n", 185 | " bot.info.posy = self.user.posy\n", 186 | " bot.info.energy = self.user.energy\n", 187 | " bot.info.lastAction = -1\n", 188 | " bot.info.status = PlayerInfo.STATUS_PLAYING\n", 189 | " bot.info.score = 0\n", 190 | " self.stepState.players.append(bot.info)\n", 191 | " self.userMatch.gameinfo.numberOfPlayers = len(self.stepState.players)\n", 192 | " print(\"numberOfPlayers: \", self.userMatch.gameinfo.numberOfPlayers)\n", 193 | "\n", 194 | " def reset(self, requests): # load new game by given request: [map id (filename), posx, posy, initial energy]\n", 195 | " # load new map\n", 196 | " self.reset_map(requests[0])\n", 197 | " self.userMatch.posx = int(requests[1])\n", 198 | " self.userMatch.posy = int(requests[2])\n", 199 | " self.userMatch.energy = int(requests[3])\n", 200 | " self.userMatch.gameinfo.steps = int(requests[4])\n", 201 | " self.maxStep = self.userMatch.gameinfo.steps\n", 202 | "\n", 203 | " # init data for players\n", 204 | " self.user.posx = self.userMatch.posx # in\n", 205 | " self.user.posy = self.userMatch.posy\n", 206 | " self.user.energy = self.userMatch.energy\n", 207 | " self.user.status = PlayerInfo.STATUS_PLAYING\n", 208 | " self.user.score = 0\n", 209 | " self.stepState.players = [self.user]\n", 210 | " self.E = self.userMatch.energy\n", 211 | " self.resetFlag = True\n", 212 | " self.init_bots()\n", 213 | " self.stepCount = 0\n", 214 | "\n", 215 | " def reset_map(self, id): # load map info\n", 216 | " self.mapId = id\n", 217 | " self.map = json.loads(self.maps[self.mapId])\n", 218 | " self.userMatch = self.map_info(self.map)\n", 219 | " self.stepState.golds = self.userMatch.gameinfo.golds\n", 220 | " self.map = json.loads(self.maps[self.mapId])\n", 221 | " self.energyOnMap = json.loads(self.maps[self.mapId])\n", 222 | " for x in range(len(self.map)):\n", 223 | " for y in range(len(self.map[x])):\n", 224 | " if self.map[x][y] > 0: # gold\n", 225 | " self.energyOnMap[x][y] = -4\n", 226 | " else: # obstacles\n", 227 | " self.energyOnMap[x][y] = ObstacleInfo.types[self.map[x][y]]\n", 228 | "\n", 229 | " def connect(self): # simulate player's connect request\n", 230 | " print(\"Connected to server.\")\n", 231 | " for mapid in range(len(Maps)):\n", 232 | " filename = \"map\" + str(mapid)\n", 233 | " print(\"Found: \" + filename)\n", 234 | " self.maps[filename] = str(Maps[mapid])\n", 235 | "\n", 236 | " def map_info(self, map): # get map info\n", 237 | " # print(map)\n", 238 | " userMatch = UserMatch()\n", 239 | " userMatch.gameinfo.height = len(map)\n", 240 | " userMatch.gameinfo.width = len(map[0])\n", 241 | " i = 0\n", 242 | " while i < len(map):\n", 243 | " j = 0\n", 244 | " while j < len(map[i]):\n", 245 | " if map[i][j] > 0: # gold\n", 246 | " g = GoldInfo()\n", 247 | " g.posx = j\n", 248 | " g.posy = i\n", 249 | " g.amount = map[i][j]\n", 250 | " userMatch.gameinfo.golds.append(g)\n", 251 | " else: # obstacles\n", 252 | " o = ObstacleInfo()\n", 253 | " o.posx = j\n", 254 | " o.posy = i\n", 255 | " o.type = -map[i][j]\n", 256 | " o.value = ObstacleInfo.types[map[i][j]]\n", 257 | " userMatch.gameinfo.obstacles.append(o)\n", 258 | " j += 1\n", 259 | " i += 1\n", 260 | " return userMatch\n", 261 | "\n", 262 | " def receive(self): # send data to player (simulate player's receive request)\n", 263 | " if self.resetFlag: # for the first time -> send game info\n", 264 | " self.resetFlag = False\n", 265 | " data = self.userMatch.to_json()\n", 266 | " for (bot) in self.bots:\n", 267 | " bot.new_game(data)\n", 268 | 
" # print(data)\n", 269 | " return data\n", 270 | " else: # send step state\n", 271 | " self.stepCount = self.stepCount + 1\n", 272 | " if self.stepCount >= self.maxStep:\n", 273 | " for player in self.stepState.players:\n", 274 | " player.status = PlayerInfo.STATUS_STOP_END_STEP\n", 275 | " data = self.stepState.to_json()\n", 276 | " for (bot) in self.bots: # update bots' state\n", 277 | " bot.new_state(data)\n", 278 | " # print(data)\n", 279 | " return data\n", 280 | "\n", 281 | " def send(self, message): # receive message from player (simulate send request from player)\n", 282 | " if message.isnumeric(): # player send action\n", 283 | " self.resetFlag = False\n", 284 | " self.stepState.changedObstacles = []\n", 285 | " action = int(message)\n", 286 | " # print(\"Action = \", action)\n", 287 | " self.user.lastAction = action\n", 288 | " self.craftUsers = []\n", 289 | " self.step_action(self.user, action)\n", 290 | " for bot in self.bots:\n", 291 | " if bot.info.status == PlayerInfo.STATUS_PLAYING:\n", 292 | " action = bot.next_action()\n", 293 | " bot.info.lastAction = action\n", 294 | " # print(\"Bot Action: \", action)\n", 295 | " self.step_action(bot.info, action)\n", 296 | " self.action_5_craft()\n", 297 | " for c in self.stepState.changedObstacles:\n", 298 | " self.map[c[\"posy\"]][c[\"posx\"]] = -c[\"type\"]\n", 299 | " self.energyOnMap[c[\"posy\"]][c[\"posx\"]] = c[\"value\"]\n", 300 | "\n", 301 | " else: # reset game\n", 302 | " requests = message.split(\",\")\n", 303 | " print(\"Reset game: \", requests)\n", 304 | " self.reset(requests)\n", 305 | "\n", 306 | " def step_action(self, user, action):\n", 307 | " switcher = {\n", 308 | " 0: self.action_0_left,\n", 309 | " 1: self.action_1_right,\n", 310 | " 2: self.action_2_up,\n", 311 | " 3: self.action_3_down,\n", 312 | " 4: self.action_4_free,\n", 313 | " 5: self.action_5_craft_pre\n", 314 | " }\n", 315 | " func = switcher.get(action, self.invalidAction)\n", 316 | " func(user)\n", 317 | "\n", 318 | " def action_5_craft_pre(self, user): # collect players who craft at current step\n", 319 | " user.freeCount = 0\n", 320 | " if self.map[user.posy][user.posx] <= 0: # craft at the non-gold cell\n", 321 | " user.energy -= 10\n", 322 | " if user.energy <= 0:\n", 323 | " user.status = PlayerInfo.STATUS_ELIMINATED_OUT_OF_ENERGY\n", 324 | " user.lastAction = 6 #eliminated\n", 325 | " else:\n", 326 | " user.energy -= 5\n", 327 | " if user.energy > 0:\n", 328 | " self.craftUsers.append(user)\n", 329 | " key = str(user.posx) + \"_\" + str(user.posy)\n", 330 | " if key in self.craftMap:\n", 331 | " count = self.craftMap[key]\n", 332 | " self.craftMap[key] = count + 1\n", 333 | " else:\n", 334 | " self.craftMap[key] = 1\n", 335 | " else:\n", 336 | " user.status = PlayerInfo.STATUS_ELIMINATED_OUT_OF_ENERGY\n", 337 | " user.lastAction = 6 #eliminated\n", 338 | "\n", 339 | " def action_0_left(self, user): # user go left\n", 340 | " user.freeCount = 0\n", 341 | " user.posx = user.posx - 1\n", 342 | " if user.posx < 0:\n", 343 | " user.status = PlayerInfo.STATUS_ELIMINATED_WENT_OUT_MAP\n", 344 | " user.lastAction = 6 #eliminated\n", 345 | " else:\n", 346 | " self.go_to_pos(user)\n", 347 | "\n", 348 | " def action_1_right(self, user): # user go right\n", 349 | " user.freeCount = 0\n", 350 | " user.posx = user.posx + 1\n", 351 | " if user.posx >= self.userMatch.gameinfo.width:\n", 352 | " user.status = PlayerInfo.STATUS_ELIMINATED_WENT_OUT_MAP\n", 353 | " user.lastAction = 6 #eliminated\n", 354 | " else:\n", 355 | " self.go_to_pos(user)\n", 356 | "\n", 
357 | " def action_2_up(self, user): # user go up\n", 358 | " user.freeCount = 0\n", 359 | " user.posy = user.posy - 1\n", 360 | " if user.posy < 0:\n", 361 | " user.status = PlayerInfo.STATUS_ELIMINATED_WENT_OUT_MAP\n", 362 | " user.lastAction = 6 #eliminated\n", 363 | " else:\n", 364 | " self.go_to_pos(user)\n", 365 | "\n", 366 | " def action_3_down(self, user): # user go right\n", 367 | " user.freeCount = 0\n", 368 | " user.posy = user.posy + 1\n", 369 | " if user.posy >= self.userMatch.gameinfo.height:\n", 370 | " user.status = PlayerInfo.STATUS_ELIMINATED_WENT_OUT_MAP\n", 371 | " user.lastAction = 6 #eliminated\n", 372 | " else:\n", 373 | " self.go_to_pos(user)\n", 374 | "\n", 375 | " def action_4_free(self, user): # user free\n", 376 | " user.freeCount += 1\n", 377 | " if user.freeCount == 1:\n", 378 | " user.energy += int(self.E / 4)\n", 379 | " elif user.freeCount == 2:\n", 380 | " user.energy += int(self.E / 3)\n", 381 | " elif user.freeCount == 3:\n", 382 | " user.energy += int(self.E / 2)\n", 383 | " else:\n", 384 | " user.energy = self.E\n", 385 | " if user.energy > self.E:\n", 386 | " user.energy = self.E\n", 387 | "\n", 388 | " def action_5_craft(self):\n", 389 | " craftCount = len(self.craftUsers)\n", 390 | " # print (\"craftCount\",craftCount)\n", 391 | " if (craftCount > 0):\n", 392 | " for user in self.craftUsers:\n", 393 | " x = user.posx\n", 394 | " y = user.posy\n", 395 | " key = str(user.posx) + \"_\" + str(user.posy)\n", 396 | " c = self.craftMap[key]\n", 397 | " m = min(math.ceil(self.map[y][x] / c), 50)\n", 398 | " user.score += m\n", 399 | " # print (\"user\", user.playerId, m)\n", 400 | " for user in self.craftUsers:\n", 401 | " x = user.posx\n", 402 | " y = user.posy\n", 403 | " key = str(user.posx) + \"_\" + str(user.posy)\n", 404 | " if key in self.craftMap:\n", 405 | " c = self.craftMap[key]\n", 406 | " del self.craftMap[key]\n", 407 | " m = min(math.ceil(self.map[y][x] / c), 50)\n", 408 | " self.map[y][x] -= m * c\n", 409 | " if self.map[y][x] < 0:\n", 410 | " self.map[y][x] = 0\n", 411 | " self.energyOnMap[y][x] = ObstacleInfo.types[0]\n", 412 | " for g in self.stepState.golds:\n", 413 | " if g.posx == x and g.posy == y:\n", 414 | " g.amount = self.map[y][x]\n", 415 | " if g.amount == 0:\n", 416 | " self.stepState.golds.remove(g)\n", 417 | " self.add_changed_obstacle(x, y, 0, ObstacleInfo.types[0])\n", 418 | " if len(self.stepState.golds) == 0:\n", 419 | " for player in self.stepState.players:\n", 420 | " player.status = PlayerInfo.STATUS_STOP_EMPTY_GOLD\n", 421 | " break;\n", 422 | " self.craftMap = {}\n", 423 | "\n", 424 | " def invalidAction(self, user):\n", 425 | " user.status = PlayerInfo.STATUS_ELIMINATED_INVALID_ACTION\n", 426 | " user.lastAction = 6 #eliminated\n", 427 | "\n", 428 | " def go_to_pos(self, user): # player move to cell(x,y)\n", 429 | " if self.map[user.posy][user.posx] == -1:\n", 430 | " user.energy -= randrange(16) + 5\n", 431 | " elif self.map[user.posy][user.posx] == 0:\n", 432 | " user.energy += self.energyOnMap[user.posy][user.posx]\n", 433 | " elif self.map[user.posy][user.posx] == -2:\n", 434 | " user.energy += self.energyOnMap[user.posy][user.posx]\n", 435 | " self.add_changed_obstacle(user.posx, user.posy, 0, ObstacleInfo.types[0])\n", 436 | " elif self.map[user.posy][user.posx] == -3:\n", 437 | " user.energy += self.energyOnMap[user.posy][user.posx]\n", 438 | " self.add_changed_obstacle(user.posx, user.posy, 3,\n", 439 | " self.bog_energy_chain[self.energyOnMap[user.posy][user.posx]])\n", 440 | " else:\n", 441 | " user.energy 
-= 4\n", 442 | " if user.energy <= 0:\n", 443 | " user.status = PlayerInfo.STATUS_ELIMINATED_OUT_OF_ENERGY\n", 444 | " user.lastAction = 6 #eliminated\n", 445 | "\n", 446 | " def add_changed_obstacle(self, x, y, t, v):\n", 447 | " added = False\n", 448 | " for o in self.stepState.changedObstacles:\n", 449 | " if o[\"posx\"] == x and o[\"posy\"] == y:\n", 450 | " added = True\n", 451 | " break\n", 452 | " if added == False:\n", 453 | " o = {}\n", 454 | " o[\"posx\"] = x\n", 455 | " o[\"posy\"] = y\n", 456 | " o[\"type\"] = t\n", 457 | " o[\"value\"] = v\n", 458 | " self.stepState.changedObstacles.append(o)\n", 459 | "\n", 460 | " def close(self):\n", 461 | " print(\"Close socket.\")" 462 | ] 463 | }, 464 | { 465 | "cell_type": "code", 466 | "execution_count": 5, 467 | "metadata": { 468 | "colab": {}, 469 | "colab_type": "code", 470 | "id": "ZEExD0BCyePu" 471 | }, 472 | "outputs": [], 473 | "source": [ 474 | "#Bots :bot1\n", 475 | "class Bot1:\n", 476 | " ACTION_GO_LEFT = 0\n", 477 | " ACTION_GO_RIGHT = 1\n", 478 | " ACTION_GO_UP = 2\n", 479 | " ACTION_GO_DOWN = 3\n", 480 | " ACTION_FREE = 4\n", 481 | " ACTION_CRAFT = 5\n", 482 | "\n", 483 | " def __init__(self, id):\n", 484 | " self.state = State()\n", 485 | " self.info = PlayerInfo(id)\n", 486 | "\n", 487 | " def next_action(self):\n", 488 | " if self.state.mapInfo.gold_amount(self.info.posx, self.info.posy) > 0:\n", 489 | " if self.info.energy >= 6:\n", 490 | " return self.ACTION_CRAFT\n", 491 | " else:\n", 492 | " return self.ACTION_FREE\n", 493 | " if self.info.energy < 5:\n", 494 | " return self.ACTION_FREE\n", 495 | " else:\n", 496 | " action = self.ACTION_GO_UP\n", 497 | " if self.info.posy % 2 == 0:\n", 498 | " if self.info.posx < self.state.mapInfo.max_x:\n", 499 | " action = self.ACTION_GO_RIGHT\n", 500 | " else:\n", 501 | " if self.info.posx > 0:\n", 502 | " action = self.ACTION_GO_LEFT\n", 503 | " else:\n", 504 | " action = self.ACTION_GO_DOWN\n", 505 | " return action\n", 506 | "\n", 507 | " def new_game(self, data):\n", 508 | " try:\n", 509 | " self.state.init_state(data)\n", 510 | " except Exception as e:\n", 511 | " import traceback\n", 512 | " traceback.print_exc()\n", 513 | "\n", 514 | " def new_state(self, data):\n", 515 | " # action = self.next_action();\n", 516 | " # self.socket.send(action)\n", 517 | " try:\n", 518 | " self.state.update_state(data)\n", 519 | " except Exception as e:\n", 520 | " import traceback\n", 521 | " traceback.print_exc()\n" 522 | ] 523 | }, 524 | { 525 | "cell_type": "code", 526 | "execution_count": 6, 527 | "metadata": { 528 | "colab": {}, 529 | "colab_type": "code", 530 | "id": "UYHsBcVEyiCm" 531 | }, 532 | "outputs": [], 533 | "source": [ 534 | "#Bots :bot2\n", 535 | "class Bot2:\n", 536 | " ACTION_GO_LEFT = 0\n", 537 | " ACTION_GO_RIGHT = 1\n", 538 | " ACTION_GO_UP = 2\n", 539 | " ACTION_GO_DOWN = 3\n", 540 | " ACTION_FREE = 4\n", 541 | " ACTION_CRAFT = 5\n", 542 | "\n", 543 | " def __init__(self, id):\n", 544 | " self.state = State()\n", 545 | " self.info = PlayerInfo(id)\n", 546 | "\n", 547 | " def next_action(self):\n", 548 | " if self.state.mapInfo.gold_amount(self.info.posx, self.info.posy) > 0:\n", 549 | " if self.info.energy >= 6:\n", 550 | " return self.ACTION_CRAFT\n", 551 | " else:\n", 552 | " return self.ACTION_FREE\n", 553 | " if self.info.energy < 5:\n", 554 | " return self.ACTION_FREE\n", 555 | " else:\n", 556 | " action = np.random.randint(0, 4) \n", 557 | " return action\n", 558 | "\n", 559 | " def new_game(self, data):\n", 560 | " try:\n", 561 | " 
self.state.init_state(data)\n", 562 | " except Exception as e:\n", 563 | " import traceback\n", 564 | " traceback.print_exc()\n", 565 | "\n", 566 | " def new_state(self, data):\n", 567 | " # action = self.next_action();\n", 568 | " # self.socket.send(action)\n", 569 | " try:\n", 570 | " self.state.update_state(data)\n", 571 | " except Exception as e:\n", 572 | " import traceback\n", 573 | " traceback.print_exc()" 574 | ] 575 | }, 576 | { 577 | "cell_type": "code", 578 | "execution_count": 7, 579 | "metadata": { 580 | "colab": {}, 581 | "colab_type": "code", 582 | "id": "CCQo94-0ykm6" 583 | }, 584 | "outputs": [], 585 | "source": [ 586 | "#Bots :bot3\n", 587 | "class Bot3:\n", 588 | " ACTION_GO_LEFT = 0\n", 589 | " ACTION_GO_RIGHT = 1\n", 590 | " ACTION_GO_UP = 2\n", 591 | " ACTION_GO_DOWN = 3\n", 592 | " ACTION_FREE = 4\n", 593 | " ACTION_CRAFT = 5\n", 594 | "\n", 595 | " def __init__(self, id):\n", 596 | " self.state = State()\n", 597 | " self.info = PlayerInfo(id)\n", 598 | "\n", 599 | " def next_action(self):\n", 600 | " if self.state.mapInfo.gold_amount(self.info.posx, self.info.posy) > 0:\n", 601 | " if self.info.energy >= 6:\n", 602 | " return self.ACTION_CRAFT\n", 603 | " else:\n", 604 | " return self.ACTION_FREE\n", 605 | " if self.info.energy < 5:\n", 606 | " return self.ACTION_FREE\n", 607 | " else:\n", 608 | " action = self.ACTION_GO_LEFT\n", 609 | " if self.info.posx % 2 == 0:\n", 610 | " if self.info.posy < self.state.mapInfo.max_y:\n", 611 | " action = self.ACTION_GO_DOWN\n", 612 | " else:\n", 613 | " if self.info.posy > 0:\n", 614 | " action = self.ACTION_GO_UP\n", 615 | " else:\n", 616 | " action = self.ACTION_GO_RIGHT \n", 617 | " return action\n", 618 | "\n", 619 | " def new_game(self, data):\n", 620 | " try:\n", 621 | " self.state.init_state(data)\n", 622 | " except Exception as e:\n", 623 | " import traceback\n", 624 | " traceback.print_exc()\n", 625 | "\n", 626 | " def new_state(self, data):\n", 627 | " # action = self.next_action();\n", 628 | " # self.socket.send(action)\n", 629 | " try:\n", 630 | " self.state.update_state(data)\n", 631 | " except Exception as e:\n", 632 | " import traceback\n", 633 | " traceback.print_exc()" 634 | ] 635 | }, 636 | { 637 | "cell_type": "code", 638 | "execution_count": 8, 639 | "metadata": { 640 | "colab": {}, 641 | "colab_type": "code", 642 | "id": "agO3td72yvaS" 643 | }, 644 | "outputs": [], 645 | "source": [ 646 | "#MinerState.py\n", 647 | "def str_2_json(str):\n", 648 | " return json.loads(str, encoding=\"utf-8\")\n", 649 | "\n", 650 | "\n", 651 | "class MapInfo:\n", 652 | " def __init__(self):\n", 653 | " self.max_x = 0 #Width of the map\n", 654 | " self.max_y = 0 #Height of the map\n", 655 | " self.golds = [] #List of the golds in the map\n", 656 | " self.obstacles = []\n", 657 | " self.numberOfPlayers = 0\n", 658 | " self.maxStep = 0 #The maximum number of step is set for this map\n", 659 | "\n", 660 | " def init_map(self, gameInfo):\n", 661 | " #Initialize the map at the begining of each episode\n", 662 | " self.max_x = gameInfo[\"width\"] - 1\n", 663 | " self.max_y = gameInfo[\"height\"] - 1\n", 664 | " self.golds = gameInfo[\"golds\"]\n", 665 | " self.obstacles = gameInfo[\"obstacles\"]\n", 666 | " self.maxStep = gameInfo[\"steps\"]\n", 667 | " self.numberOfPlayers = gameInfo[\"numberOfPlayers\"]\n", 668 | "\n", 669 | " def update(self, golds, changedObstacles):\n", 670 | " #Update the map after every step\n", 671 | " self.golds = golds\n", 672 | " for cob in changedObstacles:\n", 673 | " newOb = True\n", 674 | " for ob in 
self.obstacles:\n", 675 | " if cob[\"posx\"] == ob[\"posx\"] and cob[\"posy\"] == ob[\"posy\"]:\n", 676 | " newOb = False\n", 677 | " #print(\"cell(\", cob[\"posx\"], \",\", cob[\"posy\"], \") change type from: \", ob[\"type\"], \" -> \",\n", 678 | " # cob[\"type\"], \" / value: \", ob[\"value\"], \" -> \", cob[\"value\"])\n", 679 | " ob[\"type\"] = cob[\"type\"]\n", 680 | " ob[\"value\"] = cob[\"value\"]\n", 681 | " break\n", 682 | " if newOb:\n", 683 | " self.obstacles.append(cob)\n", 684 | " #print(\"new obstacle: \", cob[\"posx\"], \",\", cob[\"posy\"], \", type = \", cob[\"type\"], \", value = \",\n", 685 | " # cob[\"value\"])\n", 686 | "\n", 687 | " def get_min_x(self):\n", 688 | " return min([cell[\"posx\"] for cell in self.golds])\n", 689 | "\n", 690 | " def get_max_x(self):\n", 691 | " return max([cell[\"posx\"] for cell in self.golds])\n", 692 | "\n", 693 | " def get_min_y(self):\n", 694 | " return min([cell[\"posy\"] for cell in self.golds])\n", 695 | "\n", 696 | " def get_max_y(self):\n", 697 | " return max([cell[\"posy\"] for cell in self.golds])\n", 698 | "\n", 699 | " def is_row_has_gold(self, y):\n", 700 | " return y in [cell[\"posy\"] for cell in self.golds]\n", 701 | "\n", 702 | " def is_column_has_gold(self, x):\n", 703 | " return x in [cell[\"posx\"] for cell in self.golds]\n", 704 | "\n", 705 | " def gold_amount(self, x, y): #Get the amount of golds at cell (x,y)\n", 706 | " for cell in self.golds:\n", 707 | " if x == cell[\"posx\"] and y == cell[\"posy\"]:\n", 708 | " return cell[\"amount\"]\n", 709 | " return 0 \n", 710 | "\n", 711 | " def get_obstacle(self, x, y): # Get the kind of the obstacle at cell(x,y)\n", 712 | " for cell in self.obstacles:\n", 713 | " if x == cell[\"posx\"] and y == cell[\"posy\"]:\n", 714 | " return cell[\"type\"]\n", 715 | " return -1 # No obstacle at the cell (x,y)\n", 716 | "\n", 717 | "\n", 718 | "class State:\n", 719 | " STATUS_PLAYING = 0\n", 720 | " STATUS_ELIMINATED_WENT_OUT_MAP = 1\n", 721 | " STATUS_ELIMINATED_OUT_OF_ENERGY = 2\n", 722 | " STATUS_ELIMINATED_INVALID_ACTION = 3\n", 723 | " STATUS_STOP_EMPTY_GOLD = 4\n", 724 | " STATUS_STOP_END_STEP = 5\n", 725 | "\n", 726 | " def __init__(self):\n", 727 | " self.end = False\n", 728 | " self.score = 0\n", 729 | " self.lastAction = None\n", 730 | " self.id = 0\n", 731 | " self.x = 0\n", 732 | " self.y = 0\n", 733 | " self.energy = 0\n", 734 | " self.mapInfo = MapInfo()\n", 735 | " self.players = []\n", 736 | " self.stepCount = 0\n", 737 | " self.status = State.STATUS_PLAYING\n", 738 | "\n", 739 | " def init_state(self, data): #parse data from server into object\n", 740 | " game_info = str_2_json(data)\n", 741 | " self.end = False\n", 742 | " self.score = 0\n", 743 | " self.lastAction = None\n", 744 | " self.id = game_info[\"playerId\"]\n", 745 | " self.x = game_info[\"posx\"]\n", 746 | " self.y = game_info[\"posy\"]\n", 747 | " self.energy = game_info[\"energy\"]\n", 748 | " self.mapInfo.init_map(game_info[\"gameinfo\"])\n", 749 | " self.stepCount = 0\n", 750 | " self.status = State.STATUS_PLAYING\n", 751 | " self.players = [{\"playerId\": 2, \"posx\": self.x, \"posy\": self.y},\n", 752 | " {\"playerId\": 3, \"posx\": self.x, \"posy\": self.y},\n", 753 | " {\"playerId\": 4, \"posx\": self.x, \"posy\": self.y}]\n", 754 | "\n", 755 | " def update_state(self, data):\n", 756 | " new_state = str_2_json(data)\n", 757 | " for player in new_state[\"players\"]:\n", 758 | " if player[\"playerId\"] == self.id:\n", 759 | " self.x = player[\"posx\"]\n", 760 | " self.y = player[\"posy\"]\n", 761 | " 
self.energy = player[\"energy\"]\n", 762 | " self.score = player[\"score\"]\n", 763 | " self.lastAction = player[\"lastAction\"]\n", 764 | " self.status = player[\"status\"]\n", 765 | "\n", 766 | " self.mapInfo.update(new_state[\"golds\"], new_state[\"changedObstacles\"])\n", 767 | " self.players = new_state[\"players\"]\n", 768 | " for i in range(len(self.players) + 1, 5, 1):\n", 769 | " self.players.append({\"playerId\": i, \"posx\": self.x, \"posy\": self.y})\n", 770 | " self.stepCount = self.stepCount + 1" 771 | ] 772 | }, 773 | { 774 | "cell_type": "code", 775 | "execution_count": 9, 776 | "metadata": { 777 | "colab": {}, 778 | "colab_type": "code", 779 | "id": "QHa-DcAcyyMc" 780 | }, 781 | "outputs": [], 782 | "source": [ 783 | "#MinerEnv.py\n", 784 | "TreeID = 1\n", 785 | "TrapID = 2\n", 786 | "SwampID = 3\n", 787 | "class MinerEnv:\n", 788 | " def __init__(self):\n", 789 | " self.socket = GameSocket()\n", 790 | " self.state = State()\n", 791 | " \n", 792 | " self.score_pre = self.state.score#Storing the last score for designing the reward function\n", 793 | "\n", 794 | " def start(self): #connect to server\n", 795 | " self.socket.connect()\n", 796 | "\n", 797 | " def end(self): #disconnect server\n", 798 | " self.socket.close()\n", 799 | "\n", 800 | " def send_map_info(self, request):#tell server which map to run\n", 801 | " self.socket.send(request)\n", 802 | "\n", 803 | " def reset(self): #start new game\n", 804 | " try:\n", 805 | " message = self.socket.receive() #receive game info from server\n", 806 | " self.state.init_state(message) #init state\n", 807 | " except Exception as e:\n", 808 | " import traceback\n", 809 | " traceback.print_exc()\n", 810 | "\n", 811 | " def step(self, action): #step process\n", 812 | " self.socket.send(action) #send action to server\n", 813 | " try:\n", 814 | " message = self.socket.receive() #receive new state from server\n", 815 | " self.state.update_state(message) #update to local state\n", 816 | " except Exception as e:\n", 817 | " import traceback\n", 818 | " traceback.print_exc()\n", 819 | "\n", 820 | " # Functions are customized by client\n", 821 | " def get_state(self):\n", 822 | " # Building the map\n", 823 | " view = np.zeros([self.state.mapInfo.max_x + 1, self.state.mapInfo.max_y + 1], dtype=int)\n", 824 | " for i in range(self.state.mapInfo.max_x + 1):\n", 825 | " for j in range(self.state.mapInfo.max_y + 1):\n", 826 | " if self.state.mapInfo.get_obstacle(i, j) == TreeID: # Tree\n", 827 | " view[i, j] = -TreeID\n", 828 | " if self.state.mapInfo.get_obstacle(i, j) == TrapID: # Trap\n", 829 | " view[i, j] = -TrapID\n", 830 | " if self.state.mapInfo.get_obstacle(i, j) == SwampID: # Swamp\n", 831 | " view[i, j] = -SwampID\n", 832 | " if self.state.mapInfo.gold_amount(i, j) > 0:\n", 833 | " view[i, j] = self.state.mapInfo.gold_amount(i, j)\n", 834 | "\n", 835 | " DQNState = view.flatten().tolist() #Flattening the map matrix to a vector\n", 836 | " \n", 837 | " # Add position and energy of agent to the DQNState\n", 838 | " DQNState.append(self.state.x)\n", 839 | " DQNState.append(self.state.y)\n", 840 | " DQNState.append(self.state.energy)\n", 841 | " #Add position of bots \n", 842 | " for player in self.state.players:\n", 843 | " if player[\"playerId\"] != self.state.id:\n", 844 | " DQNState.append(player[\"posx\"])\n", 845 | " DQNState.append(player[\"posy\"])\n", 846 | " \n", 847 | " #Convert the DQNState from list to array for training\n", 848 | " DQNState = np.array(DQNState)\n", 849 | "\n", 850 | " return DQNState\n", 851 | "\n", 852 | 
" def get_reward(self):\n", 853 | " # Calculate reward\n", 854 | " reward = 0\n", 855 | " score_action = self.state.score - self.score_pre\n", 856 | " self.score_pre = self.state.score\n", 857 | " if score_action > 0:\n", 858 | " #If the DQN agent crafts golds, then it should obtain a positive reward (equal score_action)\n", 859 | " reward += score_action\n", 860 | " \n", 861 | " #If the DQN agent crashs into obstacels (Tree, Trap, Swamp), then it should be punished by a negative reward\n", 862 | " if self.state.mapInfo.get_obstacle(self.state.x, self.state.y) == TreeID: # Tree\n", 863 | " reward -= TreeID\n", 864 | " if self.state.mapInfo.get_obstacle(self.state.x, self.state.y) == TrapID: # Trap\n", 865 | " reward -= TrapID\n", 866 | " if self.state.mapInfo.get_obstacle(self.state.x, self.state.y) == SwampID: # Swamp\n", 867 | " reward -= SwampID\n", 868 | "\n", 869 | " # If out of the map, then the DQN agent should be punished by a larger nagative reward.\n", 870 | " if self.state.status == State.STATUS_ELIMINATED_WENT_OUT_MAP:\n", 871 | " reward += -10\n", 872 | " \n", 873 | " #Run out of energy, then the DQN agent should be punished by a larger nagative reward.\n", 874 | " if self.state.status == State.STATUS_ELIMINATED_OUT_OF_ENERGY:\n", 875 | " reward += -10\n", 876 | " # print (\"reward\",reward)\n", 877 | " return reward\n", 878 | "\n", 879 | " def check_terminate(self):\n", 880 | " #Checking the status of the game\n", 881 | " #it indicates the game ends or is playing\n", 882 | " return self.state.status != State.STATUS_PLAYING" 883 | ] 884 | }, 885 | { 886 | "cell_type": "code", 887 | "execution_count": 10, 888 | "metadata": { 889 | "colab": {}, 890 | "colab_type": "code", 891 | "id": "Qf2sasVey0sm" 892 | }, 893 | "outputs": [], 894 | "source": [ 895 | "#DQNModel.py\n", 896 | "class DQN: \n", 897 | " \n", 898 | " def __init__(\n", 899 | " self,\n", 900 | " input_dim, #The number of inputs for the DQN network\n", 901 | " action_space, #The number of actions for the DQN network\n", 902 | " gamma = 0.99, #The discount factor\n", 903 | " epsilon = 1, #Epsilon - the exploration factor\n", 904 | " epsilon_min = 0.01, #The minimum epsilon \n", 905 | " epsilon_decay = 0.999,#The decay epislon for each update_epsilon time\n", 906 | " learning_rate = 0.00025, #The learning rate for the DQN network\n", 907 | " tau = 0.125, #The factor for updating the DQN target network from the DQN network\n", 908 | " model = None, #The DQN model\n", 909 | " target_model = None, #The DQN target model \n", 910 | " sess=None\n", 911 | " \n", 912 | " ):\n", 913 | " self.input_dim = input_dim\n", 914 | " self.action_space = action_space\n", 915 | " self.gamma = gamma\n", 916 | " self.epsilon = epsilon\n", 917 | " self.epsilon_min = epsilon_min\n", 918 | " self.epsilon_decay = epsilon_decay\n", 919 | " self.learning_rate = learning_rate\n", 920 | " self.tau = tau\n", 921 | " \n", 922 | " #Creating networks\n", 923 | " self.model = self.create_model() #Creating the DQN model\n", 924 | " self.target_model = self.create_model() #Creating the DQN target model\n", 925 | " \n", 926 | " #Tensorflow GPU optimization\n", 927 | " config = tf.ConfigProto()\n", 928 | " config.gpu_options.allow_growth = True\n", 929 | " self.sess = tf.Session(config=config)\n", 930 | " K.set_session(sess)\n", 931 | " self.sess.run( tf.global_variables_initializer()) \n", 932 | " \n", 933 | " def create_model(self):\n", 934 | " #Creating the network\n", 935 | " #Two hidden layers (300,300), their activation is ReLu\n", 936 | " #One output 
layer with action_space of nodes, activation is linear.\n", 937 | " model = Sequential()\n", 938 | " model.add(Dense(300, input_dim=self.input_dim))\n", 939 | " model.add(Activation('relu'))\n", 940 | " model.add(Dense(300))\n", 941 | " model.add(Activation('relu'))\n", 942 | " model.add(Dense(self.action_space))\n", 943 | " model.add(Activation('linear')) \n", 944 | " #adam = optimizers.adam(lr=self.learning_rate)\n", 945 | " sgd = optimizers.SGD(lr=self.learning_rate, decay=1e-6, momentum=0.95)\n", 946 | " model.compile(optimizer = sgd,\n", 947 | " loss='mse')\n", 948 | " return model\n", 949 | " \n", 950 | " \n", 951 | " def act(self,state):\n", 952 | " #Get the index of the maximum Q values \n", 953 | " a_max = np.argmax(self.model.predict(state.reshape(1,len(state)))) \n", 954 | " if (random.random() < self.epsilon):\n", 955 | " a_chosen = randrange(self.action_space)\n", 956 | " else:\n", 957 | " a_chosen = a_max \n", 958 | " return a_chosen\n", 959 | " \n", 960 | " \n", 961 | " def replay(self,samples,batch_size):\n", 962 | " inputs = np.zeros((batch_size, self.input_dim))\n", 963 | " targets = np.zeros((batch_size, self.action_space))\n", 964 | " \n", 965 | " for i in range(0,batch_size):\n", 966 | " state = samples[0][i,:]\n", 967 | " action = samples[1][i]\n", 968 | " reward = samples[2][i]\n", 969 | " new_state = samples[3][i,:]\n", 970 | " done= samples[4][i]\n", 971 | " \n", 972 | " inputs[i,:] = state\n", 973 | " targets[i,:] = self.target_model.predict(state.reshape(1,len(state))) \n", 974 | " if done:\n", 975 | " targets[i,action] = reward # if terminated, only equals reward\n", 976 | " else:\n", 977 | " Q_future = np.max(self.target_model.predict(new_state.reshape(1,len(new_state))))\n", 978 | " targets[i,action] = reward + Q_future * self.gamma\n", 979 | " #Training\n", 980 | " loss = self.model.train_on_batch(inputs, targets) \n", 981 | " \n", 982 | " def target_train(self): \n", 983 | " weights = self.model.get_weights()\n", 984 | " target_weights = self.target_model.get_weights()\n", 985 | " for i in range(0, len(target_weights)):\n", 986 | " target_weights[i] = weights[i] * self.tau + target_weights[i] * (1 - self.tau)\n", 987 | " \n", 988 | " self.target_model.set_weights(target_weights) \n", 989 | " \n", 990 | " \n", 991 | " def update_epsilon(self):\n", 992 | " self.epsilon = self.epsilon*self.epsilon_decay\n", 993 | " self.epsilon = max(self.epsilon_min, self.epsilon)\n", 994 | " \n", 995 | " \n", 996 | " def save_model(self, model_name):\n", 997 | " # serialize model to JSON\n", 998 | " model_json = self.model.to_json()\n", 999 | " with open(model_name + \".json\", \"w\") as json_file:\n", 1000 | " json_file.write(model_json)\n", 1001 | " # serialize weights to HDF5\n", 1002 | " self.model.save_weights( model_name + \".h5\")\n", 1003 | " print(\"Saved model to disk\")" 1004 | ] 1005 | }, 1006 | { 1007 | "cell_type": "code", 1008 | "execution_count": 11, 1009 | "metadata": { 1010 | "colab": {}, 1011 | "colab_type": "code", 1012 | "id": "MARetIYHy4qp" 1013 | }, 1014 | "outputs": [], 1015 | "source": [ 1016 | "#Memory.py\n", 1017 | "class Memory: \n", 1018 | " capacity = None \n", 1019 | " \n", 1020 | " def __init__(\n", 1021 | " self,\n", 1022 | " capacity,\n", 1023 | " length = None,\n", 1024 | " states = None,\n", 1025 | " actions = None,\n", 1026 | " rewards = None,\n", 1027 | " dones = None,\n", 1028 | " states2 = None, \n", 1029 | " ):\n", 1030 | " self.capacity = capacity\n", 1031 | " self.length = 0\n", 1032 | " self.states = states\n", 1033 | " self.actions 
= actions\n", 1034 | " self.rewards = rewards\n", 1035 | " self.dones = dones\n", 1036 | " self.states2 = states2\n", 1037 | "\n", 1038 | " def push(self, s, a, r, done, s2):\n", 1039 | " if self.states is None:\n", 1040 | " self.states = s\n", 1041 | " self.actions = a\n", 1042 | " self.rewards = r\n", 1043 | " self.dones = done\n", 1044 | " self.states2 = s2\n", 1045 | " else:\n", 1046 | " self.states = np.vstack((self.states,s))\n", 1047 | " self.actions = np.vstack((self.actions,a))\n", 1048 | " self.rewards = np.vstack((self.rewards, r))\n", 1049 | " self.dones = np.vstack((self.dones, done))\n", 1050 | " self.states2 = np.vstack((self.states2,s2))\n", 1051 | " \n", 1052 | " self.length = self.length + 1\n", 1053 | " \n", 1054 | " if (self.length > self.capacity): \n", 1055 | " self.states = np.delete(self.states,(0), axis = 0)\n", 1056 | " self.actions = np.delete(self.actions,(0), axis = 0)\n", 1057 | " self.rewards = np.delete(self.rewards,(0), axis = 0)\n", 1058 | " self.dones = np.delete(self.dones,(0), axis = 0)\n", 1059 | " self.states2 = np.delete(self.states2,(0), axis = 0) \n", 1060 | " self.length = self.length - 1\n", 1061 | " \n", 1062 | " \n", 1063 | " def sample(self,batch_size):\n", 1064 | " if (self.length >= batch_size):\n", 1065 | " idx = random.sample(range(0,self.length),batch_size)\n", 1066 | " s = self.states[idx,:]\n", 1067 | " a = self.actions[idx,:]\n", 1068 | " r = self.rewards[idx,:]\n", 1069 | " d = self.dones[idx,:]\n", 1070 | " s2 = self.states2[idx,:]\n", 1071 | " \n", 1072 | " return list([s,a,r,s2,d])" 1073 | ] 1074 | }, 1075 | { 1076 | "cell_type": "code", 1077 | "execution_count": 12, 1078 | "metadata": { 1079 | "colab": {}, 1080 | "colab_type": "code", 1081 | "id": "1ZqwV8edy7eX" 1082 | }, 1083 | "outputs": [], 1084 | "source": [ 1085 | "#Creating Maps\n", 1086 | "#This function is used to create 05 maps instead of loading them from Maps folder in the local\n", 1087 | "def CreateMaps():\n", 1088 | " map1 = [\n", 1089 | " [0, 0, -2, 100, 0, 0, -1, -1, -3, 0, 0, 0, -1, -1, 0, 0, -3, 0, -1, -1,0],\n", 1090 | " [-1,-1, -2, 0, 0, 0, -3, -1, 0, -2, 0, 0, 0, -1, 0, -1, 0, -2, -1, 0,0],\n", 1091 | " [0, 0, -1, 0, 0, 0, 0, -1, -1, -1, 0, 0, 100, 0, 0, 0, 0, 50, -2, 0,0],\n", 1092 | " [0, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, -1, 50, -2, 0, 0, -1, -1, 0,0],\n", 1093 | " [-2, 0, 200, -2, -2, 300, 0, 0, -2, -2, 0, 0, -3, 0, -1, 0, 0, -3, -1, 0,0],\n", 1094 | " [0, -1, 0, 0, 0, 0, 0, -3, 0, 0, -1, -1, 0, 0, 0, 0, 0, 0, -2, 0,0],\n", 1095 | " [0, -1, -1, 0, 0, -1, -1, 0, 0, 700, -1, 0, 0, 0, -2, -1, -1, 0, 0, 0,100],\n", 1096 | " [0, 0, 0, 500, 0, 0, -1, 0, -2, -2, -1, -1, 0, 0, -2, 0, -3, 0, 0, -1,0],\n", 1097 | " [-1, -1, 0,-2 , 0, -1, -2, 0, 400, -2, -1, -1, 500, 0, -2, 0, -3, 100, 0, 0,0]\n", 1098 | " ]\n", 1099 | " map2 = [\n", 1100 | " [0, 0, -2, 0, 0, 0, -1, -1, -3, 0, 0, 0, -1, -1, 0, 0, -3, 0, -1, -1,0],\n", 1101 | " [-1,-1, -2, 100, 0, 0, -3, -1, 0, -2, 100, 0, 0, -1, 0, -1, 0, -2, -1, 0,0],\n", 1102 | " [0, 0, -1, 0, 0, 0, 0, -1, -1, -1, 0, 0, 0, 0, 0, 0, 50, 0, -2, 0,0],\n", 1103 | " [0, 200, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, -1, 50, -2, 0, 0, -1, -1, 0,0],\n", 1104 | " [-2, 0, 0, -2, -2, 0, 0, 0, -2, -2, 0, 0, -3, 0, -1, 0, 0, -3, -1, 0,0],\n", 1105 | " [0, -1, 0, 0, 300, 0, 0, -3, 0, 0, -1, -1, 0, 0, 0, 0, 0, 0, -2, 0,0],\n", 1106 | " [500, -1, -1, 0, 0, -1, -1, 0, 700, 0, -1, 0, 0, 0, -2, -1, -1, 0, 0, 0,0],\n", 1107 | " [0, 0, 0, 0, 0, 0, -1, 0, -2, -2, -1, -1, 0, 0, -2, 0, -3, 100, 0, -1,0],\n", 1108 | " [-1, -1, 0,-2 , 0, -1, -2, 400, 0, -2, -1, -1, 
0, 500, -2, 0, -3, 0, 0, 100,0]\n", 1109 | " ]\n", 1110 | " map3= [\n", 1111 | " [0, 0, -2, 0, 0, 0, -1, -1, -3, 0, 100, 0, -1, -1, 0, 0, -3, 0, -1, -1,0],\n", 1112 | " [-1,-1, -2, 0, 0, 0, -3, -1, 0, -2, 0, 0, 0, -1, 0, -1, 0, -2, -1, 0,0 ],\n", 1113 | " [0, 0, -1, 0, 0, 0, 100, -1, -1, -1, 0, 0, 50, 0, 0, 0, 50, 0, -2, 0,0],\n", 1114 | " [0, 200, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, -1, 0, -2, 0, 0, -1, -1, 0,0],\n", 1115 | " [-2, 0, 0, -2, -2, 0, 0, 0, -2, -2, 0, 0, -3, 0, -1, 0, 0, -3, -1, 0,0],\n", 1116 | " [0, -1, 0, 300, 0, 0, 0, -3, 0, 0, -1, -1, 0, 0, 0, 0, 0, 0, -2, 0,0],\n", 1117 | " [0, -1, -1, 0, 0, -1, -1, 700, 0, 0, -1, 0, 0, 0, -2, -1, -1, 0, 0, 0,0],\n", 1118 | " [0, 0, 0, 0, 0, 500, -1, 0, -2, -2, -1, -1, 0, 0, -2, 0, -3, 0, 700, -1,0],\n", 1119 | " [-1, -1, 0,-2 , 0, -1, -2, 400, 0, -2, -1, -1, 0, 500, -2, 0, -3, 0, 0, 100,0]\n", 1120 | " ]\n", 1121 | " map4=[\n", 1122 | " [0, 0, -2, 0, 0, 0, -1, -1, -3, 0, 0, 0, -1, -1, 0, 0, -3, 0, -1, -1,0],\n", 1123 | " [-1,-1, -2, 0, 0, 0, -3, -1, 0, -2, 0, 0, 100, -1, 0, -1, 0, -2, -1, 0,0],\n", 1124 | " [0, 0, -1, 0, 100, 0, 0, -1, -1, -1, 0, 0, 0, 0, 50, 0, 50, 0, -2, 0,0],\n", 1125 | " [0, 200, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, -1, 0, -2, 0, 0, -1, -1, 0,0],\n", 1126 | " [-2, 0, 0, -2, -2, 0, 0, 0, -2, -2, 0, 0, -3, 0, -1, 0, 0, -3, -1, 0,0],\n", 1127 | " [0, -1, 0, 0, 0, 0, 300, -3, 0, 700, -1, -1, 0, 0, 0, 0, 0, 0, -2, 0,0],\n", 1128 | " [0, -1, -1, 0, 0, -1, -1, 0, 0, 0, -1, 0, 0, 0, -2, -1, -1, 0, 0, 100,0],\n", 1129 | " [500, 0, 0, 0, 0, 0, -1, 0, -2, -2, -1, -1, 0, 0, -2, 0, -3, 0, 0, -1,0],\n", 1130 | " [-1, -1, 0,-2 , 0, -1, -2, 400, 0, -2, -1, -1, 0, 500, -2, 0, -3, 0, 0, 100,0]\n", 1131 | "\n", 1132 | " ]\n", 1133 | " map5=[\n", 1134 | " [0, 0, -2, 0, 100, 0, -1, -1, -3, 0, 0, 0, -1, -1, 0, 0, -3, 0, -1, -1,0],\n", 1135 | " [-1,-1, -2, 0, 0, 0, -3, -1, 0, -2, 100, 0, 0, -1, 0, -1, 0, -2, -1, 0,0],\n", 1136 | " [0, 0, -1, 0, 0, 0, 0, -1, -1, -1, 0, 0, 0, 0, 50, 0, 0, 0, -2, 0,0],\n", 1137 | " [0, 200, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, -1, 0, -2, 0, 50, -1, -1, 0,0],\n", 1138 | " [-2, 0, 0, -2, -2, 0, 0, 0, -2, -2, 0, 0, -3, 0, -1, 0, 0, -3, -1, 0,0],\n", 1139 | " [0, -1, 0, 0, 300, 0, 0, -3, 0, 0, -1, -1, 0, 0, 0, 0, 0, 0, -2, 0,0],\n", 1140 | " [500, -1, -1, 0, 0, -1, -1, 0, 0, 700, -1, 0, 0, 0, -2, -1, -1, 0, 0, 100,0],\n", 1141 | " [0, 0, 0, 0, 0, 0, -1, 0, -2, -2, -1, -1, 0, 0, -2, 0, -3, 0, 0, -1,0],\n", 1142 | " [-1, -1, 0,-2 , 0, -1, -2, 400, 0, -2, -1, -1, 0, 500, -2, 0, -3, 0, 0, 100,0]\n", 1143 | " ]\n", 1144 | " Maps = (map1,map2,map3,map4,map5)\n", 1145 | " return Maps \n", 1146 | "\n" 1147 | ] 1148 | }, 1149 | { 1150 | "cell_type": "code", 1151 | "execution_count": null, 1152 | "metadata": { 1153 | "colab": { 1154 | "base_uri": "https://localhost:8080/", 1155 | "height": 1000 1156 | }, 1157 | "colab_type": "code", 1158 | "id": "SKN2xerQy-p8", 1159 | "outputId": "7ef7de46-4787-456c-e8f2-228058511229" 1160 | }, 1161 | "outputs": [ 1162 | { 1163 | "name": "stdout", 1164 | "output_type": "stream", 1165 | "text": [ 1166 | "Connected to server.\n", 1167 | "Found: map0\n", 1168 | "Found: map1\n", 1169 | "Found: map2\n", 1170 | "Found: map3\n", 1171 | "Found: map4\n", 1172 | "Reset game: ['map0', '5', '4', '50', '100']\n", 1173 | "numberOfPlayers: 4\n", 1174 | "Episode 1 ends. Number of steps is: 7. Accumlated Reward = -13.00. Epsilon = 1.00 .Termination code: 1\n", 1175 | "Reset game: ['map4', '12', '8', '50', '100']\n", 1176 | "numberOfPlayers: 4\n", 1177 | "Episode 2 ends. Number of steps is: 1. Accumlated Reward = -10.00. 
Epsilon = 1.00 .Termination code: 1\n", 1178 | "Reset game: ['map2', '4', '4', '50', '100']\n", 1179 | "numberOfPlayers: 4\n", 1180 | "Episode 3 ends. Number of steps is: 28. Accumlated Reward = -15.00. Epsilon = 1.00 .Termination code: 1\n", 1181 | "Reset game: ['map2', '10', '5', '50', '100']\n", 1182 | "numberOfPlayers: 4\n", 1183 | "Episode 4 ends. Number of steps is: 6. Accumlated Reward = -14.00. Epsilon = 1.00 .Termination code: 1\n", 1184 | "Reset game: ['map1', '4', '3', '50', '100']\n", 1185 | "numberOfPlayers: 4\n", 1186 | "Episode 5 ends. Number of steps is: 17. Accumlated Reward = 33.00. Epsilon = 1.00 .Termination code: 1\n", 1187 | "Reset game: ['map1', '12', '3', '50', '100']\n", 1188 | "numberOfPlayers: 4\n", 1189 | "Episode 6 ends. Number of steps is: 13. Accumlated Reward = -13.00. Epsilon = 1.00 .Termination code: 1\n", 1190 | "Reset game: ['map1', '1', '5', '50', '100']\n", 1191 | "numberOfPlayers: 4\n", 1192 | "Episode 7 ends. Number of steps is: 10. Accumlated Reward = -15.00. Epsilon = 1.00 .Termination code: 1\n", 1193 | "Reset game: ['map0', '1', '4', '50', '100']\n", 1194 | "numberOfPlayers: 4\n", 1195 | "Episode 8 ends. Number of steps is: 3. Accumlated Reward = -10.00. Epsilon = 1.00 .Termination code: 1\n", 1196 | "Reset game: ['map4', '18', '7', '50', '100']\n", 1197 | "numberOfPlayers: 4\n", 1198 | "Episode 9 ends. Number of steps is: 15. Accumlated Reward = -17.00. Epsilon = 1.00 .Termination code: 1\n", 1199 | "Reset game: ['map0', '0', '1', '50', '100']\n", 1200 | "numberOfPlayers: 4\n", 1201 | "Episode 10 ends. Number of steps is: 5. Accumlated Reward = -11.00. Epsilon = 1.00 .Termination code: 1\n", 1202 | "Reset game: ['map3', '14', '2', '50', '100']\n", 1203 | "numberOfPlayers: 4\n", 1204 | "Episode 11 ends. Number of steps is: 50. Accumlated Reward = -19.00. Epsilon = 1.00 .Termination code: 1\n", 1205 | "Reset game: ['map4', '19', '8', '50', '100']\n", 1206 | "numberOfPlayers: 4\n", 1207 | "Episode 12 ends. Number of steps is: 5. Accumlated Reward = -11.00. Epsilon = 1.00 .Termination code: 1\n", 1208 | "Reset game: ['map1', '1', '4', '50', '100']\n", 1209 | "numberOfPlayers: 4\n", 1210 | "Episode 13 ends. Number of steps is: 17. Accumlated Reward = -12.00. Epsilon = 1.00 .Termination code: 1\n", 1211 | "Reset game: ['map4', '18', '4', '50', '100']\n", 1212 | "numberOfPlayers: 4\n", 1213 | "Episode 14 ends. Number of steps is: 5. Accumlated Reward = -18.00. Epsilon = 1.00 .Termination code: 1\n", 1214 | "Reset game: ['map1', '2', '5', '50', '100']\n", 1215 | "numberOfPlayers: 4\n", 1216 | "Episode 15 ends. Number of steps is: 4. Accumlated Reward = -14.00. Epsilon = 1.00 .Termination code: 1\n", 1217 | "Reset game: ['map2', '19', '2', '50', '100']\n", 1218 | "numberOfPlayers: 4\n", 1219 | "Episode 16 ends. Number of steps is: 7. Accumlated Reward = -13.00. Epsilon = 1.00 .Termination code: 1\n", 1220 | "Reset game: ['map1', '10', '5', '50', '100']\n", 1221 | "numberOfPlayers: 4\n", 1222 | "Episode 17 ends. Number of steps is: 32. Accumlated Reward = -24.00. Epsilon = 1.00 .Termination code: 1\n", 1223 | "Reset game: ['map1', '3', '1', '50', '100']\n", 1224 | "numberOfPlayers: 4\n", 1225 | "Episode 18 ends. Number of steps is: 20. Accumlated Reward = -11.00. Epsilon = 1.00 .Termination code: 1\n", 1226 | "Reset game: ['map1', '20', '7', '50', '100']\n", 1227 | "numberOfPlayers: 4\n", 1228 | "Episode 19 ends. Number of steps is: 2. Accumlated Reward = -10.00. 
Epsilon = 1.00 .Termination code: 1\n", 1229 | "Reset game: ['map0', '5', '1', '50', '100']\n", 1230 | "numberOfPlayers: 4\n", 1231 | "Episode 20 ends. Number of steps is: 13. Accumlated Reward = -16.00. Epsilon = 1.00 .Termination code: 1\n", 1232 | "Reset game: ['map2', '3', '2', '50', '100']\n", 1233 | "numberOfPlayers: 4\n", 1234 | "Episode 21 ends. Number of steps is: 8. Accumlated Reward = -11.00. Epsilon = 1.00 .Termination code: 1\n", 1235 | "Reset game: ['map3', '9', '2', '50', '100']\n", 1236 | "numberOfPlayers: 4\n", 1237 | "Episode 22 ends. Number of steps is: 14. Accumlated Reward = -13.00. Epsilon = 1.00 .Termination code: 1\n", 1238 | "Reset game: ['map2', '19', '2', '50', '100']\n", 1239 | "numberOfPlayers: 4\n", 1240 | "Episode 23 ends. Number of steps is: 8. Accumlated Reward = -10.00. Epsilon = 1.00 .Termination code: 1\n", 1241 | "Reset game: ['map0', '10', '4', '50', '100']\n", 1242 | "numberOfPlayers: 4\n", 1243 | "Episode 24 ends. Number of steps is: 10. Accumlated Reward = -12.00. Epsilon = 1.00 .Termination code: 1\n", 1244 | "Reset game: ['map4', '11', '1', '50', '100']\n", 1245 | "numberOfPlayers: 4\n", 1246 | "Episode 25 ends. Number of steps is: 3. Accumlated Reward = -11.00. Epsilon = 1.00 .Termination code: 1\n", 1247 | "Reset game: ['map0', '15', '2', '50', '100']\n", 1248 | "numberOfPlayers: 4\n", 1249 | "Episode 26 ends. Number of steps is: 23. Accumlated Reward = -27.00. Epsilon = 1.00 .Termination code: 1\n", 1250 | "Reset game: ['map2', '17', '4', '50', '100']\n", 1251 | "numberOfPlayers: 4\n", 1252 | "Episode 27 ends. Number of steps is: 4. Accumlated Reward = -16.00. Epsilon = 1.00 .Termination code: 1\n", 1253 | "Reset game: ['map0', '6', '4', '50', '100']\n", 1254 | "numberOfPlayers: 4\n", 1255 | "Episode 28 ends. Number of steps is: 14. Accumlated Reward = -10.00. Epsilon = 1.00 .Termination code: 1\n", 1256 | "Reset game: ['map1', '14', '1', '50', '100']\n", 1257 | "numberOfPlayers: 4\n", 1258 | "Episode 29 ends. Number of steps is: 10. Accumlated Reward = -14.00. Epsilon = 1.00 .Termination code: 1\n", 1259 | "Reset game: ['map4', '20', '1', '50', '100']\n", 1260 | "numberOfPlayers: 4\n", 1261 | "Episode 30 ends. Number of steps is: 5. Accumlated Reward = -12.00. Epsilon = 1.00 .Termination code: 1\n", 1262 | "Reset game: ['map3', '19', '0', '50', '100']\n", 1263 | "numberOfPlayers: 4\n", 1264 | "Episode 31 ends. Number of steps is: 5. Accumlated Reward = -14.00. Epsilon = 1.00 .Termination code: 1\n", 1265 | "Reset game: ['map0', '1', '1', '50', '100']\n", 1266 | "numberOfPlayers: 4\n", 1267 | "Episode 32 ends. Number of steps is: 4. Accumlated Reward = -10.00. Epsilon = 1.00 .Termination code: 1\n", 1268 | "Reset game: ['map0', '18', '6', '50', '100']\n", 1269 | "numberOfPlayers: 4\n", 1270 | "Episode 33 ends. Number of steps is: 13. Accumlated Reward = -18.00. Epsilon = 1.00 .Termination code: 1\n", 1271 | "Reset game: ['map4', '16', '1', '50', '100']\n", 1272 | "numberOfPlayers: 4\n", 1273 | "Episode 34 ends. Number of steps is: 6. Accumlated Reward = -11.00. Epsilon = 1.00 .Termination code: 1\n", 1274 | "Reset game: ['map1', '0', '0', '50', '100']\n", 1275 | "numberOfPlayers: 4\n", 1276 | "Episode 35 ends. Number of steps is: 2. Accumlated Reward = -10.00. Epsilon = 1.00 .Termination code: 1\n", 1277 | "Reset game: ['map3', '0', '4', '50', '100']\n", 1278 | "numberOfPlayers: 4\n", 1279 | "Episode 36 ends. Number of steps is: 26. Accumlated Reward = -16.00. 
Epsilon = 1.00 .Termination code: 1\n", 1280 | "Reset game: ['map4', '9', '4', '50', '100']\n", 1281 | "numberOfPlayers: 4\n", 1282 | "Episode 37 ends. Number of steps is: 19. Accumlated Reward = 90.00. Epsilon = 1.00 .Termination code: 1\n", 1283 | "Reset game: ['map1', '18', '0', '50', '100']\n", 1284 | "numberOfPlayers: 4\n", 1285 | "Episode 38 ends. Number of steps is: 2. Accumlated Reward = -10.00. Epsilon = 1.00 .Termination code: 1\n", 1286 | "Reset game: ['map2', '0', '0', '50', '100']\n", 1287 | "numberOfPlayers: 4\n", 1288 | "Episode 39 ends. Number of steps is: 11. Accumlated Reward = -13.00. Epsilon = 1.00 .Termination code: 1\n", 1289 | "Reset game: ['map0', '6', '1', '50', '100']\n", 1290 | "numberOfPlayers: 4\n", 1291 | "Episode 40 ends. Number of steps is: 14. Accumlated Reward = -10.00. Epsilon = 1.00 .Termination code: 1\n", 1292 | "Reset game: ['map2', '15', '5', '50', '100']\n", 1293 | "numberOfPlayers: 4\n", 1294 | "Episode 41 ends. Number of steps is: 14. Accumlated Reward = -13.00. Epsilon = 1.00 .Termination code: 1\n", 1295 | "Reset game: ['map3', '18', '6', '50', '100']\n", 1296 | "numberOfPlayers: 4\n", 1297 | "Episode 42 ends. Number of steps is: 14. Accumlated Reward = -14.00. Epsilon = 1.00 .Termination code: 1\n", 1298 | "Reset game: ['map2', '12', '6', '50', '100']\n", 1299 | "numberOfPlayers: 4\n", 1300 | "Episode 43 ends. Number of steps is: 13. Accumlated Reward = -13.00. Epsilon = 1.00 .Termination code: 1\n", 1301 | "Reset game: ['map1', '2', '7', '50', '100']\n", 1302 | "numberOfPlayers: 4\n", 1303 | "Episode 44 ends. Number of steps is: 8. Accumlated Reward = -11.00. Epsilon = 1.00 .Termination code: 1\n", 1304 | "Reset game: ['map0', '7', '4', '50', '100']\n", 1305 | "numberOfPlayers: 4\n", 1306 | "Episode 45 ends. Number of steps is: 10. Accumlated Reward = -20.00. Epsilon = 1.00 .Termination code: 1\n", 1307 | "Reset game: ['map1', '3', '8', '50', '100']\n", 1308 | "numberOfPlayers: 4\n", 1309 | "Episode 46 ends. Number of steps is: 3. Accumlated Reward = -11.00. Epsilon = 1.00 .Termination code: 1\n", 1310 | "Reset game: ['map1', '3', '0', '50', '100']\n", 1311 | "numberOfPlayers: 4\n", 1312 | "Episode 47 ends. Number of steps is: 9. Accumlated Reward = -13.00. Epsilon = 1.00 .Termination code: 1\n", 1313 | "Reset game: ['map0', '16', '2', '50', '100']\n", 1314 | "numberOfPlayers: 4\n", 1315 | "Episode 48 ends. Number of steps is: 12. Accumlated Reward = -12.00. Epsilon = 1.00 .Termination code: 1\n", 1316 | "Reset game: ['map4', '15', '8', '50', '100']\n", 1317 | "numberOfPlayers: 4\n", 1318 | "Episode 49 ends. Number of steps is: 3. Accumlated Reward = -10.00. Epsilon = 1.00 .Termination code: 1\n", 1319 | "Reset game: ['map0', '8', '3', '50', '100']\n", 1320 | "numberOfPlayers: 4\n", 1321 | "Episode 50 ends. Number of steps is: 33. Accumlated Reward = 38.00. Epsilon = 1.00 .Termination code: 1\n", 1322 | "Reset game: ['map3', '4', '6', '50', '100']\n", 1323 | "numberOfPlayers: 4\n", 1324 | "Episode 51 ends. Number of steps is: 16. Accumlated Reward = -23.00. Epsilon = 1.00 .Termination code: 1\n", 1325 | "Reset game: ['map1', '15', '4', '50', '100']\n", 1326 | "numberOfPlayers: 4\n", 1327 | "Episode 52 ends. Number of steps is: 13. Accumlated Reward = -11.00. Epsilon = 1.00 .Termination code: 1\n", 1328 | "Reset game: ['map4', '20', '3', '50', '100']\n", 1329 | "numberOfPlayers: 4\n", 1330 | "Episode 53 ends. Number of steps is: 4. Accumlated Reward = -10.00. 
Epsilon = 1.00 .Termination code: 1\n", 1331 | "Reset game: ['map4', '9', '3', '50', '100']\n", 1332 | "numberOfPlayers: 4\n", 1333 | "Episode 54 ends. Number of steps is: 11. Accumlated Reward = -14.00. Epsilon = 1.00 .Termination code: 1\n", 1334 | "Reset game: ['map4', '13', '8', '50', '100']\n", 1335 | "numberOfPlayers: 4\n", 1336 | "Episode 55 ends. Number of steps is: 7. Accumlated Reward = -10.00. Epsilon = 1.00 .Termination code: 1\n", 1337 | "Reset game: ['map3', '9', '7', '50', '100']\n", 1338 | "numberOfPlayers: 4\n", 1339 | "Episode 56 ends. Number of steps is: 5. Accumlated Reward = -16.00. Epsilon = 1.00 .Termination code: 1\n", 1340 | "Reset game: ['map2', '1', '2', '50', '100']\n", 1341 | "numberOfPlayers: 4\n", 1342 | "Episode 57 ends. Number of steps is: 5. Accumlated Reward = -12.00. Epsilon = 1.00 .Termination code: 1\n", 1343 | "Reset game: ['map1', '15', '6', '50', '100']\n", 1344 | "numberOfPlayers: 4\n", 1345 | "Episode 58 ends. Number of steps is: 13. Accumlated Reward = -18.00. Epsilon = 1.00 .Termination code: 1\n", 1346 | "Reset game: ['map2', '5', '3', '50', '100']\n", 1347 | "numberOfPlayers: 4\n", 1348 | "Episode 59 ends. Number of steps is: 8. Accumlated Reward = -10.00. Epsilon = 1.00 .Termination code: 1\n", 1349 | "Reset game: ['map3', '8', '8', '50', '100']\n", 1350 | "numberOfPlayers: 4\n", 1351 | "Episode 60 ends. Number of steps is: 17. Accumlated Reward = -16.00. Epsilon = 1.00 .Termination code: 1\n", 1352 | "Reset game: ['map0', '14', '7', '50', '100']\n", 1353 | "numberOfPlayers: 4\n", 1354 | "Episode 61 ends. Number of steps is: 6. Accumlated Reward = -12.00. Epsilon = 1.00 .Termination code: 1\n", 1355 | "Reset game: ['map2', '20', '1', '50', '100']\n", 1356 | "numberOfPlayers: 4\n", 1357 | "Episode 62 ends. Number of steps is: 10. Accumlated Reward = -12.00. Epsilon = 1.00 .Termination code: 1\n", 1358 | "Reset game: ['map2', '3', '7', '50', '100']\n", 1359 | "numberOfPlayers: 4\n", 1360 | "Episode 63 ends. Number of steps is: 23. Accumlated Reward = -12.00. Epsilon = 1.00 .Termination code: 1\n", 1361 | "Reset game: ['map2', '0', '0', '50', '100']\n", 1362 | "numberOfPlayers: 4\n", 1363 | "Episode 64 ends. Number of steps is: 1. Accumlated Reward = -10.00. Epsilon = 1.00 .Termination code: 1\n", 1364 | "Reset game: ['map3', '8', '5', '50', '100']\n", 1365 | "numberOfPlayers: 4\n", 1366 | "Episode 65 ends. Number of steps is: 8. Accumlated Reward = -16.00. Epsilon = 1.00 .Termination code: 1\n", 1367 | "Reset game: ['map2', '13', '6', '50', '100']\n", 1368 | "numberOfPlayers: 4\n", 1369 | "Episode 66 ends. Number of steps is: 19. Accumlated Reward = -13.00. Epsilon = 1.00 .Termination code: 1\n", 1370 | "Reset game: ['map4', '18', '3', '50', '100']\n", 1371 | "numberOfPlayers: 4\n", 1372 | "Episode 67 ends. Number of steps is: 6. Accumlated Reward = -22.00. Epsilon = 1.00 .Termination code: 1\n", 1373 | "Reset game: ['map3', '15', '1', '50', '100']\n", 1374 | "numberOfPlayers: 4\n", 1375 | "Episode 68 ends. Number of steps is: 6. Accumlated Reward = -19.00. Epsilon = 1.00 .Termination code: 1\n", 1376 | "Reset game: ['map4', '17', '1', '50', '100']\n", 1377 | "numberOfPlayers: 4\n", 1378 | "Episode 69 ends. Number of steps is: 40. Accumlated Reward = -25.00. Epsilon = 1.00 .Termination code: 1\n", 1379 | "Reset game: ['map1', '7', '3', '50', '100']\n", 1380 | "numberOfPlayers: 4\n", 1381 | "Episode 70 ends. Number of steps is: 51. Accumlated Reward = -13.00. 
Epsilon = 1.00 .Termination code: 1\n", 1382 | "Reset game: ['map3', '19', '1', '50', '100']\n", 1383 | "numberOfPlayers: 4\n", 1384 | "Episode 71 ends. Number of steps is: 10. Accumlated Reward = -16.00. Epsilon = 1.00 .Termination code: 1\n", 1385 | "Reset game: ['map1', '17', '6', '50', '100']\n", 1386 | "numberOfPlayers: 4\n", 1387 | "Episode 72 ends. Number of steps is: 12. Accumlated Reward = -24.00. Epsilon = 1.00 .Termination code: 1\n", 1388 | "Reset game: ['map1', '7', '5', '50', '100']\n", 1389 | "numberOfPlayers: 4\n", 1390 | "Episode 73 ends. Number of steps is: 27. Accumlated Reward = -12.00. Epsilon = 1.00 .Termination code: 1\n", 1391 | "Reset game: ['map3', '1', '5', '50', '100']\n", 1392 | "numberOfPlayers: 4\n", 1393 | "Episode 74 ends. Number of steps is: 11. Accumlated Reward = -12.00. Epsilon = 1.00 .Termination code: 1\n", 1394 | "Reset game: ['map1', '13', '5', '50', '100']\n", 1395 | "numberOfPlayers: 4\n", 1396 | "Episode 75 ends. Number of steps is: 22. Accumlated Reward = 10.00. Epsilon = 1.00 .Termination code: 1\n", 1397 | "Reset game: ['map2', '17', '8', '50', '100']\n", 1398 | "numberOfPlayers: 4\n", 1399 | "Episode 76 ends. Number of steps is: 1. Accumlated Reward = -10.00. Epsilon = 1.00 .Termination code: 1\n", 1400 | "Reset game: ['map3', '9', '3', '50', '100']\n", 1401 | "numberOfPlayers: 4\n", 1402 | "Episode 77 ends. Number of steps is: 26. Accumlated Reward = -18.00. Epsilon = 1.00 .Termination code: 1\n", 1403 | "Reset game: ['map2', '8', '3', '50', '100']\n", 1404 | "numberOfPlayers: 4\n", 1405 | "Episode 78 ends. Number of steps is: 13. Accumlated Reward = -10.00. Epsilon = 1.00 .Termination code: 1\n", 1406 | "Reset game: ['map2', '6', '3', '50', '100']\n", 1407 | "numberOfPlayers: 4\n", 1408 | "Episode 79 ends. Number of steps is: 18. Accumlated Reward = -11.00. Epsilon = 1.00 .Termination code: 1\n", 1409 | "Reset game: ['map4', '11', '5', '50', '100']\n", 1410 | "numberOfPlayers: 4\n", 1411 | "Episode 80 ends. Number of steps is: 11. Accumlated Reward = -12.00. Epsilon = 1.00 .Termination code: 1\n", 1412 | "Reset game: ['map0', '13', '8', '50', '100']\n", 1413 | "numberOfPlayers: 4\n", 1414 | "Episode 81 ends. Number of steps is: 4. Accumlated Reward = 40.00. Epsilon = 1.00 .Termination code: 1\n", 1415 | "Reset game: ['map4', '5', '2', '50', '100']\n", 1416 | "numberOfPlayers: 4\n", 1417 | "Episode 82 ends. Number of steps is: 8. Accumlated Reward = -15.00. Epsilon = 1.00 .Termination code: 1\n", 1418 | "Reset game: ['map4', '16', '3', '50', '100']\n", 1419 | "numberOfPlayers: 4\n", 1420 | "Episode 83 ends. Number of steps is: 7. Accumlated Reward = -5.00. Epsilon = 1.00 .Termination code: 1\n", 1421 | "Reset game: ['map0', '19', '3', '50', '100']\n", 1422 | "numberOfPlayers: 4\n", 1423 | "Episode 84 ends. Number of steps is: 11. Accumlated Reward = -12.00. Epsilon = 1.00 .Termination code: 1\n", 1424 | "Reset game: ['map0', '10', '1', '50', '100']\n", 1425 | "numberOfPlayers: 4\n", 1426 | "Episode 85 ends. Number of steps is: 12. Accumlated Reward = -11.00. 
Epsilon = 1.00 .Termination code: 1\n", 1427 | "Reset game: ['map0', '3', '5', '50', '100']\n", 1428 | "numberOfPlayers: 4\n" 1429 | ] 1430 | } 1431 | ], 1432 | "source": [ 1433 | "#DQN Algorithm-Main\n", 1434 | "#Create the header of the CSV file for logging DQN training data\n", 1435 | "'''now = datetime.datetime.now()\n", 1436 | "header = [\"Ep\",\"Step\", \"Reward\",\"Total_reward\",\"Action\",\"Epsilon\",\"Done\",\"Termination_Code\"]\n", 1437 | "filename = \"Data/data_\" + now.strftime(\"%Y%m%d-%H%M\") + \".csv\"\n", 1438 | "with open(filename, 'w') as f:\n", 1439 | " pd.DataFrame(columns = header).to_csv(f,encoding='utf-8', index=False, header = True)'''\n", 1440 | "\n", 1441 | "# Parameters for training a DQN model\n", 1442 | "N_EPISODE = 10000 #The number of episodes for training\n", 1443 | "MAX_STEP = 1000 #The maximum number of steps for each episode\n", 1444 | "BATCH_SIZE = 32 #The number of experiences for each replay \n", 1445 | "MEMORY_SIZE = 100000 #The maximum number of experiences stored in the replay memory\n", 1446 | "SAVE_NETWORK = 100 # After this number of episodes, the DQN model is saved for testing later. \n", 1447 | "INITIAL_REPLAY_SIZE = 1000 #The number of experiences stored in the memory before replaying starts\n", 1448 | "INPUTNUM = 198 #The number of input values for the DQN model\n", 1449 | "ACTIONNUM = 6 #The number of actions output from the DQN model\n", 1450 | "MAP_MAX_X = 21 #Width of the Map\n", 1451 | "MAP_MAX_Y = 9 #Height of the Map\n", 1452 | "\n", 1453 | "# Initialize network and memory\n", 1454 | "DQNAgent = DQN(INPUTNUM,ACTIONNUM)\n", 1455 | "memory = Memory(MEMORY_SIZE)\n", 1456 | "\n", 1457 | "# Initialize environment\n", 1458 | "Maps = CreateMaps()#Creating 05 maps\n", 1459 | "minerEnv = MinerEnv()#Creating a communication environment between the DQN model and the game environment\n", 1460 | "minerEnv.start() #Connect to the game\n", 1461 | "\n", 1462 | "train = False #Indicates that replaying has started and that epsilon starts to decrease.\n", 1463 | "#Training Process\n", 1464 | "#The main part of the deep Q-learning algorithm\n", 1465 | "for episode_i in range(0,N_EPISODE):\n", 1466 | " try:\n", 1467 | " # Choosing a map in the list\n", 1468 | " mapID = np.random.randint(0, 5) #Choosing a map ID randomly from the 5 maps in the Maps folder\n", 1469 | " posID_x = np.random.randint(MAP_MAX_X) #Choosing an initial position of the DQN agent on the X-axis randomly\n", 1470 | " posID_y = np.random.randint(MAP_MAX_Y) #Choosing an initial position of the DQN agent on the Y-axis randomly\n", 1471 | " #Creating a request for initializing a map, initial position, the initial energy, and the maximum number of steps of the DQN agent\n", 1472 | " request = (\"map\" + str(mapID) + \",\" + str(posID_x) + \",\" + str(posID_y) + \",50,100\") \n", 1473 | " #Send the request to the game environment\n", 1474 | " minerEnv.send_map_info(request)\n", 1475 | "\n", 1476 | " # Getting the initial state\n", 1477 | " minerEnv.reset() #Initialize the game environment\n", 1478 | " s = minerEnv.get_state()#Get the state after resetting.
\n", 1479 | " #This function (get_state()) is an example of creating a state for the DQN model \n", 1480 | " total_reward = 0 #The amount of rewards for the entire episode\n", 1481 | " terminate = False #The variable indicates that the episode ends\n", 1482 | " maxStep = minerEnv.state.mapInfo.maxStep #Get the maximum number of steps for each episode in training\n", 1483 | " #Start an episde for training\n", 1484 | " for step in range(0, maxStep):\n", 1485 | " action = DQNAgent.act(s) # Getting an action from the DQN model from the state (s)\n", 1486 | " minerEnv.step(str(action)) # Performing the action in order to obtain the new state\n", 1487 | " s_next = minerEnv.get_state() # Getting a new state\n", 1488 | " reward = minerEnv.get_reward() # Getting a reward\n", 1489 | " terminate = minerEnv.check_terminate() # Checking the end status of the episode\n", 1490 | " \n", 1491 | " # Add this transition to the memory batch\n", 1492 | " memory.push(s, action, reward, terminate, s_next)\n", 1493 | "\n", 1494 | " # Sample batch memory to train network\n", 1495 | " if (memory.length > INITIAL_REPLAY_SIZE):\n", 1496 | " #If there are INITIAL_REPLAY_SIZE experiences in the memory batch\n", 1497 | " #then start replaying\n", 1498 | " batch = memory.sample(BATCH_SIZE) #Get a BATCH_SIZE experiences for replaying\n", 1499 | " DQNAgent.replay(batch, BATCH_SIZE)#Do relaying\n", 1500 | " train = True #Indicate the training starts\n", 1501 | " total_reward = total_reward + reward #Plus the reward to the total rewad of the episode\n", 1502 | " s = s_next #Assign the next state for the next step.\n", 1503 | " \n", 1504 | " #Saving data to file\n", 1505 | " '''save_data = np.hstack([episode_i+1,step+1,reward,total_reward,action, DQNAgent.epsilon, terminate]).reshape(1,7)\n", 1506 | " with open(filename, 'a') as f:\n", 1507 | " pd.DataFrame(save_data).to_csv(f,encoding='utf-8', index=False, header = False)'''\n", 1508 | "\n", 1509 | " if terminate == True:\n", 1510 | " #If the episode ends, then go to the next episode\n", 1511 | " break\n", 1512 | " \n", 1513 | " # Iteration to save the network architecture and weights\n", 1514 | " if (np.mod(episode_i + 1, SAVE_NETWORK) == 0 and train == True):\n", 1515 | " DQNAgent.target_train() # Replace the learning weights for target model with soft replacement\n", 1516 | " #Save the DQN model\n", 1517 | " now = datetime.datetime.now() #Get the latest datetime \n", 1518 | " DQNAgent.save_model( \"DQNmodel_\" + now.strftime(\"%Y%m%d-%H%M\") + \"_ep\" + str(episode_i+1)) \n", 1519 | " \n", 1520 | " #Print the training information after the episode\n", 1521 | " print('Episode %d ends. Number of steps is: %d. Accumlated Reward = %.2f. 
Epsilon = %.2f. Termination code: %d' % (episode_i+1, step+1, total_reward, DQNAgent.epsilon, terminate))\n", 1522 | " #Decreasing the epsilon once replaying has started\n", 1523 | " if train == True:\n", 1524 | " DQNAgent.update_epsilon()\n", 1525 | " \n", 1526 | " except Exception as e:\n", 1527 | " import traceback\n", 1528 | " traceback.print_exc() \n", 1529 | " #print(\"Finished.\")\n", 1530 | " break" 1531 | ] 1532 | } 1533 | ], 1534 | "metadata": { 1535 | "colab": { 1536 | "collapsed_sections": [], 1537 | "name": "Miner_Training_Colab_CodeSample.ipynb", 1538 | "provenance": [] 1539 | }, 1540 | "kernelspec": { 1541 | "display_name": "Python 3", 1542 | "language": "python", 1543 | "name": "python3" 1544 | }, 1545 | "language_info": { 1546 | "codemirror_mode": { 1547 | "name": "ipython", 1548 | "version": 3 1549 | }, 1550 | "file_extension": ".py", 1551 | "mimetype": "text/x-python", 1552 | "name": "python", 1553 | "nbconvert_exporter": "python", 1554 | "pygments_lexer": "ipython3", 1555 | "version": "3.6.5" 1556 | } 1557 | }, 1558 | "nbformat": 4, 1559 | "nbformat_minor": 1 1560 | } 1561 | --------------------------------------------------------------------------------
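The two learning rules at the heart of the notebook's DQN class are easy to lose in the Keras plumbing: the Bellman target built in replay() and the soft target-network update in target_train(). The following is a minimal NumPy-only sketch of both; the tiny linear "model", the sample transition, and the gamma/tau constants are hypothetical stand-ins chosen for illustration, not the notebook's actual hyperparameters (those are set in the DQN constructor).

import numpy as np

gamma, tau = 0.99, 0.125           # assumed discount factor and soft-update rate
rng = np.random.default_rng(0)

# Toy "networks": a single weight matrix each, mapping a 4-dim state to 6 Q-values
weights = rng.normal(size=(4, 6))
target_weights = weights.copy()

def q_values(w, s):
    return s @ w                   # stand-in for model.predict(state)

# One hypothetical transition (s, a, r, s2, done)
s, a, r, s2, done = rng.normal(size=4), 2, -1.0, rng.normal(size=4), False

# Bellman target, mirroring replay(): start from the target network's prediction
# and overwrite only the entry of the action actually taken
target = q_values(target_weights, s).copy()
target[a] = r if done else r + gamma * np.max(q_values(target_weights, s2))

# Soft target update, mirroring target_train(): blend the online weights into
# the target weights so the bootstrap targets move slowly
target_weights = tau * weights + (1 - tau) * target_weights

Using the target network for the bootstrap term and only nudging it by tau per update is what keeps the regression targets from chasing the online network's own moving predictions.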
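Similarly, the Memory class implements a FIFO replay buffer by stacking rows with np.vstack and deleting row 0 once capacity is exceeded. The sketch below isolates just that eviction behavior; TinyMemory and its single states field are illustrative only, not the competition code.

import numpy as np

class TinyMemory:
    def __init__(self, capacity):
        self.capacity = capacity
        self.length = 0
        self.states = None

    def push(self, s):
        # First push stores the row; later pushes stack below it
        self.states = s if self.states is None else np.vstack((self.states, s))
        self.length += 1
        if self.length > self.capacity:   # over capacity: evict the oldest row
            self.states = np.delete(self.states, 0, axis=0)
            self.length -= 1

m = TinyMemory(capacity=3)
for v in range(5):
    m.push(np.full((1, 2), v))
print(m.states)   # rows 2, 3 and 4 remain; rows 0 and 1 were evicted

Note that np.vstack and np.delete copy the whole array on every push, which is O(n) per transition; a fixed-size preallocated ring buffer with a write index is the usual constant-time alternative for large MEMORY_SIZE values.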