├── .gitignore
├── LICENSE
├── README.md
├── curr.png
├── myAgent.cs
└── systemmodel.png

/.gitignore:
--------------------------------------------------------------------------------
*.meta
git_commit_logs.txt
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
BSD 2-Clause License

Copyright (c) 2022, IntelligentNetworkingLAB
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Reinforcement-Learning-via-Curriculum-in-a-Wireless-Network-with-High-User-Mobility

## Overview
This is the simulation code for "Resource Allocation and User Association Using Reinforcement Learning Through Curriculum in a Wireless Network with High User Mobility".

With the rapid development of wireless networks and artificial intelligence, many new mobile-network applications have emerged. When user mobility is high, as in the Internet of Vehicles, resource allocation becomes more complicated and handovers occur more frequently. Moreover, resource allocation in wireless networks is known to be NP-hard, and reinforcement learning is a promising way to tackle it. Designing the reward function, however, is difficult, and an incorrectly designed reward can lead to completely unexpected behavior. In this paper, we propose a curriculum learning technique that addresses these problems so that the reinforcement learning agent can learn more reliably: during training, each user's mobility is increased step by step so that the model can learn the task accurately.
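The curriculum is driven by a single environment parameter, `level`, which `myAgent.cs` reads via `Academy.Instance.EnvironmentParameters` and uses as the mean velocity of its Gauss-Markov mobility model (a level of 0 skips the mobility update and leaves users at their initial unit velocity). The repository does not ship a trainer configuration, so the snippet below is only a sketch of how `level` could be scheduled in an ML-Agents curriculum; the behavior name `myAgent`, the reward thresholds, and the velocity values are illustrative assumptions, not the settings used in the paper.

```yaml
# Sketch of the curriculum portion of an ML-Agents trainer config (it would sit
# alongside the usual "behaviors:" section). Names, thresholds, and values are assumptions.
environment_parameters:
  level:
    curriculum:
      - name: StaticUsers          # level = 0: the Gauss-Markov update is skipped
        completion_criteria:
          measure: reward
          behavior: myAgent        # must match the agent's Behavior Parameters name
          threshold: 1000.0        # placeholder cumulative-reward threshold
          min_lesson_length: 100
        value: 0.0
      - name: LowMobility
        completion_criteria:
          measure: reward
          behavior: myAgent
          threshold: 1000.0
          min_lesson_length: 100
        value: 1.0
      - name: HighMobility         # final lesson: highest mean velocity
        value: 3.0
```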
![image info](./systemmodel.png)
![image info](./curr.png)

## Version Info
- Unity 2021
- ML-Agents 2.0 (Unity Package)
- ML-Agents 0.26.0 (Python Package)

## Acknowledgement
This work was supported by the Institute of Information & Communications Technology Planning & Evaluation (IITP) grant funded by the Korea government (MSIT) (No. 2019-0-01287, Evolvable Deep Learning Model Generation Platform for Distributed Edge) and by the IITP grant funded by the Korea government (MSIT) (No. RS-2022-00155911, Artificial Intelligence Convergence Innovation Human Resources Development (Kyung Hee University)).

## License
Copyright (c) 2022 Networking Intelligence

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/curr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelligentNetworkingLAB/Reinforcement-Learning-via-Curriculum-in-a-Wireless-Network-with-High-User-Mobility/b5cdab7d1bb76e598e5a0e6c908498f9257cbb6c/curr.png
--------------------------------------------------------------------------------
/myAgent.cs:
--------------------------------------------------------------------------------
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
using System.Diagnostics;
using System;

public class myAgent : Agent
{
    public const int NUM_user = 12;   // number of users
    public const int NUM_RBs = 13;    // number of resource blocks
    public const int MAX_epis = 50;
    public const int NUM_BS = 4;      // number of base stations
    public int step = 0;
    EnvironmentParameters m_ResetParams;

    public Vector2[] Users;        // user positions
    public float[] velo_Usr;       // user velocities
    public float[] angle_Usr;      // user movement angles

    public float[] Ch_Gain;        // channel gain of each user toward its serving BS (TODO)
    public float[,] Ch_Gain_OBS;   // per-BS, per-user channel gains used as observations
    public float Usr_Rate;         // total data rate
    public int[] Usr_Assoc;        // user association (serving BS index per user)
    public int[] alloc_RBs;        // resource-block allocation
    public float[] eachUsr_Rate;   // per-user data rate

    public Vector2[] BS;           // base-station positions

    public GameObject[] mBS;
    public GameObject[] mUsers;

    public override void Initialize()
    {
        Users = new Vector2[NUM_user];
        Ch_Gain = new float[NUM_user];
        Ch_Gain_OBS = new float[NUM_BS, NUM_user];

        Usr_Assoc = new int[NUM_user];
        alloc_RBs = new int[NUM_RBs];
        eachUsr_Rate = new float[NUM_user];
        velo_Usr = new float[NUM_user];
        angle_Usr = new float[NUM_user];
        BS = new Vector2[NUM_BS];

        // Base stations at the centers of the four quadrants of the 500 x 500 area.
        BS[0].x = 125.0f;
        BS[0].y = 125.0f;
        BS[1].x = 375.0f;
        BS[1].y = 125.0f;
        BS[2].x = 125.0f;
        BS[2].y = 375.0f;
        BS[3].x = 375.0f;
        BS[3].y = 375.0f;

        for (int i = 0; i < NUM_BS; ++i)
        {
            mBS[i].transform.position = new Vector3(BS[i].x, 0.0f, BS[i].y);
        }

        m_ResetParams = Academy.Instance.EnvironmentParameters;
        //SetResetParameters();
    }

    public override void CollectObservations(VectorSensor sensor)
    {
        sensor.AddObservation(Usr_Rate);

        for (int i = 0; i < NUM_user; ++i)
            sensor.AddObservation(eachUsr_Rate[i]);

        for (int i = 0; i < NUM_user; ++i)
            sensor.AddObservation(Usr_Assoc[i]);

        for (int i = 0; i < NUM_RBs; ++i)
            sensor.AddObservation(alloc_RBs[i]);

        for (int i = 0; i < NUM_BS; ++i)
        {
            for (int j = 0; j < NUM_user; ++j)
                sensor.AddObservation(Ch_Gain_OBS[i, j]);
        }
    }

    public override void OnActionReceived(ActionBuffers actionBuffers)
    {
        // Curriculum parameter: the current mobility level (mean user velocity).
        float level = m_ResetParams.GetWithDefault("level", 0);

        step = this.StepCount % MAX_epis;
        float pre_rate = Usr_Rate;

        float[] pre_Assoc = new float[NUM_user];

        for (int i = 0; i < NUM_user; ++i)
            pre_Assoc[i] = Usr_Assoc[i];

        int num_changed = 0;
        //float sum_RBs = 0.0f;

        if (level != 0)
            gaussian_markov(level);
        user_move();
        user_move_go();

        // Actions: user-BS association for each user
        for (int i = 0; i < NUM_user; ++i)
            Usr_Assoc[i] = actionBuffers.DiscreteActions[i];

        // Remaining actions: resource-block allocation ratio
        for (int i = NUM_user; i < NUM_RBs; ++i)
        {
            alloc_RBs[i] = actionBuffers.DiscreteActions[i];
        }
        //UnityEngine.Debug.Log(step.ToString());

        // Recompute channel gains and data rates
        for (int i = 0; i < NUM_user; i++)
            get_channel_gain(i);

        get_rate();
        /*for (int i = 0; i < 20; i++)
        {
            UnityEngine.Debug.Log(i.ToString() + "'s Rate: " + eachUsr_Rate[i].ToString());
        }*/

        // End the episode with a heavy penalty if any user's rate drops below the threshold.
        if (is_constraint_rate(60.0f))
        {
            SetReward(-1000.0f);
            //UnityEngine.Debug.Log("Each users DataRate Threshold constraint...");
            EndEpisode();
        }
        // Compare the current total rate with the previous one.
        if (pre_rate > Usr_Rate)
        {
            AddReward(Usr_Rate * 0.2f);
        }
        else
            AddReward(Usr_Rate + Usr_Rate - pre_rate);

        // Handover penalty for every user whose association changed.
        num_changed = is_changed(pre_Assoc);
        if (num_changed != 0)
        {
            for (int i = 0; i < num_changed; ++i)
                AddReward(-150.0f);
        }
        //AddReward(step * 5.0f);
        //step = step + 1;
    }

    public override void OnEpisodeBegin()
    {
        SetResetParameters();
    }

    public void SetResetParameters()
    {
        step = 0; // reset episode step count

        // Random initial user positions in the 500 x 500 area.
        for (int i = 0; i < NUM_user; ++i)
        {
            Users[i].x = UnityEngine.Random.Range(0.0f, 500.0f);
            Users[i].y = UnityEngine.Random.Range(0.0f, 500.0f);
        }

        // Unit initial velocity and a random heading for every user.
        for (int i = 0; i < NUM_user; ++i)
        {
            velo_Usr[i] = 1.0f;
            angle_Usr[i] = UnityEngine.Random.Range(0.0f, 2 * Mathf.PI);
        }

        // Random initial user association and resource-block allocation.
        for (int i = 0; i < NUM_user; i++)
            Usr_Assoc[i] = (int)Mathf.Floor(UnityEngine.Random.Range(0.0f, 3.9f));

        for (int i = 0; i < NUM_RBs; i++)
            alloc_RBs[i] = (int)Mathf.Floor(UnityEngine.Random.Range(0.0f, 19.0f));

        for (int i = 0; i < NUM_user; i++)
            get_channel_gain(i);

        get_rate();
    }

    // Counts how many users changed their serving BS compared with the previous step.
    public int is_changed(float[] pre)
    {
        int count = 0;
        for (int j = 0; j < NUM_user; ++j)
        {
            if (Usr_Assoc[j] != pre[j])
                count = count + 1;
        }
        return count;
    }

    public void user_move()
    {
        for (int i = 0; i < NUM_user; ++i)
        {
            Users[i].x = Users[i].x + velo_Usr[i] * Mathf.Cos(angle_Usr[i]);
            Users[i].y = Users[i].y + velo_Usr[i] * Mathf.Sin(angle_Usr[i]);
        }
    }

    public void user_move_go()
    {
        for (int i = 0; i < NUM_user; ++i)
        {
            mUsers[i].transform.position = new Vector3(Users[i].x, 0.0f, Users[i].y);
        }
    }

    // Gauss-Markov mobility model; the curriculum level sets the mean velocity.
    public void gaussian_markov(float lev)
    {
        float velo_mean = lev;
        float angle_mean = 0.0f;

        for (int j = 0; j < NUM_user; j++)
        {
            float alpha1 = UnityEngine.Random.Range(0.0f, 1.0f);
            float alpha2 = 1 - alpha1;
            float alpha3 = Mathf.Sqrt(1 - alpha1 * alpha1);

            velo_Usr[j] = (alpha1 * velo_Usr[j] +
                           alpha2 * velo_mean +
                           alpha3 * UnityEngine.Random.Range(0.0f, 1.0f));
            angle_Usr[j] = (alpha1 * angle_Usr[j] +
                            alpha2 * angle_mean +
                            alpha3 * UnityEngine.Random.Range(0.0f, 1.0f));
        }
    }

    public bool is_constraint_alloc(int RB)
    {
        if (RB > NUM_RBs)
            return false;
        return true;
    }

    // Returns true if any user's rate is below the given threshold.
    public bool is_constraint_rate(float Threshold)
    {
        for (int i = 0; i < NUM_user; ++i)
        {
            if (eachUsr_Rate[i] < Threshold)
            {
                return true;
            }
        }
        return false;
    }

    // Standard normal sample via the Box-Muller transform.
    public float randn(float mean, float stddev)
    {
        float r1 = UnityEngine.Random.Range(0.0f, 1.0f);
        float r2 = UnityEngine.Random.Range(0.0f, 1.0f);

        float rand = Mathf.Sqrt(-2.0f * Mathf.Log(r1)) * Mathf.Sin(2.0f * Mathf.PI * r2);
        return rand;
        //return mean + stddev * rand;
    }

    // Channel-gain and data-rate models are not included in this repository.
    public void get_channel_gain(int idx)
    {
    }

    public void get_rate()
    {
    }
}
--------------------------------------------------------------------------------
/systemmodel.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelligentNetworkingLAB/Reinforcement-Learning-via-Curriculum-in-a-Wireless-Network-with-High-User-Mobility/b5cdab7d1bb76e598e5a0e6c908498f9257cbb6c/systemmodel.png
--------------------------------------------------------------------------------
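The `get_channel_gain()` and `get_rate()` methods in myAgent.cs above are left empty, so the channel and rate models used in the paper are not part of this repository. Purely as an illustrative placeholder, a log-distance path-loss gain combined with a Shannon-capacity rate could be sketched as below; the path-loss exponent, transmit power, noise power, per-RB bandwidth, and the equal per-user bandwidth share are all assumed values, not taken from the paper.

```csharp
// Illustrative stand-ins for the empty methods in myAgent.cs; every constant and
// modelling choice here is an assumption, not the model used in the paper.
const float PATHLOSS_EXP = 3.5f;    // assumed path-loss exponent
const float TX_POWER = 1.0f;        // assumed transmit power (W)
const float NOISE_POWER = 1e-13f;   // assumed noise power (W)
const float RB_BANDWIDTH = 180e3f;  // assumed bandwidth per resource block (Hz)

public void get_channel_gain(int idx)
{
    // Log-distance path loss from user idx to every BS; the serving BS's gain is
    // also stored in Ch_Gain for the rate computation.
    for (int b = 0; b < NUM_BS; ++b)
    {
        float dx = Users[idx].x - BS[b].x;
        float dy = Users[idx].y - BS[b].y;
        float dist = Mathf.Max(1.0f, Mathf.Sqrt(dx * dx + dy * dy)); // clamp to 1 m
        Ch_Gain_OBS[b, idx] = Mathf.Pow(dist, -PATHLOSS_EXP);
    }
    Ch_Gain[idx] = Ch_Gain_OBS[Usr_Assoc[idx], idx];
}

public void get_rate()
{
    Usr_Rate = 0.0f;
    for (int i = 0; i < NUM_user; ++i)
    {
        // Shannon capacity with an (assumed) equal share of the resource blocks per user.
        float snr = TX_POWER * Ch_Gain[i] / NOISE_POWER;
        float bandwidth = RB_BANDWIDTH * NUM_RBs / NUM_user;
        eachUsr_Rate[i] = bandwidth * Mathf.Log(1.0f + snr, 2.0f); // bit/s
        Usr_Rate += eachUsr_Rate[i];
    }
}
```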