├── res_alloc_files └── figure-gfm │ ├── res_alloc.png │ └── simulated_annealing.png ├── .gitignore ├── res_alloc.py └── README.md /res_alloc_files/figure-gfm/res_alloc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MNoorFawi/resource-allocation-using-optimization-algorithms/HEAD/res_alloc_files/figure-gfm/res_alloc.png -------------------------------------------------------------------------------- /res_alloc_files/figure-gfm/simulated_annealing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MNoorFawi/resource-allocation-using-optimization-algorithms/HEAD/res_alloc_files/figure-gfm/simulated_annealing.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
"""Allocate resources to projects with simulated annealing.

Finding the resource with the right skills for each project is tedious to do
by hand, so this script treats it as an optimization problem.  A random
dataset of 100 resources and 50 projects is generated; every resource has,
and every project requires, one language skill and one database skill.  Each
project offers two slots.  Simulated annealing then searches for the
assignment of resources to slots with the fewest skill mismatches.

Solution encoding: ``sol[i]`` is the position, within the list of slots that
are still open when resource ``i`` is placed, of the slot that resource takes.
Because one slot is consumed per resource, ``sol[i]`` must lie in
``[0, n_slots - i - 1]``.
"""
import math
import random
from itertools import compress  # noqa: F401  (kept in the module namespace)
from string import ascii_lowercase

import numpy as np
import pandas as pd

# NOTE: the names are drawn *before* random.seed() is called, so they differ
# between runs; only the skill columns below are reproducible.
resource = [random.choice(ascii_lowercase) + str(_) for _ in range(100)]
project = [random.choice(ascii_lowercase) + random.choice(ascii_lowercase) +
           str(_) for _ in range(50)]

lang_skill = ["R", "Python", "Scala", "Julia"]
db_skill = ["PSQL", "MySQL", "MongoDB", "Neo4j", "CouchDB"]

random.seed(1311)
resources = pd.DataFrame({
    "name": resource,
    "skill1": random.choices(lang_skill, k=100),
    "skill2": random.choices(db_skill, k=100),
})

projects = pd.DataFrame({
    "project": project,
    "skill1": random.choices(lang_skill, k=50),
    "skill2": random.choices(db_skill, k=50),
})


def _assigned_projects(sol):
    """Decode ``sol`` into the project row position given to each resource.

    Raises:
        IndexError: if an entry of ``sol`` points past the slots still open.
    """
    # two slots per project: [0, 0, 1, 1, 2, 2, ...]
    open_slots = [row for row in range(len(projects)) for _ in range(2)]
    # pop() removes the chosen slot so that it cannot be filled a second time
    return [open_slots.pop(int(choice)) for choice in sol]


def schedule_display(sol):
    """Return the assignment encoded by ``sol`` as a readable DataFrame.

    Args:
        sol: sequence of slot choices, one per resource (see module docstring).

    Returns:
        DataFrame with columns ``Resource``, ``Project``, ``Res_Skill`` and
        ``Proj_Skill`` (the skill columns hold two-element lists), sorted by
        project name.
    """
    assigned = _assigned_projects(sol)
    rows = range(len(assigned))
    res_proj = pd.DataFrame({
        "Resource": [resources["name"].iloc[i] for i in rows],
        "Project": [projects["project"].iloc[p] for p in assigned],
        "Res_Skill": [list(resources.iloc[i, 1:]) for i in rows],
        "Proj_Skill": [list(projects.iloc[p, 1:]) for p in assigned],
    })
    return res_proj.sort_values("Project")


def resproj_cost(sol):
    """Count the skill mismatches of the assignment encoded by ``sol``.

    Every resource contributes 0, 1 or 2: the number of its skills that
    differ from the skills required by the project it is assigned to.

    Args:
        sol: sequence of slot choices, one per resource (see module docstring).

    Returns:
        Total number of mismatches as an ``int``; 0 for an empty ``sol``.
    """
    assigned = np.asarray(_assigned_projects(sol), dtype=int)
    # convert the skill columns once instead of slicing the frames per resource
    required = projects.iloc[:, 1:].to_numpy()[assigned]
    offered = resources.iloc[:len(assigned), 1:].to_numpy()
    return int((offered != required).sum())


def simulated_annealing(domain, costf, temp=10000.0, cool=0.95, step=1):
    """Minimise ``costf`` over ``domain`` with simulated annealing.

    Args:
        domain: sequence of ``(low, high)`` inclusive bounds, one per variable.
        costf: callable taking a solution (list of floats) and returning its
            cost.  It is assumed to be deterministic, because the cost of the
            current solution is cached between iterations.
        temp: starting temperature; the search stops once it is <= 0.1.
        cool: factor the temperature is multiplied by after every iteration.
        step: largest change (in either direction) applied to one variable.

    Returns:
        The final solution as a list of floats (``[]`` for an empty domain).
        The cost of every accepted candidate is printed as a progress trace.
    """
    if not domain:
        return []

    # initialize the values randomly
    current_sol = [float(random.randint(low, high)) for low, high in domain]
    current_cost = costf(current_sol)

    while temp > 0.1:
        # change one randomly chosen variable by a random amount, clamped
        # to that variable's bounds
        i = random.randint(0, len(domain) - 1)
        direction = random.randint(-step, step)
        low, high = domain[i]
        new_sol = current_sol[:]
        new_sol[i] = float(min(max(new_sol[i] + direction, low), high))

        new_cost = costf(new_sol)
        delta = new_cost - current_cost

        # Metropolis criterion: always accept an improvement; accept a worse
        # candidate with probability exp(-delta / temp), which shrinks as the
        # temperature drops.  (The exponent uses the cost *difference*, not
        # the sum of both costs.)
        if delta < 0 or random.random() < math.exp(-delta / temp):
            current_sol = new_sol
            current_cost = new_cost
            print(new_cost)

        # decrease the temperature
        temp = temp * cool
    return current_sol


def main():
    """Print the dataset, a naive schedule and the annealed schedule."""
    print(resources.head())
    print("#########")
    print(projects.head())

    # baseline: every resource takes the first slot that is still open
    rand_sch = schedule_display([0 for _ in range(len(resources))])
    print(rand_sch)

    # search domain: resource i may pick any of the slots still open
    n_slots = len(projects) * 2
    domain = [(0, n_slots - i - 1) for i in range(n_slots)]

    # step = 3 to widen the direction of movement and high cool to run the
    # algorithm longer
    schedule = simulated_annealing(domain, resproj_cost, step=3, cool=0.99)

    schedule_df = schedule_display(schedule)
    print(schedule_df.head(20))


if __name__ == "__main__":
    main()
13 | 14 | Import necessary libraries: 15 | 16 | ``` python 17 | import pandas as pd 18 | from string import ascii_lowercase 19 | import random 20 | import numpy as np 21 | from itertools import compress 22 | import math 23 | ``` 24 | 25 | Define a random dataset with random resource skills and project 26 | prerequisites. We will create 50 projects and 100 resources. Each 27 | project needs 2 resources. 28 | 29 | *N.B. There is no perfect solution for this dataset. The algorithm will 30 | try to find the best partial solution, i.e. the one with the lowest 31 | possible cost.* 32 | 33 | ``` python 34 | resource = [random.choice(ascii_lowercase) + str(_) for _ in range(100)] 35 | project = [random.choice(ascii_lowercase) + random.choice(ascii_lowercase) + 36 | str(_) for _ in range(50)] 37 | 38 | lang_skill = ["R", "Python", "Scala", "Julia"] 39 | db_skill = ["PSQL", "MySQL", "MongoDB", "Neo4j", "CouchDB"] 40 | 41 | random.seed(1311) 42 | resources = pd.DataFrame({ 43 | "name" : resource, 44 | "skill1" : random.choices(lang_skill, k = 100), 45 | "skill2" : random.choices(db_skill, k = 100) 46 | }) 47 | 48 | projects = pd.DataFrame({ 49 | "project" : project, 50 | "skill1" : random.choices(lang_skill, k = 50), 51 | "skill2" : random.choices(db_skill, k = 50) 52 | }) 53 | 54 | print(resources.head()) 55 | print("#########") 56 | print(projects.head()) 57 | 58 | # name skill1 skill2 59 | # 0 y0 R Neo4j 60 | # 1 g1 Python MongoDB 61 | # 2 e2 Julia CouchDB 62 | # 3 n3 Julia PSQL 63 | # 4 s4 R MySQL 64 | # ######### 65 | # project skill1 skill2 66 | # 0 kr0 R CouchDB 67 | # 1 ns1 Scala CouchDB 68 | # 2 dw2 R PSQL 69 | # 3 at3 Python CouchDB 70 | # 4 wg4 Scala PSQL 71 | ``` 72 | 73 | Now we need a function to display the solution the algorithm will give 74 | us. 
75 | 76 | ``` python 77 | def schedule_display(sol): 78 | res = [] 79 | proj = [] 80 | resskill = [] 81 | projskill = [] 82 | slots = [] 83 | # create two slots for each project 84 | for i in range(len(projects)): slots += [i, i] 85 | 86 | # Loop over resources assignment 87 | for i in range(len(sol)): 88 | # get slot 89 | x = int(sol[i]) 90 | # get resource name 91 | res.append(resources.name[i]) 92 | # project name 93 | pr = projects.project[slots[x]] 94 | # append to project list 95 | proj.append(pr) 96 | # get resources skill 97 | resskill.append(list(resources.iloc[i, 1:])) 98 | # to get the project skills from the name we need to get the indices 99 | # where the project is equal to "pr" then slice the projects df 100 | pr_bool = projects.project == pr 101 | pr_ind = list(compress(range(len(pr_bool)), pr_bool)) 102 | projskill.append(list(projects.iloc[pr_ind, 1:].values[0])) 103 | # remove this slot in order not to be filled again 104 | del slots[x] 105 | 106 | res_proj = pd.DataFrame({"Resource" : res, "Project" : proj, 107 | "Res_Skill" : resskill, 108 | "Proj_Skill" : projskill}) 109 | 110 | return res_proj.sort_values("Project") 111 | ``` 112 | 113 | Let’s see a random allocation what it suggests; 114 | 115 | ``` python 116 | rand_sch = schedule_display([0 for _ in range(len(resources))]) 117 | print(rand_sch) 118 | 119 | # Resource Project Res_Skill Proj_Skill 120 | # 90 g90 aw45 [Julia, MongoDB] [Julia, PSQL] 121 | # 91 q91 aw45 [Julia, MongoDB] [Julia, PSQL] 122 | # 76 l76 bv38 [Scala, MongoDB] [Julia, CouchDB] 123 | # 77 h77 bv38 [Julia, Neo4j] [Julia, CouchDB] 124 | # 43 e43 cv21 [Scala, Neo4j] [Scala, MongoDB] 125 | # .. ... ... ... ... 
126 | # 31 d31 yi15 [Scala, MySQL] [Julia, MySQL] 127 | # 24 h24 zm12 [Python, CouchDB] [Julia, CouchDB] 128 | # 25 h25 zm12 [Scala, Neo4j] [Julia, CouchDB] 129 | # 73 m73 zm36 [R, MySQL] [Julia, PSQL] 130 | # 72 i72 zm36 [R, Neo4j] [Julia, PSQL] 131 | # 132 | # [100 rows x 4 columns] 133 | ``` 134 | 135 | The **Cost Function** is the most important part of any *optimization 136 | algorithm*. The algorithm searches through different solutions in order to 137 | minimize the cost function until it reaches the 138 | stopping criterion. 139 | 140 | Here we define our cost function, which counts how many of the 141 | assigned resource’s skills do not match the project requirements. It increases 142 | by 1 if the resource has only 1 of the 2 required skills, by 2 if the 143 | resource doesn’t have any required skill, and by 0 if the resource is a 144 | perfect match. 145 | 146 | ###### N.B. Here we try to teach the algorithm to at least find one required skill in each assigned resource. We can be more strict and search for the two resources that perfectly cover the required skills. 147 | 148 | ``` python 149 | def resproj_cost(sol): 150 | cost = 0 151 | # create list a of slots 152 | slots = [] 153 | for i in range(len(projects)): slots += [i, i] 154 | 155 | # loop over each resource 156 | for i in range(len(sol)): 157 | x = int(sol[i]) 158 | # get project skills and resources skills 159 | proj = np.array(projects.iloc[slots[x], 1:]) 160 | res = np.array(resources.iloc[i, 1:]) 161 | # count how many mismatches among skills (0, 1 or 2) 162 | cost += sum(res != proj) 163 | 164 | # remove selected slot 165 | del slots[x] 166 | 167 | return cost 168 | ``` 169 | 170 | ### Simulated Annealing 171 | 172 | The simulated annealing algorithm is an optimization method which is 173 | inspired by the slow cooling of metals. The algorithm starts with a 174 | random solution to the problem. 
It has a variable called temperature, 175 | which starts very high and gradually gets lower (cools down). In each 176 | iteration, a randomly chosen element of the current solution is 177 | changed in a given direction. The cost is calculated before and after 178 | the change, and the two costs are compared. If the new cost is lower, 179 | the new solution becomes the current solution, just like in any other 180 | optimization algorithm. However, if the cost is higher, the algorithm 181 | can still accept the new solution with a certain probability, which shrinks as the temperature drops. *This 182 | is to avoid getting stuck in a local minimum.* 183 | 184 | As shown in the picture, the algorithm sometimes accepts a worse solution 185 | to escape from local minima and head for the goal **(Global Minimum)**. 186 | 187 | ![](res_alloc_files/figure-gfm/simulated_annealing.png) 188 | 189 | Define the Simulated Annealing algorithm: 190 | 191 | ``` python 192 | def simulated_annealing(domain, costf, temp = 10000.0, 193 | cool = 0.95, step = 1): 194 | # initialize the values randomly 195 | current_sol = [float(random.randint(domain[i][0], domain[i][1])) for i in range(len(domain))] 196 | while temp > 0.1: 197 | # choose one of the indices 198 | i = random.randint(0, len(domain) - 1) 199 | 200 | # choose a direction to change it 201 | direction = random.randint(- step, step) 202 | 203 | # create a new list with one of the values changed 204 | new_sol = current_sol[:] 205 | new_sol[i] += direction 206 | if new_sol[i] < domain[i][0]: new_sol[i] = domain[i][0] 207 | elif new_sol[i] > domain[i][1]: new_sol[i] = domain[i][1] 208 | 209 | # calculate the current cost and the new cost 210 | current_cost = costf(current_sol) 211 | new_cost = costf(new_sol) 212 | #p = pow(math.e, (- new_cost - current_cost) / temp) 213 | p = math.e ** (( - new_cost - current_cost) / temp) 214 | 215 | # is it better, or does it make the probability 216 | # cutoff? 
217 | if (new_cost < current_cost or random.random() < p): 218 | current_sol = new_sol 219 | print(new_cost) 220 | 221 | # decrease the temperature 222 | temp = temp * cool 223 | return current_sol 224 | ``` 225 | 226 | Run the algorithm. Start first with a random solution and pass it to the 227 | algorithm. 228 | 229 | N.B. How the cost can get higher in certain iterations. 230 | 231 | ``` python 232 | # random solution (start point) 233 | solution = [(0, (len(projects) * 2) - i - 1) for i in range(0, len(projects) * 2)] 234 | 235 | # step = 3 to widen the direction of movement and high cool to run the algorithm longer 236 | schedule = simulated_annealing(solution, resproj_cost, step = 3, cool = 0.99) 237 | 238 | # 151 239 | # 151 240 | # 152 241 | # 152 242 | # 153 243 | # 152 244 | # 159 245 | # 160 246 | # 160 247 | # 160 248 | # ... 249 | # ... 250 | # 251 | # 148 252 | # 147 253 | # 147 254 | # 145 255 | # 145 256 | # 145 257 | # 145 258 | # 144 259 | # ... 260 | # ... 261 | # 262 | # 102 263 | # 101 264 | # 100 265 | # 99 266 | # 98 267 | # 97 268 | ``` 269 | 270 | Now we have a variable that has an optimized schedule. Let’s display it. 
271 | 272 | ``` python 273 | schedule_df = schedule_display(schedule) 274 | print(schedule_df.head(20)) 275 | 276 | # Resource Project Res_Skill Proj_Skill 277 | # 91 q91 aw45 [Julia, MongoDB] [Julia, PSQL] 278 | # 14 l14 aw45 [Julia, PSQL] [Julia, PSQL] 279 | # 3 r3 bv38 [Julia, PSQL] [Julia, CouchDB] 280 | # 77 h77 bv38 [Julia, Neo4j] [Julia, CouchDB] 281 | # 26 n26 cv21 [Scala, MongoDB] [Scala, MongoDB] 282 | # 82 n82 cv21 [Scala, MySQL] [Scala, MongoDB] 283 | # 32 v32 db27 [Python, MongoDB] [Python, MongoDB] 284 | # 90 g90 db27 [Julia, MongoDB] [Python, MongoDB] 285 | # 47 z47 dh6 [Scala, MongoDB] [Scala, MongoDB] 286 | # 41 a41 dh6 [R, PSQL] [Scala, MongoDB] 287 | # 49 a49 di46 [R, PSQL] [Scala, PSQL] 288 | # 31 d31 di46 [Scala, MySQL] [Scala, PSQL] 289 | # 56 q56 dn1 [Scala, PSQL] [Scala, CouchDB] 290 | # 36 z36 dn1 [Python, MongoDB] [Scala, CouchDB] 291 | # 33 u33 dv33 [Julia, CouchDB] [Julia, CouchDB] 292 | # 52 f52 dv33 [Scala, CouchDB] [Julia, CouchDB] 293 | # 6 q6 ex35 [Julia, Neo4j] [R, PSQL] 294 | # 46 z46 ex35 [R, MongoDB] [R, PSQL] 295 | # 74 u74 fi20 [Scala, Neo4j] [Scala, Neo4j] 296 | # 18 d18 fi20 [Scala, PSQL] [Scala, Neo4j] 297 | ``` 298 | 299 | The assignments are much better than the randomly chosen ones. The 300 | algorithm can be tweaked and improved further by using different 301 | criteria for the cost function. 302 | --------------------------------------------------------------------------------