├── res_alloc_files
    └── figure-gfm
    │   ├── res_alloc.png
    │   └── simulated_annealing.png
├── .gitignore
├── res_alloc.py
└── README.md


/res_alloc_files/figure-gfm/res_alloc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MNoorFawi/resource-allocation-using-optimization-algorithms/HEAD/res_alloc_files/figure-gfm/res_alloc.png


--------------------------------------------------------------------------------
/res_alloc_files/figure-gfm/simulated_annealing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MNoorFawi/resource-allocation-using-optimization-algorithms/HEAD/res_alloc_files/figure-gfm/simulated_annealing.png


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 


--------------------------------------------------------------------------------
/res_alloc.py:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | from string import ascii_lowercase
  3 | import random 
  4 | import numpy as np
  5 | from itertools import compress
  6 | import math
  7 | 
  8 | resource = [random.choice(ascii_lowercase) + str(_) for _ in range(100)]
  9 | project = [random.choice(ascii_lowercase) + random.choice(ascii_lowercase) +
 10 |            str(_) for _ in range(50)]
 11 | 
 12 | lang_skill = ["R", "Python", "Scala", "Julia"]
 13 | db_skill = ["PSQL", "MySQL", "MongoDB", "Neo4j", "CouchDB"]
 14 | 
 15 | random.seed(1311)
 16 | resources = pd.DataFrame({
 17 |         "name" : resource,
 18 |         "skill1" : random.choices(lang_skill, k = 100),
 19 |         "skill2" : random.choices(db_skill, k = 100)
 20 |             })
 21 | 
 22 | projects = pd.DataFrame({
 23 |         "project" : project,
 24 |         "skill1" : random.choices(lang_skill, k = 50),
 25 |         "skill2" : random.choices(db_skill, k = 50)
 26 |         })
 27 |         
 28 | print(resources.head())
 29 | print("#########")
 30 | print(projects.head())
 31 | 
 32 | def schedule_display(sol):
 33 |     res = []
 34 |     proj = []
 35 |     resskill = []
 36 |     projskill = []
 37 |     slots = []
 38 |     # create two slots for each project
 39 |     for i in range(len(projects)): slots += [i, i]
 40 | 
 41 |     # Loop over resources assignment
 42 |     for i in range(len(sol)):
 43 |         # get slot
 44 |         x = int(sol[i])
 45 |         # get resource name
 46 |         res.append(resources.name[i])
 47 |         # project name
 48 |         pr = projects.project[slots[x]]
 49 |         # append to project list
 50 |         proj.append(pr)
 51 |         # get resources skill
 52 |         resskill.append(list(resources.iloc[i, 1:]))
 53 |         # to get the project skills from the name we need to get the indices
 54 |         # where the project is equal to "pr" then slice the projects df
 55 |         pr_bool = projects.project == pr
 56 |         pr_ind = list(compress(range(len(pr_bool)), pr_bool))
 57 |         projskill.append(list(projects.iloc[pr_ind, 1:].values[0]))
 58 |         # remove this slot in order not to be filled again
 59 |         del slots[x]
 60 |     
 61 |     res_proj = pd.DataFrame({"Resource" : res, "Project" : proj,
 62 |                              "Res_Skill" : resskill,
 63 |                              "Proj_Skill" : projskill})
 64 |                              
 65 |     return res_proj.sort_values("Project")
 66 |     
 67 | rand_sch = schedule_display([0 for _ in range(len(resources))])
 68 | print(rand_sch)
 69 | 
 70 | def resproj_cost(sol):
 71 |   cost = 0
 72 |   # create list a of slots
 73 |   slots = []
 74 |   for i in range(len(projects)): slots += [i, i]
 75 |   
 76 |   # loop over each resource
 77 |   for i in range(len(sol)):
 78 |       x = int(sol[i])
 79 |       # get project skills and resources skills
 80 |       proj = np.array(projects.iloc[slots[x], 1:])
 81 |       res = np.array(resources.iloc[i, 1:])
 82 |       # count how many mismatches among skills (0, 1 or 2)
 83 |       cost += sum(res != proj)
 84 |       
 85 |       # remove selected slot
 86 |       del slots[x]
 87 |     
 88 |   return cost
 89 |   
 90 | def simulated_annealing(domain, costf, temp = 10000.0,
 91 |                      cool = 0.95, step = 1):
 92 |     # initialize the values randomly
 93 |     current_sol = [float(random.randint(domain[i][0], domain[i][1])) for i in range(len(domain))]
 94 |     while temp > 0.1:
 95 |         # choose one of the indices
 96 |         i = random.randint(0, len(domain) - 1)
 97 |         
 98 |         # choose a direction to change it
 99 |         direction = random.randint(- step, step)
100 |         
101 |         # create a new list with one of the values changed
102 |         new_sol = current_sol[:]
103 |         new_sol[i] += direction
104 |         if new_sol[i] < domain[i][0]: new_sol[i] = domain[i][0]
105 |         elif new_sol[i] > domain[i][1]: new_sol[i] = domain[i][1]
106 |         
107 |         # calculate the current cost and the new cost
108 |         current_cost = costf(current_sol)
109 |         new_cost = costf(new_sol)
110 |         #p = pow(math.e, (- new_cost - current_cost) / temp)
111 |         p = math.e ** (( - new_cost - current_cost) / temp)
112 |         
113 |         # is it better, or does it make the probability
114 |         # cutoff?
115 |         if (new_cost < current_cost or random.random() < p):
116 |             current_sol = new_sol
117 |             print(new_cost)
118 |         
119 |         # decrease the temperature
120 |         temp = temp * cool
121 |     return current_sol
122 |     
123 |  
# Search domain handed to the optimiser: one (low, high) pair of slot positions
# per entry of a solution. There are two slots per project and every assignment
# closes one, so the upper bound shrinks by one from entry to entry.
# NOTE(review): this yields 2 * len(projects) entries - presumably meant to
# equal len(resources); confirm if the dataset sizes change.
solution = [(0, top) for top in range(len(projects) * 2 - 1, -1, -1)]

# a step of 3 lets each move reach more slots; cooling at 0.99 makes the run longer
schedule = simulated_annealing(solution, resproj_cost, cool = 0.99, step = 3)

schedule_df = schedule_display(schedule)
print(schedule_df.head(20))
131 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | Resource Allocation using Optimization Algorithms
  2 | ================
  3 | 
  4 | ![](res_alloc_files/figure-gfm/res_alloc.png)
  5 | 
  6 | Resource allocation, or resource management, is a very difficult task in
  7 | any company: it means finding the best resource with the right skills for
  8 | a specific project with certain requirements.
  9 | 
 10 | It takes a lot of time and effort to do this task manually. Here we are
 11 | going to approach this problem using **Machine Learning Optimization
 12 | Algorithms** and **Python**.
 13 | 
 14 | Import necessary libraries:
 15 | 
 16 | ``` python
 17 | import pandas as pd
 18 | from string import ascii_lowercase
 19 | import random 
 20 | import numpy as np
 21 | from itertools import compress
 22 | import math
 23 | ```
 24 | 
 25 | Define a random dataset with random resource skills and project
 26 | prerequisites. We will create 50 projects and 100 resources. Each
 27 | project needs 2 resources.
 28 | 
 29 | *N.B. There is no perfect solution for this dataset. The algorithm will
 30 | try to find the best partial solution, i.e. the one with the lowest
 31 | possible cost.*
 32 | 
 33 | ``` python
 34 | resource = [random.choice(ascii_lowercase) + str(_) for _ in range(100)]
 35 | project = [random.choice(ascii_lowercase) + random.choice(ascii_lowercase) +
 36 |            str(_) for _ in range(50)]
 37 | 
 38 | lang_skill = ["R", "Python", "Scala", "Julia"]
 39 | db_skill = ["PSQL", "MySQL", "MongoDB", "Neo4j", "CouchDB"]
 40 | 
 41 | random.seed(1311)
 42 | resources = pd.DataFrame({
 43 |         "name" : resource,
 44 |         "skill1" : random.choices(lang_skill, k = 100),
 45 |         "skill2" : random.choices(db_skill, k = 100)
 46 |             })
 47 | 
 48 | projects = pd.DataFrame({
 49 |         "project" : project,
 50 |         "skill1" : random.choices(lang_skill, k = 50),
 51 |         "skill2" : random.choices(db_skill, k = 50)
 52 |         })
 53 |         
 54 | print(resources.head())
 55 | print("#########")
 56 | print(projects.head())
 57 | 
 58 | #   name  skill1   skill2
 59 | # 0   y0       R    Neo4j
 60 | # 1   g1  Python  MongoDB
 61 | # 2   e2   Julia  CouchDB
 62 | # 3   n3   Julia     PSQL
 63 | # 4   s4       R    MySQL
 64 | # #########
 65 | #   project  skill1   skill2
 66 | # 0     kr0       R  CouchDB
 67 | # 1     ns1   Scala  CouchDB
 68 | # 2     dw2       R     PSQL
 69 | # 3     at3  Python  CouchDB
 70 | # 4     wg4   Scala     PSQL
 71 | ```
 72 | 
 73 | Now we need a function to display the solution the algorithm will give
 74 | us.
 75 | 
 76 | ``` python
 77 | def schedule_display(sol):
 78 |     res = []
 79 |     proj = []
 80 |     resskill = []
 81 |     projskill = []
 82 |     slots = []
 83 |     # create two slots for each project
 84 |     for i in range(len(projects)): slots += [i, i]
 85 | 
 86 |     # Loop over resources assignment
 87 |     for i in range(len(sol)):
 88 |         # get slot
 89 |         x = int(sol[i])
 90 |         # get resource name
 91 |         res.append(resources.name[i])
 92 |         # project name
 93 |         pr = projects.project[slots[x]]
 94 |         # append to project list
 95 |         proj.append(pr)
 96 |         # get resources skill
 97 |         resskill.append(list(resources.iloc[i, 1:]))
 98 |         # to get the project skills from the name we need to get the indices
 99 |         # where the project is equal to "pr" then slice the projects df
100 |         pr_bool = projects.project == pr
101 |         pr_ind = list(compress(range(len(pr_bool)), pr_bool))
102 |         projskill.append(list(projects.iloc[pr_ind, 1:].values[0]))
103 |         # remove this slot in order not to be filled again
104 |         del slots[x]
105 |     
106 |     res_proj = pd.DataFrame({"Resource" : res, "Project" : proj,
107 |                              "Res_Skill" : resskill,
108 |                              "Proj_Skill" : projskill})
109 |                              
110 |     return res_proj.sort_values("Project")
111 | ```
112 | 
113 | Let’s see what an unoptimized allocation looks like (here every resource simply takes the first free slot):
114 | 
115 | ``` python
116 | rand_sch = schedule_display([0 for _ in range(len(resources))])
117 | print(rand_sch)
118 | 
119 | #    Resource Project          Res_Skill        Proj_Skill
120 | # 90      g90    aw45   [Julia, MongoDB]     [Julia, PSQL]
121 | # 91      q91    aw45   [Julia, MongoDB]     [Julia, PSQL]
122 | # 76      l76    bv38   [Scala, MongoDB]  [Julia, CouchDB]
123 | # 77      h77    bv38     [Julia, Neo4j]  [Julia, CouchDB]
124 | # 43      e43    cv21     [Scala, Neo4j]  [Scala, MongoDB]
125 | # ..      ...     ...                ...               ...
126 | # 31      d31    yi15     [Scala, MySQL]    [Julia, MySQL]
127 | # 24      h24    zm12  [Python, CouchDB]  [Julia, CouchDB]
128 | # 25      h25    zm12     [Scala, Neo4j]  [Julia, CouchDB]
129 | # 73      m73    zm36         [R, MySQL]     [Julia, PSQL]
130 | # 72      i72    zm36         [R, Neo4j]     [Julia, PSQL]
131 | # 
132 | # [100 rows x 4 columns]
133 | ```
134 | 
135 | The **Cost Function** is the most important part in any *optimization
136 | algorithm*. The algorithm searches different solutions in order to
137 | minimize the cost function of the current solution until it reaches the
138 | stop criteria.
139 | 
140 | Here we define our cost function, which measures how badly each assigned
141 | resource’s skills mismatch the project requirements. For every resource,
142 | the cost increases by 1 if the resource has only 1 of the 2 required
143 | skills, by 2 if it has neither of them, and by 0 if the resource is a
144 | perfect match.
145 | 
146 | ###### N.B. Here we try to teach the algorithm to at least find one required skill in each assigned resource. We can be more strict and search for the two resources that perfectly cover the required skills.
147 | 
148 | ``` python
def resproj_cost(sol):
    """Return the total number of skill mismatches of an assignment.

    ``sol[i]`` is the position, within the list of slots that are still open,
    taken by resource ``i`` (two slots per project, removed once taken). Each
    resource adds one to the cost for every skill column in which it differs
    from its project, i.e. 0, 1 or 2 with the two skill columns used here.

    Reads the module-level ``resources`` and ``projects`` frames; the columns
    after the first one are compared position by position.

    Returns the cost as a plain ``int`` (0 for an empty solution). Raises
    ``IndexError`` when an entry points past the open slots.
    """
    # Convert both skill tables once per call: this function is evaluated on
    # every optimiser iteration, and per-row DataFrame indexing inside the
    # loop is far slower than indexing a NumPy array.
    proj_skills = projects.iloc[:, 1:].to_numpy()
    res_skills = resources.iloc[:, 1:].to_numpy()

    # two open slots per project, identified by the project's row position
    open_slots = [row for row in range(len(projects)) for _ in range(2)]

    cost = 0
    for res_row, choice in enumerate(sol):
        # take the chosen slot out of the pool so it cannot be filled again
        proj_row = open_slots.pop(int(choice))
        mismatches = res_skills[res_row] != proj_skills[proj_row]
        cost += int(np.count_nonzero(mismatches))

    return cost
168 | ```
169 | 
170 | ### Simulated Annealing
171 | 
172 | The simulated annealing algorithm is an optimization method
173 | inspired by the slow cooling of metals. The algorithm starts with a
174 | random solution to the problem. It has a variable called temperature,
175 | which starts very high and gradually gets lower (cools down). In each
176 | iteration, a random element of the current solution is chosen and
177 | changed in a random direction. The cost is calculated before and after
178 | the change, and the two costs are compared. If the new cost is lower,
179 | the new solution becomes the current solution, just like in any other
180 | optimization algorithm. However, if the cost is higher, the algorithm
181 | can still accept the new, worse solution with a probability that shrinks
182 | as the temperature drops. *This is to avoid getting stuck in a local minimum.*
183 | 
184 | As shown in the picture, the algorithm sometimes accepts a worse solution
185 | to escape from a local minimum and head for the goal **(Global Minimum)**.
186 | 
187 | ![](res_alloc_files/figure-gfm/simulated_annealing.png)
188 | 
189 | Define the Simulated Annealing algorithm;
190 | 
191 | ``` python
def simulated_annealing(domain, costf, temp = 10000.0,
                     cool = 0.95, step = 1):
    """Minimise ``costf`` over a box of integer positions by simulated annealing.

    Parameters:
        domain: sequence of ``(low, high)`` pairs giving the inclusive bounds
            of every position of a solution.
        costf: callable that takes a candidate solution (a list of numbers)
            and returns its cost; lower is better.
        temp: starting temperature; the search runs while it is above 0.1.
        cool: factor the temperature is multiplied by after each iteration.
        step: largest amount by which one position is moved per iteration.

    Returns:
        The solution held when the temperature runs out, as a list of numbers
        (floats, or the integer bound where a value was clamped). This is the
        last accepted candidate, which is not necessarily the best one seen.
        An empty ``domain`` yields an empty list.

    The cost of every accepted candidate is printed as the search progresses.
    """
    if not domain:
        # nothing to optimise; also avoids random.randint(0, -1) below
        return []

    # initialize the values randomly
    current_sol = [float(random.randint(bounds[0], bounds[1])) for bounds in domain]
    # the cost of the current solution only changes when a move is accepted,
    # so it is tracked instead of being recomputed on every iteration
    current_cost = costf(current_sol)

    while temp > 0.1:
        # choose one of the positions and a direction to move it
        pos = random.randint(0, len(domain) - 1)
        direction = random.randint(-step, step)

        # copy the solution with that one value changed, kept inside its bounds
        new_sol = current_sol[:]
        new_sol[pos] += direction
        low, high = domain[pos][0], domain[pos][1]
        if new_sol[pos] < low:
            new_sol[pos] = low
        elif new_sol[pos] > high:
            new_sol[pos] = high

        new_cost = costf(new_sol)

        # Accept every improvement. Accept a non-improving move with
        # probability exp(-(new_cost - current_cost) / temp), which depends on
        # how much worse the move is, not on the absolute cost level. The
        # exponential is evaluated only for non-improving moves, where the
        # exponent is <= 0 and therefore cannot overflow.
        if (new_cost < current_cost
                or random.random() < math.exp(-(new_cost - current_cost) / temp)):
            current_sol, current_cost = new_sol, new_cost
            print(new_cost)

        # decrease the temperature
        temp = temp * cool
    return current_sol
224 | ```
225 | 
226 | Run the algorithm. First define the search domain, i.e. the range of slot
227 | indices each resource may pick, and pass it to the algorithm.
228 | 
229 | N.B. Notice how the cost can get higher in certain iterations.
230 | 
231 | ``` python
232 | # search domain: (lowest, highest) slot index each resource may pick
233 | solution = [(0, (len(projects) * 2) - i - 1) for i in range(0, len(projects) * 2)]
234 | 
235 | # step = 3 to widen the direction of movement and high cool to run the algorithm longer
236 | schedule = simulated_annealing(solution, resproj_cost, step = 3, cool = 0.99)
237 | 
238 | # 151
239 | # 151
240 | # 152
241 | # 152
242 | # 153
243 | # 152
244 | # 159
245 | # 160
246 | # 160
247 | # 160
248 | # ...
249 | # ...
250 | # 
251 | # 148
252 | # 147
253 | # 147
254 | # 145
255 | # 145
256 | # 145
257 | # 145
258 | # 144
259 | # ...
260 | # ...
261 | # 
262 | # 102
263 | # 101
264 | # 100
265 | # 99
266 | # 98
267 | # 97
268 | ```
269 | 
270 | Now we have a variable that has an optimized schedule. Let’s display it.
271 | 
272 | ``` python
273 | schedule_df = schedule_display(schedule)
274 | print(schedule_df.head(20))
275 | 
276 | #    Resource Project          Res_Skill         Proj_Skill
277 | # 91      q91    aw45   [Julia, MongoDB]      [Julia, PSQL]
278 | # 14      l14    aw45      [Julia, PSQL]      [Julia, PSQL]
279 | # 3        r3    bv38      [Julia, PSQL]   [Julia, CouchDB]
280 | # 77      h77    bv38     [Julia, Neo4j]   [Julia, CouchDB]
281 | # 26      n26    cv21   [Scala, MongoDB]   [Scala, MongoDB]
282 | # 82      n82    cv21     [Scala, MySQL]   [Scala, MongoDB]
283 | # 32      v32    db27  [Python, MongoDB]  [Python, MongoDB]
284 | # 90      g90    db27   [Julia, MongoDB]  [Python, MongoDB]
285 | # 47      z47     dh6   [Scala, MongoDB]   [Scala, MongoDB]
286 | # 41      a41     dh6          [R, PSQL]   [Scala, MongoDB]
287 | # 49      a49    di46          [R, PSQL]      [Scala, PSQL]
288 | # 31      d31    di46     [Scala, MySQL]      [Scala, PSQL]
289 | # 56      q56     dn1      [Scala, PSQL]   [Scala, CouchDB]
290 | # 36      z36     dn1  [Python, MongoDB]   [Scala, CouchDB]
291 | # 33      u33    dv33   [Julia, CouchDB]   [Julia, CouchDB]
292 | # 52      f52    dv33   [Scala, CouchDB]   [Julia, CouchDB]
293 | # 6        q6    ex35     [Julia, Neo4j]          [R, PSQL]
294 | # 46      z46    ex35       [R, MongoDB]          [R, PSQL]
295 | # 74      u74    fi20     [Scala, Neo4j]     [Scala, Neo4j]
296 | # 18      d18    fi20      [Scala, PSQL]     [Scala, Neo4j]
297 | ```
298 | 
299 | The assignments are much better than in the unoptimized allocation shown
300 | earlier. The algorithm can be tweaked and improved further by using
301 | different criteria for the cost function.
302 | 


--------------------------------------------------------------------------------