├── .gitignore ├── CODE_OF_CONDUCT.md ├── LICENSE ├── README.md ├── SECURITY.md ├── WPP.Distant-helper_PIPE-001_v2.ipynb ├── WPP.Generate-Floorplan_PIPE-003_v2.1.1.ipynb ├── WPP.Input-File-Validations_PIPE-002_v2.ipynb └── requirements_SP.txt /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Mono auto generated files 17 | mono_crash.* 18 | 19 | # Build results 20 | [Dd]ebug/ 21 | [Dd]ebugPublic/ 22 | [Rr]elease/ 23 | [Rr]eleases/ 24 | x64/ 25 | x86/ 26 | [Aa][Rr][Mm]/ 27 | [Aa][Rr][Mm]64/ 28 | bld/ 29 | [Bb]in/ 30 | [Oo]bj/ 31 | [Ll]og/ 32 | [Ll]ogs/ 33 | 34 | # Visual Studio 2015/2017 cache/options directory 35 | .vs/ 36 | # Uncomment if you have tasks that create the project's static files in wwwroot 37 | #wwwroot/ 38 | 39 | # Visual Studio 2017 auto generated files 40 | Generated\ Files/ 41 | 42 | # MSTest test Results 43 | [Tt]est[Rr]esult*/ 44 | [Bb]uild[Ll]og.* 45 | 46 | # NUnit 47 | *.VisualState.xml 48 | TestResult.xml 49 | nunit-*.xml 50 | 51 | # Build Results of an ATL Project 52 | [Dd]ebugPS/ 53 | [Rr]eleasePS/ 54 | dlldata.c 55 | 56 | # Benchmark Results 57 | BenchmarkDotNet.Artifacts/ 58 | 59 | # .NET Core 60 | project.lock.json 61 | project.fragment.lock.json 62 | artifacts/ 63 | 64 | # StyleCop 65 | StyleCopReport.xml 66 | 67 | # Files built by Visual Studio 68 | *_i.c 69 | *_p.c 70 | *_h.h 71 | *.ilk 72 | *.meta 73 | *.obj 74 | *.iobj 75 | *.pch 76 | *.pdb 77 | *.ipdb 78 | *.pgc 79 | *.pgd 80 | *.rsp 81 | *.sbr 82 | *.tlb 83 | *.tli 84 | *.tlh 85 | *.tmp 86 | *.tmp_proj 87 | *_wpftmp.csproj 88 | *.log 89 | *.vspscc 90 | *.vssscc 91 | .builds 92 | *.pidb 93 | *.svclog 94 | *.scc 95 | 96 | # Chutzpah Test files 97 | _Chutzpah* 98 | 99 | # Visual C++ cache files 100 | ipch/ 101 | *.aps 102 | *.ncb 103 | *.opendb 104 | *.opensdf 105 | *.sdf 106 | *.cachefile 107 | *.VC.db 108 | *.VC.VC.opendb 109 | 110 | # Visual Studio profiler 111 | *.psess 112 | *.vsp 113 | *.vspx 114 | *.sap 115 | 116 | # Visual Studio Trace Files 117 | *.e2e 118 | 119 | # TFS 2012 Local Workspace 120 | $tf/ 121 | 122 | # Guidance Automation Toolkit 123 | *.gpState 124 | 125 | # ReSharper is a .NET coding add-in 126 | _ReSharper*/ 127 | *.[Rr]e[Ss]harper 128 | *.DotSettings.user 129 | 130 | # TeamCity is a build add-in 131 | _TeamCity* 132 | 133 | # DotCover is a Code Coverage Tool 134 | *.dotCover 135 | 136 | # AxoCover is a Code Coverage Tool 137 | .axoCover/* 138 | !.axoCover/settings.json 139 | 140 | # Visual Studio code coverage results 141 | *.coverage 142 | *.coveragexml 143 | 144 | # NCrunch 145 | _NCrunch_* 146 | .*crunch*.local.xml 147 | nCrunchTemp_* 148 | 149 | # MightyMoose 150 | *.mm.* 151 | AutoTest.Net/ 152 | 153 | # Web workbench (sass) 154 | .sass-cache/ 155 | 156 | # Installshield output folder 157 | [Ee]xpress/ 158 | 159 | # DocProject is a documentation generator add-in 160 | DocProject/buildhelp/ 161 | DocProject/Help/*.HxT 162 | DocProject/Help/*.HxC 163 | DocProject/Help/*.hhc 164 | DocProject/Help/*.hhk 165 | DocProject/Help/*.hhp 166 | DocProject/Help/Html2 167 | DocProject/Help/html 168 | 169 | # Click-Once directory 170 | 
publish/ 171 | 172 | # Publish Web Output 173 | *.[Pp]ublish.xml 174 | *.azurePubxml 175 | # Note: Comment the next line if you want to checkin your web deploy settings, 176 | # but database connection strings (with potential passwords) will be unencrypted 177 | *.pubxml 178 | *.publishproj 179 | 180 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 181 | # checkin your Azure Web App publish settings, but sensitive information contained 182 | # in these scripts will be unencrypted 183 | PublishScripts/ 184 | 185 | # NuGet Packages 186 | *.nupkg 187 | # NuGet Symbol Packages 188 | *.snupkg 189 | # The packages folder can be ignored because of Package Restore 190 | **/[Pp]ackages/* 191 | # except build/, which is used as an MSBuild target. 192 | !**/[Pp]ackages/build/ 193 | # Uncomment if necessary however generally it will be regenerated when needed 194 | #!**/[Pp]ackages/repositories.config 195 | # NuGet v3's project.json files produces more ignorable files 196 | *.nuget.props 197 | *.nuget.targets 198 | 199 | # Microsoft Azure Build Output 200 | csx/ 201 | *.build.csdef 202 | 203 | # Microsoft Azure Emulator 204 | ecf/ 205 | rcf/ 206 | 207 | # Windows Store app package directories and files 208 | AppPackages/ 209 | BundleArtifacts/ 210 | Package.StoreAssociation.xml 211 | _pkginfo.txt 212 | *.appx 213 | *.appxbundle 214 | *.appxupload 215 | 216 | # Visual Studio cache files 217 | # files ending in .cache can be ignored 218 | *.[Cc]ache 219 | # but keep track of directories ending in .cache 220 | !?*.[Cc]ache/ 221 | 222 | # Others 223 | ClientBin/ 224 | ~$* 225 | *~ 226 | *.dbmdl 227 | *.dbproj.schemaview 228 | *.jfm 229 | *.pfx 230 | *.publishsettings 231 | orleans.codegen.cs 232 | 233 | # Including strong name files can present a security risk 234 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 235 | #*.snk 236 | 237 | # Since there are multiple workflows, uncomment next line to ignore bower_components 238 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 239 | #bower_components/ 240 | 241 | # RIA/Silverlight projects 242 | Generated_Code/ 243 | 244 | # Backup & report files from converting an old project file 245 | # to a newer Visual Studio version. Backup files are not needed, 246 | # because we have git ;-) 247 | _UpgradeReport_Files/ 248 | Backup*/ 249 | UpgradeLog*.XML 250 | UpgradeLog*.htm 251 | ServiceFabricBackup/ 252 | *.rptproj.bak 253 | 254 | # SQL Server files 255 | *.mdf 256 | *.ldf 257 | *.ndf 258 | 259 | # Business Intelligence projects 260 | *.rdl.data 261 | *.bim.layout 262 | *.bim_*.settings 263 | *.rptproj.rsuser 264 | *- [Bb]ackup.rdl 265 | *- [Bb]ackup ([0-9]).rdl 266 | *- [Bb]ackup ([0-9][0-9]).rdl 267 | 268 | # Microsoft Fakes 269 | FakesAssemblies/ 270 | 271 | # GhostDoc plugin setting file 272 | *.GhostDoc.xml 273 | 274 | # Node.js Tools for Visual Studio 275 | .ntvs_analysis.dat 276 | node_modules/ 277 | 278 | # Visual Studio 6 build log 279 | *.plg 280 | 281 | # Visual Studio 6 workspace options file 282 | *.opt 283 | 284 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
285 | *.vbw 286 | 287 | # Visual Studio LightSwitch build output 288 | **/*.HTMLClient/GeneratedArtifacts 289 | **/*.DesktopClient/GeneratedArtifacts 290 | **/*.DesktopClient/ModelManifest.xml 291 | **/*.Server/GeneratedArtifacts 292 | **/*.Server/ModelManifest.xml 293 | _Pvt_Extensions 294 | 295 | # Paket dependency manager 296 | .paket/paket.exe 297 | paket-files/ 298 | 299 | # FAKE - F# Make 300 | .fake/ 301 | 302 | # CodeRush personal settings 303 | .cr/personal 304 | 305 | # Python Tools for Visual Studio (PTVS) 306 | __pycache__/ 307 | *.pyc 308 | 309 | # Cake - Uncomment if you are using it 310 | # tools/** 311 | # !tools/packages.config 312 | 313 | # Tabs Studio 314 | *.tss 315 | 316 | # Telerik's JustMock configuration file 317 | *.jmconfig 318 | 319 | # BizTalk build output 320 | *.btp.cs 321 | *.btm.cs 322 | *.odx.cs 323 | *.xsd.cs 324 | 325 | # OpenCover UI analysis results 326 | OpenCover/ 327 | 328 | # Azure Stream Analytics local run output 329 | ASALocalRun/ 330 | 331 | # MSBuild Binary and Structured Log 332 | *.binlog 333 | 334 | # NVidia Nsight GPU debugger configuration file 335 | *.nvuser 336 | 337 | # MFractors (Xamarin productivity tool) working folder 338 | .mfractor/ 339 | 340 | # Local History for Visual Studio 341 | .localhistory/ 342 | 343 | # BeatPulse healthcheck temp database 344 | healthchecksdb 345 | 346 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 347 | MigrationBackup/ 348 | 349 | # Ionide (cross platform F# VS Code tools) working folder 350 | .ionide/ 351 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Workspace Collaboration Optimizer 2 | 3 | If your team or company is moving to a new worksite or you need to reorganize an existing workspace, this open-source tool can help. Workspace planning helps you identify and seat teams together in a workspace that maximizes and fosters cross-team productivity and collaboration. You can use this tool to generate seating plans quickly and objectively, in a data-driven way that optimizes employee collaboration by seating teams together. 4 | 5 | The algorithm for this tool accounts for the following rules and constraints: 6 | 7 | * **Teams stay together** - When a workspace can seat everyone on the team, it will keep them all together. 8 | * **Teams who collaborate the most sit together** - Based on the collaboration patterns and the distances between spaces, if team A spends most of its time with team B, the two teams are assigned workspaces that are as close together as possible. 9 | * **The most central team is in the most central workspace** - At a lower priority than the previous two rules, the tool can help you determine which floor plans are better than others for seating specific teams in central locations. 10 | * **Everyone gets a seat** - All team members get an assigned seat in a workspace. 11 | * **People and seat assignments must match** - No workspace is assigned more people than it has seats for, and no workspace can have a negative number of people assigned to seats. 12 | 13 | You can create seating plans that account for different variables, such as the following: 14 | 15 | * Co-locate teams who collaborate the most with each other within the same multi-floored building that has multiple zones or neighborhoods. 16 | * Optimize cross-team collaboration around constraints for specific teams. For example, the HR team must be located together on the first floor in the same neighborhood, and Zone A must be next to the file room. 17 | * Create seating for alternating or rotating work schedules for teams who share a workspace on different weeks or days. 18 | 19 | ## Prerequisites 20 | 21 | Before you can use the tool, confirm the following prerequisites are met. 22 | 23 | * **Anaconda** - Used to install and manage the required versions of Python and Jupyter Notebook. See [Anaconda](https://www.anaconda.com/products/individual#windows) to install it. During the installation, select **Register Anaconda as your default for Python**. 24 | * **Python** - Version 3.3 or later is required; the latest available version is recommended. 25 | * **Jupyter Notebook** - An open-source application that's required to run the Workspace planning tool. 26 | * **Workplace Analytics** - Have [the app set up](https://docs.microsoft.com/workplace-analytics/setup/set-up-workplace-analytics.md) with an analyst role to create the query data for the interaction file. 27 | 
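If you want to sanity-check your environment before opening the notebooks, a short check along the following lines works. This is a minimal sketch: it assumes you run it from the master folder described in the next section, which contains `requirements_SP.txt`; the notebooks also install these packages themselves the first time they run.

```python
# Minimal environment check (assumes requirements_SP.txt is in the current folder).
import subprocess
import sys

# The tool requires Python 3.3 or later.
assert sys.version_info >= (3, 3), "Python 3.3 or later is required"

# Install the packages the notebooks depend on, as listed in requirements_SP.txt.
subprocess.check_call([sys.executable, "-m", "pip", "install", "-r", "requirements_SP.txt"])
```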
28 | ## File prep 29 | 30 | Save the following files, which are located in the [Workspace Collaboration Optimizer repository](https://github.com/microsoft/workspaces-optimizer), to a **master folder**. If you’re new to GitHub, you’ll need to register and sign in before you can access this repository. See [GitHub Getting started](https://docs.github.com/github/getting-started-with-github) for details. You'll create a copy of this master folder for each workspace project. 31 | 32 | * **Distance Helper notebook** - Creates a distance file between specified zones or neighborhoods. This uses the following input files to help you define the walking distances in a unit you specify, such as estimated minutes or meters between floors or buildings. 33 | * **File Validations notebook** - Validates all the input files, including the distance file that's created from the Distance Helper notebook. 34 | * **Generate Floorplan notebook** - Creates a floor plan from the validated input files and runs the algorithm on the floor plan. 35 | * **Requirements text file** - Includes the list of Python packages needed to run the notebooks. 36 | 37 | ### Input files 38 | 39 | You also need to save the following input (.csv) files to your master folder. These files define the relevant information for each workspace project, such as team sizes and workspace capacity. The tool uses these to generate the floor plans with recommended seating for each team based on their collaboration patterns from the interaction file. 40 | 41 | * [Interaction](https://docs.microsoft.com/workplace-analytics/azure-templates/space-planning#create-an-interaction-file) - This is a Workplace Analytics group-to-group query that shows current work and collaboration patterns across the different teams. 42 | * [Team size](https://docs.microsoft.com/workplace-analytics/azure-templates/space-planning#create-a-team-size-file) - Defines the number of employees in each team in your organization. 43 | * [Space capacity](https://docs.microsoft.com/workplace-analytics/azure-templates/space-planning#create-a-space-capacity-file) - Defines the workspace, which can be a combination of buildings, floors, and zones or neighborhoods, and the maximum capacities for each. 44 | 45 | The tool combines the data in these files to generate a table that shows where to seat people in the specified floor plan.
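To make the shape of these inputs concrete, here is a rough, illustrative sketch of loading them with pandas before running the notebooks. Only `space_capacity.csv` (and the `distance.csv` produced by the Distance Helper notebook) are file names referenced by the notebooks; `interactions.csv` and `team_size.csv` below are placeholder names, so substitute whatever names you saved your query results under.

```python
# Illustrative only: peek at the input files before running the notebooks.
# File names other than space_capacity.csv are placeholders for this example.
import pandas as pd

interactions = pd.read_csv("interactions.csv")       # group-to-group collaboration query
team_size = pd.read_csv("team_size.csv")             # number of employees per team
space_capacity = pd.read_csv("space_capacity.csv")   # seats available per workspace

for name, df in [("interaction", interactions),
                 ("team size", team_size),
                 ("space capacity", space_capacity)]:
    print(name, df.shape, list(df.columns))
```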
46 | 47 | See [Workspace planning tool](https://docs.microsoft.com/workplace-analytics/use/wsplan) for detailed instructions on how to install and plan a workspace. 48 | 49 | # Contributing 50 | 51 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 52 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 53 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. 54 | 55 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide 56 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions 57 | provided by the bot. You will only need to do this once across all repos using our CLA. 58 | 59 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 60 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 61 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 62 | 63 | # Support 64 | 65 | ## How to file issues and get help 66 | 67 | This project uses GitHub Issues to track bugs, feature requests, and questions. Please check the documentation and search the existing 68 | issues before filing new issues to avoid duplicates. 69 | 70 | For bugs, feature requests, suggestions, and questions about this project, please submit them via GitHub Issues. For all other inquiries, [email us](mailto:ws-collab-optimizer@microsoft.com). We will do our best to address your questions within a week. We will only respond to questions related to the tool itself. For all Anaconda or Jupyter Notebook questions, please refer to the appropriate software documentation. 71 | 72 | ## Microsoft Support Policy 73 | 74 | Support for this project is limited to the resources listed above. 75 | 76 | # Trademarks 77 | 78 | This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft 79 | trademarks or logos is subject to and must follow 80 | [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). 81 | Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. 82 | Any use of third-party trademarks or logos is subject to those third parties' policies. 83 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd). 40 | 41 | -------------------------------------------------------------------------------- /WPP.Distant-helper_PIPE-001_v2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# Copyright (c) Microsoft Corporation.\n", 10 | "# Licensed under the MIT license.\n", 11 | "\n", 12 | "# BUILDING A DISTANCE HELPER EXCEL VERSION FOR PYTHON\n", 13 | "\n", 14 | "from IPython.display import display, Markdown, clear_output, HTML, Javascript, display_html\n", 15 | "import subprocess\n", 16 | "import sys\n", 17 | "display(Markdown(\"\"\" ## Checking required packages:\n", 18 | "#### if any are missing, installing them now..... \"\"\"))\n", 19 | "!{sys.executable} -m pip install --no-cache-dir -r requirements_SP.txt\n", 20 | "clear_output()\n", 21 | "\n", 22 | "\n", 23 | "import ipysheet\n", 24 | "from ipysheet import column,sheet,to_dataframe,row\n", 25 | "import ipywidgets as widgets\n", 26 | "import pandas as pd\n", 27 | "import geopy\n", 28 | "import collections\n", 29 | "import ipywidgets as widgets\n", 30 | "from geopy.geocoders import Nominatim\n", 31 | "import numpy as np\n", 32 | "from itertools import combinations, permutations, product\n", 33 | "from math import radians, cos, sin, asin, sqrt\n", 34 | "from ast import literal_eval as make_tuple\n", 35 | "\n", 36 | "import os\n", 37 | "!jupyter nbextension enable --py widgetsnbextension --sys-prefix\n", 38 | "clear_output()\n", 39 | "#display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index(), IPython.notebook.get_selected_index()+1)'))\n", 40 | "\n", 41 | "\n", 42 | "HTML('''\n", 59 | " The raw code for this IPython notebook is by default hidden for easier reading toggle on/off the raw code by clicking here.''')" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "display(Markdown(\"# Workspace Collaboration Optimizer\"))\n", 69 | "display(Markdown(\"If your team or company is moving to a new worksite or you need to reorganize an existing workspace, this open-source tool can help. The Workspace Collaboration Optimizer can help you identify and seat teams together in a workspace that maximizes and fosters cross-team productivity and collaboration. You can use this tool to generate floor plans quickly and objectively, in a data-driven way that optimizes employee collaboration by seating teams together. 
This tool works best in Chrome. \"))\n", 70 | "\n", 71 | "display(Markdown(\"## Distance Helper\"))\n", 72 | "display(Markdown(\"This notebook produces a distance.csv file, one of the required input files to generate a seating plan. Before running this notebook, make sure you have your zone coordinates to reference and the list of workspaces for your campus. \"))\n", 73 | "display(Markdown(\"You can skip this step and move to the Validations notebook if you have already prepared a distance.csv file. You can also skip this step if you only have one building containing all of your workspaces and want to manually create your distance file - See documentation for more details.\"))\n", 74 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))\n" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "style = {'description_width': '185px'}\n", 84 | "layout = {'width': '365px'}\n", 85 | "\n", 86 | "get_to_next = widgets.Button(description='Go',button_style='primary')\n", 87 | "howmanyBLDG = widgets.Text(description='Number of buildings',style=style)\n", 88 | "def go_next_line(click):\n", 89 | " display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))\n", 90 | "\n", 91 | "clear_output()\n", 92 | "display(Markdown(\"#### 1. Enter the total number of buildings you would like to include in your seating plan. \"))\n", 93 | "display(howmanyBLDG)\n", 94 | "display(get_to_next)\n", 95 | "get_to_next.on_click(go_next_line)\n" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "\n", 105 | "styletext = {'description_width': '200px'}\n", 106 | "howmanyFLRS = widgets.Text(description='Max number of floors',style=styletext)\n", 107 | "# style_sheet = {'description_width': '1200px'}\n", 108 | "# sheetnameBLDGS = ipysheet.sheet(rows=int(howmanyBLDG.value), columns=3, column_headers=True, row_headers=False,layout=layout,style=style_sheet,row_resizing=True,column_resizing=True)\n", 109 | "\n", 110 | "# sheetnameBLDGS.column_headers=['Campus Name','Building','Address']\n", 111 | "# exec(\"for i in range(\"+str(howmanyBLDG.value)+\"):\\n\\trow(i, ['', '', ''])\")\n", 112 | "# # column(0, ['', '', ''])\n", 113 | "# create an Empty DataFrame \n", 114 | "# object With column names only \n", 115 | "dfskeleton_BLDG = pd.DataFrame(np.zeros([int(howmanyBLDG.value), 3])*np.nan)\n", 116 | "dfskeleton_BLDG1 = dfskeleton_BLDG.replace(np.nan, '', regex=True)\n", 117 | "dfskeleton_BLDG1.columns = ['Campus Name','Building','Address']\n", 118 | "dfskeleton_BLDG1.reset_index(drop=True, inplace=True)\n", 119 | "\n", 120 | "sheetnameBLDGS = ipysheet.from_dataframe(dfskeleton_BLDG1)\n", 121 | "\n", 122 | "clear_output()\n", 123 | "display(Markdown(\"#### 2. Enter your campus/building information below. \"))\n", 124 | "display(Markdown(\"##### Address is needed to fetch latitude and longitude coordinates. \"))\n", 125 | "maxflrs_mkdwn = widgets.Output()\n", 126 | "with maxflrs_mkdwn:\n", 127 | " display(Markdown(\"#### 3. Enter the maximum number of floors across your buildings.\"))\n", 128 | " display(Markdown(\"##### Ex. 
If Building A has 4 floors and Building B has 2 floors, enter '4'\"))\n", 129 | "display(widgets.VBox([sheetnameBLDGS,widgets.VBox([maxflrs_mkdwn,howmanyFLRS]),get_to_next]))\n", 130 | " \n" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "import time\n", 140 | "\n", 141 | "display(Markdown(\"\"\" #### Fetching Latitude and Longitude coordinates... \"\"\"))\n", 142 | "\n", 143 | "dfbuildingADDRESS = to_dataframe(sheetnameBLDGS) \n", 144 | "\n", 145 | "addresscol = 'Address'\n", 146 | "global maxfloors\n", 147 | "maxfloors = int(howmanyFLRS.value)\n", 148 | "\n", 149 | "# declare an empty list to store \n", 150 | "# latitude and longitude of values \n", 151 | "# of column \n", 152 | "longitude = [] \n", 153 | "latitude = [] \n", 154 | "\n", 155 | "\n", 156 | "\n", 157 | "def findlatlong_1(addresscol): \n", 158 | "# try and catch is used to overcome \n", 159 | " # the exception thrown by geolocator \n", 160 | " # using geocodertimedout \n", 161 | " try: \n", 162 | " time.sleep(1) #to add delay in case of large DFs \n", 163 | " # Specify the user_agent as your \n", 164 | " # app name it should not be none \n", 165 | " geolocator = Nominatim(user_agent=\"get_lat_long\") \n", 166 | "# location = geolocator.geocode(col)\n", 167 | "# coordinates = location.latitude, location.longitude \n", 168 | " return geolocator.geocode(addresscol)\n", 169 | " \n", 170 | " except GeocoderTimedOut: \n", 171 | " \n", 172 | " return findlatlong_1(addresscol) \n", 173 | " \n", 174 | " \n", 175 | "# each value from column \n", 176 | "# will be fetched and sent to \n", 177 | "# function find_geocode \n", 178 | "for i in (dfbuildingADDRESS[addresscol]): \n", 179 | "\n", 180 | " if findlatlong_1(i) != None: \n", 181 | "\n", 182 | " loc = findlatlong_1(i) \n", 183 | "\n", 184 | " # coordinates returned from \n", 185 | " # function is stored into \n", 186 | " # two separate list \n", 187 | " latitude.append(loc.latitude) \n", 188 | " longitude.append(loc.longitude) \n", 189 | "\n", 190 | " # if coordinate for a city not \n", 191 | " # found, insert \"NaN\" indicating \n", 192 | " # missing value \n", 193 | " else: \n", 194 | " latitude.append(np.nan) \n", 195 | " longitude.append(np.nan) \n", 196 | "clear_output()\n", 197 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))\n" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": null, 203 | "metadata": {}, 204 | "outputs": [], 205 | "source": [ 206 | " #now add these columns to dataframe \n", 207 | "dfbuildingSTG = dfbuildingADDRESS.copy()\n", 208 | "dfbuildingSTG[\"Latitude\"] = latitude \n", 209 | "dfbuildingSTG[\"Longitude\"] = longitude \n", 210 | "#create coordinates column from both\n", 211 | "dfbuildingSTG['Coordinates'] = dfbuildingSTG['Latitude'].map('{:,.6f}'.format) +','+ dfbuildingSTG['Longitude'].map('{:,.6f}'.format)\n", 212 | "\n", 213 | "#grab final output df\n", 214 | "dfbuildingSTG.drop(['Latitude','Longitude','Address'],axis=1,inplace=True)\n", 215 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))\n" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": null, 221 | "metadata": {}, 222 | "outputs": [], 223 | "source": [ 224 | "columnsFLR = ['Number of Floors']\n", 225 | "columnsFLR = columnsFLR + ['Floor Number '+str(i) for 
i in range(1,maxfloors+1)]\n", 226 | "\n", 227 | "dfbuildingSTG2 = dfbuildingSTG.copy()\n", 228 | "dfbuildingSTG3 = pd.concat([dfbuildingSTG2,pd.DataFrame(None,columns=columnsFLR)],sort=False)\n", 229 | "dfbuildingSTG4 = dfbuildingSTG3.replace(np.nan, '', regex=True)\n", 230 | "dfbuildingSTG4['Building ID'] = dfbuildingSTG4['Campus Name']+'-'+dfbuildingSTG4['Building']\n", 231 | "\n", 232 | "bid_col_name=\"Building ID\"\n", 233 | "first_col_bldg = dfbuildingSTG4.pop(bid_col_name)\n", 234 | "\n", 235 | "dfbuildingSTG4.insert(0, bid_col_name, first_col_bldg)\n", 236 | "sheetnameBLDGFLRS = ipysheet.from_dataframe(dfbuildingSTG4)\n", 237 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))\n" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "metadata": {}, 244 | "outputs": [], 245 | "source": [ 246 | "# clear_output()\n", 247 | "display(Markdown(\"#### 4. Enter the total number of floors and the corresponding floor names/numbers for each building below. \"))\n", 248 | "display(Markdown(\"#### _If your organization’s first floor is on the actual first floor of the building, fill out “1” underneath Floor number 1. If your organization’s floors start on the 5th floor of a building, fill out “5” underneath Floor number 1._ \"))\n", 249 | "display(widgets.VBox([sheetnameBLDGFLRS,get_to_next]))" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": null, 255 | "metadata": {}, 256 | "outputs": [], 257 | "source": [ 258 | "maxzones_mkdwn = widgets.Output()\n", 259 | "with maxzones_mkdwn:\n", 260 | " display(Markdown(\"#### 5. Enter the maximum number of zones you have across all floors \"))\n", 261 | "\n", 262 | "howmanyQDRTS = widgets.Text(description='Max number of zones',style=styletext)\n", 263 | "display(widgets.VBox([maxzones_mkdwn,howmanyQDRTS,get_to_next]))" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": null, 269 | "metadata": {}, 270 | "outputs": [], 271 | "source": [ 272 | "dfbuildingsFINAL = to_dataframe(sheetnameBLDGFLRS)\n", 273 | "global maxquadrants\n", 274 | "maxquadrants = int(howmanyQDRTS.value)\n", 275 | "\n", 276 | "flrqdrt_style = {'width:50px'}\n", 277 | "\n", 278 | "columnsQDRT = ['Floor Name','Number of Zones']\n", 279 | "columnsQDRT = columnsQDRT + ['Zone Coordinates '+str(i) for i in range(1,maxquadrants+1)]\n", 280 | "df_selectneededCOLS = dfbuildingsFINAL[['Campus Name','Building', 'Number of Floors']].copy()\n", 281 | "\n", 282 | "dfrepeatedbyFLRS = pd.DataFrame(np.repeat(df_selectneededCOLS.values, \n", 283 | " df_selectneededCOLS['Number of Floors'].replace(0,1).tolist(), \n", 284 | " axis=0),\n", 285 | " columns=df_selectneededCOLS.columns)\n", 286 | "\n", 287 | "dffloorSTG1 = pd.concat([dfrepeatedbyFLRS,pd.DataFrame(None,columns=columnsQDRT)],sort=False)\n", 288 | "dffloorSTG2 = dffloorSTG1.replace(np.nan, '', regex=True)\n", 289 | "\n", 290 | "sheetnameFLRS = ipysheet.from_dataframe(dffloorSTG2)\n", 291 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))\n" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": null, 297 | "metadata": {}, 298 | "outputs": [], 299 | "source": [ 300 | "#### clear_output()\n", 301 | "display(Markdown(\"#### 6. Enter the floor names from Step 4, total number of zones for each floor, and zone coordinates below. 
\"))\n", 302 | "display(Markdown(\"#### Please review the documentation on how to create the zone coordinates and use the following syntax for zone coordinates. E.G. (1,1) \"))\n", 303 | "display(widgets.VBox([sheetnameFLRS,get_to_next]))\n" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": null, 309 | "metadata": {}, 310 | "outputs": [], 311 | "source": [ 312 | "df_quadrants_selected = to_dataframe(sheetnameFLRS)\n", 313 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))\n", 314 | "clear_output()" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": null, 320 | "metadata": {}, 321 | "outputs": [], 322 | "source": [ 323 | "#dffloorsFINAL = to_dataframe(sheetnameFLRS)\n", 324 | "dffloorsFINAL = df_quadrants_selected.copy()\n", 325 | "# dffloorsFINAL= dffloorsFINAL.fillna(value=np.nan)\n", 326 | "dffloorsFINAL = dffloorsFINAL.replace([None], [''], regex=True)\n", 327 | "dffloorsFINAL['Floor ID'] = dffloorsFINAL['Campus Name']+'-'+dffloorsFINAL['Building']+'-'+dffloorsFINAL['Floor Name']\n", 328 | "\n", 329 | "fid_col_name=\"Floor ID\"\n", 330 | "first_col_flr = dffloorsFINAL.pop(fid_col_name)\n", 331 | "\n", 332 | "dffloorsFINAL.insert(0, fid_col_name, first_col_flr)\n", 333 | "dffloorsFINAL = dffloorsFINAL.rename(columns={'Campus Name': 'Campus'})\n", 334 | "\n", 335 | "\n", 336 | "dffloorsFINAL_melted = pd.melt(dffloorsFINAL, id_vars=['Floor ID','Campus','Building','Number of Floors','Floor Name','Number of Zones'], var_name='Zone Id', value_name='Zone Coordinates')\n", 337 | "dffloorsFINAL_melted['Space Name'] = \"\"\n", 338 | "\n", 339 | "df_rename_ZONEQDRNTS = dffloorsFINAL_melted[['Floor ID','Number of Zones','Zone Coordinates','Space Name']]\n", 340 | "\n", 341 | "df_rename_ZONEQDRNTS = df_rename_ZONEQDRNTS.sort_values('Floor ID')\n", 342 | "df_rename_ZONEQDRNTS['Zone Coordinates'].replace('', np.nan, inplace=True)\n", 343 | "df_rename_ZONEQDRNTS.dropna(subset=['Zone Coordinates'], inplace=True)\n", 344 | "\n", 345 | "sheetnameZONES_NAMES = ipysheet.from_dataframe(df_rename_ZONEQDRNTS)\n", 346 | "\n", 347 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))\n" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": null, 353 | "metadata": {}, 354 | "outputs": [], 355 | "source": [ 356 | "display(Markdown(\"#### 7. Enter a unique workspace name for each zone. \"))\n", 357 | "display(Markdown(\"#### These 'friendly' names should match the workspace/floor names used in your space_capacity.csv file. 
\"))\n", 358 | "\n", 359 | "display(widgets.VBox([sheetnameZONES_NAMES,get_to_next]))" 360 | ] 361 | }, 362 | { 363 | "cell_type": "code", 364 | "execution_count": null, 365 | "metadata": {}, 366 | "outputs": [], 367 | "source": [ 368 | "df_zones_STG = to_dataframe(sheetnameZONES_NAMES)\n", 369 | "\n", 370 | "# check if space names \n", 371 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))\n" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": null, 377 | "metadata": {}, 378 | "outputs": [], 379 | "source": [ 380 | "\"\"\"\n", 381 | "https://stackoverflow.com/questions/4913349/haversine-formula-in-python-bearing-and-distance-between-two-gps-points\n", 382 | "\n", 383 | "\"\"\"\n", 384 | "def haversine(lon1, lat1, lon2, lat2):\n", 385 | " \"\"\"\n", 386 | " Calculate the great circle distance between two points \n", 387 | " on the earth (specified in decimal degrees)\n", 388 | " \"\"\"\n", 389 | " # convert decimal degrees to radians \n", 390 | " lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])\n", 391 | "\n", 392 | " # haversine formula \n", 393 | " dlon = lon2 - lon1 \n", 394 | " dlat = lat2 - lat1 \n", 395 | " a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2\n", 396 | " c = 2 * asin(sqrt(a)) \n", 397 | " #r = 6371 # Radius of earth in kilometers. Use 3956 for miles\n", 398 | " r = 3956\n", 399 | " return c * r\n", 400 | "\n", 401 | "\n", 402 | "##############################\n", 403 | "#Load Files ** Start Here **\n", 404 | "############################\n", 405 | "\n", 406 | "cwd = os.getcwd()\n", 407 | "# tab_nest_001 = widgets.Accordion()\n", 408 | "# style = {'description_width': '185px'}\n", 409 | "# layout = {'width': '365px'}\n", 410 | "\n", 411 | "# go_run_full = widgets.Button(description='Go',button_style = 'primary')\n", 412 | "\n", 413 | "# def go_to_nextblock(btn):\n", 414 | "# display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+3)'))\n", 415 | "\n", 416 | "\n", 417 | "# input_path_001 = FileChooser(cwd)\n", 418 | "# tab_nest_001.children = [input_path_001]\n", 419 | "\n", 420 | "# tab_nest_001.set_title(0,'Load file and name output file')\n", 421 | "# go_run_full.on_click(go_to_nextblock) \n", 422 | "\n", 423 | "\n", 424 | "# display(widgets.VBox([tab_nest_001,go_run_full]))\n", 425 | "\n", 426 | "\n", 427 | "dfbuildingsDIST = dfbuildingsFINAL.copy()\n", 428 | "dfbuildingsDIST.index = dfbuildingsDIST.index.astype(int) #use astype to convert to int\n", 429 | "dfbuildingsDIST.columns = [s.strip() for s in dfbuildingsDIST.columns]\n", 430 | "num_bldgs = len(dfbuildingsDIST)\n", 431 | "dfbuildingsDIST[['lat','long']] = dfbuildingsDIST[\"Coordinates\"].str.split(\",\",expand=True) \n", 432 | "dfbuildingsDIST['lat'] = dfbuildingsDIST['lat'].astype(float)\n", 433 | "dfbuildingsDIST['long'] = dfbuildingsDIST['long'].astype(float)\n", 434 | "\n", 435 | "for i in range(num_bldgs):\n", 436 | " bldg_id = dfbuildingsDIST.loc[i,\"Building ID\"]\n", 437 | " bldg_bmcRaw = dfbuildingsDIST.loc[i,\"Coordinates\"]\n", 438 | " blat = dfbuildingsDIST.loc[i,\"lat\"]\n", 439 | " blong = dfbuildingsDIST.loc[i,\"long\"]\n", 440 | " #print(bldg_id, \"coordinates found...\", blat, blong)\n", 441 | "\n", 442 | "#print(\"calculating building distance...\")\n", 443 | "\n", 444 | "bld_ids = list(range(len(dfbuildingsDIST)))\n", 445 | "bldg_edges = list(product(bld_ids, bld_ids))\n", 
446 | "#bldg_edges = list(permutations(bld_ids, 2))\n", 447 | "bldg_distDF = pd.DataFrame(bldg_edges, columns = [\"from_idx\", \"to_idx\"])\n", 448 | "\n", 449 | "dfbuildingsDIST2 = dfbuildingsDIST[[\"Building ID\", \"lat\", \"long\"]].copy()\n", 450 | "\n", 451 | "bldg_distDF = pd.merge(bldg_distDF, dfbuildingsDIST2, left_on = \"from_idx\", right_index=True, how = 'left')\n", 452 | "bldg_distDF = pd.merge(bldg_distDF, dfbuildingsDIST2, left_on = \"to_idx\", right_index=True, how = 'left', suffixes= ['_From', '_To'])\n", 453 | "for i in range(len(bldg_distDF)):\n", 454 | " lat_from = bldg_distDF.loc[i,\"lat_From\"]\n", 455 | " long_from = bldg_distDF.loc[i,\"long_From\"]\n", 456 | " lat_to = bldg_distDF.loc[i,\"lat_To\"]\n", 457 | " long_to = bldg_distDF.loc[i,\"long_To\"]\n", 458 | " hav_dist = haversine(lat_from, long_from, lat_to, long_to)\n", 459 | " bldg_distDF.loc[i,\"dist\"] = hav_dist\n", 460 | " \n", 461 | " \n", 462 | "#print(\"Everything good so far... \")\n", 463 | "#print(\"Calculated \" + str(len(bldg_distDF))+ \" distances across building and campus using Bing Map coordinates and Haversine formula\")\n", 464 | "\n", 465 | "\n", 466 | "################################\n", 467 | "## Floor Distances\n", 468 | "#################################\n", 469 | "\n", 470 | "#print(\"calculating floor to floor distances...\")\n", 471 | "\n", 472 | "#create floors\n", 473 | "def create_floor_distances(floor_rel_dist, avg_dist):\n", 474 | " rel_floor_idxs = list(range(len(floor_rel_dist)))\n", 475 | " res = []\n", 476 | " floor_rel_dist_avg = [x * avg_dist for x in floor_rel_dist]\n", 477 | " for origin in floor_rel_dist_avg:\n", 478 | " calc_dist_row = [abs(x - origin) for x in floor_rel_dist_avg]\n", 479 | " res.append(calc_dist_row.copy())\n", 480 | " return res, rel_floor_idxs \n", 481 | "\n", 482 | "def create_floor_adj_list(K, key_start = 0):\n", 483 | " if K == 0: return [], None\n", 484 | " if K == 1: return [key_start], {key_start: []}\n", 485 | " ids = [key_start+i for i in list(range(K))]\n", 486 | " ids_set = set(ids)\n", 487 | " d = {}\n", 488 | " for current in ids:\n", 489 | " d[current] = sorted(list(ids_set - set([current])))\n", 490 | " return ids, d\n", 491 | "\n", 492 | "bldg_input = dfbuildingsFINAL.copy()\n", 493 | "bldg_input.index = bldg_input.index.astype(int) #use astype to convert to int\n", 494 | "num_bldgs = len(bldg_input)\n", 495 | "floors_per_bldg = {}\n", 496 | "dist_per_floors = {}\n", 497 | "floor_labels_per_bldg = {}\n", 498 | "cols_b_tab = list(bldg_input.columns)\n", 499 | "f_start_idx = cols_b_tab.index(\"Floor Number 1\")\n", 500 | "#print(\"loading floor relative distances....\")\n", 501 | "for i in range(num_bldgs):\n", 502 | " bldg_id = dfbuildingsDIST.loc[i,\"Building ID\"]\n", 503 | " num_floors = int(dfbuildingsDIST.loc[i,\"Number of Floors\"])\n", 504 | " #removing optional column to input average distance as per @july 20, 2020 excel input file validation review meeting\n", 505 | " #avg_dist = df1.loc[i,\"Average Distance per Floor\"] \n", 506 | " avg_dist = 2\n", 507 | " floor_rel_dist = list(dfbuildingsDIST.iloc[i,f_start_idx: f_start_idx + num_floors])\n", 508 | " floor_rel_dist = [int(i) for i in floor_rel_dist] \n", 509 | " #print(bldg_id, \"found\", num_floors, \"floors rel dist: \", floor_rel_dist)\n", 510 | " floor_distances, floors_idx = create_floor_distances(floor_rel_dist, avg_dist)\n", 511 | " key_start = 0\n", 512 | " floor_labels = [key_start+i for i in floors_idx] \n", 513 | " dist_per_floors[bldg_id]= 
floor_distances.copy()\n", 514 | " floor_labels_per_bldg[bldg_id] = floor_labels.copy()\n", 515 | " \n", 516 | " fi, fn = create_floor_adj_list(num_floors)\n", 517 | " if fn:\n", 518 | " floors_per_bldg[bldg_id] = fn.copy()\n", 519 | " else:\n", 520 | " print(\"wait....something went wrong....Buildings dataframe (dfbuildingsFINAL)... row:\", i)\n", 521 | "\n", 522 | "#print(\"number of buildings with floor dist:\", len(dist_per_floors), \"of\", len(floors_per_bldg) )\n", 523 | "\n", 524 | "\n", 525 | "##########################\n", 526 | "## Quadrant Coordinates\n", 527 | "##########################\n", 528 | "\n", 529 | "#print(\"loading quadrant coordinates... from floors tab... \")\n", 530 | "df100 = dffloorsFINAL.copy()\n", 531 | "#df100 = dffloorsFINAL.copy()\n", 532 | "df100.index = df100.index.astype(int) #use astype to convert to int\n", 533 | "df100.columns = [s.strip() for s in df100.columns]\n", 534 | "num_floors = len(df100)\n", 535 | "quad_coord_per_floor = {}\n", 536 | "\n", 537 | "cols = list(df100.columns)\n", 538 | "start_idx = cols.index('Zone Coordinates 1')\n", 539 | "#print(\"start of coordinates columns detected... column idx:\", start_idx)\n", 540 | "#print(\"validating coordinate columns vs number of quadrants... \")\n", 541 | "\n", 542 | "for i in range(num_floors):\n", 543 | " floor_id = df100.loc[i,\"Floor ID\"]\n", 544 | " num_quads = int(df100.loc[i,\"Number of Zones\"])\n", 545 | " coord_row = []\n", 546 | " for j in range(num_quads):\n", 547 | " temp = df100.iloc[i, start_idx+j]\n", 548 | " coord_row.append(temp)\n", 549 | " if num_quads != len(coord_row):\n", 550 | " print(floor_id, \"number of zones do not match\")\n", 551 | " print(\"Excel Floor tab ERROR: Please enter one coordinate per each zone\")\n", 552 | " else:\n", 553 | " #print(floor_id,\"found quadrant coordinates...\", coord_row)\n", 554 | " quad_coord_per_floor[floor_id] = coord_row.copy() \n", 555 | "\n", 556 | " \n", 557 | " ##########################\n", 558 | "## Quadrant Distances\n", 559 | "##########################\n", 560 | "#print(\"calculating quadrant to quadrant distances...\")\n", 561 | "\n", 562 | "#create quadrants\n", 563 | "def create_quad_distances(coords, avg_dist):\n", 564 | " coord_idxs = list(range(len(coords)))\n", 565 | " coords_arr = [ np.array(x) for x in coords] \n", 566 | " res = []\n", 567 | " for origin in coords_arr:\n", 568 | " coord_dist_row = [np.linalg.norm(coord2-origin) for coord2 in coords_arr]\n", 569 | " res.append(coord_dist_row.copy())\n", 570 | " return res, coord_idxs \n", 571 | "\n", 572 | "def create_quad_adj_list(K, key_start = \"a\"):\n", 573 | " if K == 0: return [], None\n", 574 | " if K == 1: return [key_start], {key_start: \"\"}\n", 575 | " ids = [chr(ord(key_start)+i) for i in list(range(K))]\n", 576 | " ids_set = set(ids)\n", 577 | " d = {}\n", 578 | " for current_quad in ids:\n", 579 | " d[current_quad] = \"\".join(sorted(list(ids_set - set(current_quad))))\n", 580 | " return ids, d\n", 581 | "\n", 582 | "\n", 583 | "quads_dist_per_floor = {}\n", 584 | "quad_idxs_map_per_floor = {}\n", 585 | "quad_labels_per_floor = {} \n", 586 | "for i in range(num_floors):\n", 587 | " floor_id = df100.loc[i,\"Floor ID\"]\n", 588 | " #TO DO. 
Average distance per quadrant small/medium/large\n", 589 | " #avg_dist_q = df100.loc[i,\"Average Distance Per Quadrant\"] \n", 590 | " avg_dist_q = 1\n", 591 | " coords = quad_coord_per_floor[floor_id]\n", 592 | " coords = [make_tuple(s) for s in coords]\n", 593 | " coords = [(float(x),float(y)) for x,y in coords]\n", 594 | " quad_distances, quads_idx = create_quad_distances(coords, avg_dist_q)\n", 595 | " key_start_q = 'a'\n", 596 | " quad_labels = [chr(ord(key_start_q)+i) for i in quads_idx]\n", 597 | " quad_idxs_map = {q: i for q,i in zip(quad_labels, quads_idx)}\n", 598 | " quads_dist_per_floor[floor_id]= quad_distances.copy()\n", 599 | " quad_labels_per_floor[floor_id] = quad_labels.copy()\n", 600 | " quad_idxs_map_per_floor[floor_id] = quad_idxs_map.copy() \n", 601 | "\n", 602 | "#print(\"number of floors with quads dist\", len(quads_dist_per_floor))\n", 603 | "\n", 604 | "#############################\n", 605 | "# Quadrant IDs\n", 606 | "#############################\n", 607 | "\n", 608 | "quadrant_ids = []\n", 609 | "for floor_id, quad_labels in quad_labels_per_floor.items():\n", 610 | " campus_name, bldg_name, floor_name = floor_id.split(\"-\") \n", 611 | " bldg_id = campus_name + \"-\" + bldg_name \n", 612 | " for q in quad_labels:\n", 613 | " space_row = {}\n", 614 | " qid = floor_id+\"-\"+q\n", 615 | " space_row[\"campus\"] = campus_name\n", 616 | " space_row[\"building\"] = bldg_name\n", 617 | " space_row[\"floor\"] = floor_name\n", 618 | " space_row[\"quad\"] = q\n", 619 | " quad_idx_map = quad_idxs_map_per_floor[floor_id]\n", 620 | " space_row[\"quad_idx\"] = quad_idx_map[q]\n", 621 | " space_row[\"building_id\"] = bldg_id\n", 622 | " space_row[\"floor_id\"] = floor_id\n", 623 | " space_row[\"quadrant_id\"] = qid\n", 624 | " quadrant_ids.append(space_row.copy()) \n", 625 | "\n", 626 | "#print(\"number of spaces loaded... \", len(quadrant_ids))\n", 627 | "####################################\n", 628 | "# Create Campus and Building Matrix\n", 629 | "####################################\n", 630 | "\n", 631 | "#sort idxs\n", 632 | "quadrant_idsDF = pd.DataFrame(quadrant_ids)\n", 633 | "quadrant_idsDF = quadrant_idsDF.sort_values(by = [\"quadrant_id\"]).reset_index(drop = True)\n", 634 | "quadrant_idsDF = quadrant_idsDF.reset_index()\n", 635 | "\n", 636 | "#new campus and buildings matrix (filled with zeros)\n", 637 | "N = len(quadrant_ids)\n", 638 | "new_building_expanded_matrix = np.zeros((N,N))\n", 639 | "\n", 640 | "#print(\"New building matrix created... expanding to quad size... 
shape:\", new_building_expanded_matrix.shape )\n", 641 | "#print(\"adding building distances as per Bing Map coordintes...\")\n", 642 | "\n", 643 | "proxyDF = quadrant_idsDF\n", 644 | "bldg_T = new_building_expanded_matrix\n", 645 | "for i in range(len(bldg_distDF)):\n", 646 | " from_bldg = bldg_distDF.loc[i,\"Building ID_From\"]\n", 647 | " to_bldg = bldg_distDF.loc[i,\"Building ID_To\"]\n", 648 | " bldg_dist = bldg_distDF.loc[i,\"dist\"]\n", 649 | " from_idxs = list(proxyDF.loc[proxyDF[\"building_id\"]==from_bldg,\"index\"]) \n", 650 | " to_idxs = list(proxyDF.loc[proxyDF[\"building_id\"]==to_bldg,\"index\"]) \n", 651 | " for i in from_idxs:\n", 652 | " for j in to_idxs:\n", 653 | " bldg_T[i, j] = bldg_dist\n", 654 | "\n", 655 | "#print(\"building matrix filled...\")\n", 656 | "\n", 657 | "#output for excel\n", 658 | "campus_bldg_DF = pd.DataFrame(bldg_T)\n", 659 | "campus_bldg_DF.index = list(quadrant_idsDF[\"quadrant_id\"])\n", 660 | "campus_bldg_DF = campus_bldg_DF.reset_index()\n", 661 | "quad_ids = list(quadrant_idsDF[\"quadrant_id\"])\n", 662 | "new_cols = [\"quadrant_id\"] + quad_ids\n", 663 | "campus_bldg_DF.columns = new_cols\n", 664 | "\n", 665 | "####################################\n", 666 | "# Create Floors Distance Matrix\n", 667 | "####################################\n", 668 | "\n", 669 | "#new floors matrix (filled with zeros)\n", 670 | "new_floors_expanded_matrix = np.zeros((N,N))\n", 671 | "\n", 672 | "#print(\"New floors matrix created... expanding to quad size... shape:\", new_floors_expanded_matrix.shape )\n", 673 | "#print(\"adding floors distances as per buildings tab...\")\n", 674 | "\n", 675 | "proxyDF = quadrant_idsDF\n", 676 | "floor_T = new_floors_expanded_matrix\n", 677 | "for bldg_id, floor_distances in dist_per_floors.items():\n", 678 | " floor_labels = sorted(set(list(proxyDF.loc[proxyDF['building_id'] == bldg_id, \"floor\"])))\n", 679 | " floor_labels_map = { i:s for i,s in enumerate(floor_labels)}\n", 680 | " for rel_floor_idx1, floor_dist_row in enumerate(floor_distances): \n", 681 | " from_floor = floor_labels_map[rel_floor_idx1]\n", 682 | " for rel_floor_idx2, floor_dist in enumerate(floor_dist_row): \n", 683 | " to_floor = floor_labels_map[rel_floor_idx2]\n", 684 | " from_idxs = list(proxyDF.loc[(proxyDF[\"building_id\"] == bldg_id) & (proxyDF[\"floor\"]==from_floor),\"index\"]) \n", 685 | " to_idxs = list(proxyDF.loc[(proxyDF[\"building_id\"] == bldg_id) & (proxyDF[\"floor\"]==to_floor),\"index\"]) \n", 686 | " for i in from_idxs:\n", 687 | " for j in to_idxs:\n", 688 | " floor_T[i, j] = floor_dist\n", 689 | "\n", 690 | "#print(\"floor matrix filled...\")\n", 691 | "\n", 692 | "#output for excel\n", 693 | "floors_DF = pd.DataFrame(floor_T)\n", 694 | "floors_DF.index = list(quadrant_idsDF[\"quadrant_id\"])\n", 695 | "floors_DF = floors_DF.reset_index()\n", 696 | "quad_ids = list(quadrant_idsDF[\"quadrant_id\"])\n", 697 | "new_cols = [\"quadrant_id\"] + quad_ids\n", 698 | "floors_DF.columns = new_cols\n", 699 | "\n", 700 | "####################################\n", 701 | "# \"REPMAT\" Floor Vectors - Horizontally\n", 702 | "####################################\n", 703 | "#print(\"translating floor distances horizontally... 
\")\n", 704 | "\n", 705 | "proxyDF = quadrant_idsDF\n", 706 | "floor_T2 = np.zeros((N,N))\n", 707 | "for bldg_id, floor_distances in dist_per_floors.items():\n", 708 | " row_idxs = list(proxyDF.loc[(proxyDF[\"building_id\"] == bldg_id),\"index\"]) \n", 709 | " col_idxs = row_idxs.copy()\n", 710 | " cv, rv = np.meshgrid(row_idxs, col_idxs)\n", 711 | " temp = floor_T[rv, cv]\n", 712 | " floor_dist = temp[:,[0]] #first column\n", 713 | " floor_vector = floor_dist\n", 714 | " #print(\"REPMAT horizontally\", bldg_id, \"num rows:\", len(floor_vector))\n", 715 | " min_idx, max_idx = min(col_idxs), max(col_idxs)\n", 716 | " #load left \n", 717 | " floor_T2[row_idxs, 0:min_idx] = floor_vector\n", 718 | " #load right\n", 719 | " floor_T2[row_idxs, max_idx+1:] = floor_vector\n", 720 | "\n", 721 | "#output for excel\n", 722 | "floors_DF2 = pd.DataFrame(floor_T2)\n", 723 | "floors_DF2.index = list(quadrant_idsDF[\"quadrant_id\"])\n", 724 | "floors_DF2 = floors_DF2.reset_index()\n", 725 | "quad_ids = list(quadrant_idsDF[\"quadrant_id\"])\n", 726 | "new_cols = [\"quadrant_id\"] + quad_ids\n", 727 | "floors_DF2.columns = new_cols\n", 728 | "\n", 729 | "#print(\"Floor matrix HORIZONTAL translation matrix filled...\")\n", 730 | "\n", 731 | "####################################\n", 732 | "# \"REPMAT\" Floor Vectors - Vertically\n", 733 | "####################################\n", 734 | "#print(\"translating floor distances VERTICALLY... \")\n", 735 | "\n", 736 | "proxyDF = quadrant_idsDF\n", 737 | "floor_T3 = np.zeros((N,N))\n", 738 | "for bldg_id, floor_distances in dist_per_floors.items():\n", 739 | " row_idxs = list(proxyDF.loc[(proxyDF[\"building_id\"] == bldg_id),\"index\"]) \n", 740 | " col_idxs = row_idxs.copy()\n", 741 | " cv, rv = np.meshgrid(row_idxs, col_idxs)\n", 742 | " temp = floor_T[rv, cv]\n", 743 | " floor_dist = temp[[0],:] #first row\n", 744 | " floor_vector = floor_dist\n", 745 | " #print(\"REPMAT vertically\", bldg_id, \"num rows:\", len(floor_vector))\n", 746 | " \n", 747 | " min_idx, max_idx = min(col_idxs), max(col_idxs)\n", 748 | " \n", 749 | " #load left \n", 750 | " floor_T3[0:min_idx, col_idxs] = floor_vector\n", 751 | " #load right\n", 752 | " floor_T3[max_idx+1:, col_idxs] = floor_vector\n", 753 | " \n", 754 | " \n", 755 | "\n", 756 | "#output for excel\n", 757 | "floors_DF3 = pd.DataFrame(floor_T3)\n", 758 | "floors_DF3.index = list(quadrant_idsDF[\"quadrant_id\"])\n", 759 | "floors_DF3 = floors_DF3.reset_index()\n", 760 | "quad_ids = list(quadrant_idsDF[\"quadrant_id\"])\n", 761 | "new_cols = [\"quadrant_id\"] + quad_ids\n", 762 | "floors_DF3.columns = new_cols\n", 763 | "\n", 764 | "#print(\"Floor matrix VERTICAL translation matrix filled...\")\n", 765 | "\n", 766 | "###################################################################\n", 767 | "# Add \"Translate\" Floor Vectors - Vertical + Horizontal + Diagonal\n", 768 | "###################################################################\n", 769 | "\n", 770 | "floor_T4 = floor_T + floor_T2 + floor_T3\n", 771 | "\n", 772 | "#output for excel\n", 773 | "floors_DF4 = pd.DataFrame(floor_T4)\n", 774 | "floors_DF4.index = list(quadrant_idsDF[\"quadrant_id\"])\n", 775 | "floors_DF4 = floors_DF4.reset_index()\n", 776 | "quad_ids = list(quadrant_idsDF[\"quadrant_id\"])\n", 777 | "new_cols = [\"quadrant_id\"] + quad_ids\n", 778 | "floors_DF4.columns = new_cols\n", 779 | "\n", 780 | "\n", 781 | "####################################\n", 782 | "# Create Quadrants Distance Matrix\n", 783 | "####################################\n", 784 | 
"\n", 785 | "#new quadrants matrix (filled with zeros)\n", 786 | "new_quad_expanded_matrix = np.zeros((N,N))\n", 787 | "\n", 788 | "#print(\"New quad matrix created... expanding to quad size... shape:\", new_quad_expanded_matrix.shape )\n", 789 | "#print(\"adding quad distances as per coordinates in floors tab...\")\n", 790 | "\n", 791 | "proxyDF = quadrant_idsDF\n", 792 | "quad_T = new_quad_expanded_matrix\n", 793 | "for floor_id, quad_distances in quads_dist_per_floor.items():\n", 794 | " quad_labels = quad_labels_per_floor[floor_id]\n", 795 | " quad_labels_map = { i:s for i,s in enumerate(quad_labels)}\n", 796 | " for rel_quad_idx1, quad_dist_row in enumerate(quad_distances): \n", 797 | " from_quad = quad_labels_map[rel_quad_idx1]\n", 798 | " for rel_quad_idx2, quad_dist in enumerate(quad_dist_row): \n", 799 | " to_quad = quad_labels_map[rel_quad_idx2]\n", 800 | " from_idxs = list(proxyDF.loc[(proxyDF[\"floor_id\"] == floor_id) & (proxyDF[\"quad\"]==from_quad),\"index\"]) \n", 801 | " to_idxs = list(proxyDF.loc[(proxyDF[\"floor_id\"] == floor_id) & (proxyDF[\"quad\"]==to_quad),\"index\"]) \n", 802 | " for i in from_idxs:\n", 803 | " for j in to_idxs:\n", 804 | " quad_T[i, j] = quad_dist\n", 805 | "\n", 806 | "#print(\"quad matrix filled...\")\n", 807 | "\n", 808 | "#output for excel\n", 809 | "\n", 810 | "quads_DF = pd.DataFrame(quad_T)\n", 811 | "quads_DF.index = list(quadrant_idsDF[\"quadrant_id\"])\n", 812 | "quads_DF = quads_DF.reset_index()\n", 813 | "quad_ids = list(quadrant_idsDF[\"quadrant_id\"])\n", 814 | "new_cols = [\"quadrant_id\"] + quad_ids\n", 815 | "quads_DF.columns = new_cols\n", 816 | "\n", 817 | "####################################\n", 818 | "# \"REPMAT\" Quad Vectors - Horizontally\n", 819 | "####################################\n", 820 | "#print(\"translating quad distances horizontally... \")\n", 821 | "\n", 822 | "proxyDF = quadrant_idsDF\n", 823 | "quad_T2 = np.zeros((N,N))\n", 824 | "for floor_id, quad_distances in quads_dist_per_floor.items():\n", 825 | " quad_dist = quad_distances[0]\n", 826 | " quad_vector = np.array(quad_dist).reshape(len(quad_dist),1)\n", 827 | " #print(\"REPMAT horizontally\", floor_id, \"num rows:\", len(quad_vector))\n", 828 | " row_idxs = list(proxyDF.loc[(proxyDF[\"floor_id\"] == floor_id),\"index\"]) \n", 829 | " col_idxs = row_idxs.copy()\n", 830 | " min_idx, max_idx = min(col_idxs), max(col_idxs)\n", 831 | " #load left \n", 832 | " quad_T2[row_idxs, 0:min_idx] = quad_vector\n", 833 | " #load right\n", 834 | " quad_T2[row_idxs, max_idx+1:] = quad_vector\n", 835 | "\n", 836 | "#output for excel\n", 837 | "quads_DF2 = pd.DataFrame(quad_T2)\n", 838 | "quads_DF2.index = list(quadrant_idsDF[\"quadrant_id\"])\n", 839 | "quads_DF2 = quads_DF2.reset_index()\n", 840 | "quad_ids = list(quadrant_idsDF[\"quadrant_id\"])\n", 841 | "new_cols = [\"quadrant_id\"] + quad_ids\n", 842 | "quads_DF2.columns = new_cols\n", 843 | "\n", 844 | "#print(\"quad matrix HORIZONTAL translation matrix filled...\")\n", 845 | "\n", 846 | "####################################\n", 847 | "# \"REPMAT\" Quad Vectors - Vertical\n", 848 | "####################################\n", 849 | "#print(\"translating quads distances vertically... 
\")\n", 850 | "proxyDF = quadrant_idsDF\n", 851 | "quad_T3 = np.zeros((N,N))\n", 852 | "for floor_id, quad_distances in quads_dist_per_floor.items():\n", 853 | " quad_dist = quad_distances[0]\n", 854 | " quad_vector = np.array(quad_dist).reshape(1, len(quad_dist))\n", 855 | " #print(\"REPMAT vertically\", floor_id, \"num rows:\", len(quad_vector))\n", 856 | " row_idxs = list(proxyDF.loc[(proxyDF[\"floor_id\"] == floor_id),\"index\"]) \n", 857 | " col_idxs = row_idxs.copy()\n", 858 | " min_idx, max_idx = min(row_idxs), max(row_idxs)\n", 859 | " #load up\n", 860 | " quad_T3[0:min_idx, col_idxs] = quad_vector\n", 861 | " #load right\n", 862 | " quad_T3[max_idx+1:, col_idxs] = quad_vector\n", 863 | "\n", 864 | "#output for excel\n", 865 | "quads_DF3 = pd.DataFrame(quad_T3)\n", 866 | "quads_DF3.index = list(quadrant_idsDF[\"quadrant_id\"])\n", 867 | "quads_DF3 = quads_DF3.reset_index()\n", 868 | "quad_ids = list(quadrant_idsDF[\"quadrant_id\"])\n", 869 | "new_cols = [\"quadrant_id\"] + quad_ids\n", 870 | "quads_DF3.columns = new_cols\n", 871 | "\n", 872 | "#print(\"quad matrix VERTICAL translation matrix filled...\")\n", 873 | "\n", 874 | "##############################################################\n", 875 | "# Add \"Translate\" Quad Vectors - Vertical + Horizontal + Diagonal\n", 876 | "##############################################################\n", 877 | "\n", 878 | "quad_T4 = quad_T + quad_T2 + quad_T3\n", 879 | "\n", 880 | "#output for excel\n", 881 | "quads_DF4 = pd.DataFrame(quad_T4)\n", 882 | "quads_DF4.index = list(quadrant_idsDF[\"quadrant_id\"])\n", 883 | "quads_DF4 = quads_DF4.reset_index()\n", 884 | "quad_ids = list(quadrant_idsDF[\"quadrant_id\"])\n", 885 | "new_cols = [\"quadrant_id\"] + quad_ids\n", 886 | "quads_DF4.columns = new_cols\n", 887 | "\n", 888 | "\n", 889 | "##########> TO DOs. \n", 890 | "# \n", 891 | "# 1) expand floor and quads vector (and/or affine transformation for final distance across floors quads)\n", 892 | "#\n", 893 | "# 2) calibrate quads coordinate (e.g. sqrt(2))\n", 894 | "#\n", 895 | "# 3) calibrate buildings, floors quads order of magnitude - bing map vs quads coordinates vs floor rel dist proportions according to CS needs (e.g. \"zoom in\" into quad vs \"zoom in\" floors, etc.) 
\n", 896 | "# \n", 897 | "###############################>>>\n", 898 | "\n", 899 | "bldg_boost = 1000\n", 900 | "floor_boost = 20\n", 901 | "quad_boost = 1\n", 902 | "\n", 903 | "final_T = bldg_T*bldg_boost + floor_T4*floor_boost + quad_T4*quad_boost + bldg_T* 5280\n", 904 | "\n", 905 | "#output for excel\n", 906 | "final_DF = pd.DataFrame(final_T)\n", 907 | "final_DF.index = list(quadrant_idsDF[\"quadrant_id\"])\n", 908 | "final_DF = final_DF.reset_index()\n", 909 | "quad_ids = list(quadrant_idsDF[\"quadrant_id\"])\n", 910 | "new_cols = [\"quadrant_id\"] + quad_ids\n", 911 | "final_DF.columns = new_cols\n", 912 | "\n", 913 | "quad_ids_keys = quad_ids\n", 914 | "quad_ids_values = list(df_zones_STG['Space Name'])\n", 915 | "\n", 916 | "# using zip() \n", 917 | "# to convert lists to dictionary \n", 918 | "quad_rename_dict = dict(zip(quad_ids_keys, quad_ids_values)) \n", 919 | "\n", 920 | "final_DF2 = final_DF.copy()\n", 921 | "# Remap the values of the dataframe \n", 922 | "final_DF2['quadrant_id']= final_DF2['quadrant_id'].map(quad_rename_dict) \n", 923 | "final_DF2 = final_DF2.rename(columns=quad_rename_dict)\n", 924 | "\n", 925 | "\n", 926 | "final_DF2.to_csv('distance.csv',index = False)\n", 927 | "\n", 928 | "\n", 929 | "\n", 930 | "#w2f.save()\n", 931 | "#clear_output()\n", 932 | "\n", 933 | "showdshp = widgets.Button(description ='Show details')\n", 934 | "hidedshp = widgets.Button(description ='Hide details')\n", 935 | "dshp_mkdn = widgets.Output()\n", 936 | "with dshp_mkdn:\n", 937 | " display(Markdown('Distance logging'))\n", 938 | "display(dshp_mkdn)\n", 939 | "display(showdshp)\n", 940 | "\n", 941 | "def click_on_hide_showdshp_details(click):\n", 942 | " clear_output()\n", 943 | " display(dshp_mkdn)\n", 944 | " display(showdshp)\n", 945 | "\n", 946 | "def click_on_show_showdshp_details(click):\n", 947 | " clear_output()\n", 948 | " display(dshp_mkdn)\n", 949 | " display(hidedshp)\n", 950 | "# print(\"Loading space catalog from... \")\n", 951 | "# print(\"folder:\", input_path)\n", 952 | "# print(\"file:\", input_file)\n", 953 | "\n", 954 | "# print(\"--------------------------------------------------------\")\n", 955 | "# for tab_name in xl1.sheet_names:\n", 956 | "# temp = xl1.parse(tab_name).fillna(0)\n", 957 | "# print(\"reading excel tab...\", tab_name,\" shape... \", temp.shape)\n", 958 | "# tabs.append(temp.copy())\n", 959 | "\n", 960 | " print(\"loading buildings...\")\n", 961 | "\n", 962 | " for i in range(num_bldgs):\n", 963 | " bldg_id = dfbuildingsDIST.loc[i,\"Building ID\"]\n", 964 | "# bldg_id.index = bldg_id.index.astype(int) #use astype to convert to int\n", 965 | " bldg_bmcRaw = dfbuildingsDIST.loc[i,\"Coordinates\"]\n", 966 | " blat = dfbuildingsDIST.loc[i,\"lat\"]\n", 967 | " blong = dfbuildingsDIST.loc[i,\"long\"]\n", 968 | " print(bldg_id, \"coordinates found...\", blat, blong)\n", 969 | "\n", 970 | " print(\"--------------------------------------------------------\")\n", 971 | "\n", 972 | " print(\"calculating building distance...\")\n", 973 | "\n", 974 | " print(\"Everything good so far... 
\")\n", 975 | " print(\"Calculated \" + str(len(bldg_distDF))+ \" distances across building and campus using Coordinates and Haversine formula\")\n", 976 | "\n", 977 | " print(\"--------------------------------------------------------\")\n", 978 | "\n", 979 | " print(\"calculating floor to floor distances...\")\n", 980 | " print(\"loading floor relative distances....\")\n", 981 | "\n", 982 | "\n", 983 | " for i in range(num_bldgs):\n", 984 | " bldg_id = dfbuildingsDIST.loc[i,\"Building ID\"]\n", 985 | " num_floors = int(dfbuildingsDIST.loc[i,\"Number of Floors\"])\n", 986 | " #removing optional column to input average distance as per @july 20, 2020 excel input file validation review meeting\n", 987 | " #avg_dist = df1.loc[i,\"Average Distance per Floor\"] \n", 988 | " avg_dist = 2\n", 989 | " floor_rel_dist = list(dfbuildingsDIST.iloc[i,f_start_idx: f_start_idx + num_floors])\n", 990 | " floor_rel_dist = [int(i) for i in floor_rel_dist] \n", 991 | " print(bldg_id, \"found\", num_floors, \"floors rel dist: \", floor_rel_dist)\n", 992 | "\n", 993 | "\n", 994 | "\n", 995 | " if fn:\n", 996 | " floors_per_bldg[bldg_id] = fn.copy()\n", 997 | " else:\n", 998 | " print(\"wait....something went wrong....first tab... row:\", i)\n", 999 | "\n", 1000 | "\n", 1001 | " print(\"number of buildings with floor dist:\", len(dist_per_floors), \"of\", len(floors_per_bldg) )\n", 1002 | "\n", 1003 | "\n", 1004 | " print(\"--------------------------------------------------------\")\n", 1005 | "\n", 1006 | " print(\"loading quadrant coordinates... from floors tab... \")\n", 1007 | "\n", 1008 | " print(\"start of coordinates columns detected... column idx:\", start_idx)\n", 1009 | " print(\"validating coordinate columns vs number of zones... \")\n", 1010 | "\n", 1011 | " for i in range(num_floors):\n", 1012 | " floor_id = df100.loc[i,\"Floor ID\"]\n", 1013 | " num_quads = int(df100.loc[i,\"Number of Zones\"])\n", 1014 | " coord_row = []\n", 1015 | " for j in range(int(num_quads)):\n", 1016 | " temp = df100.iloc[i, start_idx+j]\n", 1017 | " coord_row.append(temp)\n", 1018 | " if num_quads != len(coord_row):\n", 1019 | " print(floor_id, \"number of zones do not match\")\n", 1020 | " print(\"Excel Floor tab ERROR: Please enter one coordinate per each zone\")\n", 1021 | " else:\n", 1022 | " print(floor_id,\"found zone coordinates...\", coord_row)\n", 1023 | " quad_coord_per_floor[floor_id] = coord_row.copy() \n", 1024 | "\n", 1025 | " print(\"--------------------------------------------------------\")\n", 1026 | "\n", 1027 | " print(\"calculating zone to zone distances...\")\n", 1028 | " print(\"number of floors with quads dist\", len(quads_dist_per_floor))\n", 1029 | " print(\"number of spaces loaded... \", len(quadrant_ids))\n", 1030 | "\n", 1031 | "\n", 1032 | "\n", 1033 | "\n", 1034 | " print(\"--------------------------------------------------------\")\n", 1035 | "\n", 1036 | " print(\"New building matrix created... expanding to quad size... shape:\", new_building_expanded_matrix.shape )\n", 1037 | " print(\"adding building distances as per Coordinates...\")\n", 1038 | "\n", 1039 | "\n", 1040 | " print(\"building matrix filled...\")\n", 1041 | "\n", 1042 | "\n", 1043 | " print(\"--------------------------------------------------------\")\n", 1044 | " print(\"New floors matrix created... expanding to quad size... 
shape:\", new_floors_expanded_matrix.shape )\n", 1045 | " print(\"adding floors distances as per buildings tab...\")\n", 1046 | " print(\"floor matrix filled...\")\n", 1047 | "\n", 1048 | " print(\"--------------------------------------------------------\")\n", 1049 | " print(\"translating floor distances horizontally... \")\n", 1050 | " print(\"Floor matrix HORIZONTAL translation matrix filled...\")\n", 1051 | " print(\"--------------------------------------------------------\")\n", 1052 | " for bldg_id, floor_distances in dist_per_floors.items():\n", 1053 | " row_idxs = list(proxyDF.loc[(proxyDF[\"building_id\"] == bldg_id),\"index\"]) \n", 1054 | " col_idxs = row_idxs.copy()\n", 1055 | " cv, rv = np.meshgrid(row_idxs, col_idxs)\n", 1056 | " temp = floor_T[rv, cv]\n", 1057 | " floor_dist = temp[[0],:] #first row\n", 1058 | " floor_vector = floor_dist\n", 1059 | " print(\"REPMAT vertically\", bldg_id, \"num rows:\", len(floor_vector))\n", 1060 | "\n", 1061 | "\n", 1062 | " print(\"Floor matrix VERTICAL translation matrix filled...\")\n", 1063 | "\n", 1064 | "\n", 1065 | " print(\"--------------------------------------------------------\")\n", 1066 | "\n", 1067 | " print(\"New quad matrix created... expanding to quad size... shape:\", new_quad_expanded_matrix.shape )\n", 1068 | " print(\"adding quad distances as per coordinates in floors tab...\")\n", 1069 | "\n", 1070 | " print(\"quad matrix filled...\")\n", 1071 | "\n", 1072 | "\n", 1073 | " print(\"--------------------------------------------------------\")\n", 1074 | " print(\"translating quad distances horizontally... \")\n", 1075 | "\n", 1076 | " for floor_id, quad_distances in quads_dist_per_floor.items():\n", 1077 | " quad_dist = quad_distances[0]\n", 1078 | " quad_vector = np.array(quad_dist).reshape(len(quad_dist),1)\n", 1079 | " print(\"REPMAT horizontally\", floor_id, \"num rows:\", len(quad_vector))\n", 1080 | "\n", 1081 | "\n", 1082 | " print(\"quad matrix HORIZONTAL translation matrix filled...\")\n", 1083 | "\n", 1084 | " print(\"--------------------------------------------------------\")\n", 1085 | "\n", 1086 | " print(\"translating quads distances vertically... \")\n", 1087 | " for floor_id, quad_distances in quads_dist_per_floor.items():\n", 1088 | " quad_dist = quad_distances[0]\n", 1089 | " quad_vector = np.array(quad_dist).reshape(1, len(quad_dist))\n", 1090 | " print(\"REPMAT vertically\", floor_id, \"num rows:\", len(quad_vector))\n", 1091 | "\n", 1092 | " print(\"quad matrix VERTICAL translation matrix filled...\")\n", 1093 | "\n", 1094 | "\n", 1095 | " print(\"--------------------------------------------------------\")\n", 1096 | "\n", 1097 | "\n", 1098 | "\n", 1099 | "showdshp.on_click(click_on_show_showdshp_details)\n", 1100 | "hidedshp.on_click(click_on_hide_showdshp_details)\n", 1101 | "\n", 1102 | "# print(os.getcwd())\n", 1103 | "# print(base_path)\n", 1104 | "# print(base_path + out_filename)\n", 1105 | "\n", 1106 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))\n" 1107 | ] 1108 | }, 1109 | { 1110 | "cell_type": "code", 1111 | "execution_count": null, 1112 | "metadata": {}, 1113 | "outputs": [], 1114 | "source": [ 1115 | "savedshpfinsh = '
Done: Your distance output file (distance.csv) has been successfully saved to the location of this notebook. ('+ os.getcwd() +')
'\n", 1116 | "display(Markdown(savedshpfinsh))\n", 1117 | "style = {'description_width': '300px'}\n", 1118 | "layout = {'width': '225px'}\n", 1119 | "startValidationsNB = widgets.Button(description ='Open Validations UI Notebook',style=style,layout=layout,button_style='info')\n", 1120 | "display(startValidationsNB)\n", 1121 | "\n", 1122 | "def go_to_nextblock(btn):\n", 1123 | " display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))\n", 1124 | "\n", 1125 | "startValidationsNB.on_click(go_to_nextblock) \n", 1126 | " " 1127 | ] 1128 | }, 1129 | { 1130 | "cell_type": "code", 1131 | "execution_count": null, 1132 | "metadata": {}, 1133 | "outputs": [], 1134 | "source": [ 1135 | "%%javascript\n", 1136 | "var name_of_the_notebook = 'WPP.Input-File-Validations_PIPE-002_v2.ipynb'\n", 1137 | "var url = window.location.href.split('/')\n", 1138 | "var newurl = url[0] + '//'\n", 1139 | "for (var i = 1; i < url.length - 1; i++) {\n", 1140 | " console.log(url[i], newurl)\n", 1141 | " newurl += url[i] + '/'\n", 1142 | "}\n", 1143 | "newurl += name_of_the_notebook\n", 1144 | "window.open(newurl)" 1145 | ] 1146 | }, 1147 | { 1148 | "cell_type": "code", 1149 | "execution_count": null, 1150 | "metadata": {}, 1151 | "outputs": [], 1152 | "source": [] 1153 | } 1154 | ], 1155 | "metadata": { 1156 | "kernelspec": { 1157 | "display_name": "Python 3", 1158 | "language": "python", 1159 | "name": "python3" 1160 | }, 1161 | "language_info": { 1162 | "codemirror_mode": { 1163 | "name": "ipython", 1164 | "version": 3 1165 | }, 1166 | "file_extension": ".py", 1167 | "mimetype": "text/x-python", 1168 | "name": "python", 1169 | "nbconvert_exporter": "python", 1170 | "pygments_lexer": "ipython3", 1171 | "version": "3.8.8" 1172 | } 1173 | }, 1174 | "nbformat": 4, 1175 | "nbformat_minor": 2 1176 | } 1177 | -------------------------------------------------------------------------------- /WPP.Input-File-Validations_PIPE-002_v2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# Copyright (c) Microsoft Corporation.\n", 10 | "# Licensed under the MIT license.\n", 11 | "\n", 12 | "\n", 13 | "\n", 14 | "from IPython.display import display, Markdown, clear_output, HTML, Javascript, display_html\n", 15 | "import subprocess\n", 16 | "import sys\n", 17 | "# display(Markdown(\"\"\" ## Checking required packages:\n", 18 | "# #### if any are missing, installing them now..... \"\"\"))\n", 19 | "# !{sys.executable} -m pip install --no-cache-dir -r requirements_SP.txt\n", 20 | "# clear_output() \n", 21 | "\n", 22 | "\n", 23 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index(), IPython.notebook.get_selected_index()+1)'))\n", 24 | "\n", 25 | "HTML('''\n", 42 | " The raw code for this IPython notebook is by default hidden for easier reading toggle on/off the raw code by clicking here.''')" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "\n", 52 | "display(Markdown(\"## File Validations\"))\n", 53 | "display(Markdown(\"Run this notebook once you have prepared all of the required input files - distance, space_capacity, team_size, and interactions files. This notebook validates your input files and prepares them in a Final Files folder. 
This is a required step before you can create a seating plan in the Generate Floorplan notebook.\"))\n", 54 | "\n", 55 | "import pandas as pd\n", 56 | "import random\n", 57 | "import numpy as np\n", 58 | "# import pulp\n", 59 | "import csv\n", 60 | "from io import StringIO\n", 61 | "import os\n", 62 | "import itertools\n", 63 | "from IPython.display import HTML, Javascript, display_html,clear_output,Markdown\n", 64 | "import ipywidgets as widgets\n", 65 | "from ipyfilechooser import FileChooser\n", 66 | "from itertools import combinations\n", 67 | "import scipy as sc\n", 68 | "from scipy import optimize as op\n", 69 | "from itertools import combinations\n", 70 | "from datetime import datetime, date, time\n", 71 | "import re\n", 72 | "# display(Markdown('First in a series to validate input files:'))\n", 73 | "\n", 74 | "# display(Markdown('* Team Size'))\n", 75 | "# display(Markdown('* Interactions'))\n", 76 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))\n" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "\n", 86 | "\n", 87 | "def createInteractionsMatrix2(flatInteractions):\n", 88 | " #get unique names of participants\n", 89 | " organizersNames = flatInteractions.iloc[:,0].unique()\n", 90 | " contributorsNames = flatInteractions.iloc[:,1].unique()\n", 91 | " allNames = set(organizersNames).union(set(contributorsNames))\n", 92 | " removeNames = [\"Other_Collaborators\", \"Unclassified_Internal\", \"Unclassified_External\", \"Collaborators Within Group\"]\n", 93 | " keepNames = allNames - set(removeNames)\n", 94 | " cols = flatInteractions.columns\n", 95 | "\n", 96 | " #add up withing group collaboration \n", 97 | " temp = flatInteractions.copy()\n", 98 | " temp.loc[temp[cols[1]]=='Collaborators Within Group', cols[1]] = temp[cols[0]]\n", 99 | " temp = temp.drop(temp.columns[[2]], axis=1) # drop date column\n", 100 | "\n", 101 | " #remove unwanted rows\n", 102 | " temp = temp[~ ((temp[cols[0]].isin(list(removeNames))) | (temp[cols[1]].isin(list(removeNames))))]\n", 103 | "\n", 104 | " #build matrix\n", 105 | " key_cols = [cols[0], cols[1]]\n", 106 | " interactionsMatrix = temp.set_index(key_cols).groupby(key_cols)[cols[-1]].agg(['sum']).unstack()\n", 107 | "\n", 108 | "\n", 109 | " #clean and wrap up\n", 110 | " df = interactionsMatrix\n", 111 | " df.columns = df.columns.droplevel(0)\n", 112 | " df.columns.name = None\n", 113 | " df.index.name = None\n", 114 | " df = df.reset_index()\n", 115 | " df.rename(columns = {df.columns[0]: 'Collab' }, inplace = True)\n", 116 | " return df\n", 117 | "\n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | "def sortInteractions(iDF):\n", 122 | " rawCols = iDF.columns\n", 123 | " idxMap = {} \n", 124 | " #sort rows\n", 125 | " sortedRawI = iDF.sort_values([rawCols[0]])\n", 126 | " #keep old rows idx i.e. 
just put it aside and forget about it (kept copy for audit purposes)\n", 127 | " copyIdxRows = sortedRawI.reset_index().iloc[:, [0,1]]\n", 128 | " copyIdxRows.rename(columns = {\"index\" : \"orig_index\"}, inplace = True)\n", 129 | " idxMap[\"rows\"] = copyIdxRows.copy()\n", 130 | " sortedRawI = sortedRawI.reset_index(drop = True)\n", 131 | " #transpose \n", 132 | " temp = sortedRawI.set_index(sortedRawI.columns[0])\n", 133 | " transposeRawI = temp.transpose()\n", 134 | " transposeRawI = transposeRawI.reset_index()\n", 135 | " transposeRawI.rename(columns = {\"index\" : rawCols[0]}, inplace = True)\n", 136 | " transposeRawI.columns.name = None\n", 137 | " convert_col_collab = transposeRawI.columns[0] \n", 138 | " transposeRawI[convert_col_collab] = transposeRawI[convert_col_collab].astype(str)\n", 139 | " #sort columns \n", 140 | " sortedRawI2 = transposeRawI.sort_values([transposeRawI.columns[0]])\n", 141 | " #record old column idx, put it aside and forget about it (just in case it is needed... kept a copy...)\n", 142 | " copyIdxCols = sortedRawI2.reset_index().iloc[:, [0,1]]\n", 143 | " copyIdxCols.rename(columns = {\"index\" : \"orig_index\"}, inplace = True)\n", 144 | " idxMap[\"cols\"] = copyIdxCols.copy()\n", 145 | " sortedRawI2 = sortedRawI2.reset_index(drop = True)\n", 146 | " #transpose back\n", 147 | " temp2 = sortedRawI2.set_index(sortedRawI2.columns[0])\n", 148 | " transposeRawI2 = temp2.transpose()\n", 149 | " transposeRawI2 = transposeRawI2.reset_index()\n", 150 | " transposeRawI2.rename(columns = {\"index\" : rawCols[0]}, inplace = True)\n", 151 | " transposeRawI2.columns.name = None\n", 152 | " return transposeRawI2, idxMap \n", 153 | "\n", 154 | "def sortTeamsDF(tsDF, dropIdx = True):\n", 155 | " cols = tsDF.columns\n", 156 | " sortedDF = tsDF.sort_values(by = [cols[0]])\n", 157 | " if (dropIdx):\n", 158 | " sortedDF = sortedDF.reset_index(drop = True)\n", 159 | " else:\n", 160 | " sortedDF = sortedDF.reset_index()\n", 161 | " return sortedDF\n", 162 | "\n", 163 | "def getTeamIdsDF(sRawTS):\n", 164 | " return (sRawTS[[sRawTS.columns[0]]].reset_index())[[sRawTS.columns[0],\"index\"]]\n", 165 | "\n", 166 | " \n", 167 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))\n", 168 | " \n", 169 | " " 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "loadfiles_markdown = widgets.Output()\n", 179 | "\n", 180 | "with loadfiles_markdown:\n", 181 | " display(Markdown('
Load Data: Select your file(s).
'))\n", 182 | "\n", 183 | "display(loadfiles_markdown)\n", 184 | "\n", 185 | " \n", 186 | "cwd = os.getcwd()\n", 187 | "tab_nest_001 = widgets.Accordion()\n", 188 | "style = {'description_width': '185px'}\n", 189 | "layout = {'width': '505px'}\n", 190 | "\n", 191 | "def go_to_nextblock(btn):\n", 192 | " display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+3)'))\n", 193 | "\n", 194 | "\n", 195 | "input_path_int = FileChooser(cwd)\n", 196 | "input_path_ts = FileChooser(cwd)\n", 197 | "input_path_dist = FileChooser(cwd)\n", 198 | "input_path_fc = FileChooser(cwd)\n", 199 | "\n", 200 | "#outfilename= widgets.Text(value=None,description='out_filename')\n", 201 | "go_run_full = widgets.Button(description='Go',button_style = 'primary')\n", 202 | "\n", 203 | "\n", 204 | "\n", 205 | "\n", 206 | "tab_nest_001.children = [input_path_int,input_path_ts,input_path_fc,input_path_dist]\n", 207 | "\n", 208 | "tab_nest_001.set_title(0,'Interactions')\n", 209 | "tab_nest_001.set_title(1,'Team size')\n", 210 | "tab_nest_001.set_title(2,'Space Capacity')\n", 211 | "tab_nest_001.set_title(3,'Distance')\n", 212 | "\n", 213 | "\n", 214 | "display(widgets.VBox([tab_nest_001,go_run_full]))\n", 215 | " \n", 216 | "go_run_full.on_click(go_to_nextblock) " 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": {}, 223 | "outputs": [], 224 | "source": [ 225 | "int_input_path = input_path_int.selected_path + '\\\\'+ input_path_int.selected_filename\n", 226 | "ts_input_path = input_path_ts.selected_path + '\\\\'+ input_path_ts.selected_filename\n", 227 | "\n", 228 | "fc_input_path = input_path_fc.selected_path + '\\\\'+ input_path_fc.selected_filename\n", 229 | "dist_input_path = input_path_dist.selected_path + '\\\\'+ input_path_dist.selected_filename\n", 230 | "\n", 231 | " \n", 232 | "#out_filename = outfilename.value" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [ 241 | "CGREEN = '\\33[32m'\n", 242 | "CRED = '\\033[91m'\n", 243 | "CEND = '\\033[0m'\n", 244 | "BOLD = '\\033[1m'\n", 245 | "\n", 246 | "flatInteractionBool = True\n", 247 | "rawIraw = pd.read_csv(int_input_path)\n", 248 | "\n", 249 | "rawIraw_old = rawIraw.copy()\n", 250 | "\n", 251 | "out = widgets.Output(layout={'border': '1px solid black'})\n", 252 | "\n", 253 | "# Keep 1st 4 cols and remove remaining empty columns \n", 254 | "rawIraw = rawIraw.drop(rawIraw.columns[4:], axis=1)\n", 255 | "\n", 256 | "rawIraw = (rawIraw.replace(r'^\\s*$', np.nan, regex=True))\n", 257 | "rawIraw = rawIraw.dropna(axis=0,how='all')\n", 258 | "rawIraw = rawIraw.dropna(axis = 1, how = 'all')\n", 259 | "# Replace Scenario 1a NaNs in CB hrs to 0.\n", 260 | "rawIraw['Collaboration_hours'] = rawIraw['Collaboration_hours'].replace(np.nan,0)\n", 261 | "\n", 262 | "\n", 263 | "\n", 264 | "rawTS = pd.read_csv(ts_input_path)\n", 265 | "\n", 266 | "# Keep 1st 2 cols (TS) and remove remaining empty columns \n", 267 | "rawTS = rawTS.drop(rawTS.columns[2:], axis=1)\n", 268 | "rawTS = rawTS.replace(r'^\\s*$', np.nan, regex=True)\n", 269 | "\n", 270 | "\n", 271 | "# Keep 1st 2 cols (FC) and remove remaining empty columns and get total # of rows for Distance file.\n", 272 | "\n", 273 | "rawFC = pd.read_csv(fc_input_path)\n", 274 | "rawFC = rawFC.drop(rawFC.columns[2:], axis=1)\n", 275 | "\n", 276 | "rawFC_reduced = rawFC.copy()\n", 277 | "rawFC_reduced = 
rawFC_reduced.dropna()\n", 278 | "rawFC_reduced = rawFC_reduced.replace(r'^\\s*$', np.nan, regex=True)\n", 279 | "no_of_floors = len(rawFC_reduced)\n", 280 | "\n", 281 | "\n", 282 | "\n", 283 | "rawDT = pd.read_csv(dist_input_path)\n", 284 | "rawDT = rawDT.drop(rawDT.columns[no_of_floors+1:], axis=1)\n", 285 | "rawDT = rawDT.replace(r'^\\s*$', np.nan, regex=True)\n", 286 | "\n", 287 | "\n", 288 | "\n", 289 | "scenario1a_markdown = widgets.Output()\n", 290 | "\n", 291 | "with scenario1a_markdown:\n", 292 | " display(Markdown('
Scenario 1a: Although this should not happen, in case all teams are present but collaboration is NaN, replace the NaN collaboration values with 0.
'))\n", 293 | "\n", 294 | "display(scenario1a_markdown)\n", 295 | "display(Markdown(\"Number of rows within your Interaction file that contain *NaN* for Collaboration_hours:\"))\n", 296 | "\n", 297 | " \n", 298 | "if rawIraw_old['Collaboration_hours'].isnull().sum() > 0:\n", 299 | " display(HTML('❌ Found ' + ''+str(rawIraw_old['Collaboration_hours'].isnull().sum())+'' + ' NaNs '))\n", 300 | " display(out)\n", 301 | "else:\n", 302 | " display(HTML('

✅ Passed

'))\n", 303 | "\n", 304 | " print('Collaboration_hours ' +str(rawIraw_old['Collaboration_hours'].isnull().sum()))\n", 305 | " display(out)\n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | "\n", 311 | " \n", 312 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))\n" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": null, 318 | "metadata": {}, 319 | "outputs": [], 320 | "source": [ 321 | "scenario1b_markdown = widgets.Output()\n", 322 | "\n", 323 | "with scenario1b_markdown:\n", 324 | " display(Markdown('
Scenario 1b: Scrub whitespace and empty rows from all 4 files. No action is required from the user; any issues found will be auto-fixed.
'))\n", 325 | "\n", 326 | "blankrawI = rawIraw_old.isnull().sum()\n", 327 | "blankrawFC = rawFC.isnull().sum()\n", 328 | "blankrawDT = rawDT.isnull().sum()\n", 329 | "blankrawTS = rawTS.isnull().sum()\n", 330 | "\n", 331 | "pass_or_fail_mkdwn0 = widgets.Output()\n", 332 | "showscn1b = widgets.Button(description ='Show details')\n", 333 | "hidescn1b = widgets.Button(description ='Hide details')\n", 334 | "\n", 335 | "scn1b_mkdwn = widgets.Output()\n", 336 | "with scn1b_mkdwn:\n", 337 | " display(Markdown(\"Number of rows per column that contain *empty rows* or *white-space* for all files:\"))\n", 338 | "\n", 339 | "with pass_or_fail_mkdwn0:\n", 340 | " clear_output()\n", 341 | " if blankrawI.sum() > 0 or blankrawDT.sum() > 0 or blankrawFC.sum() > 0 or blankrawTS.sum() > 0:\n", 342 | " display(HTML('❌'))\n", 343 | " else:\n", 344 | " display(HTML('

✅'))\n", 345 | "display(widgets.VBox([widgets.VBox([scenario1b_markdown,widgets.HBox([pass_or_fail_mkdwn0,scn1b_mkdwn])]),showscn1b]))\n", 346 | "# display(out)\n", 347 | "\n", 348 | "\n", 349 | "\n", 350 | "\n", 351 | "def hide_details_scn1b_details(click):\n", 352 | " clear_output()\n", 353 | " display(widgets.VBox([widgets.VBox([scenario1b_markdown,widgets.HBox([pass_or_fail_mkdwn0,scn1b_mkdwn])]),showscn1b]))\n", 354 | " display(out)\n", 355 | "\n", 356 | "def click_on_show_scn1b_details(click):\n", 357 | " clear_output()\n", 358 | " display(widgets.VBox([widgets.VBox([scenario1b_markdown,widgets.HBox([pass_or_fail_mkdwn0,scn1b_mkdwn])]),hidescn1b]))\n", 359 | " display(widgets.HTML(value=\"Interaction file\"))\n", 360 | " if blankrawI.sum() > 0:\n", 361 | " display(HTML('❌ Found ' +''+ str(blankrawI.sum())+'' + ' cells that contain nulls or white spaces. '))\n", 362 | " else:\n", 363 | " display(HTML('

✅ Passed

'))\n", 364 | " print('--------------------------------')\n", 365 | " print(blankrawI)\n", 366 | "\n", 367 | " display(widgets.HTML(value=\"Team size file\"))\n", 368 | " if blankrawTS.sum() > 0:\n", 369 | " display(HTML('❌ Found ' +''+ str(blankrawTS.sum())+'' + ' cells that contain nulls or white spaces. '))\n", 370 | " display(Markdown(\" Any NaN or blank values inside size column will be replaced with 0.\"))\n", 371 | " else:\n", 372 | " display(HTML('

✅ Passed

'))\n", 373 | " print('--------------------------------')\n", 374 | " print(blankrawTS)\n", 375 | "\n", 376 | "\n", 377 | " display(widgets.HTML(value=\"Floor capacity file\"))\n", 378 | " if blankrawFC.sum() > 0:\n", 379 | " display(HTML('❌ Found ' +''+ str(blankrawFC.sum())+'' + ' cells that contain nulls or white spaces. '))\n", 380 | " else:\n", 381 | " display(HTML('

✅ Passed

'))\n", 382 | " print('--------------------------------')\n", 383 | " print(blankrawFC)\n", 384 | "\n", 385 | "\n", 386 | " display(widgets.HTML(value=\"Distance file\"))\n", 387 | " if blankrawDT.sum() > 0:\n", 388 | " display(HTML('❌ Found ' +''+ str(blankrawDT.sum())+'' + ' cells that contain nulls or white spaces. '))\n", 389 | " else:\n", 390 | " display(HTML('

✅ Passed

'))\n", 391 | " print('--------------------------------')\n", 392 | " print(blankrawDT)\n", 393 | " display(out)\n", 394 | "\n", 395 | "\n", 396 | "\n", 397 | "\n", 398 | "\n", 399 | " \n", 400 | "\n", 401 | "\n", 402 | "\n", 403 | "\n", 404 | "\n", 405 | "\n", 406 | "\n", 407 | "\n", 408 | "showscn1b.on_click(click_on_show_scn1b_details)\n", 409 | "hidescn1b.on_click(hide_details_scn1b_details)\n", 410 | "\n", 411 | "\n", 412 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))\n" 413 | ] 414 | }, 415 | { 416 | "cell_type": "code", 417 | "execution_count": null, 418 | "metadata": {}, 419 | "outputs": [], 420 | "source": [ 421 | "clear_output()\n", 422 | "#display(rawI.head())\n", 423 | "# Convert TI and CB to string columns for Raw interactions file:\n", 424 | "Col_1 = rawIraw.columns[0]\n", 425 | "Col_2 = rawIraw.columns[1]\n", 426 | "\n", 427 | "rawIraw[Col_1] = rawIraw[Col_1].apply(str)\n", 428 | "rawIraw[Col_2] = rawIraw[Col_2].apply(str)\n", 429 | "\n", 430 | "# Scenario 1b: Get rid of whitespaces by replacing spaces with NaNs for all 4 files\n", 431 | "# Remove/drop replaced NaNs from 4 files\n", 432 | "# rawIraw = (rawIraw.replace(r'^\\s*$', np.nan, regex=True))\n", 433 | "# rawIraw = rawIraw.dropna(axis=0,how='all')\n", 434 | "# rawIraw = rawIraw.dropna(axis='columns')\n", 435 | "# # Replace Scenario 1a NaNs in CB hrs to 0.\n", 436 | "# rawIraw['Collaboration_hours'] = rawIraw['Collaboration_hours'].replace(np.nan,0)\n", 437 | "\n", 438 | "col1_TS = rawTS.columns[0]\n", 439 | "\n", 440 | "rawTS = (rawTS.replace(r'^\\s*$', np.nan, regex=True))\n", 441 | "rawTS = rawTS.dropna(axis=0,how='all')\n", 442 | "rawTS = rawTS.dropna(axis = 1, how = 'all')\n", 443 | "rawTS[col1_TS] = rawTS[col1_TS].astype(str)\n", 444 | "rawFC = (rawFC.replace(r'^\\s*$', np.nan, regex=True))\n", 445 | "rawFC = rawFC.dropna(axis=0,how='all')\n", 446 | "rawFC = rawFC.dropna(axis = 1, how = 'all')\n", 447 | "rawDT = (rawDT.replace(r'^\\s*$', np.nan, regex=True))\n", 448 | "rawDT = rawDT.dropna(axis=0,how='all')\n", 449 | "rawDT = rawDT.dropna(axis = 1, how = 'all')\n", 450 | "\n", 451 | "if(flatInteractionBool):\n", 452 | " rawI = createInteractionsMatrix2(rawIraw)\n", 453 | "\n", 454 | "sRawTS = sortTeamsDF(rawTS)\n", 455 | "teamsIds = getTeamIdsDF(sRawTS)\n", 456 | "teamLabels = dict([ [id, team] for team, id in teamsIds.to_dict(orient = 'split')['data']])\n", 457 | "#display(sRawTS.head())\n", 458 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))\n" 459 | ] 460 | }, 461 | { 462 | "cell_type": "code", 463 | "execution_count": null, 464 | "metadata": {}, 465 | "outputs": [], 466 | "source": [ 467 | "\n", 468 | "teamsize_mkdown = widgets.Output()\n", 469 | "teamsize_mkdown2 = widgets.Output()\n", 470 | "\n", 471 | "outreq = widgets.Output(layout={'border': '2px solid blue'})\n", 472 | "\n", 473 | "tab_nest_002 = widgets.Accordion(selected_index=None)\n", 474 | "style = {'description_width': '185px'}\n", 475 | "layout = {'width': '505px'}\n", 476 | "\n", 477 | "\n", 478 | "\n", 479 | "\n", 480 | "with teamsize_mkdown:\n", 481 | " display(Markdown('
Step 1. Validate Team Size:
'))\n", 482 | "#display(teamsize_mkdown)\n", 483 | "\n", 484 | "with teamsize_mkdown2:\n", 485 | "\n", 486 | " display(Markdown('

    a. Validate columns layout – different per mode.

'))\n", 487 | " display(Markdown('

    i. Standard: should have 2 columns that match the fixed schema (\"Team\" and \"Size\")

'))\n", 488 | " display(Markdown('

    ii. Co-located Teams: n columns (participant names in the column headers should match)

'))\n", 489 | " display(Markdown('

    b. Get unique names of participants.

'))\n", 490 | " display(Markdown('

    i. Ensure they are unique (no duplicates)

'))\n", 491 | " #display(Markdown('

    ii. Check no empty or missing names

'))\n", 492 | " display(Markdown('

    c. Validate Values.

'))\n", 493 | "# display(Markdown('

    i. No NA, None or string

'))\n", 494 | " display(Markdown('

    i. Value > 0. Should be a positive integer.

'))\n", 495 | " display(Markdown(\"

    ii. Team names should not contain invalid characters. Invalid chars: *|,:<>[]{}`';@&$#%

\"))\n", 496 | " display(Markdown('

    iii. Numeric

'))\n", 497 | " display(Markdown('

    d. Calculate total team size.

'))\n", 498 | " display(Markdown('

    e. If Co-located Teams.

'))\n", 499 | " display(Markdown('

    i. Check names of participants match

'))\n", 500 | " display(Markdown('

    ii. Check Adjacency seats have valid numeric values

'))\n", 501 | " \n", 502 | " \n", 503 | "\n", 504 | "tab_nest_002.children = [widgets.VBox([teamsize_mkdown,teamsize_mkdown2])]\n", 505 | "\n", 506 | "tab_nest_002.set_title(0,'Validate Team Size File Requirements')\n", 507 | "\n", 508 | "display(tab_nest_002)\n", 509 | "display(outreq)\n", 510 | "\n", 511 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))\n", 512 | " " 513 | ] 514 | }, 515 | { 516 | "cell_type": "code", 517 | "execution_count": null, 518 | "metadata": {}, 519 | "outputs": [], 520 | "source": [ 521 | "\n", 522 | "\n", 523 | "clear_output()\n", 524 | "\n", 525 | "# 1.a Validate columns layout – different per mode.\n", 526 | "# i.\tStandard/Optimize: 2 columns\n", 527 | "showtsz_colslayout = widgets.Button(description ='Show details')\n", 528 | "hidetsz_colslayout = widgets.Button(description ='Hide details')\n", 529 | "tszvalidatecols_mkdn = widgets.Output()\n", 530 | "pass_or_fail_mkdwn1 = widgets.Output()\n", 531 | "\n", 532 | "\n", 533 | "if len(rawTS.columns) >=2 :\n", 534 | " tszColheader1= rawTS.columns[0]\n", 535 | " tszColheader2= rawTS.columns[1]\n", 536 | "else:\n", 537 | " \"\"\n", 538 | "\n", 539 | "\n", 540 | "with tszvalidatecols_mkdn:\n", 541 | " display(Markdown('Validate columns layout'))\n", 542 | "display(tszvalidatecols_mkdn)\n", 543 | "with pass_or_fail_mkdwn1:\n", 544 | " clear_output()\n", 545 | " if len(rawTS.columns) == 2 and (tszColheader1 == 'Team' and tszColheader2 == 'Size'):\n", 546 | " display(HTML('

✅'))\n", 547 | " else:\n", 548 | " display(HTML('❌'))\n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | "display(widgets.HBox([pass_or_fail_mkdwn1,showtsz_colslayout]))\n", 553 | "display(out)\n", 554 | "\n", 555 | "\n", 556 | "tsz_schema_error_markdown = widgets.Output()\n", 557 | "\n", 558 | "with tsz_schema_error_markdown:\n", 559 | " if (len(rawTS.columns)>=2):\n", 560 | " if (tszColheader1 == 'Team' and tszColheader2 == 'Size'):\n", 561 | " \"\"\n", 562 | " elif (tszColheader1 != 'Team' and tszColheader2 == 'Size'):\n", 563 | " clear_output()\n", 564 | " display(Markdown('

ERROR! Invalid column headers for Standard/Optimize: '+ tszColheader1 + ' was found instead of the expected \"Team\" column.
'))\n", 565 | " print(\"Please fix this error in your Team Size file and re-upload and click Go again.\")\n", 566 | " elif (tszColheader1 == 'Team' and tszColheader2 != 'Size'):\n", 567 | " clear_output()\n", 568 | " display(Markdown('
ERROR! Invalid column headers for Standard/Optimize: '+ tszColheader2 + ' was found instead of the expected \"Size\" column.
'))\n", 569 | " print(\"Please fix this error in your Team Size file and re-upload and click Go again.\")\n", 570 | " elif (tszColheader1 != 'Team' and tszColheader2 != 'Size'):\n", 571 | " clear_output()\n", 572 | " display(Markdown('
ERROR! Invalid column headers for Standard/Optimize: '+ tszColheader1 + ' was found instead of the expected \"Team\" and ' + tszColheader2 + ' was found instead of the expected \"Size\".
'))\n", 573 | " print(\"Please fix this error in your Team Size file and re-upload and click Go again.\")\n", 574 | " else:\n", 575 | " clear_output()\n", 576 | " display(Markdown('
ERROR! Invalid number of columns for Standard: the Team Size file should contain 2 columns with the following schema (\"Team\" and \"Size\").
'))\n", 577 | " print(\"Please fix this error in your Team Size file and re-upload and click Go again.\")\n", 578 | "\n", 579 | "\n", 580 | "\n", 581 | "def hide_details_tsz_cols_details(click):\n", 582 | " clear_output()\n", 583 | " display(tszvalidatecols_mkdn)\n", 584 | " display(widgets.HBox([pass_or_fail_mkdwn1,showtsz_colslayout]))\n", 585 | " display(tsz_schema_error_markdown)\n", 586 | " display(out)\n", 587 | "\n", 588 | "def click_on_show_tsz_cols_details(click):\n", 589 | " clear_output()\n", 590 | " display(tszvalidatecols_mkdn)\n", 591 | " display(widgets.HBox([pass_or_fail_mkdwn1,hidetsz_colslayout]))\n", 592 | " display(tsz_schema_error_markdown)\n", 593 | " print(BOLD,\"Number of Columns:\",CEND, len(rawTS.columns))\n", 594 | " print(BOLD,\"Column headers: \",CEND, list(rawTS.columns))\n", 595 | " if (len(rawTS.columns)>=2):\n", 596 | " if len(rawTS.columns) == 2:\n", 597 | " print(CGREEN +u'\\u2713'+CEND, BOLD,\"Valid number of columns for Standard/Optimize\",CEND)\n", 598 | " if (tszColheader1 == 'Team' and tszColheader2 == 'Size'):\n", 599 | " print(CGREEN +u'\\u2713'+CEND, BOLD,\"Valid column headers for Standard/Optimize\",CEND)\n", 600 | " elif (tszColheader1 != 'Team' and tszColheader2 == 'Size'):\n", 601 | " print(CRED +u'\\u2717'+CEND, BOLD,\"Invalid column headers for Standard/Optimize\",CEND)\n", 602 | " print(tszColheader1 + \" was found instead of expected schema of 'Team'\")\n", 603 | " elif (tszColheader1 == 'Team' and tszColheader2 != 'Size'):\n", 604 | " print(CRED +u'\\u2717'+CEND, BOLD,\"Invalid column headers for Standard/Optimize\",CEND)\n", 605 | " print(tszColheader2 + \" was found instead of expected schema of 'Size'\")\n", 606 | " elif (tszColheader1 != 'Team' and tszColheader2 != 'Size'):\n", 607 | " print(CRED +u'\\u2717'+CEND, BOLD,\"Invalid column headers for Standard/Optimize\",CEND)\n", 608 | " print(tszColheader1 + \" was found instead of expected schema of 'Team'\")\n", 609 | " print(tszColheader2 + \" was found instead of expected schema of 'Size'\")\n", 610 | " \n", 611 | " \n", 612 | "\n", 613 | " \n", 614 | " display(out)\n", 615 | " \n", 616 | "\n", 617 | "if (len(rawTS.columns)>=2) and (tszColheader1 == 'Team' and tszColheader2 == 'Size'):\n", 618 | " display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))\n", 619 | "else:\n", 620 | " None\n", 621 | "\n", 622 | "sRawI, idxMapI = sortInteractions(rawI)\n", 623 | "showtsz_colslayout.on_click(click_on_show_tsz_cols_details)\n", 624 | "hidetsz_colslayout.on_click(hide_details_tsz_cols_details)\n", 625 | "display(tsz_schema_error_markdown)\n", 626 | "# sRawI.head()\n", 627 | "#display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))" 628 | ] 629 | }, 630 | { 631 | "cell_type": "code", 632 | "execution_count": null, 633 | "metadata": {}, 634 | "outputs": [], 635 | "source": [ 636 | "\n", 637 | "clear_output()\n", 638 | "# 1b. 
Get unique name of participants\n", 639 | "# i.\tEnsure they are unique (no duplicates)\n", 640 | "teamsTSraw = rawTS.iloc[:,[0]]\n", 641 | "tszCol1= rawTS.columns[0]\n", 642 | "\n", 643 | "#find duplicates\n", 644 | "temp = teamsTSraw\n", 645 | "temp[\"cnt\"] = 1\n", 646 | "temp = temp.groupby([tszCol1], as_index = False) [\"cnt\"].sum().sort_values(by = [\"cnt\"])\n", 647 | "duplicateTeams = temp[temp.cnt > 1]\n", 648 | "nonDuplicateTeams = temp[temp.cnt == 1]\n", 649 | "\n", 650 | "\n", 651 | "pass_or_fail_mkdwn2 = widgets.Output()\n", 652 | "showtsz_uniqueptps = widgets.Button(description ='Show details')\n", 653 | "hidetsz_uniqueptps = widgets.Button(description ='Hide details')\n", 654 | "tszuniquenames_mkdn = widgets.Output()\n", 655 | "with tszuniquenames_mkdn:\n", 656 | " display(Markdown('Get unique name of participants'))\n", 657 | "display(tszuniquenames_mkdn)\n", 658 | "with pass_or_fail_mkdwn2:\n", 659 | " clear_output()\n", 660 | " if len(duplicateTeams)> 0:\n", 661 | " display(HTML('❌'))\n", 662 | " else:\n", 663 | " display(HTML('

✅'))\n", 664 | "display(widgets.HBox([pass_or_fail_mkdwn2,showtsz_uniqueptps]))\n", 665 | "display(out)\n", 666 | "\n", 667 | "\n", 668 | "\n", 669 | "\n", 670 | "\n", 671 | "def hide_details_tsz_unique_ptps(click):\n", 672 | " clear_output()\n", 673 | " display(tszuniquenames_mkdn)\n", 674 | " display(widgets.HBox([pass_or_fail_mkdwn2,showtsz_uniqueptps]))\n", 675 | " display(out)\n", 676 | "\n", 677 | "\n", 678 | "\n", 679 | "\n", 680 | "def click_on_show_tsz_unique_ptps(click):\n", 681 | " clear_output()\n", 682 | " display(tszuniquenames_mkdn)\n", 683 | " display(widgets.HBox([pass_or_fail_mkdwn2,hidetsz_uniqueptps]))\n", 684 | " if len(duplicateTeams)> 0:\n", 685 | " print(CRED+u'\\u2717'+CEND,BOLD,\"Number of duplicated teams:\",CEND, len(duplicateTeams))\n", 686 | " else:\n", 687 | " print(CGREEN+u'\\u2713'+CEND,BOLD,\"Number of duplicated teams:\",CEND, len(duplicateTeams))\n", 688 | " print(BOLD,\"Number of unique teams:\",CEND, len(nonDuplicateTeams))\n", 689 | " display(out)\n", 690 | "\n", 691 | "teamCatalog = nonDuplicateTeams[[tszCol1]].copy()\n", 692 | "teamCatalog = teamCatalog.sort_values(by = tszCol1).reset_index(drop = True)\n", 693 | "teamCatalog[tszCol1] = teamCatalog[tszCol1].astype(str)\n", 694 | "teamCatalog.head()\n", 695 | "\n", 696 | "\n", 697 | "showtsz_uniqueptps.on_click(click_on_show_tsz_unique_ptps)\n", 698 | "hidetsz_uniqueptps.on_click(hide_details_tsz_unique_ptps)\n", 699 | "\n", 700 | "tempI = np.asmatrix(sRawI.iloc[0:, 1:])\n", 701 | "pd.DataFrame(tempI)\n", 702 | " \n", 703 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+3)'))" 704 | ] 705 | }, 706 | { 707 | "cell_type": "code", 708 | "execution_count": null, 709 | "metadata": {}, 710 | "outputs": [], 711 | "source": [ 712 | "\n", 713 | "# clear_output()\n", 714 | "\n", 715 | "# showtsz_missingnames = widgets.Button(description ='Show details')\n", 716 | "# hidetsz_missingnames = widgets.Button(description ='Hide details')\n", 717 | "# tszmissingnames_mkdn = widgets.Output()\n", 718 | "# with tszmissingnames_mkdn:\n", 719 | "# display(Markdown('Check for empty or missing names'))\n", 720 | "# display(tszmissingnames_mkdn)\n", 721 | "# display(showtsz_missingnames)\n", 722 | "\n", 723 | "\n", 724 | "# # 1b. Get unique name of participants\n", 725 | "# # ii.\tCheck no empty or missing names\n", 726 | "# df = rawTS.copy()\n", 727 | "# isnullDF = df.replace(r'^\\s*$', np.nan, regex=True).isnull()\n", 728 | "#numMissingTeams = isnullDF.iloc[:,0].sum()\n", 729 | "\n", 730 | "# def hide_details_tsz_missingnames(click):\n", 731 | "# clear_output()\n", 732 | "# display(tszmissingnames_mkdn)\n", 733 | "# display(showtsz_missingnames)\n", 734 | "\n", 735 | "# def click_on_show_tsz_missingnames(click):\n", 736 | "# clear_output()\n", 737 | "# display(tszmissingnames_mkdn)\n", 738 | "# display(hidetsz_missingnames)\n", 739 | "# print(\"num(empty/blank)Teams Names: \", numMissingTeams)\n", 740 | "# if(numMissingTeams>0):\n", 741 | "# errorMessage = \"Team name is mandatory. \\\"Team\\\" column cannot be empty or blank.\" \n", 742 | "# emptyRowList = (np.where(isnullDF.iloc[:,0])[0]).tolist()\n", 743 | " \n", 744 | " \n", 745 | "# # display(Markdown('### Step 2. 
Detect missing values'))\n", 746 | "# #Get unique name of Teams\n", 747 | "# teamsOnCol1 = set(rawIraw[rawIraw.columns[0]])\n", 748 | "# teamsOnCol2 = set(rawIraw[rawIraw.columns[1]])\n", 749 | "# allNames = set(teamsOnCol1).union(set(teamsOnCol2))\n", 750 | "# teamCatalog2 = pd.DataFrame(list(allNames))\n", 751 | "# #print(\"Number of Raw Team Names in Col 1: \", len(teamsOnCol1))\n", 752 | "# #print(\"Number of Raw Team Names in Col 2: \", len(teamsOnCol2))\n", 753 | "# #print(\"Number or Raw Team Names in Both Cols: \", len(allNames))\n", 754 | " \n", 755 | " \n", 756 | "\n", 757 | "# showtsz_missingnames.on_click(click_on_show_tsz_missingnames)\n", 758 | "# hidetsz_missingnames.on_click(hide_details_tsz_missingnames)\n", 759 | "\n", 760 | "\n", 761 | "# display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))" 762 | ] 763 | }, 764 | { 765 | "cell_type": "code", 766 | "execution_count": null, 767 | "metadata": {}, 768 | "outputs": [], 769 | "source": [ 770 | "\n", 771 | "clear_output()\n", 772 | "# c.\tValidate Values\n", 773 | " # i.\tNo NA, None or string\n", 774 | " # ii.\tNumeric\n", 775 | " # iii.\tValue > 0\n", 776 | "df = rawTS.copy() \n", 777 | "\n", 778 | "excludeKeywords = [\"@\",\"#\",\"$\",\"%\",\"&\",\"*\",\"+\",\"=\",\"|\",\":\",\";\",\"<\",\">\",\",\",\".\",\"/\",\"[\",\"]\",\"{\",\"}\",\"\\\\\"]\n", 779 | "checkinvalidTS = rawTS.copy()\n", 780 | "team_Col = checkinvalidTS.columns[0]\n", 781 | "\n", 782 | "checkinvalidTS[team_Col] = checkinvalidTS[team_Col].apply(str)\n", 783 | "invalidTS = checkinvalidTS[checkinvalidTS.apply(lambda r: any([kw in r[0] for kw in excludeKeywords]), axis=1)]\n", 784 | "\n", 785 | "if len(rawTS.columns) == 2:\n", 786 | " teamSizeValuesDF = df.iloc[:,[1]]\n", 787 | " teamSizeValuesDF = teamSizeValuesDF.fillna(0)\n", 788 | " numInvalidSize = (teamSizeValuesDF.iloc[:,0].astype(int) < 0).sum()\n", 789 | "else:\n", 790 | " teamSizeValuesDF = 0\n", 791 | " numInvalidSize = 0\n", 792 | "\n", 793 | "\n", 794 | "showtsz_validatevalues = widgets.Button(description ='Show details')\n", 795 | "hidetsz_validatevalues = widgets.Button(description ='Hide details')\n", 796 | "pass_or_fail_mkdwn3 = widgets.Output()\n", 797 | "tszvv_mkdn = widgets.Output()\n", 798 | "\n", 799 | "with pass_or_fail_mkdwn3:\n", 800 | " clear_output()\n", 801 | " if(numInvalidSize>0) or len(invalidTS) > 0:\n", 802 | " display(HTML('❌'))\n", 803 | " else:\n", 804 | " display(HTML('

✅'))\n", 805 | " \n", 806 | "\n", 807 | "with tszvv_mkdn:\n", 808 | " display(Markdown('Validate values'))\n", 809 | "display(tszvv_mkdn)\n", 810 | "# if(numInvalidSize>0) or len(invalidTS) > 0:\n", 811 | "# display(HTML('❌'))\n", 812 | "# else:\n", 813 | "# display(HTML('

✅'))\n", 814 | "display(widgets.HBox([pass_or_fail_mkdwn3,showtsz_validatevalues])) \n", 815 | "display(out)\n", 816 | "\n", 817 | "\n", 818 | " \n", 819 | "def hide_details_tsz_vv(click):\n", 820 | " clear_output()\n", 821 | " display(tszvv_mkdn)\n", 822 | " display(widgets.HBox([pass_or_fail_mkdwn3,showtsz_validatevalues])) \n", 823 | " display(out)\n", 824 | "\n", 825 | "invalidSizeRows = teamSizeValuesDF[teamSizeValuesDF.iloc[:,0].astype(int)<0] \n", 826 | "\n", 827 | "\n", 828 | "def click_on_show_tsz_vv(click):\n", 829 | " clear_output()\n", 830 | " display(tszvv_mkdn)\n", 831 | " display(widgets.HBox([pass_or_fail_mkdwn3,hidetsz_validatevalues]))\n", 832 | " display(Markdown(\"\"\" #### i:\"\"\")) \n", 833 | " if(numInvalidSize>0):\n", 834 | " print(CRED+u'\\u2717'+CEND,BOLD,\"Team size cannot be negative\",CEND)\n", 835 | " print(invalidSizeRows)\n", 836 | " print(\"-------------------------------------------------------------\")\n", 837 | " else:\n", 838 | " print(CGREEN +u'\\u2713'+CEND, BOLD,\"numInvalidSize Values (Non-positive intergers):\",CEND, numInvalidSize )\n", 839 | " print(\"-------------------------------------------------------------\")\n", 840 | " if len(invalidTS) > 0:\n", 841 | " display(Markdown(\"\"\" #### ii:\"\"\"))\n", 842 | " print(CRED+u'\\u2717'+CEND,BOLD,\"Below are the following Team names that may contain invalid characters\",CEND)\n", 843 | " display(invalidTS['Team'])\n", 844 | " display(out)\n", 845 | " \n", 846 | "#replaceValue = 0\n", 847 | "showtsz_validatevalues.on_click(click_on_show_tsz_vv)\n", 848 | "hidetsz_validatevalues.on_click(hide_details_tsz_vv)\n", 849 | "\n", 850 | "#print(\"As requested, replacing all missing values by default value: \", replaceValue)\n", 851 | "\n", 852 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))" 853 | ] 854 | }, 855 | { 856 | "cell_type": "code", 857 | "execution_count": null, 858 | "metadata": {}, 859 | "outputs": [], 860 | "source": [ 861 | "\n", 862 | "# clear_output()\n", 863 | "# from datetime import date\n", 864 | "\n", 865 | "# today = date.today()\n", 866 | "showtsz_totalseats = widgets.Button(description ='Show details')\n", 867 | "hidetsz_totalseats = widgets.Button(description ='Hide details')\n", 868 | "tsztotal_mkdn = widgets.Output()\n", 869 | "with tsztotal_mkdn:\n", 870 | " display(Markdown('Total team size'))\n", 871 | "display(tsztotal_mkdn)\n", 872 | "\n", 873 | "pass_or_fail_mkdwn4 = widgets.Output()\n", 874 | "with pass_or_fail_mkdwn4:\n", 875 | " clear_output()\n", 876 | " display(HTML('

✅'))\n", 877 | "\n", 878 | "display(widgets.HBox([pass_or_fail_mkdwn4,showtsz_totalseats]))\n", 879 | "\n", 880 | "display(out)\n", 881 | " \n", 882 | "def hide_details_tsz_totalseats(click):\n", 883 | " clear_output()\n", 884 | " display(tsztotal_mkdn)\n", 885 | " display(widgets.HBox([pass_or_fail_mkdwn4,showtsz_totalseats]))\n", 886 | " display(out)\n", 887 | "\n", 888 | "# Calculate total team size\n", 889 | "rawTS = rawTS.fillna(0)\n", 890 | "if len(rawTS.columns) == 2:\n", 891 | " totalSeatsRequested = sum(rawTS.iloc[:,1].astype(int))\n", 892 | "else:\n", 893 | " totalSeatsRequested = 0\n", 894 | "\n", 895 | "def click_on_show_tsz_totalseats(click):\n", 896 | " clear_output()\n", 897 | " display(tsztotal_mkdn)\n", 898 | " display(widgets.HBox([pass_or_fail_mkdwn4,hidetsz_totalseats]))\n", 899 | " print(BOLD,\"Total Seats Requested:\",CEND, totalSeatsRequested)\n", 900 | " display(out)\n", 901 | "\n", 902 | " \n", 903 | " \n", 904 | "showtsz_totalseats.on_click(click_on_show_tsz_totalseats)\n", 905 | "hidetsz_totalseats.on_click(hide_details_tsz_totalseats) \n", 906 | "# mvCols = list(rawIraw.columns[0:2])\n", 907 | "# amountCol = rawIraw.columns[3]\n", 908 | "# NaNs = np.argwhere(np.isnan(tempI))\n", 909 | "# missingValuesDF = pd.DataFrame(NaNs, columns = mvCols)\n", 910 | "# missingValuesDF[\"Date\"] = today.strftime(\"%m/%d/%y\")\n", 911 | "# missingValuesDF[amountCol] = \"None\"\n", 912 | "# missingValuesDF[\"Comment\"] = 'Missing Value'\n", 913 | "# missingValuesDF[\"Replace by?\"] = replaceValue\n", 914 | "# #print(\"Number of Values Missing: \", len(missingValuesDF))\n", 915 | "# #missingValuesDF.head()\n", 916 | " \n", 917 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))" 918 | ] 919 | }, 920 | { 921 | "cell_type": "code", 922 | "execution_count": null, 923 | "metadata": {}, 924 | "outputs": [], 925 | "source": [ 926 | "clear_output()\n", 927 | "#display(Markdown('### Step 1. Validate Team Size File'))\n", 928 | "\n", 929 | "tab_nest_003 = widgets.Accordion(selected_index=None)\n", 930 | "style = {'description_width': '185px'}\n", 931 | "layout = {'width': '505px'}\n", 932 | " \n", 933 | "\n", 934 | "\n", 935 | "\n", 936 | "spccap_mkdown = widgets.Output()\n", 937 | "\n", 938 | "with spccap_mkdown:\n", 939 | " display(Markdown('

Step 2. Validate Space Capacity File:
'))\n", 940 | "#display(intx_mkdown)\n", 941 | "\n", 942 | "\n", 943 | "spccap_mkdown2 = widgets.Output()\n", 944 | "\n", 945 | "with spccap_mkdown2:\n", 946 | " display(Markdown('

    a. Validate columns layout – different per mode.

'))\n", 947 | " display(Markdown('

    i. Standard: should have 2 columns that match the fixed schema (\"Floor\" and \"Total Capacity\")

'))\n", 948 | " display(Markdown('

    b. Validate Spaces Catalog.

'))\n", 949 | " display(Markdown('

    i. Space names - Ensure they are unique (no duplicates)

'))\n", 950 | " #display(Markdown('

    ii. Check for empty or missing names

'))\n", 951 | " display(Markdown('

    c. Validate Values.

'))\n", 952 | " display(Markdown('

    i. Number of seats available - should be a positive integer

'))\n", 953 | " display(Markdown(\"

    ii. Space names should not contain invalid characters. Invalid chars: *|,:<>[]{}`';@&$#%

\"))\n", 954 | " display(Markdown('

    iii. The sum of team sizes should be less than or equal to the sum of the space capacities

'))\n", 955 | " \n", 956 | "tab_nest_003.children = [widgets.VBox([spccap_mkdown,spccap_mkdown2])]\n", 957 | "\n", 958 | "tab_nest_003.set_title(0,'Validate Space File Requirements')\n", 959 | "\n", 960 | "display(tab_nest_003)\n", 961 | "display(outreq)\n", 962 | "#K = len(teamCatalog2)\n", 963 | "# print(\"Number of Teams: \", K)\n", 964 | "# print(\"Total Team Combinations: \", K*K)\n", 965 | "# print(\"Number of rows in Interactions File: \", len(rawIraw) )\n", 966 | "# print(\"Missing + Existing: \", len(rawIraw) + len(missingValuesDF))\n", 967 | "# print(\"Number of missing rows: \", len(missingValuesDF))\n", 968 | "# print(\"Ratio of mising rows: \", len(missingValuesDF) / len(rawIraw))\n", 969 | " \n", 970 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))\n", 971 | " " 972 | ] 973 | }, 974 | { 975 | "cell_type": "code", 976 | "execution_count": null, 977 | "metadata": {}, 978 | "outputs": [], 979 | "source": [ 980 | "clear_output()\n", 981 | "\n", 982 | "# 1.a Validate columns layout – different per mode.\n", 983 | "# i.\tStandard/Optimize: 2 columns\n", 984 | "showspc_colslayout = widgets.Button(description ='Show details')\n", 985 | "hidespc_colslayout = widgets.Button(description ='Hide details')\n", 986 | "spcvalidatecols_mkdn = widgets.Output()\n", 987 | "pass_or_fail_mkdwn5 = widgets.Output()\n", 988 | "\n", 989 | "if len(rawFC.columns) >= 2:\n", 990 | " spacesColheader1= rawFC.columns[0]\n", 991 | " spacesColheader2= rawFC.columns[1]\n", 992 | "else:\n", 993 | " spacesColheader1= rawFC.columns[0]\n", 994 | " \n", 995 | "\n", 996 | "\n", 997 | "with spcvalidatecols_mkdn:\n", 998 | " display(Markdown('Validate columns layout'))\n", 999 | "display(spcvalidatecols_mkdn)\n", 1000 | "with pass_or_fail_mkdwn5:\n", 1001 | " clear_output()\n", 1002 | " if len(rawFC.columns) == 2 and (spacesColheader1 == 'Floor' and spacesColheader2 == 'Total Capacity'):\n", 1003 | " display(HTML('

✅'))\n", 1004 | " else:\n", 1005 | " display(HTML('❌'))\n", 1006 | " \n", 1007 | "display(widgets.HBox([pass_or_fail_mkdwn5,showspc_colslayout]))\n", 1008 | "display(out)\n", 1009 | "\n", 1010 | "\n", 1011 | "spc_schema_error_markdown = widgets.Output()\n", 1012 | "\n", 1013 | "with spc_schema_error_markdown:\n", 1014 | " if (len(rawFC.columns) >= 2):\n", 1015 | " if (spacesColheader1 == 'Floor' and spacesColheader2 == 'Total Capacity'):\n", 1016 | " \"\"\n", 1017 | " elif (spacesColheader1 != 'Floor' and spacesColheader2 == 'Total Capacity'):\n", 1018 | " clear_output()\n", 1019 | " display(Markdown('

ERROR! Invalid column headers for Standard/Optimize: '+ spacesColheader1 + ' was found instead of the expected \"Floor\" column.
'))\n", 1020 | " print(\"Please fix this error in your Space Capacity file and re-upload and click Go again.\")\n", 1021 | " elif (spacesColheader1 == 'Floor' and spacesColheader2 != 'Total Capacity'):\n", 1022 | " clear_output()\n", 1023 | " display(Markdown('
ERROR! Invalid column headers for Standard/Optimize: '+ spacesColheader2 + ' was found instead of expected schema of \"Total Capacity\".
'))\n", 1024 | " print(\"Please fix this error in your Space Capacity file and re-upload and click Go again.\")\n", 1025 | " elif (spacesColheader1 != 'Floor' and spacesColheader2 != 'Total Capacity'):\n", 1026 | " clear_output()\n", 1027 | " display(Markdown('
ERROR! Invalid column headers for Standard/Optimize: '+ spacesColheader1 + ' was found instead of expected schema of \"Floor\" and ' + spacesColheader2 + ' was found instead of expected schema of \"Total Capacity\".
'))\n", 1028 | " print(\"Please fix this error in your Space Capacity file and re-upload and click Go again.\")\n", 1029 | " else:\n", 1030 | " clear_output()\n", 1031 | " display(Markdown('
ERROR! Invalid number of columns for Standard: Space Capacity file should contain 2 columns with the following schema: \"Floor\" and \"Total Capacity\".
'))\n", 1032 | " print(\"Please fix this error in your Space Capacity file and re-upload and click Go again.\")\n", 1033 | "\n", 1034 | "\n", 1035 | "\n", 1036 | "\n", 1037 | "def hide_details_spc_cols_details(click):\n", 1038 | " clear_output()\n", 1039 | " display(spcvalidatecols_mkdn)\n", 1040 | " display(widgets.HBox([pass_or_fail_mkdwn5,showspc_colslayout]))\n", 1041 | " display(spc_schema_error_markdown)\n", 1042 | " display(out)\n", 1043 | "\n", 1044 | "def click_on_show_spc_cols_details(click):\n", 1045 | " clear_output()\n", 1046 | " display(spcvalidatecols_mkdn)\n", 1047 | " display(widgets.HBox([pass_or_fail_mkdwn5,hidespc_colslayout]))\n", 1048 | " display(spc_schema_error_markdown)\n", 1049 | " \n", 1050 | " print(BOLD,\"Number of Columns:\",CEND, len(rawFC.columns))\n", 1051 | " print(BOLD,\"Column headers: \",CEND, list(rawFC.columns))\n", 1052 | " if len(rawFC.columns) == 2:\n", 1053 | " print(CGREEN + u'\\u2713'+CEND,BOLD,\"Valid number of columns for Standard\",CEND)\n", 1054 | " if len(rawFC.columns) >= 2:\n", 1055 | " if (spacesColheader1 == 'Floor' and spacesColheader2 == 'Total Capacity'):\n", 1056 | " print(CGREEN +u'\\u2713'+CEND, BOLD,\"Valid column headers for Standard/Optimize\",CEND)\n", 1057 | " elif (spacesColheader1 != 'Floor' and spacesColheader2 == 'Total Capacity'):\n", 1058 | " print(CRED +u'\\u2717'+CEND, BOLD,\"Invalid column headers for Standard/Optimize\",CEND)\n", 1059 | " print(spacesColheader1 + \" was found instead of expected schema of 'Floor'\")\n", 1060 | " elif (spacesColheader1 == 'Floor' and spacesColheader2 != 'Total Capacity'):\n", 1061 | " print(CRED +u'\\u2717'+CEND, BOLD,\"Invalid column headers for Standard/Optimize\",CEND)\n", 1062 | " print(spacesColheader2 + \" was found instead of expected schema of 'Total Capacity'\")\n", 1063 | " elif (spacesColheader1 != 'Floor' and spacesColheader2 != 'Total Capacity'):\n", 1064 | " print(CRED +u'\\u2717'+CEND, BOLD,\"Invalid column headers for Standard/Optimize\",CEND)\n", 1065 | " print(spacesColheader1 + \" was found instead of expected schema of 'Floor'\")\n", 1066 | " print(spacesColheader2 + \" was found instead of expected schema of 'Total Capacity'\")\n", 1067 | " \n", 1068 | " display(out)\n", 1069 | "\n", 1070 | "showspc_colslayout.on_click(click_on_show_spc_cols_details)\n", 1071 | "hidespc_colslayout.on_click(hide_details_spc_cols_details)\n", 1072 | "display(spc_schema_error_markdown)\n", 1073 | "# sRawI.head()\n", 1074 | "if (len(rawFC.columns) >= 2) and (spacesColheader1 == 'Floor' and spacesColheader2 == 'Total Capacity'):\n", 1075 | " display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))\n", 1076 | "else:\n", 1077 | " None\n", 1078 | "#display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))" 1079 | ] 1080 | }, 1081 | { 1082 | "cell_type": "code", 1083 | "execution_count": null, 1084 | "metadata": {}, 1085 | "outputs": [], 1086 | "source": [ 1087 | "clear_output()\n", 1088 | "# 1b. 
Get unique name of participants\n", 1089 | "# i.\tEnsure they are unique (no duplicates)\n", 1090 | "spacesFCraw = rawFC.iloc[:,[0]]\n", 1091 | "spacesCol1= rawFC.columns[0]\n", 1092 | "\n", 1093 | "#find duplicates\n", 1094 | "tempspc = spacesFCraw\n", 1095 | "tempspc[\"cnt\"] = 1\n", 1096 | "tempspc = tempspc.groupby([spacesCol1], as_index = False) [\"cnt\"].sum().sort_values(by = [\"cnt\"])\n", 1097 | "duplicateSpaces = tempspc[tempspc.cnt > 1]\n", 1098 | "nonDuplicateSpaces = tempspc[tempspc.cnt == 1]\n", 1099 | "\n", 1100 | "\n", 1101 | "pass_or_fail_mkdwn6 = widgets.Output()\n", 1102 | "showspc_unique = widgets.Button(description ='Show details')\n", 1103 | "hidespc_unique = widgets.Button(description ='Hide details')\n", 1104 | "spcuniquenames_mkdn = widgets.Output()\n", 1105 | "with spcuniquenames_mkdn:\n", 1106 | " display(Markdown('Get unique name of Spaces'))\n", 1107 | "display(spcuniquenames_mkdn)\n", 1108 | "with pass_or_fail_mkdwn6:\n", 1109 | " clear_output()\n", 1110 | " if len(duplicateSpaces)== 0:\n", 1111 | " display(HTML('
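The uniqueness check above counts occurrences of the first column with groupby; an equivalent sketch using pandas.Series.duplicated on made-up floor names:

import pandas as pd

floors = pd.Series(["F1", "F2", "F2", "F3"], name="Floor")  # illustrative data
dupes = floors[floors.duplicated(keep=False)].unique()
print("duplicated spaces:", list(dupes))   # ['F2']
print("unique spaces:", floors.nunique())  # 3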

✅'))\n", 1112 | " else:\n", 1113 | " display(HTML('❌'))\n", 1114 | " \n", 1115 | "display(widgets.HBox([pass_or_fail_mkdwn6,showspc_unique]))\n", 1116 | "display(out)\n", 1117 | "\n", 1118 | "\n", 1119 | "\n", 1120 | "\n", 1121 | "\n", 1122 | "def hide_details_spc_unique_spaces(click):\n", 1123 | " clear_output()\n", 1124 | " display(spcuniquenames_mkdn)\n", 1125 | " display(widgets.HBox([pass_or_fail_mkdwn6,showspc_unique]))\n", 1126 | " display(out)\n", 1127 | "\n", 1128 | "\n", 1129 | "\n", 1130 | "\n", 1131 | "def click_on_show_spc_unique_spaces(click):\n", 1132 | " clear_output()\n", 1133 | " display(spcuniquenames_mkdn)\n", 1134 | " display(widgets.HBox([pass_or_fail_mkdwn6,hidespc_unique]))\n", 1135 | " \n", 1136 | " if len(duplicateSpaces)> 0:\n", 1137 | " print(CRED + u'\\u2717'+CEND,BOLD,\"Number of duplicated spaces:\",CEND, len(duplicateSpaces))\n", 1138 | " print(duplicateSpaces)\n", 1139 | " print('----------------------------------------')\n", 1140 | " else:\n", 1141 | " print(CGREEN + u'\\u2713'+CEND,BOLD,\"Number of duplicated spaces:\",CEND, len(duplicateSpaces))\n", 1142 | " print(BOLD,\"Number of unique spaces:\",CEND, len(nonDuplicateSpaces))\n", 1143 | " display(out)\n", 1144 | "\n", 1145 | "teamCatalogSC = nonDuplicateSpaces[[spacesCol1]].copy()\n", 1146 | "teamCatalogSC = teamCatalogSC.sort_values(by = spacesCol1).reset_index(drop = True)\n", 1147 | "teamCatalogSC[spacesCol1] = teamCatalogSC[spacesCol1].astype(str)\n", 1148 | "teamCatalogSC.head()\n", 1149 | "\n", 1150 | "\n", 1151 | "showspc_unique.on_click(click_on_show_spc_unique_spaces)\n", 1152 | "hidespc_unique.on_click(hide_details_spc_unique_spaces)\n", 1153 | "\n", 1154 | " \n", 1155 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+3)'))" 1156 | ] 1157 | }, 1158 | { 1159 | "cell_type": "code", 1160 | "execution_count": null, 1161 | "metadata": {}, 1162 | "outputs": [], 1163 | "source": [ 1164 | "\n", 1165 | "clear_output()\n", 1166 | "# c.\tValidate Values\n", 1167 | " # i.\tsecond column should be a positive integer (number of seats available)\n", 1168 | " # ii.\tNo invalid characters\n", 1169 | " # iii.\tThe sum of team sizes should be less than or equal to the sum of the floor capacities\n", 1170 | "\n", 1171 | "\n", 1172 | "\n", 1173 | "dfSPCagg = rawFC.copy()\n", 1174 | "dfSPCagg = dfSPCagg.fillna(0)\n", 1175 | "\n", 1176 | "if len(rawFC.columns) == 2:\n", 1177 | " grptotalSPC = dfSPCagg.iloc[:,1].astype(int).sum()\n", 1178 | "else:\n", 1179 | " grptotalSPC = 0\n", 1180 | "\n", 1181 | "\n", 1182 | "# excludeKeywords = [\"@\",\"#\",\"$\",\"%\",\"&\",\"*\",\"_\",\"+\",\"-\",\"=\",\"|\",\":\",\";\",\"<\",\">\",\",\",\".\",\"/\",\"(\",\")\",\"[\",\"]\",\"{\",\"}\",\"\\\\\"]\n", 1183 | "checkinvalidSP = rawFC.copy()\n", 1184 | "space_Col = checkinvalidSP.columns[0]\n", 1185 | "\n", 1186 | "checkinvalidSP[space_Col] = checkinvalidSP[space_Col].apply(str)\n", 1187 | "invalidSP = checkinvalidSP[checkinvalidSP.apply(lambda r: any([kw in r[0] for kw in excludeKeywords]), axis=1)]\n", 1188 | "\n", 1189 | "if len(rawFC.columns) == 2:\n", 1190 | " spaceSeatsValuesDF = dfSPCagg.iloc[:,[1]]\n", 1191 | " spaceSeatsValuesDF = spaceSeatsValuesDF.fillna(0)\n", 1192 | " numInvalidSizeSP = (spaceSeatsValuesDF.iloc[:,0].astype(int) < 0).sum()\n", 1193 | "else:\n", 1194 | " spaceSeatsValuesDF = 0\n", 1195 | " numInvalidSizeSP = 0\n", 1196 | " \n", 1197 | "\n", 1198 | "\n", 1199 | "if totalSeatsRequested <= grptotalSPC:\n", 1200 | " 
filecomp_TS_SPC = 0\n", 1201 | "else:\n", 1202 | " filecomp_TS_SPC = 1\n", 1203 | "\n", 1204 | "showspc_validatevalues = widgets.Button(description ='Show details')\n", 1205 | "hidespc_validatevalues = widgets.Button(description ='Hide details')\n", 1206 | "pass_or_fail_mkdwn7 = widgets.Output()\n", 1207 | "spcvv_mkdn = widgets.Output()\n", 1208 | "\n", 1209 | "with pass_or_fail_mkdwn7:\n", 1210 | " clear_output()\n", 1211 | " if(numInvalidSizeSP>0) or len(invalidSP) > 0 or filecomp_TS_SPC == 1:\n", 1212 | " display(HTML('❌'))\n", 1213 | " else:\n", 1214 | " display(HTML('
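The value checks in this cell come down to three tests: no negative capacities, no reserved characters in space names, and the requested seats fitting within the total capacity. A hedged sketch with illustrative frames (the notebook applies the same logic to rawFC, excludeKeywords and totalSeatsRequested):

import pandas as pd

INVALID_CHARS = set("*|,:<>[]{}`';@&$#%")  # mirrors the documented reserved characters

spaces = pd.DataFrame({"Floor": ["F1", "F2"], "Total Capacity": [40, 55]})  # illustrative
team_sizes = pd.Series([30, 50])                                            # illustrative

negative_capacity = spaces[spaces["Total Capacity"].astype(int) < 0]
bad_names = spaces[spaces["Floor"].astype(str).map(lambda s: any(c in INVALID_CHARS for c in s))]
fits = team_sizes.sum() <= spaces["Total Capacity"].sum()

print(len(negative_capacity) == 0, len(bad_names) == 0, fits)  # True True True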

✅'))\n", 1215 | " \n", 1216 | "\n", 1217 | "with spcvv_mkdn:\n", 1218 | " display(Markdown('Validate values'))\n", 1219 | "display(spcvv_mkdn)\n", 1220 | "# if(numInvalidSize>0) or len(invalidTS) > 0:\n", 1221 | "# display(HTML('❌'))\n", 1222 | "# else:\n", 1223 | "# display(HTML('

✅'))\n", 1224 | "display(widgets.HBox([pass_or_fail_mkdwn7,showspc_validatevalues])) \n", 1225 | "display(out)\n", 1226 | "\n", 1227 | "\n", 1228 | " \n", 1229 | "def hide_details_spc_vv(click):\n", 1230 | " clear_output()\n", 1231 | " display(spcvv_mkdn)\n", 1232 | " display(widgets.HBox([pass_or_fail_mkdwn7,showspc_validatevalues])) \n", 1233 | " display(out)\n", 1234 | "\n", 1235 | " \n", 1236 | "\n", 1237 | "\n", 1238 | "def click_on_show_spc_vv(click):\n", 1239 | " clear_output()\n", 1240 | " display(spcvv_mkdn)\n", 1241 | " display(widgets.HBox([pass_or_fail_mkdwn7,hidespc_validatevalues]))\n", 1242 | " \n", 1243 | " display(Markdown(\"\"\" #### i:\"\"\")) \n", 1244 | " if(numInvalidSizeSP>0):\n", 1245 | " print(CRED+u'\\u2717'+CEND,BOLD,\"Space capacity cannot be negative\",CEND)\n", 1246 | " print(\"-------------------------------------------------------------\")\n", 1247 | " print(spaceSeatsValuesDF[spaceSeatsValuesDF.iloc[:,0]<0])\n", 1248 | " else:\n", 1249 | " print(CGREEN + u'\\u2713'+CEND, BOLD,\"numInvalidSize Values (Non-positive intergers):\",CEND, numInvalidSizeSP)\n", 1250 | " print(\"-------------------------------------------------------------\")\n", 1251 | " if len(invalidSP) > 0:\n", 1252 | " display(Markdown(\"\"\" #### ii:\"\"\"))\n", 1253 | " print(CRED+ u'\\u2717'+CEND, BOLD,\"Below are the following space names that may contain invalid characters:\",CEND)\n", 1254 | " display(invalidSP[space_Col])\n", 1255 | " print(\"-------------------------------------------------------------\")\n", 1256 | "\n", 1257 | " if filecomp_TS_SPC == 1:\n", 1258 | " display(Markdown(\"\"\" #### iii:\"\"\"))\n", 1259 | " print(CRED+ u'\\u2717'+CEND, BOLD,\"The sum of team sizes is not less than or equal to the sum of the space capacities.\",CEND)\n", 1260 | " print(str(totalSeatsRequested)+\"(Team-size)\" +\" > \" + str(grptotalSPC)+\"(Space-capacity)\" )\n", 1261 | " print(\"-------------------------------------------------------------\")\n", 1262 | " else:\n", 1263 | " display(Markdown(\"\"\" #### iii:\"\"\"))\n", 1264 | " print(CGREEN + u'\\u2713'+CEND,BOLD,\"Team size total is less than equal to space capacity total:\",CEND,str(totalSeatsRequested)+\"(Team-size)\" +\" <= \" + str(grptotalSPC)+\"(Space-capacity)\" )\n", 1265 | " \n", 1266 | " display(out)\n", 1267 | " \n", 1268 | "#replaceValue = 0\n", 1269 | "showspc_validatevalues.on_click(click_on_show_spc_vv)\n", 1270 | "hidespc_validatevalues.on_click(hide_details_spc_vv)\n", 1271 | "\n", 1272 | "#print(\"As requested, replacing all missing values by default value: \", replaceValue)\n", 1273 | "\n", 1274 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))" 1275 | ] 1276 | }, 1277 | { 1278 | "cell_type": "code", 1279 | "execution_count": null, 1280 | "metadata": {}, 1281 | "outputs": [], 1282 | "source": [ 1283 | "clear_output()\n", 1284 | "#display(Markdown('### Step 1. Validate Team Size File'))\n", 1285 | "\n", 1286 | "tab_nest_004 = widgets.Accordion(selected_index=None)\n", 1287 | "style = {'description_width': '185px'}\n", 1288 | "layout = {'width': '505px'}\n", 1289 | " \n", 1290 | "\n", 1291 | "\n", 1292 | "\n", 1293 | "dist_mkdown = widgets.Output()\n", 1294 | "\n", 1295 | "with dist_mkdown:\n", 1296 | " display(Markdown('

Step 3. Validate Distance File:'))\n", 1297 | "#display(intx_mkdown)\n", 1298 | "\n", 1299 | "\n", 1300 | "dist_mkdown2 = widgets.Output()\n", 1301 | "\n", 1302 | "with dist_mkdown2:\n", 1303 | " display(Markdown('    a. Validate columns layout – different per mode.'))\n", 1304 | " display(Markdown('    i. Columns and rows should be equal in number'))\n", 1305 | " display(Markdown('    ii. Needs to be an N*N matrix, where N is the number of Spaces in the Space Capacity File'))\n", 1306 | " display(Markdown('    iii. Columns and rows should have the same names in the same order'))\n", 1307 | " display(Markdown('    b. Validate Space Catalog - in rows.'))\n", 1308 | " display(Markdown('    i. Row names should be unique (no duplicates)'))\n", 1309 | " display(Markdown('    ii. Get unique name of spaces in Rows'))\n", 1310 | " display(Markdown('    c. Validate Space Catalog - in columns.'))\n", 1311 | " display(Markdown('    i. Column names should be unique (no duplicates)'))\n", 1312 | " display(Markdown('    ii. Get unique name of spaces in Columns'))\n", 1313 | " display(Markdown('    d. Validate Space Catalog - Columns vs Rows.'))\n", 1314 | " display(Markdown('    i. (For Space Capacity File): Names in Space Capacity and not in Distance Matrix --> user needs to add these spaces to distance matrix'))\n", 1315 | " display(Markdown('    ii. (For Distance File): Any space names in Rows and not in Columns --> user needs to add these space names to the Columns'))\n", 1316 | " display(Markdown('    iii. (For Distance File): Any space names in Columns and not in Rows --> user needs to add these space names to the Rows'))\n", 1317 | " display(Markdown('    iv. (For Distance File): Any space name in Rows and not in Space Capacity (or vice versa) --> user needs to add these space names to Space Capacity file'))\n", 1318 | " display(Markdown('    v. (For Distance File): Any space name in Columns and not in Space Capacity (or vice versa) --> user needs to add these space names to Space Capacity file'))\n", 1319 | " display(Markdown('    e. Validate Values.'))\n", 1320 | " display(Markdown('    i. Matrix values should be positive numeric values (both positive decimals and positive integers are valid for Distance file values)
'))\n", 1321 | " \n", 1322 | "tab_nest_004.children = [widgets.VBox([dist_mkdown,dist_mkdown2])]\n", 1323 | "\n", 1324 | "tab_nest_004.set_title(0,'Validate Distance File Requirements')\n", 1325 | "\n", 1326 | "display(tab_nest_004)\n", 1327 | "display(outreq)\n", 1328 | "#K = len(teamCatalog2)\n", 1329 | "# print(\"Number of Teams: \", K)\n", 1330 | "# print(\"Total Team Combinations: \", K*K)\n", 1331 | "# print(\"Number of rows in Interactions File: \", len(rawIraw) )\n", 1332 | "# print(\"Missing + Existing: \", len(rawIraw) + len(missingValuesDF))\n", 1333 | "# print(\"Number of missing rows: \", len(missingValuesDF))\n", 1334 | "# print(\"Ratio of mising rows: \", len(missingValuesDF) / len(rawIraw))\n", 1335 | " \n", 1336 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))\n", 1337 | " " 1338 | ] 1339 | }, 1340 | { 1341 | "cell_type": "code", 1342 | "execution_count": null, 1343 | "metadata": {}, 1344 | "outputs": [], 1345 | "source": [ 1346 | "clear_output()\n", 1347 | "\n", 1348 | "\n", 1349 | "\n", 1350 | "# 1.a Validate columns layout – different per mode.\n", 1351 | "# i.\tStandard/Optimize: 2 columns\n", 1352 | "\n", 1353 | "rem1colDT = rawDT.copy()\n", 1354 | "rem1colDT.drop(rem1colDT.columns[0],axis=1,inplace=True)\n", 1355 | "distHeaders = list(set(rem1colDT.columns))\n", 1356 | "diststripHeaders = [x.strip(' ') for x in distHeaders]\n", 1357 | "# diststripHeaders.remove(\"Unnamed: 0\")\n", 1358 | "\n", 1359 | "\n", 1360 | "#2.a.\tValidate columns layout\n", 1361 | "# i.\tNeeds to be an N*N matrix \n", 1362 | "# ii.\tN is the number of Spaces – From Space Capacity File\n", 1363 | "N = teamCatalogSC.shape[0]\n", 1364 | "teamCatalogSetDT = set(teamCatalogSC.iloc[:,0])\n", 1365 | "tempDT = rawDT.copy()\n", 1366 | "SpacesOnColsDT = set(tempDT.columns[1:])\n", 1367 | "SpacesOnRowsDT = set(tempDT.iloc[:,0])\n", 1368 | "if(len(SpacesOnColsDT) == len(SpacesOnRowsDT)):\n", 1369 | " diff1DT = set()\n", 1370 | "elif(len(SpacesOnColsDT) > len(SpacesOnRowsDT)):\n", 1371 | " diff1DT = SpacesOnColsDT - SpacesOnRowsDT\n", 1372 | "else:\n", 1373 | " diff1DT = SpacesOnRowsDT - SpacesOnColsDT \n", 1374 | "if(N == len(SpacesOnColsDT)):\n", 1375 | " diff2DT = set()\n", 1376 | "elif(N > len(SpacesOnColsDT)):\n", 1377 | " diff2DT = teamCatalogSetDT - SpacesOnColsDT \n", 1378 | "else: \n", 1379 | " diff2DT = SpacesOnColsDT - teamCatalogSetDT\n", 1380 | "if(N == len(SpacesOnRowsDT)):\n", 1381 | " diff3DT = set()\n", 1382 | "elif(N > len(SpacesOnRowsDT)):\n", 1383 | " diff3DT = teamCatalogSetDT - SpacesOnRowsDT\n", 1384 | "else: \n", 1385 | " diff3DT = SpacesOnRowsDT - teamCatalogSetDT\n", 1386 | "\n", 1387 | "#3.a.\tValidate columns layout\n", 1388 | "# columns and rows should have the same names in order.\n", 1389 | "\n", 1390 | "headerscheckDT = list(rem1colDT)\n", 1391 | "headerscheckDT2 = [x.strip(' ') for x in headerscheckDT]\n", 1392 | "#headerscheckDT2.remove(\"Unnamed: 0\")\n", 1393 | "\n", 1394 | "distrowHeader = rawDT.columns[0]\n", 1395 | "rowscheckDT = list(rawDT[distrowHeader])\n", 1396 | "\n", 1397 | "\n", 1398 | "\n", 1399 | "\n", 1400 | "\n", 1401 | "showdist_colslayout = widgets.Button(description ='Show details')\n", 1402 | "hidedist_colslayout = widgets.Button(description ='Hide details')\n", 1403 | "distvalidatecols_mkdn = widgets.Output()\n", 1404 | "pass_or_fail_mkdwn8 = widgets.Output()\n", 1405 | "\n", 1406 | "with distvalidatecols_mkdn:\n", 1407 | " display(Markdown('Validate 
Distance Matrix columns layout'))\n", 1408 | "display(distvalidatecols_mkdn)\n", 1409 | "with pass_or_fail_mkdwn8:\n", 1410 | " clear_output()\n", 1411 | " if len(diststripHeaders) == len(rawDT) :\n", 1412 | " display(HTML('
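The layout test for the distance file reduces to three conditions: the matrix is square, its size matches the space catalogue, and the row labels equal the column labels in order. A sketch under those assumptions (the notebook keeps the row labels in rawDT's first column; the toy frame here stands in for it):

import pandas as pd

dist = pd.DataFrame({"Floor": ["F1", "F2"], "F1": [0, 10], "F2": [10, 0]})  # illustrative matrix
catalogue = ["F1", "F2"]  # unique spaces from the Space Capacity file

row_labels = [str(r).strip() for r in dist.iloc[:, 0]]
col_labels = [str(c).strip() for c in dist.columns[1:]]

is_square = len(row_labels) == len(col_labels) == len(catalogue)
same_order = row_labels == col_labels
print(is_square and same_order and set(row_labels) == set(catalogue))  # True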

✅'))\n", 1413 | " elif ((N != len(SpacesOnColsDT) or N != len(SpacesOnRowsDT)) or len(diststripHeaders) != len(rawDT)):\n", 1414 | " display(HTML('❌'))\n", 1415 | " \n", 1416 | "display(widgets.HBox([pass_or_fail_mkdwn8,showdist_colslayout]))\n", 1417 | "display(out)\n", 1418 | "\n", 1419 | "\n", 1420 | "\n", 1421 | "def hide_details_dist_cols_details(click):\n", 1422 | " clear_output()\n", 1423 | " display(distvalidatecols_mkdn)\n", 1424 | " display(widgets.HBox([pass_or_fail_mkdwn8,showdist_colslayout]))\n", 1425 | " display(out)\n", 1426 | "\n", 1427 | "def click_on_show_dist_cols_details(click):\n", 1428 | " clear_output()\n", 1429 | " display(distvalidatecols_mkdn)\n", 1430 | " display(widgets.HBox([pass_or_fail_mkdwn8,hidedist_colslayout]))\n", 1431 | " \n", 1432 | " display(Markdown(\"\"\" #### i:\"\"\")) \n", 1433 | " if len(diststripHeaders) == len(rawDT):\n", 1434 | " print(CGREEN + u'\\u2713'+CEND,BOLD,\"Valid number of columns and rows for Standard\",CEND)\n", 1435 | " print(\"-------------------------------------------------------------\")\n", 1436 | " print(\"Number of Columns:\", len(diststripHeaders))\n", 1437 | " print(\"Number of Rows: \", len(rawDT))\n", 1438 | " else:\n", 1439 | " print(CRED + u'\\u2717'+CEND,BOLD,\"Invalid number of columns and rows for Standard\",CEND)\n", 1440 | " print(\"-------------------------------------------------------------\")\n", 1441 | " print(\"Number of Columns:\", len(diststripHeaders))\n", 1442 | " print(\"Number of Rows: \", len(rawDT))\n", 1443 | " \n", 1444 | " display(Markdown(\"\"\" #### ii:\"\"\")) \n", 1445 | " if(N != len(SpacesOnColsDT) or N != len(SpacesOnRowsDT)):\n", 1446 | " print(CRED + u'\\u2717'+CEND,BOLD,\"Distance Matrix needs to be a N*N matrix\",CEND)\n", 1447 | " print(\"-------------------------------------------------------------\")\n", 1448 | " else:\n", 1449 | " print(CGREEN + u'\\u2713'+CEND,BOLD,\"Distance Matrix needs to be a N*N matrix\",CEND)\n", 1450 | " print(\"-------------------------------------------------------------\")\n", 1451 | " print(\"Number of Unique Spaces in Space Capacity File: \", N)\n", 1452 | " print(\"Number of Unique Spaces in Cols: \", len(SpacesOnColsDT))\n", 1453 | " print(\"Number of Unique Spaces in Rows: \", len(SpacesOnRowsDT))\n", 1454 | " \n", 1455 | " \n", 1456 | "\n", 1457 | " if(N != len(SpacesOnColsDT) or N != len(SpacesOnRowsDT)):\n", 1458 | " print(CRED,\"**File Validation Error**\",CEND,\": Distance Matrix needs to be a N*N matrix\")\n", 1459 | " if(diff1DT):\n", 1460 | " print(\"Difference between Columns and Rows in Distance File...\")\n", 1461 | " print(diff1DT)\n", 1462 | " elif(diff2DT):\n", 1463 | " print(\"Difference between Space Capacity and columns in Distance File...\")\n", 1464 | " print(diff2DT)\n", 1465 | " elif(diff3DT):\n", 1466 | " print(\"Difference between Space Capacity and rows in Distance File...\")\n", 1467 | " print(diff3DT)\n", 1468 | " \n", 1469 | " display(Markdown(\"\"\" #### iii:\"\"\"))\n", 1470 | " if headerscheckDT2 == rowscheckDT:\n", 1471 | " print(CGREEN + u'\\u2713'+CEND,BOLD,\"Columns and rows have the same names in same order\",CEND)\n", 1472 | " \n", 1473 | " else:\n", 1474 | " print(CRED + u'\\u2717'+CEND,BOLD,\"Columns and rows have the same names in same order\",CEND)\n", 1475 | " print(\"-------------------------------------------------------------\")\n", 1476 | " print(BOLD,\" Names in columns: \",CEND,headerscheckDT2)\n", 1477 | " display(Markdown(\"\"\"####

    vs.

\"\"\"))\n", 1478 | " print(BOLD,\" Names in rows:\",CEND ,rowscheckDT)\n", 1479 | " \n", 1480 | " \n", 1481 | " \n", 1482 | " display(out)\n", 1483 | "\n", 1484 | "showdist_colslayout.on_click(click_on_show_dist_cols_details)\n", 1485 | "hidedist_colslayout.on_click(hide_details_dist_cols_details)\n", 1486 | "# sRawI.head()\n", 1487 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))" 1488 | ] 1489 | }, 1490 | { 1491 | "cell_type": "code", 1492 | "execution_count": null, 1493 | "metadata": {}, 1494 | "outputs": [], 1495 | "source": [ 1496 | "\n", 1497 | "clear_output()\n", 1498 | "# b. Get unique name of space catalogs-in rows\n", 1499 | "# i.\tEnsure they are unique (no duplicates)\n", 1500 | "distSpaceraw = rawDT.iloc[:,[0]].copy()\n", 1501 | "distspcCol1 = rawDT.columns[0]\n", 1502 | "#find duplicates\n", 1503 | "tempdtspc = distSpaceraw\n", 1504 | "tempdtspc[\"cnt\"] = 1\n", 1505 | "tempdtspc = tempdtspc.groupby([distspcCol1], as_index = False) [\"cnt\"].sum().sort_values(by = [\"cnt\"])\n", 1506 | "duplicatedistSpcs = tempdtspc[tempdtspc.cnt > 1]\n", 1507 | "nonDuplicatedistSpcs = tempdtspc[tempdtspc.cnt == 1]\n", 1508 | "\n", 1509 | "\n", 1510 | "\n", 1511 | "showdist_spccatalog = widgets.Button(description ='Show details')\n", 1512 | "hidedist_spccatalog = widgets.Button(description ='Hide details')\n", 1513 | "distvalidatespc_mkdn = widgets.Output()\n", 1514 | "pass_or_fail_mkdwn9 = widgets.Output()\n", 1515 | "\n", 1516 | "with distvalidatespc_mkdn:\n", 1517 | " display(Markdown('Validate Distance Space Catalog - in rows'))\n", 1518 | "display(distvalidatespc_mkdn)\n", 1519 | "with pass_or_fail_mkdwn9:\n", 1520 | " clear_output()\n", 1521 | " if len(duplicatedistSpcs)> 0 :\n", 1522 | " display(HTML('❌'))\n", 1523 | " else:\n", 1524 | " display(HTML('

✅'))\n", 1525 | " \n", 1526 | "display(widgets.HBox([pass_or_fail_mkdwn9,showdist_spccatalog]))\n", 1527 | "display(out)\n", 1528 | "\n", 1529 | "def hide_details_dist_spcs_details(click):\n", 1530 | " clear_output()\n", 1531 | " display(distvalidatespc_mkdn)\n", 1532 | " display(widgets.HBox([pass_or_fail_mkdwn9,showdist_spccatalog]))\n", 1533 | " display(out)\n", 1534 | "\n", 1535 | "def click_on_show_dist_spcs_details(click):\n", 1536 | " clear_output()\n", 1537 | " display(distvalidatespc_mkdn)\n", 1538 | " display(widgets.HBox([pass_or_fail_mkdwn9,hidedist_spccatalog]))\n", 1539 | " \n", 1540 | " if len(duplicatedistSpcs)> 0:\n", 1541 | " print(CRED+u'\\u2717'+CEND,BOLD,\"Number of duplicated spaces:\",CEND, len(duplicatedistSpcs))\n", 1542 | " \n", 1543 | " else:\n", 1544 | " print(CGREEN+u'\\u2713'+CEND,BOLD,\"Number of duplicated spaces:\",CEND, len(duplicatedistSpcs))\n", 1545 | " \n", 1546 | " print(BOLD,\"Number of unique spaces:\",CEND, len(nonDuplicatedistSpcs))\n", 1547 | " print(\"-------------------------------------------------------------\")\n", 1548 | " \n", 1549 | " \n", 1550 | " print(BOLD,f'Unique Space Names-Rows:',CEND,f'\\n{nonDuplicatedistSpcs}')\n", 1551 | " \n", 1552 | " display(out)\n", 1553 | " \n", 1554 | " \n", 1555 | "showdist_spccatalog.on_click(click_on_show_dist_spcs_details)\n", 1556 | "hidedist_spccatalog.on_click(hide_details_dist_spcs_details)\n", 1557 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))" 1558 | ] 1559 | }, 1560 | { 1561 | "cell_type": "code", 1562 | "execution_count": null, 1563 | "metadata": {}, 1564 | "outputs": [], 1565 | "source": [ 1566 | "\n", 1567 | "clear_output()\n", 1568 | "# b. Get unique name of space catalogs-in columns\n", 1569 | "# ii.\tEnsure they are unique (no duplicates)\n", 1570 | "\n", 1571 | "\n", 1572 | "showdist_spccatalog2 = widgets.Button(description ='Show details')\n", 1573 | "hidedist_spccatalog2 = widgets.Button(description ='Hide details')\n", 1574 | "distvalidatespc_mkdn2 = widgets.Output()\n", 1575 | "pass_or_fail_mkdwn10 = widgets.Output()\n", 1576 | "\n", 1577 | "duplicatedistSpcs2 = len(headerscheckDT2)-len(set(headerscheckDT2))\n", 1578 | "\n", 1579 | "\n", 1580 | "with distvalidatespc_mkdn2:\n", 1581 | " display(Markdown('Validate Distance Space Catalog - in columns'))\n", 1582 | "display(distvalidatespc_mkdn2)\n", 1583 | "with pass_or_fail_mkdwn10:\n", 1584 | " clear_output()\n", 1585 | " if len(headerscheckDT2) != len(set(headerscheckDT2)) :\n", 1586 | " display(HTML('❌'))\n", 1587 | " else:\n", 1588 | " display(HTML('

✅'))\n", 1589 | " \n", 1590 | "display(widgets.HBox([pass_or_fail_mkdwn10,showdist_spccatalog2]))\n", 1591 | "display(out)\n", 1592 | "\n", 1593 | "def hide_details_dist_spcs_details2(click):\n", 1594 | " clear_output()\n", 1595 | " display(distvalidatespc_mkdn2)\n", 1596 | " display(widgets.HBox([pass_or_fail_mkdwn10,showdist_spccatalog2]))\n", 1597 | " display(out)\n", 1598 | "\n", 1599 | "def click_on_show_dist_spcs_details2(click):\n", 1600 | " clear_output()\n", 1601 | " display(distvalidatespc_mkdn2)\n", 1602 | " display(widgets.HBox([pass_or_fail_mkdwn10,hidedist_spccatalog2]))\n", 1603 | " \n", 1604 | " if duplicatedistSpcs2 > 0:\n", 1605 | " print(CRED+u'\\u2717'+CEND,BOLD,\"Number of duplicated spaces:\",CEND, duplicatedistSpcs2)\n", 1606 | " \n", 1607 | " else:\n", 1608 | " print(CGREEN+u'\\u2713'+CEND,BOLD,\"Number of duplicated spaces:\",CEND, duplicatedistSpcs2)\n", 1609 | " \n", 1610 | " print(BOLD,\"Number of unique spaces:\",CEND, len(set(headerscheckDT2)))\n", 1611 | " print(\"-------------------------------------------------------------\")\n", 1612 | " \n", 1613 | " \n", 1614 | " print(BOLD,'Unique Space Names-Cols:',CEND,headerscheckDT2)\n", 1615 | " \n", 1616 | " display(out)\n", 1617 | " \n", 1618 | " \n", 1619 | "showdist_spccatalog2.on_click(click_on_show_dist_spcs_details2)\n", 1620 | "hidedist_spccatalog2.on_click(hide_details_dist_spcs_details2)\n", 1621 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))" 1622 | ] 1623 | }, 1624 | { 1625 | "cell_type": "code", 1626 | "execution_count": null, 1627 | "metadata": {}, 1628 | "outputs": [], 1629 | "source": [ 1630 | "clear_output()\n", 1631 | "# d. Get unique name of space catalogs-in columns\n", 1632 | "# i.\tspace capacity, names in space capacity and not in distance matrix--> User needs to add these spaces to distance matrix\n", 1633 | "\n", 1634 | "spcnames_nondup = list(set(spacesFCraw[spacesCol1]))\n", 1635 | "distspcnames_nondup = list(nonDuplicatedistSpcs.iloc[:,0])\n", 1636 | "namesin_dist_not_spc = [i for i in distspcnames_nondup if i not in spcnames_nondup]\n", 1637 | "namesin_spc_not_dist=[i for i in spcnames_nondup if i not in distspcnames_nondup]\n", 1638 | "namesin_distrows_not_spc=[i for i in distspcnames_nondup if i not in spcnames_nondup]\n", 1639 | "namesin_distcols_not_spc=[i for i in headerscheckDT2 if i not in spcnames_nondup]\n", 1640 | "\n", 1641 | "# ii.\tspace capacity, names in in distance matrix and not Team Size--> User needs to add these spaces to Team size file.\n", 1642 | "#tsznames_nondup = list(nonDuplicateTeams.iloc[:,0])\n", 1643 | "anynames_in_dist= list(set(headerscheckDT2) | set(distspcnames_nondup))\n", 1644 | "#namesin_dist_not_tsz =[i for i in anynames_in_dist if i not in tsznames_nondup]\n", 1645 | "\n", 1646 | "namesin_distrows_not_distcols=[i for i in distspcnames_nondup if i not in headerscheckDT2]\n", 1647 | "\n", 1648 | "namesin_distcols_not_distrows=[i for i in headerscheckDT2 if i not in distspcnames_nondup]\n", 1649 | "\n", 1650 | "showdist_spcvsdist = widgets.Button(description ='Show details')\n", 1651 | "hidedist_spcvsdist = widgets.Button(description ='Hide details')\n", 1652 | "distspcvsdist_mkdn = widgets.Output()\n", 1653 | "pass_or_fail_mkdwn11 = widgets.Output()\n", 1654 | "\n", 1655 | "\n", 1656 | "with distspcvsdist_mkdn:\n", 1657 | " display(Markdown('Validate Distance Space Catalog - Columns vs Rows'))\n", 1658 | "display(distspcvsdist_mkdn)\n", 1659 | "with 
pass_or_fail_mkdwn11:\n", 1660 | " clear_output()\n", 1661 | " if ((len(namesin_spc_not_dist)> 0 or len(namesin_dist_not_spc) >0) or len(namesin_distrows_not_distcols) >0):\n", 1662 | " display(HTML('❌'))\n", 1663 | " else:\n", 1664 | " display(HTML('

✅'))\n", 1665 | " \n", 1666 | "display(widgets.HBox([pass_or_fail_mkdwn11,showdist_spcvsdist]))\n", 1667 | "display(out)\n", 1668 | "\n", 1669 | "\n", 1670 | "\n", 1671 | "def hide_details_spcvsdist_details(click):\n", 1672 | " clear_output()\n", 1673 | " display(distspcvsdist_mkdn)\n", 1674 | " display(widgets.HBox([pass_or_fail_mkdwn11,showdist_spcvsdist]))\n", 1675 | " display(out)\n", 1676 | "\n", 1677 | "def click_on_show_spcvsdist_details(click):\n", 1678 | " clear_output()\n", 1679 | " display(distspcvsdist_mkdn)\n", 1680 | " display(widgets.HBox([pass_or_fail_mkdwn11,hidedist_spcvsdist]))\n", 1681 | " \n", 1682 | " display(Markdown(\"\"\" #### i: For Space Capacity File - Space Capacity vs. Distance\"\"\"))\n", 1683 | " if len(namesin_spc_not_dist) > 0:\n", 1684 | " print(CRED+u'\\u2717'+CEND,BOLD,\"Names in space capacity file and not in distance matrix :\",CEND,namesin_spc_not_dist)\n", 1685 | " else:\n", 1686 | " print(CGREEN+u'\\u2713'+CEND,BOLD,\"Names in space capacity file and not in distance matrix :\",CEND,None)\n", 1687 | " if len(namesin_dist_not_spc) > 0:\n", 1688 | " print(CRED+u'\\u2717'+CEND,BOLD,\"Names in distance matrix and not in space capacity file :\",CEND,namesin_dist_not_spc)\n", 1689 | " print(\"-------------------------------------------------------------\")\n", 1690 | " else:\n", 1691 | " print(CGREEN+u'\\u2713'+CEND,BOLD,\"Names in distance matrix and not in space capacity file:\",CEND,None)\n", 1692 | " print(\"-------------------------------------------------------------\")\n", 1693 | " \n", 1694 | "# display(Markdown(\"\"\" #### ii: For Space Capacity File - Team size vs. Distance\"\"\"))\n", 1695 | "# if len(namesin_dist_not_tsz) > 0:\n", 1696 | "# print(CRED+u'\\u2717'+CEND,BOLD,\"Names in distance matrix and not in team size file :\",CEND,namesin_dist_not_tsz)\n", 1697 | "# else:\n", 1698 | "# print(CGREEN+u'\\u2713'+CEND,BOLD,\"Names in distance matrix and not in team size file :\",CEND,None)\n", 1699 | "# print(\"-------------------------------------------------------------\")\n", 1700 | " \n", 1701 | " display(Markdown(\"\"\" #### ii: For Distance File - Names in rows and not in columns\"\"\"))\n", 1702 | " if len(namesin_distrows_not_distcols) > 0:\n", 1703 | " print(CRED+u'\\u2717'+CEND,BOLD,\"Names in distance rows and not in distance columns :\",CEND,namesin_distrows_not_distcols)\n", 1704 | " print(\"-------------------------------------------------------------\")\n", 1705 | " else:\n", 1706 | " print(CGREEN+u'\\u2713'+CEND,BOLD,\"Names in distance rows and not in distance columns :\",CEND,None)\n", 1707 | " print(\"-------------------------------------------------------------\")\n", 1708 | " \n", 1709 | " \n", 1710 | " display(Markdown(\"\"\" #### iii: For Distance File - Names in columns and not in rows\"\"\"))\n", 1711 | " if len(namesin_distcols_not_distrows) > 0:\n", 1712 | " print(CRED+u'\\u2717'+CEND,BOLD,\"Names in distance columns and not in distance rows :\",CEND,namesin_distcols_not_distrows)\n", 1713 | " print(\"-------------------------------------------------------------\")\n", 1714 | " else:\n", 1715 | " print(CGREEN+u'\\u2713'+CEND,BOLD,\"Names in distance columns and not in distance rows :\",CEND,None)\n", 1716 | " print(\"-------------------------------------------------------------\")\n", 1717 | " \n", 1718 | " \n", 1719 | " \n", 1720 | " display(Markdown(\"\"\" #### iv: For Distance File - Any space name in rows and not in Space Capacity\"\"\"))\n", 1721 | " if len(namesin_distrows_not_spc) > 0:\n", 1722 | " 
print(CRED+u'\\u2717'+CEND,BOLD,\"Names in distance rows and not in space capacity file :\",CEND,namesin_distrows_not_spc)\n", 1723 | " print(\"-------------------------------------------------------------\")\n", 1724 | " else:\n", 1725 | " print(CGREEN+u'\\u2713'+CEND,BOLD,\"Names in distance rows and not in space capacity file :\",CEND,None)\n", 1726 | " print(\"-------------------------------------------------------------\")\n", 1727 | " \n", 1728 | " display(Markdown(\"\"\" #### v: For Distance File - Any space name in columns and not in Space Capacity\"\"\"))\n", 1729 | " if len(namesin_distcols_not_spc) > 0:\n", 1730 | " print(CRED+u'\\u2717'+CEND,BOLD,\"Names in distance columns and not in space capacity file :\",CEND,namesin_distcols_not_spc)\n", 1731 | " print(\"-------------------------------------------------------------\")\n", 1732 | " else:\n", 1733 | " print(CGREEN+u'\\u2713'+CEND,BOLD,\"Names in distance columns and not in space capacity file :\",CEND,None)\n", 1734 | " print(\"-------------------------------------------------------------\")\n", 1735 | " \n", 1736 | " \n", 1737 | "\n", 1738 | " \n", 1739 | " \n", 1740 | " \n", 1741 | " \n", 1742 | " \n", 1743 | "showdist_spcvsdist.on_click(click_on_show_spcvsdist_details)\n", 1744 | "hidedist_spcvsdist.on_click(hide_details_spcvsdist_details)\n", 1745 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))" 1746 | ] 1747 | }, 1748 | { 1749 | "cell_type": "code", 1750 | "execution_count": null, 1751 | "metadata": {}, 1752 | "outputs": [], 1753 | "source": [ 1754 | "clear_output()\n", 1755 | "# b. Get unique name of space catalogs-in columns\n", 1756 | "# ii.\tEnsure they are unique (no duplicates)\n", 1757 | "\n", 1758 | "\n", 1759 | "showdist_vv = widgets.Button(description ='Show details')\n", 1760 | "hidedist_vv = widgets.Button(description ='Hide details')\n", 1761 | "distvalidatevalues_mkdn = widgets.Output()\n", 1762 | "pass_or_fail_mkdwn12 = widgets.Output()\n", 1763 | "\n", 1764 | "psttestDT = rawDT.copy()\n", 1765 | "psttestDT2 = psttestDT.drop(psttestDT.columns[0], axis = 1).apply(pd.to_numeric)\n", 1766 | "numInvalidSizeDT = len(psttestDT2.columns[(psttestDT2 < 0).any()])\n", 1767 | "colInvalidSizeDT = psttestDT2.columns[(psttestDT2 < 0).any()]\n", 1768 | "\n", 1769 | "with distvalidatevalues_mkdn:\n", 1770 | " display(Markdown('Validate Distance Values'))\n", 1771 | "display(distvalidatevalues_mkdn)\n", 1772 | "with pass_or_fail_mkdwn12:\n", 1773 | " clear_output()\n", 1774 | " if numInvalidSizeDT > 0 :\n", 1775 | " display(HTML('❌'))\n", 1776 | " else:\n", 1777 | " display(HTML('
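For the distance values themselves, the only requirement is that every entry outside the label column coerces to a non-negative number, which is what the pd.to_numeric comparison above implements. A self-contained sketch on a toy matrix:

import pandas as pd

dist = pd.DataFrame({"Floor": ["F1", "F2"], "F1": [0.0, 12.5], "F2": [12.5, 0.0]})  # illustrative
values = dist.drop(columns=["Floor"]).apply(pd.to_numeric)
negative_cols = values.columns[(values < 0).any()]
print("all distances non-negative:", len(negative_cols) == 0)  # True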

✅'))\n", 1778 | " \n", 1779 | "display(widgets.HBox([pass_or_fail_mkdwn12,showdist_vv]))\n", 1780 | "display(out)\n", 1781 | "\n", 1782 | "\n", 1783 | "def hide_details_distvv_details(click):\n", 1784 | " clear_output()\n", 1785 | " display(distvalidatevalues_mkdn)\n", 1786 | " display(widgets.HBox([pass_or_fail_mkdwn12,showdist_vv]))\n", 1787 | " display(out)\n", 1788 | "\n", 1789 | "def click_on_show_distvv_details(click):\n", 1790 | " clear_output()\n", 1791 | " display(distvalidatevalues_mkdn)\n", 1792 | " display(widgets.HBox([pass_or_fail_mkdwn12,hidedist_vv]))\n", 1793 | " \n", 1794 | " display(Markdown(\"\"\" #### i: Validate Distance Values - Should be positive integer \"\"\"))\n", 1795 | " if numInvalidSizeDT > 0:\n", 1796 | " print(CRED+u'\\u2717'+CEND,BOLD,\"Matrix values should be positive numeric values for distance file :\",CEND, \"The following columns contain non-positive values : \", colInvalidSizeDT)\n", 1797 | " else:\n", 1798 | " print(CGREEN+u'\\u2713'+CEND,BOLD,\"All matrix values are positive numeric for distance file:\",CEND)\n", 1799 | " \n", 1800 | " \n", 1801 | "showdist_vv.on_click(click_on_show_distvv_details)\n", 1802 | "hidedist_vv.on_click(hide_details_distvv_details)\n", 1803 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))" 1804 | ] 1805 | }, 1806 | { 1807 | "cell_type": "code", 1808 | "execution_count": null, 1809 | "metadata": {}, 1810 | "outputs": [], 1811 | "source": [ 1812 | "clear_output()\n", 1813 | "#display(Markdown('### Step 1. Validate Team Size File'))\n", 1814 | "tab_nest_005 = widgets.Accordion(selected_index=None)\n", 1815 | "\n", 1816 | "intx_mkdown = widgets.Output()\n", 1817 | "\n", 1818 | "with intx_mkdown:\n", 1819 | " display(Markdown('

Step 4. Validate Interactions File:'))\n", 1820 | "\n", 1821 | " display(Markdown('    a. Validate columns layout.'))\n", 1822 | " display(Markdown('    i. Needs to be a K*K matrix'))\n", 1823 | " display(Markdown('    ii. K is the number of Teams – From Team Size File'))\n", 1824 | " display(Markdown('    b. Get unique name of Teams.'))\n", 1825 | " display(Markdown('    i. Check Time Investors Column'))\n", 1826 | " display(Markdown('    1. Ensure they are unique (no duplicates)'))\n", 1827 | " display(Markdown('    2. Check no empty or missing names'))\n", 1828 | " display(Markdown('    3. Check WPA-specific names. For example:'))\n", 1829 | " display(Markdown(' a. These are acceptable names but will be removed (not counted) ='))\n", 1830 | " display(Markdown(' [\"Other_Collaborators\", \"Unclassified_Internal\", \"Unclassified_External\", \"Collaborators Within Group\"]'))\n", 1831 | " display(Markdown(' b. \"Collaborators Within Group\" is acceptable in Collaborators column ONLY'))\n", 1832 | " display(Markdown(' c. No need to worry about these names -> Currently the createInteractionMatrix2 routine in python will drop them.'))\n", 1833 | " display(Markdown('    ii. Check Collaborators Column'))\n", 1834 | " display(Markdown('    1. Ensure they are unique (no duplicates)'))\n", 1835 | " display(Markdown('    2. Check no empty or missing names'))\n", 1836 | " display(Markdown('    3. Check WPA-specific names. For example:'))\n", 1837 | " display(Markdown(' a. These are acceptable names but will be removed (not counted) ='))\n", 1838 | " display(Markdown(' [\"Other_Collaborators\", \"Unclassified_Internal\", \"Unclassified_External\", \"Collaborators Within Group\"]'))\n", 1839 | " display(Markdown(' b. \"Collaborators Within Group\" is acceptable in Collaborators column ONLY'))\n", 1840 | " display(Markdown(' c. No other WPA specific names are accepted (might be difficult to detect them)'))\n", 1841 | " display(Markdown(' d. No need to worry about these names -> Currently the createInteractionMatrix2 routine in python will drop them.'))\n", 1842 | " display(Markdown('    c. Ensure Time Collaborators + Time Investors column names match.'))\n", 1843 | " display(Markdown('    i. With Team Size File'))\n", 1844 | "\n", 1845 | " display(Markdown('    d. Validate Values.'))\n", 1846 | " display(Markdown('    i. No NA, None or string'))\n", 1847 | " display(Markdown('    ii. Numeric'))\n", 1848 | " display(Markdown('    iii. Value > 0
'))\n", 1849 | " \n", 1850 | "tab_nest_005.children = [widgets.VBox([intx_mkdown])]\n", 1851 | "\n", 1852 | "tab_nest_005.set_title(0,'Validate Interaction File Requirements')\n", 1853 | "display(tab_nest_005)\n", 1854 | "display(outreq)\n", 1855 | "#K = len(teamCatalog2)\n", 1856 | "# print(\"Number of Teams: \", K)\n", 1857 | "# print(\"Total Team Combinations: \", K*K)\n", 1858 | "# print(\"Number of rows in Interactions File: \", len(rawIraw) )\n", 1859 | "# print(\"Missing + Existing: \", len(rawIraw) + len(missingValuesDF))\n", 1860 | "# print(\"Number of missing rows: \", len(missingValuesDF))\n", 1861 | "# print(\"Ratio of mising rows: \", len(missingValuesDF) / len(rawIraw))\n", 1862 | " \n", 1863 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))\n", 1864 | " " 1865 | ] 1866 | }, 1867 | { 1868 | "cell_type": "code", 1869 | "execution_count": null, 1870 | "metadata": {}, 1871 | "outputs": [], 1872 | "source": [ 1873 | "\n", 1874 | "clear_output()\n", 1875 | "\n", 1876 | "#a.\tValidate columns layout\n", 1877 | "# i.\tNeeds to be an K*K matrix \n", 1878 | "# ii.\tK is the number of Teams – From Team Size File\n", 1879 | "K = teamCatalog.shape[0]\n", 1880 | "teamCatalogSet = set(teamCatalog.iloc[:,0])\n", 1881 | "temp = createInteractionsMatrix2(rawIraw)\n", 1882 | "teamsOnCols = set(temp.columns[1:])\n", 1883 | "teamsOnRows = set(temp.iloc[:,0])\n", 1884 | "if(len(teamsOnCols) == len(teamsOnRows)):\n", 1885 | " diff1 = set()\n", 1886 | "elif(len(teamsOnCols) > len(teamsOnRows)):\n", 1887 | " diff1 = teamsOnCols - teamsOnRows\n", 1888 | "else:\n", 1889 | " diff1 = teamsOnRows - teamsOnCols \n", 1890 | "if(K == len(teamsOnCols)):\n", 1891 | " diff2 = set()\n", 1892 | "elif(K > len(teamsOnCols)):\n", 1893 | " diff2 = teamCatalogSet - teamsOnCols \n", 1894 | "else: \n", 1895 | " diff2 = teamsOnCols - teamCatalogSet\n", 1896 | "if(K == len(teamsOnRows)):\n", 1897 | " diff3 = set()\n", 1898 | "elif(K > len(teamsOnRows)):\n", 1899 | " diff3 = teamCatalogSet - teamsOnRows\n", 1900 | "else: \n", 1901 | " diff3 = teamsOnRows - teamCatalogSet\n", 1902 | "\n", 1903 | "\n", 1904 | "\n", 1905 | "\n", 1906 | "\n", 1907 | "showintx_cols_layout = widgets.Button(description ='Show details')\n", 1908 | "hideintx_cols_layout = widgets.Button(description ='Hide details')\n", 1909 | "intxcolsvalidate_mkdn = widgets.Output()\n", 1910 | "with intxcolsvalidate_mkdn:\n", 1911 | " display(Markdown('Validate Interaction Matrix columns layout'))\n", 1912 | "display(intxcolsvalidate_mkdn)\n", 1913 | "\n", 1914 | "pass_or_fail_mkdwn13 = widgets.Output()\n", 1915 | "with pass_or_fail_mkdwn13:\n", 1916 | " clear_output()\n", 1917 | " if(K != len(teamsOnCols) or K != len(teamsOnRows)):\n", 1918 | " display(HTML('❌'))\n", 1919 | " else:\n", 1920 | " display(HTML('
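The interactions file arrives as a (Time Investors, Collaborators, value) edge list and is pivoted into a K*K matrix by createInteractionsMatrix2 before the axis sizes are compared with the team catalogue. A rough equivalent using pandas.pivot on made-up teams (the real routine also drops the WPA reserved names):

import pandas as pd

edges = pd.DataFrame({  # illustrative edge list
    "Time Investors": ["A", "A", "B", "B"],
    "Collaborators":  ["A", "B", "A", "B"],
    "Hours":          [1.0, 2.0, 2.0, 1.0],
})
catalogue = {"A", "B"}  # K teams from the Team Size file

matrix = edges.pivot(index="Time Investors", columns="Collaborators", values="Hours")
print(set(matrix.index) == set(matrix.columns) == catalogue)  # True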

✅'))\n", 1921 | "\n", 1922 | "display(widgets.HBox([pass_or_fail_mkdwn13,showintx_cols_layout]))\n", 1923 | "\n", 1924 | "\n", 1925 | "# if(K != len(teamsOnCols) or K != len(teamsOnRows)):\n", 1926 | "# display(HTML('❌'))\n", 1927 | "# else:\n", 1928 | "# display(HTML('

✅'))\n", 1929 | "# display(showintx_cols_layout) \n", 1930 | "display(out)\n", 1931 | "\n", 1932 | "\n", 1933 | "def hide_details_intx_colslayout(click):\n", 1934 | " clear_output()\n", 1935 | " display(intxcolsvalidate_mkdn)\n", 1936 | " display(widgets.HBox([pass_or_fail_mkdwn13,showintx_cols_layout]))\n", 1937 | " display(out)\n", 1938 | "\n", 1939 | "\n", 1940 | " \n", 1941 | " \n", 1942 | "def click_on_show_intx_colslayout(click):\n", 1943 | " clear_output()\n", 1944 | " display(intxcolsvalidate_mkdn)\n", 1945 | " display(widgets.HBox([pass_or_fail_mkdwn13,hideintx_cols_layout]))\n", 1946 | " print(\"Number of Unique Teams in Team Size File: \", K)\n", 1947 | " print(\"Number of Unique Teams in Cols: \", len(teamsOnCols))\n", 1948 | " print(\"Number of Unique Teams in Rows: \", len(teamsOnRows))\n", 1949 | " \n", 1950 | "\n", 1951 | "\n", 1952 | " if(K != len(teamsOnCols) or K != len(teamsOnRows)):\n", 1953 | " print(CRED,\"**File Validation Error**\",CEND,\": Interaction Matrix needs to be a K*K matrix\")\n", 1954 | " if(diff1):\n", 1955 | " print(\"Difference between \"+ Col_1 + \" and \" + Col_2 + \" in Interactions File...\")\n", 1956 | " print(diff1)\n", 1957 | " elif(diff2):\n", 1958 | " print(\"Difference between Team Size and \"+ Col_1 + \" Interactions File...\")\n", 1959 | " print(diff2)\n", 1960 | " display(out)\n", 1961 | "\n", 1962 | "showintx_cols_layout.on_click(click_on_show_intx_colslayout)\n", 1963 | "hideintx_cols_layout.on_click(hide_details_intx_colslayout)\n", 1964 | "\n", 1965 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))" 1966 | ] 1967 | }, 1968 | { 1969 | "cell_type": "code", 1970 | "execution_count": null, 1971 | "metadata": {}, 1972 | "outputs": [], 1973 | "source": [ 1974 | "clear_output()\n", 1975 | "\n", 1976 | "ti_Col = rawIraw.columns[0]\n", 1977 | "cb_Col = rawIraw.columns[1]\n", 1978 | "\n", 1979 | "excludeKeywords = [\"@\",\"#\",\"$\",\"%\",\"&\",\"*\",\"_\",\"+\",\"-\",\"=\",\"|\",\":\",\";\",\"<\",\">\",\",\",\".\",\"/\",\"(\",\")\",\"[\",\"]\",\"{\",\"}\",\"\\\\\"]\n", 1980 | "checkinvalidINTX = rawIraw.copy()\n", 1981 | "\n", 1982 | "removeNames = [\"Other_Collaborators\", \"Unclassified_Internal\", \"Unclassified_External\", \"Collaborators Within Group\"]\n", 1983 | "\n", 1984 | "checkinvalidINTX[ti_Col] = checkinvalidINTX[ti_Col].apply(str)\n", 1985 | "checkinvalidINTX[cb_Col] = checkinvalidINTX[cb_Col].apply(str)\n", 1986 | "invalidTI= checkinvalidINTX[checkinvalidINTX.apply(lambda r: any([kw in r[0] for kw in excludeKeywords]), axis=1)]\n", 1987 | "\n", 1988 | "invalidTI = invalidTI[~invalidTI[ti_Col].isin(removeNames)]\n", 1989 | "\n", 1990 | "invalidTI = invalidTI[[ti_Col]].drop_duplicates()\n", 1991 | "\n", 1992 | "invalidCB= checkinvalidINTX[checkinvalidINTX.apply(lambda r: any([kw in r[1] for kw in excludeKeywords]), axis=1)]\n", 1993 | "\n", 1994 | "invalidCB = invalidCB[~invalidCB[cb_Col].isin(removeNames)]\n", 1995 | "\n", 1996 | "invalidCB = invalidCB[[cb_Col]].drop_duplicates()\n", 1997 | "\n", 1998 | "\n", 1999 | "showintx_ticb_invalid = widgets.Button(description ='Show details')\n", 2000 | "hideintx_ticb_invalid = widgets.Button(description ='Hide details')\n", 2001 | "intxinvalid_mkdn = widgets.Output()\n", 2002 | "with intxinvalid_mkdn:\n", 2003 | " display(Markdown(' Checking invalid character values - Time Investors & Collaborators column'))\n", 2004 | "display(intxinvalid_mkdn)\n", 2005 | "\n", 2006 | "pass_or_fail_mkdwn14 = 
widgets.Output()\n", 2007 | "with pass_or_fail_mkdwn14:\n", 2008 | " clear_output()\n", 2009 | " if len(invalidTI) > 0 or len(invalidCB) >0:\n", 2010 | " display(HTML('❌'))\n", 2011 | " else:\n", 2012 | " display(HTML('

✅'))\n", 2013 | "\n", 2014 | "display(widgets.HBox([pass_or_fail_mkdwn14,showintx_ticb_invalid]))\n", 2015 | "\n", 2016 | "\n", 2017 | "\n", 2018 | "# if len(invalidTI) > 0 or len(invalidCB) >0:\n", 2019 | "# display(HTML('❌'))\n", 2020 | "# else:\n", 2021 | "# display(HTML('

✅'))\n", 2022 | "\n", 2023 | "# display(showintx_ticb_invalid) \n", 2024 | "display(out)\n", 2025 | "\n", 2026 | "\n", 2027 | "def hide_details_intx_ticbinvalid(click):\n", 2028 | " clear_output()\n", 2029 | " display(intxinvalid_mkdn)\n", 2030 | " display(widgets.HBox([pass_or_fail_mkdwn14,showintx_ticb_invalid]))\n", 2031 | " display(out)\n", 2032 | "\n", 2033 | "\n", 2034 | "\n", 2035 | "def click_on_show_intx_ticbinvalid(click):\n", 2036 | " clear_output()\n", 2037 | " display(intxinvalid_mkdn)\n", 2038 | " display(widgets.HBox([pass_or_fail_mkdwn14,hideintx_ticb_invalid]))\n", 2039 | " if len(invalidTI) > 0 and len(invalidCB) < 1:\n", 2040 | " print(\"Invalid \" + ti_Col + \": \" + invalidTI .to_string(index=False,header=False))\n", 2041 | " elif len(invalidTI) < 1 and len(invalidCB) > 0:\n", 2042 | " print(\"Invalid \" + cb_Col + \": \" + invalidCB .to_string(index=False,header=False))\n", 2043 | " elif len(invalidTI) > 0 and len(invalidCB) > 0:\n", 2044 | " print(\"Invalid \" + ti_Col + \": \" + invalidTI .to_string(index=False,header=False))\n", 2045 | " print(\"Invalid \" + cb_Col + \": \" + invalidCB .to_string(index=False,header=False))\n", 2046 | " else:\n", 2047 | " print(\"No invalid characters found in \"+ ti_Col + \" or \" + cb_Col +\".\")\n", 2048 | "# if(numMissingTeams>0):\n", 2049 | "# errorMessage = \"Team name is mandatory. \\\"Team\\\" column cannot be empty or blank.\" \n", 2050 | "# emptyRowList = (np.where(isnullDF.iloc[:,0])[0]).tolist()\n", 2051 | " display(out)\n", 2052 | "\n", 2053 | " \n", 2054 | "showintx_ticb_invalid.on_click(click_on_show_intx_ticbinvalid)\n", 2055 | "hideintx_ticb_invalid.on_click(hide_details_intx_ticbinvalid) \n", 2056 | " \n", 2057 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+4)'))" 2058 | ] 2059 | }, 2060 | { 2061 | "cell_type": "code", 2062 | "execution_count": null, 2063 | "metadata": {}, 2064 | "outputs": [], 2065 | "source": [ 2066 | "\n", 2067 | "# clear_output()\n", 2068 | "\n", 2069 | "\n", 2070 | "\n", 2071 | "# showintx_ti_missing = widgets.Button(description ='Show details')\n", 2072 | "# hideintx_ti_missing = widgets.Button(description ='Hide details')\n", 2073 | "# intxtimissing_mkdn = widgets.Output()\n", 2074 | "# with intxtimissing_mkdn:\n", 2075 | "# display(Markdown(' Check missing values - Time Investors column'))\n", 2076 | "# display(intxtimissing_mkdn) \n", 2077 | "# display(showintx_ti_missing) \n", 2078 | "\n", 2079 | "\n", 2080 | "# def hide_details_intx_timissing(click):\n", 2081 | "# clear_output()\n", 2082 | "# display(intxtimissing_mkdn) \n", 2083 | "# display(showintx_ti_missing)\n", 2084 | "\n", 2085 | "\n", 2086 | "\n", 2087 | "# # display(Markdown('##### Check no empty None or missing values - Time Investors Column'))\n", 2088 | "\n", 2089 | "# # i.\tCheck Time Investors Column\n", 2090 | "# # 2.\tCheck no empty or missing names\n", 2091 | "# #2.\tCheck no empty (None, N/A NaN or missing names) in Time Investors Columns\n", 2092 | "# cols = rawIraw.columns \n", 2093 | "# timeInvestors = rawIraw[[rawIraw.columns[0]]].copy()\n", 2094 | "# df = timeInvestors.copy()\n", 2095 | "# isnullDF = df.replace(r'^\\s*$', np.nan, regex=True).isnull()\n", 2096 | "# numMissingTeams = isnullDF.iloc[:,0].sum()\n", 2097 | "\n", 2098 | "\n", 2099 | "# def click_on_show_intx_timissing(click):\n", 2100 | "# clear_output()\n", 2101 | "# display(intxtimissing_mkdn) \n", 2102 | "# display(hideintx_ti_missing)\n", 2103 | " \n", 2104 | "# 
print(\"numMissingTeams Names: \", numMissingTeams)\n", 2105 | "# if(numMissingTeams>0):\n", 2106 | "# errorMessage = \"Team name is mandatory. \\\"Team\\\" column cannot be empty or blank.\" \n", 2107 | "# emptyRowList = (np.where(isnullDF.iloc[:,0])[0]).tolist()\n", 2108 | "\n", 2109 | " \n", 2110 | "# showintx_ti_missing.on_click(click_on_show_intx_timissing)\n", 2111 | "# hideintx_ti_missing.on_click(hide_details_intx_timissing) \n", 2112 | " \n", 2113 | "# display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))" 2114 | ] 2115 | }, 2116 | { 2117 | "cell_type": "code", 2118 | "execution_count": null, 2119 | "metadata": {}, 2120 | "outputs": [], 2121 | "source": [ 2122 | "\n", 2123 | "# clear_output()\n", 2124 | "\n", 2125 | "# showintx_cb_missing = widgets.Button(description ='Show details')\n", 2126 | "# hideintx_cb_missing = widgets.Button(description ='Hide details')\n", 2127 | "# intxcbmissing_mkdn = widgets.Output()\n", 2128 | "# with intxcbmissing_mkdn:\n", 2129 | "# display(Markdown(' Check missing values - Collaborators column'))\n", 2130 | "# display(intxcbmissing_mkdn)\n", 2131 | "# display(showintx_cb_missing) \n", 2132 | "\n", 2133 | "\n", 2134 | "# def hide_details_intx_cbmissing(click):\n", 2135 | "# clear_output()\n", 2136 | "# display(intxcbmissing_mkdn)\n", 2137 | "# display(showintx_cb_missing)\n", 2138 | "\n", 2139 | "\n", 2140 | "# # display(Markdown('##### Check no empty None or missing values - Collaborators Column'))\n", 2141 | "\n", 2142 | "# # i.\tCheck Collaborators Column\n", 2143 | "# # 2.\tCheck no empty or missing names\n", 2144 | "# #2.\tCheck no empty (None, N/A NaN or missing names) \n", 2145 | "# cols = rawIraw.columns \n", 2146 | "# collaborators = rawIraw[[rawIraw.columns[1]]].copy()\n", 2147 | "# df = collaborators.copy()\n", 2148 | "# isnullDF = df.replace(r'^\\s*$', np.nan, regex=True).isnull()\n", 2149 | "# numMissingTeams = isnullDF.iloc[:,0].sum()\n", 2150 | "\n", 2151 | "\n", 2152 | "# def click_on_show_intx_cbmissing(click):\n", 2153 | "# clear_output()\n", 2154 | "# display(intxcbmissing_mkdn)\n", 2155 | "# display(hideintx_cb_missing)\n", 2156 | " \n", 2157 | "# print(\"numMissingTeams Names: \", numMissingTeams)\n", 2158 | "# if(numMissingTeams>0):\n", 2159 | "# errorMessage = \"Team name is mandatory. \\\"Team\\\" column cannot be empty or blank.\" \n", 2160 | "# emptyRowList = (np.where(isnullDF.iloc[:,0])[0]).tolist()\n", 2161 | "\n", 2162 | "# showintx_cb_missing.on_click(click_on_show_intx_cbmissing)\n", 2163 | "# hideintx_cb_missing.on_click(hide_details_intx_cbmissing) \n", 2164 | " \n", 2165 | "# display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))" 2166 | ] 2167 | }, 2168 | { 2169 | "cell_type": "code", 2170 | "execution_count": null, 2171 | "metadata": {}, 2172 | "outputs": [], 2173 | "source": [ 2174 | "\n", 2175 | "clear_output()\n", 2176 | "\n", 2177 | "showintx_uniquenames = widgets.Button(description ='Show details')\n", 2178 | "hideintx_uniquenames = widgets.Button(description ='Hide details')\n", 2179 | "intxuniquenames_mkdn = widgets.Output()\n", 2180 | "with intxuniquenames_mkdn:\n", 2181 | " display(Markdown(' Get unique name of Teams'))\n", 2182 | "display(intxuniquenames_mkdn)\n", 2183 | "pass_or_fail_mkdwn15 = widgets.Output()\n", 2184 | "with pass_or_fail_mkdwn15:\n", 2185 | " clear_output()\n", 2186 | " display(HTML('

✅'))\n", 2187 | "\n", 2188 | "display(widgets.HBox([pass_or_fail_mkdwn15,showintx_uniquenames]))\n", 2189 | "\n", 2190 | "display(out)\n", 2191 | "\n", 2192 | "\n", 2193 | "def hide_details_intx_uniquenames(click):\n", 2194 | " clear_output()\n", 2195 | " display(intxuniquenames_mkdn)\n", 2196 | " display(widgets.HBox([pass_or_fail_mkdwn15,showintx_uniquenames]))\n", 2197 | " display(out)\n", 2198 | " \n", 2199 | "#display(Markdown('##### Get unique name of Teams'))\n", 2200 | "\n", 2201 | "\n", 2202 | "# convert team names to string for both TI & CB cols:\n", 2203 | "rawIraw[Col_1] = rawIraw[Col_1].astype(str)\n", 2204 | "rawIraw[Col_2] = rawIraw[Col_2].astype(str)\n", 2205 | "\n", 2206 | "teamsOnCol1 = set(rawIraw[rawIraw.columns[0]])\n", 2207 | "teamsOnCol2 = set(rawIraw[rawIraw.columns[1]])\n", 2208 | "allNames = set(teamsOnCol1).union(set(teamsOnCol2))\n", 2209 | "\n", 2210 | "\n", 2211 | "def click_on_show_intx_uniquenames(click):\n", 2212 | " clear_output()\n", 2213 | " display(intxuniquenames_mkdn)\n", 2214 | " display(widgets.HBox([pass_or_fail_mkdwn15,hideintx_uniquenames]))\n", 2215 | "\n", 2216 | " print(\"Number of Raw Team Names in \" + Col_1 +\": \", len(teamsOnCol1))\n", 2217 | " print(\"Number of Raw Team Names in \" + Col_2 +\": \", len(teamsOnCol2))\n", 2218 | " print(\"Number or Raw Team Names in Both Cols: \", len(allNames))\n", 2219 | " display(out)\n", 2220 | " \n", 2221 | "showintx_uniquenames.on_click(click_on_show_intx_uniquenames)\n", 2222 | "hideintx_uniquenames.on_click(hide_details_intx_uniquenames) \n", 2223 | "\n", 2224 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))" 2225 | ] 2226 | }, 2227 | { 2228 | "cell_type": "code", 2229 | "execution_count": null, 2230 | "metadata": {}, 2231 | "outputs": [], 2232 | "source": [ 2233 | "\n", 2234 | "clear_output()\n", 2235 | "\n", 2236 | "showintx_rmvnames = widgets.Button(description ='Show details')\n", 2237 | "hideintx_rmvnames = widgets.Button(description ='Hide details')\n", 2238 | "rmvnames_mkdn = widgets.Output()\n", 2239 | "with rmvnames_mkdn:\n", 2240 | " display(Markdown(' Acceptable names but will be removed - [\"Other_Collaborators\", \"Unclassified_Internal\", \"Unclassified_External\", \"Collaborators Within Group\"]'))\n", 2241 | "display(rmvnames_mkdn)\n", 2242 | "\n", 2243 | "pass_or_fail_mkdwn16 = widgets.Output()\n", 2244 | "with pass_or_fail_mkdwn16:\n", 2245 | " clear_output()\n", 2246 | " display(HTML('

✅'))\n", 2247 | "\n", 2248 | "display(widgets.HBox([pass_or_fail_mkdwn16,showintx_rmvnames]))\n", 2249 | "\n", 2250 | "display(out)\n", 2251 | "\n", 2252 | "\n", 2253 | "def hide_details_intx_rmvnames(click):\n", 2254 | " clear_output()\n", 2255 | " display(rmvnames_mkdn)\n", 2256 | " display(widgets.HBox([pass_or_fail_mkdwn16,showintx_rmvnames]))\n", 2257 | " display(out)\n", 2258 | "\n", 2259 | "# Check WPA specific names: For Example\n", 2260 | "# a.\tThese are acceptable names but will be removed (not counted) =\n", 2261 | "# [\"Other_Collaborators\", \"Unclassified_Internal\", \"Unclassified_External\", \"Collaborators Within Group\"]\n", 2262 | "removeNames = [\"Other_Collaborators\", \"Unclassified_Internal\", \"Unclassified_External\", \"Collaborators Within Group\"]\n", 2263 | "keepNames = allNames - set(removeNames)\n", 2264 | "\n", 2265 | "def click_on_show_intx_rmvnames(click):\n", 2266 | " clear_output()\n", 2267 | " display(rmvnames_mkdn)\n", 2268 | " display(widgets.HBox([pass_or_fail_mkdwn16,hideintx_rmvnames]))\n", 2269 | " print(\"Number of Raw Team Names in Both less WPA Removed: \", len(keepNames))\n", 2270 | " display(out)\n", 2271 | "\n", 2272 | "showintx_rmvnames.on_click(click_on_show_intx_rmvnames)\n", 2273 | "hideintx_rmvnames.on_click(hide_details_intx_rmvnames) \n", 2274 | "\n", 2275 | " \n", 2276 | " \n", 2277 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))" 2278 | ] 2279 | }, 2280 | { 2281 | "cell_type": "code", 2282 | "execution_count": null, 2283 | "metadata": {}, 2284 | "outputs": [], 2285 | "source": [ 2286 | "\n", 2287 | "clear_output()\n", 2288 | "\n", 2289 | "temp1 = rawIraw.copy()\n", 2290 | "cols = rawIraw.columns\n", 2291 | "temp2 = temp1.loc[temp1[cols[1]].astype(str) =='Collaborators Within Group', cols[1]]\n", 2292 | "temp3 = temp1.loc[temp1[cols[0]].astype(str) =='Collaborators Within Group', cols[0]]\n", 2293 | "\n", 2294 | "showintx_cbacceptable = widgets.Button(description ='Show details')\n", 2295 | "hideintx_cbacceptable= widgets.Button(description ='Hide details')\n", 2296 | "intx_acptcb_mkdn = widgets.Output()\n", 2297 | "with intx_acptcb_mkdn:\n", 2298 | " display(Markdown(' \"Collaborators Within Group” is acceptable in Collaborators column ONLY '))\n", 2299 | "display(intx_acptcb_mkdn)\n", 2300 | "\n", 2301 | "\n", 2302 | "pass_or_fail_mkdwn17 = widgets.Output()\n", 2303 | "with pass_or_fail_mkdwn17:\n", 2304 | " clear_output()\n", 2305 | " if (len(temp3) == 0 and len(temp2) >= 0):\n", 2306 | " display(HTML('

✅'))\n", 2307 | " else:\n", 2308 | " display(HTML('❌'))\n", 2309 | "\n", 2310 | "display(widgets.HBox([pass_or_fail_mkdwn17,showintx_cbacceptable]))\n", 2311 | " \n", 2312 | "display(out)\n", 2313 | "\n", 2314 | "#display(Markdown('#### \"Collaborators Within Group” is acceptable in Collaborators column ONLY'))\n", 2315 | "\n", 2316 | "def hide_details_intx_cbacpt(click):\n", 2317 | " clear_output()\n", 2318 | " display(intx_acptcb_mkdn)\n", 2319 | " display(widgets.HBox([pass_or_fail_mkdwn17,showintx_cbacceptable]))\n", 2320 | " display(out)\n", 2321 | "\n", 2322 | "\n", 2323 | "\n", 2324 | "def click_on_show_intx_cbacpt(click):\n", 2325 | " clear_output()\n", 2326 | " display(intx_acptcb_mkdn)\n", 2327 | " display(widgets.HBox([pass_or_fail_mkdwn17,hideintx_cbacceptable]))\n", 2328 | " print(\"Number of Collaborators_Within_Group tag in Time Investors Col: \", len(temp3))\n", 2329 | " print(\"Number of Collaborators_Within_Group tag in Collaborators Col: \", len(temp2))\n", 2330 | " if (len(temp3) == 0 and len(temp2) >= 0):\n", 2331 | " print(\"Collaborators_within_group tags are OK\")\n", 2332 | " else: \n", 2333 | " print(\"collaborators_within_group tags are INVALID\")\n", 2334 | " display(out)\n", 2335 | "\n", 2336 | " \n", 2337 | "showintx_cbacceptable.on_click(click_on_show_intx_cbacpt)\n", 2338 | "hideintx_cbacceptable.on_click(hide_details_intx_cbacpt) \n", 2339 | " \n", 2340 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)')) " 2341 | ] 2342 | }, 2343 | { 2344 | "cell_type": "code", 2345 | "execution_count": null, 2346 | "metadata": {}, 2347 | "outputs": [], 2348 | "source": [ 2349 | "\n", 2350 | "clear_output()\n", 2351 | "\n", 2352 | "\n", 2353 | "uniqueTeams1 = teamsOnCol1 - set(removeNames)\n", 2354 | "uniqueTeams2 = teamsOnCol2 - set(removeNames)\n", 2355 | "if(len(uniqueTeams2) > len(uniqueTeams1)):\n", 2356 | " uniqueDiff = uniqueTeams2 - uniqueTeams1\n", 2357 | "else:\n", 2358 | " uniqueDiff = uniqueTeams1 - uniqueTeams2\n", 2359 | " \n", 2360 | " \n", 2361 | "showintx_namesmatch = widgets.Button(description ='Show details')\n", 2362 | "hideintx_namesmatch= widgets.Button(description ='Hide details')\n", 2363 | "intx_namesmatch_mkdn = widgets.Output()\n", 2364 | "with intx_namesmatch_mkdn:\n", 2365 | " display(Markdown(' Ensure Time Collaborators + Time Investors columns names match '))\n", 2366 | "display(intx_namesmatch_mkdn)\n", 2367 | "\n", 2368 | "pass_or_fail_mkdwn18 = widgets.Output()\n", 2369 | "with pass_or_fail_mkdwn18:\n", 2370 | " clear_output()\n", 2371 | " if(len(uniqueTeams1) != len(uniqueTeams2)): \n", 2372 | " display(HTML('❌'))\n", 2373 | " else:\n", 2374 | " display(HTML('

✅'))\n", 2375 | "\n", 2376 | "display(widgets.HBox([pass_or_fail_mkdwn18,showintx_namesmatch]))\n", 2377 | " \n", 2378 | "display(out)\n", 2379 | "#display(Markdown('#### Ensure Time Collaborators + Time Investors columns names match'))\n", 2380 | "\n", 2381 | "\n", 2382 | "\n", 2383 | "def hide_details_intx_namesmatch(click):\n", 2384 | " clear_output()\n", 2385 | " display(intx_namesmatch_mkdn)\n", 2386 | " display(widgets.HBox([pass_or_fail_mkdwn18,showintx_namesmatch]))\n", 2387 | " display(out)\n", 2388 | "\n", 2389 | " \n", 2390 | "def click_on_show_intx_namesmatch(click):\n", 2391 | " clear_output()\n", 2392 | " display(intx_namesmatch_mkdn)\n", 2393 | " display(widgets.HBox([pass_or_fail_mkdwn18,hideintx_namesmatch]))\n", 2394 | " print(\"Number of unique Teams in \" + Col_1 +\":\", len(uniqueTeams1))\n", 2395 | " print(\"Number of unique Teams in \" + Col_2 +\":\", len(uniqueTeams2))\n", 2396 | " if(len(uniqueTeams1) != len(uniqueTeams2)): \n", 2397 | " print(\"ERROR: Time Collaborators + Time Investors names don't match\")\n", 2398 | " print(\"Teams that don't match... \")\n", 2399 | "\n", 2400 | " display(uniqueDiff)\n", 2401 | " display(out)\n", 2402 | "\n", 2403 | "showintx_namesmatch.on_click(click_on_show_intx_namesmatch)\n", 2404 | "hideintx_namesmatch.on_click(hide_details_intx_namesmatch) \n", 2405 | " \n", 2406 | " \n", 2407 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))" 2408 | ] 2409 | }, 2410 | { 2411 | "cell_type": "code", 2412 | "execution_count": null, 2413 | "metadata": {}, 2414 | "outputs": [], 2415 | "source": [ 2416 | "\n", 2417 | "clear_output()\n", 2418 | "\n", 2419 | "intFileNames = keepNames\n", 2420 | "teamSizeFileNames = set(teamCatalog.iloc[:,0])\n", 2421 | "\n", 2422 | "\n", 2423 | "showintx_namesmatchwtsz = widgets.Button(description ='Show details')\n", 2424 | "hideintx_namesmatchwtsz= widgets.Button(description ='Hide details')\n", 2425 | "intx_namesmatchwtsz_mkdn = widgets.Output()\n", 2426 | "with intx_namesmatchwtsz_mkdn:\n", 2427 | " display(Markdown(' Ensure Time Collaborators + Time Investors columns names match --> with Team Size File '))\n", 2428 | "display(intx_namesmatchwtsz_mkdn)\n", 2429 | "\n", 2430 | "pass_or_fail_mkdwn19 = widgets.Output()\n", 2431 | "with pass_or_fail_mkdwn19:\n", 2432 | " clear_output()\n", 2433 | " if(len(teamSizeFileNames) != len(intFileNames)):\n", 2434 | " display(HTML('❌'))\n", 2435 | " else:\n", 2436 | " display(HTML('

✅'))\n", 2437 | "\n", 2438 | "display(widgets.HBox([pass_or_fail_mkdwn19,showintx_namesmatchwtsz]))\n", 2439 | "\n", 2440 | "#display(Markdown('#### Ensure Time Collaborators + Time Investors columns names match --> with Team Size File'))\n", 2441 | "display(out)\n", 2442 | "\n", 2443 | "def hide_details_intx_namesmatchwtsz(click):\n", 2444 | " clear_output()\n", 2445 | " display(intx_namesmatchwtsz_mkdn)\n", 2446 | " display(widgets.HBox([pass_or_fail_mkdwn19,showintx_namesmatchwtsz])) \n", 2447 | " display(out)\n", 2448 | "\n", 2449 | "\n", 2450 | "\n", 2451 | "def click_on_show_intx_namesmatchwtsz(click):\n", 2452 | " clear_output()\n", 2453 | " display(intx_namesmatchwtsz_mkdn)\n", 2454 | " display(widgets.HBox([pass_or_fail_mkdwn19,hideintx_namesmatchwtsz])) \n", 2455 | " display(out)\n", 2456 | "\n", 2457 | " print(\"Number of Teams in Team Size File: \", len(teamSizeFileNames))\n", 2458 | " print(\"Number of Teams in Interactions File: \", len(intFileNames))\n", 2459 | " if(len(teamSizeFileNames) == len(intFileNames)):\n", 2460 | " print(\"The total number of teams in both files match\")\n", 2461 | " fileNamesDiff = teamSizeFileNames ^ intFileNames\n", 2462 | " elif(len(teamSizeFileNames) > len(intFileNames)):\n", 2463 | " fileNamesDiff = teamSizeFileNames - intFileNames\n", 2464 | " else:\n", 2465 | " fileNamesDiff = intFileNames - teamSizeFileNames \n", 2466 | " display(fileNamesDiff)\n", 2467 | " \n", 2468 | " \n", 2469 | "showintx_namesmatchwtsz.on_click(click_on_show_intx_namesmatchwtsz)\n", 2470 | "hideintx_namesmatchwtsz.on_click(hide_details_intx_namesmatchwtsz) \n", 2471 | "\n", 2472 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))" 2473 | ] 2474 | }, 2475 | { 2476 | "cell_type": "code", 2477 | "execution_count": null, 2478 | "metadata": {}, 2479 | "outputs": [], 2480 | "source": [ 2481 | "\n", 2482 | "clear_output()\n", 2483 | "\n", 2484 | "scenario2_markdown = widgets.Output()\n", 2485 | "\n", 2486 | "with scenario2_markdown:\n", 2487 | " display(Markdown('

Scenario 2: If teams are missing from the Team Size file but exist in one or both of the Time Investor/Time Collaborator columns, prompt the user with the message “Team name exists in Interaction File that does not exist in Team Size file: {name1, name2, etc.}” and stop validation.
'))\n", 2488 | "\n", 2489 | "display(scenario2_markdown)\n", 2490 | "\n", 2491 | "\n", 2492 | "#Report differences - Across all teams files and columns\n", 2493 | "diffTS1 = teamSizeFileNames - uniqueTeams1\n", 2494 | "diffTS2 = teamSizeFileNames - uniqueTeams2\n", 2495 | "\n", 2496 | "diffIn1 = uniqueTeams1 - teamSizeFileNames\n", 2497 | "diffIn2 = uniqueTeams2 - teamSizeFileNames\n", 2498 | "\n", 2499 | "diffIn3 = uniqueTeams1 - uniqueTeams2\n", 2500 | "diffIn4 = uniqueTeams2 - uniqueTeams1\n", 2501 | "diffIn5 = uniqueTeams1 ^ uniqueTeams2\n", 2502 | "\n", 2503 | "\n", 2504 | " \n", 2505 | " \n", 2506 | "\n", 2507 | "if len(diffTS1)> 0:\n", 2508 | " print(\"Teams in Team Size and NOT in Interactions \" + Col_1 +\"... \")\n", 2509 | " print(\" \", diffTS1)\n", 2510 | "else:\n", 2511 | " print(\"Teams in Team Size and NOT in Interactions \" + Col_1 +\"... \")\n", 2512 | " print(\" \", 'None')\n", 2513 | " \n", 2514 | "print(\"Teams in Team Size and NOT in Interactions \" + Col_2 +\"... \")\n", 2515 | "print(\" \", diffTS2)\n", 2516 | "\n", 2517 | "\n", 2518 | "print(\"Teams in \" + Col_1 +\" and not in \" + Col_2 +\"...\")\n", 2519 | "print(\" \", diffIn3)\n", 2520 | "print(\"Teams in \" + Col_2 +\" and not in \" + Col_1 +\"...\")\n", 2521 | "print(\" \", diffIn4)\n", 2522 | "print(\"Total teams mismatches between \" + Col_1 + \" and \" + Col_2)\n", 2523 | "print(\" \", diffIn5)\n", 2524 | "\n", 2525 | "\n", 2526 | "\n", 2527 | "\n", 2528 | "if len(diffIn1) > 0 and len(diffIn2) > 0:\n", 2529 | " totaldiff1_2 = list(set(diffIn1) | set(diffIn2))\n", 2530 | " \n", 2531 | "elif len(diffIn1) > 0 and len(diffIn2) < 1:\n", 2532 | " totaldiff1_2 = list(set(diffIn1))\n", 2533 | " \n", 2534 | "elif len(diffIn1) < 1 and len(diffIn2) > 0:\n", 2535 | " totaldiff1_2 = list(set(diffIn2))\n", 2536 | "\n", 2537 | " \n", 2538 | " \n", 2539 | "scenario2error_markdown = widgets.Output()\n", 2540 | "\n", 2541 | "with scenario2error_markdown:\n", 2542 | " display(Markdown('
ERROR!: Team name exists in Interaction File that does not exist in Team Size file: {'+str(totaldiff1_2)+'}.
'))\n", 2543 | " \n", 2544 | " \n", 2545 | "if len(diffIn1) < 1 and len(diffIn2) < 1:\n", 2546 | " \n", 2547 | " print(\"Teams in \" + Col_1 +\" and not in Team Size... \")\n", 2548 | " print(\"None\")\n", 2549 | " print(\"Teams in \" + Col_2 +\" and not in Team Size... \")\n", 2550 | " print(\"None\") \n", 2551 | " display(HTML('

✅ Passed

'))\n", 2552 | " display(out)\n", 2553 | " display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+1)'))\n", 2554 | "else:\n", 2555 | " display(scenario2error_markdown)\n", 2556 | " #display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))\n", 2557 | " display(out)\n", 2558 | " \n", 2559 | "\n", 2560 | " \n", 2561 | "# date_stamp = \"\"\n", 2562 | "# out_dir = \"Abel_tests_003\" + date_stamp + \"/\"\n", 2563 | "# out_dir\n" 2564 | ] 2565 | }, 2566 | { 2567 | "cell_type": "code", 2568 | "execution_count": null, 2569 | "metadata": {}, 2570 | "outputs": [], 2571 | "source": [ 2572 | "style = {'description_width': 'initial'}\n", 2573 | "\n", 2574 | "superoutput_toggle = widgets.ToggleButtons(\n", 2575 | " options=['Total Collaboration', 'Boost Intra-Collab', 'Latest date','Avg. Collaboration'],\n", 2576 | " disabled=False,\n", 2577 | " value=None,\n", 2578 | " button_style='primary',\n", 2579 | " style = style\n", 2580 | " \n", 2581 | " \n", 2582 | ")\n", 2583 | "\n", 2584 | "boost_slider = widgets.IntSlider(value=0, min=0,max=10,step=1, description = 'Boost Multiplier:', style=style)\n", 2585 | "\n", 2586 | "boost_ready = widgets.Button(description='Ready',button_style='success')\n", 2587 | "\n", 2588 | "\n", 2589 | "scenario3_markdown = widgets.Output()\n", 2590 | "\n", 2591 | "\n", 2592 | "with scenario3_markdown:\n", 2593 | " display(Markdown('
Scenario 3: A team name is in the Team Size file but appears only in the TimeInvestor or Collaborator column within the Interaction file. The missing combinations of collaboration among these teams will be generated here...
'))\n", 2594 | "\n", 2595 | " \n", 2596 | "display(scenario3_markdown)\n", 2597 | "#display(superoutput_toggle)\n", 2598 | " \n", 2599 | "pick_output_markdown = widgets.Output()\n", 2600 | "\n", 2601 | "\n", 2602 | "with pick_output_markdown:\n", 2603 | " display(Markdown('
Which output scenario do you need: Latest date, Avg. Collaboration, Total Collaboration, or Boost Intra-Collab?
'))\n", 2604 | " display(Markdown(\"Total Collaboration: This will group by teams( both Investor and Collaborator) and get total(sum) hours spent.\")) \n", 2605 | " display(Markdown(\"Latest date: This will pick the latest interaction date from a Team to Team collaboration.\"))\n", 2606 | " display(Markdown(\"Average Collaboration: This will group by teams( both Investor and Collaborator) and get average hours spent.\"))\n", 2607 | " display(Markdown(\"Boost Intra-Collab: This will boost collaboration amongst same teams( both Investor and Collaborator).\")) \n", 2608 | " \n", 2609 | "if len(diffIn1) < 1 and len(diffIn2) < 1:\n", 2610 | " clear_output()\n", 2611 | " display(pick_output_markdown)\n", 2612 | " display(superoutput_toggle)\n", 2613 | "else:\n", 2614 | " clear_output()\n", 2615 | "\n", 2616 | "\n", 2617 | "def boost_multiplier_on_change(change):\n", 2618 | " global boostv \n", 2619 | " boostv = change.new\n", 2620 | " \n", 2621 | " \n", 2622 | " \n", 2623 | "def ready_clicked(clicked):\n", 2624 | " display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))\n", 2625 | "\n", 2626 | "\n", 2627 | "\n", 2628 | "\n", 2629 | "\n", 2630 | "def toggle_on_click_output(change):\n", 2631 | " if change.new =='Latest date':\n", 2632 | " clear_output()\n", 2633 | " display(scenario3_markdown)\n", 2634 | " display(widgets.VBox([pick_output_markdown,superoutput_toggle]))\n", 2635 | " display(Markdown(\"Latest date: This will pick the latest interaction date from a Team to Team collaboration.\"))\n", 2636 | " display(out)\n", 2637 | " display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))\n", 2638 | " \n", 2639 | " elif change.new=='Avg. 
Collaboration':\n", 2640 | " clear_output()\n", 2641 | " display(scenario3_markdown)\n", 2642 | " display(widgets.VBox([pick_output_markdown,superoutput_toggle]))\n", 2643 | " display(Markdown(\"Average Collaboration: This will group by teams( both Investor and Collaborator) and get average hours spent.\"))\n", 2644 | " display(out)\n", 2645 | " display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))\n", 2646 | " \n", 2647 | " elif change.new=='Total Collaboration':\n", 2648 | " clear_output()\n", 2649 | " display(scenario3_markdown)\n", 2650 | " display(widgets.VBox([pick_output_markdown,superoutput_toggle]))\n", 2651 | " display(Markdown(\"Total Collaboration: This will group by teams( both Investor and Collaborator) and get total(sum) hours spent.\"))\n", 2652 | " display(out)\n", 2653 | " display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))\n", 2654 | " \n", 2655 | " elif change.new=='Boost Intra-Collab':\n", 2656 | " clear_output()\n", 2657 | " display(scenario3_markdown)\n", 2658 | " display(widgets.VBox([pick_output_markdown,superoutput_toggle]))\n", 2659 | " display(Markdown(\"Boost Intra-Collab: This will boost collaboration amongst same teams( both Investor and Collaborator).\"))\n", 2660 | " display(widgets.HBox([boost_slider,boost_ready]))\n", 2661 | " display(out)\n", 2662 | "superoutput_toggle.observe(toggle_on_click_output, 'value')\n", 2663 | "boost_slider.observe(boost_multiplier_on_change,names='value')\n", 2664 | "boost_ready.on_click(ready_clicked)" 2665 | ] 2666 | }, 2667 | { 2668 | "cell_type": "code", 2669 | "execution_count": null, 2670 | "metadata": {}, 2671 | "outputs": [], 2672 | "source": [ 2673 | "if len(diffIn1) < 1 and len(diffIn2) < 1:\n", 2674 | "\n", 2675 | " names = list(rawIraw.columns[0:2])\n", 2676 | "\n", 2677 | " tsnames = list(set(rawTS['Team']))\n", 2678 | " tsnames2 = list(set(rawTS['Team']))\n", 2679 | "\n", 2680 | " combined = [tsnames, tsnames2]\n", 2681 | " today = date.today()\n", 2682 | " datetoday = today.strftime(\"%m/%d/%y\")\n", 2683 | "\n", 2684 | "\n", 2685 | " if superoutput_toggle.value == 'Latest date': \n", 2686 | " mergenames = list(names)\n", 2687 | " mergenames.append('Date')\n", 2688 | "\n", 2689 | "\n", 2690 | " rawIraw['Date'] = pd.to_datetime(rawIraw['Date'], format='%m/%d/%Y')\n", 2691 | "\n", 2692 | " data_last_date = rawIraw.groupby(names, as_index=False).agg({\"Date\": \"max\"})\n", 2693 | "\n", 2694 | "\n", 2695 | " data_1 = pd.merge(data_last_date, rawIraw,on=mergenames, how='inner')\n", 2696 | "\n", 2697 | " #ti = data_1.columns[0]\n", 2698 | " #cb = data_1.columns[1]\n", 2699 | " #unique_gg = pd.concat([data_1[ti].rename(columns={ti: 'team'}, inplace=True),data_1[cb].rename(columns={cb: 'team'}, inplace=True)]).drop_duplicates()\n", 2700 | "\n", 2701 | "\n", 2702 | " #unique_TI= data_1[ti].rename(columns={ti: 'team'}, inplace=True).drop_duplicates()\n", 2703 | " #unique_CB = data_1[cb].rename(columns={cb: 'team'}, inplace=True).drop_duplicates()\n", 2704 | "\n", 2705 | "\n", 2706 | "\n", 2707 | " #d = []\n", 2708 | " #for x in unique_TI:\n", 2709 | " #for y in unique_CB:\n", 2710 | " #d.append((x,y))\n", 2711 | "\n", 2712 | " #allComb = pd.DataFrame(d,columns=names)\n", 2713 | " missing_combinationsDF = pd.DataFrame(columns = names, data=list(itertools.product(*combined)))\n", 2714 | " missing_combinationsDF = 
missing_combinationsDF.astype(str)\n", 2715 | "\n", 2716 | " superinteractionsDF = pd.merge(missing_combinationsDF, data_1, on=names, how= \"outer\").fillna(0)\n", 2717 | " missingvaluereportDF = superinteractionsDF[superinteractionsDF['Collaboration_hours']==0]\n", 2718 | " numofmissingrecords = missingvaluereportDF.shape[0]\n", 2719 | "\n", 2720 | "\n", 2721 | "\n", 2722 | "\n", 2723 | " elif superoutput_toggle.value == 'Avg. Collaboration': \n", 2724 | "\n", 2725 | " missing_combinationsDF = pd.DataFrame(columns = names, data=list(itertools.product(*combined)))\n", 2726 | " missing_combinationsDF = missing_combinationsDF.astype(str)\n", 2727 | "\n", 2728 | " superinteractionsDF = missing_combinationsDF.merge(rawIraw, how='outer').fillna(0)\n", 2729 | " superinteractionsDF.Date.replace([0],datetoday,inplace=True)\n", 2730 | "\n", 2731 | " rawagg_interactionsDF = rawIraw.groupby(names).sum().reset_index()\n", 2732 | "\n", 2733 | " missingvaluereportDF = superinteractionsDF[superinteractionsDF['Collaboration_hours']==0]\n", 2734 | "\n", 2735 | " superinteractionsDF = superinteractionsDF.groupby(names).mean().reset_index()\n", 2736 | "\n", 2737 | " superinteractionsDF['Date']=datetoday\n", 2738 | " col_order = list(names)\n", 2739 | " col_order.append('Date')\n", 2740 | " col_order.append('Collaboration_hours')\n", 2741 | "\n", 2742 | "\n", 2743 | " superinteractionsDF = superinteractionsDF.reindex(columns=col_order)\n", 2744 | "\n", 2745 | " numofmissingrecords = missingvaluereportDF.shape[0]\n", 2746 | "\n", 2747 | "\n", 2748 | " elif superoutput_toggle.value == 'Total Collaboration': \n", 2749 | "\n", 2750 | " missing_combinationsDF = pd.DataFrame(columns = names, data=list(itertools.product(*combined)))\n", 2751 | " missing_combinationsDF = missing_combinationsDF.astype(str)\n", 2752 | "\n", 2753 | " superinteractionsDF = missing_combinationsDF.merge(rawIraw, how='outer').fillna(0)\n", 2754 | " superinteractionsDF.Date.replace([0],datetoday,inplace=True)\n", 2755 | "\n", 2756 | " rawagg_interactionsDF = rawIraw.groupby(names).sum().reset_index()\n", 2757 | "\n", 2758 | " missingvaluereportDF = superinteractionsDF[superinteractionsDF['Collaboration_hours']==0]\n", 2759 | "\n", 2760 | " superinteractionsDF = superinteractionsDF.groupby(names).sum().reset_index()\n", 2761 | "\n", 2762 | " superinteractionsDF['Date']=datetoday\n", 2763 | " col_order = list(names)\n", 2764 | " col_order.append('Date')\n", 2765 | " col_order.append('Collaboration_hours')\n", 2766 | "\n", 2767 | "\n", 2768 | " superinteractionsDF = superinteractionsDF.reindex(columns=col_order)\n", 2769 | "\n", 2770 | "\n", 2771 | " numofmissingrecords = missingvaluereportDF.shape[0]\n", 2772 | " \n", 2773 | " elif superoutput_toggle.value == 'Boost Intra-Collab': \n", 2774 | " \n", 2775 | " output_slider = widgets.Output()\n", 2776 | " with output_slider:\n", 2777 | " print(\"Collaboration will boost using \"+ str(boostv)+\"X multiplier of max collaboration\")\n", 2778 | " display(output_slider)\n", 2779 | "\n", 2780 | " missing_combinationsDF = pd.DataFrame(columns = names, data=list(itertools.product(*combined)))\n", 2781 | " missing_combinationsDF = missing_combinationsDF.astype(str)\n", 2782 | "\n", 2783 | " superinteractionsDF = missing_combinationsDF.merge(rawIraw, how='outer').fillna(0)\n", 2784 | " superinteractionsDF.Date.replace([0],datetoday,inplace=True)\n", 2785 | "\n", 2786 | " rawagg_interactionsDF = rawIraw.groupby(names).sum().reset_index()\n", 2787 | "\n", 2788 | " missingvaluereportDF = 
superinteractionsDF[superinteractionsDF['Collaboration_hours']==0]\n", 2789 | "\n", 2790 | " superinteractionsDF = superinteractionsDF.groupby(names).sum().reset_index()\n", 2791 | "\n", 2792 | " maxcolumn = superinteractionsDF[\"Collaboration_hours\"]\n", 2793 | " max_value = maxcolumn.max() \n", 2794 | " boosted_value = max_value * boostv\n", 2795 | "\n", 2796 | " superinteractionsDF['Collaboration_hours'] = np.where(superinteractionsDF[names[0]] == superinteractionsDF[names[1]], boosted_value, superinteractionsDF['Collaboration_hours'])\n", 2797 | "\n", 2798 | " superinteractionsDF['Date']=datetoday\n", 2799 | " col_order = list(names)\n", 2800 | " col_order.append('Date')\n", 2801 | " col_order.append('Collaboration_hours')\n", 2802 | "\n", 2803 | "\n", 2804 | " superinteractionsDF = superinteractionsDF.reindex(columns=col_order)\n", 2805 | "\n", 2806 | "\n", 2807 | " numofmissingrecords = missingvaluereportDF.shape[0]\n", 2808 | "\n", 2809 | "\n", 2810 | " if superoutput_toggle.value == 'Latest date':\n", 2811 | "\n", 2812 | " display(Markdown(\"Total records in original Interaction file : \" + \"\"+str(rawIraw.shape[0])+\"\"))\n", 2813 | " display(Markdown(\"Total records with reduced latest date Interaction file : \" + \"\"+str(data_1.shape[0])+\"\"))\n", 2814 | " display(Markdown(\"Total records added based on missing combinations : \" + \"\"+str(numofmissingrecords)+\"\"))\n", 2815 | " display(Markdown('Total records for new \"Superinteractions\" file : '+ \"\"+str(numofmissingrecords+data_1.shape[0]) +\"\"))\n", 2816 | " display(HTML('

✅ Done

'))\n", 2817 | " display(out)\n", 2818 | " \n", 2819 | " elif superoutput_toggle.value == 'Avg. Collaboration':\n", 2820 | "\n", 2821 | " display(Markdown(\"Total records in original Interaction file : \" + \"\"+str(rawIraw.shape[0])+\"\"))\n", 2822 | " display(Markdown(\"Total records with reduced by agg{average} Interaction file : \" + \"\"+str(rawagg_interactionsDF.shape[0])+\"\")) \n", 2823 | " display(Markdown(\"Total records added based on missing combinations : \" + \"\"+str(numofmissingrecords)+\"\"))\n", 2824 | " display(Markdown('Total records for new \"Superinteractions\" file : '+ \"\"+str(numofmissingrecords+rawagg_interactionsDF.shape[0]) +\"\"))\n", 2825 | " display(HTML('

✅ Done

'))\n", 2826 | " display(out)\n", 2827 | "\n", 2828 | " elif superoutput_toggle.value == 'Total Collaboration':\n", 2829 | "\n", 2830 | " display(Markdown(\"Total records in original Interaction file : \" + \"\"+str(rawIraw.shape[0])+\"\"))\n", 2831 | " display(Markdown(\"Total records with reduced by agg{sum} Interaction file : \" + \"\"+str(rawagg_interactionsDF.shape[0])+\"\")) \n", 2832 | " display(Markdown(\"Total records added based on missing combinations : \" + \"\"+str(numofmissingrecords)+\"\"))\n", 2833 | " display(Markdown('Total records for new \"Superinteractions\" file : '+ \"\"+str(numofmissingrecords+rawagg_interactionsDF.shape[0]) +\"\"))\n", 2834 | " display(HTML('

✅ Done

'))\n", 2835 | " display(out)\n", 2836 | " \n", 2837 | " elif superoutput_toggle.value == 'Boost Intra-Collab':\n", 2838 | "\n", 2839 | " display(Markdown(\"Total records in original Interaction file : \" + \"\"+str(rawIraw.shape[0])+\"\"))\n", 2840 | " display(Markdown(\"Total records with reduced by agg{sum} Interaction file : \" + \"\"+str(rawagg_interactionsDF.shape[0])+\"\")) \n", 2841 | " display(Markdown(\"Hours boosted value for Group A matching Group B : \" + \"\"+str(boosted_value)+\"\"))\n", 2842 | " display(Markdown(\"Total records added based on missing combinations : \" + \"\"+str(numofmissingrecords)+\"\"))\n", 2843 | " display(Markdown('Total records for new \"Superinteractions\" file : '+ \"\"+str(numofmissingrecords+rawagg_interactionsDF.shape[0]) +\"\"))\n", 2844 | " display(HTML('

✅ Done

'))\n", 2845 | " display(out)\n", 2846 | "else:\n", 2847 | " clear_output()\n", 2848 | " \n", 2849 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))" 2850 | ] 2851 | }, 2852 | { 2853 | "cell_type": "code", 2854 | "execution_count": null, 2855 | "metadata": {}, 2856 | "outputs": [], 2857 | "source": [ 2858 | "if len(diffIn1) < 1 and len(diffIn2) < 1:\n", 2859 | "\n", 2860 | " keyCol_1 = missingvaluereportDF.columns[0]\n", 2861 | " keyCol_2 = missingvaluereportDF.columns[1]\n", 2862 | "\n", 2863 | " mvrMatrixDF = missingvaluereportDF.pivot_table(index=keyCol_1, columns=keyCol_2,values=\"Collaboration_hours\")\n", 2864 | "\n", 2865 | " mvrMatrixDF.columns.name = None\n", 2866 | " mvrMatrixDF.index.name = None\n", 2867 | " mvrMatrixDF = mvrMatrixDF.reset_index()\n", 2868 | " mvrMatrixDF = mvrMatrixDF.fillna('Not Missing')\n", 2869 | " mvrMatrixDF.drop(mvrMatrixDF.columns[0],axis=1)\n", 2870 | " mvrMatrixDF.rename(columns = {mvrMatrixDF.columns[0]: 'Collab' }, inplace = True)\n", 2871 | "else:\n", 2872 | " clear_output()\n", 2873 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))" 2874 | ] 2875 | }, 2876 | { 2877 | "cell_type": "code", 2878 | "execution_count": null, 2879 | "metadata": {}, 2880 | "outputs": [], 2881 | "source": [ 2882 | "if len(diffIn1) < 1 and len(diffIn2) < 1:\n", 2883 | " def color_missing(val):\n", 2884 | " color = 'red' if val == 0 else 'black'\n", 2885 | " #color = 'white' if val == 'Not Missing' else 'black'\n", 2886 | " return 'color: %s' % color\n", 2887 | " if numofmissingrecords > 0:\n", 2888 | " display(Markdown('Missing values matrix shown below: \"Not Missing\" means an existing collaboration exists'))\n", 2889 | " display(Markdown('Please wait for output...'))\n", 2890 | " display(mvrMatrixDF.style.applymap(color_missing))\n", 2891 | " display(out)\n", 2892 | " else:\n", 2893 | " clear_output()\n", 2894 | "else:\n", 2895 | " clear_output()\n", 2896 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))" 2897 | ] 2898 | }, 2899 | { 2900 | "cell_type": "code", 2901 | "execution_count": null, 2902 | "metadata": {}, 2903 | "outputs": [], 2904 | "source": [ 2905 | "root = './Final Files set/'\n", 2906 | "if len(diffIn1) < 1 and len(diffIn2) < 1:\n", 2907 | "\n", 2908 | " try:\n", 2909 | " if not (os.path.exists(root) and os.path.isdir(root)):\n", 2910 | " os.mkdir(root)\n", 2911 | " print (\"Successfully created the directory %s \" % root)\n", 2912 | " except OSError:\n", 2913 | " print (\"Creation of the directory %s failed\" % root)\n", 2914 | "\n", 2915 | " superinteractionsDF.to_csv(root+'interactions.csv',index = False)\n", 2916 | "\n", 2917 | "\n", 2918 | " rawTS.to_csv(root+'team_size.csv',index = False)\n", 2919 | " rawFC.to_csv(root+'space_capacity.csv',index = False)\n", 2920 | " rawDT.to_csv(root+'distance.csv',index = False)\n", 2921 | "\n", 2922 | " mvrMatrixDF.to_csv('missingvaluesMatrix.csv',index = False)\n", 2923 | "\n", 2924 | " saveintxfinsh = '
Done: Your \"super\" interaction file (interactions.csv) along with the other 3 files(team_size.csv, space_capacity.csv, and distance.csv ) have successfully saved back to a new sub-directory inside the location of this Notebook! Folder name is [Final Files set].
'\n", 2925 | "\n", 2926 | "else:\n", 2927 | " clear_output()\n", 2928 | " \n", 2929 | "display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))" 2930 | ] 2931 | }, 2932 | { 2933 | "cell_type": "code", 2934 | "execution_count": null, 2935 | "metadata": {}, 2936 | "outputs": [], 2937 | "source": [ 2938 | "if len(diffIn1) < 1 and len(diffIn2) < 1:\n", 2939 | " display(Markdown(saveintxfinsh))\n", 2940 | " display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))\n", 2941 | "\n", 2942 | "else:\n", 2943 | " clear_output()\n" 2944 | ] 2945 | }, 2946 | { 2947 | "cell_type": "code", 2948 | "execution_count": null, 2949 | "metadata": {}, 2950 | "outputs": [], 2951 | "source": [ 2952 | "style2 = {'description_width': '425px'}\n", 2953 | "layout2 = {'width': '300px'}\n", 2954 | "startAlgorithmNB = widgets.Button(description ='Open Generate Floorplan Notebook',style=style2,layout=layout2,button_style='info')\n", 2955 | "display(startAlgorithmNB)\n", 2956 | "\n", 2957 | "def go_to_nextblock(btn):\n", 2958 | " display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))\n", 2959 | "\n", 2960 | "startAlgorithmNB.on_click(go_to_nextblock) \n", 2961 | " " 2962 | ] 2963 | }, 2964 | { 2965 | "cell_type": "code", 2966 | "execution_count": null, 2967 | "metadata": {}, 2968 | "outputs": [], 2969 | "source": [ 2970 | "%%javascript\n", 2971 | "var name_of_the_notebook = 'WPP.Generate-Floorplan_PIPE-003_v2.1.1.ipynb'\n", 2972 | "var url = window.location.href.split('/')\n", 2973 | "var newurl = url[0] + '//'\n", 2974 | "for (var i = 1; i < url.length - 1; i++) {\n", 2975 | " console.log(url[i], newurl)\n", 2976 | " newurl += url[i] + '/'\n", 2977 | "}\n", 2978 | "newurl += name_of_the_notebook\n", 2979 | "window.open(newurl)" 2980 | ] 2981 | }, 2982 | { 2983 | "cell_type": "code", 2984 | "execution_count": null, 2985 | "metadata": {}, 2986 | "outputs": [], 2987 | "source": [ 2988 | "# melted_MatrixDF = mvrMatrixDF.melt(id_vars=['Collab'], var_name = ['Teams'])\n", 2989 | "# only_missingDF = melted_MatrixDF[melted_MatrixDF['value']==0]\n" 2990 | ] 2991 | }, 2992 | { 2993 | "cell_type": "code", 2994 | "execution_count": null, 2995 | "metadata": {}, 2996 | "outputs": [], 2997 | "source": [ 2998 | "# display(widgets.HTML(value=\"Interaction file\"))\n", 2999 | "\n", 3000 | "# print(rawIraw.isnull().sum())\n", 3001 | "\n", 3002 | "# display(widgets.HTML(value=\"Team size file\"))\n", 3003 | "# print(rawTS.isnull().sum())\n", 3004 | "\n", 3005 | "# display(widgets.HTML(value=\"Floor capacity file\"))\n", 3006 | "# print(rawFC.isnull().sum())\n", 3007 | "\n", 3008 | "\n", 3009 | "# display(widgets.HTML(value=\"Distance file\"))\n", 3010 | "# print(rawDT.isnull().sum())" 3011 | ] 3012 | } 3013 | ], 3014 | "metadata": { 3015 | "kernelspec": { 3016 | "display_name": "Python 3", 3017 | "language": "python", 3018 | "name": "python3" 3019 | }, 3020 | "language_info": { 3021 | "codemirror_mode": { 3022 | "name": "ipython", 3023 | "version": 3 3024 | }, 3025 | "file_extension": ".py", 3026 | "mimetype": "text/x-python", 3027 | "name": "python", 3028 | "nbconvert_exporter": "python", 3029 | "pygments_lexer": "ipython3", 3030 | "version": "3.8.8" 3031 | } 3032 | }, 3033 | "nbformat": 4, 3034 | "nbformat_minor": 2 3035 | } 3036 | 
-------------------------------------------------------------------------------- /requirements_SP.txt: -------------------------------------------------------------------------------- 1 | ipython 2 | ipywidgets 3 | scipy 4 | pandas 5 | numpy 6 | ipyfilechooser 7 | pulp 8 | ipysheet 9 | geopy --------------------------------------------------------------------------------