├── .github
│   └── CODEOWNERS
├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── docs
│   ├── configuration.md
│   ├── images
│   │   ├── hub-stress-test-health.png
│   │   ├── hub-stress-test-request-response-times.png
│   │   ├── hub-stress-test-resource-usage.png
│   │   └── py-spy-example.svg
│   ├── profiling.md
│   └── stress-test.md
├── requirements.txt
├── scripts
│   └── hub-stress-test.py
├── test-requirements.txt
└── tox.ini
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | * @mriedem @rmoe
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | purge.log
2 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 |
3 | python: 3.7
4 |
5 | cache: pip
6 |
7 | install: pip install tox
8 |
9 | script: tox
10 |
11 | jobs:
12 | include:
13 | - stage: test
14 | env: TOXENV=flake8
15 | - stage: test
16 | env: TOXENV=hub-stress-test
17 |
18 | notifications:
19 | email:
20 | on_success: never
21 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # jupyter-tools
2 |
3 | Collection of tools for working with JupyterHub and notebooks.
4 |
5 | ## Load Testing
6 |
7 | In order to support high load events we have tooling to run stress tests on our JupyterHub deployment.
8 |
 9 | * [hub-stress-test](scripts/hub-stress-test.py): This script scales up hundreds of fake users and notebook
10 | servers (pods) at once against a target JupyterHub cluster to see how it responds to sudden load, such as
11 | many users signing on at the beginning of an event. It can also scale up and hold a steady state of many
12 | users to profile the performance of the hub. See [Hub Stress Testing](docs/stress-test.md) for more details.
13 |
14 | ## Configuration tuning
15 | There are various configuration settings you can modify to improve both steady-state and scale-up
16 | performance. See [Configuration settings](docs/configuration.md) for more details.
17 |
18 | ## Profiling
19 | Performance data can be collected during normal operations or a stress-test run. See
20 | [Profiling](docs/profiling.md) for more details.
21 |
--------------------------------------------------------------------------------
/docs/configuration.md:
--------------------------------------------------------------------------------
1 | # Configuration settings
2 |
3 | This document provides an overview of configuration settings for increasing hub performance.
4 |
5 | 1. [Culler settings](#culler)
6 | 1. [Frequency](#culler-frequency)
7 | 2. [Concurrency limit](#culler-concurrency)
8 | 3. [Timeout](#culler-timeout)
9 | 4. [Notebook culler](#notebook-culler)
10 | 2. [Activity intervals](#activity)
11 | 1. [`activity_resolution`](#activity-resolution)
12 | 2. [`last_activity_interval`](#last-activity-interval)
13 |     3. [`JUPYTERHUB_ACTIVITY_INTERVAL`](#hub-activity-interval)
14 | 3. [Startup time](#startup)
15 | 1. [`init_spawners_timeout`](#spawners-timeout)
16 | 4. [Other settings](#other)
17 | 1. [`k8s_threadpool_api_workers`](#kubespawner-thread)
18 | 2. [Disable events](#kubespawner-events)
19 | 3. [Disable consecutiveFailureLimit](#disable-consecutivefailurelimit)
20 | 4. [Increase http_timeout](#increase-http-timeout)
21 | 5. [References](#references)
22 |
23 |
24 |
25 | ## Culler settings
26 | There are two mechanisms for controlling the culling of servers and users. One is a
27 | process managed by the hub which will periodically cull users and servers. The other
28 | is a setting which will allow servers to delete themselves after a period of inactivity.
29 |
30 |
31 | ### Frequency
32 | By default the culler runs every 10 minutes. With a more aggressive setting for the notebook
33 | idle timeout the hub-managed culler can be run less frequently.
34 |
35 |
36 | ### Concurrency limit
37 | By default the culler has a concurrency limit of 10. This means it will make up to 10
38 | concurrent API calls. When deleting a large number of users that can generate a high load
39 | on the hub. Setting this to `1` helps to reduce load on the hub.
40 |
41 |
42 | ### Timeout
43 | The timeout controls how long a server can be idle before being deleted. Because the servers
44 | will aggressively cull themselves this value can be set very high.
45 |
46 | These can be all configured in the `cull` section of [values.yaml](https://github.com/jupyterhub/zero-to-jupyterhub-k8s/blob/master/jupyterhub/values.yaml):
47 | ```yaml
48 | cull:
49 | timeout: 432000 # 5 days
50 | every: 3600 # Run once an hour instead of every 10 minutes
51 | concurrency: 1
52 | ```
53 |
54 |
55 | ### Notebook culler
56 | There are two settings which control how the notebooks cull themselves. The first is
57 | `c.NotebookApp.shutdown_no_activity_timeout` which specifies the period of inactivity
58 | (in seconds) before a server is shut down. The second is `c.MappingKernelManager.cull_idle_timeout`,
59 | which determines when idle kernels will be shut down. These settings can be configured as described
60 | [here](https://jupyter-notebook.readthedocs.io/en/stable/config_overview.html).
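
As a concrete sketch, both settings could go in the notebook image's `jupyter_notebook_config.py`; the values (and the added `cull_interval` check period) are illustrative, not recommendations:

```python
# jupyter_notebook_config.py (illustrative values)
# Shut the whole server down after 30 minutes with no activity.
c.NotebookApp.shutdown_no_activity_timeout = 1800
# Cull idle kernels after 10 minutes, checking once a minute.
c.MappingKernelManager.cull_idle_timeout = 600
c.MappingKernelManager.cull_interval = 60
```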
61 |
62 |
63 | ## Activity intervals
64 | These settings control how spawner and user activity is tracked. These settings have
65 | a large impact on the performance of the hub.
66 |
67 |
68 | ### `c.JupyterHub.activity_resolution`
69 | Activity resolution controls how often activity updates are written to the database. Many
70 | API calls will record activity for a user. This setting determines whether that update
71 | is written to the database: if the stored activity timestamp was updated less than
72 | `activity_resolution` seconds ago, the new update is skipped. Increasing this value reduces commits to the database.
73 |
74 | ```yaml
75 | extraConfig:
76 | myConfig: |
77 | c.JupyterHub.activity_resolution = 6000
78 | ```
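
The gating described above can be sketched like this (a simplified illustration of the idea, not the hub's actual implementation):

```python
from datetime import datetime, timedelta

ACTIVITY_RESOLUTION = 6000  # seconds, matching the config example above

def should_write_activity(stored, new):
    """Skip the database write unless the new activity timestamp is more
    than ACTIVITY_RESOLUTION seconds newer than the stored one."""
    if stored is None:
        return True
    return (new - stored) > timedelta(seconds=ACTIVITY_RESOLUTION)

t0 = datetime(2021, 1, 1, 12, 0, 0)
print(should_write_activity(t0, t0 + timedelta(seconds=60)))    # False: within resolution
print(should_write_activity(t0, t0 + timedelta(seconds=7000)))  # True: write it
```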
79 |
80 |
81 | ### `c.JupyterHub.last_activity_interval`
82 | This setting controls how often a periodic task in the hub named `update_last_activity`
83 | runs. This task updates user activity using information from the proxy. This task makes
84 | a large number of database calls and can put a fairly significant load on the hub. Zero to
85 | JupyterHub sets this to 1 minute by default. The upstream default of 5 minutes is a better
86 | setting.
87 |
88 | ```yaml
89 | extraConfig:
90 | myConfig: |
91 | c.JupyterHub.last_activity_interval = 300
92 | ```
93 |
94 |
95 | ### `JUPYTERHUB_ACTIVITY_INTERVAL`
96 | This controls how often each server reports its activity back to the hub. The default
97 | is 5 minutes and with hundreds or thousands of users posting activity updates it puts
98 | a heavy load on the hub and the hub's database. Increasing this to one hour or more
99 | reduces the load placed on the hub by these activity updates.
100 |
101 | ```yaml
102 | singleuser:
103 | extraEnv:
104 | JUPYTERHUB_ACTIVITY_INTERVAL: "3600"
105 | ```
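
To see why this matters, a back-of-the-envelope calculation (illustrative numbers): with 3000 active servers each posting an activity update every `JUPYTERHUB_ACTIVITY_INTERVAL` seconds, the average request rate hitting the hub is:

```python
def activity_requests_per_second(servers, interval_seconds):
    """Average rate of activity POSTs arriving at the hub."""
    return servers / interval_seconds

print(activity_requests_per_second(3000, 300))   # 10.0 req/s at the 5-minute default
print(activity_requests_per_second(3000, 3600))  # ~0.83 req/s at one hour
```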
106 |
107 |
108 | ## Startup time
109 |
110 |
111 | ### `init_spawners_timeout`
112 | [c.JupyterHub.init_spawners_timeout](https://jupyterhub.readthedocs.io/en/stable/api/app.html#jupyterhub.app.JupyterHub.init_spawners_timeout) controls how long the hub will wait for spawners to
113 | initialize. When this timeout is reached the spawner check will go into the background and
114 | hub startup will continue. With many hundreds or thousands of spawners this is always going
115 | to exceed any reasonable timeout so there's no reason to wait at all. Setting it to `1`
116 | (which is the minimum value) allows the hub to start faster and start servicing other requests.
117 |
118 | In `values.yaml`:
119 | ```yaml
120 | extraConfig:
121 | myConfig: |
122 | c.JupyterHub.init_spawners_timeout = 1
123 | ```
124 |
125 |
126 | ## Other settings
127 | Other settings which are helpful for tuning performance.
128 |
129 |
130 | ### `c.KubeSpawner.k8s_api_threadpool_workers`
131 | This value controls the number of threads `kubespawner` will create to make API calls to
132 | Kubernetes. The default is `5 * num_cpus`. Given a large enough number of users logging in
133 | and spawning servers at the same time this may not be enough threads. A more sensible value
134 | for this setting is [c.JupyterHub.concurrent_spawn_limit](https://jupyterhub.readthedocs.io/en/stable/api/app.html#jupyterhub.app.JupyterHub.concurrent_spawn_limit).
135 | `concurrent_spawn_limit` controls how many users can spawn servers at the same time.
136 | By creating that many threadpool workers we ensure that there's always a thread available
137 | to service a user's spawn request. The upstream default for `concurrent_spawn_limit` is 100 while
138 | the default with Zero to JupyterHub is 64.
139 |
140 | In `values.yaml`:
141 | ```yaml
142 | extraConfig:
143 | perfConfig: |
144 | c.KubeSpawner.k8s_api_threadpool_workers = c.JupyterHub.concurrent_spawn_limit
145 | ```
146 |
147 |
148 | ### Disable user events
149 | With this enabled `kubespawner` will process events from the Kubernetes API which are then
150 | used to show progress on the user spawn page. Disabling this reduces the load on `kubespawner`.
151 |
152 | To disable user events update the `events` key in the `values.yaml` file. This value ultimately
153 | sets `c.KubeSpawner.events_enabled`.
154 |
155 | ```yaml
156 | singleuser:
157 | events: false
158 | ```
159 |
160 |
161 | ### Disable consecutiveFailureLimit
162 | JupyterHub itself defaults [c.Spawner.consecutive_failure_limit](https://jupyterhub.readthedocs.io/en/stable/api/spawner.html#jupyterhub.spawner.Spawner.consecutive_failure_limit) to 0 to disable it but zero-to-jupyterhub-k8s
163 | defaults it to [5](https://github.com/jupyterhub/zero-to-jupyterhub-k8s/blob/0.11.0/jupyterhub/values.yaml#L43).
164 | This can be problematic at the start of a large user event when many users are starting server pods at the same
165 | time: if user node capacity is exhausted, spawns can time out while waiting for the node auto-scaler to add more
166 | capacity. Once the consecutive failure limit is reached the hub restarts, which is unlikely to help when the spawn
167 | timeouts are caused by capacity issues in the first place.
168 |
169 | To disable the consecutive failure limit update the `consecutiveFailureLimit` key in the `values.yaml` file.
170 |
171 | ```yaml
172 | hub:
173 | consecutiveFailureLimit: 0
174 | ```
175 |
176 |
177 | ### Increase http_timeout
178 |
179 | [`c.KubeSpawner.http_timeout`](https://jupyterhub.readthedocs.io/en/stable/api/spawner.html#jupyterhub.spawner.Spawner.http_timeout)
180 | defaults to 30 seconds. During scale and load testing we have seen the hub hit this timeout and delete a server
181 | pod that would have come up if given just a few seconds more. If you have enough node capacity that pods are
182 | being created, but they are slow to come up and are hitting this timeout, consider increasing it to something
183 | like 60 seconds. Startup time also varies with whether you are using `notebook` or `jupyterlab` / `jupyter-server`,
184 | the type of backing storage for the user pods (e.g. s3fs shared object storage is known to be slower), and how
185 | many and what kinds of extensions you have in the user image.
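
For example, following the same `extraConfig` pattern as the other settings in this document (the value is illustrative):

```yaml
extraConfig:
  timeoutConfig: |
    c.KubeSpawner.http_timeout = 60
```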
187 |
188 |
189 | ## References
190 | - https://discourse.jupyter.org/t/confusion-of-the-db-instance/3878
191 | - https://discourse.jupyter.org/t/identifying-jupyterhub-api-performance-bottleneck/1289
192 | - https://discourse.jupyter.org/t/minimum-specs-for-jupyterhub-infrastructure-vms/5309
193 | - https://discourse.jupyter.org/t/background-for-jupyterhub-kubernetes-cost-calculations/5289
194 | - https://discourse.jupyter.org/t/core-component-resilience-reliability/5433
195 | - https://discourse.jupyter.org/t/scheduler-insufficient-memory-waiting-errors-any-suggestions/5314
196 |
--------------------------------------------------------------------------------
/docs/images/hub-stress-test-health.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/jupyter-tools/747256d3a994ab36abfeba501f14cc05307facd9/docs/images/hub-stress-test-health.png
--------------------------------------------------------------------------------
/docs/images/hub-stress-test-request-response-times.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/jupyter-tools/747256d3a994ab36abfeba501f14cc05307facd9/docs/images/hub-stress-test-request-response-times.png
--------------------------------------------------------------------------------
/docs/images/hub-stress-test-resource-usage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/jupyter-tools/747256d3a994ab36abfeba501f14cc05307facd9/docs/images/hub-stress-test-resource-usage.png
--------------------------------------------------------------------------------
/docs/images/py-spy-example.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs/profiling.md:
--------------------------------------------------------------------------------
1 | # Profiling
 2 | During stress testing, or even normal operations, [py-spy](https://github.com/benfred/py-spy) can be used to capture profiling data, including SVG
 3 | flamegraphs that show where the hub is spending its time.
4 |
5 | 1. [Installation](#py-spy-installation)
6 | 1. [Collecting data](#py-spy-collecting-data)
7 | 1. [`py-spy top`](#py-spy-top)
8 | 1. [`py-spy record`](#py-spy-record)
9 | 1. [`py-spy dump`](#py-spy-dump)
10 |
11 |
12 | ### Installation
13 | `py-spy` is installed by z2jh but it won't work without additional configuration. The hub image
14 | must be modified to set the `SYS_PTRACE` capability on the `py-spy` binary. The following line
15 | should be added after `py-spy` has been installed and before root privileges are dropped during
16 | the image build.
17 |
18 | ```
19 | RUN setcap cap_sys_ptrace+ep $(which py-spy)
20 | ```
21 |
22 | Additionally the `securityContext` must be configured in the hub deployment. This is done
23 | by setting `containerSecurityContext` in `values.yaml`.
24 |
25 | ```yaml
26 | containerSecurityContext:
27 | allowPrivilegeEscalation: true
28 | capabilities:
29 | drop:
30 | - all
31 | add:
32 | - SYS_PTRACE
33 | ```
34 |
35 |
36 | ### Collecting data
37 | `py-spy` must be run from inside the hub container.
38 |
39 | ```bash
40 | $ kubectl -n <namespace> exec -it <hub-pod> -- bash
41 | ```
42 |
43 | There are three ways to investigate the hub's activity.
44 |
45 |
46 | 1. `py-spy top`
47 | This shows a live view of which functions are taking the most time. It's similar to the
48 | Linux `top` command.
49 | ```
50 | jovyan@hub-69f94ddc84-g26qj:/$ py-spy top -p 1
51 |
52 | Collecting samples from '/usr/bin/python3 /usr/local/bin/jupyterhub --config /etc/jupyterhub/jupyterhub_config.py --upgrade-db' (python v3.6.9)
53 | Total Samples 1118
54 | GIL: 0.00%, Active: 0.00%, Threads: 1
55 |
56 | %Own %Total OwnTime TotalTime Function (filename:line)
57 | 0.00% 0.00% 0.010s 0.010s add_timeout (tornado/ioloop.py:580)
58 |   0.00%   0.00%    0.000s     0.010s   <module> (jupyterhub:11)
59 | 0.00% 0.00% 0.000s 0.010s start (tornado/platform/asyncio.py:149)
60 | 0.00% 0.00% 0.000s 0.010s _run_callback (tornado/ioloop.py:743)
61 | 0.00% 0.00% 0.000s 0.010s _schedule_next (tornado/ioloop.py:916)
62 | 0.00% 0.00% 0.000s 0.010s launch_instance (jupyterhub/app.py:2782)
63 | 0.00% 0.00% 0.000s 0.010s run_forever (asyncio/base_events.py:438)
64 | 0.00% 0.00% 0.000s 0.010s _run (tornado/ioloop.py:911)
65 | 0.00% 0.00% 0.000s 0.010s _run_once (asyncio/base_events.py:1451)
66 | 0.00% 0.00% 0.000s 0.010s _run (asyncio/events.py:145)
67 | ```
68 | The output can be sorted by each column as well.
69 |
70 | 1. `py-spy record`
71 | The record command runs in the foreground collecting samples and when it's closed (either
72 | by CTRL+C or when it reaches its configured duration) an SVG flamegraph is written to disk.
73 | ```
74 | jovyan@hub-69f94ddc84-g26qj:/$ py-spy record -o /tmp/py-spy-trace -p 1
75 | py-spy> Sampling process 100 times a second. Press Control-C to exit.
76 |
77 | ^C
78 | py-spy> Stopped sampling because Control-C pressed
79 | py-spy> Wrote flamegraph data to '/tmp/py-spy-trace'. Samples: 20541 Errors: 0
80 | ```
81 | That will produce an SVG like this:
82 | ![py-spy flamegraph example](images/py-spy-example.svg)
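
For unattended captures it can help to bound the run instead of pressing Ctrl+C; a sketch using py-spy's `--duration` flag (seconds to sample before writing the SVG):

```console
jovyan@hub-69f94ddc84-g26qj:/$ py-spy record -o /tmp/py-spy-60s.svg -p 1 --duration 60
```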
83 |
84 | 1. `py-spy dump`
85 | The dump command will dump the state of all threads for the specified process. It can
86 | optionally show the local variables for each frame. This is helpful, for example, for
87 | figuring out why the hub process appears stuck.
88 |
89 | ```
90 | jovyan@hub-69f94ddc84-g26qj:/$ py-spy dump -p 1 --locals
91 | Process 1: /usr/bin/python3 /usr/local/bin/jupyterhub --config /etc/jupyterhub/jupyterhub_config.py --upgrade-db
92 | Python v3.6.9 (/usr/bin/python3.6)
93 |
94 | Thread 1 (idle): "MainThread"
95 | select (selectors.py:445)
96 | Arguments::
97 | self:
98 | timeout: 0.998
99 | Locals::
100 | max_ev: 3
101 | ready: []
102 | _run_once (asyncio/base_events.py:1415)
103 | Arguments::
104 | self: <_UnixSelectorEventLoop at 0x7f79ac4ab7b8>
105 | Locals::
106 | sched_count: 45
107 | timeout: 0.9979082886129618
108 | when: 5094106.57817052
109 | run_forever (asyncio/base_events.py:438)
110 | Arguments::
111 | self: <_UnixSelectorEventLoop at 0x7f79ac4ab7b8>
112 | Locals::
113 | old_agen_hooks: (None, None)
114 | start (tornado/platform/asyncio.py:149)
115 | Arguments::
116 | self:
117 | Locals::
118 | old_loop: <_UnixSelectorEventLoop at 0x7f79ac4ab7b8>
119 | launch_instance (jupyterhub/app.py:2782)
120 | Arguments::
121 | cls:
122 | argv: None
123 | Locals::
124 | self:
125 | loop:
126 | task: <_asyncio.Task at 0x7f79abd788c8>
127 |     <module> (jupyterhub:11)
128 | ```
129 |
--------------------------------------------------------------------------------
/docs/stress-test.md:
--------------------------------------------------------------------------------
1 | # Hub Stress Testing
2 |
3 | This document gives an overview of the [hub-stress-test script](../scripts/hub-stress-test.py)
4 | and how it can be used.
5 |
6 | 1. [Setup](#setup)
7 | 1. [Scaling up](#scaling-up)
8 | 1. [Placeholders and user nodes](#placeholders)
9 | 1. [Steady state testing](#steady-state)
10 | 1. [Activity update testing](#activity-update-testing)
11 | 1. [Scaling down](#scaling-down)
12 | 1. [Monitoring](#monitoring)
13 |
14 |
15 | ## Setup
16 |
17 | You will need two things to run the script: an admin token and a target hub API endpoint URL.
18 |
19 | The admin token can be provided to the script on the command line, but it's recommended to create a
20 | file you can source to export the `JUPYTERHUB_API_TOKEN` environment variable.
21 |
22 | For the hub API endpoint URL, you can probably use the same value as the `JUPYTERHUB_API_URL`
23 | environment variable in your user notebooks, e.g. `https://myhub-testing.acme.com/hub/api`.
24 |
25 | Putting these together, you can have a script like the following to prepare your environment:
26 |
27 | ```bash
28 | #!/bin/bash -e
29 | export JUPYTERHUB_API_TOKEN=abcdef123456
30 | export JUPYTERHUB_ENDPOINT=https://myhub-testing.acme.com/hub/api
31 | ```
32 |
33 |
34 | ## Scaling up
35 |
36 | By default the `stress-test` command of the `hub-stress-test` script will scale up to
37 | 100 users and notebook servers (pods) in batches, wait for them to be "ready" and then
38 | stop and delete them.
39 |
40 |
41 | ### Placeholders and user nodes
42 |
43 | The number of pods that can be created in any given run depends on the number of
44 | `user-placeholder` pods already in the cluster and the number of `user` nodes. The
45 | `user-placeholder` pods are pre-emptible pods which are part of a StatefulSet:
46 |
47 | ```console
48 | $ kubectl get statefulset/user-placeholder -n jhub
49 | NAME READY AGE
50 | user-placeholder 300/300 118d
51 | ```
52 |
53 | We normally have very few of these in our testing cluster but need to
54 | scale them up when doing stress testing; otherwise the `hub-stress-test` script has to wait
55 | for the auto-scaler to add more nodes to the `user` worker pool. The number of available
56 | workers can be found like so:
57 |
58 | ```console
59 | $ kubectl get nodes -l workerPurpose=users | grep -c "Ready\s"
60 | 13
61 | ```
62 |
63 | The number of `user` nodes needed for a scale test depends on the resource requirements
64 | of the user notebook pods, reserved space on the nodes, other system pods running on the nodes
65 | (e.g. a logging daemon), per-node pod limits, etc.
66 |
67 | If there are not enough nodes available and the auto-scaler has to create them
68 | as the stress test is running, we can hit the [consecutive failure limit](https://github.com/jupyterhub/zero-to-jupyterhub-k8s/blob/363d0b7db5/jupyterhub/values.yaml#L17) which will cause the hub container to crash and restart.
69 | One way to avoid this is to run the script with a `--count` no higher than 500, which
70 | gives time between runs for the auto-scaler to add more `user` nodes.
71 |
72 | As an example, the `kubelet` default `maxPods` limit is 110 per node, and on IBM Cloud there are about
73 | 25 system pods per node. The user notebooks in our testing cluster use a micro
74 | profile, so their resource usage is not an issue; they are bounded only by the 110 pods-per-node limit.
75 | As a reference, to scale up to 3000 users/pods we need to have at least 35 user nodes.
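
That arithmetic can be wrapped in a quick sizing helper. The defaults below come from the `kubelet`/IBM Cloud numbers in this paragraph, and the function name is illustrative; it deliberately ignores placeholder pods and per-pod resource requests:

```python
import math

def user_nodes_needed(target_pods, max_pods_per_node=110, system_pods_per_node=25):
    """Estimate how many `user` nodes a scale test needs based purely on
    per-node pod limits, ignoring placeholders and resource requests."""
    capacity = max_pods_per_node - system_pods_per_node  # pods left for users
    return math.ceil(target_pods / capacity)

print(user_nodes_needed(3000))
```

With the 110/25 defaults this yields 36 nodes for 3000 pods, in line with the "at least 35" figure above.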
76 |
77 |
78 | ### Steady state testing
79 |
80 | The `--keep` option can be used to scale up the number of pods in the cluster and retain them
81 | so that you can perform tests or profiling on the hub under high load. When the script runs
82 | it first checks the number of existing `hub-stress-test` users and then creates new
83 | users starting from that index, so you can run the script repeatedly with a `--count` value of 200-500
84 | if you need to let the auto-scaler add `user` nodes after each run.
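
New users are named by a monotonically increasing index appended to a fixed prefix, which is how repeated `--keep` runs build on each other. A sketch of the naming logic from `create_users` (`next_usernames` is an illustrative wrapper):

```python
USERNAME_PREFIX = 'hub-stress-test'

def next_usernames(num_existing, count):
    """Generate the next `count` usernames after `num_existing` users,
    the way repeated --keep runs continue from the existing set."""
    start = num_existing + 1
    return ['%s-%d' % (USERNAME_PREFIX, i) for i in range(start, start + count)]

print(next_usernames(200, 3))
# ['hub-stress-test-201', 'hub-stress-test-202', 'hub-stress-test-203']
```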
85 |
86 | Note that the `c.NotebookApp.shutdown_no_activity_timeout` value in the user notebook image (in the
87 | testing cluster) should either be left at the default (0) or set to some larger window so that the
88 | notebook pods do not shut themselves down while you are scaling up.
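
For example, in the notebook image's `jupyter_notebook_config.py` (a config fragment; this assumes the classic notebook server, which is what exposes `c.NotebookApp`):

```python
# Leave idle auto-shutdown disabled (0 is the default) while stress testing,
# or set a window longer than the planned scale-up, e.g. 12 hours:
c.NotebookApp.shutdown_no_activity_timeout = 0
# c.NotebookApp.shutdown_no_activity_timeout = 12 * 60 * 60
```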
89 |
90 |
91 | ### Activity update testing
92 |
93 | The `activity-stress-test` command can be used to simulate `--count` users POSTing activity
94 | updates. This command only creates users, not servers. It takes a number of users to simulate
95 | specified by `--count` and a number of worker threads, `--workers`, to perform the actual
96 | requests. If `--keep` isn't specified then the users will be deleted after the test.
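
The users are split across worker threads with a simple slicing generator (this mirrors the `chunk` helper in the script); when `--count` does not divide evenly the remainder becomes an extra batch that the thread pool simply queues:

```python
def chunk(users, n):
    # Yield successive n-sized slices of the user list; the last may be shorter.
    for i in range(0, len(users), n):
        yield users[i:i + n]

usernames = ['hub-stress-test-%d' % i for i in range(1, 11)]  # --count 10
workers = 3
batches = list(chunk(usernames, len(usernames) // workers))
print([len(b) for b in batches])  # [3, 3, 3, 1]
```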
97 |
98 |
99 | ## Scaling down
100 |
101 | If you used the `--keep` option to scale up and retain pods for steady state testing, when you are
102 | done you can scale down the pods and users by using the `purge` command. The users created by the
103 | script all follow a specific naming convention, so the script knows which notebook servers to stop
104 | and which users to remove.
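
That convention is the fixed `hub-stress-test-` prefix on every username, so `purge` can safely filter the full `GET /users` listing down to only the users it created, as in `find_existing_stress_test_users`:

```python
USERNAME_PREFIX = 'hub-stress-test'

def stress_test_users(all_users):
    """Filter a GET /users listing down to the users created by the script."""
    return [u for u in all_users if u['name'].startswith(USERNAME_PREFIX)]

listing = [{'name': 'alice'}, {'name': 'hub-stress-test-1'}, {'name': 'hub-stress-test-2'}]
print([u['name'] for u in stress_test_users(listing)])
# ['hub-stress-test-1', 'hub-stress-test-2']
```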
105 |
106 |
107 | ## Monitoring
108 |
109 | Depending on the number of pods being created or deleted the script can take a while. During a run
110 | you should watch the hub logs as well as a few dashboards. The logging and monitoring
111 | platform is deployment-specific but the following are some examples of dashboards we monitor:
112 |
113 | * `Jupyter Notebook Health (Testing)`
114 | This dashboard shows the active user notebook pods, nodes in the cluster and `user-placeholder`
115 | pods. It is mostly useful for watching the active user notebook pod count rise and fall as the script
116 | scales up or down. The placeholder and user node counts may also fluctuate as placeholder pods
117 | are pre-empted and as the auto-scaler adds or removes user nodes.
118 |
119 | 
120 |
121 | * `Jupyter Hub Golden Signals (testing)`
122 | This is where you can monitor the response time and request rate on the hub. As user notebook pods
123 | are scaled up each of those pods will "check in" with the hub to report their activity. By default
124 | each pod checks in with the hub every [5 minutes](https://github.com/jupyterhub/jupyterhub/blob/5dee864af/jupyterhub/singleuser.py#L463). So we expect that the more active user notebook pods are in the cluster, the higher
125 | the request rate and response times in this dashboard will be. The error rates may also increase
126 | as we get 429 responses from the hub while scaling up due to the [concurrentSpawnLimit](https://github.com/jupyterhub/zero-to-jupyterhub-k8s/blob/363d0b7db/jupyterhub/values.yaml#L16). Those 429 responses are expected
127 | and the `hub-stress-test` script is built to retry on them. Here is an example of a 3000 user load
128 | run:
129 |
130 | 
131 |
132 | That run started around 2:30 and then the purge started around 9 which is why response times track
133 | the increase in request rates. As the purge runs the number of pods reporting activity is going down
134 | so the request rate also goes down. One thing to note on the purge is that the [slow_stop_timeout](https://github.com/jupyterhub/jupyterhub/blob/42adb4415/jupyterhub/handlers/base.py#L761) defaults to 10 seconds, so as
135 | we are stopping user notebook servers (deleting pods) the response times spike because of that
136 | arbitrary 10 second delay in the hub API.
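
Each of those check-ins is a `POST /users/{name}/activity` request; the body the stress-test script builds when simulating one looks like this (`activity_body` is an illustrative wrapper around the script's `send_activity` logic):

```python
import json
from datetime import datetime, timedelta

def activity_body(now=None):
    """Build the payload POSTed to /users/{name}/activity; the shape is
    taken from the hub-stress-test script's send_activity helper."""
    ts = (now or datetime.utcnow() + timedelta(minutes=1)).isoformat()
    return {"servers": {"": {"last_activity": ts}}, "last_activity": ts}

body = activity_body(datetime(2020, 1, 1, 12, 0, 0))
print(json.dumps(body, indent=2))
```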
137 |
138 | Other useful panels on this dashboard are for tracking CPU and memory usage of the hub. From the same
139 | 3000 user run as above:
140 |
141 | 
142 |
143 | CPU, memory and network I/O increase as the number of user notebook pods grows and those pods report
144 | activity to the hub. CPU and network I/O drop when the purge starts running. Note that
145 | memory usage remains high even after the purge starts because the hub aggressively caches DB state in
146 | memory and apparently does not clean up the cached references even after spawners and users are deleted
147 | from the database.
148 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests
2 | urllib3
3 |
--------------------------------------------------------------------------------
/scripts/hub-stress-test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import argparse
4 | from concurrent import futures
5 | from datetime import datetime, timedelta
6 | import functools
7 | import json
8 | import logging
9 | from unittest import mock
10 | import os
11 | import random
12 | import sys
13 | import time
14 |
15 | import requests
16 | from requests import adapters
17 | from urllib3.util import retry
18 |
19 |
20 | LOG_FORMAT = "%(asctime)s %(levelname)s [%(name)s] %(message)s"
21 | LOG = logging.getLogger('hub-stress-test')
22 |
23 | # POST /users/{name}/servers can take over 10 seconds so be conservative with
24 | # the default timeout value.
25 | DEFAULT_TIMEOUT = 30
26 |
27 | # The default timeout for waiting on a server status change (starting/stopping)
28 | SERVER_LIFECYCLE_TIMEOUT = 60
29 |
30 | USERNAME_PREFIX = 'hub-stress-test'
31 |
32 |
33 | def parse_args():
34 | # Consider splitting this into sub-commands in case you want to be able to
35 | # scale up and retain servers to do some profiling and then have another
36 | # command to scale down when done. It could also be useful to have a
37 | # sub-command to report information about the hub, e.g. current number of
38 | # users/servers and which of those were created by this tool.
39 | parser = argparse.ArgumentParser(
40 | formatter_class=argparse.RawDescriptionHelpFormatter,
41 | description='''
42 | JupyterHub Stress Test
43 |
44 | The `stress-test` command will create `--count` number of fake users and
45 | notebook servers in batches defined by the `--batch-size` option in the
46 | given JupyterHub `--endpoint`. It will wait for each notebook server to
47 | be considered "ready" by the hub. By default the created users and servers
48 | will be deleted but the `--keep` option can be used to retain the resources
49 | for steady-state profiling. The `purge` command is available to delete any
50 | previously kept users/servers.
51 |
52 | The `activity-stress-test` command simulates user activity updates. This
53 | will create `--count` fake users with no server. These users will be
54 | deleted unless `--keep` is specified. A number of threads specified by
55 | `--workers` will be created to send updates to the hub. While these worker
56 | threads are sending activity another thread makes requests to the API and
57 | reports on the average, minimum, and maximum time of that API call.
58 |
59 | An admin API token is required and may be specified using the
60 | JUPYTERHUB_API_TOKEN environment variable.
61 |
62 | Similarly the hub API endpoint must be provided and may be specified using the
63 | JUPYTERHUB_ENDPOINT environment variable.
64 |
65 | A `--dry-run` option is available for seeing what the test would look like
66 | without actually making any changes, for example:
67 |
68 | JUPYTERHUB_API_TOKEN=test
69 | JUPYTERHUB_ENDPOINT=http://localhost:8000/hub/api
70 | python hub-stress-test.py -v --dry-run stress-test
71 | ''')
72 | parser.add_argument('-e', '--endpoint',
73 | default=os.environ.get('JUPYTERHUB_ENDPOINT'),
74 | help='The target hub API endpoint for the stress '
75 | 'test. Can also be read from the '
76 | 'JUPYTERHUB_ENDPOINT environment variable.')
77 | parser.add_argument('-t', '--token',
78 | default=os.environ.get('JUPYTERHUB_API_TOKEN'),
79 | help='JupyterHub admin API token. Must be a token '
80 | 'for an admin user in order to create other fake '
81 | 'users for the scale test. Can also be read from '
82 | 'the JUPYTERHUB_API_TOKEN environment variable.')
83 | parser.add_argument('--dry-run', action='store_true',
84 | help='If set do not actually make API requests.')
85 | # Note that with nargs='?' if --log-to-file is specified but without an
86 | # argument value then it will be True (uses the const value) and we'll
87 | # generate a log file under /tmp. If --log-to-file is not specified at all
88 | # then it will default to False and we'll log to stdout. Otherwise if
89 | # --log-to-file is specified with a command line argument we'll log to that
90 | # file.
91 | parser.add_argument('--log-to-file', nargs='?', default=False, const=True,
92 | metavar='FILEPATH',
93 | help='If set logging will be redirected to a file. If '
94 | 'no FILEPATH value is provided then a '
95 | 'timestamp-based log file under /tmp will be '
96 | 'created. Note that if a FILEPATH value is given '
97 | 'an existing file will be overwritten.')
98 | parser.add_argument('-v', '--verbose', action='store_true',
99 | help='Enable verbose (debug) logging which includes '
100 | 'logging API response times.')
101 |
102 | # This parser holds arguments that need to be shared among two or more
103 | # subcommands but should not be top-level arguments.
104 | parent_parser = argparse.ArgumentParser(add_help=False)
105 | parent_parser.add_argument(
106 | '-k', '--keep', action='store_true',
107 | help='Retain the created fake users/servers once they all created. '
108 | 'By default the script will scale up and then teardown. The '
109 | 'script can be run with --keep multiple times to build on an '
110 | 'existing set of fake users.'
111 | )
112 | parent_parser.add_argument('-c', '--count', default=100, type=int,
113 | help='Number of users/servers (pods) to create '
114 | '(default: 100).')
115 |
116 | subparsers = parser.add_subparsers(dest='command', required=True)
117 | stress_parser = subparsers.add_parser(
118 | 'stress-test', parents=[parent_parser]
119 | )
120 | stress_parser.add_argument(
121 | '-b', '--batch-size', default=10, type=int,
122 | help='Batch size to use when creating users and notebook servers. '
123 | 'Note that by default z2jh will limit concurrent server creation '
124 | 'to 64 (see c.JupyterHub.concurrent_spawn_limit) (default: 10). '
125 | )
126 | stress_parser.add_argument(
127 | '-p', '--profile', type=str, required=False,
128 | help='Hardware profile for servers.'
129 | )
130 |
131 | activity_parser = subparsers.add_parser(
132 | 'activity-stress-test', parents=[parent_parser]
133 | )
134 | activity_parser.add_argument(
135 | '--workers', type=int, default=100,
136 | help='Number of worker threads to create. Each thread will receive '
137 | 'len(users) // workers users to send updates for.'
138 | )
139 |
140 | # Add a standalone purge subcommand
141 | subparsers.add_parser('purge')
142 |
143 | args = parser.parse_args()
144 | return args
145 |
146 |
147 | def validate(args):
148 | if args.command == 'stress-test':
149 | if args.batch_size < 1:
150 | raise Exception('--batch-size must be greater than 0')
151 | if args.count < 1:
152 | raise Exception('--count must be greater than 0')
153 | if args.token is None:
154 | raise Exception('An API token must be provided either using --token '
155 | 'or the JUPYTERHUB_API_TOKEN environment variable')
156 | if args.endpoint is None:
157 | raise Exception('A hub API endpoint URL must be provided either using '
158 | '--endpoint or the JUPYTERHUB_ENDPOINT environment '
159 | 'variable')
160 |
161 |
162 | def setup_logging(verbose=False, log_to_file=False, args=None):
163 | filename = None
164 | if log_to_file: # If --log-to-file is specified at all this is Truthy
165 | if isinstance(log_to_file, str): # A specific file is given so use it.
166 | filename = log_to_file
167 | else: # --log-to-file with no arg so generate a tmp file for logging.
168 | timestamp = datetime.utcnow().isoformat(timespec='seconds')
169 | filename = os.path.join(
170 | '/tmp', f'hub-stress-test-{timestamp}.log')
171 | print(f'Redirecting logs to: {filename}')
172 | logging.basicConfig(format=LOG_FORMAT, filename=filename, filemode='w')
173 | root_logger = logging.getLogger(None)
174 | root_logger.setLevel(logging.INFO)
175 | if verbose:
176 | root_logger.setLevel(logging.DEBUG)
177 | logging.getLogger('urllib3.connectionpool').setLevel(logging.WARNING)
178 |
179 | if log_to_file and args:
180 | # Log the args used to run the script for posterity.
181 | # Scrub the token though so we don't log it.
182 | args_dict = dict(vars(args)) # Make sure to copy the vars dict.
183 | args_dict['token'] = '***'
184 | LOG.info('Args: %s', args_dict)
185 |
186 | def log_uncaught_exceptions(exc_type, exc_value, exc_traceback):
187 | root_logger.critical("Uncaught exception",
188 | exc_info=(exc_type, exc_value, exc_traceback))
189 |
190 | sys.excepthook = log_uncaught_exceptions
191 |
192 |
193 | def timeit(f):
194 | @functools.wraps(f)
195 | def wrapper(*args, **kwargs):
196 | start_time = time.time()
197 | try:
198 | return f(*args, **kwargs)
199 | finally:
200 | LOG.info('Took %.3f seconds to %s',
201 | (time.time() - start_time), f.__name__)
202 | return wrapper
203 |
204 |
205 | def log_response_time(resp, *args, **kwargs):
206 | """Logs response time elapsed.
207 |
208 | See: https://requests.readthedocs.io/en/master/user/advanced/#event-hooks
209 |
210 | :param resp: requests.Response object
211 | :param args: ignored
212 | :param kwargs: ignored
213 | """
214 | LOG.debug('%(method)s %(url)s status:%(status)s time:%(elapsed)ss',
215 | {'method': resp.request.method,
216 | 'url': resp.url,
217 | 'status': resp.status_code,
218 | 'elapsed': resp.elapsed.total_seconds()})
219 |
220 |
221 | def get_session(token, dry_run=False, pool_maxsize=100):
222 | if dry_run:
223 | return mock.create_autospec(requests.Session)
224 | session = requests.Session()
225 | session.headers.update({'Authorization': 'token %s' % token})
226 | # Retry on errors that might be caused by stress testing.
227 | r = retry.Retry(
228 | backoff_factor=0.5,
229 | method_whitelist=False, # retry on any verb (including POST)
230 | status_forcelist={
231 | 429, # concurrent_spawn_limit returns a 429
232 | 503, # if the hub container crashes we get a 503
233 | 504, # if the cloudflare gateway times out we get a 504
234 | })
235 | adapter = adapters.HTTPAdapter(max_retries=r, pool_maxsize=pool_maxsize)
236 | session.mount("http://", adapter)
237 | session.mount("https://", adapter)
238 | if LOG.isEnabledFor(logging.DEBUG):
239 | session.hooks['response'].append(log_response_time)
240 | return session
241 |
242 |
243 | def wait_for_server_to_stop(username, endpoint, session):
244 | count = 1
245 | while count <= SERVER_LIFECYCLE_TIMEOUT:
246 | resp = session.get(endpoint + '/users/%s' % username)
247 | if resp:
248 | user = resp.json()
249 | # When the server is stopped the servers dict should be empty.
250 | if not user.get('servers') or isinstance(user, mock.Mock):
251 | return True
252 | LOG.debug('Still waiting for server for user %s to stop, '
253 | 'attempt: %d', username, count)
254 | elif resp.status_code == 404:
255 | # Was the user deleted underneath us?
256 | LOG.info('Got 404 while waiting for server for user %s to '
257 | 'stop: %s', username, resp.content)
258 | # Consider this good if the user is gone.
259 | return True
260 | else:
261 | LOG.warning('Unexpected error while waiting for server for '
262 | 'user %s to stop: %s', username, resp.content)
263 | time.sleep(1)
264 | count += 1
265 | else:
266 | LOG.warning('Timed out waiting for server for user %s to stop after '
267 | '%d seconds', username, SERVER_LIFECYCLE_TIMEOUT)
268 | return False
269 |
270 |
271 | def stop_server(username, endpoint, session, wait=False):
272 | resp = session.delete(endpoint + '/users/%s/server' % username,
273 | timeout=DEFAULT_TIMEOUT)
274 | if resp:
275 | # If we got a 204 then the server is stopped and we should not
276 | # need to poll.
277 | if resp.status_code == 204:
278 | return True
279 | if wait:
280 | return wait_for_server_to_stop(username, endpoint, session)
281 | # We're not going to wait so just return True to indicate that we
282 | # successfully sent the stop request.
283 | return True
284 | else:
285 | LOG.warning('Failed to stop server for user %s. Response status '
286 | 'code: %d. Response content: %s', username,
287 | resp.status_code, resp.content)
288 | return False
289 |
290 |
291 | @timeit
292 | def stop_servers(usernames, endpoint, session, batch_size):
293 | stopped = {} # map of username to whether or not the server was stopped
294 | LOG.debug('Stopping servers for %d users in batches of %d',
295 | len(usernames), batch_size)
296 | # Do this in batches in a ThreadPoolExecutor because the
297 | # `slow_stop_timeout` default of 10 seconds in the hub API can cause the
298 | # stop action to be somewhat synchronous.
299 | with futures.ThreadPoolExecutor(
300 | max_workers=batch_size,
301 | thread_name_prefix='hub-stress-test:stop_servers') as executor:
302 | future_to_username = {
303 | executor.submit(stop_server, username, endpoint, session): username
304 | for username in usernames
305 | }
306 | # as_completed returns an iterator which yields futures as they
307 | # complete
308 | for future in futures.as_completed(future_to_username):
309 | username = future_to_username[future]
310 | stopped[username] = future.result()
311 | return stopped
312 |
313 |
314 | @timeit
315 | def wait_for_servers_to_stop(stopped, endpoint, session):
316 | """Wait for a set of user servers to stop.
317 |
318 | :param stopped: dict of username to boolean value of whether or not the
319 | server stop request was successful because if not we don't wait for
320 | that server; if the boolean value is True then it is updated in-place
321 | with the result of whether or not the server was fully stopped
322 | :param endpoint: base endpoint URL
323 | :param session: requests.Session instance
324 | """
325 | LOG.debug('Waiting for servers to stop')
326 | for username, was_stopped in stopped.items():
327 | # Only wait if we actually successfully tried to stop it.
328 | if was_stopped:
329 | # Update our tracking flag by reference.
330 | stopped[username] = wait_for_server_to_stop(
331 | username, endpoint, session)
332 |
333 |
334 | @timeit
335 | def delete_users_after_stopping_servers(stopped, endpoint, session):
336 | """Delete users after stopping their servers.
337 |
338 | :param stopped: dict of username to boolean value of whether or not the
339 | server was successfully stopped
340 | :param endpoint: base endpoint URL
341 | :param session: requests.Session instance
342 | :returns: True if all users were successfully deleted, False otherwise
343 | """
344 | LOG.debug('Deleting users now that servers are stopped')
345 | success = True
346 | for username, was_stopped in stopped.items():
347 | resp = session.delete(endpoint + '/users/%s' % username,
348 | timeout=DEFAULT_TIMEOUT)
349 | if resp:
350 | LOG.debug('Deleted user: %s', username)
351 | elif resp.status_code == 404:
352 | LOG.debug('User already deleted: %s', username)
353 | else:
354 | LOG.warning('Failed to delete user: %s. Response status code: %d. '
355 | 'Response content: %s. Was the server stopped? %s',
356 | username, resp.status_code, resp.content, was_stopped)
357 | success = False
358 | return success
359 |
360 |
361 | @timeit
362 | def delete_users(usernames, endpoint, session, batch_size=10):
363 | # Do this in batches by first explicitly stopping all of the servers since
364 | # that could be asynchronous, then wait for the servers to be stopped and
365 | # then finally delete the users.
366 | stopped = stop_servers(usernames, endpoint, session, batch_size)
367 |
368 | # Now wait for the servers to be stopped. With a big list the ones at the
369 | # end should be done by the time we get to them.
370 | wait_for_servers_to_stop(stopped, endpoint, session)
371 |
372 | # Now try to delete the users.
373 | return delete_users_after_stopping_servers(stopped, endpoint, session)
374 |
375 |
376 | @timeit
377 | def create_users(count, batch_size, endpoint, session, existing_users=[]):
378 | LOG.info('Start creating %d users in batches of %d at %s',
379 | count, batch_size, endpoint)
380 | # POST /users is a synchronous call so the timeout should be the batch size
381 | # or greater.
382 | timeout = max(batch_size, DEFAULT_TIMEOUT)
383 | num_existing_users = len(existing_users)
384 | index = num_existing_users + 1
385 | users = [] # Keep track of the batches to create servers.
386 | while index <= count + num_existing_users:
387 | # Batch create multiple users in a single request.
388 | usernames = []
389 | for _ in range(batch_size):
390 | usernames.append('%s-%d' % (USERNAME_PREFIX, index))
391 | index += 1
392 | # Maybe we should use the single user POST so we can deal with 409s
393 | # gracefully if we are re-running the script on a set of existing users
394 | resp = session.post(endpoint + '/users', json={'usernames': usernames},
395 | timeout=timeout)
396 | if resp:
397 | LOG.debug('Created users: %s', usernames)
398 | users.append(usernames)
399 | else:
400 | LOG.error('Failed to create users: %s. Response status code: %d. '
401 | 'Response content: %s', usernames, resp.status_code,
402 | resp.content)
403 | try:
404 | delete_users(usernames, endpoint, session)
405 | except Exception:
406 | LOG.warning('Failed to delete users: %s', usernames,
407 | exc_info=True)
408 | raise Exception('Failed to create users.')
409 | return users
410 |
411 |
412 | def start_server(username, endpoint, session, profile=None):
413 | if profile:
414 | profile = {"profile": profile}
415 | resp = session.post(endpoint + '/users/%s/server' % username,
416 | timeout=DEFAULT_TIMEOUT, json=profile)
417 | if resp:
418 | LOG.debug('Server for user %s is starting', username)
419 | else:
420 | # Should we delete the user now? Should we stop or keep going?
421 | LOG.error('Failed to create server for user: %s. '
422 | 'Response status code: %d. Response content: %s',
423 | username, resp.status_code, resp.content)
424 |
425 |
426 | @timeit
427 | def start_servers(users, endpoint, session, profile=None):
428 | LOG.info('Starting notebook servers')
429 | for index, usernames in enumerate(users):
430 | # Start the servers in batches using a ThreadPoolExecutor because
431 | # the start operation is not totally asynchronous so we should be able
432 | # to speed this up by doing the starts concurrently. That will also be
433 | # more realistic to users logging on en masse during an event.
434 | thread_name_prefix = f'hub-stress-test:start_servers:{index}'
435 | with futures.ThreadPoolExecutor(
436 | max_workers=len(usernames),
437 | thread_name_prefix=thread_name_prefix) as executor:
438 | for username in usernames:
439 | executor.submit(
440 | start_server, username, endpoint, session,
441 | profile=profile
442 | )
443 |
444 |
445 | @timeit
446 | def wait_for_servers_to_start(users, endpoint, session):
447 | LOG.info('Waiting for notebook servers to be ready')
448 | # Rather than do a GET for each individual user/server, we could get all
449 | # users and then filter out any that aren't in our list. However, there
450 | # could be servers in that list that are ready (the ones created first) and
451 | # others that are not yet (the ones created last). If we check individually
452 | # then there is a chance that by the time we get to the end of the list
453 | # those servers are already ready while we waited for those at the front of
454 | # the list.
455 | for usernames in users:
456 | for username in usernames:
457 | count = 0 # start our timer
458 | while count < SERVER_LIFECYCLE_TIMEOUT:
459 | resp = session.get(endpoint + '/users/%s' % username)
460 | if resp:
461 | user = resp.json()
462 | # We don't allow named servers so the user should have a
463 | # single server named ''.
464 | server = user.get('servers', {}).get('', {})
465 | if server.get('ready'):
466 | LOG.debug('Server for user %s is ready after %d '
467 | 'checks', username, count + 1)
468 | break
469 | elif not server.get('pending'):
470 | # It's possible that the server failed to start and in
471 | # that case we want to break the loop so we don't wait
472 | # needlessly until the timeout.
473 | LOG.error('Server for user %s failed to start. Waited '
474 | '%d seconds but the user record has no '
475 | 'pending action. Check the hub logs for '
476 | 'details. User: %s', username, count, user)
477 | break
478 | else:
479 | LOG.warning('Failed to get user: %s. Response status '
480 | 'code: %d. Response content: %s', username,
481 | resp.status_code, resp.content)
482 | time.sleep(1)
483 | count += 1
484 | else:
485 | # Should we fail here?
486 | LOG.error('Timed out waiting for server for user %s to be '
487 | 'ready after %d seconds', username,
488 | SERVER_LIFECYCLE_TIMEOUT)
489 |
490 |
491 | @timeit
492 | def find_existing_stress_test_users(endpoint, session):
493 | """Finds all existing hub-stress-test users.
494 |
495 | :param endpoint: base endpoint URL
496 | :param session: requests.Session instance
497 | :returns: list of existing hub-stress-test users
498 | """
499 | # This could be a lot of users so make the timeout conservative.
500 | resp = session.get(endpoint + '/users', timeout=120)
501 | if resp:
502 | users = resp.json()
503 | LOG.debug('Found %d existing users in the hub', len(users))
504 | if users:
505 | users = list(
506 | filter(lambda user: user['name'].startswith(USERNAME_PREFIX),
507 | users))
508 | LOG.debug('Found %d existing hub-stress-test users', len(users))
509 | return users
510 | else:
511 | # If the token is bad then we want to bail.
512 | if resp.status_code == 403:
513 | raise Exception('Invalid token')
514 | LOG.warning('Failed to list existing users: %s', resp.content)
515 | return []
516 |
517 |
518 | @timeit
519 | def run_stress_test(count, batch_size, token, endpoint, dry_run=False,
520 | keep=False, profile=None):
521 | session = get_session(token, dry_run=dry_run)
522 | if batch_size > count:
523 | batch_size = count
524 | # First figure out how many existing hub-stress-test users there are since
525 | # that will determine our starting index for names.
526 | existing_users = find_existing_stress_test_users(endpoint, session)
527 | # Create the users in batches.
528 | users = create_users(count, batch_size, endpoint, session,
529 | existing_users=existing_users)
530 | # Now that we've created the users, start a server for each in batches.
531 | start_servers(users, endpoint, session, profile=profile)
532 | # Now that all servers are starting we need to poll until they are ready.
533 | # Note that because of the concurrent_spawn_limit in the hub we could be
534 | # waiting awhile. We could also be waiting in case the auto-scaler needs to
535 | # add more nodes.
536 | wait_for_servers_to_start(users, endpoint, session)
537 | # If we don't need to keep the users/servers then remove them.
538 | if not keep:
539 | # Flatten the list of lists so we delete all users in a single run.
540 | usernames = [username for usernames in users for username in usernames]
541 | LOG.info('Deleting %d users', len(usernames))
542 | if not delete_users(usernames, endpoint, session, batch_size):
543 | raise Exception('Failed to delete all users')
544 |
545 |
546 | @timeit
547 | def purge_users(token, endpoint, dry_run=False):
548 | session = get_session(token, dry_run=dry_run)
549 | users = find_existing_stress_test_users(endpoint, session)
550 | if users:
551 | usernames = [user['name'] for user in users]
552 | LOG.info('Deleting %d users', len(usernames))
553 | if not delete_users(usernames, endpoint, session):
554 | raise Exception('Failed to delete all users')
555 |
556 |
557 | @timeit
558 | def notebook_activity_test(count, token, endpoint, workers, keep=False,
559 | dry_run=False):
560 | if count < workers:
561 | workers = count
562 | session = get_session(token=token, dry_run=dry_run, pool_maxsize=workers)
563 |
564 | # First figure out how many existing hub-stress-test users there are since
565 | # that will determine our starting index for names.
566 | existing_users = find_existing_stress_test_users(endpoint, session)
567 |
568 | usernames = [user['name'] for user in existing_users]
569 |
570 | # Create the missing users.
571 | to_create = count - len(existing_users)
572 | if to_create > 0:
573 | users = create_users(to_create, to_create, endpoint, session,
574 | existing_users=existing_users)
575 | usernames.extend([name for usernames in users for name in usernames])
576 |
577 | def send_activity(users, endpoint, session):
578 | now = datetime.utcnow() + timedelta(minutes=1)
579 | now = now.isoformat()
580 | body = {
581 | "servers": {
582 | "": {
583 | "last_activity": now,
584 | }
585 | },
586 | "last_activity": now,
587 | }
588 | times = []
589 | for username in users:
590 | time.sleep(random.random())
591 | url = "{}/users/{}/activity".format(endpoint, username)
592 | resp = session.post(
593 | url, data=json.dumps(body), timeout=DEFAULT_TIMEOUT)
594 | total_time = 1 if dry_run else resp.elapsed.total_seconds()
595 | times.append(total_time)
596 | LOG.debug("Sent activity for user %s (%f)", username, total_time)
597 |
598 | return times
599 |
600 | def chunk(users, n):
601 | for i in range(0, len(users), n):
602 | yield users[i:i + n]
603 |
604 | # STOP_PING is used to control the ping_hub function.
605 | STOP_PING = False
606 |
607 | def ping_hub(endpoint, session):
608 | ping_times = []
609 | while not STOP_PING:
610 | resp = session.get("{}/users/{}".format(endpoint, usernames[0]))
611 | total = 1 if dry_run else resp.elapsed.total_seconds()
612 | ping_times.append(total)
613 | LOG.debug("[ping-hub] Fetching user model took %f seconds", total)
614 |
615 | avg = sum(ping_times) / len(ping_times)
616 | LOG.info("Hub ping time: average=%f, min=%f, max=%f",
617 | avg, min(ping_times), max(ping_times))
618 |
619 | LOG.info("Simulating activity updates for %d users", count)
620 | times = []
621 | with futures.ThreadPoolExecutor(max_workers=workers) as executor:
622 | # Launch our 'ping' thread. This will repeatedly hit the API during
623 | # the test and track the timing. We don't need to get the future
624 | # because this thread is controlled via the STOP_PING variable.
625 | executor.submit(ping_hub, endpoint, session)
626 |
627 | # Give each worker thread an even share of the test users. Each thread
628 | # will iterate over its list of users and POST an activity update. The
629 | # thread will sleep a random amount of time between 0 and 1 seconds
630 | # between users.
631 | future_to_timing = {
632 | executor.submit(send_activity, users, endpoint, session): users
633 | for users in chunk(usernames, max(1, len(usernames) // workers))
634 | }
635 | for future in futures.as_completed(future_to_timing):
636 | times.extend(future.result())
637 |
638 | # We only want the ping_hub thread to run while the users are POSTing
639 | # activity updates. Once all futures are completed we can shut down
640 | # the ping thread.
641 | STOP_PING = True
642 |
643 | avg = sum(times) / len(times)
644 | LOG.info("Time to POST activity update: average=%f, min=%f, max=%f",
645 | avg, min(times), max(times))
646 |
647 | if not keep:
648 | delete_users(usernames, endpoint, session)
649 |
650 |
651 | def main():
652 | args = parse_args()
653 | setup_logging(verbose=args.verbose, log_to_file=args.log_to_file,
654 | args=args)
655 | try:
656 | validate(args)
657 | except Exception as e:
658 | LOG.error(e)
659 | sys.exit(1)
660 |
661 | try:
662 | if args.command == 'purge':
663 | purge_users(args.token, args.endpoint, dry_run=args.dry_run)
664 | elif args.command == 'stress-test':
665 | run_stress_test(args.count, args.batch_size, args.token,
666 | args.endpoint, dry_run=args.dry_run,
667 | keep=args.keep, profile=args.profile)
668 | elif args.command == 'activity-stress-test':
669 | notebook_activity_test(args.count, args.token,
670 | args.endpoint, args.workers, keep=args.keep,
671 | dry_run=args.dry_run)
672 | except Exception as e:
673 | LOG.exception(e)
674 | sys.exit(128)
675 |
676 |
677 | if __name__ == "__main__":
678 | main()
679 |
--------------------------------------------------------------------------------
/test-requirements.txt:
--------------------------------------------------------------------------------
1 | flake8
2 |
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | # tox (https://tox.readthedocs.io/) is a tool for running tests
2 | # in multiple virtualenvs. This configuration runs the flake8 lint
3 | # checks and a dry-run of the hub-stress-test script. To use it,
4 | # "pip install tox" and then run "tox" from this directory.
5 |
6 | [tox]
7 | envlist = flake8, hub-stress-test
8 | skipsdist = True
9 |
10 | [testenv]
11 | basepython = python3
12 | whitelist_externals =
13 | cat
14 | rm
15 | deps =
16 | -r{toxinidir}/requirements.txt
17 | -r{toxinidir}/test-requirements.txt
18 |
19 | [testenv:flake8]
20 | commands =
21 | flake8 scripts
22 |
23 | [testenv:hub-stress-test]
24 | basepython = python3.7
25 | setenv =
26 | JUPYTERHUB_API_TOKEN=test
27 | JUPYTERHUB_ENDPOINT=https://notebooks.foo.com/hub/api
28 | commands =
29 | python scripts/hub-stress-test.py -v --dry-run stress-test -c 5
30 | python scripts/hub-stress-test.py -v --dry-run --log-to-file purge.log purge
31 | python scripts/hub-stress-test.py --dry-run activity-stress-test --count 10
32 | cat purge.log
33 | rm purge.log
34 |
--------------------------------------------------------------------------------