├── .gitignore ├── LICENSE ├── README.md ├── build.sh ├── example.html ├── footer.js ├── header.js ├── package-lock.json ├── package.json ├── tsconfig.json ├── webgl-profiler.js ├── webgl-profiler.ts └── webgl-profiler.umd.js /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Figma, Inc. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # WebGL Profiler 2 | 3 | This repository contains a small library to enable GPU-side profiling of 4 | WebGL command queues using the `EXT_disjoint_timer_query` OpenGL extension. 5 | The output at the end is a profile that can be dropped into 6 | https://www.speedscope.app/ and viewed as a flamechart. 7 | 8 | We need to do special profiling GPU-side because CPU-side gl calls are not 9 | synchronized with the GPU's actual execution of those commands. Instead, to 10 | measure how long things are taking on the GPU, we need to insert special 11 | commands into the GPU's command queue telling it when to start a timer and 12 | when to stop the timer. 13 | 14 | This comes with an annoying list of limitations: 15 | 16 | - This currently only works in Desktop Chrome >= 70. 17 | The extension was completely removed in Chrome 65 18 | (https://crbug.com/808744) and Firefox 63 due to a severe security 19 | vulnerability (https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2018-10229). 20 | It was re-introduced in Chrome 70 (https://crbug.com/820891). There's 21 | an open bug for re-exposing this in Android Chrome (https://crbug.com/870491). 22 | 23 | - There's no way to ask for a timestamp. This is what `TIMESTAMP_EXT` 24 | was designed for, but it was removed in 2016 (https://crbug.com/595172). 25 | This makes it difficult to see how much time has elapsed between queries, 26 | so instead we need to have queries always running. 27 | 28 | - It seems like the elapsed time for every command other than draw calls is 29 | indicated as zero on GPUs I've tested. The total elapsed times still seem 30 | ballpark correct when comparing against active GPU time in a Chrome 31 | performance profile, however. 
This could either mean that the GPU times of 32 | other commands are negligible, or that the `EXT_disjoint_timer_query` is lying 33 | in these cases :| 34 | 35 | - Some graphics card/driver combinations seem to have unresolvably buggy 36 | behavior. This unfortunately includes the NVIDIA GeForce GT 750M, which was 37 | actually the very first card I tested this on, since it's the discrete 38 | graphics card on my MacBook Pro! If you try to use the profiler with this 39 | card, `profiler.start()` will throw an error to avoid providing confusing information. Other 40 | cards are probably buggy too. See: https://twitter.com/jlfwong/status/1058475013546770432 41 | 42 | ## Usage 43 | 44 | To use this library, you can either install it as an npm module, or just 45 | include it as a script tag: 46 | 47 | ```html 48 | <script src="webgl-profiler.js"></script> 49 | ``` 50 | 51 | If consuming through npm, you can get access to the `WebGLProfiler` class 52 | via `const WebGLProfiler = require('webgl-profiler')`. If you included it 53 | as a script tag, you can access it as a global `WebGLProfiler` variable. 54 | 55 | From there, you can construct a profiler for a given `WebGLRenderingContext`, 56 | like so: 57 | 58 | ```javascript 59 | const gl = canvas.getContext('webgl'); 60 | const profiler = new WebGLProfiler(gl) 61 | ``` 62 | 63 | To start a profile, run `profiler.start()`. To stop the profile, wait for the GPU 64 | commands to flush, and then download a file that can be imported into 65 | https://www.speedscope.app/, run `profiler.stopAndDownload()`. 66 | 67 | Unlike CPU side operations, there's no concept of a "call stack", so we need 68 | to explicitly annotate the GPU command queue with human-readable information. 69 | You can either do this via paired calls to 70 | `profiler.pushContext(contextName)` and `profiler.popContext(contextName)`, or 71 | you can use `profiler.withContext(contextName, callback)`. 
72 | 73 | Here's the relevant bits of an example usage: 74 | 75 | ```javascript 76 | var profiler = new WebGLProfiler(gl) 77 | profiler.start() 78 | { 79 | profiler.pushContext("a") 80 | gl.drawArrays(gl.TRIANGLE_STRIP, 0, 4); 81 | { 82 | profiler.pushContext("b") 83 | for (let i = 0; i < 10; i++) { 84 | gl.drawArrays(gl.TRIANGLE_STRIP, 0, 4) 85 | } 86 | profiler.popContext("b") 87 | 88 | profiler.withContext("c", function() { 89 | for (let i = 0; i < 10; i++) { 90 | gl.drawArrays(gl.TRIANGLE_STRIP, 0, 4) 91 | } 92 | }) 93 | } 94 | profiler.popContext("a") 95 | } 96 | profiler.stopAndDownload() 97 | ``` 98 | 99 | This will produce a profile that looks something like this in speedscope: 100 | 101 | ![example profile](https://user-images.githubusercontent.com/150329/48817461-75acb780-ecfb-11e8-8468-46ba4edf9c2d.png) 102 | 103 | 104 | You can see a full working example in [`example.html`](example.html). 105 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eoux pipefail 3 | npm install 4 | node_modules/.bin/tsc 5 | cat header.js webgl-profiler.js footer.js > webgl-profiler.js.tmp 6 | mv webgl-profiler.js.tmp webgl-profiler.js -------------------------------------------------------------------------------- /example.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | WebGL Profiler Demo 7 | 8 | 9 | 10 | 11 | 12 | 13 | 73 | -------------------------------------------------------------------------------- /footer.js: -------------------------------------------------------------------------------- 1 | 2 | if (typeof module === "object" && typeof module.exports === "object") { 3 | module.exports = WebGLProfiler 4 | } else if (typeof window !== 'undefined') { 5 | window['WebGLProfiler'] = WebGLProfiler 6 | } 7 | })(); 
-------------------------------------------------------------------------------- /header.js: -------------------------------------------------------------------------------- 1 | // This file is generated from webgl-profiler.ts. Do not edit this file directly. 2 | ;(function() { -------------------------------------------------------------------------------- /package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "webgl-profiler", 3 | "version": "1.0.0", 4 | "lockfileVersion": 1, 5 | "requires": true, 6 | "dependencies": { 7 | "typescript": { 8 | "version": "3.1.6", 9 | "resolved": "https://registry.npmjs.org/typescript/-/typescript-3.1.6.tgz", 10 | "integrity": "sha512-tDMYfVtvpb96msS1lDX9MEdHrW4yOuZ4Kdc4Him9oU796XldPYF/t2+uKoX0BBa0hXXwDlqYQbXY5Rzjzc5hBA==", 11 | "dev": true 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "webgl-profiler", 3 | "version": "1.0.0", 4 | "description": "A GPU-side profiler using EXT_disjoint_timer_query", 5 | "main": "webgl-profiler.umd.js", 6 | "scripts": { 7 | "build": "tsc" 8 | }, 9 | "author": "Jamie Wong", 10 | "license": "MIT", 11 | "devDependencies": { 12 | "typescript": "^3.1.6" 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "module": "none", 4 | "strict": true, 5 | "noUnusedLocals": true, 6 | "lib": ["dom", "es5", "es2015.promise"], 7 | "target": "es5" 8 | } 9 | } -------------------------------------------------------------------------------- /webgl-profiler.js: -------------------------------------------------------------------------------- 1 | // This file is generated from webgl-profiler.ts. 
// Do not edit this file directly.
;(function() {"use strict";
// Compiler-emitted helper that drives a generator as a Promise-returning
// async function. Reproduced unchanged from the generated output.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : new P(function (resolve) { resolve(result.value); }).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
// Compiler-emitted helper that emulates generators as a label-driven state
// machine for ES5 targets. Reproduced unchanged from the generated output.
var __generator = (this && this.__generator) || function (thisArg, body) {
    var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g;
    return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g;
    function verb(n) { return function (v) { return step([n, v]); }; }
    function step(op) {
        if (f) throw new TypeError("Generator is already executing.");
        while (_) try {
            if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;
            if (y = 0, t) op = [op[0] & 2, t.value];
            switch (op[0]) {
                case 0: case 1: t = op; break;
                case 4: _.label++; return { value: op[1], done: false };
                case 5: _.label++; y = op[1]; op = [0]; continue;
                case 7: op = _.ops.pop(); _.trys.pop(); continue;
                default:
                    if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }
                    if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }
                    if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }
                    if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }
                    if (t[2]) _.ops.pop();
                    _.trys.pop(); continue;
            }
            op = body.call(thisArg, _);
        } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }
        if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };
    }
};
/*
 * This is a utility class for profiling GPU-side operations using the
 * EXT_disjoint_timer_query OpenGL extension.
 *
 * We need to do special profiling GPU-side because CPU-side gl
 * calls are not synchronized with the GPU's actual execution of those
 * commands. Instead, to measure how long things are taking on the GPU, we
 * need to insert special commands into the GPU's command queue telling it
 * when to start a timer and when to stop the timer.
 *
 * This extension has a number of annoying limitations:
 *  - Only one query can be active at a time. This means that we need to
 *    implement nested timers ourselves in order to be able to produce
 *    helpful flamegraphs.
 *  - This currently only works in Desktop Chrome >= 70.
 *    The extension was completely removed in Chrome 65
 *    (https://crbug.com/808744) and Firefox 63 due to a severe security
 *    vulnerability (https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2018-10229).
 *    It was re-introduced in Chrome 70 (https://crbug.com/820891). There's
 *    an open bug for re-exposing this in Android Chrome (https://crbug.com/870491).
 *  - There's no way to ask for a timestamp. This is what `TIMESTAMP_EXT`
 *    was designed for, but it was removed in 2016 (https://crbug.com/595172).
 *    This makes it difficult to see how much time has elapsed between queries,
 *    so instead we need to have queries always running.
 *  - It seems like the elapsed time for every command other than draw calls is
 *    indicated as zero on GPUs I've tested. The total elapsed times still seem
 *    ballpark correct when comparing against active GPU time in a Chrome
 *    performance profile, however. This could either mean that the GPU times of
 *    other commands are negligible, or that the EXT_disjoint_timer_query is lying
 *    in these cases :|
 *
 * Since only one disjoint timer query can be active at a time, in order to create
 * nested timers, we mark "OPEN_FRAME" and "CLOSE_FRAME" events along the timeline
 * by changing the active timer at each event. It should look something like this:
 *
 *            ---------- Time --------->
 *
 * Queries     q1      q2       q3        q4        q5    q6       q7         q8     q9
 *             <-> <---------> <---> <-----------> <---> <--> <----------> <-------> <->
 *
 * Stack      +---+-----------------------------------------------------------------+---+
 *                | Draw Frame                                                      |
 *                +-----------+-------------------------+----+------------+---------+
 *                            | Draw Node               |    | Draw Hover |
 *                            +-----+-------------+-----+    +------------+
 *                                  | Draw Shadow |
 *                                  +-------------+
 *
 * Events
 *    q1 start: profile start
 *    q2 start: OPEN_FRAME "Draw Frame"
 *    q3 start: OPEN_FRAME "Draw Node"
 *    q4 start: OPEN_FRAME "Draw Shadow"
 *    q5 start: CLOSE_FRAME "Draw Shadow"
 *    q6 start: CLOSE_FRAME "Draw Node"
 *    q7 start: OPEN_FRAME "Draw Hover"
 *    q8 start: CLOSE_FRAME "Draw Hover"
 *    q9 start: CLOSE_FRAME "Draw Frame"
 *    q9 end: profile end
 *
 * For each query, the only information we know about it is its duration.
 * Assuming we have timing queries running for the entire duration of the
 * profile, however, this is sufficient to construct a flamegraph as long as
 * we remember what event is associated with the start/end of each query.
 */
var WebGLProfiler = /** @class */ (function () {
    /**
     * Creates a profiler bound to one WebGL context. The
     * EXT_disjoint_timer_query extension is looked up immediately; when it is
     * missing, `start()` throws and the marking methods become no-ops.
     *
     * @param context The WebGL rendering context whose commands are profiled.
     */
    function WebGLProfiler(context) {
        this.ext = null;
        this.activeQuery = null;
        this.isRunning = false;
        // This list contains events whose beginQueryEXT/endQueryEXT calls have been
        // enqueued in the GPU command buffer, but whose timing results aren't yet
        // available. These are in chronological order.
        this.eventsPendingTimestamps = [];
        // This list contains events whose timestamps have already been inferred based
        // on the durations retrieved from the GPU. These are also in chronological order.
        this.resolvedEvents = [];
        // This is a stack of currently active named contexts. This is used to validate
        // that the pushContext/popContext calls match up properly.
        this.namedContextStack = [];
        this.context = context;
        this.ext = context.getExtension("EXT_disjoint_timer_query");
    }
    /** Returns true between a successful `start()` and the matching `stop()`. */
    WebGLProfiler.prototype.isProfilerRunning = function () {
        return this.isRunning;
    };
    /**
     * Begins recording: discards any previous results, starts the first timer
     * query and opens the implicit root context named "profile".
     *
     * @throws Error if the extension is unavailable, if the profiler is already
     *   running, or if the renderer is on the known-buggy list.
     */
    WebGLProfiler.prototype.start = function () {
        if (this.ext == null) {
            throw new Error("EXT_disjoint_timer_query WebGL extension is not available. Cannot start profiler.");
        }
        if (this.isRunning) {
            throw new Error("Profiler is already running");
        }
        var infoExt = this.context.getExtension("WEBGL_debug_renderer_info");
        if (infoExt != null) {
            var renderer = this.context.getParameter(infoExt.UNMASKED_RENDERER_WEBGL);
            if (renderer.indexOf("NVIDIA GeForce GT 750M") !== -1) {
                // See: https://twitter.com/jlfwong/status/1058475013546770432
                throw new Error(renderer + " cards seem to have a buggy implementation of EXT_disjoint_timer_query. Refusing to record to avoid misleading results.");
            }
        }
        this.isRunning = true;
        this.eventsPendingTimestamps = [];
        this.resolvedEvents = [];
        this.activeQuery = this.ext.createQueryEXT();
        this.ext.beginQueryEXT(this.ext.TIME_ELAPSED_EXT, this.activeQuery);
        this.pushContext("profile");
    };
    /**
     * Stops recording: closes the implicit "profile" context and ends the
     * trailing timer query. Does nothing when the extension is unavailable.
     *
     * @throws Error if the profiler is not running, or (via popContext) if
     *   user contexts were left open.
     */
    WebGLProfiler.prototype.stop = function () {
        if (this.ext == null) {
            return;
        }
        if (!this.isRunning) {
            throw new Error("Profiler is already stopped");
        }
        this.isRunning = false;
        this.popContext("profile");
        // popContext started one more query after the final CLOSE_FRAME. No
        // event is attached to it, so nobody will ever read its result:
        // end it and release it here instead of leaking it.
        var trailingQuery = this.activeQuery;
        this.activeQuery = null;
        this.ext.endQueryEXT(this.ext.TIME_ELAPSED_EXT);
        if (trailingQuery != null) {
            this.ext.deleteQueryEXT(trailingQuery);
        }
    };
    /**
     * Opens a named context (a flamechart frame) at the current point in the
     * GPU command queue.
     *
     * @param name Human-readable frame name.
     * @throws Error if the extension is available but no profile is active.
     */
    WebGLProfiler.prototype.pushContext = function (name) {
        this.markAction({ type: GPUProfilerActionType.OPEN_FRAME, name: name });
        this.namedContextStack.push(name);
    };
    /**
     * Closes the innermost open context, which must be called `name`.
     *
     * @param name The name that was passed to the matching pushContext.
     * @throws Error if no context is open or if `name` does not match the
     *   innermost open context.
     */
    WebGLProfiler.prototype.popContext = function (name) {
        if (this.namedContextStack.length === 0) {
            throw new Error("Tried to pop a context when the context stack is empty!");
        }
        var popped = this.namedContextStack.pop();
        if (popped !== name) {
            throw new Error("Expected popContext to be called with " + popped + ", but it was called with " + name);
        }
        this.markAction({ type: GPUProfilerActionType.CLOSE_FRAME, name: name });
    };
    /**
     * Runs `callback` inside a context called `name`. The context is closed
     * again even when the callback throws, so one failing draw routine does
     * not unbalance the context stack for the rest of the profile.
     *
     * @param name Human-readable frame name.
     * @param callback Synchronous function issuing the GL commands to measure.
     */
    WebGLProfiler.prototype.withContext = function (name, callback) {
        this.pushContext(name);
        var completed = false;
        try {
            callback();
            completed = true;
        }
        finally {
            if (completed) {
                this.popContext(name);
            }
            else {
                // The callback's exception is already propagating. Closing the
                // frame is best-effort here: a secondary popContext failure
                // must not replace the original error.
                try {
                    this.popContext(name);
                }
                catch (ignored) { }
            }
        }
    };
    /**
     * Waits until the GPU has reported a duration for every recorded event,
     * polling once per animation frame, and resolves with the profile
     * serialized in speedscope's JSON file format.
     *
     * @returns Promise resolving to the JSON text.
     */
    WebGLProfiler.prototype.exportSpeedscopeProfile = function () {
        return __awaiter(this, void 0, void 0, function () {
            return __generator(this, function (_a) {
                switch (_a.label) {
                    case 0:
                        if (!(this.eventsPendingTimestamps.length > 0)) return [3 /*break*/, 2];
                        this.resolveEventsIfPossible();
                        return [4 /*yield*/, new Promise(function (resolve) { return requestAnimationFrame(resolve); })];
                    case 1:
                        _a.sent();
                        return [3 /*break*/, 0];
                    case 2: return [2 /*return*/, this.toSpeedscopeProfile()];
                }
            });
        });
    };
    /**
     * Exports the profile and triggers a browser download of it through a
     * temporary anchor element.
     *
     * @returns Promise that resolves once the download has been triggered.
     */
    WebGLProfiler.prototype.downloadWhenReady = function () {
        return __awaiter(this, void 0, void 0, function () {
            var profileText, link;
            return __generator(this, function (_a) {
                switch (_a.label) {
                    case 0: return [4 /*yield*/, this.exportSpeedscopeProfile()];
                    case 1:
                        profileText = _a.sent();
                        link = document.createElement("a");
                        link.href = URL.createObjectURL(new Blob([profileText], { "type": "application/json" }));
                        link.download = "gpuprofile-" + +new Date() + ".speedscope.json";
                        document.body.appendChild(link);
                        link.click();
                        document.body.removeChild(link);
                        return [2 /*return*/];
                }
            });
        });
    };
    /** Convenience wrapper: `stop()` followed by `downloadWhenReady()`. */
    WebGLProfiler.prototype.stopAndDownload = function () {
        return __awaiter(this, void 0, void 0, function () {
            return __generator(this, function (_a) {
                switch (_a.label) {
                    case 0:
                        this.stop();
                        return [4 /*yield*/, this.downloadWhenReady()];
                    case 1:
                        _a.sent();
                        return [2 /*return*/];
                }
            });
        });
    };
    /**
     * Records `action` at the current point of the command queue by ending the
     * running timer query and immediately starting a new one. The ended query
     * is queued with the action so its duration can be read back later.
     */
    WebGLProfiler.prototype.markAction = function (action) {
        if (this.ext == null) {
            return;
        }
        if (this.activeQuery == null) {
            throw new Error("Cannot mark actions while no profile is active");
        }
        var oldQuery = this.activeQuery;
        this.activeQuery = this.ext.createQueryEXT();
        this.ext.endQueryEXT(this.ext.TIME_ELAPSED_EXT);
        this.ext.beginQueryEXT(this.ext.TIME_ELAPSED_EXT, this.activeQuery);
        this.eventsPendingTimestamps.push({ action: action, query: oldQuery });
    };
    /**
     * Moves events from the pending list to the resolved list for as long as
     * the GPU has results available, in chronological order. Each event's
     * timestamp is the previous event's timestamp plus the query's duration.
     *
     * @throws Error("GPU_DISJOINT_EXT") when the driver reports that timing
     *   was disrupted.
     */
    WebGLProfiler.prototype.resolveEventsIfPossible = function () {
        if (this.ext == null) {
            return;
        }
        var i = 0;
        while (i < this.eventsPendingTimestamps.length) {
            var pendingAction = this.eventsPendingTimestamps[i];
            var query = pendingAction.query;
            if (!this.ext.getQueryObjectEXT(query, this.ext.QUERY_RESULT_AVAILABLE_EXT)) {
                // Results arrive in order, so stop at the first unavailable one.
                break;
            }
            // I don't totally understand what this means, but apparently if this is true,
            // it means that the GPU timing information is definitely going to be unreliable.
            // This is based on this example:
            // https://developer.mozilla.org/en-US/docs/Web/API/EXT_disjoint_timer_query/getQueryObjectEXT#Examples
            if (this.context.getParameter(this.ext.GPU_DISJOINT_EXT)) {
                throw new Error("GPU_DISJOINT_EXT");
            }
            var elapsed = this.ext.getQueryObjectEXT(query, this.ext.QUERY_RESULT_EXT);
            // TODO(jlfwong): If the creation & deletion of queries ends up having non-trivial
            // overhead, we could generate a bunch of queries up-front, and then use a free list
            // instead of needing to call createQueryEXT and deleteQueryEXT all the time.
            this.ext.deleteQueryEXT(query);
            var lastTimestamp = this.resolvedEvents.length === 0 ? 0 : this.resolvedEvents[this.resolvedEvents.length - 1].timestamp;
            var timestamp = lastTimestamp + elapsed;
            this.resolvedEvents.push({ action: pendingAction.action, timestamp: timestamp });
            i++;
        }
        if (i > 0) {
            this.eventsPendingTimestamps = this.eventsPendingTimestamps.slice(i);
        }
    };
    /**
     * Convert the currently recorded profile into speedscope's
     * file format.
     *
     * @returns The profile as JSON text.
     * @throws Error("Profile is empty") when no event has been resolved.
     */
    WebGLProfiler.prototype.toSpeedscopeProfile = function () {
        var frames = [];
        var speedscopeEvents = [];
        if (this.resolvedEvents.length === 0) {
            throw new Error("Profile is empty");
        }
        var profile = {
            "type": SpeedscopeProfileType.EVENTED,
            "name": "GPU Profile",
            "unit": "nanoseconds",
            "startValue": 0,
            "endValue": this.resolvedEvents[this.resolvedEvents.length - 1].timestamp,
            "events": speedscopeEvents
        };
        var file = {
            "$schema": "https://www.speedscope.app/file-format-schema.json",
            "shared": {
                "frames": frames,
            },
            "profiles": [profile]
        };
        // Prototype-less dictionary: with a plain {} the `in` check below would
        // also match inherited keys, so a context named "constructor" or
        // "toString" would never be registered as a frame.
        var frameToIndex = Object.create(null);
        function getOrInsertFrame(name) {
            if (!(name in frameToIndex)) {
                frameToIndex[name] = frames.length;
                frames.push({
                    "name": name
                });
            }
            return frameToIndex[name];
        }
        for (var _i = 0, _a = this.resolvedEvents; _i < _a.length; _i++) {
            var event_1 = _a[_i];
            speedscopeEvents.push({
                "type": event_1.action.type === GPUProfilerActionType.OPEN_FRAME ? SpeedscopeEventType.OPEN_FRAME : SpeedscopeEventType.CLOSE_FRAME,
                "frame": getOrInsertFrame(event_1.action.name),
                "at": event_1.timestamp
            });
        }
        return JSON.stringify(file);
    };
    return WebGLProfiler;
}());
// Kind of marker recorded in the GPU command queue.
var GPUProfilerActionType;
(function (GPUProfilerActionType) {
    GPUProfilerActionType[GPUProfilerActionType["OPEN_FRAME"] = 0] = "OPEN_FRAME";
    GPUProfilerActionType[GPUProfilerActionType["CLOSE_FRAME"] = 1] = "CLOSE_FRAME";
})(GPUProfilerActionType || (GPUProfilerActionType = {}));
// Values of the "type" field of a profile in the exported file.
var SpeedscopeProfileType;
(function (SpeedscopeProfileType) {
    SpeedscopeProfileType["EVENTED"] = "evented";
    SpeedscopeProfileType["SAMPLED"] = "sampled";
})(SpeedscopeProfileType || (SpeedscopeProfileType = {}));
// Values of the "type" field of an event in the exported file.
var SpeedscopeEventType;
(function (SpeedscopeEventType) {
    SpeedscopeEventType["OPEN_FRAME"] = "O";
    SpeedscopeEventType["CLOSE_FRAME"] = "C";
})(SpeedscopeEventType || (SpeedscopeEventType = {}));

// Export: CommonJS when available, otherwise attach to the global object.
// `window` is tried first (unchanged behavior for script tags); `self` and
// `globalThis` make the class reachable where there is no `window`, e.g. in
// a Web Worker.
if (typeof module === "object" && typeof module.exports === "object") {
    module.exports = WebGLProfiler;
} else {
    var globalScope = typeof window !== 'undefined' ? window
        : typeof self !== 'undefined' ? self
        : typeof globalThis !== 'undefined' ? globalThis
        : null;
    if (globalScope != null) {
        globalScope['WebGLProfiler'] = WebGLProfiler;
    }
}
})();
10 | * 11 | * This extension has a number of annoying limitations: 12 | * - Only one query can be active at a time. This means that we need to 13 | * implement nested timers ourselves in order to be able to produce 14 | * helpful flamegraphs. 15 | * - This currently only works in Desktop Chrome >= 70. 16 | * The extension was completely removed in Chrome 65 17 | * (https://crbug.com/808744) and Firefox 63 due to a severe security 18 | * vulnerability (https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2018-10229). 19 | * It was re-introduced in Chrome 70 (https://crbug.com/820891). There's 20 | * an open bug for re-exposing this in Android Chrome (https://crbug.com/870491). 21 | * - There's no way to ask for a timestamp. This is what `TIMESTAMP_EXT` 22 | * was designed for, but it was removed in 2016 (https://crbug.com/595172). 23 | * This makes it difficult to see how much time has elapsed between queries, 24 | * so instead we need to have queries always running. 25 | * - It seems like the elapsed time for every command other than draw calls is 26 | * indicated as zero on GPUs I've tested. The total elapsed times still seem 27 | * ballpark correct when comparing against active GPU time in a Chrome 28 | * performance profile, however. This could either mean that the GPU times of 29 | * other commands are negligible, or that the EXT_disjoint_timer_query is lying 30 | * in these cases :| 31 | * 32 | * Since only one disjoint timer query can be active at a time, in order to create 33 | * nested timers, we mark "OPEN_FRAME" and "CLOSE_FRAME" events along the timeline 34 | * by changing the active timer at each event. 
It should look something like this: 35 | * 36 | * ---------- Time ---------> 37 | * 38 | * Queries q1 q2 q3 q4 q5 q6 q7 q8 q9 39 | * <-> <---------> <---> <-----------> <---> <--> <----------> <-------> <-> 40 | * 41 | * Stack +---+-----------------------------------------------------------------+---+ 42 | * | Draw Frame | 43 | * +-----------+-------------------------+----+------------+---------+ 44 | * | Draw Node | | Draw Hover | 45 | * +-----+-------------+-----+ +------------+ 46 | * | Draw Shadow | 47 | * +-------------+ 48 | * 49 | * Events 50 | * q1 start: profile start 51 | * q2 start: OPEN_FRAME "Draw Frame" 52 | * q3 start: OPEN_FRAME "Draw Node" 53 | * q4 start: OPEN_FRAME "Draw Shadow" 54 | * q5 start: CLOSE_FRAME "Draw Shadow" 55 | * q6 start: CLOSE_FRAME "Draw Node" 56 | * q7 start: OPEN_FRAME "Draw Hover" 57 | * q8 start: CLOSE_FRAME "Draw Hover" 58 | * q9 start: CLOSE_FRAME "Draw Frame" 59 | * q9 end: profile end 60 | * 61 | * For each query, the only information we know about it is its duration. 62 | * Assuming we have timing queries running for the entire duration of the 63 | * profile, however, this is sufficient to construct a flamegraph as long as 64 | * we remember what event is associated with the start/end of each query. 65 | */ 66 | class WebGLProfiler { 67 | private readonly context: WebGLRenderingContext 68 | private readonly ext: EXTDisjointTimerQuery | null = null 69 | private activeQuery: WebGLTimerQueryEXT | null = null 70 | private isRunning = false 71 | 72 | // This list contains events whose beginQueryEXT/endQueryEXT calls have been 73 | // enqueued in the GPU command buffer, but whose timing results aren't yet 74 | // available. These are in chronological order. 75 | private eventsPendingTimestamps: GPUProfilerEventPendingTimestamp[] = [] 76 | 77 | // This list contains events whose timestamps have already been inferred based 78 | // on the durations retrieved from the GPU. These are also in chronological order. 
79 | private resolvedEvents: GPUProfilerResolvedEvent[] = [] 80 | 81 | // This is a stack of currently active named contexts. This is used to validate 82 | // that the pushContext/popContext calls match up properly. 83 | private namedContextStack: string[] = [] 84 | 85 | constructor(context: WebGLRenderingContext) { 86 | this.context = context 87 | this.ext = context.getExtension("EXT_disjoint_timer_query") as EXTDisjointTimerQuery | null 88 | } 89 | 90 | isProfilerRunning(): boolean { 91 | return this.isRunning 92 | } 93 | 94 | start(): void { 95 | if (this.ext == null) { 96 | throw new Error("EXT_disjoint_timer_query WebGL extension is not available. Cannot start profiler.") 97 | } 98 | if (this.isRunning) { 99 | throw new Error("Profiler is already running") 100 | } 101 | const infoExt = this.context.getExtension("WEBGL_debug_renderer_info") 102 | if (infoExt != null) { 103 | const renderer: string = this.context.getParameter(infoExt.UNMASKED_RENDERER_WEBGL) 104 | if (renderer.indexOf("NVIDIA GeForce GT 750M") !== -1) { 105 | // See: https://twitter.com/jlfwong/status/1058475013546770432 106 | throw new Error(`${renderer} cards seem to have a buggy implementation of EXT_disjoint_timer_query. 
Refusing to record to avoid misleading results.`) 107 | } 108 | } 109 | 110 | this.isRunning = true 111 | this.eventsPendingTimestamps = [] 112 | this.resolvedEvents = [] 113 | 114 | this.activeQuery = this.ext.createQueryEXT() 115 | this.ext.beginQueryEXT(this.ext.TIME_ELAPSED_EXT, this.activeQuery) 116 | 117 | this.pushContext("profile") 118 | } 119 | 120 | stop(): void { 121 | if (this.ext == null) { 122 | return 123 | } 124 | if (!this.isRunning) { 125 | throw new Error("Profiler is already stopped") 126 | } 127 | this.isRunning = false 128 | 129 | this.popContext("profile") 130 | this.activeQuery = null 131 | this.ext.endQueryEXT(this.ext.TIME_ELAPSED_EXT) 132 | } 133 | 134 | pushContext(name: string): void { 135 | this.markAction({type: GPUProfilerActionType.OPEN_FRAME, name}) 136 | this.namedContextStack.push(name) 137 | } 138 | 139 | popContext(name: string): void { 140 | if (this.namedContextStack.length === 0) { 141 | throw new Error("Tried to pop a context when the context stack is empty!") 142 | } 143 | const popped = this.namedContextStack.pop() 144 | if (popped !== name) { 145 | throw new Error(`Expected popContext to be called with ${popped}, but it was called with ${name}`) 146 | } 147 | this.markAction({type: GPUProfilerActionType.CLOSE_FRAME, name}) 148 | } 149 | 150 | withContext(name: string, callback: () => void): void { 151 | this.pushContext(name) 152 | callback() 153 | this.popContext(name) 154 | } 155 | 156 | async exportSpeedscopeProfile(): Promise { 157 | while (this.eventsPendingTimestamps.length > 0) { 158 | this.resolveEventsIfPossible() 159 | await new Promise((resolve) => requestAnimationFrame(resolve)) 160 | } 161 | 162 | return this.toSpeedscopeProfile() 163 | } 164 | 165 | async downloadWhenReady() { 166 | const profileText = await this.exportSpeedscopeProfile() 167 | 168 | const link = document.createElement("a") 169 | link.href = URL.createObjectURL(new Blob([profileText], { "type": "application/json" })) 170 | link.download = 
`gpuprofile-${+new Date()}.speedscope.json` 171 | document.body.appendChild(link) 172 | link.click() 173 | document.body.removeChild(link) 174 | } 175 | 176 | async stopAndDownload() { 177 | this.stop() 178 | await this.downloadWhenReady() 179 | } 180 | 181 | private markAction(action: GPUProfilerAction): void { 182 | if (this.ext == null) { 183 | return 184 | } 185 | 186 | if (this.activeQuery == null) { 187 | throw new Error("Cannot mark actions while no profile is active") 188 | } 189 | 190 | const oldQuery = this.activeQuery 191 | this.activeQuery = this.ext.createQueryEXT() 192 | 193 | this.ext.endQueryEXT(this.ext.TIME_ELAPSED_EXT) 194 | this.ext.beginQueryEXT(this.ext.TIME_ELAPSED_EXT, this.activeQuery) 195 | 196 | this.eventsPendingTimestamps.push({action, query: oldQuery}) 197 | } 198 | 199 | private resolveEventsIfPossible(): void { 200 | if (this.ext == null) { 201 | return 202 | } 203 | 204 | let i = 0 205 | while (i < this.eventsPendingTimestamps.length) { 206 | let pendingAction = this.eventsPendingTimestamps[i] 207 | let query = pendingAction.query 208 | if (!this.ext.getQueryObjectEXT(query, this.ext.QUERY_RESULT_AVAILABLE_EXT)) { 209 | break 210 | } 211 | 212 | // I don't totally understand what this means, but apparently if this is true, 213 | // it means that the GPU timing information is definitely going to be unreliable. 214 | // This is based on this example: 215 | // https://developer.mozilla.org/en-US/docs/Web/API/EXT_disjoint_timer_query/getQueryObjectEXT#Examples 216 | if (this.context.getParameter(this.ext.GPU_DISJOINT_EXT)) { 217 | throw new Error("GPU_DISJOINT_EXT") 218 | } 219 | 220 | const elapsed = this.ext.getQueryObjectEXT(query, this.ext.QUERY_RESULT_EXT) 221 | 222 | // TODO(jlfwong): If the creation & deletion of queries ends up having non-trivial 223 | // overhead, we could generate a bunch of queries up-front, and then use a free list 224 | // instead of needing to call createQueryEXT and deleteQueryEXT all the time.
225 | this.ext.deleteQueryEXT(query) 226 | 227 | var lastTimestamp = this.resolvedEvents.length === 0 ? 0 : this.resolvedEvents[this.resolvedEvents.length - 1].timestamp 228 | var timestamp = lastTimestamp + elapsed 229 | 230 | this.resolvedEvents.push({action: pendingAction.action, timestamp}) 231 | i++ 232 | } 233 | 234 | if (i > 0) { 235 | this.eventsPendingTimestamps = this.eventsPendingTimestamps.slice(i) 236 | } 237 | } 238 | 239 | // Convert the currently recorded profile into speedscope's 240 | // file format. 241 | private toSpeedscopeProfile(): string { 242 | const frames: SpeedscopeFrame[] = [] 243 | const speedscopeEvents: (SpeedscopeOpenFrameEvent | SpeedscopeCloseFrameEvent)[] = [] 244 | 245 | if (this.resolvedEvents.length === 0) { 246 | throw new Error("Profile is empty") 247 | } 248 | 249 | const profile: SpeedscopeEventedProfile = { 250 | "type": SpeedscopeProfileType.EVENTED, 251 | "name": "GPU Profile", 252 | "unit": "nanoseconds", 253 | "startValue": 0, 254 | "endValue": this.resolvedEvents[this.resolvedEvents.length - 1].timestamp, 255 | "events": speedscopeEvents 256 | } 257 | 258 | const file: SpeedscopeFile = { 259 | "$schema": "https://www.speedscope.app/file-format-schema.json", 260 | "shared": { 261 | "frames": frames, 262 | }, 263 | "profiles": [profile] 264 | } 265 | 266 | const frameToIndex: {[key: string]: number} = {} 267 | 268 | 269 | function getOrInsertFrame(name: string): number { 270 | if (!(name in frameToIndex)) { 271 | frameToIndex[name] = frames.length 272 | frames.push({ 273 | "name": name 274 | }) 275 | } 276 | return frameToIndex[name] 277 | } 278 | 279 | for (let event of this.resolvedEvents) { 280 | speedscopeEvents.push({ 281 | "type": event.action.type == GPUProfilerActionType.OPEN_FRAME ?
SpeedscopeEventType.OPEN_FRAME : SpeedscopeEventType.CLOSE_FRAME, 282 | "frame": getOrInsertFrame(event.action.name), 283 | "at": event.timestamp 284 | } as (SpeedscopeOpenFrameEvent | SpeedscopeCloseFrameEvent)) 285 | } 286 | 287 | return JSON.stringify(file) 288 | } 289 | } 290 | 291 | enum GPUProfilerActionType { 292 | OPEN_FRAME, 293 | CLOSE_FRAME 294 | } 295 | 296 | interface GPUProfilerAction { 297 | readonly type: GPUProfilerActionType 298 | readonly name: string 299 | } 300 | 301 | interface GPUProfilerEventPendingTimestamp { 302 | readonly action: GPUProfilerAction 303 | readonly query: WebGLTimerQueryEXT 304 | } 305 | 306 | interface GPUProfilerResolvedEvent { 307 | readonly action: GPUProfilerAction 308 | readonly timestamp: number 309 | } 310 | 311 | // DOM APIs 312 | interface WebGLTimerQueryEXT {} 313 | 314 | interface EXTDisjointTimerQuery { 315 | QUERY_COUNTER_BITS_EXT: 0x8864 316 | CURRENT_QUERY_EXT: 0x8865 317 | QUERY_RESULT_EXT: 0x8866 318 | QUERY_RESULT_AVAILABLE_EXT: 0x8867 319 | TIME_ELAPSED_EXT: 0x88BF 320 | TIMESTAMP_EXT: 0x8E28 321 | GPU_DISJOINT_EXT: 0x8FBB 322 | 323 | createQueryEXT(): WebGLTimerQueryEXT 324 | deleteQueryEXT(query: WebGLTimerQueryEXT): void 325 | isQueryEXT(query: WebGLTimerQueryEXT): boolean 326 | beginQueryEXT(target: GLenum, query: WebGLTimerQueryEXT): void 327 | endQueryEXT(target: GLenum): void 328 | getQueryEXT(target: GLenum, pname: GLenum): any 329 | getQueryObjectEXT(query: WebGLTimerQueryEXT, pname: 0x8867 /* QUERY_RESULT_AVAILABLE_EXT */): boolean 330 | getQueryObjectEXT(query: WebGLTimerQueryEXT, pname: 0x8866 /* QUERY_RESULT_EXT */): number 331 | getQueryObjectEXT(query: WebGLTimerQueryEXT, pname: GLenum): any 332 | } 333 | 334 | // speedscope types (from https://github.com/jlfwong/speedscope/blob/master/src/lib/file-format-spec.ts) 335 | interface SpeedscopeFile { 336 | $schema: 'https://www.speedscope.app/file-format-schema.json' 337 | 338 | // Data shared between profiles 339 | shared: { 340 | frames:
SpeedscopeFrame[] 341 | } 342 | 343 | // List of profile definitions 344 | profiles: SpeedscopeEventedProfile[] 345 | 346 | // The name of the contained profile group. If omitted, will use the name of 347 | // the file itself. 348 | // Added in 0.6.0 349 | name?: string 350 | 351 | // The index into the `profiles` array that should be displayed upon file 352 | // load. If omitted, will default to displaying the first profile in the 353 | // file. 354 | // 355 | // Added in 0.6.0 356 | activeProfileIndex?: number 357 | 358 | // The name of the program which exported this profile. This isn't 359 | // consumed but can be helpful for debugging generated data by seeing what 360 | // was generating it! Recommended format is "name@version". e.g. when the 361 | // file was exported by speedscope v0.6.0 itself, it will be 362 | // "speedscope@0.6.0" 363 | // 364 | // Added in 0.6.0 365 | exporter?: string 366 | } 367 | 368 | interface SpeedscopeFrame { 369 | name: string 370 | file?: string 371 | line?: number 372 | col?: number 373 | } 374 | 375 | enum SpeedscopeProfileType { 376 | EVENTED = 'evented', 377 | SAMPLED = 'sampled', 378 | } 379 | 380 | interface SpeedscopeEventedProfile { 381 | type: SpeedscopeProfileType.EVENTED 382 | 383 | // Name of the profile. Typically a filename for the source of the profile. 384 | name: string 385 | 386 | // Unit in which all values in the profile are specified. 387 | unit: SpeedscopeValueUnit 388 | 389 | // The starting value of the profile. This will typically be a timestamp. 390 | // All event values will be relative to this startValue. 391 | startValue: number 392 | 393 | // The final value of the profile. This will typically be a timestamp. This 394 | // must be greater than or equal to the startValue. This is useful in 395 | // situations where the recorded profile extends past the end of the recorded 396 | // events, which may happen if nothing was happening at the end of the 397 | // profile.
398 | endValue: number 399 | 400 | // List of events that occured as part of this profile. 401 | // The "at" field of every event must be in non-decreasing order. 402 | events: (SpeedscopeOpenFrameEvent | SpeedscopeCloseFrameEvent)[] 403 | } 404 | 405 | type SpeedscopeValueUnit = 406 | | 'none' 407 | | 'nanoseconds' 408 | | 'microseconds' 409 | | 'milliseconds' 410 | | 'seconds' 411 | | 'bytes' 412 | 413 | enum SpeedscopeEventType { 414 | OPEN_FRAME = 'O', 415 | CLOSE_FRAME = 'C', 416 | } 417 | 418 | // Indicates a stack frame opened. Every opened stack frame must have a 419 | // corresponding close frame event, and the ordering must be balanced. 420 | interface SpeedscopeOpenFrameEvent { 421 | type: SpeedscopeEventType.OPEN_FRAME 422 | // An index into the frames array in the shared data within the profile 423 | frame: number 424 | } 425 | 426 | interface SpeedscopeCloseFrameEvent { 427 | type: SpeedscopeEventType.CLOSE_FRAME 428 | // An index into the frames array in the shared data within the profile 429 | frame: number 430 | } -------------------------------------------------------------------------------- /webgl-profiler.umd.js: -------------------------------------------------------------------------------- 1 | // This file is generated from webgl-profiler.ts. Do not edit this file directly. 2 | ;(function() {"use strict"; 3 | var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { 4 | return new (P || (P = Promise))(function (resolve, reject) { 5 | function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } 6 | function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } 7 | function step(result) { result.done ? 
resolve(result.value) : new P(function (resolve) { resolve(result.value); }).then(fulfilled, rejected); } 8 | step((generator = generator.apply(thisArg, _arguments || [])).next()); 9 | }); 10 | }; 11 | var __generator = (this && this.__generator) || function (thisArg, body) { 12 | var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g; 13 | return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g; 14 | function verb(n) { return function (v) { return step([n, v]); }; } 15 | function step(op) { 16 | if (f) throw new TypeError("Generator is already executing."); 17 | while (_) try { 18 | if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t; 19 | if (y = 0, t) op = [op[0] & 2, t.value]; 20 | switch (op[0]) { 21 | case 0: case 1: t = op; break; 22 | case 4: _.label++; return { value: op[1], done: false }; 23 | case 5: _.label++; y = op[1]; op = [0]; continue; 24 | case 7: op = _.ops.pop(); _.trys.pop(); continue; 25 | default: 26 | if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; } 27 | if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; } 28 | if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; } 29 | if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; } 30 | if (t[2]) _.ops.pop(); 31 | _.trys.pop(); continue; 32 | } 33 | op = body.call(thisArg, _); 34 | } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; } 35 | if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true }; 36 | } 37 | }; 38 | /* 39 | * This is a utility class for profiling GPU-side operations using the 40 | * EXT_disjoint_timer_query OpenGL extension. 
41 | * 42 | * We need to do special profiling GPU-side because CPU-side gl 43 | * calls are not synchronized with the GPU's actual execution of those 44 | * commands. Instead, to measure how long things are taking on the GPU, we 45 | * need to insert special commands into the GPU's command queue telling it 46 | * when to start a timer and when to stop the timer. 47 | * 48 | * This extension has a number of annoying limitations: 49 | * - Only one query can be active at a time. This means that we need to 50 | * implement nested timers ourselves in order to be able to produce 51 | * helpful flamegraphs. 52 | * - This currently only works in Desktop Chrome >= 70. 53 | * The extension was completely removed in Chrome 65 54 | * (https://crbug.com/808744) and Firefox 63 due to a severe security 55 | * vulnerability (https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2018-10229). 56 | * It was re-introduced in Chrome 70 (https://crbug.com/820891). There's 57 | * an open bug for re-exposing this in Android Chrome (https://crbug.com/870491). 58 | * - There's no way to ask for a timestamp. This is what `TIMESTAMP_EXT` 59 | * was designed for, but it was removed in 2016 (https://crbug.com/595172). 60 | * This makes it difficult to see how much time has elapsed between queries, 61 | * so instead we need to have queries always running. 62 | * - It seems like the elapsed time for every command other than draw calls is 63 | * indicated as zero on GPUs I've tested. The total elapsed times still seem 64 | * ballpark correct when comparing against active GPU time in a Chrome 65 | * performance profile, however.
This could either mean that the GPU times of 66 | * other commands are negligible, or that the EXT_disjoint_timer_query extension is lying 67 | * in these cases :| 68 | * 69 | * Since only one disjoint timer query can be active at a time, in order to create 70 | * nested timers, we mark "OPEN_FRAME" and "CLOSE_FRAME" events along the timeline 71 | * by changing the active timer at each event. It should look something like this: 72 | * 73 | * ---------- Time ---------> 74 | * 75 | * Queries q1 q2 q3 q4 q5 q6 q7 q8 q9 76 | * <-> <---------> <---> <-----------> <---> <--> <----------> <-------> <-> 77 | * 78 | * Stack +---+-----------------------------------------------------------------+---+ 79 | * | Draw Frame | 80 | * +-----------+-------------------------+----+------------+---------+ 81 | * | Draw Node | | Draw Hover | 82 | * +-----+-------------+-----+ +------------+ 83 | * | Draw Shadow | 84 | * +-------------+ 85 | * 86 | * Events 87 | * q1 start: profile start 88 | * q2 start: OPEN_FRAME "Draw Frame" 89 | * q3 start: OPEN_FRAME "Draw Node" 90 | * q4 start: OPEN_FRAME "Draw Shadow" 91 | * q5 start: CLOSE_FRAME "Draw Shadow" 92 | * q6 start: CLOSE_FRAME "Draw Node" 93 | * q7 start: OPEN_FRAME "Draw Hover" 94 | * q8 start: CLOSE_FRAME "Draw Hover" 95 | * q9 start: CLOSE_FRAME "Draw Frame" 96 | * q9 end: profile end 97 | * 98 | * For each query, the only information we know about it is its duration. 99 | * Assuming we have timing queries running for the entire duration of the 100 | * profile, however, this is sufficient to construct a flamegraph as long as 101 | * we remember what event is associated with the start/end of each query.
102 | */ 103 | var WebGLProfiler = /** @class */ (function () { 104 | function WebGLProfiler(context) { 105 | this.ext = null; 106 | this.activeQuery = null; 107 | this.isRunning = false; 108 | // This list contains events whose beginQueryEXT/endQueryEXT calls have been 109 | // enqueued in the GPU command buffer, but whose timing results aren't yet 110 | // available. These are in chronological order. 111 | this.eventsPendingTimestamps = []; 112 | // This list contains events whose timestamps have already been inferred based 113 | // on the durations retrieved from the GPU. These are also in chronological order. 114 | this.resolvedEvents = []; 115 | this.context = context; 116 | this.ext = context.getExtension("EXT_disjoint_timer_query"); 117 | } 118 | WebGLProfiler.prototype.isProfilerRunning = function () { 119 | return this.isRunning; 120 | }; 121 | WebGLProfiler.prototype.start = function () { 122 | if (this.ext == null) { 123 | throw new Error("EXT_disjoint_timer_query WebGL extension is not available. Cannot start profiler."); 124 | } 125 | if (this.isRunning) { 126 | throw new Error("Profiler is already running"); 127 | } 128 | var infoExt = this.context.getExtension("WEBGL_debug_renderer_info"); 129 | if (infoExt != null) { 130 | var renderer = this.context.getParameter(infoExt.UNMASKED_RENDERER_WEBGL); 131 | if (renderer.indexOf("NVIDIA GeForce GT 750M") !== -1) { 132 | // See: https://twitter.com/jlfwong/status/1058475013546770432 133 | console.warn("\u26A0\uFE0F\u26A0\uFE0F\u26A0\uFE0F\u26A0\uFE0F\u26A0\uFE0F\u26A0\uFE0F\u26A0\uFE0F\u26A0\uFE0F\u26A0\uFE0F"); 134 | console.warn(renderer + " cards seem to have a buggy implementation of EXT_disjoint_timer_query. 
Results may be very misleading."); 135 | console.warn("\u26A0\uFE0F\u26A0\uFE0F\u26A0\uFE0F\u26A0\uFE0F\u26A0\uFE0F\u26A0\uFE0F\u26A0\uFE0F\u26A0\uFE0F\u26A0\uFE0F"); 136 | } 137 | } 138 | this.isRunning = true; 139 | this.eventsPendingTimestamps = []; 140 | this.resolvedEvents = []; 141 | this.activeQuery = this.ext.createQueryEXT(); 142 | this.ext.beginQueryEXT(this.ext.TIME_ELAPSED_EXT, this.activeQuery); 143 | this.pushContext("profile"); 144 | }; 145 | WebGLProfiler.prototype.stop = function () { 146 | if (this.ext == null) { 147 | return; 148 | } 149 | if (!this.isRunning) { 150 | throw new Error("Profiler is already stopped"); 151 | } 152 | this.isRunning = false; 153 | this.popContext("profile"); 154 | this.activeQuery = null; 155 | this.ext.endQueryEXT(this.ext.TIME_ELAPSED_EXT); 156 | }; 157 | WebGLProfiler.prototype.pushContext = function (name) { 158 | this.markAction({ type: GPUProfilerActionType.OPEN_FRAME, name: name }); 159 | }; 160 | WebGLProfiler.prototype.popContext = function (name) { 161 | this.markAction({ type: GPUProfilerActionType.CLOSE_FRAME, name: name }); 162 | }; 163 | WebGLProfiler.prototype.exportSpeedscopeProfile = function () { 164 | return __awaiter(this, void 0, void 0, function () { 165 | return __generator(this, function (_a) { 166 | switch (_a.label) { 167 | case 0: 168 | if (!(this.eventsPendingTimestamps.length > 0)) return [3 /*break*/, 2]; 169 | this.resolveEventsIfPossible(); 170 | return [4 /*yield*/, new Promise(function (resolve) { return requestAnimationFrame(resolve); })]; 171 | case 1: 172 | _a.sent(); 173 | return [3 /*break*/, 0]; 174 | case 2: return [2 /*return*/, this.toSpeedscopeProfile()]; 175 | } 176 | }); 177 | }); 178 | }; 179 | WebGLProfiler.prototype.downloadWhenReady = function () { 180 | return __awaiter(this, void 0, void 0, function () { 181 | var profileText, link; 182 | return __generator(this, function (_a) { 183 | switch (_a.label) { 184 | case 0: return [4 /*yield*/, 
this.exportSpeedscopeProfile()]; 185 | case 1: 186 | profileText = _a.sent(); 187 | link = document.createElement("a"); 188 | link.href = URL.createObjectURL(new Blob([profileText], { "type": "application/json" })); 189 | link.download = "gpuprofile-" + +new Date() + ".speedscope.json"; 190 | document.body.appendChild(link); 191 | link.click(); 192 | document.body.removeChild(link); 193 | return [2 /*return*/]; 194 | } 195 | }); 196 | }); 197 | }; 198 | WebGLProfiler.prototype.stopAndDownload = function () { 199 | return __awaiter(this, void 0, void 0, function () { 200 | return __generator(this, function (_a) { 201 | switch (_a.label) { 202 | case 0: 203 | this.stop(); 204 | return [4 /*yield*/, this.downloadWhenReady()]; 205 | case 1: 206 | _a.sent(); 207 | return [2 /*return*/]; 208 | } 209 | }); 210 | }); 211 | }; 212 | WebGLProfiler.prototype.markAction = function (action) { 213 | if (this.ext == null) { 214 | return; 215 | } 216 | if (this.activeQuery == null) { 217 | throw new Error("Cannot mark actions while no profile is active"); 218 | } 219 | var oldQuery = this.activeQuery; 220 | this.activeQuery = this.ext.createQueryEXT(); 221 | this.ext.endQueryEXT(this.ext.TIME_ELAPSED_EXT); 222 | this.ext.beginQueryEXT(this.ext.TIME_ELAPSED_EXT, this.activeQuery); 223 | this.eventsPendingTimestamps.push({ action: action, query: oldQuery }); 224 | }; 225 | WebGLProfiler.prototype.resolveEventsIfPossible = function () { 226 | if (this.ext == null) { 227 | return; 228 | } 229 | var i = 0; 230 | while (i < this.eventsPendingTimestamps.length) { 231 | var pendingAction = this.eventsPendingTimestamps[i]; 232 | var query = pendingAction.query; 233 | if (!this.ext.getQueryObjectEXT(query, this.ext.QUERY_RESULT_AVAILABLE_EXT)) { 234 | break; 235 | } 236 | // I don't totally understand what this means, but apparently if this is true, 237 | // it means that the GPU timing information is definitely going to unreliable. 
238 | // This is based on this example: 239 | // https://developer.mozilla.org/en-US/docs/Web/API/EXT_disjoint_timer_query/getQueryObjectEXT#Examples 240 | if (this.context.getParameter(this.ext.GPU_DISJOINT_EXT)) { 241 | throw new Error("GPU_DISJOINT_EXT"); 242 | } 243 | var elapsed = this.ext.getQueryObjectEXT(query, this.ext.QUERY_RESULT_EXT); 244 | // TODO(jlfwong): If the creation & deletion of queries ends up having non-trivial 245 | // overhead, we could generate a bunch of queries up-front, and then use a free list 246 | // instead of needing to call createQueryEXT and deleteQueryEXT all the time. 247 | this.ext.deleteQueryEXT(query); 248 | var lastTimestamp = this.resolvedEvents.length === 0 ? 0 : this.resolvedEvents[this.resolvedEvents.length - 1].timestamp; 249 | var timestamp = lastTimestamp + elapsed; 250 | this.resolvedEvents.push({ action: pendingAction.action, timestamp: timestamp }); 251 | i++; 252 | } 253 | if (i > 0) { 254 | this.eventsPendingTimestamps = this.eventsPendingTimestamps.slice(i); 255 | } 256 | }; 257 | // Convert the currently recorded profile into speedscope's 258 | // file format. 
259 | WebGLProfiler.prototype.toSpeedscopeProfile = function () { 260 | var frames = []; 261 | var speedscopeEvents = []; 262 | if (this.resolvedEvents.length === 0) { 263 | throw new Error("Profile is empty"); 264 | } 265 | var profile = { 266 | "type": SpeedscopeProfileType.EVENTED, 267 | "name": "GPU Profile", 268 | "unit": "nanoseconds", 269 | "startValue": 0, 270 | "endValue": this.resolvedEvents[this.resolvedEvents.length - 1].timestamp, 271 | "events": speedscopeEvents 272 | }; 273 | var file = { 274 | "$schema": "https://www.speedscope.app/file-format-schema.json", 275 | "shared": { 276 | "frames": frames, 277 | }, 278 | "profiles": [profile] 279 | }; 280 | var frameToIndex = {}; 281 | function getOrInsertFrame(name) { 282 | if (!(name in frameToIndex)) { 283 | frameToIndex[name] = frames.length; 284 | frames.push({ 285 | "name": name 286 | }); 287 | } 288 | return frameToIndex[name]; 289 | } 290 | for (var _i = 0, _a = this.resolvedEvents; _i < _a.length; _i++) { 291 | var event_1 = _a[_i]; 292 | speedscopeEvents.push({ 293 | "type": event_1.action.type == GPUProfilerActionType.OPEN_FRAME ?
SpeedscopeEventType.OPEN_FRAME : SpeedscopeEventType.CLOSE_FRAME, 294 | "frame": getOrInsertFrame(event_1.action.name), 295 | "at": event_1.timestamp 296 | }); 297 | } 298 | return JSON.stringify(file); 299 | }; 300 | return WebGLProfiler; 301 | }()); 302 | var GPUProfilerActionType; 303 | (function (GPUProfilerActionType) { 304 | GPUProfilerActionType[GPUProfilerActionType["OPEN_FRAME"] = 0] = "OPEN_FRAME"; 305 | GPUProfilerActionType[GPUProfilerActionType["CLOSE_FRAME"] = 1] = "CLOSE_FRAME"; 306 | })(GPUProfilerActionType || (GPUProfilerActionType = {})); 307 | var SpeedscopeProfileType; 308 | (function (SpeedscopeProfileType) { 309 | SpeedscopeProfileType["EVENTED"] = "evented"; 310 | SpeedscopeProfileType["SAMPLED"] = "sampled"; 311 | })(SpeedscopeProfileType || (SpeedscopeProfileType = {})); 312 | var SpeedscopeEventType; 313 | (function (SpeedscopeEventType) { 314 | SpeedscopeEventType["OPEN_FRAME"] = "O"; 315 | SpeedscopeEventType["CLOSE_FRAME"] = "C"; 316 | })(SpeedscopeEventType || (SpeedscopeEventType = {})); 317 | 318 | if (typeof module === "object" && typeof module.exports === "object") { 319 | module.exports = WebGLProfiler 320 | } else if (typeof window !== 'undefined') { 321 | window['WebGLProfiler'] = WebGLProfiler 322 | } 323 | })(); --------------------------------------------------------------------------------