├── .gitignore
├── LICENSE
├── README.md
├── build.sh
├── example.html
├── footer.js
├── header.js
├── package-lock.json
├── package.json
├── tsconfig.json
├── webgl-profiler.js
├── webgl-profiler.ts
└── webgl-profiler.umd.js
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Figma, Inc.
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # WebGL Profiler
2 |
3 | This repository contains a small library to enable GPU-side profiling of
4 | WebGL command queues using the `EXT_disjoint_timer_query` OpenGL extension.
5 | The output at the end is a profile that can be dropped into
6 | https://www.speedscope.app/ and viewed as a flamechart.
7 |
8 | We need to do special profiling GPU-side because CPU-side gl calls are not
9 | synchronized with the GPU's actual execution of those commands. Instead, to
10 | measure how long things are taking on the GPU, we need to insert special
11 | commands into the GPU's command queue telling it when to start a timer and
12 | when to stop the timer.
13 |
14 | This comes with an annoying list of limitations:
15 |
16 | - This currently only works in Desktop Chrome >= 70.
17 | The extension was completely removed in Chrome 65
18 | (https://crbug.com/808744) and Firefox 63 due to a severe security
19 | vulnerability (https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2018-10229).
20 | It was re-introduced in Chrome 70 (https://crbug.com/820891). There's
21 | an open bug for re-exposing this in Android Chrome (https://crbug.com/870491).
22 |
23 | - There's no way to ask for a timestamp. This is what `TIMESTAMP_EXT`
24 | was designed for, but it was removed in 2016 (https://crbug.com/595172).
25 | This makes it difficult to see how much time has elapsed between queries,
26 | so instead we need to have queries always running.
27 |
28 | - It seems like the elapsed time for every command other than draw calls is
29 | indicated as zero on GPUs I've tested. The total elapsed times still seem
30 | ballpark correct when comparing against active GPU time in a Chrome
31 | performance profile, however. This could either mean that the GPU times of
32 | other commands are negligible, or that `EXT_disjoint_timer_query` is lying
33 | in these cases :|
34 |
35 | - Some graphics card/driver combinations seem to have unresolvably buggy
36 | behavior. This unfortunately includes the NVIDIA GeForce GT 750M, which was
37 | actually the very first card I tested this on, since it's the discrete
38 | graphics card on my MacBook Pro! If you try to use the profiler with this
39 | card, it will hard crash to avoid providing confusing information. Other
40 | cards are probably buggy too. See: https://twitter.com/jlfwong/status/1058475013546770432
41 |
42 | ## Usage
43 |
44 | To use this library, you can either install it as an npm module, or just
45 | include it as a script tag:
46 |
47 | ```html
48 | <script src="webgl-profiler.js"></script>
49 | ```
50 |
51 | If consuming through npm, you can get access to the `WebGLProfiler` class
52 | via `const WebGLProfiler = require('webgl-profiler')`. If you included it
53 | as a script tag, you can access it as a global `WebGLProfiler` variable.
54 |
55 | From there, you can construct a profiler for a given `WebGLRenderingContext`,
56 | like so:
57 |
58 | ```javascript
59 | const gl = canvas.getContext('webgl');
60 | const profiler = new WebGLProfiler(gl)
61 | ```
62 |
63 | To start a profile, run `profiler.start()`. To stop the profile, wait for the
64 | GPU commands to flush, and download a file that can be imported into
65 | https://www.speedscope.app/, run `profiler.stopAndDownload()`.
66 |
67 | Unlike CPU side operations, there's no concept of a "call stack", so we need
68 | to explicitly annotate the GPU command queue with human-readable information.
69 | You can either do this via paired calls to
70 | `profiler.pushContext(contextName)` and `profiler.popContext(contextName)`, or
71 | you can use `profiler.withContext(contextName, callback)`.
72 |
73 | Here's the relevant bits of an example usage:
74 |
75 | ```javascript
76 | var profiler = new WebGLProfiler(gl)
77 | profiler.start()
78 | {
79 | profiler.pushContext("a")
80 | gl.drawArrays(gl.TRIANGLE_STRIP, 0, 4);
81 | {
82 | profiler.pushContext("b")
83 | for (let i = 0; i < 10; i++) {
84 | gl.drawArrays(gl.TRIANGLE_STRIP, 0, 4)
85 | }
86 | profiler.popContext("b")
87 |
88 | profiler.withContext("c", function() {
89 | for (let i = 0; i < 10; i++) {
90 | gl.drawArrays(gl.TRIANGLE_STRIP, 0, 4)
91 | }
92 | })
93 | }
94 | profiler.popContext("a")
95 | }
96 | profiler.stopAndDownload()
97 | ```
98 |
99 | This will produce a profile that looks something like this in speedscope:
100 |
101 | 
102 |
103 |
104 | You can see a full working example in [`example.html`](example.html).
105 |
--------------------------------------------------------------------------------
/build.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Builds the distributable webgl-profiler.js:
#   1. installs the dev dependencies (the TypeScript compiler),
#   2. runs tsc, which emits webgl-profiler.js,
#   3. wraps the emitted code between header.js and footer.js.
#
# `pipefail` is not supported by every /bin/sh (older dash aborts on it), so
# the script runs under bash explicitly.
set -euxo pipefail

npm install
node_modules/.bin/tsc

# Concatenate into a temporary file first: webgl-profiler.js is both an input
# and the output, so it cannot be redirected onto itself.
cat header.js webgl-profiler.js footer.js > webgl-profiler.js.tmp
mv webgl-profiler.js.tmp webgl-profiler.js
--------------------------------------------------------------------------------
/example.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | WebGL Profiler Demo
7 |
8 |
9 |
10 |
11 |
12 |
13 |
73 |
--------------------------------------------------------------------------------
/footer.js:
--------------------------------------------------------------------------------
1 |
// Publish the profiler: as the CommonJS export when a `module.exports` object
// exists, otherwise as a `WebGLProfiler` property on `window` when there is one.
var hasCommonJsExports = typeof module === "object" && typeof module.exports === "object";
if (hasCommonJsExports) {
    module.exports = WebGLProfiler;
} else if (typeof window !== 'undefined') {
    window.WebGLProfiler = WebGLProfiler;
}
7 | })();
--------------------------------------------------------------------------------
/header.js:
--------------------------------------------------------------------------------
1 | // This file is generated from webgl-profiler.ts. Do not edit this file directly.
2 | ;(function() {
--------------------------------------------------------------------------------
/package-lock.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "webgl-profiler",
3 | "version": "1.0.0",
4 | "lockfileVersion": 1,
5 | "requires": true,
6 | "dependencies": {
7 | "typescript": {
8 | "version": "3.1.6",
9 | "resolved": "https://registry.npmjs.org/typescript/-/typescript-3.1.6.tgz",
10 | "integrity": "sha512-tDMYfVtvpb96msS1lDX9MEdHrW4yOuZ4Kdc4Him9oU796XldPYF/t2+uKoX0BBa0hXXwDlqYQbXY5Rzjzc5hBA==",
11 | "dev": true
12 | }
13 | }
14 | }
15 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "webgl-profiler",
3 | "version": "1.0.0",
4 | "description": "A GPU-side profiler using EXT_disjoint_timer_query",
5 | "main": "webgl-profiler.umd.js",
6 | "scripts": {
7 | "build": "tsc"
8 | },
9 | "author": "Jamie Wong",
10 | "license": "MIT",
11 | "devDependencies": {
12 | "typescript": "^3.1.6"
13 | }
14 | }
15 |
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "module": "none",
4 | "strict": true,
5 | "noUnusedLocals": true,
6 | "lib": ["dom", "es5", "es2015.promise"],
7 | "target": "es5"
8 | }
9 | }
--------------------------------------------------------------------------------
/webgl-profiler.js:
--------------------------------------------------------------------------------
1 | // This file is generated from webgl-profiler.ts. Do not edit this file directly.
2 | ;(function() {"use strict";
// Async-function helper (this file is generated from webgl-profiler.ts, so this
// is presumably emitted by the TypeScript compiler rather than hand-written).
//
// Reuses `this.__awaiter` when one is already present; otherwise defines it.
// Given a generator function, it returns a new promise (constructor `P`,
// defaulting to `Promise`) and drives the generator to completion:
//   - each yielded value is wrapped in a `P`; when that settles, the result is
//     fed back through `generator.next` (fulfilled) or `generator.throw`
//     (rejected);
//   - an exception escaping the generator rejects the returned promise;
//   - when the generator reports `done`, the returned promise resolves with
//     its final value.
// `thisArg` and `_arguments` are what the generator function is applied with.
3 | var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
4 | return new (P || (P = Promise))(function (resolve, reject) {
5 | function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6 | function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7 | function step(result) { result.done ? resolve(result.value) : new P(function (resolve) { resolve(result.value); }).then(fulfilled, rejected); }
8 | step((generator = generator.apply(thisArg, _arguments || [])).next());
9 | });
10 | };
// Generator emulation helper (presumably emitted by the TypeScript compiler,
// since this file is generated from webgl-profiler.ts).
//
// Reuses `this.__generator` when present; otherwise defines it. `body` is a
// state-machine function: it is called with the state object `_` and returns
// an instruction `[opcode, value]`. The returned object `g` exposes `next`,
// `throw` and `return` (each forwarding to `step` with 0, 1 or 2) and, when
// `Symbol` exists, is its own iterator.
//
// State object `_`:
//   label - index of the next `case` for `body` to run
//   sent  - returns the value passed to the last resume, or throws it when the
//           resume was a `throw`
//   trys  - stack of active try-region descriptors
//   ops   - instructions deferred while a try region unwinds
//
// Opcodes as handled by `step` (the names for 2, 3 and 4 match the
// /*return*/, /*break*/ and /*yield*/ annotations at the call sites below):
//   0, 1 - resume with a value / with an error
//   2    - return
//   3    - jump to the label in op[1]
//   4    - yield op[1] to the caller
//   5    - delegate to the inner iterator in op[1]
//   6    - an error was caught while running `body`
//   7    - pop a deferred instruction and leave the current try region
//
// `f` guards against re-entrancy: calling `step` while it is already running
// throws "Generator is already executing.".
11 | var __generator = (this && this.__generator) || function (thisArg, body) {
12 | var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g;
13 | return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g;
14 | function verb(n) { return function (v) { return step([n, v]); }; }
15 | function step(op) {
16 | if (f) throw new TypeError("Generator is already executing.");
17 | while (_) try {
18 | if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;
19 | if (y = 0, t) op = [op[0] & 2, t.value];
20 | switch (op[0]) {
21 | case 0: case 1: t = op; break;
22 | case 4: _.label++; return { value: op[1], done: false };
23 | case 5: _.label++; y = op[1]; op = [0]; continue;
24 | case 7: op = _.ops.pop(); _.trys.pop(); continue;
25 | default:
26 | if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }
27 | if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }
28 | if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }
29 | if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }
30 | if (t[2]) _.ops.pop();
31 | _.trys.pop(); continue;
32 | }
33 | op = body.call(thisArg, _);
34 | } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }
35 | if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };
36 | }
37 | };
38 | /*
39 | * This is a utility class for profiling GPU-side operations using the
40 | * EXT_disjoint_timer_query OpenGL extension.
41 | *
42 | * We need to do special profiling GPU-side because CPU-side gl
43 | * calls are not synchronized with the GPU's actual execution of those
44 | * commands. Instead, to measure how long things are taking on the GPU, we
45 | * need to insert special commands into the GPU's command queue telling it
46 | * when to start a timer and when to stop the timer.
47 | *
48 | * This extension has a number of annoying limitations:
49 | * - Only one query can be active at a time. This means that we need to
50 | * implement nested timers ourselves in order to be able to produce
51 | * helpful flamegraphs.
52 | * - This currently only works in Desktop Chrome >= 70.
53 | * The extension was completely removed in Chrome 65
54 | * (https://crbug.com/808744) and Firefox 63 due to a severe security
55 | * vulnerability (https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2018-10229).
56 | * It was re-introduced in Chrome 70 (https://crbug.com/820891). There's
57 | * an open bug for re-exposing this in Android Chrome (https://crbug.com/870491).
58 | * - There's no way to ask for a timestamp. This is what `TIMESTAMP_EXT`
59 | * was designed for, but it was removed in 2016 (https://crbug.com/595172).
60 | * This makes it difficult to see how much time has elapsed between queries,
61 | * so instead we need to have queries always running.
62 | * - It seems like the elapsed time for every command other than draw calls is
63 | * indicated as zero on GPUs I've tested. The total elapsed times still seem
64 | * ballpark correct when comparing against active GPU time in a Chrome
65 | * performance profile, however. This could either mean that the GPU times of
66 | * other commands are negligible, or that EXT_disjoint_timer_query is lying
67 | * in these cases :|
68 | *
69 | * Since only one disjoint timer query can be active at a time, in order to create
70 | * nested timers, we mark "OPEN_FRAME" and "CLOSE_FRAME" events along the timeline
71 | * by changing the active timer at each event. It should look something like this:
72 | *
73 | * ---------- Time --------->
74 | *
75 | * Queries q1 q2 q3 q4 q5 q6 q7 q8 q9
76 | * <-> <---------> <---> <-----------> <---> <--> <----------> <-------> <->
77 | *
78 | * Stack +---+-----------------------------------------------------------------+---+
79 | * | Draw Frame |
80 | * +-----------+-------------------------+----+------------+---------+
81 | * | Draw Node | | Draw Hover |
82 | * +-----+-------------+-----+ +------------+
83 | * | Draw Shadow |
84 | * +-------------+
85 | *
86 | * Events
87 | * q1 start: profile start
88 | * q2 start: OPEN_FRAME "Draw Frame"
89 | * q3 start: OPEN_FRAME "Draw Node"
90 | * q4 start: OPEN_FRAME "Draw Shadow"
91 | * q5 start: CLOSE_FRAME "Draw Shadow"
92 | * q6 start: CLOSE_FRAME "Draw Node"
93 | * q7 start: OPEN_FRAME "Draw Hover"
94 | * q8 start: CLOSE_FRAME "Draw Hover"
95 | * q9 start: CLOSE_FRAME "Draw Frame"
96 | * q9 end: profile end
97 | *
98 | * For each query, the only information we know about it is its duration.
99 | * Assuming we have timing queries running for the entire duration of the
100 | * profile, however, this is sufficient to construct a flamegraph as long as
101 | * we remember what event is associated with the start/end of each query.
102 | */
/**
 * GPU-side profiler built on the EXT_disjoint_timer_query WebGL extension.
 *
 * A timer query is kept running for the whole profile. Every pushContext /
 * popContext call ends the current query and starts a new one, so the
 * duration of each query is the time between two consecutive events. Summing
 * those durations yields the event timestamps that are exported in
 * speedscope's "evented" file format.
 */
var WebGLProfiler = /** @class */ (function () {
    /**
     * @param {WebGLRenderingContext} context Context whose GPU commands are
     *     profiled. The extension is looked up here; when it is unavailable
     *     `start()` throws and the other methods do no GPU work.
     */
    function WebGLProfiler(context) {
        this.ext = null;
        this.activeQuery = null;
        this.isRunning = false;
        // Events whose beginQueryEXT/endQueryEXT calls have been enqueued in the
        // GPU command buffer, but whose timing results aren't yet available.
        // Chronological order.
        this.eventsPendingTimestamps = [];
        // Events whose timestamps have already been inferred from the durations
        // retrieved from the GPU. Chronological order.
        this.resolvedEvents = [];
        // Stack of currently open named contexts, used to validate that
        // pushContext/popContext calls match up.
        this.namedContextStack = [];
        this.context = context;
        this.ext = context.getExtension("EXT_disjoint_timer_query");
    }

    /** @returns {boolean} Whether a profile is currently being recorded. */
    WebGLProfiler.prototype.isProfilerRunning = function () {
        return this.isRunning;
    };

    /**
     * Begins recording and opens the implicit root frame named "profile".
     *
     * @throws {Error} If the extension is unavailable, a profile is already
     *     running, or the renderer is on the known-buggy list.
     */
    WebGLProfiler.prototype.start = function () {
        if (this.ext == null) {
            throw new Error("EXT_disjoint_timer_query WebGL extension is not available. Cannot start profiler.");
        }
        if (this.isRunning) {
            throw new Error("Profiler is already running");
        }
        var infoExt = this.context.getExtension("WEBGL_debug_renderer_info");
        if (infoExt != null) {
            var renderer = this.context.getParameter(infoExt.UNMASKED_RENDERER_WEBGL);
            // Only inspect the value when it really is a string, so an
            // unexpected result does not turn into a TypeError.
            if (typeof renderer === "string" && renderer.indexOf("NVIDIA GeForce GT 750M") !== -1) {
                // See: https://twitter.com/jlfwong/status/1058475013546770432
                throw new Error(renderer + " cards seem to have a buggy implementation of EXT_disjoint_timer_query. Refusing to record to avoid misleading results.");
            }
        }
        this.isRunning = true;
        this.eventsPendingTimestamps = [];
        this.resolvedEvents = [];
        this.activeQuery = this.ext.createQueryEXT();
        this.ext.beginQueryEXT(this.ext.TIME_ELAPSED_EXT, this.activeQuery);
        this.pushContext("profile");
    };

    /**
     * Closes the root "profile" frame and ends the running timer query.
     * Does nothing when the extension is unavailable.
     *
     * @throws {Error} If no profile is running, or if contexts pushed by the
     *     caller are still open. In the latter case the profiler stays in its
     *     running state so the caller can pop them and call stop() again.
     */
    WebGLProfiler.prototype.stop = function () {
        if (this.ext == null) {
            return;
        }
        if (!this.isRunning) {
            throw new Error("Profiler is already stopped");
        }
        // Pop before touching any other state: popContext throws when the
        // context stack is unbalanced, and a failed stop() must not leave the
        // profiler flagged as stopped while its query is still active.
        this.popContext("profile");
        this.isRunning = false;
        this.activeQuery = null;
        this.ext.endQueryEXT(this.ext.TIME_ELAPSED_EXT);
    };

    /**
     * Opens a named frame at the current point of the GPU command queue.
     *
     * @param {string} name Human-readable frame name.
     * @throws {Error} If the extension is available but no profile is active.
     */
    WebGLProfiler.prototype.pushContext = function (name) {
        this.markAction({ type: GPUProfilerActionType.OPEN_FRAME, name: name });
        this.namedContextStack.push(name);
    };

    /**
     * Closes the most recently opened frame.
     *
     * @param {string} name Must equal the name of the innermost open frame.
     * @throws {Error} If no frame is open or `name` does not match. The
     *     context stack is left untouched when this throws.
     */
    WebGLProfiler.prototype.popContext = function (name) {
        var depth = this.namedContextStack.length;
        if (depth === 0) {
            throw new Error("Tried to pop a context when the context stack is empty!");
        }
        // Validate against the top of the stack before mutating anything, so
        // a mismatched call does not silently discard the caller's frame.
        var innermost = this.namedContextStack[depth - 1];
        if (innermost !== name) {
            throw new Error("Expected popContext to be called with " + innermost + ", but it was called with " + name);
        }
        this.markAction({ type: GPUProfilerActionType.CLOSE_FRAME, name: name });
        this.namedContextStack.pop();
    };

    /**
     * Runs `callback` inside a frame called `name`.
     *
     * The frame is closed even when `callback` throws, so one failing draw
     * routine does not leave the context stack unbalanced for the rest of the
     * profile. The callback's error still propagates to the caller.
     *
     * @param {string} name Frame name.
     * @param {function(): void} callback Work to attribute to the frame.
     */
    WebGLProfiler.prototype.withContext = function (name, callback) {
        this.pushContext(name);
        try {
            callback();
        } finally {
            this.popContext(name);
        }
    };

    /**
     * Waits until the GPU has reported the duration of every recorded event,
     * polling once per animation frame, then serialises the profile.
     *
     * @returns {Promise<string>} JSON text in speedscope's file format.
     *     Rejects if resolving events or building the profile throws.
     */
    WebGLProfiler.prototype.exportSpeedscopeProfile = function () {
        var self = this;
        return new Promise(function (resolve, reject) {
            function poll() {
                try {
                    if (self.eventsPendingTimestamps.length > 0) {
                        self.resolveEventsIfPossible();
                        requestAnimationFrame(poll);
                        return;
                    }
                    resolve(self.toSpeedscopeProfile());
                } catch (error) {
                    reject(error);
                }
            }
            poll();
        });
    };

    /**
     * Exports the profile and triggers a browser download of it via a
     * temporary link element.
     *
     * @returns {Promise<void>}
     */
    WebGLProfiler.prototype.downloadWhenReady = function () {
        return this.exportSpeedscopeProfile().then(function (profileText) {
            var link = document.createElement("a");
            link.href = URL.createObjectURL(new Blob([profileText], { "type": "application/json" }));
            link.download = "gpuprofile-" + +new Date() + ".speedscope.json";
            // The link is attached to the document only for the duration of
            // the synthetic click.
            document.body.appendChild(link);
            link.click();
            document.body.removeChild(link);
        });
    };

    /**
     * Convenience wrapper: stop() followed by downloadWhenReady().
     *
     * @returns {Promise<void>} Rejects (rather than throwing synchronously)
     *     when stop() fails.
     */
    WebGLProfiler.prototype.stopAndDownload = function () {
        var self = this;
        return new Promise(function (resolve) {
            self.stop();
            resolve(self.downloadWhenReady());
        });
    };

    /**
     * Records `action` at the current point of the command queue by ending the
     * active timer query and immediately starting a new one. The ended query is
     * queued together with the action until its duration becomes available.
     * Does nothing when the extension is unavailable.
     *
     * @param {{type: number, name: string}} action
     * @throws {Error} If no profile is active.
     */
    WebGLProfiler.prototype.markAction = function (action) {
        if (this.ext == null) {
            return;
        }
        if (this.activeQuery == null) {
            throw new Error("Cannot mark actions while no profile is active");
        }
        var finishedQuery = this.activeQuery;
        this.activeQuery = this.ext.createQueryEXT();
        this.ext.endQueryEXT(this.ext.TIME_ELAPSED_EXT);
        this.ext.beginQueryEXT(this.ext.TIME_ELAPSED_EXT, this.activeQuery);
        this.eventsPendingTimestamps.push({ action: action, query: finishedQuery });
    };

    /**
     * Moves events from the pending list to the resolved list for as long as
     * the oldest pending query has a result available. Each event's timestamp
     * is the previous resolved timestamp plus the query's elapsed time.
     *
     * @throws {Error} "GPU_DISJOINT_EXT" when the context reports a disjoint
     *     event.
     */
    WebGLProfiler.prototype.resolveEventsIfPossible = function () {
        if (this.ext == null) {
            return;
        }
        var resolvedCount = 0;
        while (resolvedCount < this.eventsPendingTimestamps.length) {
            var pending = this.eventsPendingTimestamps[resolvedCount];
            var query = pending.query;
            // Results arrive in order, so stop at the first unavailable one.
            if (!this.ext.getQueryObjectEXT(query, this.ext.QUERY_RESULT_AVAILABLE_EXT)) {
                break;
            }
            // I don't totally understand what this means, but apparently if this is true,
            // it means that the GPU timing information is definitely going to be unreliable.
            // This is based on this example:
            // https://developer.mozilla.org/en-US/docs/Web/API/EXT_disjoint_timer_query/getQueryObjectEXT#Examples
            if (this.context.getParameter(this.ext.GPU_DISJOINT_EXT)) {
                throw new Error("GPU_DISJOINT_EXT");
            }
            var elapsed = this.ext.getQueryObjectEXT(query, this.ext.QUERY_RESULT_EXT);
            // TODO(jlfwong): If the creation & deletion of queries ends up having non-trivial
            // overhead, we could generate a bunch of queries up-front, and then use a free list
            // instead of needing to call createQueryEXT and deleteQueryEXT all the time.
            this.ext.deleteQueryEXT(query);
            var previousTimestamp = this.resolvedEvents.length === 0
                ? 0
                : this.resolvedEvents[this.resolvedEvents.length - 1].timestamp;
            this.resolvedEvents.push({ action: pending.action, timestamp: previousTimestamp + elapsed });
            resolvedCount++;
        }
        if (resolvedCount > 0) {
            this.eventsPendingTimestamps = this.eventsPendingTimestamps.slice(resolvedCount);
        }
    };

    /**
     * Converts the resolved events into speedscope's file format.
     *
     * @returns {string} JSON text of the speedscope file.
     * @throws {Error} "Profile is empty" when no event has been resolved.
     */
    WebGLProfiler.prototype.toSpeedscopeProfile = function () {
        var frames = [];
        var speedscopeEvents = [];
        if (this.resolvedEvents.length === 0) {
            throw new Error("Profile is empty");
        }
        var profile = {
            "type": SpeedscopeProfileType.EVENTED,
            "name": "GPU Profile",
            "unit": "nanoseconds",
            "startValue": 0,
            "endValue": this.resolvedEvents[this.resolvedEvents.length - 1].timestamp,
            "events": speedscopeEvents
        };
        var file = {
            "$schema": "https://www.speedscope.app/file-format-schema.json",
            "shared": {
                "frames": frames
            },
            "profiles": [profile]
        };
        // Prototype-less dictionary: frame names are caller-supplied strings,
        // and on a plain object names such as "toString" or "constructor"
        // would match inherited members instead of being registered as frames.
        var frameToIndex = Object.create(null);
        function getOrInsertFrame(name) {
            if (!(name in frameToIndex)) {
                frameToIndex[name] = frames.length;
                frames.push({
                    "name": name
                });
            }
            return frameToIndex[name];
        }
        for (var i = 0; i < this.resolvedEvents.length; i++) {
            var resolved = this.resolvedEvents[i];
            var isOpen = resolved.action.type === GPUProfilerActionType.OPEN_FRAME;
            speedscopeEvents.push({
                "type": isOpen ? SpeedscopeEventType.OPEN_FRAME : SpeedscopeEventType.CLOSE_FRAME,
                "frame": getOrInsertFrame(resolved.action.name),
                "at": resolved.timestamp
            });
        }
        return JSON.stringify(file);
    };
    return WebGLProfiler;
}());
// Kinds of events the profiler records along the GPU timeline. Numeric enum
// table: maps each name to its number and each number back to its name.
var GPUProfilerActionType = GPUProfilerActionType || {};
GPUProfilerActionType["OPEN_FRAME"] = 0;
GPUProfilerActionType[0] = "OPEN_FRAME";
GPUProfilerActionType["CLOSE_FRAME"] = 1;
GPUProfilerActionType[1] = "CLOSE_FRAME";

// Values written to the "type" field of an exported profile.
var SpeedscopeProfileType = SpeedscopeProfileType || {};
SpeedscopeProfileType["EVENTED"] = "evented";
SpeedscopeProfileType["SAMPLED"] = "sampled";

// Values written to the "type" field of each exported event.
var SpeedscopeEventType = SpeedscopeEventType || {};
SpeedscopeEventType["OPEN_FRAME"] = "O";
SpeedscopeEventType["CLOSE_FRAME"] = "C";
331 |
// Publish the profiler: as the CommonJS export when a `module.exports` object
// exists, otherwise as a `WebGLProfiler` property on `window` when there is one.
var hasCommonJsExports = typeof module === "object" && typeof module.exports === "object";
if (hasCommonJsExports) {
    module.exports = WebGLProfiler;
} else if (typeof window !== 'undefined') {
    window.WebGLProfiler = WebGLProfiler;
}
337 | })();
--------------------------------------------------------------------------------
/webgl-profiler.ts:
--------------------------------------------------------------------------------
1 | /*
2 | * This is a utility class for profiling GPU-side operations using the
3 | * EXT_disjoint_timer_query OpenGL extension.
4 | *
5 | * We need to do special profiling GPU-side because CPU-side gl
6 | * calls are not synchronized with the GPU's actual execution of those
7 | * commands. Instead, to measure how long things are taking on the GPU, we
8 | * need to insert special commands into the GPU's command queue telling it
9 | * when to start a timer and when to stop the timer.
10 | *
11 | * This extension has a number of annoying limitations:
12 | * - Only one query can be active at a time. This means that we need to
13 | * implement nested timers ourselves in order to be able to produce
14 | * helpful flamegraphs.
15 | * - This currently only works in Desktop Chrome >= 70.
16 | * The extension was completely removed in Chrome 65
17 | * (https://crbug.com/808744) and Firefox 63 due to a severe security
18 | * vulnerability (https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2018-10229).
19 | * It was re-introduced in Chrome 70 (https://crbug.com/820891). There's
20 | * an open bug for re-exposing this in Android Chrome (https://crbug.com/870491).
21 | * - There's no way to ask for a timestamp. This is what `TIMESTAMP_EXT`
22 | * was designed for, but it was removed in 2016 (https://crbug.com/595172).
23 | * This makes it difficult to see how much time has elapsed between queries,
24 | * so instead we need to have queries always running.
25 | * - It seems like the elapsed time for every command other than draw calls is
26 | * indicated as zero on GPUs I've tested. The total elapsed times still seem
27 | * ballpark correct when comparing against active GPU time in a Chrome
28 | * performance profile, however. This could either mean that the GPU times of
29 | * other commands are negligible, or that EXT_disjoint_timer_query is lying
30 | * in these cases :|
31 | *
32 | * Since only one disjoint timer query can be active at a time, in order to create
33 | * nested timers, we mark "OPEN_FRAME" and "CLOSE_FRAME" events along the timeline
34 | * by changing the active timer at each event. It should look something like this:
35 | *
36 | * ---------- Time --------->
37 | *
38 | * Queries q1 q2 q3 q4 q5 q6 q7 q8 q9
39 | * <-> <---------> <---> <-----------> <---> <--> <----------> <-------> <->
40 | *
41 | * Stack +---+-----------------------------------------------------------------+---+
42 | * | Draw Frame |
43 | * +-----------+-------------------------+----+------------+---------+
44 | * | Draw Node | | Draw Hover |
45 | * +-----+-------------+-----+ +------------+
46 | * | Draw Shadow |
47 | * +-------------+
48 | *
49 | * Events
50 | * q1 start: profile start
51 | * q2 start: OPEN_FRAME "Draw Frame"
52 | * q3 start: OPEN_FRAME "Draw Node"
53 | * q4 start: OPEN_FRAME "Draw Shadow"
54 | * q5 start: CLOSE_FRAME "Draw Shadow"
55 | * q6 start: CLOSE_FRAME "Draw Node"
56 | * q7 start: OPEN_FRAME "Draw Hover"
57 | * q8 start: CLOSE_FRAME "Draw Hover"
58 | * q9 start: CLOSE_FRAME "Draw Frame"
59 | * q9 end: profile end
60 | *
61 | * For each query, the only information we know about it is its duration.
62 | * Assuming we have timing queries running for the entire duration of the
63 | * profile, however, this is sufficient to construct a flamegraph as long as
64 | * we remember what event is associated with the start/end of each query.
65 | */
66 | class WebGLProfiler {
67 | private readonly context: WebGLRenderingContext
68 | private readonly ext: EXTDisjointTimerQuery | null = null
69 | private activeQuery: WebGLTimerQueryEXT | null = null
70 | private isRunning = false
71 |
72 | // This list contains events whose beginQueryEXT/endQueryEXT calls have been
73 | // enqueued in the GPU command buffer, but whose timing results aren't yet
74 | // available. These are in chronological order.
75 | private eventsPendingTimestamps: GPUProfilerEventPendingTimestamp[] = []
76 |
77 | // This list contains events whose timestamps have already been inferred based
78 | // on the durations retrieved from the GPU. These are also in chronological order.
79 | private resolvedEvents: GPUProfilerResolvedEvent[] = []
80 |
81 | // This is a stack of currently active named contexts. This is used to validate
82 | // that the pushContext/popContext calls match up properly.
83 | private namedContextStack: string[] = []
84 |
85 | constructor(context: WebGLRenderingContext) {
86 | this.context = context
87 | this.ext = context.getExtension("EXT_disjoint_timer_query") as EXTDisjointTimerQuery | null
88 | }
89 |
90 | isProfilerRunning(): boolean {
91 | return this.isRunning
92 | }
93 |
94 | start(): void {
95 | if (this.ext == null) {
96 | throw new Error("EXT_disjoint_timer_query WebGL extension is not available. Cannot start profiler.")
97 | }
98 | if (this.isRunning) {
99 | throw new Error("Profiler is already running")
100 | }
101 | const infoExt = this.context.getExtension("WEBGL_debug_renderer_info")
102 | if (infoExt != null) {
103 | const renderer: string = this.context.getParameter(infoExt.UNMASKED_RENDERER_WEBGL)
104 | if (renderer.indexOf("NVIDIA GeForce GT 750M") !== -1) {
105 | // See: https://twitter.com/jlfwong/status/1058475013546770432
106 | throw new Error(`${renderer} cards seem to have a buggy implementation of EXT_disjoint_timer_query. Refusing to record to avoid misleading results.`)
107 | }
108 | }
109 |
110 | this.isRunning = true
111 | this.eventsPendingTimestamps = []
112 | this.resolvedEvents = []
113 |
114 | this.activeQuery = this.ext.createQueryEXT()
115 | this.ext.beginQueryEXT(this.ext.TIME_ELAPSED_EXT, this.activeQuery)
116 |
117 | this.pushContext("profile")
118 | }
119 |
120 | stop(): void {
121 | if (this.ext == null) {
122 | return
123 | }
124 | if (!this.isRunning) {
125 | throw new Error("Profiler is already stopped")
126 | }
127 | this.isRunning = false
128 |
129 | this.popContext("profile")
130 | this.activeQuery = null
131 | this.ext.endQueryEXT(this.ext.TIME_ELAPSED_EXT)
132 | }
133 |
134 | pushContext(name: string): void {
135 | this.markAction({type: GPUProfilerActionType.OPEN_FRAME, name})
136 | this.namedContextStack.push(name)
137 | }
138 |
139 | popContext(name: string): void {
140 | if (this.namedContextStack.length === 0) {
141 | throw new Error("Tried to pop a context when the context stack is empty!")
142 | }
143 | const popped = this.namedContextStack.pop()
144 | if (popped !== name) {
145 | throw new Error(`Expected popContext to be called with ${popped}, but it was called with ${name}`)
146 | }
147 | this.markAction({type: GPUProfilerActionType.CLOSE_FRAME, name})
148 | }
149 |
150 | withContext(name: string, callback: () => void): void {
151 | this.pushContext(name)
152 | callback()
153 | this.popContext(name)
154 | }
155 |
156 | async exportSpeedscopeProfile(): Promise {
157 | while (this.eventsPendingTimestamps.length > 0) {
158 | this.resolveEventsIfPossible()
159 | await new Promise((resolve) => requestAnimationFrame(resolve))
160 | }
161 |
162 | return this.toSpeedscopeProfile()
163 | }
164 |
165 | async downloadWhenReady(): Promise<void> { // Waits for the profile to be exportable, then triggers a browser download of it.
166 |   const profileText = await this.exportSpeedscopeProfile()
167 |   const link = document.createElement("a")
168 |   link.href = URL.createObjectURL(new Blob([profileText], { "type": "application/json" }))
169 |   link.download = `gpuprofile-${Date.now()}.speedscope.json`
170 |   document.body.appendChild(link) // the anchor only needs to be in the document for the click below
171 |   link.click()
172 |   document.body.removeChild(link)
173 |   setTimeout(() => URL.revokeObjectURL(link.href), 60 * 1000) // release the blob, but only after the download has had time to start
174 | }
175 |
176 | async stopAndDownload() { // Convenience wrapper: stop the recording, then save it as a file.
177 | this.stop() // a throw from stop() rejects the returned promise
178 | await this.downloadWhenReady() // resolves once the download has been triggered
179 | }
180 |
181 | private markAction(action: GPUProfilerAction): void {
182 |   // Records `action` at the current point of the GPU command stream. The running timer
183 |   // query is ended and a new one is begun right away, so a query is always active while
184 |   // profiling; the ended query is queued together with the action until its result is read.
185 |   if (this.ext == null) {
186 |     return
187 |   }
188 |   const finishedQuery = this.activeQuery
189 |   if (finishedQuery == null) {
190 |     throw new Error("Cannot mark actions while no profile is active")
191 |   }
192 |   const nextQuery = this.ext.createQueryEXT()
193 |   this.activeQuery = nextQuery
194 |   this.ext.endQueryEXT(this.ext.TIME_ELAPSED_EXT)
195 |   this.ext.beginQueryEXT(this.ext.TIME_ELAPSED_EXT, nextQuery)
196 |   this.eventsPendingTimestamps.push({action: action, query: finishedQuery})
197 | }
198 |
199 | private resolveEventsIfPossible(): void {
200 |   // Converts pending events, oldest first, into timestamped events for as long as the GPU has
201 |   // published the result of each event's timer query; stops at the first unfinished query.
202 |   // Throws Error("GPU_DISJOINT_EXT") if the context reports a disjoint timing operation.
203 |   if (this.ext == null) {
204 |     return
205 |   }
206 |   let resolvedCount = 0
207 |   try {
208 |     while (resolvedCount < this.eventsPendingTimestamps.length) {
209 |       const pendingAction = this.eventsPendingTimestamps[resolvedCount]
210 |       const query = pendingAction.query
211 |       if (!this.ext.getQueryObjectEXT(query, this.ext.QUERY_RESULT_AVAILABLE_EXT)) {
212 |         break
213 |       }
214 |       // If GPU_DISJOINT_EXT is set, the GPU timing information is going to be unreliable.
215 |       // This is based on this example:
216 |       // https://developer.mozilla.org/en-US/docs/Web/API/EXT_disjoint_timer_query/getQueryObjectEXT#Examples
217 |       if (this.context.getParameter(this.ext.GPU_DISJOINT_EXT)) {
218 |         throw new Error("GPU_DISJOINT_EXT")
219 |       }
220 |       const elapsed = this.ext.getQueryObjectEXT(query, this.ext.QUERY_RESULT_EXT)
221 |
222 |       // TODO(jlfwong): If the creation & deletion of queries ends up having non-trivial
223 |       // overhead, we could generate a bunch of queries up-front, and then use a free list
224 |       // instead of needing to call createQueryEXT and deleteQueryEXT all the time.
225 |       this.ext.deleteQueryEXT(query)
226 |       // Queries run back to back, so an event's timestamp is the running sum of the durations so far.
227 |       const lastTimestamp = this.resolvedEvents.length === 0 ? 0 : this.resolvedEvents[this.resolvedEvents.length - 1].timestamp
228 |       this.resolvedEvents.push({action: pendingAction.action, timestamp: lastTimestamp + elapsed})
229 |       resolvedCount++
230 |     }
231 |   } finally {
232 |     // Runs even when the disjoint check throws: resolved queries are already deleted and must not be polled again.
233 |     if (resolvedCount > 0) {
234 |       this.eventsPendingTimestamps = this.eventsPendingTimestamps.slice(resolvedCount)
235 |     }
236 |   }
237 | }
238 |
239 | // Convert the currently recorded profile into speedscope's file format, serialized as JSON.
240 | // Throws if no events have been resolved yet.
241 | private toSpeedscopeProfile(): string {
242 |   const frames: SpeedscopeFrame[] = []
243 |   const speedscopeEvents: (SpeedscopeOpenFrameEvent | SpeedscopeCloseFrameEvent)[] = []
244 |
245 |   if (this.resolvedEvents.length === 0) {
246 |     throw new Error("Profile is empty")
247 |   }
248 |
249 |   const profile: SpeedscopeEventedProfile = {
250 |     "type": SpeedscopeProfileType.EVENTED,
251 |     "name": "GPU Profile",
252 |     "unit": "nanoseconds",
253 |     "startValue": 0,
254 |     "endValue": this.resolvedEvents[this.resolvedEvents.length - 1].timestamp,
255 |     "events": speedscopeEvents
256 |   }
257 |
258 |   const file: SpeedscopeFile = {
259 |     "$schema": "https://www.Speedscopeapp/file-format-schema.json", // NOTE(review): speedscope's published schema URL appears to be https://www.speedscope.app/file-format-schema.json; confirm together with the SpeedscopeFile type
260 |     "shared": {
261 |       "frames": frames,
262 |     },
263 |     "profiles": [profile]
264 |   }
265 |
266 |   // Context names are caller-supplied, so the lookup table has no prototype: names such as
267 |   // "constructor" or "toString" must not resolve to members inherited from Object.prototype.
268 |   const frameToIndex: {[key: string]: number} = Object.create(null)
269 |   function getOrInsertFrame(name: string): number { // index of `name` in the shared frame table, added on first use
270 |     if (!(name in frameToIndex)) {
271 |       frameToIndex[name] = frames.length
272 |       frames.push({
273 |         "name": name
274 |       })
275 |     }
276 |     return frameToIndex[name]
277 |   }
278 |
279 |   for (const event of this.resolvedEvents) {
280 |     speedscopeEvents.push({
281 |       "type": event.action.type === GPUProfilerActionType.OPEN_FRAME ? SpeedscopeEventType.OPEN_FRAME : SpeedscopeEventType.CLOSE_FRAME,
282 |       "frame": getOrInsertFrame(event.action.name),
283 |       "at": event.timestamp
284 |     } as (SpeedscopeOpenFrameEvent | SpeedscopeCloseFrameEvent))
285 |   }
286 |
287 |   return JSON.stringify(file)
288 | }
289 | }
290 |
291 | enum GPUProfilerActionType { // The two kinds of marker recorded on the GPU timeline.
292 | OPEN_FRAME, // a named context was entered (emitted by pushContext)
293 | CLOSE_FRAME // a named context was left (emitted by popContext)
294 | }
295 |
296 | interface GPUProfilerAction { // A frame boundary to be placed on the timeline.
297 | readonly type: GPUProfilerActionType
298 | readonly name: string // context name; used as the speedscope frame name on export
299 | }
300 |
301 | interface GPUProfilerEventPendingTimestamp { // An action whose timer query result has not been read back yet.
302 | readonly action: GPUProfilerAction
303 | readonly query: WebGLTimerQueryEXT // the query that was ended at the moment the action was marked
304 | }
305 |
306 | interface GPUProfilerResolvedEvent { // An action whose position on the timeline is known.
307 | readonly action: GPUProfilerAction
308 | readonly timestamp: number // running sum of the query durations up to this action; exported with unit "nanoseconds"
309 | }
310 |
311 | // DOM APIs
312 | interface WebGLTimerQueryEXT {} // opaque handle type for the query objects returned by EXTDisjointTimerQuery.createQueryEXT()
313 |
314 | interface EXTDisjointTimerQuery { // Typing for the EXT_disjoint_timer_query extension API used by this profiler.
315 | QUERY_COUNTER_BITS_EXT: 0x8864
316 | CURRENT_QUERY_EXT: 0x8865
317 | QUERY_RESULT_EXT: 0x8866
318 | QUERY_RESULT_AVAILABLE_EXT: 0x8867
319 | TIME_ELAPSED_EXT: 0x88BF
320 | TIMESTAMP_EXT: 0x8E28
321 | GPU_DISJOINT_EXT: 0x8FBB
322 |
323 | createQueryEXT(): WebGLTimerQueryEXT
324 | deleteQueryEXT(query: WebGLTimerQueryEXT): void
325 | isQueryEXT(query: WebGLTimerQueryEXT): boolean
326 | beginQueryEXT(target: GLenum, query: WebGLTimerQueryEXT): void
327 | endQueryEXT(target: GLenum): void
328 | getQueryEXT(target: GLenum, pname: GLenum): any
329 | getQueryObjectEXT(query: WebGLTimerQueryEXT, pname: 0x8867 /* QUERY_RESULT_AVAILABLE_EXT */): boolean
330 | getQueryObjectEXT(query: WebGLTimerQueryEXT, pname: 0x8866 /* QUERY_RESULT_EXT */): number
331 | getQueryObjectEXT(query: WebGLTimerQueryEXT, pname: GLenum): any // fallback overload for any other pname
332 | }
333 |
334 | // speedscope types (from https://github.com/jlfwong/speedscope/blob/master/src/lib/file-format-spec.ts)
335 | interface SpeedscopeFile { // Top-level shape of the JSON document produced by toSpeedscopeProfile().
336 | $schema: 'https://www.Speedscopeapp/file-format-schema.json' // NOTE(review): speedscope's published schema URL appears to be https://www.speedscope.app/file-format-schema.json; confirm this literal
337 |
338 | // Data shared between profiles
339 | shared: {
340 | frames: SpeedscopeFrame[]
341 | }
342 |
343 | // List of profile definitions
344 | profiles: SpeedscopeEventedProfile[]
345 |
346 | // The name of the contained profile group. If omitted, will use the name of
347 | // the file itself.
348 | // Added in 0.6.0
349 | name?: string
350 |
351 | // The index into the `profiles` array that should be displayed upon file
352 | // load. If omitted, will default to displaying the first profile in the
353 | // file.
354 | //
355 | // Added in 0.6.0
356 | activeProfileIndex?: number
357 |
358 | // The name of the program which exported this profile. This isn't
359 | // consumed but can be helpful for debugging generated data by seeing what
360 | // was generating it! Recommended format is "name@version". e.g. when the
361 | // file was exported by speedscope v0.6.0 itself, it will be
362 | // "speedscope@0.6.0"
363 | //
364 | // Added in 0.6.0
365 | exporter?: string
366 | }
367 |
368 | interface SpeedscopeFrame { // One entry of the shared frame table; events refer to it by array index.
369 | name: string
370 | file?: string
371 | line?: number
372 | col?: number
373 | }
374 |
375 | enum SpeedscopeProfileType { // Serialized as the profile's "type" field.
376 | EVENTED = 'evented', // the only variant SpeedscopeEventedProfile accepts
377 | SAMPLED = 'sampled',
378 | }
379 |
380 | interface SpeedscopeEventedProfile { // A profile described as a sequence of open-frame / close-frame events.
381 | type: SpeedscopeProfileType.EVENTED
382 |
383 | // Name of the profile. Typically a filename for the source of the profile.
384 | name: string
385 |
386 | // Unit in which all values in the profile are specified.
387 | unit: SpeedscopeValueUnit
388 |
389 | // The starting value of the profile. This will typically be a timestamp.
390 | // All event values will be relative to this startValue.
391 | startValue: number
392 |
393 | // The final value of the profile. This will typically be a timestamp. This
394 | // must be greater than or equal to the startValue. This is useful in
395 | // situations where the recorded profile extends past the end of the recorded
396 | // events, which may happen if nothing was happening at the end of the
397 | // profile.
398 | endValue: number
399 |
400 | // List of events that occurred as part of this profile.
401 | // The "at" field of every event must be in non-decreasing order.
402 | events: (SpeedscopeOpenFrameEvent | SpeedscopeCloseFrameEvent)[]
403 | }
404 |
405 | type SpeedscopeValueUnit = // allowed values for a profile's `unit` field; this exporter writes 'nanoseconds'
406 | | 'none'
407 | | 'nanoseconds'
408 | | 'microseconds'
409 | | 'milliseconds'
410 | | 'seconds'
411 | | 'bytes'
412 |
413 | enum SpeedscopeEventType { // Serialized as each event's "type" field.
414 | OPEN_FRAME = 'O', // written for GPUProfilerActionType.OPEN_FRAME actions
415 | CLOSE_FRAME = 'C', // written for GPUProfilerActionType.CLOSE_FRAME actions
416 | }
417 |
418 | // Indicates a stack frame opened. Every opened stack frame must have a
419 | // corresponding close frame event, and the ordering must be balanced.
420 | // Note: the exporter also writes an "at" timestamp on each event; it is not
421 | interface SpeedscopeOpenFrameEvent { // declared here, which is why toSpeedscopeProfile() needs a type assertion.
421 | type: SpeedscopeEventType.OPEN_FRAME
422 | // An index into the frames array in the shared data within the profile
423 | frame: number
424 | }
425 |
426 | interface SpeedscopeCloseFrameEvent { // Indicates that the matching opened stack frame was closed.
427 | type: SpeedscopeEventType.CLOSE_FRAME
428 | // An index into the frames array in the shared data within the profile
429 | frame: number
430 | }
--------------------------------------------------------------------------------
/webgl-profiler.umd.js:
--------------------------------------------------------------------------------
1 | // This file is generated from webgl-profiler.ts. Do not edit this file directly.
2 | ;(function() {"use strict";
3 | var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { // compiler-emitted helper: drives `generator` step by step and returns a promise (P, default Promise) for its final value
4 | return new (P || (P = Promise))(function (resolve, reject) {
5 | function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6 | function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7 | function step(result) { result.done ? resolve(result.value) : new P(function (resolve) { resolve(result.value); }).then(fulfilled, rejected); }
8 | step((generator = generator.apply(thisArg, _arguments || [])).next());
9 | });
10 | };
11 | var __generator = (this && this.__generator) || function (thisArg, body) { // compiler-emitted helper: runs `body` as a resumable state machine and returns an object exposing next/throw/return
12 | var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g;
13 | return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g;
14 | function verb(n) { return function (v) { return step([n, v]); }; }
15 | function step(op) {
16 | if (f) throw new TypeError("Generator is already executing.");
17 | while (_) try {
18 | if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;
19 | if (y = 0, t) op = [op[0] & 2, t.value];
20 | switch (op[0]) {
21 | case 0: case 1: t = op; break;
22 | case 4: _.label++; return { value: op[1], done: false };
23 | case 5: _.label++; y = op[1]; op = [0]; continue;
24 | case 7: op = _.ops.pop(); _.trys.pop(); continue;
25 | default:
26 | if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }
27 | if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }
28 | if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }
29 | if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }
30 | if (t[2]) _.ops.pop();
31 | _.trys.pop(); continue;
32 | }
33 | op = body.call(thisArg, _);
34 | } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }
35 | if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };
36 | }
37 | };
38 | /*
39 | * This is a utility class for profiling GPU-side operations using the
40 | * EXT_disjoint_timer_query OpenGL extension.
41 | *
42 | * We need to do special profiling GPU-side because CPU-side gl
43 | * calls are not synchronized with the GPU's actual execution of those
44 | * commands. Instead, to measure how long things are taking on the GPU, we
45 | * need to insert special commands into the GPU's command queue telling it
46 | * when to start a timer and when to stop the timer.
47 | *
48 | * This extension has a number of annoying limitations:
49 | * - Only one query can be active at a time. This means that we need to
50 | * implement nested timers ourselves in order to be able to produce
51 | * helpful flamegraphs.
52 | * - This currently only works in Desktop Chrome >= 70.
53 | * The extension was completely removed in Chrome 65
54 | * (https://crbug.com/808744) and Firefox 63 due to a severe security
55 | * vulnerability (https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2018-10229).
56 | * It was re-introduced in Chrome 70 (https://crbug.com/820891). There's
57 | * an open bug for re-exposing this in Android Chrome (https://crbug.com/870491).
58 | * - There's no way to ask for a timestamp. This is what `TIMESTAMP_EXT`
59 | * was designed for, but it was removed in 2016 (https://crbug.com/595172).
60 | * This makes it difficult to see how much time has elapsed between queries,
61 | * so instead we need to have queries always running.
62 | * - It seems like the elapsed time for every command other than draw calls is
63 | * indicated as zero on GPUs I've tested. The total elapsed times still seem
64 | * ballpark correct when comparing against active GPU time in a Chrome
65 | * performance profile, however. This could either mean that the GPU times of
66 | * other commands are negligible, or that EXT_disjoint_timer_query is lying
67 | * in these cases :|
68 | *
69 | * Since only one disjoint timer query can be active at a time, in order to create
70 | * nested timers, we mark "OPEN_FRAME" and "CLOSE_FRAME" events along the timeline
71 | * by changing the active timer at each event. It should look something like this:
72 | *
73 | * ---------- Time --------->
74 | *
75 | * Queries q1 q2 q3 q4 q5 q6 q7 q8 q9
76 | * <-> <---------> <---> <-----------> <---> <--> <----------> <-------> <->
77 | *
78 | * Stack +---+-----------------------------------------------------------------+---+
79 | * | Draw Frame |
80 | * +-----------+-------------------------+----+------------+---------+
81 | * | Draw Node | | Draw Hover |
82 | * +-----+-------------+-----+ +------------+
83 | * | Draw Shadow |
84 | * +-------------+
85 | *
86 | * Events
87 | * q1 start: profile start
88 | * q2 start: OPEN_FRAME "Draw Frame"
89 | * q3 start: OPEN_FRAME "Draw Node"
90 | * q4 start: OPEN_FRAME "Draw Shadow"
91 | * q5 start: CLOSE_FRAME "Draw Shadow"
92 | * q6 start: CLOSE_FRAME "Draw Node"
93 | * q7 start: OPEN_FRAME "Draw Hover"
94 | * q8 start: CLOSE_FRAME "Draw Hover"
95 | * q9 start: CLOSE_FRAME "Draw Frame"
96 | * q9 end: profile end
97 | *
98 | * For each query, the only information we know about it is its duration.
99 | * Assuming we have timing queries running for the entire duration of the
100 | * profile, however, this is sufficient to construct a flamegraph as long as
101 | * we remember what event is associated with the start/end of each query.
102 | */
103 | var WebGLProfiler = /** @class */ (function () {
104 | function WebGLProfiler(context) { // Wraps the given WebGL context; recording requires its EXT_disjoint_timer_query extension.
105 | this.ext = null;
106 | this.activeQuery = null;
107 | this.isRunning = false;
108 | // This list contains events whose beginQueryEXT/endQueryEXT calls have been
109 | // enqueued in the GPU command buffer, but whose timing results aren't yet
110 | // available. These are in chronological order.
111 | this.eventsPendingTimestamps = [];
112 | // This list contains events whose timestamps have already been inferred based
113 | // on the durations retrieved from the GPU. These are also in chronological order.
114 | this.resolvedEvents = [];
115 | this.context = context;
116 | this.ext = context.getExtension("EXT_disjoint_timer_query"); // start() throws while this is null
117 | }
118 | WebGLProfiler.prototype.isProfilerRunning = function () { // true between a successful start() and the following stop()
119 | return this.isRunning;
120 | };
121 | WebGLProfiler.prototype.start = function () {
122 | if (this.ext == null) {
123 | throw new Error("EXT_disjoint_timer_query WebGL extension is not available. Cannot start profiler.");
124 | }
125 | if (this.isRunning) {
126 | throw new Error("Profiler is already running");
127 | }
128 | var infoExt = this.context.getExtension("WEBGL_debug_renderer_info");
129 | if (infoExt != null) {
130 | var renderer = this.context.getParameter(infoExt.UNMASKED_RENDERER_WEBGL);
131 | if (renderer.indexOf("NVIDIA GeForce GT 750M") !== -1) {
132 | // See: https://twitter.com/jlfwong/status/1058475013546770432
133 | console.warn("\u26A0\uFE0F\u26A0\uFE0F\u26A0\uFE0F\u26A0\uFE0F\u26A0\uFE0F\u26A0\uFE0F\u26A0\uFE0F\u26A0\uFE0F\u26A0\uFE0F");
134 | console.warn(renderer + " cards seem to have a buggy implementation of EXT_disjoint_timer_query. Results may be very misleading.");
135 | console.warn("\u26A0\uFE0F\u26A0\uFE0F\u26A0\uFE0F\u26A0\uFE0F\u26A0\uFE0F\u26A0\uFE0F\u26A0\uFE0F\u26A0\uFE0F\u26A0\uFE0F");
136 | }
137 | }
138 | this.isRunning = true;
139 | this.eventsPendingTimestamps = [];
140 | this.resolvedEvents = [];
141 | this.activeQuery = this.ext.createQueryEXT();
142 | this.ext.beginQueryEXT(this.ext.TIME_ELAPSED_EXT, this.activeQuery);
143 | this.pushContext("profile");
144 | };
145 | WebGLProfiler.prototype.stop = function () {
146 | if (this.ext == null) {
147 | return;
148 | }
149 | if (!this.isRunning) {
150 | throw new Error("Profiler is already stopped");
151 | }
152 | this.isRunning = false;
153 | this.popContext("profile");
154 | this.activeQuery = null;
155 | this.ext.endQueryEXT(this.ext.TIME_ELAPSED_EXT);
156 | };
157 | WebGLProfiler.prototype.pushContext = function (name) { // Opens a frame called `name`. NOTE(review): unlike webgl-profiler.ts, this build keeps no context stack.
158 | this.markAction({ type: GPUProfilerActionType.OPEN_FRAME, name: name });
159 | };
160 | WebGLProfiler.prototype.popContext = function (name) { // Closes a frame called `name`. NOTE(review): nesting is not validated in this build; mismatched names go undetected.
161 | this.markAction({ type: GPUProfilerActionType.CLOSE_FRAME, name: name });
162 | };
163 | WebGLProfiler.prototype.exportSpeedscopeProfile = function () { // Returns a promise for the speedscope JSON; settles once no events are pending.
164 | return __awaiter(this, void 0, void 0, function () {
165 | return __generator(this, function (_a) {
166 | switch (_a.label) {
167 | case 0: // loop head: leave the loop (jump to label 2) once nothing is pending
168 | if (!(this.eventsPendingTimestamps.length > 0)) return [3 /*break*/, 2];
169 | this.resolveEventsIfPossible();
170 | return [4 /*yield*/, new Promise(function (resolve) { return requestAnimationFrame(resolve); })];
171 | case 1: // resumed after one animation frame; go back to the loop head
172 | _a.sent();
173 | return [3 /*break*/, 0];
174 | case 2: return [2 /*return*/, this.toSpeedscopeProfile()];
175 | }
176 | });
177 | });
178 | };
179 | WebGLProfiler.prototype.downloadWhenReady = function () {
180 | return __awaiter(this, void 0, void 0, function () {
181 | var profileText, link;
182 | return __generator(this, function (_a) {
183 | switch (_a.label) {
184 | case 0: return [4 /*yield*/, this.exportSpeedscopeProfile()];
185 | case 1:
186 | profileText = _a.sent();
187 | link = document.createElement("a");
188 | link.href = URL.createObjectURL(new Blob([profileText], { "type": "application/json" }));
189 | link.download = "gpuprofile-" + +new Date() + ".speedscope.json";
190 | document.body.appendChild(link);
191 | link.click();
192 | document.body.removeChild(link);
193 | return [2 /*return*/];
194 | }
195 | });
196 | });
197 | };
198 | WebGLProfiler.prototype.stopAndDownload = function () { // Convenience wrapper: stop the recording, then save it as a file.
199 | return __awaiter(this, void 0, void 0, function () {
200 | return __generator(this, function (_a) {
201 | switch (_a.label) {
202 | case 0:
203 | this.stop(); // a throw here rejects the returned promise
204 | return [4 /*yield*/, this.downloadWhenReady()];
205 | case 1:
206 | _a.sent();
207 | return [2 /*return*/];
208 | }
209 | });
210 | });
211 | };
212 | WebGLProfiler.prototype.markAction = function (action) {
213 | if (this.ext == null) {
214 | return;
215 | }
216 | if (this.activeQuery == null) {
217 | throw new Error("Cannot mark actions while no profile is active");
218 | }
219 | var oldQuery = this.activeQuery;
220 | this.activeQuery = this.ext.createQueryEXT();
221 | this.ext.endQueryEXT(this.ext.TIME_ELAPSED_EXT);
222 | this.ext.beginQueryEXT(this.ext.TIME_ELAPSED_EXT, this.activeQuery);
223 | this.eventsPendingTimestamps.push({ action: action, query: oldQuery });
224 | };
225 | WebGLProfiler.prototype.resolveEventsIfPossible = function () {
226 | if (this.ext == null) {
227 | return;
228 | }
229 | var i = 0;
230 | while (i < this.eventsPendingTimestamps.length) {
231 | var pendingAction = this.eventsPendingTimestamps[i];
232 | var query = pendingAction.query;
233 | if (!this.ext.getQueryObjectEXT(query, this.ext.QUERY_RESULT_AVAILABLE_EXT)) {
234 | break;
235 | }
236 | // I don't totally understand what this means, but apparently if this is true,
237 | // it means that the GPU timing information is definitely going to unreliable.
238 | // This is based on this example:
239 | // https://developer.mozilla.org/en-US/docs/Web/API/EXT_disjoint_timer_query/getQueryObjectEXT#Examples
240 | if (this.context.getParameter(this.ext.GPU_DISJOINT_EXT)) {
241 | throw new Error("GPU_DISJOINT_EXT");
242 | }
243 | var elapsed = this.ext.getQueryObjectEXT(query, this.ext.QUERY_RESULT_EXT);
244 | // TODO(jlfwong): If the creation & deletion of queries ends up having non-trivial
245 | // overhead, we could generate a bunch of queries up-front, and then use a free list
246 | // instead of needing to call createQueryEXT and deleteQueryEXT all the time.
247 | this.ext.deleteQueryEXT(query);
248 | var lastTimestamp = this.resolvedEvents.length === 0 ? 0 : this.resolvedEvents[this.resolvedEvents.length - 1].timestamp;
249 | var timestamp = lastTimestamp + elapsed;
250 | this.resolvedEvents.push({ action: pendingAction.action, timestamp: timestamp });
251 | i++;
252 | }
253 | if (i > 0) {
254 | this.eventsPendingTimestamps = this.eventsPendingTimestamps.slice(i);
255 | }
256 | };
257 | // Convert the currently recorded profile into speedscope's
258 | // file format.
259 | WebGLProfiler.prototype.toSpeedscopeProfile = function () {
260 | var frames = [];
261 | var speedscopeEvents = [];
262 | if (this.resolvedEvents.length === 0) {
263 | throw new Error("Profile is empty");
264 | }
265 | var profile = {
266 | "type": SpeedscopeProfileType.EVENTED,
267 | "name": "GPU Profile",
268 | "unit": "nanoseconds",
269 | "startValue": 0,
270 | "endValue": this.resolvedEvents[this.resolvedEvents.length - 1].timestamp,
271 | "events": speedscopeEvents
272 | };
273 | var file = {
274 | "$schema": "https://www.Speedscopeapp/file-format-schema.json",
275 | "shared": {
276 | "frames": frames,
277 | },
278 | "profiles": [profile]
279 | };
280 | var frameToIndex = {};
281 | function getOrInsertFrame(name) {
282 | if (!(name in frameToIndex)) {
283 | frameToIndex[name] = frames.length;
284 | frames.push({
285 | "name": name
286 | });
287 | }
288 | return frameToIndex[name];
289 | }
290 | for (var _i = 0, _a = this.resolvedEvents; _i < _a.length; _i++) {
291 | var event_1 = _a[_i];
292 | speedscopeEvents.push({
293 | "type": event_1.action.type == GPUProfilerActionType.OPEN_FRAME ? SpeedscopeEventType.OPEN_FRAME : SpeedscopeEventType.CLOSE_FRAME,
294 | "frame": getOrInsertFrame(event_1.action.name),
295 | "at": event_1.timestamp
296 | });
297 | }
298 | return JSON.stringify(file);
299 | };
300 | return WebGLProfiler;
301 | }());
302 | var GPUProfilerActionType; // compiled numeric enum: maps each name to its number and each number back to its name
303 | (function (GPUProfilerActionType) {
304 | GPUProfilerActionType[GPUProfilerActionType["OPEN_FRAME"] = 0] = "OPEN_FRAME";
305 | GPUProfilerActionType[GPUProfilerActionType["CLOSE_FRAME"] = 1] = "CLOSE_FRAME";
306 | })(GPUProfilerActionType || (GPUProfilerActionType = {}));
307 | var SpeedscopeProfileType; // compiled string enum; values are written to the profile's "type" field
308 | (function (SpeedscopeProfileType) {
309 | SpeedscopeProfileType["EVENTED"] = "evented";
310 | SpeedscopeProfileType["SAMPLED"] = "sampled";
311 | })(SpeedscopeProfileType || (SpeedscopeProfileType = {}));
312 | var SpeedscopeEventType; // compiled string enum; values are written to each event's "type" field
313 | (function (SpeedscopeEventType) {
314 | SpeedscopeEventType["OPEN_FRAME"] = "O";
315 | SpeedscopeEventType["CLOSE_FRAME"] = "C";
316 | })(SpeedscopeEventType || (SpeedscopeEventType = {}));
317 |
318 | if (typeof module === "object" && typeof module.exports === "object") { // CommonJS environment: export the class
319 | module.exports = WebGLProfiler
320 | } else if (typeof window !== 'undefined') { // otherwise, in a browser: expose it as a global
321 | window['WebGLProfiler'] = WebGLProfiler
322 | }
323 | })();
--------------------------------------------------------------------------------