├── .vscode └── settings.json ├── Analyzer ├── .gitignore ├── Analyze_Coroutines_Test.lua ├── Analyze_Test.lua ├── Main.lua ├── LuaParser.lua └── Analyze.lua ├── Screenshots ├── CPU.png ├── Memory.png └── CPU-DBM-Test.png ├── AddOn ├── !!!Perfy.toc ├── CLI.lua ├── CLI_Test.lua ├── TraceLoadingScreen.lua ├── Perfy_Test.lua └── Perfy.lua ├── Instrumentation ├── Main.lua ├── TocHandler.lua ├── Instrument_Test.lua └── Instrument.lua ├── Accuracy.md └── README.md /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Analyzer/.gitignore: -------------------------------------------------------------------------------- 1 | stacks-cpu.txt 2 | stacks-memory.txt -------------------------------------------------------------------------------- /Screenshots/CPU.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/emmericp/Perfy/HEAD/Screenshots/CPU.png -------------------------------------------------------------------------------- /Screenshots/Memory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/emmericp/Perfy/HEAD/Screenshots/Memory.png -------------------------------------------------------------------------------- /Screenshots/CPU-DBM-Test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/emmericp/Perfy/HEAD/Screenshots/CPU-DBM-Test.png -------------------------------------------------------------------------------- /AddOn/!!!Perfy.toc: -------------------------------------------------------------------------------- 1 | ## Interface: 100205 2 | ## Interface-Classic: 11501 3 | ## Interface-Wrath: 30403 4 | ## Title: Perfy 5 | ## Notes: Fancy profiling 6 | ## SavedVariables: Perfy_Export 7 | 8 | Perfy.lua 9 | TraceLoadingScreen.lua 10 
| CLI.lua 11 | -------------------------------------------------------------------------------- /AddOn/CLI.lua: -------------------------------------------------------------------------------- 1 | local function usage() 2 | print("[Perfy] Usage:") 3 | print("/perfy start [time] -- Starts Perfy and optionally automatically stops it after time seconds.") 4 | print("/perfy stop -- Stops Perfy.") 5 | print("/perfy [time] -- Toggles Perfy, if starting optionally stops after time seconds.") 6 | print("/perfy ls|loadingscreen -- Starts Perfy once the next loading screen is shown, stops once loading completes.") 7 | print("/perfy load -- Loads an on-demand loadable addon and traces its loading process.") 8 | print("/perfy run
 -- Starts Perfy, runs the given code, and stops Perfy again.")
 9 | 	print("/perfy clear -- Deletes all collected traces.")
10 | end
11 | 
12 | local loadstring = loadstring or load -- Lua 5.2+ support to not fail tests if running under a later Lua version
13 | 
14 | SLASH_PERFY1 = '/perfy'
--- Handler for the /perfy slash command.
-- With no argument or a plain number it toggles Perfy (the number is passed to Perfy_Start as the run time).
-- Otherwise the first word selects a sub-command (start, stop, clear, ls/loadingscreen, load, run);
-- anything unrecognized prints the usage text.
-- @param msg Everything the user typed after "/perfy".
function SlashCmdList.PERFY(msg)
	local arg1, arg2 = msg:match("%s*([^%s]+)%s*([^%s]*)")
	if not arg1 or tonumber(arg1) then
		-- No sub-command given: toggle.
		if not Perfy_Running() then
			Perfy_Start(tonumber(arg1))
		else
			Perfy_Stop()
		end
		return
	end

	arg1 = arg1:lower()
	if arg1 == "start" then
		Perfy_Start(tonumber(arg2))
	elseif arg1 == "stop" then
		Perfy_Stop()
	elseif arg1 == "clear" then
		Perfy_Clear()
	elseif arg1 == "ls" or arg1 == "loadingscreen" then
		Perfy_LogLoadingScreen()
	elseif arg1 == "load" and arg2 ~= "" then
		Perfy_LoadAddOn(arg2)
	elseif arg1 == "run" and arg2 ~= "" then
		-- The code is everything after the sub-command, not just the next word,
		-- otherwise "/perfy run print(1 + 1)" would be cut off at the first space.
		local code = msg:match("^%s*[^%s]+%s+(.+)$")
		local func, err = loadstring(code, "(/perfy run)")
		if not func then
			error(err)
		end
		Perfy_Run(func)
	else
		usage()
	end
end
47 | 


--------------------------------------------------------------------------------
/AddOn/CLI_Test.lua:
--------------------------------------------------------------------------------
  1 | SlashCmdList = {}
  2 | 
  3 | local running, runtime, cleared, logLoadingScreen, loadAddon
  4 | function Perfy_Start(time)
  5 | 	running = true
  6 | 	runtime = time
  7 | end
  8 | 
  9 | function Perfy_Stop()
 10 | 	running = false
 11 | 	runtime = nil
 12 | end
 13 | 
 14 | function Perfy_Running()
 15 | 	return running
 16 | end
 17 | 
 18 | function Perfy_Clear()
 19 | 	cleared = true
 20 | end
 21 | 
 22 | function Perfy_LogLoadingScreen()
 23 | 	logLoadingScreen = true
 24 | end
 25 | 
 26 | function Perfy_LoadAddOn(addon)
 27 | 	loadAddon = addon
 28 | end
 29 | 
 30 | function Perfy_Run(func)
 31 | 	func()
 32 | end
 33 | 
 34 | local function reset()
 35 | 	running, runtime, cleared, logLoadingScreen, loadAddon = nil, nil, nil, nil, nil
 36 | end
 37 | 
 38 | require "CLI"
 39 | 
 40 | reset()
 41 | SlashCmdList.PERFY("")
 42 | assert(running)
 43 | assert(runtime == nil)
 44 | SlashCmdList.PERFY("")
 45 | assert(not running)
 46 | 
 47 | reset()
 48 | SlashCmdList.PERFY("start")
 49 | assert(running)
 50 | assert(runtime == nil)
 51 | SlashCmdList.PERFY("stop")
 52 | assert(not running)
 53 | SlashCmdList.PERFY("clear")
 54 | assert(cleared)
 55 | 
 56 | reset()
 57 | SlashCmdList.PERFY("start 5")
 58 | assert(running)
 59 | assert(runtime == 5)
 60 | 
 61 | reset()
 62 | SlashCmdList.PERFY(" start	5  ")
 63 | assert(running)
 64 | assert(runtime == 5)
 65 | 
 66 | reset()
 67 | SlashCmdList.PERFY("10")
 68 | assert(running)
 69 | assert(runtime == 10)
 70 | SlashCmdList.PERFY("")
 71 | assert(not running)
 72 | 
 73 | reset()
 74 | SlashCmdList.PERFY("ls")
 75 | assert(logLoadingScreen)
 76 | reset()
 77 | SlashCmdList.PERFY("loadingscreen")
 78 | assert(logLoadingScreen)
 79 | 
 80 | reset()
 81 | SlashCmdList.PERFY("load")
 82 | assert(not loadAddon)
 83 | reset()
 84 | SlashCmdList.PERFY("load asdf")
 85 | assert(loadAddon == "asdf")
 86 | 
 87 | reset()
 88 | TEST_GLOBAL_VAR=false
 89 | SlashCmdList.PERFY("run TEST_GLOBAL_VAR=true")
 90 | assert(not running)
 91 | assert(TEST_GLOBAL_VAR)
 92 | 
 93 | reset()
 94 | local ok, err = pcall(SlashCmdList.PERFY, "run fail")
 95 | assert(not running)
 96 | assert(not ok)
 97 | 
 98 | reset()
 99 | SlashCmdList.PERFY("foo")
100 | assert(not running)
101 | 


--------------------------------------------------------------------------------
/Analyzer/Analyze_Coroutines_Test.lua:
--------------------------------------------------------------------------------
 1 | local analyze = require "Analyze"
 2 | 
 3 | -- Trace that would be generated by something like this:
 4 | --[[
 5 | function Fun1()	sleep(1) coroutine.yield() Fun2() end
 6 | function Fun2() sleep(2) coroutine.yield() end
 7 | function Fun3()	sleep(1) coroutine.yield() Fun4() end
 8 | function Fun4() sleep(2) coroutine.yield() end
 9 | 
10 | local c1, c2 = coroutine.create(Fun1), coroutine.create(Fun3)
11 | 
12 | function Run()
13 | 	while coroutine.status(c2) ~= "dead" do
14 | 		sleep(0.1)
15 | 		coroutine.resume(c1)
16 | 		sleep(0.1)
17 | 		coroutine.resume(c2)
18 | 	end
19 | end
20 | ]]
21 | local testData = [[
22 | local Fun1, Fun2, Fun3, Fun4, Run, Cr1, Cr2 = 1, 2, 3, 4, 5, 6, 7
23 | local Enter, Leave, CoroutineResume, CoroutineYield = 1, 2, 3, 4
24 | Perfy_Export = {
25 | 	FunctionNames = {
26 | 		Fun1 = Fun1, Fun2 = Fun2, Fun3 = Fun3, Fun4 = Fun4, Run = Run, Cr1 = Cr1, Cr2 = Cr2
27 | 	},
28 | 	EventNames = {
29 | 		Enter = Enter, Leave = Leave, CoroutineResume = CoroutineResume, CoroutineYield = CoroutineYield
30 | 	},
31 | 	Trace = {
32 | 		{0.0, Enter, Run, 0, 0, 0},
33 | 		{0.1, CoroutineResume, Cr1, 0, 0, 0},
34 | 		{0.1, Enter, Fun1, 0, 0, 0},
35 | 		{1.1, CoroutineYield, Cr1, 0, 0, 0},
36 | 		{1.2, CoroutineResume, Cr2, 0, 0, 0},
37 | 		{1.2, Enter, Fun3, 0, 0, 0},
38 | 		{2.2, CoroutineYield, Cr2, 0, 0, 0},
39 | 		{2.3, CoroutineResume, Cr1, 0, 0, 0},
40 | 		{2.3, Enter, Fun2, 0, 0, 0},
41 | 		{4.3, CoroutineYield, Cr1, 0, 0, 0},
42 | 		{4.4, CoroutineResume, Cr2, 0, 0, 0},
43 | 		{4.4, Enter, Fun4, 0, 0, 0},
44 | 		{6.4, CoroutineYield, Cr2, 0, 0, 0},
45 | 		{6.5, CoroutineResume, Cr1, 0, 0, 0},
46 | 		{6.5, Leave, Fun2, 0, 0, 0},
47 | 		{6.5, Leave, Fun1, 0, 0, 0},
48 | 		{6.6, CoroutineResume, Cr2, 0, 0, 0},
49 | 		{6.6, Leave, Fun4, 0, 0, 0},
50 | 		{6.6, Leave, Fun3, 0, 0, 0},
51 | 		{6.6, Leave, Run, 0, 0, 0},
52 | 	}
53 | }
54 | ]]
55 | 
56 | local oldLoadfile = loadfile
57 | function loadfile(file, ...)
58 | 	if file == "test" then
59 | 		local loaded
60 | 		---@diagnostic disable-next-line: redundant-parameter
61 | 		return load(function() if loaded then return nil end loaded = true return testData end, file, ...)
62 | 	else
63 | 		return oldLoadfile(file, ...)
64 | 	end
65 | end
66 | 
67 | local trace = analyze:LoadSavedVars("test")
68 | local cpuStacks = analyze:FlameGraph(trace)
69 | 
70 | assert(cpuStacks["Unknown addon;Run"] == 0.6e6)
71 | assert(cpuStacks["Unknown addon;Run;Fun1"] == 1e6)
72 | assert(cpuStacks["Unknown addon;Run;Fun1;Fun2"] == 2e6)
73 | assert(cpuStacks["Unknown addon;Run;Fun3"] == 1e6)
74 | assert(cpuStacks["Unknown addon;Run;Fun3;Fun4"] == 2e6)
75 | 


--------------------------------------------------------------------------------
/Analyzer/Analyze_Test.lua:
--------------------------------------------------------------------------------
 1 | local analyze = require "Analyze"
 2 | 
 3 | -- Trace that would be generated by something like this:
 4 | --[[
 5 | function Fun1()	sleep(1) Fun2() sleep(1) end
 6 | function Fun2()	sleep(1) Fun3() sleep(1) Fun4() sleep(1) end
 7 | function Fun3() alloc(10) sleep(1) end
 8 | function Fun4() alloc(20) sleep(1) end
 9 | Fun1() -- Sleeps 7 seconds
10 | sleep(10) -- this isn't traced
11 | Fun4() -- Sleeps 1 second, covers that we don't account the 10 seconds we don't see here
12 | ]]
13 | local testData = [[
14 | local Fun1, Fun2, Fun3, Fun4 = 1, 2, 3, 4
15 | local Enter, Leave = 1, 2
16 | Perfy_Export = {
17 | 	FunctionNames = {
18 | 		Fun1 = Fun1, Fun2 = Fun2, Fun3 = Fun3, Fun4 = Fun4
19 | 	},
20 | 	EventNames = {
21 | 		Enter = Enter, Leave = Leave
22 | 	},
23 | 	Trace = { -- Tracing overhead is .25 seconds and 10 bytes
24 | 		{1.00, Enter, Fun1, 0.25, 10, 10},
25 | 		{2.25, Enter, Fun2, 0.25, 20, 10},
26 | 		{3.50, Enter, Fun3, 0.25, 30, 10},
27 | 		{4.75, Leave, Fun3, 0.25, 50, 10},
28 | 		{6.00, Enter, Fun4, 0.25, 60, 10},
29 | 		{7.25, Leave, Fun4, 0.25, 90, 10},
30 | 		{8.50, Leave, Fun2, 0.25, 100, 10},
31 | 		{9.75, Leave, Fun1, 0.25, 110, 10},
32 | 		{20.00, Enter, Fun4, 0.25, 110, 10},
33 | 		{21.25, Leave, Fun4, 0.25, 140, 10},
34 | 	}
35 | }
36 | ]]
37 | 
38 | -- FIXME: this broke with the introduction of the Lua parser which can't handle the file above :/
39 | local oldLoadfile = loadfile
40 | function loadfile(file, ...)
41 | 	if file == "test" then
42 | 		local loaded
43 | 		---@diagnostic disable-next-line: redundant-parameter
44 | 		return load(function() if loaded then return nil end loaded = true return testData end, file, ...)
45 | 	else
46 | 		return oldLoadfile(file, ...)
47 | 	end
48 | end
49 | 
50 | local trace = analyze:LoadSavedVars("test")
51 | assert(#trace == 10)
52 | assert(trace[5].timestamp == 6)
53 | assert(trace[5].event == "Enter")
54 | assert(trace[5].functionName == "Fun4")
55 | assert(trace[5].timeOverhead == 0.25)
56 | assert(trace[5].memory == 60)
57 | assert(trace[5].memoryOverhead == 10)
58 | 
59 | local cpuStacks = analyze:FlameGraph(trace)
60 | assert(cpuStacks["Unknown addon;Fun1"] == 2000000)
61 | assert(cpuStacks["Unknown addon;Fun1;Fun2"] == 3000000)
62 | assert(cpuStacks["Unknown addon;Fun1;Fun2;Fun3"] == 1000000)
63 | assert(cpuStacks["Unknown addon;Fun1;Fun2;Fun4"] == 1000000)
64 | assert(cpuStacks["Unknown addon;Fun4"] == 1000000)
65 | 
66 | local memStacks = analyze:FlameGraph(trace, "memory", "memoryOverhead")
67 | assert(memStacks["Unknown addon;Fun1;Fun2;Fun3"] == 10)
68 | assert(memStacks["Unknown addon;Fun1;Fun2;Fun4"] == 20)
69 | assert(memStacks["Unknown addon;Fun4"] == 20)
70 | 


--------------------------------------------------------------------------------
/AddOn/TraceLoadingScreen.lua:
--------------------------------------------------------------------------------
 1 | -- Uncomment this line to enable login loading screen tracing.
 2 | --Perfy_Start()
 3 | 
 4 | -- Login/reload loading screen event log looks like this:
 5 | -- 1. Run Lua files (Enter/Leave on main chunks)
 6 | -- 2. ADDON_LOADED fires
 7 | -- 3. PLAYER_LOGIN fires
 8 | -- 4. PLAYER_ENTERING_WORLD fires
 9 | -- 5. LOADING_SCREEN_DISABLED fires
10 | -- 6. OnUpdate fires once
11 | -- 7. SPELLS_CHANGED fires
12 | --
13 | -- We care about when the user considers the game running, a reasonable definition for running is "after the first frame has been drawn".
14 | -- A reasonable close approximation for this is the start of the second (!) OnUpdate call because they happen before a frame is drawn.
15 | -- Note that being off here by a frame isn't too bad because we only account time that we can "see" anyways, so the extra time during the two frames the game does other things doesn't show up anyways.
16 | local counter = 0
17 | local loginLoadingScreenFrame = CreateFrame("Frame")
18 | loginLoadingScreenFrame:SetScript("OnUpdate", function(self)
19 | 	counter = counter + 1
20 | 	if counter == 2 then
21 | 		Perfy_Stop()
22 | 		self:Hide()
23 | 	end
24 | end)
25 | loginLoadingScreenFrame:Show()
26 | 
27 | -- Other loading screens look like this:
28 | -- 1. LOADING_SCREEN_ENABLED fires
29 | -- 2. OnUpdate continues firing normally
30 | -- 3. PLAYER_LEAVING_WORLD fires
31 | -- 4. OnUpdate continues firing normally
32 | -- 5. PLAYER_ENTERING_WORLD fires and OnUpdate stops
33 | -- 6. LOADING_SCREEN_DISABLED fires
34 | -- 7. SPELLS_CHANGED fires
35 | -- 8. OnUpdate continues firing normally
36 | --
37 | -- LOADING_SCREEN_ENABLED is a good point to start
38 | -- The second OnUpdate after LOADING_SCREEN_DISABLED is a reasonable point to end following the same logic as above.
local loadingScreenFrame = CreateFrame("Frame")
-- Frames start out shown (WoW API default), which would run the OnUpdate handler below right after login
-- and call Perfy_Stop() without any traced loading screen. Keep the frame hidden until
-- LOADING_SCREEN_DISABLED shows it; events are still delivered while it is hidden.
loadingScreenFrame:Hide()
loadingScreenFrame:RegisterEvent("LOADING_SCREEN_ENABLED")
loadingScreenFrame:RegisterEvent("LOADING_SCREEN_DISABLED")

local logNextLoadingScreen = false
-- Number of OnUpdate calls since the traced loading screen was disabled.
-- Named differently from the login counter above to avoid shadowing it.
local updatesAfterLoadingScreen = 0
loadingScreenFrame:SetScript("OnEvent", function(self, event)
	if not logNextLoadingScreen then return end
	if event == "LOADING_SCREEN_ENABLED" then
		Perfy_Start()
	elseif event == "LOADING_SCREEN_DISABLED" then
		logNextLoadingScreen = false
		updatesAfterLoadingScreen = 0
		-- Showing the frame enables OnUpdate, which stops Perfy on the second update.
		self:Show()
	end
end)
loadingScreenFrame:SetScript("OnUpdate", function(self)
	updatesAfterLoadingScreen = updatesAfterLoadingScreen + 1
	if updatesAfterLoadingScreen == 2 then
		Perfy_Stop()
		self:Hide()
	end
end)
62 | 
63 | -- Trace a non-login loading screen.
64 | function Perfy_LogLoadingScreen()
65 | 	print("[Perfy] Next loading screen will be logged by Perfy.")
66 | 	print("[Perfy] This will not work for loading screens due to UI reload or logging in. See file TraceLoadingScreen.lua for instructions to trace these initial loading screens.")
67 | 	logNextLoadingScreen = true
68 | end
69 | 


--------------------------------------------------------------------------------
/Instrumentation/Main.lua:
--------------------------------------------------------------------------------
 1 | -- LuaLS environment setup
 2 | local basePath = arg[0]:gsub("[/\\]*[^/\\]-$", "") -- The dir under which this file is
 3 | package.path = "./script/?.lua;./script/?/init.lua;./test/?.lua;./test/?/init.lua;"
 4 | package.path = package.path .. basePath .. "/?.lua;"
 5 | package.path = package.path .. basePath .. "/?/init.lua"
 6 | _G.log = require "log"
 7 | local fs = require "bee.filesystem"
 8 | local util = require "utility"
 9 | local rootPath = debug.getinfo(1, "S").source:sub(2):gsub("[/\\]*[^/\\]-$", "")
10 | rootPath = rootPath == "" and "." or rootPath
11 | ROOT = util.expandPath(rootPath)
12 | LUA_VER = "Lua 5.1"
13 | 
14 | local instrument = require "Instrument"
15 | local toc = require "TocHandler"
16 | 
17 | if #arg < 1 then
18 | 	print("Usage: " .. arg[0] .. " ")
19 | 	return
20 | end
21 | 
--- Removes everything up to and including "Interface/AddOns/" (either slash style) from a path.
-- Paths that do not contain that directory are returned unchanged.
-- @param path File path as given on the command line.
-- @return The shortened path (exactly one return value).
local function stripPathPrefix(path)
	-- The extra parentheses drop gsub's second result (the substitution count).
	return (path:gsub("^.*Interface[/\\]AddOns[/\\]", ""))
end
25 | 
-- Canonical, lower-cased paths of all files that were already handed out.
local seenFiles = {}

--- Tells whether a file is encountered for the first time and remembers it.
-- @param fileName Path of the file to check.
-- @return true on the first call for a given file, false on every later call.
local function deduplicateFile(fileName)
	-- Some addons reference their libraries through seemingly different paths, canonicalizing the path avoids some wrong warnings.
	-- Also, WoW is not case sensitive and some AddOns such as WeakAuras reference the same file but with different cases (libs/ vs. Libs/).
	local key = fs.canonical(fs.path(fileName)):string():lower()
	local firstVisit = not seenFiles[key]
	seenFiles[key] = true
	return firstVisit
end
37 | 
--- Instruments the AddOn described by a TOC file.
-- The TOC is read, passed through toc:InjectDependency, written back with "\n" line endings,
-- and every file it references (as reported by toc:FindFiles) is instrumented once.
-- @param fileName Path to the .toc file.
local function handleTocFile(fileName)
	print("Instrumenting AddOn " .. stripPathPrefix(fileName))

	local input, readErr = io.open(fileName, "r")
	if not input then error(readErr) end
	local lines = {}
	for rawLine in input:lines() do
		-- Why are line ending differences between Windows and others still a problem in 2024?
		lines[#lines + 1] = rawLine:match("(.-)\r?$")
	end
	input:close()

	toc:InjectDependency(lines)

	local output, writeErr = io.open(fileName, "w+b")
	if not output then error(writeErr) end
	for _, line in ipairs(lines) do
		output:write(line, "\n")
	end
	output:close()

	local dir = fileName:gsub("[^/\\]-$", "")
	for _, referencedFile in ipairs(toc:FindFiles(lines, dir)) do
		-- Deduplication avoids errors about files being referenced multiple times (by tocs for different game versions).
		if deduplicateFile(referencedFile) then
			instrument:InstrumentFile(referencedFile) -- TODO: handle failure gracefully to not fail completely on a single bad addon
		end
	end
end
69 | 
--- Dispatches a command line argument: TOC files are processed as a whole AddOn,
-- everything else is instrumented directly as a single file (once per distinct file).
-- @param fileName Path given on the command line.
local function handleFile(fileName)
	-- "%." matches a literal dot; an unescaped "." would also accept names such as "footoc".
	-- The extension is compared case-insensitively so that ".TOC" is not mistaken for a Lua file.
	if fileName:lower():match("%.toc$") then
		return handleTocFile(fileName)
	end
	-- TODO: handle XML files here
	if deduplicateFile(fileName) then
		instrument:InstrumentFile(fileName)
	end
end
79 | 
-- Process every file given on the command line, skipping Perfy's own files.
for _, fileName in ipairs(arg) do
	-- The dot is escaped to match literally and both path separators are accepted,
	-- so Windows-style paths into the !!!Perfy directory are skipped as well.
	if fileName:match("Perfy%.toc$") or fileName:match("!!!Perfy[/\\]") then
		print("File " .. fileName .. " seems to belong to Perfy itself -- skipping.")
	else
		handleFile(fileName)
	end
end
87 | 
88 | 
89 | 


--------------------------------------------------------------------------------
/Instrumentation/TocHandler.lua:
--------------------------------------------------------------------------------
  1 | local mod = {}
  2 | 
  3 | function mod:InjectDependency(lines, dep)
  4 | 	dep = dep or "!!!Perfy"
  5 | 	local lastMetadataLine = 0
  6 | 	local foundDependencyEntry = false
  7 | 	local foundPerfyMetadata = false
  8 | 	for i, line in ipairs(lines) do
  9 | 		local key, value = line:match("^##%s*([^:%s]+)%s*:%s*(.-)%s*$")
 10 | 		if key and value then
 11 | 			lastMetadataLine = i
 12 | 			if key == "Dependencies" then
 13 | 				foundDependencyEntry = true
 14 | 				local foundDep = false
 15 | 				for entry in value:gmatch("([^%s,]+)") do
 16 | 					if entry == dep then
 17 | 						foundDep = true
 18 | 					end
 19 | 				end
 20 | 				if not foundDep then
 21 | 					lines[i] = line .. (value ~= "" and ", " or "") .. dep
 22 | 				end
 23 | 			end
 24 | 			if key == "X-Perfy-Instrumented" then
 25 | 				foundPerfyMetadata = true
 26 | 			end
 27 | 		end
 28 | 	end
 29 | 	if not foundPerfyMetadata then
 30 | 		table.insert(lines, lastMetadataLine + 1, "## X-Perfy-Instrumented: true")
 31 | 	end
 32 | 	if not foundDependencyEntry then
 33 | 		table.insert(lines, lastMetadataLine + 1, "## Dependencies: " .. dep)
 34 | 	end
 35 | end
 36 | 
 37 | -- "Mom, can we have an XML parser?" -- "No, we have XML parser at home."
 38 | -- XML parser at home:
 39 | local function parseXml(fileName, addonBasePath, files)
 40 | 	local dir = fileName:gsub("[^/\\]-$", "")
 41 | 	local file, err = io.open(fileName, "r")
 42 | 	if not file then error(err) end -- TODO: could handle gracefully to not fail completely on one invalid toc
 43 | 	local xml = file:read("*a")
 44 | 	file:close()
 45 | 	local luaFiles = {}
 46 | 	-- "No, you can't parse HTML/XML like that" -- "Haha, regex goes 
"
 47 | 	xml = xml:gsub("", "")
 48 | 	for ref in xml:gmatch("<%s*[iI][nN][cC][lL][uU][dD][eE]%s+[fF][iI][lL][eE]%s*=%s*(.-)%s*/?%s*>") do
 49 | 		local delim = ref:sub(1, 1)
 50 | 		if delim == "\"" or delim == "'" then
 51 | 			ref = ref:sub(2, -2)
 52 | 		end
 53 | 		ref = ref:gsub("\\", "/")
 54 | 		if ref:lower():match("%.xml$") then
 55 | 			parseXml(dir .. ref, addonBasePath, files)
 56 | 		elseif ref:lower():match("%.lua$") then -- Yes, this is apparently valid
 57 | 			luaFiles[#luaFiles + 1] = ref
 58 | 		else
 59 | 			print("File " .. fileName .. " references file " .. ref .. " which is neither XML nor Lua, ignoring.")
 60 | 		end
 61 | 	end
 62 | 	for ref in xml:gmatch("<%s*[sS][cC][rR][iI][pP][tT]%s+[fF][iI][lL][eE]%s*=%s*(.-)%s*/?%s*>") do
 63 | 		local delim = ref:sub(1, 1)
 64 | 		if delim == "\"" or delim == "'" then
 65 | 			ref = ref:sub(2, -2)
 66 | 		end
 67 | 		ref = ref:gsub("\\", "/")
 68 | 		luaFiles[#luaFiles+1] = ref
 69 | 	end
 70 | 	for _, ref in ipairs(luaFiles) do
 71 | 		local fileRelToXml = io.open(dir .. ref, "r")
 72 | 		local fileRelToToc = io.open(addonBasePath .. ref, "r")
 73 | 		if fileRelToXml then
 74 | 			fileRelToXml:close()
 75 | 			files[#files + 1] = dir .. ref
 76 | 		end
 77 | 		if fileRelToToc then -- Apparently this is valid, e.g., RXPGuides does this for its database files
 78 | 			fileRelToToc:close()
 79 | 			files[#files + 1] = addonBasePath .. ref
 80 | 		end
 81 | 		if not fileRelToXml and not fileRelToToc then
 82 | 			print("File " .. fileName .. " references unknown file " .. ref)
 83 | 		end
 84 | 	end
 85 | end
 86 | 
 87 | function mod:FindFiles(lines, dir)
 88 | 	local files = {}
 89 | 	for _, line in ipairs(lines) do
 90 | 		if not line:match("^%s*#") and not line:match("^%s*$") then
 91 | 			local file = line:gsub("^%s*(.-)%s*$", "%1"):gsub("\\", "/")
 92 | 			if file:match("%.[xX][mM][lL]$") then
 93 | 				parseXml(dir .. file, dir, files)
 94 | 			else
 95 | 				files[#files + 1] = dir .. file
 96 | 			end
 97 | 		end
 98 | 	end
 99 | 	return files
100 | end
101 | 
102 | return mod
103 | 


--------------------------------------------------------------------------------
/Analyzer/Main.lua:
--------------------------------------------------------------------------------
 1 | local analyze = require "Analyze"
 2 | 
 3 | local inFile, frameCmd, frameIds = ...
 4 | if not inFile then
 5 | 	return print("Usage: Main.lua /Perfy.lua [--frames -] [--frame ] [--split-frames]")
 6 | end
 7 | 
 8 | local function writeFile(fileNameSuffix, fullData)
 9 | 	for frame, data in pairs(fullData) do
10 | 		local fileNames = {}
11 | 		if frame == "all" then
12 | 			fileNames[#fileNames + 1] = fileNameSuffix
13 | 		else
14 | 			for _, name in ipairs(frame.names) do
15 | 				fileNames[#fileNames + 1] = name .. "-" .. fileNameSuffix
16 | 			end
17 | 		end
18 | 		for _, fileName in ipairs(fileNames) do
19 | 			local file, err = io.open(fileName, "w+b")
20 | 			if not file then error(err) end
21 | 			local count = 0
22 | 			for k, v in pairs(data) do
23 | 				file:write(k)
24 | 				file:write(" ")
25 | 				file:write(tostring(v))
26 | 				file:write("\n")
27 | 				if v ~= 0 then
28 | 					count = count + 1
29 | 				end
30 | 			end
31 | 			file:close()
32 | 			print(("Wrote %d non-zero stacks to %s"):format(count, fileName))
33 | 		end
34 | 	end
35 | end
36 | 
-- Optional frame selection: "--frame(s) <first>[-<last>]" restricts the analysis to a frame range,
-- "--split-frames" requests one result per top frame.
local firstFrame, lastFrame, splitFrames
if frameCmd and frameCmd:match("^%-%-?frames?$") then
	if not frameIds then return print("expected frame numbers for " .. frameCmd) end
	-- Anchored at both ends so that junk in front of the numbers (e.g. "x3") is rejected
	-- instead of being silently ignored.
	local first, last = frameIds:match("^(%d*)%-?(%d*)$")
	if last == "" then
		last = first
	end
	firstFrame = first and tonumber(first)
	lastFrame = last and tonumber(last)
	if not firstFrame or not lastFrame then
		return print("couldn't parse " .. frameCmd .. " argument")
	end
elseif frameCmd and frameCmd:match("^%-%-?split%-frames$") then
	splitFrames = true
end
52 | 
53 | local trace = analyze:LoadSavedVars(inFile)
54 | 
55 | -- FIXME: properly split stack reconstruction and flame graph generation, this currently needs to be called prior to FindSlowFrames to have empty stack info
56 | analyze:FlameGraph(trace, "timestamp", "timeOverhead")
57 | local frames = analyze:FindSlowFrames(trace)
58 | print("number of frames: ", #frames)
59 | local topFramesByCpu = analyze:GetTopFrames(frames, 10, function(e1, e2) return e1.time - e1.timeOverhead > e2.time - e2.timeOverhead end)
60 | local topFramesByMemory = analyze:GetTopFrames(frames, 10, function(e1, e2) return e1.memory - e1.memOverhead > e2.memory - e2.memOverhead end)
61 | 
-- Build the CPU and memory stacks, either per top frame (--split-frames) or for the selected frame range
-- (the whole trace if no range was given), then print a summary.
local stacksCpu, stacksMemory
if splitFrames then
	local topFrames = {}
	for i, v in ipairs(topFramesByCpu) do
		v.names = {("top-cpu-%d-frame-%d"):format(i, v.id)}
		topFrames[#topFrames + 1] = v
	end
	for i, v in ipairs(topFramesByMemory) do
		-- Frames can have multiple names because it's confusing if like half of your frames for a metric are missing because they happen to overlap with the other metric
		if v.names then
			v.names[#v.names + 1] = ("top-memory-%d-frame-%d"):format(i, v.id)
		else
			v.names = {("top-memory-%d-frame-%d"):format(i, v.id)}
		end
		topFrames[#topFrames + 1] = v
	end
	stacksCpu = analyze:FlameGraph(trace, "timestamp", "timeOverhead", nil, nil, topFrames)
	stacksMemory = analyze:FlameGraph(trace, "memory", "memoryOverhead", nil, nil, topFrames)
else
	-- Reject frame numbers that don't exist; otherwise the report below fails with
	-- "attempt to index a nil value" after the whole trace was already processed.
	if firstFrame and not (frames[firstFrame] and frames[lastFrame]) then
		return print(("frame range %d-%d is out of bounds, trace contains %d frames"):format(firstFrame, lastFrame, #frames))
	end
	stacksCpu = analyze:FlameGraph(trace, "timestamp", "timeOverhead", frames[firstFrame], frames[lastFrame])
	stacksMemory = analyze:FlameGraph(trace, "memory", "memoryOverhead", frames[firstFrame], frames[lastFrame])
end

if firstFrame then
	print("Only reporting trace entries " .. frames[firstFrame].first .. " to " .. frames[lastFrame].last)
else
	print("Top frames by CPU time:")
	analyze:PrintSlowFrames(topFramesByCpu, 10)
	print()
	print("Top frames by memory allocations:")
	analyze:PrintSlowFrames(topFramesByMemory, 10)
	print("Frame CPU time and memory may include uninstrumented code, run full analysis per-frame by using \"--split-frames\" to get one result per top frame or by selecting frames via \"--frames <first>-<last>\"")
end
95 | 
96 | writeFile("stacks-cpu.txt", stacksCpu)
97 | writeFile("stacks-memory.txt", stacksMemory)
98 | 


--------------------------------------------------------------------------------
/AddOn/Perfy_Test.lua:
--------------------------------------------------------------------------------
  1 | local mockTime = 0
  2 | local function mockGetTime()
  3 | 	return mockTime
  4 | end
  5 | GetTimePreciseSec = mockGetTime
  6 | 
  7 | local lastError
  8 | local errorHandler = function(err) lastError = err end
  9 | function geterrorhandler()
 10 | 	return errorHandler
 11 | end
 12 | 
 13 | function seterrorhandler(f)
 14 | 	errorHandler = f
 15 | end
 16 | 
 17 | C_Timer = {
 18 | 	After = function(_, f) f() end,
 19 | 	NewTicker = function(_, f) f() end,
 20 | }
 21 | 
 22 | PERFY_TEST_ENVIRONMENT = true
 23 | require "Perfy"
 24 | 
 25 | function TestHappyPath()
 26 | 	Perfy_Clear()
 27 | 	mockTime = 0
 28 | 	Perfy_Start()
 29 | 	assert(Perfy_Running())
 30 | 	mockTime = 0.25
 31 | 	Perfy_Trace(0, "Enter", "Fun1")
 32 | 	mockTime = 1.25
 33 | 	Perfy_Trace(1, "Enter", "Fun2")
 34 | 	mockTime = 2
 35 | 	Perfy_Trace_Passthrough("Leave", "Fun2")
 36 | 	mockTime = 3
 37 | 	Perfy_Trace_Passthrough("Leave", "Fun1")
 38 | 	Perfy_Stop()
 39 | 	assert(not Perfy_Running())
 40 | 
 41 | 	-- FIXME: use some assertion library/test framework, what a mess
 42 | 	assert(Perfy_Export.FunctionNames["Perfy_Start Perfy/internal"] == 1)
 43 | 	assert(Perfy_Export.FunctionNames.Fun1 == 2)
 44 | 	assert(Perfy_Export.FunctionNames.Fun2 == 3)
 45 | 	assert(Perfy_Export.FunctionNames["Perfy_Stop Perfy/internal"] == 4)
 46 | 	assert(Perfy_Export.EventNames.PerfyStart == 1)
 47 | 	assert(Perfy_Export.EventNames.Enter == 2)
 48 | 	assert(Perfy_Export.EventNames.Leave == 3)
 49 | 	assert(Perfy_Export.EventNames.PerfyStop == 4)
 50 | 	assert(#Perfy_Export.Trace == 6)
 51 | 
 52 | 	-- Entry 1: Start Perfy
 53 | 	assert(Perfy_Export.Trace[1][1] == 0) -- Timestamp
 54 | 	assert(Perfy_Export.Trace[1][2] == Perfy_Export.EventNames.PerfyStart) -- Event
 55 | 
 56 | 	-- Entry 2: Enter Fun1
 57 | 	assert(Perfy_Export.Trace[2][1] == 0) -- Timestamp
 58 | 	assert(Perfy_Export.Trace[2][2] == Perfy_Export.EventNames.Enter) -- Event
 59 | 	assert(Perfy_Export.Trace[2][3] == Perfy_Export.FunctionNames.Fun1) -- Function
 60 | 	assert(Perfy_Export.Trace[2][4] == 0.25) -- Overhead
 61 | 	assert(Perfy_Export.Trace[2][5] > 0) -- Memory
 62 | 	assert(Perfy_Export.Trace[2][6] > 0) -- Memory overhead
 63 | 
 64 | 	-- Entry 4: Leave Fun2
 65 | 	assert(Perfy_Export.Trace[4][1] == 2) -- Timestamp
 66 | 	assert(Perfy_Export.Trace[4][2] == Perfy_Export.EventNames.Leave) -- Event
 67 | 	assert(Perfy_Export.Trace[4][3] == Perfy_Export.FunctionNames.Fun2) -- Function
 68 | 	assert(Perfy_Export.Trace[4][4] == 0) -- Overhead (0 on leave because it's updated internally and the mock doesn't update)
 69 | 	assert(Perfy_Export.Trace[4][5] > 0) -- Memory
 70 | 	assert(Perfy_Export.Trace[4][6] > 0) -- Memory overhead
 71 | 
 72 | 	-- Entry 6: Stop Perfy
 73 | 	assert(Perfy_Export.Trace[6][1] == 3) -- Timestamp
 74 | 	assert(Perfy_Export.Trace[6][2] == Perfy_Export.EventNames.PerfyStop) -- Event
 75 | end
 76 | 
 77 | function TestClear()
 78 | 	Perfy_Clear()
 79 | 	Perfy_Start()
 80 | 	Perfy_Trace(0, "Enter", "Fun1")
 81 | 	Perfy_Stop()
 82 | 	assert(#Perfy_Export.Trace > 0)
 83 | 	Perfy_Clear()
 84 | 	assert(not Perfy_Export.Trace)
 85 | end
 86 | 
 87 | function TestLeavePassthrough()
 88 | 	Perfy_Start()
 89 | 	local a, b = Perfy_Trace_Passthrough("Leave", "Fun1", "foo", "bar")
 90 | 	Perfy_Stop()
 91 | 	assert(a == "foo" and b == "bar")
 92 | end
 93 | 
 94 | function TestErrorHandlerHook()
 95 | 	Perfy_Clear()
 96 | 	Perfy_Start()
 97 | 	geterrorhandler()("test")
 98 | 	Perfy_Stop()
 99 | 
100 | 	assert(#Perfy_Export.Trace == 3)
101 | 	assert(lastError == "test")
102 | end
103 | 
104 | function TestMultipleStarts()
105 | 	Perfy_Clear()
106 | 	Perfy_Start()
107 | 	Perfy_Trace(0, "Enter", "Fun1")
108 | 	Perfy_Trace(0, "Leave", "Fun1")
109 | 	Perfy_Trace(0, "Enter", "Fun3")
110 | 	Perfy_Stop()
111 | 	assert(not Perfy_Running())
112 | 	Perfy_Start()
113 | 	Perfy_Trace(0, "Enter", "Fun1")
114 | 	Perfy_Trace(0, "Enter", "Fun2")
115 | 	Perfy_Stop()
116 | 	assert(#Perfy_Export.Trace == 9)
117 | 	 -- Double translation would enter a 1 =  entry because they see already translated entries as something to translate again
118 | 	assert(#Perfy_Export.FunctionNames == 0)
119 | 	assert(#Perfy_Export.EventNames == 0)
120 | 	assert(Perfy_Export.FunctionNames.Fun1 == 2)
121 | 	assert(Perfy_Export.FunctionNames.Fun3 == 3)
122 | 	assert(Perfy_Export.FunctionNames["Perfy_Stop Perfy/internal"] == 4)
123 | 	assert(Perfy_Export.FunctionNames.Fun2 == 5)
124 | end
125 | 
--- Perfy_LoadAddOn() must forward to the global LoadAddOn and record trace events around it.
function TestLoadAddon()
	Perfy_Clear()

	-- Replace the global LoadAddOn with a stub that remembers which AddOn was requested.
	local requestedAddon
	local function fakeLoadAddOn(addon)
		requestedAddon = addon
		return true
	end
	_G.LoadAddOn = fakeLoadAddOn

	Perfy_LoadAddOn("FooAddOn")

	assert(requestedAddon == "FooAddOn")
	assert(#Perfy_Export.Trace == 4)
	local eventNames = Perfy_Export.EventNames
	assert(eventNames.LoadAddOn == 2)
	assert(eventNames.LoadAddOnFinished == 3)
	assert(Perfy_Export.FunctionNames.FooAddOn == 2)
end
140 | 
--- Perfy_Run() must call the given function and only stop tracing if it started it itself.
function TestRunFunc()
	local wasCalled = false
	local function markCalled()
		wasCalled = true
	end

	-- Case 1: tracing is not active before Perfy_Run
	Perfy_Clear()
	Perfy_Run(markCalled)
	assert(not Perfy_Running())
	assert(wasCalled)
	assert(#Perfy_Export.Trace == 2)

	-- Case 2: tracing is already active before Perfy_Run
	Perfy_Clear()
	Perfy_Start()
	wasCalled = false
	Perfy_Run(markCalled)
	assert(wasCalled)
	assert(Perfy_Running()) -- Doesn't stop if it was already running
	Perfy_Stop()
	assert(not Perfy_Running())
end
161 | 
--- An error raised inside the function given to Perfy_Run() must propagate to the
--- caller, and tracing must not be left running afterwards.
function TestRunFuncError()
	Perfy_Clear()
	local wasCalled = false
	local function failingFunc()
		wasCalled = true
		error("test")
	end

	local succeeded = pcall(Perfy_Run, failingFunc)

	assert(not Perfy_Running())
	assert(not succeeded)
	assert(wasCalled)
	assert(#Perfy_Export.Trace == 2)
end
175 | 
-- Run every test case in declaration order; the first failing assert aborts the run.
local testCases = {
	TestHappyPath,
	TestClear,
	TestLeavePassthrough,
	TestErrorHandlerHook,
	TestMultipleStarts,
	TestLoadAddon,
	TestRunFunc,
	TestRunFuncError,
}
for _, runTest in ipairs(testCases) do
	runTest()
end
184 | 


--------------------------------------------------------------------------------
/Accuracy.md:
--------------------------------------------------------------------------------
  1 | # Validating accuracy
  2 | 
  3 | WoW classic still has the builtin profiler (CVar `scriptProfile`), so we can compare Perfy against it to see how good or bad we are.
  4 | 
  5 | ## Notes on the builtin profiler
  6 | 
  7 | We can make an educated guess on how the builtin profiler probably works based on the APIs it offers:
  8 | The core reporting mechanic it provides is that it can tell you how much time a given function used (with or without including subroutines) and how often it was called.
  9 | The additional functions about reporting per AddOn or per frame are just aggregations built on top of this.
 10 | 
 11 | Since it accurately reports how often a function is called it must be based on tracing each call and not on sampling.
 12 | My guess is that it hooks into the Lua VM for handling the opcodes `CALL`, `TAILCALL`, and `RETURN` and then stores the elapsed time and a counter in the Lua object representing the function.
 13 | `GetFunctionCPUUsage(func, includeSubroutines)` then just returns these fields for the given function.
 14 | This is low overhead and easy to implement but does not allow you to reconstruct the entire call stack as it does not remember which function called which.
 15 | Fun fact: the reported time per function has the same granularity as `GetTimePreciseSec()` (100 ns), I don't think that is a coincidence.
 16 | 
 17 | Overall the builtin profiler is a bit cumbersome to use because you need the actual function object to query the results and it can't tell you anything about the relation between functions.
 18 | 
 19 | 
 20 | ## Test setup
 21 | 
 22 | Since the builtin profiler is a bit annoying to use we need to look at something that involves only a few functions.
 23 | Whatever we are testing also needs to be reproducible because we want to run both profilers independently.
 24 | Finally, it should also be somewhat realistic.
 25 | 
 26 | DBM-StatusBarTimers' update logic for DBM timers fits this description.
 27 | It only has 5 relevant functions, presents a realistic workload (5% of total CPU load in the Gnomeregan example from README.md), and it is reproducible by running `/dbm test` which starts a few timers lasting 60 seconds total.
 28 | 
 29 | 
 30 | ## Test results
 31 | 
 32 | [![FlameGraph of CPU usage](Screenshots/CPU-DBM-Test.png)](https://emmericp.github.io/Perfy/perfy-cpu-dbm-test.svg)
 33 | 
 34 | The functions we are looking at are the five largest in the flame graph above: `onUpdate`, `barPrototype:Update`, `DBT:UpdateBars`, `stringFromTimer`, and `AnimateEnlarge`.
 35 | 
 36 | The run with the builtin profiler was done without Perfy instrumentation in place and `scriptProfile` was disabled when running Perfy.
 37 | The total number of calls to all functions was identical for Perfy and the builtin profiler, and stayed identical across all runs.
 38 | It always took exactly 18176 calls to the `onUpdate` handler to run the DBM test mode with my game running at a stable 60 fps.
 39 | 
 40 | Each run was repeated 5 times, the table shows the average and standard deviation.
 41 | 
 42 | | Function              |      Builtin profiler (µs) |             Perfy (µs) | Discrepancy |
 43 | |-----------------------|---------------------------:|-----------------------:|------------:|
 44 | | `onUpdate`            |              273740 ± 0.5% |          282690 ± 0.3% |       3.3%  |
 45 | | `barPrototype:Update` |              260560 ± 0.6% |          265522 ± 0.3% |       1.9%  |
 46 | | `DBT:UpdateBars`      |               98564 ± 0.7% |           94413 ± 1.2% |      -4.2%  |
 47 | | `stringFromTimer`     |               24465 ± 0.9% |           29985 ± 0.8% |      22.6%  |
 48 | | `AnimateEnlarge`      |                2916 ± 1.8% |            2992 ± 1.2% |       2.5%  |
 49 | 
 50 | Perfy tends to report a slightly higher CPU usage -- this is expected because even with all the logic to account for overhead it will still at least add the cost of one function call (to `Perfy_GetTime()`) to each function.
 51 | Neither Perfy nor the builtin profiler is perfect, but I'm happy that they agree to within a few percent :)
 52 | 
 53 | Two results are a bit odd and warrant further investigation:
 54 | 
 55 | 
 56 | ### DBT:UpdateBars() reports a lower time, everything else reports a higher time
 57 | 
 58 | This is because the most commonly executed path in the function is just a tight loop calling some WoW API functions.
 59 | Perfy does not add extra overhead to these functions -- but the builtin profiler does.
 60 | This can be validated by running Perfy with and without the builtin profiler enabled: it adds 11% overhead to this function.
 61 | For other functions such as `stringFromTimer` or `AnimateEnlarge` it only adds 3% and 5% respectively.
 62 | 
 63 | This means it's not Perfy that is wrong here but the builtin profiler.
 64 | Again, neither of these is perfect.
 65 | 
 66 | 
 67 | ### stringFromTimer() has a 22% discrepancy
 68 | 
 69 | This is a simple leaf function that formats the remaining time into a human-readable format.
 70 | 
 71 | ```
 72 | local function stringFromTimer(t)
 73 | 	if t <= DBT.Options.TDecimal then
 74 | 		return ("%.1f"):format(t)
 75 | 	elseif t <= 60 then
 76 | 		return ("%d"):format(t)
 77 | 	else
 78 | 		return ("%d:%0.2d"):format(t / 60, math.fmod(t, 60))
 79 | 	end
 80 | end
 81 | ```
 82 | 
 83 | The problem here is the tail calls to `string.format`; the function is instrumented as follows:
 84 | 
 85 | ```
 86 | local function stringFromTimer(t) Perfy_Trace(Perfy_GetTime(), "Enter", "stringFromTimer")
 87 | 	if t <= DBT.Options.TDecimal then
 88 | 		return Perfy_Trace_Leave("Leave", "stringFromTimer", ("%.1f"):format(t))
 89 | 	elseif t <= 60 then
 90 | 		return Perfy_Trace_Leave("Leave", "stringFromTimer", ("%d"):format(t))
 91 | 	else
 92 | 		return Perfy_Trace_Leave("Leave", "stringFromTimer", ("%d:%0.2d"):format(t / 60, math.fmod(t, 60)))
 93 | 	end
 94 | end
 95 | ```
 96 | 
 97 | `Perfy_Trace_Leave` wraps the original return expression and returns it again, that allows us to inject a tracepoint between the evaluation of the return expression and the actual return.
 98 | But unlike `Perfy_Trace` at the beginning of the function it can't contain a call to `Perfy_GetTime()` to determine when the return is happening -- this is instead done inside Perfy.
 99 | That means the time it takes to call into Perfy is incorrectly attributed to `stringFromTimer` and cannot be subtracted by the analysis script.
100 | `stringFromTimer` has a discrepancy of 5520 µs vs. our reference, it is called 16010 times in the trace, so that's 0.345 µs of error per call.
101 | 
102 | Unfortunately I don't think this is fixable for the general case:
103 | we would need to add the `Perfy_GetTime()` call to the end of the expression list to make sure it is evaluated after all return expressions, but the last expression can return a vararg of unknown length, so we can't just add it.
104 | 
105 | I validated this by rewriting all return points in `stringFromTimer` as follows
106 | 
107 | ```
108 | local res = ("<format string>"):format(t)
109 | Perfy_Trace(Perfy_GetTime(), "Leave", "stringFromTimer")
110 | return res
111 | ```
112 | 
113 | Perfy now reports only 27175 µs of total time (it got faster) for this function and the builtin profiler reports 26588 µs (it got slower) when changing from a tail call to an intermediate variable.
114 | That's a discrepancy of only 2.2% and in line with other functions.
115 | 
116 | ### Conclusion
117 | 
118 | Overall it's pretty accurate, especially when looking at the big picture.
119 | Note how the error for functions with tail calls is only large when looking at the relative error of a small function.
120 | The absolute error is on the order of hundreds of nanoseconds per function call.
121 | 


--------------------------------------------------------------------------------
/Analyzer/LuaParser.lua:
--------------------------------------------------------------------------------
  1 | -- Parser for Lua table expressions
  2 | -- Taken from https://github.com/DeadlyBossMods/DeadlyBossMods/blob/master/DBM-Test/Tools/Shared/Parser.lua
  3 | local parser = {}
  4 | 
  5 | local function syntaxError(message, code, pos, level)
  6 | 	-- pos itself is useless for errors because it's post comment stripping
  7 | 	error("parse error: " .. message .. " while trying to parse \"" .. code:sub(pos, pos + 30) .. "\"", 1 + (level or 1))
  8 | end
  9 | 
-- FIXME: probably a bad idea to do comment stripping as preprocessing because Lua comments are actually pretty damn complex at least if we want multi-line strings and comments
--- Removes single-line (`--`) comments from `code` and returns the remaining text.
-- Raises a syntax error when a `--` is directly followed by a long bracket (`--[[` or `--[=[`).
-- A `--` inside a quoted string is not treated as a comment: lines that contain a quote
-- after the candidate comment start are re-scanned character by character.
-- @param code Lua source text
-- @return the code with comments removed, as a single string
local function stripComments(code)
	local result = {} -- Pieces of output, concatenated at the end
	local offset = 1 -- Position in code up to which input has been handled
	-- it's easier to just do this line-by-line, but doing so decreases total parser performance by ~80%
	while true do
		-- Plain-text search for the next candidate comment start
		local commentStart = code:find("--", offset, true)
		if not commentStart then
			-- No more comments: keep the remainder and stop.
			-- NOTE(review): `<` instead of `<=` skips a remainder of exactly one character -- confirm this is intended
			if offset < #code then
				result[#result + 1] = code:sub(offset)
			end
			break
		end
		local oldOffset = offset
		offset = commentStart + 2 -- First character after the "--"
		if code:match("^%[=*%[", offset) then
			syntaxError("comments in multi-line style (--[[]]) aren't supported", code, commentStart)
		end
		-- Find the extent of the line containing the "--": lineStart/lineEnd are distances
		-- from offset backwards/forwards to the nearest newline (byte 10) or the bounds of code
		local lineStart = 0
		local lineEnd = 0
		while code:byte(offset - lineStart, offset - lineStart) ~= 10 and offset - lineStart >= 1 do
			lineStart = lineStart + 1
		end
		while code:byte(offset + lineEnd, offset + lineEnd) ~= 10 and offset + lineEnd < #code do
			lineEnd = lineEnd + 1
		end
		local line = code:sub(offset - lineStart, offset + lineEnd)
		-- From here on commentStart is relative to `line` instead of `code`
		commentStart = commentStart - (offset - lineStart)
		if offset - lineStart == 0 then
			-- The backwards scan ran past the start of code (position 0), compensate for that
			commentStart = commentStart -1
		end
		-- Keep everything between the previously handled position and the start of this line
		result[#result + 1] = code:sub(oldOffset, offset - lineStart)
		offset = offset + lineEnd
		if line:sub(commentStart + 3):match("[\"']") then
			-- There is a quote after the candidate, so the "--" may be inside a string.
			-- Re-scan the line while tracking string state to find the first "--" outside of a string.
			commentStart = nil
			local inString = nil -- The active string delimiter, or nil/false when outside of a string
			local escapeCount, commentCount = 0, 0
			for i = 1, #line do
				local char = line:sub(i, i)
				if not inString and (char == "\"" or char == "'") then
					inString = char
				elseif not inString and char == "-" then
					if commentCount == 1 then
						-- Second consecutive "-" outside of a string: the comment starts here
						commentStart = i - 2
						break
					else
						commentCount = 1
					end
				elseif inString and char == "\\" then
					escapeCount = escapeCount + 1
				elseif inString and char == inString then
					-- A delimiter ends the string only after an even number of backslashes
					if escapeCount % 2 == 0 then
						inString = false
						commentCount = 0
					else
						escapeCount = 0
					end
				else
					escapeCount, commentCount = 0, 0
				end
			end
		end
		-- Keep the line up to the comment, or the whole line if the re-scan found no comment
		result[#result + 1] = line:sub(1, commentStart and commentStart or #line)
	end
	return table.concat(result, "")
end
 76 | 
 77 | local function expectChar(code, pos, expected)
 78 | 	local _, newPos, actual = code:find("%s*(.)%s*", pos) -- TODO: whitespace handling is a mess, consider doing proper tokenization prior to parsing
 79 | 	if actual ~= expected then
 80 | 		syntaxError("expected " .. expected .. ", got " .. (actual or ""), code, pos)
 81 | 	end
 82 | 	return newPos + 1
 83 | end
 84 | 
 85 | local function consumeOptional(code, pos, expected)
 86 | 	local _, newPos, actual = code:find("%s*(.)%s*", pos)
 87 | 	if actual == expected then
 88 | 		return newPos + 1
 89 | 	else
 90 | 		return pos
 91 | 	end
 92 | end
 93 | 
 94 | local function consumeChar(code, pos)
 95 | 	local _, pos = code:find("%s*(.)", pos)
 96 | 	return pos + 1
 97 | end
 98 | 
 99 | local function peekChar(code, pos)
100 | 	return code:match("%s*(.)", pos)
101 | end
102 | 
--- Parses an identifier (letter or underscore followed by letters, digits or underscores),
--- skipping leading whitespace.
-- @param code the code being parsed
-- @param pos position to start at
-- @return position right after the identifier
-- @return the identifier
-- Raises a syntax error if no identifier starts at `pos`.
local function parseIdentifier(code, pos)
	local _, endPos, identifier = code:find("^%s*([_%a][_%w]*)", pos)
	if not endPos then
		-- Say what was expected; the excerpt added by syntaxError shows what was found instead
		syntaxError("expected identifier", code, pos)
	end
	return endPos + 1, identifier
end
110 | 
--- Parses a numeric literal (decimal, hexadecimal or with exponent), skipping surrounding whitespace.
-- @param code the code being parsed
-- @param pos position to start at
-- @return position right after the number and trailing whitespace
-- @return the parsed number
-- Raises a syntax error naming the offending text if it is not a valid number.
local function parseNumber(code, pos)
	local _, endPos, text = code:find("^%s*([-.ex%x]*)%s*", pos)
	local value = text and tonumber(text)
	if not value then
		-- Report the raw text and the position where the number started
		syntaxError("invalid number " .. tostring(text), code, pos)
	end
	return endPos + 1, value
end
119 | 
--- Parses a boolean literal (`true` or `false`), skipping surrounding whitespace.
-- @param code the code being parsed
-- @param pos position to start at
-- @return position right after the literal and trailing whitespace
-- @return the parsed boolean
-- Raises a syntax error if the word at `pos` is neither `true` nor `false`.
local function parseBool(code, pos)
	local _, endPos, word = code:find("^%s*([%a]*)%s*", pos)
	if word == "true" then
		return endPos + 1, true
	elseif word == "false" then
		return endPos + 1, false
	end
	-- syntaxError needs code and pos to build its excerpt; without them it fails while indexing nil
	syntaxError("invalid assignment", code, pos)
end
131 | 
--- Parses the literal `nil`, skipping surrounding whitespace.
-- @param code the code being parsed
-- @param pos position to start at
-- @return position right after the literal and trailing whitespace
-- @return nil (returned explicitly so that callers always receive two values)
-- Raises a syntax error if the text at `pos` is not `nil`.
local function parseNil(code, pos)
	local _, endPos = code:find("^%s*(nil)%s*", pos)
	if not endPos then
		-- syntaxError needs code and pos to build its excerpt; without them it fails while indexing nil
		syntaxError("invalid assignment", code, pos)
	end
	return endPos + 1, nil
end
139 | 
-- Maps two-character escape sequences to the character they stand for
local validEscapes = {
	["\\a"] = "\a", ["\\b"] = "\b", ["\\f"] = "\f", ["\\n"] = "\n", ["\\r"] = "\r", ["\\t"] = "\t", ["\\v"] = "\v", ["\\\\"] = "\\" ,["\\\""] = "\"", ["\\'"] = "'",
	["\\\n"] = "\n", ["\\\r"] = "" -- \r is probably followed by a \n anyways, so good enough to just strip it, also, who uses multi-line strings like this anyways?
}

--- Resolves backslash escape sequences in the body of a string literal.
-- Handles decimal escapes of up to three digits and the sequences in `validEscapes`;
-- for an unknown escape the backslash is dropped and the character kept.
-- @param str string contents without the surrounding delimiters
-- @return the unescaped string (exactly one value)
local function unescapeString(str)
	if not str:find("\\", nil, true) then -- This check increases performance by 105% because escapes are very rare and str:gsub() seems to be very expensive
		return str
	end
	-- Decode in a single left-to-right pass so that the output of one escape is never
	-- re-interpreted as the start of another one (e.g. an escaped backslash followed by digits).
	-- The pattern grabs the escaped character plus up to two more digits for decimal escapes.
	local result = str:gsub("\\(.)(%d?%d?)", function(char, digits)
		if char:find("%d") then
			return string.char(tonumber(char .. digits) or 0)
		end
		-- Not a decimal escape: the digits are ordinary characters and are kept as they are
		return (validEscapes["\\" .. char] or char) .. digits
	end)
	-- gsub also returns the replacement count, the local assignment above drops it
	return result
end
153 | 
--- Parses a quoted string literal delimited by `"` or `'`.
-- @param code the code being parsed
-- @param pos position of the opening delimiter; leading whitespace before it is skipped
-- @return position right after the closing delimiter
-- @return the string contents with escape sequences resolved
-- Raises a syntax error if the string is not terminated.
local function parseString(code, pos)
	-- Callers decide to parse a string based on the first non-whitespace character,
	-- so the opening delimiter is not necessarily located exactly at pos
	local strStart = code:find("%S", pos) or pos
	local delimiter = code:sub(strStart, strStart)
	local searchFrom = strStart + 1
	while true do
		-- Plain search: the delimiter is a literal character, not a pattern
		local nextDelimPos = code:find(delimiter, searchFrom, true)
		if not nextDelimPos then
			syntaxError("unterminated string", code, strStart)
		end
		-- Count the backslashes directly in front of the delimiter:
		-- an odd number means the delimiter itself is escaped
		local escapes = 0
		while code:sub(nextDelimPos - escapes - 1, nextDelimPos - escapes - 1) == "\\" and nextDelimPos - escapes > strStart do
			escapes = escapes + 1
		end
		if escapes % 2 == 0 then
			return nextDelimPos + 1, unescapeString(code:sub(strStart + 1, nextDelimPos - 1))
		end
		-- Escaped delimiter, keep looking behind it
		searchFrom = nextDelimPos + 1
	end
end
173 | 
174 | 
175 | local parseValue
176 | 
--- Parses an assignment of the form `identifier = value`.
-- @return position after the value, the identifier, and the parsed value
local function parseAssignment(code, pos)
	local afterName, name = parseIdentifier(code, pos)
	local afterEquals = expectChar(code, afterName, "=")
	local afterValue, value = parseValue(code, afterEquals)
	return afterValue, name, value
end
183 | 
--- Parses one entry of a table constructor, or the closing brace.
-- Supported entries are `[key] = value`, `name = value`, and plain values;
-- a comma following the entry is consumed if present.
-- @return position after the entry
-- @return the key, or nil for a plain value
-- @return the value
-- @return true if the closing `}` was consumed instead of an entry (key and value are nil then)
local function parseTableEntry(code, pos)
	local nextChar = peekChar(code, pos)
	if nextChar == "}" then
		return consumeChar(code, pos), nil, nil, true
	end

	local key, value
	if nextChar == "[" then
		-- Explicit key: [key] = value
		pos = consumeChar(code, pos)
		pos, key = parseValue(code, pos)
		pos = expectChar(code, pos, "]")
		pos = expectChar(code, pos, "=")
		pos, value = parseValue(code, pos)
	elseif nextChar ~= "\"" and nextChar ~= "'" and code:find("^%s*([_%a][_%w]*%s*=)", pos) then
		-- The quote check in front is an optimization to not use the assignment checking regex
		-- for the common "list of strings" case
		pos, key, value = parseAssignment(code, pos)
	else
		-- Plain value, the caller assigns the array index
		pos, value = parseValue(code, pos)
	end
	return consumeOptional(code, pos, ","), key, value
end
211 | 
--- Parses a table constructor `{ ... }` into a Lua table.
-- Entries without a key are stored under consecutive integer indices starting at 1.
-- @return position after the closing brace, and the resulting table
local function parseTable(code, pos)
	local result = {}
	local nextIndex = 1
	pos = expectChar(code, pos, "{")
	repeat
		local key, value, done
		pos, key, value, done = parseTableEntry(code, pos)
		if not done then
			if key == nil then
				key = nextIndex
				nextIndex = nextIndex + 1
			end
			-- Only truthy keys are stored, i.e. an entry with the key `false` is dropped
			if key then
				result[key] = value
			end
		end
	until done
	return pos, result
end
232 | 
-- Number of values parsed so far, only used for progress output
local printCounter = 0

--- Parses any supported value: table, boolean, number, string, or nil.
-- This assigns the forward-declared `parseValue`, so it must not be turned into a `local function`.
-- @param code the code being parsed
-- @param pos position to start at
-- @return position after the value, and the parsed value
-- Raises a syntax error at the end of input and for unsupported values.
function parseValue(code, pos)
	printCounter = printCounter + 1
	-- Logs can have hundreds of millions of values, just printing some stats regularly so it doesn't look dead
	if printCounter % 1000000 == 0 then
		collectgarbage("collect") -- This seems to be important, makes the difference between OOM'ing after ~100M literals vs ~300M literals on a 16 GiB VM
		print(("Parsed %d literals"):format(printCounter))
	end
	-- Dispatch on the first non-whitespace character
	local nextChar = peekChar(code, pos)
	if not nextChar then
		syntaxError("expected value, got end of input", code, pos)
	elseif nextChar == "{" then
		return parseTable(code, pos)
	elseif nextChar == "t" or nextChar == "f" then
		return parseBool(code, pos)
	elseif nextChar:match("%d") or nextChar == "." or nextChar == "-" then
		return parseNumber(code, pos)
	elseif nextChar == "\"" or nextChar == "'" then
		return parseString(code, pos)
	elseif nextChar == "n" then
		return parseNil(code, pos)
	elseif nextChar == "[" then
		syntaxError("multi-line style strings aren't supported", code, pos)
	else
		syntaxError("unsupported value", code, pos)
	end
end
261 | 
--- Parses a sequence of top-level assignments `name = value`.
-- Stops at the end of the code or when only whitespace is left.
-- @return table mapping each assigned name to its value
local function parseChunk(code, pos)
	local env = {}
	local codeLength = #code
	while pos < codeLength and not code:match("^%s*$", pos) do
		local name, value
		pos, name, value = parseAssignment(code, pos)
		env[name] = value
	end
	return env
end
271 | 
-- Simple recursive descent parser for Lua tables to avoid Lua 5.1 constant limits
-- This is in no way a complete or correct parser for Lua tables, just something that happens to work for what WoW generates as saved variables (Transcriptor logs etc)
-- Specifically it doesn't handle multi-line comments and multi-line strings correctly as they are pretty complex.
--- Parses a chunk of top-level assignments after stripping single-line comments.
-- @param code Lua source text
-- @return table mapping each assigned global name to its parsed value
function parser:ParseLua(code)
	print("Parsing Lua in trace file")
	local stripped = stripComments(code)
	return parseChunk(stripped, 1)
end
281 | 
282 | return parser


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Instrumentation-based performance profiling for Lua in WoW
  2 | 
  3 | Perfy injects code into World of Warcraft AddOns to measure performance, it can tell you exactly where CPU time is spent and which functions allocate how much memory.
  4 | 
  5 | # Example: Finding the source of micro stuttering in classic raids
  6 | 
  7 | I traced the Mechanical Menagerie fight in Gnomeregan with all AddOns I use instrumented with Perfy.
  8 | The trace contains about 5 million entries that span a time of 214 seconds including the whole fight (195 seconds) and is a good example for tracking down a real performance problem.
  9 | 
 10 | ## CPU usage
 11 | 
 12 | [![FlameGraph of CPU usage](Screenshots/CPU.png)](https://emmericp.github.io/Perfy/perfy-cpu.svg)
 13 | 
 14 | This visualization is called a [Flame Graph](https://www.brendangregg.com/flamegraphs.html), a neat way to visualize hierarchical data such as resource usage by call stacks. Click on the graph above to open an interactive SVG to fully explore it!
 15 | 
 16 | 
 17 | In total Perfy traced 6.8 seconds of execution time, that is an average CPU load of only 3.2% due to 3rd party addons.
 18 | So nothing to worry about, but it is a bit odd that 43% of this load is due to ClassicHealPrediction and LibHealComm.
 19 | 
 20 | ## Memory allocations
 21 | 
 22 | [![FlameGraph of memory allocations](Screenshots/Memory.png)](https://emmericp.github.io/Perfy/perfy-memory.svg)
 23 | Click on the graph above to open an interactive SVG to fully explore it!
 24 | 
 25 | Perfy found a total of 489 MB of memory being allocated during the trace, that's 122 MB per minute.
 26 | 91% of that was due to ClassicHealPrediction.
 27 | 
 28 | Now that is an interesting result and probably the cause of our micro stuttering.
 29 | Allocating memory isn't a bad thing per se, the Lua garbage collector is pretty decent.
 30 | But garbage collection pauses can still lead to micro stuttering, so AddOns typically avoid allocating memory, especially during boss fights in big raids.
 31 | There's clearly something going wrong in ClassicHealPrediction because why would it need so much memory in a simple 10 man raid?
 32 | There are [reports of it being worse in larger raids](https://github.com/dev7355608/ClassicHealPrediction/issues/2), I wonder how much it needs in a 40 man raid, I would love to see a trace!
 33 | 
 34 | The function `updateHealPrediction` that Perfy identified gets called OnUpdate (i.e., 60 times per second for me) and for every unit frame.
 35 | It then re-draws the heal predictions (even if nothing changed) and calls `CreateColor()` up to 8 times to do so.
 36 | `CreateColor()` allocates a new `ColorMixin` every time for the same color and that's how we got the excessive allocations.
 37 | 
 38 | How did I identify `CreateColor()`, a function that does not show up at all in the Flame Graph above?
 39 | The reason why it doesn't show up separately is that we cannot instrument functions that belong to the default UI, so identifying the culprit was an educated guess.
 40 | 
 41 | I confirmed that this is indeed the culprit by pulling out the color creation into a separate function that wraps `CreateColor()`.
 42 | Running the whole process again shows that virtually all of the memory allocation is now in that new wrapper function.
 43 | 
 44 | # Usage
 45 | 
 46 | ## Install dependencies
 47 | 
 48 | * [Lua 5.3](https://lua.org) or newer
 49 | * [lua-language-server](https://github.com/LuaLS/lua-language-server)
 50 | * [FlameGraph](https://github.com/brendangregg/FlameGraph)
 51 | 
 52 | ## Instrument 
 53 | 
 54 | Run `Instrumentation/Main.lua` under lua-language-server and provide it a list of .toc files as input.
 55 | It will automatically find all Lua files referenced there.
 56 | **Caution: it modifies files in place, so it's best to make a backup before running this on your AddOns folder.**
 57 | 
 58 | ```
 59 | ./bin/lua-language-server <path to Perfy>/Instrumentation/Main.lua <path to WoW AddOns folder>/*/*.toc
 60 | ```
 61 | 
 62 | You don't have to add it to every AddOn, it's perfectly fine to only instrument the AddOns you are interested in.
 63 | But note that shared libraries are only instanced by one AddOn even if multiple AddOns are trying to load them, so they may be missing from the traces if the used instance is not instrumented.
 64 | If uninstrumented AddOns use shared libraries from instrumented AddOns there may be large unexplained self-times of functions in the instrumented shared library if it is the initial entry point of a call trace (e.g., timers).
 65 | 
 66 | ## Measure
 67 | 
 68 | Install the WoW AddOn in `AddOn/` as `AddOns/!!!Perfy` and start measuring in game as follows:
 69 | 
 70 | ```
 71 | /perfy start
 72 | /perfy stop
 73 | ```
 74 | 
 75 | Reload your UI or log out afterwards to make WoW export the data.
 76 | 
 77 | `/perfy start <seconds>` also takes an optional parameter for the runtime in seconds after which it stops automatically.
 78 | You can also run just `/perfy` to toggle it.
 79 | 
 80 | **Keep measurement times short, this is not something that can run continuously.**
 81 | Short means a few minutes depending on the load and number of instrumented AddOns.
 82 | The main bottleneck is memory, it needs an average of 240 bytes per trace entry (`216 * #entries + 2^ceil(log_2(#entries)) * 24` bytes to be exact) and the whole UI will leak memory/accumulate garbage since tracing disables garbage collection.
 83 | With my personal UI setup this adds up to about 100-200 MB per minute during a Gnomeregan boss fight.
 84 | 
 85 | Perfy regularly reports how many events it already gathered, as a rule of thumb you shouldn't exceed 10 to 20 million trace entries and a few gigabytes of memory.
 86 | Tracing a whole boss fight should be fine, but a whole raid night is definitely not feasible (nor would it be useful).
 87 | 
 88 | ## Analyze
 89 | 
 90 | Run `Analyzer/Main.lua` on the saved variables export of the AddOn.
 91 | 
 92 | ```
 93 | lua Main.lua <path to WoW>/WTF/Account/<account name>/SavedVariables/Perfy.lua
 94 | ```
 95 | 
 96 | If you get an error about the data file being too large: make sure to run Lua version 5.3 or newer.
 97 | 
 98 | This outputs two files: `stacks-cpu.txt` with CPU usage in microseconds and `stacks-memory.txt` with memory allocations in bytes.
 99 | 
100 | ## Visualize
101 | 
102 | The files from the previous step are in the folded/collapsed stack format expected by [flamegraph.pl](https://github.com/brendangregg/FlameGraph).
103 | 
104 | ```
105 | ./flamegraph.pl stacks-cpu.txt --countname "Microseconds" --title "CPU time" --width 1600 > perfy-cpu.svg
106 | ./flamegraph.pl stacks-memory.txt --countname "Bytes" --title "Memory allocations" --width 1600 > perfy-memory.svg
107 | ```
108 | 
109 | The input text files can also be pre-filtered (just use `grep`, the format is pretty self-explanatory) to remove things you are not interested in.
110 | For example, the following command generates a [graph that excludes ClassicHealPrediction](https://emmericp.github.io/Perfy/perfy-memory-filtered.svg) with the data from the example above.
111 | 
112 | ```
113 |  grep -v ClassicHealPrediction stacks-memory.txt | ./flamegraph.pl --countname "Bytes" --title "Memory allocations" --width 1600 > perfy-memory.svg
114 | ```
115 | 
116 | # FAQ
117 | 
118 | ## How accurate is it?
119 | 
120 | See [Accuracy.md](./Accuracy.md) for measurements and details.
121 | 
122 | The tl;dr is that it's accurate within a few percent with one exception:
123 | non-trivial return expressions (anything not returning just locals or literals) introduce a small systematic error.
124 | On my system that seems to be about 300 ns per such return.
125 | This is not relevant in absolute terms for real-world usage, but it can lead to a large relative error for small functions -- in the example above there is a small leaf function that looks 22% larger than it actually is.
126 | 
127 | ## What about the default UI/Blizzard AddOns?
128 | 
129 | Impossible to instrument on the code level due to code signing.
130 | We could hook some functions but that always risks breaking everything due to taint.
131 | 
132 | ## Do the colors in the Flame Graph mean anything?
133 | 
134 | The color is a hash of the function name, so two entries of the same color are probably the same function called at different points.
135 | 
136 | ## How does it measure CPU time?
137 | 
138 | The injected code calls `GetTimePreciseSec()` which maps to some high frequency counter, probably in a system-specific manner.
139 | On my system (Windows 11) this has a granularity of 100 ns which is good enough for profiling interpreted code.
140 | 
141 | ## How does it measure memory allocations?
142 | 
143 | Starting Perfy disables garbage collection and the injected code calls `collectgarbage("count")` to measure changes in allocations.
144 | 
145 | For some unknown reason the reported memory usage is still sometimes non-monotonic (and no, it's not due to `table.wipe`).
146 | This happens rarely: 0.02% of trace events above report a negative memory delta.
147 | Perfy counts these as 0.
148 | These negative events seem to be following the same distribution as normal events, so there should be no systematic error being introduced.
149 | 
150 | ## No fair, you changed the outcome by measuring it!
151 | 
152 | Measuring performance will always affect performance, this is especially true for profilers based on instrumentation.
153 | Perfy adds around 1 µs of overhead (on my Ryzen 7800X3D) and 480 bytes of memory allocations to every function call (two trace entries).
154 | This overhead is accounted for separately and subtracted during analysis, so the overall measurement results are still pretty accurate.
155 | See [Accuracy.md](./Accuracy.md) for measurements and details. 
156 | 
157 | ## What about dynamically loaded code?
158 | 
159 | Will be accounted to the function that calls it.
160 | 
161 | ## What about coroutines?
162 | 
163 | Perfy keeps track of the call stack for each coroutine by hooking the coroutine functions.
164 | Coroutines are accounted to the first function that calls `coroutine.resume()` on them.
165 | Coroutines that are already running when tracing is started will trigger some warnings about failed stack reconstruction and you may see some partial stacks, but in general they are well supported.
166 | 
167 | ## What about pcall?
168 | 
169 | `pcall` can confuse the stack reconstruction a bit, but it shouldn't be too bad or relevant.
170 | In some cases we catch it and add a "missing stack information" entry to the stack, in others Perfy reports a warning when running the analyzer.
171 | 
172 | ## Why do I get an error about the constant table size in Perfy saved variables when reloading?
173 | 
174 | Perfy writes out a lot of data and WoW tries to load this again on reloading.
175 | For profiling anything non-trivial it writes out more data than Lua 5.1 can load.
176 | This is also the reason why a newer Lua version is required for the Analysis scripts.
177 | 
178 | This error can be ignored since we don't want to read back the data anyways.
179 | But unfortunately we can't suppress it.
180 | 
181 | ## Any other known limitations?
182 | 
183 | 1. Lua code directly embedded in XML files is currently not instrumented.
184 | 2. Files with lines longer than 10000 characters can cause problems during instrumentation.
185 | 


--------------------------------------------------------------------------------
/Instrumentation/Instrument_Test.lua:
--------------------------------------------------------------------------------
-- LuaLS environment setup
-- This script expects to be run by lua-language-server: it requires LuaLS modules
-- ("log", "bee.filesystem", "utility", "parser") via the search paths set up below.
local basePath = arg[0]:gsub("[/\\]*[^/\\]-$", "") -- The dir under which this file is
-- Search the LuaLS script and test directories (relative to the working directory) first,
-- then the directory of this file for the modules under test
package.path = "./script/?.lua;./script/?/init.lua;./test/?.lua;./test/?/init.lua;"
package.path = package.path .. basePath .. "/?.lua;"
package.path = package.path .. basePath .. "/?/init.lua"
_G.log = require "log"
local fs = require "bee.filesystem"
local util = require "utility"
-- Directory of the currently running chunk, taken from its debug source name (without the leading "@")
local rootPath = debug.getinfo(1, "S").source:sub(2):gsub("[/\\]*[^/\\]-$", "")
rootPath = rootPath == "" and "." or rootPath
-- Globals presumably read by the LuaLS modules required here -- TODO confirm which ones are needed
ROOT = util.expandPath(rootPath)
LUA_VER = "Lua 5.1"
TEST = true

local parser = require "parser"

local instrument = require "Instrument"
 18 | 
 19 | ---@param injections Injection[]
 20 | local function testInject(code, want, injections)
 21 | 	local state = parser.compile(code, "Lua", "Lua 5.1")
 22 | 	local got = table.concat(instrument:Inject(state, injections), "")
 23 | 	if got ~= want then
 24 | 		error(("Unexpected diff, want:\n%s\ngot:\n%s"):format(want, got), 2)
 25 | 	end
 26 | end
 27 | 
 28 | testInject("local foo", "local foo", {})
 29 | testInject("local foo", "test local foo", {
 30 | 	{pos = 0, text = "test"}
 31 | })
 32 | testInject("local foo", "local foo test", {
 33 | 	{pos = 9, text = "test"}
 34 | })
 35 | testInject("foo()bar()", "foo() test bar()", {
 36 | 	{pos = 5, text = "test"}
 37 | })
 38 | testInject("local foo\nlocal bar", "local foo test\nlocal bar", {
 39 | 	{pos = 9, text = "test"}
 40 | })
 41 | ---@diagnostic disable-next-line: err-esc
 42 | testInject("\xef\xbb\xbflocal foo", "\xef\xbb\xbftest local foo", {
 43 | 	{pos = 0, text = "test"}
 44 | })
 45 | testInject("local foo\nlocal bar", "local foo\nlocal bar\ntest", {
 46 | 	{pos = math.huge, text = "\ntest"}
 47 | })
 48 | 
 49 | local function testInstrumentFunction(code, want)
 50 | 	local state = parser.compile(code, "Lua", "Lua 5.1")
 51 | 	local got = table.concat(instrument:InstrumentFunctions(state, function(action) return instrument:String(action) end, nil, true), "")
 52 | 	if got ~= want then
 53 | 		error(("Unexpected diff, want:\n%s\ngot:\n%s"):format(want, got), 2)
 54 | 	end
 55 | end
 56 | 
 57 | -- Various types of function definitions
 58 | testInstrumentFunction("function foo() end", "function foo() Perfy_Trace(\"Enter\"); Perfy_Trace(\"Leave\"); end")
 59 | testInstrumentFunction("local function foo() end", "local function foo() Perfy_Trace(\"Enter\"); Perfy_Trace(\"Leave\"); end")
 60 | testInstrumentFunction("local foo = function() end", "local foo = function() Perfy_Trace(\"Enter\"); Perfy_Trace(\"Leave\"); end")
 61 | testInstrumentFunction("print(function() end)", "print(function() Perfy_Trace(\"Enter\"); Perfy_Trace(\"Leave\"); end)")
 62 | testInstrumentFunction("function foo:bar() end", "function foo:bar() Perfy_Trace(\"Enter\"); Perfy_Trace(\"Leave\"); end")
 63 | 
 64 | -- Return statements with "trivial" expressions
 65 | testInstrumentFunction("function foo() return end", "function foo() Perfy_Trace(\"Enter\"); Perfy_Trace(\"Leave\"); return end")
 66 | testInstrumentFunction("function foo() do return end end", "function foo() Perfy_Trace(\"Enter\"); do Perfy_Trace(\"Leave\"); return end Perfy_Trace(\"Leave\"); end")
 67 | testInstrumentFunction("function foo() return 1 end", "function foo() Perfy_Trace(\"Enter\"); Perfy_Trace(\"Leave\"); return 1 end")
 68 | testInstrumentFunction("function foo() return 1, nil, false, 1.1, 'str' end", "function foo() Perfy_Trace(\"Enter\"); Perfy_Trace(\"Leave\"); return 1, nil, false, 1.1, 'str' end")
 69 | testInstrumentFunction("function foo() local x, y return x, y end", "function foo() Perfy_Trace(\"Enter\"); local x, y Perfy_Trace(\"Leave\"); return x, y end")
 70 | testInstrumentFunction("local x, y function foo() return x, y end", "local x, y function foo() Perfy_Trace(\"Enter\"); Perfy_Trace(\"Leave\"); return x, y end")
 71 | 
 72 | -- Return statements with "non-trivial" expressions
 73 | testInstrumentFunction("function foo() return x, y end", "function foo() Perfy_Trace(\"Enter\"); return Perfy_Trace_Passthrough(\"Leave\", x, y) end")
 74 | testInstrumentFunction("function foo() return function() end end", "function foo() Perfy_Trace(\"Enter\"); return Perfy_Trace_Passthrough(\"Leave\", function() Perfy_Trace(\"Enter\"); Perfy_Trace(\"Leave\"); end) end")
 75 | testInstrumentFunction("function foo() return 1 + 2 end", "function foo() Perfy_Trace(\"Enter\"); return Perfy_Trace_Passthrough(\"Leave\", 1 + 2) end")
 76 | testInstrumentFunction("function foo() return bar() end", "function foo() Perfy_Trace(\"Enter\"); return Perfy_Trace_Passthrough(\"Leave\", bar()) end")
 77 | testInstrumentFunction("function foo() return x.y end", "function foo() Perfy_Trace(\"Enter\"); return Perfy_Trace_Passthrough(\"Leave\", x.y) end")
 78 | testInstrumentFunction("function foo() return false, 1 + 1 end", "function foo() Perfy_Trace(\"Enter\"); return Perfy_Trace_Passthrough(\"Leave\", false, 1 + 1) end")
 79 | 
 80 | -- Comments
 81 | testInstrumentFunction("function foo()--comment\nend", "function foo() Perfy_Trace(\"Enter\"); --comment\nPerfy_Trace(\"Leave\"); end")
 82 | 
 83 | -- No-ops
 84 | testInstrumentFunction("local foo='function() end'", "local foo='function() end'")
 85 | testInstrumentFunction("do return end", "do return end")
 86 | 
 87 | -- Multiple nested functions
 88 | testInstrumentFunction([[
 89 | function foo()
 90 | 	return function(bar)
 91 | 		if x then return else
 92 | 			return 5, 6, 7 end
 93 | 	end
 94 | end
 95 | ]], [[
 96 | function foo() Perfy_Trace("Enter");
 97 | 	return Perfy_Trace_Passthrough("Leave", function(bar) Perfy_Trace("Enter");
 98 | 		if x then Perfy_Trace("Leave"); return else
 99 | 			Perfy_Trace("Leave"); return 5, 6, 7 end
100 | 	Perfy_Trace("Leave"); end)
101 | end
102 | ]])
103 | 
104 | local perfyHeader = "--[[Perfy has instrumented this file]] local Perfy_GetTime, Perfy_Trace, Perfy_Trace_Passthrough = Perfy_GetTime, Perfy_Trace, Perfy_Trace_Passthrough; "
--- Instruments `code` as file "test.lua" and compares everything after the Perfy file header with `want`.
--- On mismatch the error reports both strings and the first differing offset.
---@param code string Lua source to instrument
---@param want string Expected instrumented source without the leading Perfy header
local function testMainChunkInstruments(code, want)
	local lines = instrument:Instrument(code, "test.lua")
	assert(lines)
	local got = table.concat(lines, "")
	got = got:sub(#perfyHeader + 1)
	if got ~= want then
		local commonLength = math.min(#got, #want)
		-- If one string is a strict prefix of the other, no character in the common range differs.
		-- The first difference is then the position right after the shorter string; sub() yields "" for it.
		local firstDiff = commonLength + 1
		for i = 1, commonLength do
			if got:sub(i, i) ~= want:sub(i, i) then
				firstDiff = i
				break
			end
		end
		error(("Unexpected diff, want:\n%s\ngot:\n%s\n first diff: '%s' vs. '%s' at offset %d"):format(
			want, got,
			want:sub(firstDiff, firstDiff), got:sub(firstDiff, firstDiff),
			firstDiff
		), 2)
	end
end
126 | local prefix = "Perfy_Trace(Perfy_GetTime(), \"Enter\", \"(main chunk) file://test.lua\");"
127 | local suffix = "Perfy_Trace(Perfy_GetTime(), \"Leave\", \"(main chunk) file://test.lua\");"
128 | local suffixPassthrough = "Perfy_Trace_Passthrough(\"Leave\", \"(main chunk) file://test.lua\","
129 | 
130 | testMainChunkInstruments("", prefix .. "\n" .. suffix)
131 | testMainChunkInstruments("foo = bar", prefix .. " foo = bar\n" .. suffix)
132 | testMainChunkInstruments("return 5", prefix .. " " .. suffix .. " return 5")
133 | testMainChunkInstruments("do return end", prefix .. " do " .. suffix .. " return end\n" .. suffix)
134 | testMainChunkInstruments("do return 1, 2 end", prefix .. " do " .. suffix .. " return 1, 2 end\n" .. suffix)
135 | testMainChunkInstruments("do return foo() end", prefix .. " do return " .. suffixPassthrough .. " foo()) end\n" .. suffix)
136 | testMainChunkInstruments("do return foo(), 2 end", prefix .. " do return " .. suffixPassthrough .. " foo(), 2) end\n" .. suffix)
137 | testMainChunkInstruments("-- Foo", prefix .. " -- Foo\n" .. suffix)
138 | testMainChunkInstruments([[
139 | local foo = bar
140 | if GetLocale() ~= "deDE" then
141 | 	return
142 | end
143 | foo = 5
144 | return foo
145 | ]], ([[
146 | %s local foo = bar
147 | if GetLocale() ~= "deDE" then
148 | 	%s return
149 | end
150 | foo = 5
151 | %s return foo
152 | ]]):format(prefix, suffix, suffix))
153 | 
--- Checks the function name that InstrumentFunctions reports for the function defined in `code`.
--- The name passed to the last callback invocation is the one compared against `want`.
---@param code string Lua source containing the function definition
---@param want string Expected function name including file and position
---@param fileName string|false? File name for the state URI; nil selects the default, false leaves the URI unset
local function testGetFunctionName(code, want, fileName)
	local parsed = parser.compile(code, "Lua", "Lua 5.1")
	if fileName ~= false then
		parsed.uri = "file://" .. (fileName or "Interface/AddOns/test.lua")
	end
	local got
	local function captureName(_, funcName)
		got = funcName
	end
	instrument:InstrumentFunctions(parsed, captureName, nil, true)
	if got == want then
		return
	end
	-- Level 2 attributes the failure to the calling test line.
	error(string.format("Unexpected diff, want:\n%s\ngot:\n%s", want, got), 2)
end
166 | 
167 | testGetFunctionName("function foo() end", "foo test.lua:1:0")
168 | testGetFunctionName("function foo() end", "foo file://prefix/not/stripped.lua:1:0", "prefix/not/stripped.lua")
169 | testGetFunctionName("function foo() end", "foo (unknown file):1:0", false)
170 | testGetFunctionName("local function foo() end", "foo test.lua:1:6")
171 | testGetFunctionName("local foo = function() end", "foo test.lua:1:12")
172 | testGetFunctionName("local foo\nfoo = function() end", "foo test.lua:2:6")
173 | testGetFunctionName("function foo:bar() end", "foo:bar test.lua:1:0")
174 | testGetFunctionName("function foo.bar() end", "foo.bar test.lua:1:0")
175 | testGetFunctionName("foo.bar = function() end", "foo.bar test.lua:1:10")
176 | testGetFunctionName("foo['bar'] = function() end", "foo.bar test.lua:1:13")
177 | testGetFunctionName("foo['foo\"bar'] = function() end", "foo[\"foo\\\"bar\"] test.lua:1:17")
178 | testGetFunctionName("foo[5] = function() end", "foo[5] test.lua:1:9")
179 | testGetFunctionName("foo[foo()] = function() end", "foo.? test.lua:1:13")
180 | testGetFunctionName("foo().bar = function() end", "?.bar test.lua:1:12")
181 | testGetFunctionName("foo = {bar = function() end}", "foo.bar test.lua:1:13")
182 | testGetFunctionName("foo = {['bar'] = function() end}", "foo.bar test.lua:1:17")
183 | testGetFunctionName("(foo)[foo()] = function() end", "(anonymous) test.lua:1:15")
184 | testGetFunctionName("foo(function() end)", "(anonymous) test.lua:1:4")
185 | testGetFunctionName("return function() end", "(anonymous) test.lua:1:7")
186 | testGetFunctionName("local foo = {['foo bar'] = function() end}", "foo[\"foo_bar\"] test.lua:1:27")
187 | testGetFunctionName("local foo = {[true] = function() end}", "foo[true] test.lua:1:22")
188 | testGetFunctionName("local foo = {[false] = function() end}", "foo[false] test.lua:1:23")
189 | 
190 | -- TODO: this is actually not ideal
191 | testGetFunctionName("foo.bar.x = function() end", "bar.x test.lua:1:12")
192 | -- TODO: maybe support this
193 | testGetFunctionName("foo = {bar = {x = function() end}}", "(anonymous) test.lua:1:18")
194 | 
--- Instruments `code` as file "test.lua" and compares the complete output, including the Perfy header, with `want`.
---@param code string Lua source to instrument
---@param want string Expected full instrumented source
local function testLocalLimits(code, want)
	-- The parse result is not used by this helper; the call is retained from the original.
	local _ = parser.compile(code, "Lua", "Lua 5.1")
	local lines = instrument:Instrument(code, "test.lua")
	assert(lines)
	local got = table.concat(lines, "")
	if got == want then
		return
	end
	-- Level 2 attributes the failure to the calling test line.
	error(string.format("Unexpected diff, want:\n%s\ngot:\n%s", want, got), 2)
end
204 | 
205 | local locals = {}
206 | for i = 1, 197 do
207 | 	locals[#locals + 1] = "local" .. i
208 | end
209 | local code = "local " .. table.concat(locals, ", ")
210 | local prefix = "Perfy_Trace(Perfy_GetTime(), \"Enter\", \"(main chunk) file://test.lua\"); "
211 | local suffix = "\nPerfy_Trace(Perfy_GetTime(), \"Leave\", \"(main chunk) file://test.lua\");"
212 | testLocalLimits(code, "--[[Perfy has instrumented this file]] local Perfy_GetTime, Perfy_Trace, Perfy_Trace_Passthrough = Perfy_GetTime, Perfy_Trace, Perfy_Trace_Passthrough; " .. prefix .. code .. suffix)
213 | 
214 | code = code .. "\nlocal localNumber198"
215 | testLocalLimits(code, "--[[Perfy has instrumented this file]] " .. prefix .. code .. suffix)
216 | 


--------------------------------------------------------------------------------
/AddOn/Perfy.lua:
--------------------------------------------------------------------------------
  1 | Perfy_GetTime = GetTimePreciseSec
  2 | local Perfy_GetTime = Perfy_GetTime
  3 | local gc = collectgarbage
  4 | local type, tostring = type, tostring
  5 | 
  6 | --- Fields: timestamp, event, functionName, timeOverhead, memory, memoryOverhead
  7 | ---@type table[]
  8 | local trace = {}
  9 | 
 10 | local TraceFieldTimestamp, TraceFieldEvent, TraceFieldFunction, TraceFieldTimeOverhead, TraceFieldMemory, TraceFieldMemoryOverhead, TraceFieldExtraArg = 1, 2, 3, 4, 5, 6, 7
 11 | 
 12 | local isRunning
 13 | 
 14 | --[[
 15 | A few notes on the performance of the trace functions:
 16 | These are the priorities for them:
 17 |  1. It needs to be accurate and correct, so it should call GetTimePreciseSec() as close as possible to the beginning and end of it
 18 |  2. The main bottleneck is memory, so it should allocate as little as possible
 19 |  3. It should be as fast as possible to keep overhead low (even when overhead is accounted for, we don't want to slow down anything unnecessarily)
 20 | 
 21 | Random notes and thoughts:
 22 |  * The entry should be allocated via a single table expression, this gives an array of exact size of n entries. Growing the array after allocation grows it exponentially.
 23 |    6 entries allocated directly is (216 byte), growing an array to 6 entries allocates 8 internally in 264 bytes. That's 22% more.
 24 |  * Strings are interned and don't use extra memory if used multiple times
 25 |  * Traces can be a few GB large before you run into problems
 26 |  * Using number literals to index the entry because it's slightly faster than referencing an upvalue
 27 | 
 28 | Collection of ideas to maybe try:
 29 |  * Delta-encode entries and "compress" them to reduce the number of fields. (Probably doesn't work for the overhead fields)
 30 |  * Should we pre-allocate a large array for trace entries? The exponential growth will trigger huge reallocations O(log n) times
 31 |  * If we preallocate the trace array we don't need to store the memory overhead per entry as growing the array is the only reason why it's not constant
 32 |    * Even without preallocation we should be able to omit the memory overhead because the reallocation every 2^n entries should be deterministic
 33 | ]]
 34 | 
 35 | 
 36 | -- Generic trace function, timestamp taken before it is called.
 37 | local function Perfy_Trace(timestamp, event, func, extraArg)
 38 | 	if not isRunning then return end
 39 | 	local mem = gc("count") * 1024
 40 | 	local entry
 41 | 	if extraArg then -- Important to distinguish these two cases to get a table with an exact number of entries
 42 | 		entry = {
 43 | 			timestamp, event, func, 0, mem, mem, extraArg
 44 | 		}
 45 | 	else
 46 | 		entry = {
 47 | 			timestamp, event, func, 0, mem, mem
 48 | 		}
 49 | 	end
 50 | 	trace[#trace + 1] = entry
 51 | 	mem = gc("count") * 1024
 52 | 	entry[6] = mem - entry[6] -- Memory overhead
 53 | 	entry[4] = Perfy_GetTime() - timestamp -- Time overhead
 54 | end
 55 | _G.Perfy_Trace = Perfy_Trace
 56 | 
 57 | -- Trace function when leaving functions, arguments are passed through, required to instrument tail calls.
 58 | local function Perfy_Trace_Passthrough(event, func, ...)
 59 | 	-- Timestamp taken here instead of in args because it must be done after all args are evaluated.
 60 | 	-- Lua evaluates args left to right and vararg tail call reports mean we can't put it as last arg.
 61 | 	local timestamp = Perfy_GetTime()
 62 | 	if not isRunning then return ... end
 63 | 	local mem = gc("count") * 1024
 64 | 	local entry = {
 65 | 		timestamp, event, func, 0, mem, mem
 66 | 	}
 67 | 	trace[#trace + 1] = entry
 68 | 	mem = gc("count") * 1024
 69 | 	entry[6] = mem - entry[6] -- Memory overhead
 70 | 	entry[4] = Perfy_GetTime() - timestamp -- Time overhead
 71 | 	return ...
 72 | end
 73 | _G.Perfy_Trace_Passthrough = Perfy_Trace_Passthrough
 74 | 
 75 | -- Hook coroutines
 76 | local crWrap, crResume, crYield, crRunning = coroutine.wrap, coroutine.resume, coroutine.yield, coroutine.running
 77 | 
 78 | ---@diagnostic disable-next-line: duplicate-set-field
 79 | function coroutine.wrap(f)
 80 | 	local cr = crWrap(f)
 81 | 	return function(...)
 82 | 		Perfy_Trace(Perfy_GetTime(), "CoroutineResume", cr)
 83 | 		return cr(...)
 84 | 	end
 85 | end
 86 | 
 87 | ---@diagnostic disable-next-line: duplicate-set-field
 88 | function coroutine.resume(coroutine, ...)
 89 | 	Perfy_Trace(Perfy_GetTime(), "CoroutineResume", coroutine)
 90 | 	return crResume(coroutine, ...)
 91 | end
 92 | 
 93 | ---@diagnostic disable-next-line: duplicate-set-field
 94 | function coroutine.yield(...)
 95 | 	Perfy_Trace(Perfy_GetTime(), "CoroutineYield", crRunning())
 96 | 	return crYield(...)
 97 | end
 98 | 
 99 | 
100 | -- Hook error handlers
101 | local debugstack = debugstack or debug and debug.traceback
102 | local origErrorHandler
-- Error handler installed by hookErrorHandler: records an "UncaughtError" entry with the current
-- stack while profiling is running, then forwards all arguments to the previously installed handler.
local function errorHandler(...)
	if isRunning then
		local now = Perfy_GetTime()
		Perfy_Trace(now, "UncaughtError", debugstack())
	end
	return origErrorHandler(...)
end
109 | 
-- Installs errorHandler as the global error handler unless it is already the active one.
local function hookErrorHandler()
	local installed = geterrorhandler()
	if installed == errorHandler then
		return
	end
	-- We may hook it multiple times across multiple starts if someone else also changes it,
	-- but worst case we have duplicate trace entries.
	origErrorHandler = installed
	seterrorhandler(errorHandler)
end
118 | 
-- Prints the number of collected trace entries, the time span between the first and last entry
-- and the difference between their memory readings.
local function printStats()
	local first, last = trace[1], trace[#trace]
	if last then
		local seconds = last[1] - first[1]
		print(("[Perfy] Collected %d trace entries in %.1f seconds"):format(#trace, seconds))
		local mebibytes = (last[5] - first[5]) / 1024 / 1024
		print(("[Perfy] Memory allocations (incl. overhead): %.2f MiB."):format(mebibytes))
	else
		print("[Perfy] Collected 0 traces, check if instrumentation was successful.")
	end
end
129 | 
130 | local funcId, eventId = 1, 1
-- Rewrites the collected trace for saving and stores it in the saved variable Perfy_Export.
-- Event and function names are replaced by numeric ids, the lookup tables are saved next to the trace.
-- When running inside a coroutine it yields periodically; it restarts the garbage collector at the end.
local function export()
	-- Mapping all strings to numbers makes the saved variables file a bit smaller.
	-- This doesn't save memory (actually increases memory to store the lookup tables) because strings are interned/unique anyways, so logging the full string above is fine.
	local previous = Perfy_Export
	local functionNames = previous and previous.FunctionNames or {}
	local eventNames = previous and previous.EventNames or {}
	local numEntries = #trace
	local yieldInterval = 1e5 -- Large traces have ~millions of entries
	local printInterval = math.floor(numEntries / 10)
	-- Extra arguments of these types are replaced by the name of their type.
	local opaqueTypes = {thread = true, ["function"] = true, table = true}
	for i, entry in ipairs(trace) do
		if i % yieldInterval == 0 and coroutine.running() then
			coroutine.yield()
		end
		if #trace > 1e6 and i % printInterval == 0 then
			local percent = math.ceil(i / numEntries * 10) * 10
			print(("[Perfy] Exporting... %d%%"):format(percent))
		end
		local eventName = entry[TraceFieldEvent]
		local funcName = entry[TraceFieldFunction]
		local extraArg = entry[TraceFieldExtraArg]
		local funcType = type(funcName)
		if funcType == "thread" or funcType == "function" then
			funcName = tostring(funcName)
		end
		-- nil check is important to not accidentally grow 6-entry tables
		if extraArg ~= nil and opaqueTypes[type(extraArg)] then
			entry[TraceFieldExtraArg] = type(extraArg)
		end
		-- Only strings are translated, this avoids translating functions/events twice if we log multiple times
		if type(eventName) == "string" then
			local id = eventNames[eventName]
			if not id then
				id = eventId
				eventNames[eventName] = id
				eventId = id + 1
			end
			entry[TraceFieldEvent] = id
		end
		if type(funcName) == "string" then
			local id = functionNames[funcName]
			if not id then
				id = funcId
				functionNames[funcName] = id
				funcId = id + 1
			end
			entry[TraceFieldFunction] = id
		end
	end
	if coroutine.running() then
		coroutine.yield() -- I've observed slow writes to very large exported variables, so yield one last time.
	end
	Perfy_Export = {
		FunctionNames = functionNames,
		EventNames = eventNames,
		Trace = trace
	}
	print(("[Perfy] Saved %d trace entries across %d unique functions."):format(#trace, funcId - 1))
	-- Delay restarting gc because the freshly restarted gc will trigger a lot when allocating for building the lookup tables above.
	-- This avoids several seconds of runtime here and reduces the risk of running into a timeout which corrupts the exported data
	if coroutine.running() then
		coroutine.yield()
	end
	gc("restart")
end
186 | 
--- Stops profiling, prints statistics and exports the trace in the background.
--- The export runs in a coroutine that is resumed once per C_Timer.After(0) callback until it has finished.
--- Does nothing if profiling is not running.
function Perfy_Stop()
	if not isRunning then return end
	Perfy_Trace(Perfy_GetTime(), "PerfyStop", "Perfy_Stop Perfy/internal")
	isRunning = false
	printStats()
	local thread = coroutine.create(export)
	local function runCoroutine()
		C_Timer.After(0, function()
			local ok, err = coroutine.resume(thread)
			if not ok then
				-- export() restarts the garbage collector only as its very last step.
				-- If it failed we have to do that here, otherwise the collector stays stopped.
				gc("restart")
				print("[Perfy] Export failed: " .. tostring(err))
			elseif coroutine.status(thread) ~= "dead" then
				-- Export yielded and has more work to do, continue on the next timer callback.
				runCoroutine()
			end
		end)
	end
	runCoroutine()
	-- GC is restarted after exporting above
end
203 | 
-- Incremented on every successful start. Timer callbacks remember the value of the run that created
-- them and ignore themselves once a newer run was started.
local startGeneration = 0

--- Starts profiling: stops the garbage collector, hooks the error handler and begins recording trace entries.
--- Prints statistics every 10 seconds while the run is active.
---@param timeout number? Seconds after which this run is stopped automatically
---@return boolean started false if profiling was already running, true otherwise
function Perfy_Start(timeout)
	if isRunning then
		return false
	end
	if #trace == 0 then
		-- Make sure we don't accidentally import old saved variables
		Perfy_Clear(true)
	end
	gc("stop")
	hookErrorHandler()
	isRunning = true
	startGeneration = startGeneration + 1
	local generation = startGeneration
	Perfy_Trace(Perfy_GetTime(), "PerfyStart", "Perfy_Start Perfy/internal")
	if timeout then
		C_Timer.After(timeout, function()
			-- A timeout that belongs to an earlier, already stopped run must not stop the current run.
			if generation == startGeneration then
				Perfy_Stop()
			end
		end)
	end
	print("[Perfy] Started profiling.")
	C_Timer.NewTicker(10, function(self)
		-- Also cancel tickers of earlier runs, otherwise a quick stop/start prints the statistics multiple times.
		if not isRunning or generation ~= startGeneration then
			return self:Cancel()
		end
		printStats()
	end)
	return true
end
228 | 
--- Discards all collected trace entries and the exported data, stops profiling,
--- resets the name id counters and restarts the garbage collector.
---@param quiet boolean? Suppresses the message about the number of discarded entries
function Perfy_Clear(quiet)
	if not quiet then
		print(("[Perfy] Discarding %d trace entries."):format(#trace))
	end
	isRunning = false
	trace, Perfy_Export = {}, {}
	funcId, eventId = 1, 1
	gc("restart")
end
240 | 
--- Returns the running flag: true after Perfy_Start, false after Perfy_Stop or Perfy_Clear,
--- and nil if none of them was called yet.
function Perfy_Running()
	return isRunning
end
244 | 
--- Loads an AddOn while profiling is active.
--- Starts profiling if it is not already running and, in that case, stops it again after loading.
--- The load is bracketed by "LoadAddOn" and "LoadAddOnFinished" trace entries.
---@param addon string Name of the AddOn to load
function Perfy_LoadAddOn(addon)
	---@diagnostic disable-next-line: deprecated
	local LoadAddOn = C_AddOns and C_AddOns.LoadAddOn or LoadAddOn
	print("[Perfy] Loading and tracing addon " .. addon)
	local didStart = Perfy_Start()
	Perfy_Trace(Perfy_GetTime(), "LoadAddOn", addon)
	local loaded, reason = LoadAddOn(addon)
	Perfy_Trace(Perfy_GetTime(), "LoadAddOnFinished", addon)
	if didStart then
		Perfy_Stop()
	end
	if not loaded then
		-- Don't fail while reporting the failure if the loader did not provide a reason.
		local reasonText = "unknown reason"
		if reason ~= nil then
			reasonText = _G["ADDON_" .. tostring(reason)] or tostring(reason)
		end
		print("[Perfy] Failed to load AddOn " .. addon .. ": " .. reasonText)
	end
end
260 | 
--- Runs `func` while profiling is active.
--- Starts profiling if it is not already running and, in that case, stops it again afterwards,
--- even if `func` raised an error. An error raised by `func` is re-raised unchanged.
---@param func function Called without arguments, its return values are discarded
function Perfy_Run(func)
	local pcall = pcall
	local didStart = Perfy_Start()
	local ok, err = pcall(func)
	if didStart then
		Perfy_Stop()
	end
	if not ok then
		-- Level 0 keeps the message as it is, the default level would prepend the position of this line to string errors.
		error(err, 0)
	end
end
272 | 
if not PERFY_TEST_ENVIRONMENT then
	-- Log some events for loading screens and some that are useful for general debugging
	local frame = CreateFrame("Frame", "Perfy")

	local function onUpdate(_, elapsed)
		-- The extra argument is used to estimate FPS, unfortunately there is no good way to get realtime FPS:
		-- * elapsed and GetTime() have a 1 ms granularity
		-- * GetFramerate() only gives an average over the last second
		-- * The OnUpdate call itself doesn't happen at a consistent point in time within a frame
		-- So let's use elapsed, it's close enough and less random than the Perfy timestamp
		-- For 60 fps this will either report 16 or 17 ms.
		Perfy_Trace(Perfy_GetTime(), "OnEvent", "OnUpdate", elapsed)
	end

	local function onEvent(_, event, arg1)
		Perfy_Trace(Perfy_GetTime(), "OnEvent", event, arg1)
	end

	frame:SetScript("OnUpdate", onUpdate)
	frame:SetScript("OnEvent", onEvent)
	local tracedEvents = {
		"ADDON_LOADED",
		"LOADING_SCREEN_DISABLED",
		"LOADING_SCREEN_ENABLED",
		"PLAYER_ENTERING_WORLD",
		"PLAYER_LEAVING_WORLD",
		"PLAYER_LOGIN",
		"PLAYER_LOGOUT",
		"SPELLS_CHANGED",
	}
	for _, eventName in ipairs(tracedEvents) do
		frame:RegisterEvent(eventName)
	end
	frame:Show()

	-- Reloading/logging out while Perfy is running is a bad idea because it either doesn't generate a trace at all (started only once) or generates a broken trace (started multiple times)
	-- Just try to stop it, but this may run into a timeout for a large trace because we can't run it in a coroutine at this point in time.
	local stopFrame = CreateFrame("Frame")
	stopFrame:RegisterEvent("ADDONS_UNLOADING")
	stopFrame:SetScript("OnEvent", function()
		if not isRunning then
			return
		end
		isRunning = false
		export()
	end)
end
309 | 


--------------------------------------------------------------------------------
/Instrumentation/Instrument.lua:
--------------------------------------------------------------------------------
  1 | local mod = {}
  2 | 
  3 | local parser = require "parser"
  4 | local guide = require "parser.guide"
  5 | 
  6 | local function splitPos(pos)
  7 | 	return math.floor(pos / 10000) + 1, pos % 10000
  8 | end
  9 | 
 10 | -- Yeah, this is a thing and some AddOns have this in their files
 11 | -- DBM used to set that too -- in like 2008 when some editors wouldn't default to UTF-8 without this.
 12 | ---@diagnostic disable-next-line: err-esc -- The whole project is setup as Lua 5.1 (WoW Lua version), but everything that runs in LuaLS is actually Lua 5.3
 13 | local utf8Bom = "\xef\xbb\xbf"
 14 | 
 15 | local function injectLine(line, inj, offs)
 16 | 	local _, injPos = splitPos(inj.pos < math.huge and inj.pos or 0)
 17 | 	injPos = injPos + offs
 18 | 	local prePadding = not inj.skipPrepadding and injPos > 0 and not inj.text:match("^%s") and line:sub(injPos, injPos):match("[^%s]") and line:sub(0, injPos) ~= utf8Bom and " " or ""
 19 | 	local postPadding = injPos < #line and line:sub(injPos + 1, injPos + 1):match("[^%s]") and " " or ""
 20 | 	local injText = prePadding .. inj.text .. postPadding
 21 | 	offs = offs + #injText
 22 | 	return line:sub(0, injPos) .. injText .. line:sub(injPos + 1), offs
 23 | end
 24 | 
 25 | ---@class Injection
 26 | ---@field pos number Position to inject at in LuaLS format
 27 | ---@field text string What to inject
 28 | ---@field skipPrepadding boolean? Don't add a space in front of the injection
 29 | 
 30 | ---@param state parser.state
 31 | ---@param injections Injection[]
 32 | function mod:Inject(state, injections)
 33 | 	local buf = {}
 34 | 	local injIndex = 1
 35 | 	local last = 0
 36 | 	table.sort(injections, function(e1, e2) return e1.pos < e2.pos end)
 37 | 	local hasUtf8Bom = state.lua:match("^" .. utf8Bom)
 38 | 	for i = 0, #state.lines + 1 do
 39 | 		local v = state.lines[i] or #state.lua + 1
 40 | 		local line = state.lua:sub(last, v - 1)
 41 | 		local inj = injections[injIndex]
 42 | 		local offs = 0
 43 | 		if hasUtf8Bom and inj and splitPos(inj.pos) == 1 then
 44 | 			offs = offs + 3
 45 | 		end
 46 | 		while inj and splitPos(inj.pos) == i do
 47 | 			line, offs = injectLine(line, inj, offs)
 48 | 			injIndex = injIndex + 1
 49 | 			inj = injections[injIndex]
 50 | 		end
 51 | 		buf[#buf + 1] = line
 52 | 		last = v
 53 | 	end
 54 | 	-- End-of-file injections aren't in the loop above they refer to line infinity which doesn't exist, this is to avoid problems if the last line is > 10k characters long.
 55 | 	local suffix = ""
 56 | 	local offs = 0
 57 | 	for i = injIndex, #injections do
 58 | 		suffix, offs = injectLine(suffix, injections[i], offs)
 59 | 	end
 60 | 	buf[#buf + 1] = suffix
 61 | 	return buf
 62 | end
 63 | 
 64 | local function stripFilePrefix(file)
 65 | 	return file:match("^file://.-Interface/AddOns/(.*)") or file
 66 | end
 67 | 
 68 | local function getTableIndexPretty(key)
 69 | 	if type(key) == "string" and key:match("^[%a_][%w_]*$")  then
 70 | 		return "." .. key
 71 | 	elseif key == nil then
 72 | 		return ".?"
 73 | 	else
 74 | 		-- The analyzer expects this to not contain spaces, so we just get rid of them because I don't feel like parsing string literals correct in the analyzer.
 75 | 		-- Note that this is not a problem wrt uniqueness because the full name still includes the exact position.
 76 | 		if type(key) == "string" then
 77 | 			key = key:gsub(" ", "_")
 78 | 		end
 79 | 		return ("[%q]"):format(key)
 80 | 	end
 81 | end
 82 | 
 83 | -- TODO: doesn't support nested tables, let's see if this turns out to be relevant
 84 | local function getFunctionName(node, fileName)
 85 | 	local line, pos = splitPos(node.start)
 86 | 	local parent = node.parent
 87 | 	if not parent then
 88 | 		return "(main chunk) " .. stripFilePrefix(fileName)
 89 | 	end
 90 | 	local name = "(anonymous)" ---@type string?
 91 | 	if parent.type == "setglobal" or parent.type == "setlocal" or parent.type == "local" then
 92 | 		name = guide.getKeyName(parent)
 93 | 	elseif parent.type == "setmethod" then
 94 | 		name = guide.getKeyName(parent.node) .. ":" .. guide.getKeyName(parent)
 95 | 	elseif parent.type == "setfield" or parent.type == "setindex" then
 96 | 		local tbl = guide.getKeyName(parent.node)
 97 | 		local key = guide.getKeyName(parent)
 98 | 		if tbl or key then
 99 | 			name = (tbl or "?") .. getTableIndexPretty(key)
100 | 		end
101 | 	elseif parent.type == "tablefield" or parent.type == "tableindex" then
102 | 		local key = guide.getKeyName(parent)
103 | 		local tableVar = parent.parent.parent
104 | 		if tableVar.type == "setglobal" or tableVar.type == "setlocal" or tableVar.type == "local" then
105 | 			name = guide.getKeyName(tableVar) .. getTableIndexPretty(key)
106 | 		end
107 | 	end
108 | 	return name .. " " .. stripFilePrefix(fileName) .. ":" .. line .. ":" .. pos
109 | end
110 | 
--- Formats `str` with %q, i.e., as a quoted string that can be placed into generated Lua code.
---@param str string
---@return string
function mod:String(str)
	return string.format("%q", str)
end
114 | 
--- Collects trace injections for every function (and optionally the main chunk) in a parsed file
--- and hands them to self:Inject.
---@param state parser.state
---@param argFunc fun(action: string, funcName: string, node: parser.object, passthrough: boolean?): ...
---@param injections Injection[]? Injections to start from; new ones are appended to this list.
---@param skipMainChunk boolean? If set, the main chunk node is not instrumented.
---@return any # Whatever self:Inject returns for the collected injections.
function mod:InstrumentFunctions(state, argFunc, injections, skipMainChunk)
	-- Note on semicolons:
	-- Plain trace statements end in a semicolon to avoid the Lua grammar ambiguity between "function call" and
	-- "new statement", e.g., foo()\n(bar).x = 5 is a parser error without a semicolon at the end of the first line.
	-- Injections that wrap a return expression must not add one because they replace an expression, not a statement:
	-- if the return already ends in a semicolon we would generate "return foo();;", and empty statements are invalid.
	injections = injections or {}

	-- Expression node types that are considered free to evaluate in a return statement.
	local trivialTypes = {
		getlocal = true,
		string = true,
		boolean = true,
		["nil"] = true,
		number = true,
		integer = true,
	}

	local function inject(pos, text, skipPrepadding)
		injections[#injections + 1] = {pos = pos, text = text, skipPrepadding = skipPrepadding}
	end

	local function traceStatement(args)
		return "Perfy_Trace(" .. table.concat(args, ", ") .. ");"
	end

	-- A return is trivial if every returned expression (ignoring parentheses) is a local or a literal.
	local function isTrivialReturn(ret)
		for _, expr in ipairs(ret) do
			while expr.type == "paren" do
				expr = expr.exp
			end
			if not trivialTypes[expr.type] then
				return false
			end
		end
		return true
	end

	guide.eachSourceTypes(state.ast, {"function", "main"}, function(node)
		local isMain = node.type == "main"
		if isMain and skipMainChunk then return end
		local funcName = getFunctionName(node, state.uri or "(unknown file)")
		local enterArgs, leaveArgs, passthroughLeaveArgs = {}, {}, {}
		if argFunc then
			enterArgs = {argFunc("Enter", funcName, node)}
			leaveArgs = {argFunc("Leave", funcName, node)}
			passthroughLeaveArgs = {argFunc("Leave", funcName, node, true)}
		end
		local lastStatement = node[#node]
		local endsWithReturn = lastStatement and lastStatement.type == "return"
		if not isMain then
			inject(node.args.finish, traceStatement(enterArgs))
			if not endsWithReturn then
				-- Implicit exit: inject right in front of the closing "end" token.
				inject(node.finish - 3, traceStatement(leaveArgs))
			end
		elseif not endsWithReturn then
			-- The enter trace of the main chunk is injected by the file preamble.
			-- The implicit exit of the main chunk is the end of the file. There is no "end" token to anchor to,
			-- so a newline is prepended to stay clear of a trailing comment on the last line.
			-- math.huge instead of node.finish: node.finish fails if the last line is longer than 10k characters.
			inject(math.huge, "\n" .. traceStatement(leaveArgs))
		end
		if node.returns then
			for _, ret in pairs(node.returns) do
				-- Two ways to instrument returns:
				-- 1. return Perfy_Trace_Passthrough("Leave", ...)
				-- 2. Perfy_Trace(Perfy_GetTime(), "Leave") return ...
				-- (1) samples the time inside Perfy, so the time to call into Perfy is wrongly accounted to the
				--     calling function; see Accuracy.md for how this can skew small functions.
				-- (2) samples the time before the return expression is evaluated, so the time for evaluating the
				--     return expression is accounted to the calling function.
				-- Neither is perfect. (2) is used if all returned expressions are trivial (locals or constants),
				-- (1) otherwise. The set of trivial expressions could be widened (closure creation? binary
				-- operations on a local and a literal?), but the underlying problem is unsolvable in a
				-- source-to-source translation: tail calls can return varargs, so "return foo()" cannot become
				-- "return Perfy_Trace_Passthrough("Leave", foo(), Perfy_GetTime())" (the time sample would have to
				-- be the last argument because arguments are evaluated left to right).
				-- Tracers for a known number of return values (e.g. Perfy_Trace_Passthrough2("Leave", foo(), 5,
				-- GetTime())) would help, but need either an injected global lookup or another local, and the
				-- 200 local limit is already hit by a few files.
				if isTrivialReturn(ret) then
					inject(ret.start, traceStatement(leaveArgs))
				else
					inject(ret[1].start, "Perfy_Trace_Passthrough(" .. table.concat(passthroughLeaveArgs, ", ") .. ",")
					inject(ret[#ret].finish, ")", true)
				end
			end
		end
	end)
	return self:Inject(state, injections)
end
197 | 
-- Marker written as the very first bytes of an instrumented file; used to detect files that were already processed.
local perfyTag = "--[[Perfy has instrumented this file]]"

--- Instruments Lua source code with Perfy trace calls.
---@param code string Source code of the file.
---@param fileName string Name of the file, used for function names and log messages.
---@param retryAfterLocalLimitExceeded table? Parser error of a previous attempt that hit the local variable limit; if set, the local cache of the Perfy functions is not injected.
---@return string[]? lines Instrumented lines, nil if the file already carries the Perfy tag.
---@return string? err Reason why the file was skipped.
function mod:Instrument(code, fileName, retryAfterLocalLimitExceeded)
	if code:sub(1, #perfyTag) == perfyTag then
		return nil, "is already instrumented, skipping"
	end
	local state = parser.compile(code, "Lua", "Lua 5.1")
	state.uri = "file://" .. fileName
	local enterFileTrace = string.format(" Perfy_Trace(Perfy_GetTime(), %q, %q);", "Enter", getFunctionName(state.ast, state.uri))
	local preamble
	if retryAfterLocalLimitExceeded then
		-- TODO: As we're adding more and more functions it might make sense to support partially adding
		print("File " .. fileName .. " hit > 200 local variables at line " .. splitPos(retryAfterLocalLimitExceeded.start) .. " after injecting. Skipping local cache, Perfy's overhead for this file will be higher.")
		preamble = perfyTag .. enterFileTrace
	else
		preamble = perfyTag .. " local Perfy_GetTime, Perfy_Trace, Perfy_Trace_Passthrough = Perfy_GetTime, Perfy_Trace, Perfy_Trace_Passthrough;" .. enterFileTrace
	end
	---@type Injection[]
	local injections = {{pos = 0, text = preamble}}
	-- The passthrough tracer does not get a timestamp argument, the plain tracer gets it as first argument.
	local function traceArgs(action, funcName, _, passthrough)
		if passthrough then
			return self:String(action), self:String(funcName)
		end
		return "Perfy_GetTime()", self:String(action), self:String(funcName)
	end
	local lines = self:InstrumentFunctions(state, traceArgs, injections)
	local newState = parser.compile(table.concat(lines, ""), "Lua", "Lua 5.1")
	-- Lua 5.1 only allows 200 local variables, so the locals at the top can only be injected if the file does not already define that many.
	-- And yes, there are AddOns out there which are at exactly this limit: Plater and NovaWorldBuffs
	-- There is also a limit of 60 upvalues per function that the injections may hit, but this has not been seen for any real code yet.
	-- LuaLS currently does not report this, so that case is unhandled, see https://github.com/LuaLS/lua-language-server/issues/2578.
	if not retryAfterLocalLimitExceeded then
		for _, parseError in ipairs(newState.errs) do
			if parseError.type == "LOCAL_LIMIT" then
				return self:Instrument(code, fileName, parseError)
			end
		end
	end
	local oldErrorCount = #state.errs
	if #newState.errs > oldErrorCount then
		-- Report the first error whose type differs from the original error list, or the first surplus one.
		local newError = newState.errs[oldErrorCount + 1]
		for i = 1, oldErrorCount do
			if newState.errs[i].type ~= state.errs[i].type then
				newError = newState.errs[i]
				break
			end
		end
		print("File " .. fileName .. " reported an unexpected new parsing error after instrumentation: " .. newError.type .. " at line " .. splitPos(newError.start))
	end
	return lines
end
244 | 
--- Instruments a Lua file in place: reads it, runs self:Instrument on the contents and overwrites the file with the result.
--- Files without a ".lua" extension and files that self:Instrument refuses are skipped with a message.
--- Raises an error if the file cannot be opened for reading or writing.
---@param fileName string
function mod:InstrumentFile(fileName)
	-- The dot must be escaped: an unescaped "." matches any character, which would accept names like "foo_lua".
	if not fileName:lower():match("%.lua$") then
		print("File " .. fileName .. " does not seem to be a Lua file, skipping.")
		return
	end
	-- TODO: this is case sensitive on reasonable filesystems, but Lua in WoW it isn't case sensitive.
	local file, err = io.open(fileName, "r")
	if not file then error(err) end
	local code = file:read("*a")
	file:close()
	local output, skipReason = self:Instrument(code, fileName)
	if not output then
		print("Could not instrument " .. fileName .. ": " .. skipReason)
		return
	end
	file, err = io.open(fileName, "w+b")
	if not file then error(err) end
	for _, line in ipairs(output) do
		file:write(line) -- line already contains the original line ending
	end
	file:close()
end
267 | 
268 | return mod
269 | 


--------------------------------------------------------------------------------
/Analyzer/Analyze.lua:
--------------------------------------------------------------------------------
  1 | local parser = require "LuaParser"
  2 | 
  3 | local mod = {}
  4 | 
  5 | function mod:readFile(fileName)
  6 | 	local file, err = io.open(fileName, "rb")
  7 | 	if not file then error(err) end
  8 | 	return file:read("*a")
  9 | end
 10 | 
 11 | ---@return TraceEntry[]
 12 | function mod:LoadSavedVars(fileName)
 13 | 	local fileContents = self:readFile(fileName)
 14 | 	print(("Read %.1f MiB trace data"):format(#fileContents / 1024 / 1024))
 15 | 	local env = parser:ParseLua(fileContents)
 16 | 	print(("Trace has %d"):format(#env.Perfy_Export.Trace))
 17 | 	local eventNames, functionNames = {}, {}
 18 | 	for k, v in pairs(env.Perfy_Export.EventNames) do
 19 | 		if eventNames[v] then error("Duplicate event name mapping: " .. k .. " has the same mapping as " .. eventNames[v]) end
 20 | 		eventNames[v] = k
 21 | 	end
 22 | 	for k, v in pairs(env.Perfy_Export.FunctionNames) do
 23 | 		if functionNames[v] then error("Duplicate function name mapping: " .. k .. " has the same mapping as " .. functionNames[v]) end
 24 | 		functionNames[v] = k
 25 | 	end
 26 | 	local trace = env.Perfy_Export.Trace -- Traces can be hundreds of millions of entries, avoid creating a new array of that size
 27 | 	for i, v in ipairs(trace) do
 28 | 		---@class TraceEntry
 29 | 		local entry = {
 30 | 			---@type number
 31 | 			timestamp = v[1],
 32 | 			---@type "Enter"|"Leave"|"CoroutineResume"|"CoroutineYield"|"OnEvent"|"LoadAddOn"|"LoadAddOnFinished"|"UncaughtError"|"PerfyStart"|"PerfyStop"
 33 | 			event = eventNames[v[2]] or error("bad event id: " .. tostring(v[2])),
 34 | 			---@type string
 35 | 			functionName = functionNames[v[3]] or error("bad function id: " .. tostring(v[3])),
 36 | 			timeOverhead = v[4] or 0,
 37 | 			memory = v[5] or 0,
 38 | 			memoryOverhead = v[6] or 0,
 39 | 			extraArg = v[7]
 40 | 		}
 41 | 		trace[i] = entry
 42 | 	end
 43 | 	local delta = #trace > 0 and trace[#trace].timestamp - trace[1].timestamp or 0
 44 | 	print(("Loaded file with %d trace entries covering %.2f seconds."):format(#trace, delta))
 45 | 	return trace
 46 | end
 47 | 
 48 | local stackEntryCache, isLibCache = {}, {}
 49 | local function parseStackEntry(str)
 50 | 	if not str then return end
 51 | 	if stackEntryCache[str] then return stackEntryCache[str], isLibCache[str] end
 52 | 	-- Format is "functionName fileName:line:col"
 53 | 	-- Function name can contain spaces iff in parentheses like "(main chunk)"
 54 | 	local fileName
 55 | 	if str:sub(1, 1) == "(" then
 56 | 		fileName = str:match("[^)]+%) (.*)")
 57 | 	else
 58 | 		fileName = str:match("[^%s]+ (.*)")
 59 | 	end
 60 | 	if not fileName then return end
 61 | 	local result, isLib
 62 | 	if fileName:match("^file://") then
 63 | 		result = "(unknown addon)"
 64 | 		isLib = false
 65 | 	else
 66 | 		local addon, firstSubDir, secondSubDir = fileName:match("([^/]+)/?([^/]*)/?([^/]*)/")
 67 | 		result = not firstSubDir and addon or firstSubDir and firstSubDir:match("[lL]ibs?") and secondSubDir or addon
 68 | 		isLib = not not (firstSubDir and firstSubDir:match("[lL]ibs?"))
 69 | 	end
 70 | 	stackEntryCache[str] = result
 71 | 	isLibCache[str] = isLib
 72 | 	return result
 73 | end
 74 | 
 75 | local function backtrace(stack, coroutine, overrideAddOnAssociation)
 76 | 	if #stack == 0 then return "" end
 77 | 	local bt = {}
 78 | 	local addonAssociation = overrideAddOnAssociation
 79 | 	for _, v in ipairs(stack) do
 80 | 		bt[#bt + 1] = v.functionName
 81 | 		-- First addon in a call stack gets associated with the whole trace, this makes sure we don't "blame"
 82 | 		-- a random addon for a shared library that is at the start of a call trace (e.g., callback or sync handler libraries)
 83 | 		if not addonAssociation then
 84 | 			local addon, isLib = parseStackEntry(v.functionName)
 85 | 			if not isLib then
 86 | 				addonAssociation = addon
 87 | 			end
 88 | 		end
 89 | 	end
 90 | 	-- Pure library trace, e.g., self time of the lowest stack entry of addons or the library actually doing something for itself.
 91 | 	-- For self-times of something like libcallback we could associate this based on a lookahead on the trace, but these seem to be small/irrelevant anyways.
 92 | 	addonAssociation = addonAssociation or parseStackEntry(stack[1].functionName)
 93 | 	addonAssociation = addonAssociation or "Unknown addon"
 94 | 	local stackPreamble = {addonAssociation}
 95 | 	if coroutine and coroutine.firstResume then
 96 | 		stackPreamble[#stackPreamble + 1] = coroutine.firstResume
 97 | 	end
 98 | 	return table.concat(stackPreamble, ";") .. (#stackPreamble > 0 and ";" or "") .. table.concat(bt, ";")
 99 | end
100 | 
--- Reconstructs call stacks from a trace and accumulates the value of one trace field per backtrace.
--- Side effects: frameList is sorted by id and de-duplicated in place, and trace entries that are seen with an
--- empty stack get their stackEmpty field set.
---@param trace TraceEntry[]
---@param field string Name of the TraceEntry field to accumulate, e.g. "timestamp".
---@param overheadField string Name of the TraceEntry field that is subtracted as overhead of the previous entry.
---@param firstFrame? TrackedFrame Only entries at or after firstFrame.first are added to the "all" result.
---@param lastFrame? TrackedFrame Processing stops at lastFrame.last.
---@param frameList? TrackedFrame[] Frames that get their own result table; if not empty, processing stops at the end of the last one.
---@return table # Key "all" and every tracked frame map to a table of backtrace -> rounded value (multiplied by 1e6 if field is "timestamp").
function mod:FlameGraph(trace, field, overheadField, firstFrame, lastFrame, frameList)
	local firstTrackedOffset = firstFrame and firstFrame.first or 1
	local lastTrackedOffset = lastFrame and lastFrame.last or #trace
	if frameList then
		table.sort(frameList, function(e1, e2) return e1.id < e2.id end)
		for i = #frameList, 2, -1 do
			if frameList[i].id == frameList[i - 1].id then
				table.remove(frameList, i)
			end
		end
		-- An empty list has no last frame; keep the default end offset instead of indexing nil.
		local lastListedFrame = frameList[#frameList]
		if lastListedFrame then
			lastTrackedOffset = lastListedFrame.last
		end
	end
	local currentTrackedFrame = nil
	local nextTrackedFrame = frameList and frameList[1]
	local nextTrackedFrameIndex = 1
	local result = {all = {}}
	local warningsShown = {}
	---@type table
	local coroutines = {
		main = {stack = {}}
	}
	---@type string[]
	local currentCoroutine = {"main"}
	local stack = coroutines.main.stack
	local prev
	local lastAddonLoaded
	local loadingAddOns = false

	-- Adds amount to the given backtrace in the result of the currently tracked frame (if any)
	-- and in the "all" result if the trace index is inside the tracked range.
	local function account(index, backtraceKey, amount)
		if currentTrackedFrame then
			local frameResult = result[currentTrackedFrame]
			frameResult[backtraceKey] = (frameResult[backtraceKey] or 0) + amount
		end
		if index >= firstTrackedOffset then
			result.all[backtraceKey] = (result.all[backtraceKey] or 0) + amount
		end
	end

	-- We still need to start at the beginning even if firstTrackedOffset is set because we need to reconstruct stacks of coroutines
	for i = 1, math.min(lastTrackedOffset, #trace) do
		local v = trace[i]
		local delta = prev and v[field] - prev[field] - prev[overheadField] or 0
		local activeCoroutine = coroutines[currentCoroutine[#currentCoroutine]]
		local bt = backtrace(stack, activeCoroutine)
		if frameList then
			if nextTrackedFrame and i >= nextTrackedFrame.first then
				currentTrackedFrame = nextTrackedFrame
				nextTrackedFrameIndex = nextTrackedFrameIndex + 1
				nextTrackedFrame = frameList[nextTrackedFrameIndex]
				result[currentTrackedFrame] = {}
			end
			if currentTrackedFrame and i > currentTrackedFrame.last then
				currentTrackedFrame = nil
			end
		end
		-- FIXME: this all-in-one stack reconstruction is getting pretty messy, this should be split and cleaned up
		if v.event == "Enter" then
			if #stack > 0 then
				if delta > 0 then
					account(i, bt, delta)
				end
			elseif loadingAddOns and v.functionName:find("(main chunk)", nil, true) == 1 then
				-- Usually the time passed before an Enter from an empty stack is not accounted to anything because the time was likely spent in something that we can't account for.
				-- However, if this is the execution of main chunk and the last event is either leaving a main chunk or an ADDON_LOADED event then this was the time it took to load the file.
				-- First event we see is Perfy itself which enables this and we stop once PLAYER_LOGIN fires.
				local prevEvent = lastAddonLoaded and lastAddonLoaded.timestamp > (prev and prev.timestamp or 0) and lastAddonLoaded or prev
				if prevEvent and (prevEvent.event == "OnEvent" or prevEvent.event == "LoadAddOn" or prevEvent.event == "Leave" and prevEvent.functionName:find("(main chunk)", nil, true) == 1) then
					local loadDelta = v[field] - prevEvent[field] - prevEvent[overheadField]
					if loadDelta > 0 then
						local fakeStack = {}
						fakeStack[#fakeStack + 1] = {
							functionName = "(loading/compiling files, unreliable if you have uninstrumented addons)"
						}
						-- One fake stack entry per path prefix below the first directory of the file name.
						-- Names without a path component only get the generic loading entry instead of raising an error.
						local fileName = v.functionName:match("%) (.*)")
						local relativePath = fileName and fileName:match("/(.*)")
						if relativePath then
							local path = ""
							for part in relativePath:gmatch("([^/]*)") do
								path = path .. "/" .. part
								fakeStack[#fakeStack + 1] = {
									functionName = path:sub(2)
								}
							end
						end
						local loadBt = backtrace(fakeStack, nil, parseStackEntry(v.functionName))
						account(i, loadBt, loadDelta)
					end
				end
			end
			stack[#stack + 1] = v
		elseif v.event == "Leave" then
			if #stack == 0 then
				-- incomplete traces of coroutines can trigger this
				bt = backtrace({v, {functionName = "(missing stack information due coroutine or pcall: underflow)"}}, activeCoroutine)
				account(i, bt, delta)
				v.stackEmpty = true
			else
				local top = stack[#stack]
				if top.functionName == v.functionName then
					if delta > 0 then
						account(i, bt, delta)
					end
					stack[#stack] = nil
					if #stack == 0 and #currentCoroutine > 1 then
						-- We are either leaving a coroutine or the start of a coroutine was not traced
						currentCoroutine[#currentCoroutine] = nil
						stack = coroutines[currentCoroutine[#currentCoroutine]].stack
					elseif #stack == 0 then
						v.stackEmpty = true
					end
				else
					local warningId = "bad stack " .. v.functionName .. " " .. top.functionName
					if not warningsShown[warningId] then
						warningsShown[warningId] = true
						print("bad stack (likely coroutines or pcall/error) at " .. i .. ": leaving " .. v.functionName .. " after entering " .. top.functionName .. " backtrace: " .. bt .. " results for this stack will be off")
					end
					-- Unwind until the matching Enter is on top (or the stack is empty), then pop it.
					while top and top.functionName ~= v.functionName do
						stack[#stack] = nil
						top = stack[#stack]
					end
					bt = backtrace({v, {functionName = "(missing stack information due coroutine or pcall: stack mismatch)"}}, activeCoroutine)
					if delta > 0 then
						account(i, bt, delta)
					end
					stack[#stack] = nil
					if #stack == 0 then
						v.stackEmpty = true
					end
				end
			end
		elseif v.event == "CoroutineResume" then
			if #stack > 0 then
				if delta > 0 then
					account(i, bt, delta)
				end
				local resumed = coroutines[v.functionName] or {stack = {}}
				coroutines[v.functionName] = resumed
				-- Remember which function resumed this coroutine first, backtraces of the coroutine get prefixed with it.
				resumed.firstResume = resumed.firstResume or stack[#stack].functionName
				currentCoroutine[#currentCoroutine + 1] = v.functionName
				stack = resumed.stack
			else
				local warning = "Resuming coroutine from unknown location, likely uninstrumented code, ignoring. Coroutine stacks will be off if this coroutine calls instrumented code."
				if not warningsShown[warning] then
					warningsShown[warning] = true
					print(warning)
				end
			end
		elseif v.event == "CoroutineYield" then
			if #stack > 0 then
				if delta > 0 then
					account(i, bt, delta)
				end
				if #currentCoroutine <= 1 then -- yielding from main
					local yieldingFunction = stack[#stack].functionName or "(unknown function)"
					local warningId = "coroutine stack underflow " .. yieldingFunction
					if not warningsShown[warningId] then
						warningsShown[warningId] = true
						print("coroutine stack underflow at " .. i .. " in " .. yieldingFunction .. " likely missing the start of a coroutine in a trace")
					end
					v.stackEmpty = true
				else
					currentCoroutine[#currentCoroutine] = nil
					stack = coroutines[currentCoroutine[#currentCoroutine]].stack
				end
			else
				local warning = "yielding coroutine from unknown location, likely uninstrumented code, ignoring. Coroutine stacks will be off if this coroutine calls instrumented code."
				if not warningsShown[warning] then
					warningsShown[warning] = true
					print(warning)
				end
				v.stackEmpty = true
			end
		elseif v.event == "LoadAddOn" then
			loadingAddOns = true
			lastAddonLoaded = v
			v.stackEmpty = true
		elseif v.event == "LoadAddOnFinished" then
			loadingAddOns = false
			lastAddonLoaded = nil
			v.stackEmpty = true
		elseif v.event == "UncaughtError" then -- FIXME: this has 0% test coverage
			if #stack > 0 and delta > 0 then
				account(i, bt, delta)
			end
			for _, coroutine in ipairs(currentCoroutine) do
				coroutines[coroutine].stack = {}
			end
			currentCoroutine = {"main"}
			-- Continue on the fresh main stack; without this the local would still point at the discarded stack.
			stack = coroutines.main.stack
			v.stackEmpty = true
		elseif v.event == "OnEvent" then
			-- Legacy format: event and arg in function name separated by space
			local event, eventArg = v.functionName:match("^([^%s]+) (.*)")
			if not event then
				event, eventArg = v.functionName, v.extraArg
			end
			if event == "PLAYER_LOGIN" then
				-- This only fires during a reload/login, not during normal loading screens
				loadingAddOns = false
			elseif event == "ADDON_LOADED" then
				if eventArg == "!!!Perfy" then
					loadingAddOns = true
				end
				lastAddonLoaded = v
			end
			v.stackEmpty = true
		elseif v.event == "PerfyStart" or v.event == "PerfyStop" then
			v.stackEmpty = true
			-- No-op at the moment, but useful for debugging.
		else
			if not warningsShown[v.event] then
				print(("unknown event at entry %d: %s"):format(i, v.event))
			end
			warningsShown[v.event] = true
		end
		-- Only events that mark a change of the executing function are used as reference for the next delta.
		if v.event == "Enter" or v.event == "Leave" or v.event == "CoroutineResume" or v.event == "CoroutineYield" then
			prev = v
		end
	end
	local multiplier = field == "timestamp" and 1e6 or 1
	for _, stacks in pairs(result) do
		for stackKey, value in pairs(stacks) do
			stacks[stackKey] = math.floor(value * multiplier + 0.5)
		end
	end
	return result
end
357 | 
--- Finds the next OnUpdate event in a trace.
---@param trace TraceEntry[]
---@param offset integer Index to start searching at (inclusive).
---@return integer? # Index of the first "OnEvent" entry named "OnUpdate" at or after offset, nil if there is none.
local function findOnUpdate(trace, offset)
	local index = offset
	while index <= #trace do
		local entry = trace[index]
		if entry.event == "OnEvent" and entry.functionName == "OnUpdate" then
			return index
		end
		index = index + 1
	end
	return nil
end
368 | 
369 | 
--- Finds the entry with the largest timestamp gap to its predecessor within a range of the trace.
--- Only gaps whose preceding entry has stackEmpty set are considered.
---@param trace TraceEntry[]
---@param offset integer Start of the range; the first candidate is offset + 1.
---@param offsetEnd integer Last candidate index (inclusive).
---@return integer? # Index of the entry after the largest gap, nil if no gap larger than 0 qualifies.
local function findLargestDelta(trace, offset, offsetEnd)
	local bestGap, bestIndex = 0, nil
	local previous = trace[offset]
	for index = offset + 1, offsetEnd do
		local current = trace[index]
		local gap = current.timestamp - previous.timestamp
		if previous.stackEmpty and gap > bestGap then
			bestGap, bestIndex = gap, index
		end
		previous = current
	end
	return bestIndex
end
385 | 
--- Creates the summary of a frame spanning the trace entries first..last (inclusive).
---@param trace TraceEntry[]
---@param first integer Index of the first entry of the frame.
---@param last integer Index of the last entry of the frame.
---@param id integer Identifier stored in the frame.
---@return TrackedFrame
local function frameEntry(trace, first, last, id)
	-- Sum up Perfy's own overhead over the frame; negative memory overhead values are ignored.
	local totalTimeOverhead, totalMemOverhead = 0, 0
	for index = first, last do
		local entry = trace[index]
		totalMemOverhead = totalMemOverhead + math.max(entry.memoryOverhead, 0)
		totalTimeOverhead = totalTimeOverhead + entry.timeOverhead
	end
	local firstEntry, lastEntry = trace[first], trace[last]
	---@class TrackedFrame
	local frame = {
		first = first,
		last = last,
		numEvents = last - first,
		time = lastEntry.timestamp - firstEntry.timestamp,
		memory = lastEntry.memory - firstEntry.memory,
		timeOverhead = totalTimeOverhead,
		memOverhead = totalMemOverhead,
		id = id,
		fps = 0, -- set by the next frame
		names = nil, -- set by the main script to give the files useful names
	}
	return frame
end
409 | 
--- Splits a trace into frames based on the OnUpdate events recorded in it.
--- Relies on the stackEmpty field of the trace entries (via findLargestDelta).
---@param trace TraceEntry[]
---@return TrackedFrame[] # Frames in the order they were found, with ids counting up from 1.
function mod:FindSlowFrames(trace)
	---@type TrackedFrame[]
	local frames = {}
	local lastFrameStart = nil
	local lastOnUpdate = nil
	local inLoadingScreen = false
	local i = 0
	local nextFpsFromTime = 0
	while i < #trace do
		-- I'm not sure about the order in which OnUpdate handlers are invoked, it seems a bit inconsistent.
		-- But empirically this event seems to happen near the end of a frame, but the order between different handlers does not seem to be consistent.
		-- Anyhow, our definition of a frame is:
		--   A sequence of events containing exactly one OnUpdate call to the Perfy frame that starts/ends after/before the largest delta of the timestamp at an empty stack.
		-- Yes, it would obviously be better to just track the frame number in the trace (GetTime() uniquely identifies a frame),
		-- but I don't want to make the trace bigger for this niche use case.
		-- Algorithm to find the frames works as follows:
		--   1. Find the first 3 OnUpdate calls and use the largest two delta times between these to mark the first frame (actually second real frame, but first frame is incomplete anyways)
		--   2. Find the next OnUpdate, the next frame end is at the highest delta between this and the last OnUpdate
		--   3. If we see a loading screen goto 1, otherwise goto 2
		i = i + 1
		local v = trace[i]
		if v.event == "OnEvent" and v.functionName == "OnUpdate" and not inLoadingScreen then
			if not lastFrameStart then
				local nextOnUpdate = findOnUpdate(trace, i + 1)
				local nextNextOnUpdate = nextOnUpdate and findOnUpdate(trace, nextOnUpdate + 1)
				if not nextNextOnUpdate or nextNextOnUpdate <= i + 4 then
					break
				end
				local frameFirst = findLargestDelta(trace, i, nextOnUpdate)
				local nextFrameFirst = findLargestDelta(trace, nextOnUpdate, nextNextOnUpdate)
				-- findLargestDelta returns nil if no gap at an empty stack exists; stop like the branch below does.
				if not frameFirst or not nextFrameFirst then
					break
				end
				---@type TrackedFrame
				local frame = frameEntry(trace, frameFirst, nextFrameFirst - 1, #frames + 1)
				frames[#frames + 1] = frame
				if trace[nextNextOnUpdate].extraArg then
					frame.fps = 1 / trace[nextNextOnUpdate].extraArg
				end
				-- v is the earlier of the two OnUpdate events here, so subtract it from the later one to get a positive duration.
				nextFpsFromTime = 1 / (trace[nextOnUpdate].timestamp - v.timestamp)
				lastFrameStart = nextFrameFirst
				lastOnUpdate = nextOnUpdate
				i = nextFrameFirst -- TODO: skips over loading screen detection if you somehow start right before a loading screen starts, but whatever
			else
				local nextFrameFirst = findLargestDelta(trace, lastOnUpdate, i)
				if not nextFrameFirst then
					break
				end
				---@type TrackedFrame
				local frame = frameEntry(trace, lastFrameStart, nextFrameFirst - 1, #frames + 1)
				frames[#frames + 1] = frame
				if v.extraArg then
					frame.fps = 1 / v.extraArg
				else -- to support old logs
					frame.fps = nextFpsFromTime
				end
				nextFpsFromTime = 1 / (v.timestamp - trace[lastOnUpdate].timestamp)
				lastFrameStart = nextFrameFirst
				lastOnUpdate = i
			end
		end
		-- Support old format where arg1 was embedded in function name
		if v.event == "OnEvent" and (v.functionName == "LOADING_SCREEN_ENABLED nil" or v.functionName == "LOADING_SCREEN_ENABLED") then
			inLoadingScreen = true
			lastFrameStart = nil
		elseif v.event == "OnEvent" and (v.functionName == "LOADING_SCREEN_DISABLED nil" or v.functionName == "LOADING_SCREEN_DISABLED") then
			inLoadingScreen = false
		end
	end
	return frames
end
500 | 
--- Returns the first num frames according to the given ordering.
--- The frames array is sorted in place for the selection and is left sorted by id afterwards.
---@param frames TrackedFrame[]
---@param num integer Number of frames to pick.
---@param cmp fun(e1: TrackedFrame, e2: TrackedFrame): boolean Comparison function passed to table.sort.
---@return TrackedFrame[]
function mod:GetTopFrames(frames, num, cmp)
	local function byId(left, right)
		return left.id < right.id
	end
	table.sort(frames, cmp)
	local top = {}
	for rank = 1, num do
		top[rank] = frames[rank]
	end
	table.sort(frames, byId)
	return top
end
511 | 
--- Replaces the numbers in an array by their formatted strings, left-padded with spaces to a common width.
---@param tbl number[] Array that is converted in place.
---@param precision integer? Number of decimal places, defaults to 0.
local function leftPadNums(tbl, precision)
	local numberFormat = "%." .. (precision or 0) .. "f"
	local width = 0
	for index, value in ipairs(tbl) do
		local text = string.format(numberFormat, value)
		tbl[index] = text
		if #text > width then
			width = #text
		end
	end
	for index, text in ipairs(tbl) do
		tbl[index] = string.rep(" ", width - #text) .. text
	end
end
523 | 
--- Prints one aligned summary line for each of the first count frames.
--- Time and memory are printed with Perfy's own overhead subtracted.
---@param frames TrackedFrame[]
---@param count integer Maximum number of frames to print.
function mod:PrintSlowFrames(frames, count)
	local shown = math.min(count, #frames)
	local ids, fps, events, times, memory = {}, {}, {}, {}, {}
	for row = 1, shown do
		local frame = frames[row]
		ids[#ids + 1] = frame.id
		fps[#fps + 1] = frame.fps
		events[#events + 1] = frame.numEvents
		times[#times + 1] = (frame.time - frame.timeOverhead) * 1000
		memory[#memory + 1] = math.max(frame.memory - frame.memOverhead, 0) / 1024 / 1024
	end
	-- Format every column on its own so that all rows line up.
	leftPadNums(ids)
	leftPadNums(fps, 2)
	leftPadNums(events)
	leftPadNums(times, 2)
	leftPadNums(memory, 2)
	for row = 1, shown do
		print(string.format("\tFrame %s: %s fps, %s events, %s ms total time, %s MiB memory allocs ", ids[row], fps[row], events[row], times[row], memory[row]))
	end
end
544 | 
545 | return mod
546 | 


--------------------------------------------------------------------------------