├── .vscode └── settings.json ├── Analyzer ├── .gitignore ├── Analyze_Coroutines_Test.lua ├── Analyze_Test.lua ├── Main.lua ├── LuaParser.lua └── Analyze.lua ├── Screenshots ├── CPU.png ├── Memory.png └── CPU-DBM-Test.png ├── AddOn ├── !!!Perfy.toc ├── CLI.lua ├── CLI_Test.lua ├── TraceLoadingScreen.lua ├── Perfy_Test.lua └── Perfy.lua ├── Instrumentation ├── Main.lua ├── TocHandler.lua ├── Instrument_Test.lua └── Instrument.lua ├── Accuracy.md └── README.md /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Analyzer/.gitignore: -------------------------------------------------------------------------------- 1 | stacks-cpu.txt 2 | stacks-memory.txt -------------------------------------------------------------------------------- /Screenshots/CPU.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/emmericp/Perfy/HEAD/Screenshots/CPU.png -------------------------------------------------------------------------------- /Screenshots/Memory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/emmericp/Perfy/HEAD/Screenshots/Memory.png -------------------------------------------------------------------------------- /Screenshots/CPU-DBM-Test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/emmericp/Perfy/HEAD/Screenshots/CPU-DBM-Test.png -------------------------------------------------------------------------------- /AddOn/!!!Perfy.toc: -------------------------------------------------------------------------------- 1 | ## Interface: 100205 2 | ## Interface-Classic: 11501 3 | ## Interface-Wrath: 30403 4 | ## Title: Perfy 5 | ## Notes: Fancy profiling 6 | ## SavedVariables: Perfy_Export 7 | 8 | Perfy.lua 9 | TraceLoadingScreen.lua 10 
-- AddOn/CLI.lua: slash command interface for Perfy ("/perfy ...").
-- !!!Perfy.toc lists this file last (after Perfy.lua and TraceLoadingScreen.lua).
-- The Perfy_* globals called below are not defined in this file and are expected to be
-- provided by the files loaded before it.

--- Prints an overview of all supported /perfy sub-commands.
local function usage()
	print("[Perfy] Usage:")
	print("/perfy start [time] -- Starts Perfy and optionally automatically stops it after time seconds.")
	print("/perfy stop -- Stops Perfy.")
	print("/perfy [time] -- Toggles Perfy, if starting optionally stops after time seconds.")
	print("/perfy ls|loadingscreen -- Starts Perfy once the next loading screen is shown, stops once loading completes.")
	print("/perfy load <addon> -- Loads an on-demand loadable addon and traces its loading process.")
	print("/perfy run <code> -- Starts Perfy, runs the given code, and stops Perfy again.")
	print("/perfy clear -- Deletes all collected traces.")
end

local loadstring = loadstring or load -- Lua 5.2+ support to not fail tests if running under a later Lua version

SLASH_PERFY1 = '/perfy'

--- Handler for the /perfy slash command.
-- Dispatches on the first whitespace-separated token of `msg` (case-insensitive):
--   (nothing) or a number    toggle Perfy; when starting, the number is passed on as the run time
--   start [time]             Perfy_Start(time)
--   stop                     Perfy_Stop()
--   clear                    Perfy_Clear()
--   ls | loadingscreen       Perfy_LogLoadingScreen()
--   load <addon>             Perfy_LoadAddOn(addon)
--   run <code>               compiles <code> and hands the resulting function to Perfy_Run
-- Anything else (including "load"/"run" without an argument) prints the usage text.
-- @param msg string The text following "/perfy".
-- Raises an error (via error()) if the code given to "run" does not compile.
function SlashCmdList.PERFY(msg)
	-- arg1/arg2 are the first two tokens; arg2 is "" if there is only one token,
	-- and both are nil if msg contains no non-whitespace characters at all.
	local arg1, arg2 = msg:match("%s*([^%s]+)%s*([^%s]*)")
	if not arg1 or tonumber(arg1) then
		-- "/perfy" or "/perfy <time>": toggle
		if not Perfy_Running() then
			Perfy_Start(tonumber(arg1))
		else
			Perfy_Stop()
		end
	else
		arg1 = arg1:lower()
		if arg1 == "start" then
			Perfy_Start(tonumber(arg2))
		elseif arg1 == "stop" then
			Perfy_Stop()
		elseif arg1 == "clear" then
			Perfy_Clear()
		elseif arg1 == "ls" or arg1 == "loadingscreen" then -- arg1 is already lowercased above
			Perfy_LogLoadingScreen()
		elseif arg1 == "load" and arg2 ~= "" then
			Perfy_LoadAddOn(arg2)
		elseif arg1 == "run" and arg2 ~= "" then
			-- Take everything after the sub-command, not just the next token: the code to run
			-- may contain whitespace, e.g. /perfy run print("hello world").
			-- arg2 ~= "" guarantees there is at least one non-whitespace character to capture.
			local code = msg:match("^%s*[^%s]+%s+(.+)$")
			local func, err = loadstring(code, "(/perfy run)")
			if not func then
				error(err)
			end
			Perfy_Run(func)
		else
			usage()
		end
	end
end
/AddOn/CLI_Test.lua: -------------------------------------------------------------------------------- 1 | SlashCmdList = {} 2 | 3 | local running, runtime, cleared, logLoadingScreen, loadAddon 4 | function Perfy_Start(time) 5 | running = true 6 | runtime = time 7 | end 8 | 9 | function Perfy_Stop() 10 | running = false 11 | runtime = nil 12 | end 13 | 14 | function Perfy_Running() 15 | return running 16 | end 17 | 18 | function Perfy_Clear() 19 | cleared = true 20 | end 21 | 22 | function Perfy_LogLoadingScreen() 23 | logLoadingScreen = true 24 | end 25 | 26 | function Perfy_LoadAddOn(addon) 27 | loadAddon = addon 28 | end 29 | 30 | function Perfy_Run(func) 31 | func() 32 | end 33 | 34 | local function reset() 35 | running, runtime, cleared, logLoadingScreen, loadAddon = nil, nil, nil, nil, nil 36 | end 37 | 38 | require "CLI" 39 | 40 | reset() 41 | SlashCmdList.PERFY("") 42 | assert(running) 43 | assert(runtime == nil) 44 | SlashCmdList.PERFY("") 45 | assert(not running) 46 | 47 | reset() 48 | SlashCmdList.PERFY("start") 49 | assert(running) 50 | assert(runtime == nil) 51 | SlashCmdList.PERFY("stop") 52 | assert(not running) 53 | SlashCmdList.PERFY("clear") 54 | assert(cleared) 55 | 56 | reset() 57 | SlashCmdList.PERFY("start 5") 58 | assert(running) 59 | assert(runtime == 5) 60 | 61 | reset() 62 | SlashCmdList.PERFY(" start 5 ") 63 | assert(running) 64 | assert(runtime == 5) 65 | 66 | reset() 67 | SlashCmdList.PERFY("10") 68 | assert(running) 69 | assert(runtime == 10) 70 | SlashCmdList.PERFY("") 71 | assert(not running) 72 | 73 | reset() 74 | SlashCmdList.PERFY("ls") 75 | assert(logLoadingScreen) 76 | reset() 77 | SlashCmdList.PERFY("loadingscreen") 78 | assert(logLoadingScreen) 79 | 80 | reset() 81 | SlashCmdList.PERFY("load") 82 | assert(not loadAddon) 83 | reset() 84 | SlashCmdList.PERFY("load asdf") 85 | assert(loadAddon == "asdf") 86 | 87 | reset() 88 | TEST_GLOBAL_VAR=false 89 | SlashCmdList.PERFY("run TEST_GLOBAL_VAR=true") 90 | assert(not running) 91 | 
assert(TEST_GLOBAL_VAR) 92 | 93 | reset() 94 | local ok, err = pcall(SlashCmdList.PERFY, "run fail") 95 | assert(not running) 96 | assert(not ok) 97 | 98 | reset() 99 | SlashCmdList.PERFY("foo") 100 | assert(not running) 101 | -------------------------------------------------------------------------------- /Analyzer/Analyze_Coroutines_Test.lua: -------------------------------------------------------------------------------- 1 | local analyze = require "Analyze" 2 | 3 | -- Trace that would be generated by something like this: 4 | --[[ 5 | function Fun1() sleep(1) coroutine.yield() Fun2() end 6 | function Fun2() sleep(2) coroutine.yield() end 7 | function Fun3() sleep(1) coroutine.yield() Fun4() end 8 | function Fun4() sleep(2) coroutine.yield() end 9 | 10 | local c1, c2 = coroutine.create(Fun1), coroutine.create(Fun3) 11 | 12 | function Run() 13 | while do 14 | sleep(0.1) 15 | coroutine.resume(c1) 16 | sleep(0.1) 17 | coroutine.resume(c2) 18 | end 19 | end 20 | ]] 21 | local testData = [[ 22 | local Fun1, Fun2, Fun3, Fun4, Run, Cr1, Cr2 = 1, 2, 3, 4, 5, 6, 7 23 | local Enter, Leave, CoroutineResume, CoroutineYield = 1, 2, 3, 4 24 | Perfy_Export = { 25 | FunctionNames = { 26 | Fun1 = Fun1, Fun2 = Fun2, Fun3 = Fun3, Fun4 = Fun4, Run = Run, Cr1 = Cr1, Cr2 = Cr2 27 | }, 28 | EventNames = { 29 | Enter = Enter, Leave = Leave, CoroutineResume = CoroutineResume, CoroutineYield = CoroutineYield 30 | }, 31 | Trace = { 32 | {0.0, Enter, Run, 0, 0, 0}, 33 | {0.1, CoroutineResume, Cr1, 0, 0, 0}, 34 | {0.1, Enter, Fun1, 0, 0, 0}, 35 | {1.1, CoroutineYield, Cr1, 0, 0, 0}, 36 | {1.2, CoroutineResume, Cr2, 0, 0, 0}, 37 | {1.2, Enter, Fun3, 0, 0, 0}, 38 | {2.2, CoroutineYield, Cr2, 0, 0, 0}, 39 | {2.3, CoroutineResume, Cr1, 0, 0, 0}, 40 | {2.3, Enter, Fun2, 0, 0, 0}, 41 | {4.3, CoroutineYield, Cr1, 0, 0, 0}, 42 | {4.4, CoroutineResume, Cr2, 0, 0, 0}, 43 | {4.4, Enter, Fun4, 0, 0, 0}, 44 | {6.4, CoroutineYield, Cr2, 0, 0, 0}, 45 | {6.5, CoroutineResume, Cr1, 0, 0, 0}, 46 | {6.5, 
Leave, Fun2, 0, 0, 0}, 47 | {6.5, Leave, Fun1, 0, 0, 0}, 48 | {6.6, CoroutineResume, Cr2, 0, 0, 0}, 49 | {6.6, Leave, Fun4, 0, 0, 0}, 50 | {6.6, Leave, Fun3, 0, 0, 0}, 51 | {6.6, Leave, Run, 0, 0, 0}, 52 | } 53 | } 54 | ]] 55 | 56 | local oldLoadfile = loadfile 57 | function loadfile(file, ...) 58 | if file == "test" then 59 | local loaded 60 | ---@diagnostic disable-next-line: redundant-parameter 61 | return load(function() if loaded then return nil end loaded = true return testData end, file, ...) 62 | else 63 | return oldLoadfile(file, ...) 64 | end 65 | end 66 | 67 | local trace = analyze:LoadSavedVars("test") 68 | local cpuStacks = analyze:FlameGraph(trace) 69 | 70 | assert(cpuStacks["Unknown addon;Run"] == 0.6e6) 71 | assert(cpuStacks["Unknown addon;Run;Fun1"] == 1e6) 72 | assert(cpuStacks["Unknown addon;Run;Fun1;Fun2"] == 2e6) 73 | assert(cpuStacks["Unknown addon;Run;Fun3"] == 1e6) 74 | assert(cpuStacks["Unknown addon;Run;Fun3;Fun4"] == 2e6) 75 | -------------------------------------------------------------------------------- /Analyzer/Analyze_Test.lua: -------------------------------------------------------------------------------- 1 | local analyze = require "Analyze" 2 | 3 | -- Trace that would be generated by something like this: 4 | --[[ 5 | function Fun1() sleep(1) Fun2() sleep(1) end 6 | function Fun2() sleep(1) Fun3() sleep(1) Fun4() sleep(1) end 7 | function Fun3() alloc(10) sleep(1) end 8 | function Fun4() alloc(20) sleep(1) end 9 | Fun1() -- Sleeps 7 seconds 10 | sleep(10) -- this isn't traced 11 | Fun4() -- Sleeps 1 second, covers that we don't account the 10 seconds we don't see here 12 | ]] 13 | local testData = [[ 14 | local Fun1, Fun2, Fun3, Fun4 = 1, 2, 3, 4 15 | local Enter, Leave = 1, 2 16 | Perfy_Export = { 17 | FunctionNames = { 18 | Fun1 = Fun1, Fun2 = Fun2, Fun3 = Fun3, Fun4 = Fun4 19 | }, 20 | EventNames = { 21 | Enter = Enter, Leave = Leave 22 | }, 23 | Trace = { -- Tracing overhead is .25 seconds and 10 bytes 24 | {1.00, Enter, 
Fun1, 0.25, 10, 10}, 25 | {2.25, Enter, Fun2, 0.25, 20, 10}, 26 | {3.50, Enter, Fun3, 0.25, 30, 10}, 27 | {4.75, Leave, Fun3, 0.25, 50, 10}, 28 | {6.00, Enter, Fun4, 0.25, 60, 10}, 29 | {7.25, Leave, Fun4, 0.25, 90, 10}, 30 | {8.50, Leave, Fun2, 0.25, 100, 10}, 31 | {9.75, Leave, Fun1, 0.25, 110, 10}, 32 | {20.00, Enter, Fun4, 0.25, 110, 10}, 33 | {21.25, Leave, Fun4, 0.25, 140, 10}, 34 | } 35 | } 36 | ]] 37 | 38 | -- FIXME: this broke with the introduction of the Lua parser which can't handle the file above :/ 39 | local oldLoadfile = loadfile 40 | function loadfile(file, ...) 41 | if file == "test" then 42 | local loaded 43 | ---@diagnostic disable-next-line: redundant-parameter 44 | return load(function() if loaded then return nil end loaded = true return testData end, file, ...) 45 | else 46 | return oldLoadfile(file, ...) 47 | end 48 | end 49 | 50 | local trace = analyze:LoadSavedVars("test") 51 | assert(#trace == 10) 52 | assert(trace[5].timestamp == 6) 53 | assert(trace[5].event == "Enter") 54 | assert(trace[5].functionName == "Fun4") 55 | assert(trace[5].timeOverhead == 0.25) 56 | assert(trace[5].memory == 60) 57 | assert(trace[5].memoryOverhead == 10) 58 | 59 | local cpuStacks = analyze:FlameGraph(trace) 60 | assert(cpuStacks["Unknown addon;Fun1"] == 2000000) 61 | assert(cpuStacks["Unknown addon;Fun1;Fun2"] == 3000000) 62 | assert(cpuStacks["Unknown addon;Fun1;Fun2;Fun3"] == 1000000) 63 | assert(cpuStacks["Unknown addon;Fun1;Fun2;Fun4"] == 1000000) 64 | assert(cpuStacks["Unknown addon;Fun4"] == 1000000) 65 | 66 | local memStacks = analyze:FlameGraph(trace, "memory", "memoryOverhead") 67 | assert(memStacks["Unknown addon;Fun1;Fun2;Fun3"] == 10) 68 | assert(memStacks["Unknown addon;Fun1;Fun2;Fun4"] == 20) 69 | assert(memStacks["Unknown addon;Fun4"] == 20) 70 | -------------------------------------------------------------------------------- /AddOn/TraceLoadingScreen.lua: -------------------------------------------------------------------------------- 1 | -- 
Uncomment this line to enable login loading screen tracing. 2 | --Perfy_Start() 3 | 4 | -- Login/reload loading screen event log looks like this: 5 | -- 1. Run Lua files (Enter/Leave on main chunks) 6 | -- 2. ADDON_LOADED fires 7 | -- 3. PLAYER_LOGIN fires 8 | -- 4. PLAYER_ENTERING_WORLD fires 9 | -- 5. LOADING_SCREEN_DISABLED fires 10 | -- 6. OnUpdate fires once 11 | -- 7. SPELLS_CHANGED fires 12 | -- 13 | -- We care about when the user considers the game running, a reasonable definition for running is "after the first frame has been drawn". 14 | -- A reasonable close approximation for this is the start of the second (!) OnUpdate call because they happen before a frame is drawn. 15 | -- Note that being off here by a frame isn't too bad because we only account time that we can "see" anyways, so the extra time during the two frames the game does other things doesn't show up anyways. 16 | local counter = 0 17 | local loginLoadingScreenFrame = CreateFrame("Frame") 18 | loginLoadingScreenFrame:SetScript("OnUpdate", function(self) 19 | counter = counter + 1 20 | if counter == 2 then 21 | Perfy_Stop() 22 | self:Hide() 23 | end 24 | end) 25 | loginLoadingScreenFrame:Show() 26 | 27 | -- Other loading screens look like this: 28 | -- 1. LOADING_SCREEN_ENABLED fires 29 | -- 2. OnUpdate continues firing normally 30 | -- 3. PLAYER_LEAVING_WORLD fires 31 | -- 4. OnUpdate continues firing normally 32 | -- 5. PLAYER_ENTERING_WORLD fires and OnUpdate stops 33 | -- 6. LOADING_SCREEN_DISABLED fires 34 | -- 7. SPELLS_CHANGED fires 35 | -- 8. OnUpdate continues firing normally 36 | -- 37 | -- LOADING_SCREEN_ENABLED is a good point to start 38 | -- The second OnUpdate after LOADING_SCREEN_DISABLED is a reasonable point to end following the same logic as above. 
39 | local loadingScreenFrame = CreateFrame("Frame") 40 | loadingScreenFrame:RegisterEvent("LOADING_SCREEN_ENABLED") 41 | loadingScreenFrame:RegisterEvent("LOADING_SCREEN_DISABLED") 42 | 43 | local logNextLoadingScreen = false 44 | local counter = 0 45 | loadingScreenFrame:SetScript("OnEvent", function(self, event) 46 | if not logNextLoadingScreen then return end 47 | if event == "LOADING_SCREEN_ENABLED" then 48 | Perfy_Start() 49 | elseif event == "LOADING_SCREEN_DISABLED" then 50 | logNextLoadingScreen = false 51 | counter = 0 52 | self:Show() 53 | end 54 | end) 55 | loadingScreenFrame:SetScript("OnUpdate", function(self) 56 | counter = counter + 1 57 | if counter == 2 then 58 | Perfy_Stop() 59 | self:Hide() 60 | end 61 | end) 62 | 63 | -- Trace a non-login loading screen. 64 | function Perfy_LogLoadingScreen() 65 | print("[Perfy] Next loading screen will be logged by Perfy.") 66 | print("[Perfy] This will not work for loading screens due to UI reload or logging in. See file TraceLoadingScreen.lua for instructions to trace these initial loading screens.") 67 | logNextLoadingScreen = true 68 | end 69 | -------------------------------------------------------------------------------- /Instrumentation/Main.lua: -------------------------------------------------------------------------------- 1 | -- LuaLS environment setup 2 | local basePath = arg[0]:gsub("[/\\]*[^/\\]-$", "") -- The dir under which this file is 3 | package.path = "./script/?.lua;./script/?/init.lua;./test/?.lua;./test/?/init.lua;" 4 | package.path = package.path .. basePath .. "/?.lua;" 5 | package.path = package.path .. basePath .. "/?/init.lua" 6 | _G.log = require "log" 7 | local fs = require "bee.filesystem" 8 | local util = require "utility" 9 | local rootPath = debug.getinfo(1, "S").source:sub(2):gsub("[/\\]*[^/\\]-$", "") 10 | rootPath = rootPath == "" and "." 
or rootPath 11 | ROOT = util.expandPath(rootPath) 12 | LUA_VER = "Lua 5.1" 13 | 14 | local instrument = require "Instrument" 15 | local toc = require "TocHandler" 16 | 17 | if #arg < 1 then 18 | print("Usage: " .. arg[0] .. " ") 19 | return 20 | end 21 | 22 | local function stripPathPrefix(path) 23 | return path:gsub("^.*Interface[/\\]AddOns[/\\]", "") 24 | end 25 | 26 | local seenFiles = {} 27 | local function deduplicateFile(fileName) 28 | -- Some addons reference their libraries through seemingly different paths, canonicalizing the path avoids some wrong warnings. 29 | -- Also, WoW is not case sensitive and some AddOns such as WeakAuras reference the same file but with different cases (libs/ vs. Libs/). 30 | local canonical = fs.canonical(fs.path(fileName)):string():lower() 31 | if seenFiles[canonical] then 32 | return false 33 | end 34 | seenFiles[canonical] = true 35 | return true 36 | end 37 | 38 | local function handleTocFile(fileName) 39 | print("Instrumenting AddOn " .. stripPathPrefix(fileName)) 40 | local file, err = io.open(fileName, "r") 41 | if not file then error(err) end 42 | 43 | local lines = {} 44 | for line in file:lines() do 45 | -- Why are line ending differences between Windows and others still a problem in 2024? 
46 | lines[#lines + 1] = line:match("(.-)\r?$") 47 | end 48 | file:close() 49 | 50 | toc:InjectDependency(lines) 51 | 52 | file, err = io.open(fileName, "w+b") 53 | if not file then error(err) end 54 | for _, line in ipairs(lines) do 55 | file:write(line) 56 | file:write("\n") 57 | end 58 | file:close() 59 | 60 | local dir = fileName:gsub("[^/\\]-$", "") 61 | local files = toc:FindFiles(lines, dir) 62 | 63 | for _, fileName in ipairs(files) do 64 | if deduplicateFile(fileName) then -- avoids errors about files being referenced multiple times (by tocs for different game versions) 65 | instrument:InstrumentFile(fileName) -- TODO: handle failure gracefully to not fail completely on a single bad addon 66 | end 67 | end 68 | end 69 | 70 | local function handleFile(fileName) 71 | if fileName:match(".toc$") then 72 | return handleTocFile(fileName) 73 | end 74 | -- TODO: handle XML files here 75 | if deduplicateFile(fileName)then 76 | instrument:InstrumentFile(fileName) 77 | end 78 | end 79 | 80 | for _, fileName in ipairs(arg) do 81 | if fileName:match("Perfy.toc$") or fileName:match("!!!Perfy/") then 82 | print("File " .. fileName .. 
" seems to belong to Perfy itself -- skipping.") 83 | else 84 | handleFile(fileName) 85 | end 86 | end 87 | 88 | 89 | -------------------------------------------------------------------------------- /Instrumentation/TocHandler.lua: -------------------------------------------------------------------------------- 1 | local mod = {} 2 | 3 | function mod:InjectDependency(lines, dep) 4 | dep = dep or "!!!Perfy" 5 | local lastMetadataLine = 0 6 | local foundDependencyEntry = false 7 | local foundPerfyMetadata = false 8 | for i, line in ipairs(lines) do 9 | local key, value = line:match("^##%s*([^:%s]+)%s*:%s*(.-)%s*$") 10 | if key and value then 11 | lastMetadataLine = i 12 | if key == "Dependencies" then 13 | foundDependencyEntry = true 14 | local foundDep = false 15 | for entry in value:gmatch("([^%s,]+)") do 16 | if entry == dep then 17 | foundDep = true 18 | end 19 | end 20 | if not foundDep then 21 | lines[i] = line .. (value ~= "" and ", " or "") .. dep 22 | end 23 | end 24 | if key == "X-Perfy-Instrumented" then 25 | foundPerfyMetadata = true 26 | end 27 | end 28 | end 29 | if not foundPerfyMetadata then 30 | table.insert(lines, lastMetadataLine + 1, "## X-Perfy-Instrumented: true") 31 | end 32 | if not foundDependencyEntry then 33 | table.insert(lines, lastMetadataLine + 1, "## Dependencies: " .. dep) 34 | end 35 | end 36 | 37 | -- "Mom, can we have an XML parser?" -- "No, we have XML parser at home." 38 | -- XML parser at home: 39 | local function parseXml(fileName, addonBasePath, files) 40 | local dir = fileName:gsub("[^/\\]-$", "") 41 | local file, err = io.open(fileName, "r") 42 | if not file then error(err) end -- TODO: could handle gracefully to not fail completely on one invalid toc 43 | local xml = file:read("*a") 44 | file:close() 45 | local luaFiles = {} 46 | -- "No, you can't parse HTML/XML like that" -- "Haha, regex goes
" 47 | xml = xml:gsub("", "") 48 | for ref in xml:gmatch("<%s*[iI][nN][cC][lL][uU][dD][eE]%s+[fF][iI][lL][eE]%s*=%s*(.-)%s*/?%s*>") do 49 | local delim = ref:sub(1, 1) 50 | if delim == "\"" or delim == "'" then 51 | ref = ref:sub(2, -2) 52 | end 53 | ref = ref:gsub("\\", "/") 54 | if ref:lower():match("%.xml$") then 55 | parseXml(dir .. ref, addonBasePath, files) 56 | elseif ref:lower():match("%.lua$") then -- Yes, this is apparently valid 57 | luaFiles[#luaFiles + 1] = ref 58 | else 59 | print("File " .. fileName .. " references file " .. ref .. " which is neither XML nor Lua, ignoring.") 60 | end 61 | end 62 | for ref in xml:gmatch("<%s*[sS][cC][rR][iI][pP][tT]%s+[fF][iI][lL][eE]%s*=%s*(.-)%s*/?%s*>") do 63 | local delim = ref:sub(1, 1) 64 | if delim == "\"" or delim == "'" then 65 | ref = ref:sub(2, -2) 66 | end 67 | ref = ref:gsub("\\", "/") 68 | luaFiles[#luaFiles+1] = ref 69 | end 70 | for _, ref in ipairs(luaFiles) do 71 | local fileRelToXml = io.open(dir .. ref, "r") 72 | local fileRelToToc = io.open(addonBasePath .. ref, "r") 73 | if fileRelToXml then 74 | fileRelToXml:close() 75 | files[#files + 1] = dir .. ref 76 | end 77 | if fileRelToToc then -- Apparently this is valid, e.g., RXPGuides does this for its database files 78 | fileRelToToc:close() 79 | files[#files + 1] = addonBasePath .. ref 80 | end 81 | if not fileRelToXml and not fileRelToToc then 82 | print("File " .. fileName .. " references unknown file " .. ref) 83 | end 84 | end 85 | end 86 | 87 | function mod:FindFiles(lines, dir) 88 | local files = {} 89 | for _, line in ipairs(lines) do 90 | if not line:match("^%s*#") and not line:match("^%s*$") then 91 | local file = line:gsub("^%s*(.-)%s*$", "%1"):gsub("\\", "/") 92 | if file:match("%.[xX][mM][lL]$") then 93 | parseXml(dir .. file, dir, files) 94 | else 95 | files[#files + 1] = dir .. 
file 96 | end 97 | end 98 | end 99 | return files 100 | end 101 | 102 | return mod 103 | -------------------------------------------------------------------------------- /Analyzer/Main.lua: -------------------------------------------------------------------------------- 1 | local analyze = require "Analyze" 2 | 3 | local inFile, frameCmd, frameIds = ... 4 | if not inFile then 5 | return print("Usage: Main.lua /Perfy.lua [--frames -] [--frame ] [--split-frames]") 6 | end 7 | 8 | local function writeFile(fileNameSuffix, fullData) 9 | for frame, data in pairs(fullData) do 10 | local fileNames = {} 11 | if frame == "all" then 12 | fileNames[#fileNames + 1] = fileNameSuffix 13 | else 14 | for _, name in ipairs(frame.names) do 15 | fileNames[#fileNames + 1] = name .. "-" .. fileNameSuffix 16 | end 17 | end 18 | for _, fileName in ipairs(fileNames) do 19 | local file, err = io.open(fileName, "w+b") 20 | if not file then error(err) end 21 | local count = 0 22 | for k, v in pairs(data) do 23 | file:write(k) 24 | file:write(" ") 25 | file:write(tostring(v)) 26 | file:write("\n") 27 | if v ~= 0 then 28 | count = count + 1 29 | end 30 | end 31 | file:close() 32 | print(("Wrote %d non-zero stacks to %s"):format(count, fileName)) 33 | end 34 | end 35 | end 36 | 37 | local firstFrame, lastFrame, splitFrames 38 | if frameCmd and frameCmd:match("^%-%-?frames?$") then 39 | if not frameIds then return print("expected frame numbers for " .. frameCmd) end 40 | firstFrame, lastFrame = frameIds:match("(%d*)%-?(%d*)$") 41 | if lastFrame == "" then 42 | lastFrame = firstFrame 43 | end 44 | firstFrame = tonumber(firstFrame) 45 | lastFrame = tonumber(lastFrame) 46 | if not firstFrame or not lastFrame then 47 | return print("couldn't parse " .. frameCmd .. 
" argument") 48 | end 49 | elseif frameCmd and frameCmd:match("^%-%-?split%-frames$") then 50 | splitFrames = true 51 | end 52 | 53 | local trace = analyze:LoadSavedVars(inFile) 54 | 55 | -- FIXME: properly split stack reconstruction and flame graph generation, this currently needs to be called prior to FindSlowFrames to have empty stack info 56 | analyze:FlameGraph(trace, "timestamp", "timeOverhead") 57 | local frames = analyze:FindSlowFrames(trace) 58 | print("number of frames: ", #frames) 59 | local topFramesByCpu = analyze:GetTopFrames(frames, 10, function(e1, e2) return e1.time - e1.timeOverhead > e2.time - e2.timeOverhead end) 60 | local topFramesByMemory = analyze:GetTopFrames(frames, 10, function(e1, e2) return e1.memory - e1.memOverhead > e2.memory - e2.memOverhead end) 61 | 62 | local stacksCpu, stacksMemory 63 | if splitFrames then 64 | local topFrames = {} 65 | for i, v in ipairs(topFramesByCpu) do 66 | v.names = {("top-cpu-%d-frame-%d"):format(i, v.id)} 67 | topFrames[#topFrames + 1] = v 68 | end 69 | for i, v in ipairs(topFramesByMemory) do 70 | -- Frames can have multiple names because it's confusing if like half of your frames for a metric are missing because they happen to overlap with the other metric 71 | if v.names then 72 | v.names[#v.names + 1] = ("top-memory-%d-frame-%d"):format(i, v.id) 73 | else 74 | v.names = {("top-memory-%d-frame-%d"):format(i, v.id)} 75 | end 76 | topFrames[#topFrames + 1] = v 77 | end 78 | stacksCpu = analyze:FlameGraph(trace, "timestamp", "timeOverhead", nil, nil, topFrames) 79 | stacksMemory = analyze:FlameGraph(trace, "memory", "memoryOverhead", nil, nil, topFrames) 80 | else 81 | stacksCpu = analyze:FlameGraph(trace, "timestamp", "timeOverhead", frames[firstFrame], frames[lastFrame]) 82 | stacksMemory = analyze:FlameGraph(trace, "memory", "memoryOverhead", frames[firstFrame], frames[lastFrame]) 83 | end 84 | 85 | if firstFrame then 86 | print("Only reporting trace entries " .. frames[firstFrame].first .. " to " .. 
frames[lastFrame].last) 87 | else 88 | print("Top frames by CPU time:") 89 | analyze:PrintSlowFrames(topFramesByCpu, 10) 90 | print() 91 | print("Top frames by memory allocations:") 92 | analyze:PrintSlowFrames(topFramesByMemory, 10) 93 | print("Frame CPU time and memory may include uninstrumented code, run full analysis per-frame by using \"--split-frames\" to get one result per top frame or by selecting frames via \"--frames -\"") 94 | end 95 | 96 | writeFile("stacks-cpu.txt", stacksCpu) 97 | writeFile("stacks-memory.txt", stacksMemory) 98 | -------------------------------------------------------------------------------- /AddOn/Perfy_Test.lua: -------------------------------------------------------------------------------- 1 | local mockTime = 0 2 | local function mockGetTime() 3 | return mockTime 4 | end 5 | GetTimePreciseSec = mockGetTime 6 | 7 | local lastError 8 | local errorHandler = function(err) lastError = err end 9 | function geterrorhandler() 10 | return errorHandler 11 | end 12 | 13 | function seterrorhandler(f) 14 | errorHandler = f 15 | end 16 | 17 | C_Timer = { 18 | After = function(_, f) f() end, 19 | NewTicker = function(_, f) f() end, 20 | } 21 | 22 | PERFY_TEST_ENVIRONMENT = true 23 | require "Perfy" 24 | 25 | function TestHappyPath() 26 | Perfy_Clear() 27 | mockTime = 0 28 | Perfy_Start() 29 | assert(Perfy_Running()) 30 | mockTime = 0.25 31 | Perfy_Trace(0, "Enter", "Fun1") 32 | mockTime = 1.25 33 | Perfy_Trace(1, "Enter", "Fun2") 34 | mockTime = 2 35 | Perfy_Trace_Passthrough("Leave", "Fun2") 36 | mockTime = 3 37 | Perfy_Trace_Passthrough("Leave", "Fun1") 38 | Perfy_Stop() 39 | assert(not Perfy_Running()) 40 | 41 | -- FIXME: use some assertion library/test framework, what a mess 42 | assert(Perfy_Export.FunctionNames["Perfy_Start Perfy/internal"] == 1) 43 | assert(Perfy_Export.FunctionNames.Fun1 == 2) 44 | assert(Perfy_Export.FunctionNames.Fun2 == 3) 45 | assert(Perfy_Export.FunctionNames["Perfy_Stop Perfy/internal"] == 4) 46 | 
assert(Perfy_Export.EventNames.PerfyStart == 1) 47 | assert(Perfy_Export.EventNames.Enter == 2) 48 | assert(Perfy_Export.EventNames.Leave == 3) 49 | assert(Perfy_Export.EventNames.PerfyStop == 4) 50 | assert(#Perfy_Export.Trace == 6) 51 | 52 | -- Entry 1: Start Perfy 53 | assert(Perfy_Export.Trace[1][1] == 0) -- Timestamp 54 | assert(Perfy_Export.Trace[1][2] == Perfy_Export.EventNames.PerfyStart) -- Event 55 | 56 | -- Entry 2: Enter Fun1 57 | assert(Perfy_Export.Trace[2][1] == 0) -- Timestamp 58 | assert(Perfy_Export.Trace[2][2] == Perfy_Export.EventNames.Enter) -- Event 59 | assert(Perfy_Export.Trace[2][3] == Perfy_Export.FunctionNames.Fun1) -- Function 60 | assert(Perfy_Export.Trace[2][4] == 0.25) -- Overhead 61 | assert(Perfy_Export.Trace[2][5] > 0) -- Memory 62 | assert(Perfy_Export.Trace[2][6] > 0) -- Memory overhead 63 | 64 | -- Entry 4: Leave Fun2 65 | assert(Perfy_Export.Trace[4][1] == 2) -- Timestamp 66 | assert(Perfy_Export.Trace[4][2] == Perfy_Export.EventNames.Leave) -- Event 67 | assert(Perfy_Export.Trace[4][3] == Perfy_Export.FunctionNames.Fun2) -- Function 68 | assert(Perfy_Export.Trace[4][4] == 0) -- Overhead (0 on leave because it's updated internally and the mock doesn't update) 69 | assert(Perfy_Export.Trace[4][5] > 0) -- Memory 70 | assert(Perfy_Export.Trace[4][6] > 0) -- Memory overhead 71 | 72 | -- Entry 6: Stop Perfy 73 | assert(Perfy_Export.Trace[6][1] == 3) -- Timestamp 74 | assert(Perfy_Export.Trace[6][2] == Perfy_Export.EventNames.PerfyStop) -- Event 75 | end 76 | 77 | function TestClear() 78 | Perfy_Clear() 79 | Perfy_Start() 80 | Perfy_Trace(0, "Enter", "Fun1") 81 | Perfy_Stop() 82 | assert(#Perfy_Export.Trace > 0) 83 | Perfy_Clear() 84 | assert(not Perfy_Export.Trace) 85 | end 86 | 87 | function TestLeavePassthrough() 88 | Perfy_Start() 89 | local a, b = Perfy_Trace_Passthrough("Leave", "Fun1", "foo", "bar") 90 | Perfy_Stop() 91 | assert(a == "foo" and b == "bar") 92 | end 93 | 94 | function TestErrorHandlerHook() 95 | Perfy_Clear() 96 
| Perfy_Start() 97 | geterrorhandler()("test") 98 | Perfy_Stop() 99 | 100 | assert(#Perfy_Export.Trace == 3) 101 | assert(lastError == "test") 102 | end 103 | 104 | function TestMultipleStarts() 105 | Perfy_Clear() 106 | Perfy_Start() 107 | Perfy_Trace(0, "Enter", "Fun1") 108 | Perfy_Trace(0, "Leave", "Fun1") 109 | Perfy_Trace(0, "Enter", "Fun3") 110 | Perfy_Stop() 111 | assert(not Perfy_Running()) 112 | Perfy_Start() 113 | Perfy_Trace(0, "Enter", "Fun1") 114 | Perfy_Trace(0, "Enter", "Fun2") 115 | Perfy_Stop() 116 | assert(#Perfy_Export.Trace == 9) 117 | -- Double translation would enter a 1 = entry because they see already translated entries as something to translate again 118 | assert(#Perfy_Export.FunctionNames == 0) 119 | assert(#Perfy_Export.EventNames == 0) 120 | assert(Perfy_Export.FunctionNames.Fun1 == 2) 121 | assert(Perfy_Export.FunctionNames.Fun3 == 3) 122 | assert(Perfy_Export.FunctionNames["Perfy_Stop Perfy/internal"] == 4) 123 | assert(Perfy_Export.FunctionNames.Fun2 == 5) 124 | end 125 | 126 | function TestLoadAddon() 127 | Perfy_Clear() 128 | local addonLoaded 129 | _G.LoadAddOn = function(addon) 130 | addonLoaded = addon 131 | return true 132 | end 133 | Perfy_LoadAddOn("FooAddOn") 134 | assert(addonLoaded == "FooAddOn") 135 | assert(#Perfy_Export.Trace == 4) 136 | assert(Perfy_Export.EventNames.LoadAddOn == 2) 137 | assert(Perfy_Export.EventNames.LoadAddOnFinished == 3) 138 | assert(Perfy_Export.FunctionNames.FooAddOn == 2) 139 | end 140 | 141 | function TestRunFunc() 142 | Perfy_Clear() 143 | local called = false 144 | local function f() 145 | called = true 146 | end 147 | Perfy_Run(f) 148 | assert(not Perfy_Running()) 149 | assert(called) 150 | assert(#Perfy_Export.Trace == 2) 151 | 152 | Perfy_Clear() 153 | Perfy_Start() 154 | called = false 155 | Perfy_Run(f) 156 | assert(called) 157 | assert(Perfy_Running()) -- Doesn't stop if it was already running 158 | Perfy_Stop() 159 | assert(not Perfy_Running()) 160 | end 161 | 162 | function 
TestRunFuncError() 163 | Perfy_Clear() 164 | local called = false 165 | local function f() 166 | called = true 167 | error("test") 168 | end 169 | local ok = pcall(Perfy_Run, f) 170 | assert(not Perfy_Running()) 171 | assert(not ok) 172 | assert(called) 173 | assert(#Perfy_Export.Trace == 2) 174 | end 175 | 176 | TestHappyPath() 177 | TestClear() 178 | TestLeavePassthrough() 179 | TestErrorHandlerHook() 180 | TestMultipleStarts() 181 | TestLoadAddon() 182 | TestRunFunc() 183 | TestRunFuncError() 184 | -------------------------------------------------------------------------------- /Accuracy.md: -------------------------------------------------------------------------------- 1 | # Validating accuracy 2 | 3 | WoW classic still has the builtin profiler (CVar `scriptProfile`), so we can compare Perfy against it to see how good or bad we are. 4 | 5 | ## Notes on the builtin profiler 6 | 7 | We can make an educated guess on how the builtin profiler probably works based on the APIs it offers: 8 | The core reporting mechanic it provides is that it can tell you how much time a given function used (with or without including subroutines) and how often it was called. 9 | The additional functions about reporting per AddOn or per frame are just aggregations built on top of this. 10 | 11 | Since it accurately reports how often a function is called it must be based on tracing each call and not on sampling. 12 | My guess is that it hooks into the Lua VM for handling the opcodes `CALL`, `TAILCALL`, and `RETURN` and then stores the elapsed time and a counter in the Lua object representing the function. 13 | `GetFunctionCPUUsage(func, includeSubroutines)` then just returns these fields for the given function. 14 | This is low overhead and easy to implement but does not allow you to reconstruct the entire call stack as it does not remember which function called which. 
15 | Fun fact: the reported time per function has the same granularity as `GetTimePreciseSec()` (100 ns), I don't think that is a coincidence. 16 | 17 | Overall the builtin profiler is a bit cumbersome to use because you need the actual function object to query the results and it can't tell you anything about the relation between functions. 18 | 19 | 20 | ## Test setup 21 | 22 | Since the builtin profiler is a bit annoying to use we need to look at something that involves only a few functions. 23 | Whatever we are testing also needs to be reproducible because we want to run both profilers independently. 24 | Finally, it should also be somewhat realistic. 25 | 26 | DBM-StatusBarTimers' update logic for DBM timers fits this description. 27 | It only has 5 relevant functions, presents a realistic workload (5% of total CPU load in the Gnomeregan example from README.md), and it is reproducible by running `/dbm test` which starts a few timers lasting 60 seconds total. 28 | 29 | 30 | ## Test results 31 | 32 | [![FlameGraph of CPU usage](Screenshots/CPU-DBM-Test.png)](https://emmericp.github.io/Perfy/perfy-cpu-dbm-test.svg) 33 | 34 | The functions we are looking at are the five largest in the flame graph above: `onUpdate`, `barPrototype:Update`, `DBT:UpdateBars`, `stringFromTimer`, and `AnimateEnlarge`. 35 | 36 | The run with the builtin profiler was done without Perfy instrumentation in place and `scriptProfile` was disabled when running Perfy. 37 | The total number of calls to all functions was identical for Perfy and the builtin profiler, and stayed identical across all runs. 38 | It always took exactly 18176 calls to the `onUpdate` handler to run the DBM test mode with my game running at a stable 60 fps. 39 | 40 | Each run was repeated 5 times, the table shows the average and standard deviation. 
41 | 42 | | Function | Builtin profiler (µs) | Perfy (µs) | Discrepancy | 43 | |-----------------------|---------------------------:|-----------------------:|------------:| 44 | | `onUpdate` | 273740 ± 0.5% | 282690 ± 0.3% | 3.3% | 45 | | `barPrototype:Update` | 260560 ± 0.6% | 265522 ± 0.3% | 1.9% | 46 | | `DBT:UpdateBars` | 98564 ± 0.7% | 94413 ± 1.2% | -4.2% | 47 | | `stringFromTimer` | 24465 ± 0.9% | 29985 ± 0.8% | 22.6% | 48 | | `AnimateEnlarge` | 2916 ± 1.8% | 2992 ± 1.2% | 2.5% | 49 | 50 | Perfy tends to report a slightly higher CPU usage -- this is expected because even with all the logic to account for overhead it will still at least add the cost of one function call (to `Perfy_GetTime()`) to each function. 51 | Neither Perfy nor the builtin profiler is perfect, I'm happy that these agree to within a few percent :) 52 | 53 | Two results are a bit odd and warrant further investigation: 54 | 55 | 56 | ### DBT:UpdateBars() reports a lower time, everything else reports a higher time 57 | 58 | This is because the most commonly executed path in the function is just a tight loop calling some WoW API functions. 59 | Perfy does not add extra overhead to these functions -- but the builtin profiler does. 60 | This can be validated by running Perfy with and without the builtin profiler enabled: it adds 11% overhead to this function. 61 | For other functions such as `stringFromTimer` or `AnimateEnlarge` it only adds 3% and 5% respectively. 62 | 63 | This means it's not Perfy that is wrong here but the builtin profiler. 64 | Again, neither of these is perfect. 65 | 66 | 67 | ### stringFromTimer() has a 22% discrepancy 68 | 69 | This is a simple leaf function that formats the remaining time into a human-readable format.
70 | 71 | ``` 72 | local function stringFromTimer(t) 73 | if t <= DBT.Options.TDecimal then 74 | return ("%.1f"):format(t) 75 | elseif t <= 60 then 76 | return ("%d"):format(t) 77 | else 78 | return ("%d:%0.2d"):format(t / 60, math.fmod(t, 60)) 79 | end 80 | end 81 | ``` 82 | 83 | The problem here is the tail calls to `string.format`, the function is instrumented as follows: 84 | 85 | ``` 86 | local function stringFromTimer(t) Perfy_Trace(Perfy_GetTime(), "Enter", "stringFromTimer") 87 | if t <= DBT.Options.TDecimal then 88 | return Perfy_Trace_Leave("Leave", "stringFromTimer", ("%.1f"):format(t)) 89 | elseif t <= 60 then 90 | return Perfy_Trace_Leave("Leave", "stringFromTimer", ("%d"):format(t)) 91 | else 92 | return Perfy_Trace_Leave("Leave", "stringFromTimer", ("%d:%0.2d"):format(t / 60, math.fmod(t, 60))) 93 | end 94 | end 95 | ``` 96 | 97 | `Perfy_Trace_Leave` wraps the original return expression and returns it again, that allows us to inject a tracepoint between the evaluation of the return expression and the actual return. 98 | But unlike `Perfy_Trace` at the beginning of the function it can't contain a call to `Perfy_GetTime()` to determine when the return is happening -- this is instead done inside Perfy. 99 | That means the time it takes to call into Perfy is incorrectly attributed to `stringFromTimer` and cannot be subtracted by the analysis script. 100 | `stringFromTimer` has a discrepancy of 5520 µs vs. our reference, it is called 16010 times in the trace, so that's 0.345 µs of error per call. 101 | 102 | Unfortunately I don't think this is fixable for the general case: 103 | we would need to add the `Perfy_GetTime()` call to the end of the expression list to make sure it is evaluated after all return expressions, but the last expression can return a vararg of unknown length, so we can't just add it.
104 | 105 | I validated this by rewriting all return points in `stringFromTimer` as follows 106 | 107 | ``` 108 | local res = (""):format(t) 109 | Perfy_Trace(Perfy_GetTime(), "Leave", "stringFromTimer") 110 | return res 111 | ``` 112 | 113 | Perfy now reports only 27175 µs of total time (it got faster) for this function and the builtin profiler reports 26588 µs (it got slower) when changing from a tail call to an intermediate variable. 114 | That's a discrepancy of only 2.2% and in line with other functions. 115 | 116 | ### Conclusion 117 | 118 | Overall it's pretty accurate, especially when looking at the big picture. 119 | Note how the error for functions with tail calls is only large when looking at the relative error of a small function. 120 | The absolute error is on the order of hundreds of nanoseconds per function call. 121 | -------------------------------------------------------------------------------- /Analyzer/LuaParser.lua: -------------------------------------------------------------------------------- 1 | -- Parser for Lua table expressions 2 | -- Taken from https://github.com/DeadlyBossMods/DeadlyBossMods/blob/master/DBM-Test/Tools/Shared/Parser.lua 3 | local parser = {} 4 | 5 | local function syntaxError(message, code, pos, level) 6 | -- pos itself is useless for errors because it's post comment stripping 7 | error("parse error: " .. message .. " while trying to parse \"" .. code:sub(pos, pos + 30) .. 
"\"", 1 + (level or 1)) 8 | end 9 | 10 | -- FIXME: probably a bad idea to do comment stripping as preprocessing because Lua comments are actually pretty damn complex at least if we want multi-line strings and comments 11 | local function stripComments(code) 12 | local result = {} 13 | local offset = 1 14 | -- it's easier to just do this line-by-line, but doing so decreases total parser performance by ~80% 15 | while true do 16 | local commentStart = code:find("--", offset, true) 17 | if not commentStart then 18 | if offset < #code then 19 | result[#result + 1] = code:sub(offset) 20 | end 21 | break 22 | end 23 | local oldOffset = offset 24 | offset = commentStart + 2 25 | if code:match("^%[=*%[", offset) then 26 | syntaxError("comments in multi-line style (--[[]]) aren't supported", code, commentStart) 27 | end 28 | local lineStart = 0 29 | local lineEnd = 0 30 | while code:byte(offset - lineStart, offset - lineStart) ~= 10 and offset - lineStart >= 1 do 31 | lineStart = lineStart + 1 32 | end 33 | while code:byte(offset + lineEnd, offset + lineEnd) ~= 10 and offset + lineEnd < #code do 34 | lineEnd = lineEnd + 1 35 | end 36 | local line = code:sub(offset - lineStart, offset + lineEnd) 37 | commentStart = commentStart - (offset - lineStart) 38 | if offset - lineStart == 0 then 39 | commentStart = commentStart -1 40 | end 41 | result[#result + 1] = code:sub(oldOffset, offset - lineStart) 42 | offset = offset + lineEnd 43 | if line:sub(commentStart + 3):match("[\"']") then 44 | commentStart = nil 45 | local inString = nil 46 | local escapeCount, commentCount = 0, 0 47 | for i = 1, #line do 48 | local char = line:sub(i, i) 49 | if not inString and (char == "\"" or char == "'") then 50 | inString = char 51 | elseif not inString and char == "-" then 52 | if commentCount == 1 then 53 | commentStart = i - 2 54 | break 55 | else 56 | commentCount = 1 57 | end 58 | elseif inString and char == "\\" then 59 | escapeCount = escapeCount + 1 60 | elseif inString and char == 
inString then 61 | if escapeCount % 2 == 0 then 62 | inString = false 63 | commentCount = 0 64 | else 65 | escapeCount = 0 66 | end 67 | else 68 | escapeCount, commentCount = 0, 0 69 | end 70 | end 71 | end 72 | result[#result + 1] = line:sub(1, commentStart and commentStart or #line) 73 | end 74 | return table.concat(result, "") 75 | end 76 | 77 | local function expectChar(code, pos, expected) 78 | local _, newPos, actual = code:find("%s*(.)%s*", pos) -- TODO: whitespace handling is a mess, consider doing proper tokenization prior to parsing 79 | if actual ~= expected then 80 | syntaxError("expected " .. expected .. ", got " .. (actual or ""), code, pos) 81 | end 82 | return newPos + 1 83 | end 84 | 85 | local function consumeOptional(code, pos, expected) 86 | local _, newPos, actual = code:find("%s*(.)%s*", pos) 87 | if actual == expected then 88 | return newPos + 1 89 | else 90 | return pos 91 | end 92 | end 93 | 94 | local function consumeChar(code, pos) 95 | local _, pos = code:find("%s*(.)", pos) 96 | return pos + 1 97 | end 98 | 99 | local function peekChar(code, pos) 100 | return code:match("%s*(.)", pos) 101 | end 102 | 103 | local function parseIdentifier(code, pos) 104 | local _, newPos, identifier = code:find("^%s*([_%a][_%w]*)", pos) 105 | if not newPos then 106 | syntaxError("expected , got ", code, pos) 107 | end 108 | return newPos + 1, identifier 109 | end 110 | 111 | local function parseNumber(code, pos) 112 | local _, pos, value = code:find("^%s*([-.ex%x]*)%s*", pos) 113 | value = tonumber(value) 114 | if not tonumber(value) then 115 | syntaxError("invalid number " .. 
tostring(value), code, pos) 116 | end 117 | return pos + 1, value 118 | end 119 | 120 | local function parseBool(code, pos) 121 | local _, pos, value = code:find("^%s*([%a]*)%s*", pos) 122 | if value == "true" then 123 | value = true 124 | elseif value == "false" then 125 | value = false 126 | else 127 | syntaxError("invalid assignment") 128 | end 129 | return pos + 1, value 130 | end 131 | 132 | local function parseNil(code, pos) 133 | local _, pos, value = code:find("^%s*(nil)%s*", pos) 134 | if not pos then 135 | syntaxError("invalid assignment") 136 | end 137 | return pos + 1, nil 138 | end 139 | 140 | local validEscapes = { 141 | ["\\a"] = "\a", ["\\b"] = "\b", ["\\f"] = "\f", ["\\n"] = "\n", ["\\r"] = "\r", ["\\t"] = "\t", ["\\v"] = "\v", ["\\\\"] = "\\" ,["\\\""] = "\"", ["\\'"] = "'", 142 | ["\\\n"] = "\n", ["\\\r"] = "" -- \r is probably followed by a \n anyways, so good enough to just strip it, also, who uses multi-line strings like this anyways? 143 | } 144 | local function unescapeString(str) 145 | if str:find("\\", nil, true) then -- This check increases performance by 105% because escapes are very rare and str:gsub() seems to be very expensive 146 | return str 147 | :gsub("\\(%d%d?%d?)", function(match) return string.char(tonumber(match) or 0) end) 148 | :gsub("\\.", function(match) return validEscapes[match] or match:sub(2) end) 149 | else 150 | return str 151 | end 152 | end 153 | 154 | local function parseString(code, pos) 155 | local delimiter = code:sub(pos, pos) 156 | local strStart = pos 157 | while true do 158 | local _, nextDelimPos = code:find(delimiter, pos + 1) 159 | if not nextDelimPos then 160 | syntaxError("unterminated string", code, strStart) 161 | end 162 | local escapes = 0 163 | while code:sub(nextDelimPos - escapes - 1, nextDelimPos - escapes - 1) == "\\" and nextDelimPos - escapes > strStart do 164 | escapes = escapes + 1 165 | end 166 | if escapes % 2 == 0 then 167 | return nextDelimPos + 1, unescapeString(code:sub(strStart + 1, 
nextDelimPos - 1)) 168 | else 169 | pos = nextDelimPos 170 | end 171 | end 172 | end 173 | 174 | 175 | local parseValue 176 | 177 | local function parseAssignment(code, pos) 178 | local pos, identifier = parseIdentifier(code, pos) 179 | pos = expectChar(code, pos, "=") 180 | local pos, value = parseValue(code, pos) 181 | return pos, identifier, value 182 | end 183 | 184 | local function parseTableEntry(code, pos) 185 | local nextChar = peekChar(code, pos) 186 | if nextChar == "[" then 187 | pos = consumeChar(code, pos) 188 | local pos, key = parseValue(code, pos) 189 | pos = expectChar(code, pos, "]") 190 | pos = expectChar(code, pos, "=") 191 | local pos, value = parseValue(code, pos) 192 | pos = consumeOptional(code, pos, ",") 193 | return pos, key, value 194 | elseif nextChar == "}" then 195 | pos = consumeChar(code, pos) 196 | return pos, nil, nil, true 197 | elseif nextChar == "\"" or nextChar == "'" then -- Optimization to not use the assignment checking regex for the common "list of strings" case 198 | local pos, value = parseValue(code, pos) 199 | pos = consumeOptional(code, pos, ",") 200 | return pos, nil, value 201 | elseif code:find("^%s*([_%a][_%w]*%s*=)", pos) then 202 | local pos, key, value = parseAssignment(code, pos) 203 | pos = consumeOptional(code, pos, ",") 204 | return pos, key, value 205 | else 206 | local pos, value = parseValue(code, pos) 207 | pos = consumeOptional(code, pos, ",") 208 | return pos, nil, value 209 | end 210 | end 211 | 212 | local function parseTable(code, pos) 213 | local result = {} 214 | pos = expectChar(code, pos, "{") 215 | local key, value, endOfTable 216 | local arrayIndex = 1 217 | while true do 218 | pos, key, value, endOfTable = parseTableEntry(code, pos) 219 | if endOfTable then 220 | break 221 | end 222 | if key == nil then 223 | key = arrayIndex 224 | arrayIndex = arrayIndex + 1 225 | end 226 | if key then 227 | result[key] = value 228 | end 229 | end 230 | return pos, result 231 | end 232 | 233 | local 
printCounter = 0 234 | function parseValue(code, pos) 235 | printCounter = printCounter + 1 236 | -- Logs can have hundreds of millions of values, just printing some stats regularly so it doesn't look dead 237 | if printCounter % 1000000 == 0 then 238 | local before = collectgarbage("count") 239 | collectgarbage("collect") -- This seems to be important, makes the difference between OOM'ing after ~100M literals vs ~300M literals on a 16 GiB VM 240 | print(("Parsed %d literals"):format(printCounter)) 241 | end 242 | local nextChar = peekChar(code, pos) 243 | if not nextChar then 244 | syntaxError("expected , got ", code, pos) 245 | elseif nextChar == "{" then 246 | return parseTable(code, pos) 247 | elseif nextChar == "t" or nextChar == "f" then 248 | return parseBool(code, pos) 249 | elseif nextChar:match("%d") or nextChar == "." or nextChar == "-" then 250 | return parseNumber(code, pos) 251 | elseif nextChar == "\"" or nextChar == "'" then 252 | return parseString(code, pos) 253 | elseif nextChar == "n" then 254 | return parseNil(code, pos) 255 | elseif nextChar == "[" then 256 | syntaxError("multi-line style strings aren't supported", code, pos) 257 | else 258 | syntaxError("unsupported value", code, pos) 259 | end 260 | end 261 | 262 | local function parseChunk(code, pos) 263 | local env = {} 264 | while pos < #code and not code:match("^%s*$", pos) do 265 | local identifier, value 266 | pos, identifier, value = parseAssignment(code, pos) 267 | env[identifier] = value 268 | end 269 | return env 270 | end 271 | 272 | -- Simple recursive descent parser for Lua tables to avoid Lua 5.1 constant limits 273 | -- This is in no way a complete or correct parser for Lua tables, just something that happens to work for what WoW generates as saved variables (Transcriptor logs etc) 274 | -- Specifically it doesn't handle multi-line comments and multi-line strings correctly as they are pretty complex. 
275 | function parser:ParseLua(code) 276 | print("Parsing Lua in trace file") 277 | code = stripComments(code) 278 | local pos = 1 279 | return parseChunk(code, pos) 280 | end 281 | 282 | return parser -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Instrumentation-based performance profiling for Lua in WoW 2 | 3 | Perfy injects code into World of Warcraft AddOns to measure performance, it can tell you exactly where CPU time is spent and which functions allocate how much memory. 4 | 5 | # Example: Finding the source of micro stuttering in classic raids 6 | 7 | I traced the Mechanical Menagerie fight in Gnomeregan with all AddOns I use instrumented with Perfy. 8 | The trace contains about 5 million entries that span a time of 214 seconds including the whole fight (195 seconds) and is a good example for tracking down a real performance problem. 9 | 10 | ## CPU usage 11 | 12 | [![FlameGraph of CPU usage](Screenshots/CPU.png)](https://emmericp.github.io/Perfy/perfy-cpu.svg) 13 | 14 | This visualization is called a [Flame Graph](https://www.brendangregg.com/flamegraphs.html), a neat way to visualize hierarchical data such as resource usage by call stacks. Click on the graph above to open an interactive SVG to fully explore it! 15 | 16 | 17 | In total Perfy traced 6.8 seconds of execution time, that is an average CPU load of only 3.2% due to 3rd party addons. 18 | So nothing to worry about, but it is a bit odd that 43% of this load is due to ClassicHealPrediction and LibHealComm. 19 | 20 | ## Memory allocations 21 | 22 | [![FlameGraph of memory allocations](Screenshots/Memory.png)](https://emmericp.github.io/Perfy/perfy-memory.svg) 23 | Click on the graph above to open an interactive SVG to fully explore it! 24 | 25 | Perfy found a total of 489 MB of memory being allocated during the trace, that's 122 MB per minute. 
26 | 91% of that was due to ClassicHealPrediction. 27 | 28 | Now that is an interesting result and probably the cause of our micro stuttering. 29 | Allocating memory isn't a bad thing per se, the Lua garbage collector is pretty decent. 30 | But garbage collection pauses can still lead to micro stuttering, so AddOns typically avoid allocating memory, especially during boss fights in big raids. 31 | There's clearly something going wrong in ClassicHealPrediction because why would it need so much memory in a simple 10 man raid? 32 | There are [reports of it being worse in larger raids](https://github.com/dev7355608/ClassicHealPrediction/issues/2), I wonder how much it needs in a 40 man raid, I would love to see a trace! 33 | 34 | The function `updateHealPrediction` that Perfy identified gets called OnUpdate (i.e., 60 times per second for me) and for every unit frame. 35 | It then re-draws the heal predictions (even if nothing changed) and calls `CreateColor()` up to 8 times to do so. 36 | `CreateColor()` allocates a new `ColorMixin` every time for the same color and that's how we got the excessive allocations. 37 | 38 | How did I identify `CreateColor()`, a function that does not show up at all in the Flame Graph above? 39 | The reason why it doesn't show up separately is that we cannot instrument functions that belong to the default UI, so identifying the culprit was an educated guess. 40 | 41 | I confirmed that this is indeed the culprit by pulling out the color creation into a separate function that wraps `CreateColor()`. 42 | Running the whole process again shows that virtually all of the memory allocation is now in that new wrapper function.
43 | 44 | # Usage 45 | 46 | ## Install dependencies 47 | 48 | * [Lua 5.3](https://lua.org) or newer 49 | * [lua-language-server](https://github.com/LuaLS/lua-language-server) 50 | * [FlameGraph](https://github.com/brendangregg/FlameGraph) 51 | 52 | ## Instrument 53 | 54 | Run `Instrumentation/Main.lua` under lua-language-server and provide it a list of .toc files as input. 55 | It will automatically find all Lua files referenced there. 56 | **Caution: it modifies files in place, so it's best to make a backup before running this on your AddOns folder.** 57 | 58 | ``` 59 | ./bin/lua-language-server /Instrumentation/Main.lua /*/*.toc 60 | ``` 61 | 62 | You don't have to add it to every AddOn, it's perfectly fine to only instrument the AddOns you are interested in. 63 | But note that shared libraries are only instanced by one AddOn even if multiple AddOns are trying to load them, so they may be missing from the traces if the used instance is not instrumented. 64 | If uninstrumented AddOns use shared libraries from instrumented AddOns there may be large unexplained self-times of functions in the instrumented shared library if it is the initial entry point of a call trace (e.g., timers). 65 | 66 | ## Measure 67 | 68 | Install the WoW AddOn in `AddOn/` as `AddOns/!!!Perfy` and start measuring in game as follows: 69 | 70 | ``` 71 | /perfy start 72 | /perfy stop 73 | ``` 74 | 75 | Reload your UI or log out afterwards to make WoW export the data. 76 | 77 | `/perfy start