├── README.md ├── process.py └── process.sh /README.md: -------------------------------------------------------------------------------- 1 | # PyTorch Profiler Parser 2 | Parser script that processes PyTorch autograd profiler results and converts the exported JSON trace file into an Excel workbook. 3 | 4 | ## Performance Profiling on PyTorch 5 | #### 1. Enable profiler in user code 6 | ```python 7 | # To enable GPU profiling, provide use_cuda=True for profiler() 8 | with torch.autograd.profiler.profile() as prof: 9 | func_() 10 | prof.export_chrome_trace("result.json") 11 | ``` 12 | #### 2. Convert the output JSON record file to a more human-friendly Excel file 13 | ```bash 14 | python process.py --input result.json --output result.xlsx 15 | ``` 16 | OR: 17 | ```bash 18 | bash process.sh result.json 19 | ``` 20 | #### 3. Annotation 21 | The PyTorch autograd profiler records each operator executed by the autograd engine. The profiler overcounts nested function calls on both the engine side and the underlying ATen library side, so the sum of all durations will exceed the actual total runtime. 22 | Columns in the output Excel file: 23 | - `name`: kernel name from PyTorch ATen library (the native C++ Tensor library) 24 | - `ts` : time stamp of the first recorded call 25 | - `dur` : total execution time in us, summed over all calls 26 | - `tid` : 0 for CPU forward path; 1 for CPU backward path; N+2 for GPU N (tid2 refers to GPU 0) 27 | - `call_num` : number of times the kernel was called 28 | Sort by the `dur` column to find the hotspot kernels. 
################################################################
# Process the Chrome-trace JSON file exported by the PyTorch
# autograd profiler and summarise it, one row per operator, in
# an Excel workbook.
#
# Example of producing the input file:
#
#   import torch
#   x = torch.randn((1, 1), requires_grad=True)
#   with torch.autograd.profiler.profile() as prof:
#       y = x ** 2
#       y.backward()
#
#   print(prof)
#   prof.export_chrome_trace("result.json")
#
# After getting the json file, run this script from a terminal:
#   python process.py --input result.json --output result.xlsx
################################################################

import argparse
import json

# Worksheet columns, in the order they are written. 'call_num' is not part
# of the trace; it is the per-operator call count added during aggregation.
COLUMNS = ('name', 'ph', 'ts', 'dur', 'tid', 'pid', 'call_num')


def aggregate_events(events):
    """Merge trace events that share the same (name, pid, tid).

    Events carrying a 'cat' field are skipped. For every remaining group
    the first event seen supplies the row's fields (so 'ts' is that first
    event's time stamp), 'dur' is the sum over the group, and 'call_num'
    is the number of events in the group.

    Args:
        events: iterable of event dicts as found in the exported trace.

    Returns:
        A list of new dicts, one per group, in first-seen order. The input
        dicts are not modified.
    """
    merged = {}
    for event in events:
        if 'cat' in event:
            continue
        # A tuple key avoids both the TypeError of concatenating a
        # non-string pid and accidental collisions such as
        # 'a' + 'bc' == 'ab' + 'c'.
        key = (event.get('name'), event.get('pid'), event.get('tid'))
        row = merged.get(key)
        if row is None:
            # Copy so the caller's event dicts stay untouched.
            row = dict(event)
            row['call_num'] = 1
            merged[key] = row
            continue
        row['call_num'] += 1
        if 'dur' in event:
            # Events without a duration simply contribute nothing.
            row['dur'] = row.get('dur', 0) + event['dur']
    return list(merged.values())


def main(args):
    """Convert the trace at ``args.input`` into the workbook ``args.output``.

    Args:
        args: namespace with ``input`` (path of the JSON trace) and
            ``output`` (path of the .xlsx file to create).
    """
    # Third-party packages are imported here so that aggregate_events can
    # be imported and used with nothing but the standard library.
    import xlsxwriter
    from tqdm import tqdm

    with open(args.input, encoding='utf-8') as trace_file:
        trace = json.load(trace_file)

    # The trace is either a bare array of events or an object that holds
    # the array under 'traceEvents'; accept both layouts.
    if isinstance(trace, dict):
        events = trace.get('traceEvents', [])
    else:
        events = trace

    rows = aggregate_events(tqdm(events))

    workbook = xlsxwriter.Workbook(args.output)
    worksheet = workbook.add_worksheet()

    for col, column in enumerate(COLUMNS):
        worksheet.write(0, col, column)  # header row
    for row_idx, row in enumerate(rows, start=1):
        for col, column in enumerate(COLUMNS):
            value = row.get(column)
            if value is not None:  # leave the cell empty when absent
                worksheet.write(row_idx, col, value)

    workbook.close()


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='manual to this script')
    # Both paths are mandatory: without them there is nothing to read or
    # write, so let argparse report the problem instead of open(None).
    parser.add_argument('--input', type=str, required=True,
                        help='JSON trace written by prof.export_chrome_trace()')
    parser.add_argument('--output', type=str, required=True,
                        help='path of the .xlsx file to create')
    main(parser.parse_args())