├── python
    ├── .gitignore
    ├── prof_common.py
    ├── dvdt_prof_cli
    │   ├── dvdt_parser-v0.1.py
    │   └── dvdt_function.py
    ├── prof_wrangler.py
    └── prof_parser.py
├── tests
    ├── pipe.sh
    ├── .gitignore
    ├── clEnqueueReadBuffer.py
    ├── clEnqueueWriteBuffer.py
    ├── clCreateKernel.cpp
    ├── clSetKernelArg.cpp
    ├── clSetKernelArg_str.cpp
    ├── clCreateKernelsInProgram.cpp
    ├── clCreateBuffer.cpp
    ├── clCreateCommandQueue.cpp
    ├── clCreateProgramWithSource.cpp
    ├── clBuildProgram.cpp
    ├── clCreateProgramWithBinary.cpp
    ├── clEnqueueReadBuffer.cpp
    ├── clEnqueueWriteBuffer.cpp
    ├── clEnqueueNDRangeKernel.cpp
    ├── clEnqueueNDRangeKernel_LWS.cpp
    ├── clEnqueueNDRangeKernel_LWS_NULL.cpp
    ├── README.md
    ├── clCreateKernel.py
    ├── clCreateCommandQueue.py
    ├── clSetKernelArg.py
    ├── clSetKernelArg_str.py
    ├── clCreateBuffer.py
    ├── clCreateProgramWithSource.py
    ├── clCreateKernelsInProgram.py
    ├── CMakeLists.txt
    ├── clBuildProgram.py
    ├── clCreateProgramWithBinary.py
    ├── clEnqueueNDRangeKernel.py
    ├── clEnqueueReadOrWriteBuffer.py
    ├── clEnqueueNDRangeKernel_LWS_NULL.py
    └── clEnqueueNDRangeKernel_LWS.py
├── .gitignore
├── CONTRIBUTORS.txt
├── CHANGES.txt
├── cpp
    ├── prof_info.hpp.in
    ├── prof.cpp
    └── prof.hpp
├── LICENSE.txt
├── CMakeLists.txt
└── README.md


/python/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | 


--------------------------------------------------------------------------------
/tests/pipe.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | $1 2>&1 | $2
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | prof.so
2 | libprof_test.so
3 | Testing/
4 | 


--------------------------------------------------------------------------------
/tests/.gitignore:
--------------------------------------------------------------------------------
1 | libprof_test.so
2 | *.exe
3 | *.pyc
4 | 


--------------------------------------------------------------------------------
/CONTRIBUTORS.txt:
--------------------------------------------------------------------------------
1 | Anton Lokhmotov, anton@dividiti.com
2 | Grigori Fursin, grigori@dividiti.com
3 | 


--------------------------------------------------------------------------------
/CHANGES.txt:
--------------------------------------------------------------------------------
1 | v0.2 First public release.
2 | * Output JSON online.
3 | 
4 | v0.1 Internal development.
5 | * Output to stdout online; parse to JSON offline.
6 | 


--------------------------------------------------------------------------------
/tests/clEnqueueReadBuffer.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import sys, re
 3 | 
 4 | sys.path.append('.')
 5 | from clEnqueueReadOrWriteBuffer import run
 6 | 
 7 | # Test info.
 8 | call = 'clEnqueueReadBuffer'
 9 | _id  = ''
10 | 
11 | # Run test.
12 | run(call, _id)
13 | 


--------------------------------------------------------------------------------
/cpp/prof_info.hpp.in:
--------------------------------------------------------------------------------
1 | // 2015-2017 (c) dividiti
2 | 
3 | // The configured options and settings for Prof.
4 | #define Prof_VERSION_MAJOR @Prof_VERSION_MAJOR@
5 | #define Prof_VERSION_MINOR @Prof_VERSION_MINOR@
6 | #define Prof_COPYRIGHT_DIVIDITI @Prof_COPYRIGHT_DIVIDITI@
7 | 


--------------------------------------------------------------------------------
/tests/clEnqueueWriteBuffer.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import sys, re
 3 | 
 4 | sys.path.append('.')
 5 | from clEnqueueReadOrWriteBuffer import run
 6 | 
 7 | # Test info.
 8 | call = 'clEnqueueWriteBuffer'
 9 | _id  = ''
10 | 
11 | # Run test.
12 | run(call, _id)
13 | 


--------------------------------------------------------------------------------
/tests/clCreateKernel.cpp:
--------------------------------------------------------------------------------
 1 | #include <CL/opencl.h>
 2 | #include <cassert>
 3 | 
 4 | int main()
 5 | {
 6 |     cl_program program = (cl_program) 0x01234567;
 7 |     const char * kernel_name = "DGEMM_NT_2x2";
 8 |     cl_int * errcode = (cl_int *) 0x12345678;
 9 | 
10 |     cl_kernel kernel = clCreateKernel(program, kernel_name, errcode);
11 |     assert((cl_kernel) 0x00000000 == kernel);
12 | 
13 |     return 0;
14 | }
15 | 


--------------------------------------------------------------------------------
/tests/clSetKernelArg.cpp:
--------------------------------------------------------------------------------
 1 | #include <CL/opencl.h>
 2 | #include <cassert>
 3 | 
 4 | int main()
 5 | {
 6 |     cl_kernel kernel = (cl_kernel) 0x01234567;
 7 |     cl_uint arg_index = 1;
 8 |     cl_ushort arg_value = 1234;
 9 |     size_t arg_size = 2;
10 | 
11 |     cl_int errcode = clSetKernelArg(kernel, arg_index, arg_size, (const void*) &arg_value);
12 |     assert(CL_SUCCESS == errcode);
13 | 
14 |     return 0;
15 | }
16 | 


--------------------------------------------------------------------------------
/tests/clSetKernelArg_str.cpp:
--------------------------------------------------------------------------------
 1 | #include <CL/opencl.h>
 2 | #include <cassert>
 3 | 
 4 | int main()
 5 | {
 6 |     cl_kernel kernel = (cl_kernel) 0x01234567;
 7 |     cl_uint arg_index = 2;
 8 |     char arg_value[] = "hello world";
 9 |     size_t arg_size = 11;
10 |     assert(sizeof(arg_value) == arg_size+1);
11 | 
12 |     cl_int errcode = clSetKernelArg(kernel, arg_index, arg_size, (const void*) &arg_value);
13 |     assert(CL_SUCCESS == errcode);
14 | 
15 |     return 0;
16 | }
17 | 


--------------------------------------------------------------------------------
/tests/clCreateKernelsInProgram.cpp:
--------------------------------------------------------------------------------
 1 | #include <CL/opencl.h>
 2 | #include <cassert>
 3 | 
 4 | int main()
 5 | {
 6 |     cl_program program = (cl_program) 0x01234567;
 7 |     cl_uint num_kernels = 2;
 8 |     cl_kernel * kernels = (cl_kernel *) 0x12345678;
 9 |     cl_uint * num_kernels_ret_ptr = (cl_uint *) 0x23456789;
10 | 
11 |     cl_int errcode = clCreateKernelsInProgram(program, num_kernels, kernels, num_kernels_ret_ptr);
12 |     assert(CL_SUCCESS == errcode);
13 | 
14 |     return 0;
15 | }
16 | 


--------------------------------------------------------------------------------
/tests/clCreateBuffer.cpp:
--------------------------------------------------------------------------------
 1 | #include <CL/opencl.h>
 2 | #include <cassert>
 3 | 
 4 | int main()
 5 | {
 6 |     cl_context context = (cl_context) 0x01234567;
 7 |     cl_mem_flags flags = (cl_mem_flags) 17; // CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR;
 8 |     size_t size = 4096;
 9 |     void * host_ptr = (void *) 0x12345678;
10 |     cl_int * errcode = (cl_int *) 0x23456789;
11 |     
12 |     cl_mem buffer = clCreateBuffer(context, flags, size, host_ptr, errcode);
13 |     assert((cl_mem) 0x00000000 == buffer);
14 | 
15 |     return 0;
16 | }
17 | 


--------------------------------------------------------------------------------
/tests/clCreateCommandQueue.cpp:
--------------------------------------------------------------------------------
 1 | #include <CL/opencl.h>
 2 | #include <cassert>
 3 | 
 4 | int main()
 5 | {
 6 |     cl_context context = (cl_context) 0x01234567;
 7 |     cl_device_id device = (cl_device_id) 0x12345678;
 8 |     cl_command_queue_properties properties = (cl_command_queue_properties) 0;
 9 |     cl_int * errcode = (cl_int *) 0x23456789;
10 |     
11 |     cl_command_queue queue = clCreateCommandQueue(context, device, properties, errcode);
12 |     assert(((cl_command_queue) 0x00000000 == queue));
13 | 
14 |     return 0;
15 | }
16 | 


--------------------------------------------------------------------------------
/tests/clCreateProgramWithSource.cpp:
--------------------------------------------------------------------------------
 1 | #include <CL/opencl.h>
 2 | #include <cassert>
 3 | 
 4 | int main()
 5 | {
 6 |     cl_context context = (cl_context) 0x01234567;
 7 |     cl_uint count = 1;
 8 |     const char * strings[1] = { "kernel void f() {}" };
 9 |     const size_t * lengths = (const size_t *) 0x00000000;
10 |     cl_int * errcode = (cl_int *) 0x12345678;
11 | 
12 |     cl_program program = clCreateProgramWithSource(context, count, strings, lengths, errcode);
13 |     assert((cl_program) 0x00000000 == program);
14 | 
15 |     return 0;
16 | }
17 | 


--------------------------------------------------------------------------------
/tests/clBuildProgram.cpp:
--------------------------------------------------------------------------------
 1 | #include <CL/opencl.h>
 2 | #include <cassert>
 3 | 
 4 | typedef void (CL_CALLBACK pfn_notify_t)(cl_program program, void * user_data);
 5 | 
 6 | int main()
 7 | {
 8 |     cl_program program = (cl_program) 0x01234567;
 9 |     const cl_uint num_devices = 2;
10 |     cl_device_id device_list[2] = { (cl_device_id) 0x12345678, (cl_device_id) 0x23456789 };
11 |     const char * options = "-Werror -DN=1024";
12 |     pfn_notify_t * pfn_notify = (pfn_notify_t *) 0x3456789a;
13 |     void * user_data = (void *) 0x456789ab;
14 | 
15 |     cl_int errcode = clBuildProgram(program, num_devices, device_list, options, pfn_notify, user_data);
16 |     assert(CL_SUCCESS == errcode);
17 | 
18 |     return 0;
19 | }
20 | 


--------------------------------------------------------------------------------
/tests/clCreateProgramWithBinary.cpp:
--------------------------------------------------------------------------------
 1 | #include <CL/opencl.h>
 2 | #include <cassert>
 3 | 
 4 | int main()
 5 | {
 6 |     cl_context context = (cl_context) 0x01234567;
 7 |     cl_uint num_devices = 2;
 8 |     cl_device_id device_list[2] = { (cl_device_id) 0x12345678, (cl_device_id) 0x12345678 };
 9 |     const size_t * lengths = (const size_t *) 0x23456789;
10 |     const unsigned char ** binaries = (const unsigned char **) 0x3456789A;
11 |     cl_int * binary_status = (cl_int *) 0x456789AB;
12 |     cl_int * errcode_ret = (cl_int *) 0x56789ABC;
13 | 
14 |     cl_program program = clCreateProgramWithBinary(context,
15 |         num_devices, device_list,
16 |         lengths, binaries,
17 |         binary_status, errcode_ret);
18 |     assert((cl_program) 0x00000000 == program);
19 | 
20 |     return 0;
21 | }
22 | 


--------------------------------------------------------------------------------
/python/prof_common.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # 2015-2017 (c) dividiti
 3 | #
 4 | 
 5 | import re
 6 | import json
 7 | 
 8 | #
 9 | # Common definitions.
10 | #
11 | 
# Regular-expression fragments describing the fields of a profiler output line.
# They are written as raw strings so that the backslashes reach the `re` module
# unchanged, instead of relying on Python leaving unrecognised escape sequences
# (such as '\[' or '\d') untouched, which is deprecated behaviour.
prefix = r'(\[dv\/dt\])'                    # the literal "[dv/dt]" marker
call_regex = r'(cl[a-zA-Z]*)'               # "cl" followed by letters only
opts_regex = r'([ \-\w_=]*)'                # spaces, dashes, word chars and '='
iso_regex  = r'(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6})'  # timestamp with 6 fractional digits
ptr_regex  = r'((0x[0-9a-fA-F]{1,8})|(0))'  # "0x" plus 1-8 hex digits, or a bare "0"
int_regex  = r'(\d+)'                       # one or more decimal digits
hex_regex  = r'([a-fA-F\d]+)'               # one or more hex digits, no "0x" prefix
bool_regex = r'(\d)'                        # a single decimal digit
20 | 
# Sanity check: confirm that the definitions in this module can be imported.
def test():
    """Print this module's file name."""
    module_name = "prof_common.py"
    print(module_name)
24 | 
# Interpret a hexadecimal string as an integer, reversing its byte order first.
def hex_str_as_int(hex_str):
    """Return the integer encoded by `hex_str` after reversing its byte pairs.

    The string is cut into two-character chunks from the left (a trailing
    single character forms its own chunk), the chunks are put in reverse
    order, and the result is parsed as a base-16 number.
    E.g. 'd204' becomes '04d2', i.e. 1234.
    """
    byte_pairs = []
    for start in range(0, len(hex_str), 2):
        byte_pairs.append(hex_str[start:start + 2])
    swapped = ''.join(byte_pairs[::-1])
    return int(swapped, 16)
31 | 
# Convert hexadecimal string into text string.
def hex_str_as_str(hex_str):
    """Decode a string of hex digit pairs into the text it encodes.

    E.g. '68656c6c6f' becomes 'hello'.

    The original `hex_str.decode('hex')` only exists on Python 2 strings;
    `binascii.unhexlify` is available on both Python 2 and Python 3.

    Raises whatever `binascii.unhexlify` raises for malformed input
    (odd length or non-hex characters).
    """
    import binascii

    raw = binascii.unhexlify(hex_str)
    if isinstance(raw, str):
        # Python 2: unhexlify already returns a (byte) str, as decode('hex') did.
        return raw
    # Python 3: map each byte to the character with the same code point, which
    # mirrors the Python 2 byte string and cannot fail on any byte value.
    return raw.decode('latin-1')
35 | 


--------------------------------------------------------------------------------
/tests/clEnqueueReadBuffer.cpp:
--------------------------------------------------------------------------------
 1 | #include <CL/opencl.h>
 2 | #include <cassert>
 3 | #include <iostream>
 4 | 
 5 | int main()
 6 | {
 7 |     cl_command_queue queue = (cl_command_queue) 0x01234567;
 8 |     cl_mem buffer = (cl_mem) 0x12345678;
 9 |     cl_bool blocking = 1;
10 |     size_t offset = 44;
11 |     size_t size = 55;
12 |     void *ptr = (void *) 0x23456789;
13 |     cl_uint num_events_in_wait_list = 3;
14 |     cl_event event_wait_list[3] = { (cl_event) 0x3456789a, (cl_event) 0x456789ab, (cl_event) 0x56789abc };
15 |     cl_event * event = (cl_event *) 0;
16 | 
17 |     cl_int errcode = clEnqueueReadBuffer(queue, buffer, blocking, offset, size, ptr, num_events_in_wait_list, event_wait_list, event);
18 |     assert(CL_SUCCESS == errcode);
19 | 
20 |     // Uncomment to emulate ostream profiling output (deprecated approach).
21 |     // NB: Pattern matching still works even when it's commented out.
22 |     // std::cout << "[dv/dt] clEnqueueReadBuffer profiling 100200300400 100200300500 100200300600 100200300700\n";
23 | 
24 |     return 0;
25 | }
26 | 


--------------------------------------------------------------------------------
/tests/clEnqueueWriteBuffer.cpp:
--------------------------------------------------------------------------------
 1 | #include <CL/opencl.h>
 2 | #include <cassert>
 3 | #include <iostream>
 4 | 
 5 | int main()
 6 | {
 7 |     cl_command_queue queue = (cl_command_queue) 0x01234567;
 8 |     cl_mem buffer = (cl_mem) 0x12345678;
 9 |     cl_bool blocking = 1;
10 |     size_t offset = 44;
11 |     size_t size = 55;
12 |     const void *ptr = (const void *) 0x23456789;
13 |     cl_uint num_events_in_wait_list = 3;
14 |     cl_event event_wait_list[3] = { (cl_event) 0x3456789a, (cl_event) 0x456789ab, (cl_event) 0x56789abc };
15 |     cl_event * event = (cl_event *) 0;
16 | 
17 |     cl_int errcode = clEnqueueWriteBuffer(queue, buffer, blocking, offset, size, ptr, num_events_in_wait_list, event_wait_list, event);
18 |     assert(CL_SUCCESS == errcode);
19 | 
20 |     // Uncomment to emulate ostream profiling output (deprecated approach).
21 |     // NB: Pattern matching still works even when it's commented out.
22 |     // std::cout << "[dv/dt] clEnqueueWriteBuffer profiling 100200300400 100200300500 100200300600 100200300700\n";
23 | 
24 |     return 0;
25 | }
26 | 


--------------------------------------------------------------------------------
/tests/clEnqueueNDRangeKernel.cpp:
--------------------------------------------------------------------------------
 1 | #include <CL/opencl.h>
 2 | #include <cassert>
 3 | #include <iostream>
 4 | 
 5 | int main()
 6 | {
 7 |     cl_command_queue queue = (cl_command_queue) 0x01234567;
 8 |     cl_kernel kernel = (cl_kernel) 0x12345678;
 9 |     cl_uint work_dim = 2;
10 |     size_t global_work_offset[2] = { 0, 1 };
11 |     size_t global_work_size[2]   = { 1024, 2 };
12 |     size_t * local_work_size = NULL;
13 |     cl_uint num_events_in_wait_list = 2;
14 |     cl_event event_wait_list[2] = { (cl_event) 0x23456789, (cl_event) 0x3456789a };
15 |     cl_event * event = (cl_event *) 0x456789ab;
16 | 
17 |     cl_int errcode = clEnqueueNDRangeKernel(queue, kernel, \
18 |         work_dim, global_work_offset, global_work_size, local_work_size, num_events_in_wait_list, event_wait_list, event);
19 |     assert(CL_SUCCESS == errcode);
20 | 
21 |     // Uncomment to emulate ostream profiling output (deprecated approach).
22 |     // NB: Pattern matching still works even when it's commented out.
23 |     // std::cout << "[dv/dt] clEnqueueNDRangeKernel profiling 100200300400 100200300500 100200300600 100200300700\n";
24 | 
25 |     return 0;
26 | }
27 | 


--------------------------------------------------------------------------------
/tests/clEnqueueNDRangeKernel_LWS.cpp:
--------------------------------------------------------------------------------
 1 | #include <CL/opencl.h>
 2 | #include <cassert>
 3 | #include <iostream>
 4 | 
 5 | int main()
 6 | {
 7 |     cl_command_queue queue = (cl_command_queue) 0x01234567;
 8 |     cl_kernel kernel = (cl_kernel) 0x12345678;
 9 |     cl_uint work_dim = 2;
10 |     size_t global_work_offset[2] = { 0, 1 };
11 |     size_t global_work_size[2]   = { 1024, 4 };
12 |     size_t local_work_size[2]    = { 128, 2 };
13 |     cl_uint num_events_in_wait_list = 2;
14 |     cl_event event_wait_list[2] = { (cl_event) 0x23456789, (cl_event) 0x3456789a };
15 |     cl_event * event = (cl_event *) 0x456789ab;
16 | 
17 |     cl_int errcode = clEnqueueNDRangeKernel(queue, kernel, \
18 |         work_dim, global_work_offset, global_work_size, local_work_size, num_events_in_wait_list, event_wait_list, event);
19 |     assert(CL_SUCCESS == errcode);
20 | 
21 |     // Uncomment to emulate ostream profiling output (deprecated approach).
22 |     // NB: Pattern matching still works even when it's commented out.
23 |     // std::cout << "[dv/dt] clEnqueueNDRangeKernel profiling 100200300400 100200300500 100200300600 100200300700\n";
24 | 
25 |     return 0;
26 | }
27 | 


--------------------------------------------------------------------------------
/tests/clEnqueueNDRangeKernel_LWS_NULL.cpp:
--------------------------------------------------------------------------------
 1 | #include <CL/opencl.h>
 2 | #include <cassert>
 3 | #include <iostream>
 4 | 
 5 | int main()
 6 | {
 7 |     cl_command_queue queue = (cl_command_queue) 0x01234567;
 8 |     cl_kernel kernel = (cl_kernel) 0x12345678;
 9 |     cl_uint work_dim = 2;
10 |     size_t global_work_offset[2] = { 0, 1 };
11 |     size_t global_work_size[2]   = { 1024, 4 };
12 |     size_t local_work_size[2]    = { 128, 2 };
13 |     cl_uint num_events_in_wait_list = 2;
14 |     cl_event event_wait_list[2] = { (cl_event) 0x23456789, (cl_event) 0x3456789a };
15 |     cl_event * event = (cl_event *) 0x456789ab;
16 | 
17 |     cl_int errcode = clEnqueueNDRangeKernel(queue, kernel, \
18 |         work_dim, global_work_offset, global_work_size, local_work_size, num_events_in_wait_list, event_wait_list, event);
19 |     assert(CL_SUCCESS == errcode);
20 | 
21 |     // Uncomment to emulate ostream profiling output (deprecated approach).
22 |     // NB: Pattern matching still works even when it's commented out.
23 |     // std::cout << "[dv/dt] clEnqueueNDRangeKernel profiling 100200300400 100200300500 100200300600 100200300700\n";
24 | 
25 |     return 0;
26 | }
27 | 


--------------------------------------------------------------------------------
/tests/README.md:
--------------------------------------------------------------------------------
 1 | # Rationale.
 2 | 
 3 | The initial unit tests (for `clCreateKernel()`, `clEnqueueNDRangeKernel()`, etc.) are written using a C program and a Python program with the same base file name.
 4 | 
 5 | The C program gets compiled and run with `libprof_test.so` so that the output resembles that of `libprof.so`. The only difference is that `libprof_test.so` only intercepts the arguments but does not pass them further into a real `libOpenCL.so` library. Indeed, the C program uses some random values for pointer arguments (e.g. `0x12345678`) so calling `libOpenCL.so` would result in a segmentation fault.
 6 | 
 7 | The output of the C program is input into the Python program. The Python program parses the output using the parser in `python/prof_parser.py` producing a dictionary called `result`. The Python program also parses the C program file to extract the original values producing a dictionary called `source`. Finally, the two dictionaries are compared for equality.
 8 | 
 9 | Getting the original values by parsing the C program is arguably hard. (Perhaps harder than it should be.) Another approach would be to generate the C program from a template. On the other hand, writing the Python program first helps writing the parser (cf. test driven development). Proper comparison of the approaches is left for future work.
10 | 


--------------------------------------------------------------------------------
/tests/clCreateKernel.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import sys
 3 | import re
 4 | import os
 5 | import json
 6 | 
 7 | sys.path.append('../python')
 8 | from prof_parser import prof_parse
 9 | from prof_parser import ptr_regex
10 | from prof_parser import opts_regex
11 | 
12 | # Test info.
13 | call = 'clCreateKernel'
14 | _id  = ''
15 | print '%s%s' % (call, _id)
16 | 
17 | # Parse test source.
18 | source = {}
19 | with open(call + _id + '.cpp', 'r') as f:
20 |     source['text'] = f.read()
21 |     source['program'] = re.search('\(cl_program\) (?P<program>%s)' % ptr_regex, source['text']).group('program')
22 |     source['name'] = re.search('kernel_name = \"(?P<name>%s)\"' % opts_regex, source['text']).group('name')
23 |     # The following should match the assert statement.
24 |     source['kernel'] = re.search('\(cl_kernel\) (?P<kernel>%s)' % ptr_regex, source['text']).group('kernel')
25 | 
26 | # Read from stdin (via pipe).
27 | output = sys.stdin.read()
28 | print 'OUTPUT'
29 | print output
30 | 
31 | result = prof_parse(output)[0]
32 | print 'RESULT'
33 | print result
34 | print
35 | 
36 | status = True
37 | status &= (source['program'] == result['program'])
38 | status &= (source['name'] == result['name'])
39 | status &= (source['kernel'] == result['kernel'])
40 | 
41 | print '%s%s: %s' % (call, _id, 'PASSED' if status else 'FAILED')
42 | print
43 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | 2015-2017 (c) dividiti and contributors
 2 | 
 3 | Redistribution and use in source and binary forms, with or without
 4 | modification, are permitted provided that the following conditions are met:
 5 | 
 6 |     1. Redistributions of source code must retain the above copyright notice,
 7 |     this list of conditions and the following disclaimer.
 8 | 
 9 |     2. Redistributions in binary form must reproduce the above copyright
10 |     notice, this list of conditions and the following disclaimer in the
11 |     documentation and/or other materials provided with the distribution.
12 | 
13 |     3. Neither the name of dividiti nor the names of contributors may be used
14 |     to endorse or promote products derived from this software without specific
15 |     prior written permission.
16 | 
17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
21 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 | 


--------------------------------------------------------------------------------
/tests/clCreateCommandQueue.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import sys
 3 | import re
 4 | import os
 5 | import json
 6 | 
 7 | sys.path.append('../python')
 8 | from prof_parser import prof_parse
 9 | from prof_parser import ptr_regex
10 | 
11 | # Test info.
12 | call = 'clCreateCommandQueue'
13 | _id  = ''
14 | print '%s%s' % (call, _id)
15 | 
16 | # Parse test source.
17 | source = {}
18 | with open(call + _id + '.cpp', 'r') as f:
19 |     source['text'] = f.read()
20 |     source['context'] = re.search('\(cl_context\) (?P<context>%s)' % ptr_regex, source['text']).group('context')
21 |     source['device'] = re.search('\(cl_device_id\) (?P<device>%s)' % ptr_regex, source['text']).group('device')
22 |     source['properties'] = int(re.search('\(cl_command_queue_properties\) (?P<props>\d*)', source['text']).group('props'))
23 |     source['errcode_ret'] = re.search('\(cl_int \*\) (?P<errcode_ret>%s)' % ptr_regex, source['text']).group('errcode_ret')
24 |     # The following should match the assert statement.
25 |     source['queue'] = re.search('\(cl_command_queue\) (?P<queue>%s)' % ptr_regex, source['text']).group('queue')
26 | 
27 | # Read from stdin (via pipe).
28 | output = sys.stdin.read()
29 | print 'OUTPUT'
30 | print output
31 | 
32 | result = prof_parse(output)[0]
33 | print 'RESULT'
34 | print result
35 | print
36 | 
37 | status = True
38 | status &= (source['context'] == result['context'])
39 | status &= (source['device'] == result['device'])
40 | status &= (source['properties'] == result['properties'])
41 | status &= (source['errcode_ret']  == result['errcode_ret'])
42 | 
43 | print '%s%s: %s' % (call, _id, 'PASSED' if status else 'FAILED')
44 | print
45 | 


--------------------------------------------------------------------------------
/tests/clSetKernelArg.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import sys
 3 | import re
 4 | import os
 5 | import json
 6 | 
 7 | sys.path.append('../python')
 8 | from prof_parser import prof_parse
 9 | from prof_parser import ptr_regex
10 | from prof_parser import opts_regex
11 | from prof_common import hex_str_as_int
12 | 
13 | # Test info.
14 | call = 'clSetKernelArg'
15 | _id  = ''
16 | print '%s%s' % (call, _id)
17 | 
18 | # Parse test source.
19 | source = {}
20 | with open(call + _id + '.cpp', 'r') as f:
21 |     source['text'] = f.read()
22 |     source['kernel'] = re.search('\(cl_kernel\) (?P<kernel>%s)' % ptr_regex, source['text']).group('kernel')
23 |     source['arg_index'] = int(re.search('arg_index(\s*)=(\s*)(?P<arg_index>\d+)', source['text']).group('arg_index'))
24 |     source['arg_value_as_int'] = int(re.search('arg_value(\s*)=(\s*)(?P<arg_value>\d+)', source['text']).group('arg_value'))
25 |     source['arg_size'] = int(re.search('arg_size(\s*)=(\s*)(?P<arg_size>\d+)', source['text']).group('arg_size'))
26 | 
27 | # Read from stdin (via pipe).
28 | output = sys.stdin.read()
29 | print 'OUTPUT'
30 | print output
31 | 
32 | result = prof_parse(output)[0]
33 | result['arg_value_as_int'] = hex_str_as_int(result['arg_value'])
34 | print 'RESULT'
35 | print result
36 | print
37 | 
38 | 
39 | status = True
40 | status &= (source['kernel'] == result['kernel'])
41 | status &= (source['arg_index'] == result['arg_index'])
42 | status &= (source['arg_size'] == result['arg_size'])
43 | status &= (source['arg_value_as_int'] == result['arg_value_as_int'])
44 | status &= (0 == result['errcode'])
45 | 
46 | print '%s%s: %s' % (call, _id, 'PASSED' if status else 'FAILED')
47 | print
48 | 


--------------------------------------------------------------------------------
/tests/clSetKernelArg_str.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import sys
 3 | import re
 4 | import os
 5 | import json
 6 | 
 7 | sys.path.append('../python')
 8 | from prof_parser import prof_parse
 9 | from prof_parser import ptr_regex
10 | from prof_parser import opts_regex
11 | from prof_common import hex_str_as_str
12 | 
13 | # Test info.
14 | call = 'clSetKernelArg'
15 | _id  = '_str'
16 | print '%s%s' % (call, _id)
17 | 
18 | # Parse test source.
19 | source = {}
20 | with open(call + _id + '.cpp', 'r') as f:
21 |     source['text'] = f.read()
22 |     source['kernel'] = re.search('\(cl_kernel\) (?P<kernel>%s)' % ptr_regex, source['text']).group('kernel')
23 |     source['arg_index'] = int(re.search('arg_index(\s*)=(\s*)(?P<arg_index>\d+)', source['text']).group('arg_index'))
24 |     source['arg_value_as_str'] = re.search('arg_value\[\](\s*)=(\s*)\"(?P<arg_value>.+)\"', source['text']).group('arg_value')
25 |     source['arg_size'] = int(re.search('arg_size(\s*)=(\s*)(?P<arg_size>\d+)', source['text']).group('arg_size'))
26 | 
27 | # Read from stdin (via pipe).
28 | output = sys.stdin.read()
29 | print 'OUTPUT'
30 | print output
31 | 
32 | result = prof_parse(output)[0]
33 | result['arg_value_as_str'] = hex_str_as_str(result['arg_value'])
34 | print 'RESULT'
35 | print result
36 | print
37 | 
38 | 
39 | status = True
40 | status &= (source['kernel'] == result['kernel'])
41 | status &= (source['arg_index'] == result['arg_index'])
42 | status &= (source['arg_size'] == result['arg_size'])
43 | status &= (source['arg_value_as_str'] == result['arg_value_as_str'])
44 | status &= (0 == result['errcode'])
45 | 
46 | print '%s%s: %s' % (call, _id, 'PASSED' if status else 'FAILED')
47 | print
48 | 


--------------------------------------------------------------------------------
/tests/clCreateBuffer.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import sys
 3 | import re
 4 | import os
 5 | import json
 6 | 
 7 | sys.path.append('../python')
 8 | from prof_parser import prof_parse
 9 | from prof_parser import ptr_regex
10 | 
11 | # Test info.
12 | call = 'clCreateBuffer'
13 | _id  = ''
14 | print '%s%s' % (call, _id)
15 | 
16 | # Parse test source.
17 | source = {}
18 | with open(call + _id + '.cpp', 'r') as f:
19 |     source['text'] = f.read()
20 |     source['context'] = re.search('\(cl_context\) (?P<context>%s)' % ptr_regex, source['text']).group('context')
21 |     source['flags'] = int(re.search('\(cl_mem_flags\) (?P<flags>\d*)', source['text']).group('flags'))
22 |     source['size'] = int(re.search('size(\s*)=(\s*)(?P<size>\d+)', source['text']).group('size'))
23 |     source['host_ptr'] = re.search('\(void \*\) (?P<host_ptr>%s)' % ptr_regex, source['text']).group('host_ptr')
24 |     source['errcode_ret'] = re.search('\(cl_int \*\) (?P<errcode_ret>%s)' % ptr_regex, source['text']).group('errcode_ret')
25 |     # The following should match the assert statement.
26 |     source['buffer'] = re.search('\(cl_mem\) (?P<buffer>%s)' % ptr_regex, source['text']).group('buffer')
27 | 
28 | # Read from stdin (via pipe).
29 | output = sys.stdin.read()
30 | print 'OUTPUT'
31 | print output
32 | 
33 | result = prof_parse(output)[0]
34 | print 'RESULT'
35 | print result
36 | print
37 | 
38 | status = True
39 | status &= (source['context'] == result['context'])
40 | status &= (source['flags'] == result['flags'])
41 | status &= (source['size'] == result['size'])
42 | status &= (source['host_ptr']  == result['host_ptr'])
43 | status &= (source['errcode_ret']  == result['errcode_ret'])
44 | 
45 | print '%s%s: %s' % (call, _id, 'PASSED' if status else 'FAILED')
46 | print
47 | 


--------------------------------------------------------------------------------
/tests/clCreateProgramWithSource.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import sys
 3 | import re
 4 | import os
 5 | import json
 6 | 
 7 | sys.path.append('../python')
 8 | from prof_parser import prof_parse
 9 | from prof_parser import ptr_regex
10 | 
11 | # Test info.
12 | call = 'clCreateProgramWithSource'
13 | _id  = ''
14 | print '%s%s' % (call, _id)
15 | 
16 | # Parse test source.
17 | source = {}
18 | with open(call + _id + '.cpp', 'r') as f:
19 |     source['text'] = f.read()
20 |     source['context'] = re.search('\(cl_context\) (?P<context>%s)' % ptr_regex, source['text']).group('context')
21 |     source['count'] = int(re.search('count(\s*)=(\s*)(?P<count>\d+)', source['text']).group('count'))
22 |     source['string0'] = re.search('strings\[\d+\](\s*)=(\s*)\{(\s*)"(?P<string0>.*)"', source['text']).group('string0')
23 |     source['lengths'] = re.search('\(const size_t \*\) (?P<lengths>%s)' % ptr_regex, source['text']).group('lengths')
24 |     source['errcode_ret'] = re.search('\(cl_int \*\) (?P<errcode_ret>%s)' % ptr_regex, source['text']).group('errcode_ret')
25 |     # The following should match the assert statement.
26 |     source['program'] = re.search('\(cl_program\) (?P<program>%s)' % ptr_regex, source['text']).group('program')
27 | 
28 | # Read from stdin (via pipe).
29 | output = sys.stdin.read()
30 | print 'OUTPUT'
31 | print output
32 | 
33 | result = prof_parse(output)[0]
34 | print 'RESULT'
35 | print result
36 | print
37 | 
38 | status = True
39 | status &= (source['context'] == result['context'])
40 | status &= (source['count'] == result['count'])
41 | status &= (source['string0'] == result['source']['0'])
42 | status &= (source['lengths'] == result['lengths'])
43 | status &= (source['errcode_ret']  == result['errcode_ret'])
44 | status &= (source['program']  == result['program'])
45 | 
46 | print '%s%s: %s' % (call, _id, 'PASSED' if status else 'FAILED')
47 | print
48 | 


--------------------------------------------------------------------------------
/tests/clCreateKernelsInProgram.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import sys
 3 | import re
 4 | import os
 5 | import json
 6 | 
 7 | sys.path.append('../python')
 8 | from prof_parser import prof_parse
 9 | from prof_parser import ptr_regex
10 | from prof_parser import opts_regex
11 | 
12 | # Test info.
13 | call = 'clCreateKernelsInProgram'
14 | _id  = ''
15 | print '%s%s' % (call, _id)
16 | 
17 | # Parse test source.
18 | source = {}
19 | with open(call + _id + '.cpp', 'r') as f:
20 |     source['text'] = f.read()
21 |     # cl_program program = (cl_program) 0x01234567;
22 |     source['program'] = re.search('\(cl_program\) (?P<program>%s)' % ptr_regex, source['text']).group('program')
23 |     # cl_uint num_kernels = 2;
24 |     source['num_kernels'] = int(re.search('num_kernels(\s*)=(\s*)(?P<num_kernels>\d+)', source['text']).group('num_kernels'))
25 |     # cl_kernel * kernels = (cl_kernel *) 0x12345678;
26 |     source['kernels'] = re.search('\(cl_kernel(\s*)\*\)(\s*)(?P<kernels>%s)' % ptr_regex, source['text']).group('kernels')
27 |     # cl_uint * num_kernels_ret_ptr = (cl_uint *) 0x23456789;
28 |     source['num_kernels_ret_ptr'] = re.search('\(cl_uint(\s*)\*\)(\s*)(?P<num_kernels_ret_ptr>%s)' % ptr_regex, source['text']).group('num_kernels_ret_ptr')
29 | 
30 | # Read from stdin (via pipe).
31 | output = sys.stdin.read()
32 | print 'OUTPUT'
33 | print output
34 | 
35 | result = prof_parse(output)[0]
36 | print 'RESULT'
37 | print result
38 | print
39 | 
40 | status = True
41 | status &= (source['program'] == result['program'])
42 | status &= (source['num_kernels'] == result['num_kernels'])
43 | status &= (source['kernels'] == result['kernels'])
44 | status &= (source['num_kernels_ret_ptr'] == result['num_kernels_ret_ptr'])
45 | status &= (0 == result['num_kernels_ret'])
46 | status &= (0 == result['errcode'])
47 | 
48 | print '%s%s: %s' % (call, _id, 'PASSED' if status else 'FAILED')
49 | print
50 | 


--------------------------------------------------------------------------------
/tests/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # 2015-2017 (c) dividiti
 2 | 
 3 | # Add an interceptor library that intercepts some calls in 'libOpenCL.so'.
 4 | add_library(prof_test SHARED ${SOURCE})
 5 | set_target_properties(prof_test
 6 |   PROPERTIES
 7 |     COMPILE_FLAGS "-D DVDT_PROF_TEST=1 -D DVDT_PROF_WALLCLOCK_TIMEOFDAY=1")
 8 | target_link_libraries(prof_test dl "${CJSON_LIB_PATH}")
 9 | 
10 | # Get path to 'libprof_test.so'.
11 | set(PROF_TEST $<TARGET_FILE:prof_test>)
12 | 
13 | # Add OpenCL API test sources.
14 | file(GLOB cl_api_test_sources ${PROJECT_SOURCE_DIR}/tests/cl*.cpp)
15 | list(LENGTH cl_api_test_sources num_cl_api_test_sources)
16 | message(STATUS "Using ${num_cl_api_test_sources} OpenCL API tests")
17 | 
18 | # Build OpenCL API test binaries.
19 | foreach(test_cpp ${cl_api_test_sources})
20 |     get_filename_component(test ${test_cpp} NAME_WE)
21 |     add_executable(${test} ${test_cpp})
22 |     add_dependencies(${test} prof_test)
23 |     target_link_libraries(${test} OpenCL)
24 |     list(APPEND cl_api_test_binaries ${test})
25 | endforeach()
26 | 
27 | # Add OpenCL API tests.
28 | foreach(test ${cl_api_test_binaries})
29 |     add_test(NAME ${test}
30 |         COMMAND
31 |             ${PROJECT_SOURCE_DIR}/tests/pipe.sh
32 |             "${PROJECT_BINARY_DIR}/bin/${test}"
33 |             "${PROJECT_SOURCE_DIR}/tests/${test}.py"
34 |         WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}/tests"
35 |     )
36 |     string(CONCAT cl_api_test_regex "${test}" ": PASSED")
37 |     set_tests_properties(${test}
38 |         PROPERTIES
39 |             PASS_REGULAR_EXPRESSION ${cl_api_test_regex}
40 |             ENVIRONMENT "LD_PRELOAD=${PROF_TEST};PARSE_JSON=${CJSON_SET}")
41 | endforeach()
42 | 
43 | set_property(TEST clEnqueueNDRangeKernel_LWS
44 |     APPEND PROPERTY ENVIRONMENT DVDT_PROF_LWS="dvdt_prof_kernel:1,2")
45 | set_property(TEST clEnqueueNDRangeKernel_LWS_NULL
46 |     APPEND PROPERTY ENVIRONMENT DVDT_PROF_LWS_NULL=1)
47 | 
48 | # Custom target: "make check"
49 | add_custom_target(check
50 |     COMMAND ${CMAKE_CTEST_COMMAND}
51 |     DEPENDS prof_test ${cl_api_test_binaries}
52 | )
53 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | #
 2 | # 2015-2017 (c) dividiti
 3 | #
 4 | 
 5 | cmake_minimum_required(VERSION 3.0)
 6 | 
 7 | project(Prof)
 8 | 
 9 | # The Prof copyright messages.
10 | set(Prof_COPYRIGHT_DIVIDITI "\"2015-2017 (c) dividiti\"")
11 | 
12 | # The Prof version.
13 | set(Prof_VERSION_MAJOR 0)
14 | set(Prof_VERSION_MINOR 2)
15 | message("dividiti OpenCL API Profiler v${Prof_VERSION_MAJOR}.${Prof_VERSION_MINOR}")
16 | 
17 | # The WALLCLOCK option.
18 | set(WALLCLOCK "boost" CACHE STRING "How to measure wall-clock time.")
19 | if(WALLCLOCK STREQUAL "boost")
20 |   message(STATUS "Measuring wall-clock time using boost::chrono")
21 |   add_definitions(-D DVDT_PROF_WALLCLOCK_BOOST=1)
22 |   include_directories("${BOOST_INCLUDE_DIR}")
23 |   SET(BOOST_LIB_PATH "${BOOST_LIB_DIR}/libboost_date_time.a")
24 | elseif(WALLCLOCK STREQUAL "timeofday")
25 |   message(STATUS "Measuring wall-clock time using gettimeofday()")
26 |   add_definitions(-D DVDT_PROF_WALLCLOCK_TIMEOFDAY=1)
27 | else()
28 |   message(WARNING "Unsupported WALLCLOCK option: ${WALLCLOCK}.")
29 | endif()
30 | 
31 | # The CJSON option.
32 | set(CJSON_SET "0" CACHE BOOLEAN "Parse JSON or default output.")
33 | if("${CJSON_SET}" STREQUAL "1")
34 |   add_definitions(-D DVDT_PROF_CJSON=1)
35 |   include_directories("${CJSON_INCLUDE_DIR}")
36 |   SET(CJSON_LIB_PATH "${CJSON_LIB_DIR}/${CJSON_LIB_NAME}")
37 | endif()
38 | 
39 | # Set build options.
40 | set(CMAKE_CXX_FLAGS "-O2 -W -Wall")
41 | message(STATUS "Using compiler flags: ${CMAKE_CXX_FLAGS}")
42 | message(STATUS "Using linker flags: ${CMAKE_SHARED_LINKER_FLAGS}")
43 | 
44 | # Output directory for executables.
45 | SET(EXECUTABLE_OUTPUT_PATH ${CMAKE_BINARY_DIR}/bin)
46 | 
47 | # Output directory for libraries.
48 | SET(LIBRARY_OUTPUT_PATH ${CMAKE_BINARY_DIR}/lib)
49 | 
50 | # Pass CMake settings to the source code via a header file.
51 | configure_file(
52 |   "${PROJECT_SOURCE_DIR}/cpp/prof_info.hpp.in"
53 |   "${PROJECT_BINARY_DIR}/include/prof_info.hpp"
54 | )
55 | 
56 | # Add the binary tree to the search path for include files
57 | # so that 'prof_info.hpp' can be found.
58 | include_directories("${PROJECT_BINARY_DIR}/include")
59 | 
60 | # Add source files.
61 | set(SOURCE
62 |   ${CMAKE_CURRENT_SOURCE_DIR}/cpp/prof.cpp
63 | )
64 | 
65 | # Add an interceptor library that intercepts some calls in 'libOpenCL.so'.
66 | add_library(prof SHARED ${SOURCE})
67 | target_link_libraries(prof dl "${CJSON_LIB_PATH}" "${BOOST_LIB_PATH}")
68 | 
69 | # Test descriptions are in a separate file.
70 | include(CTest)
71 | include(${PROJECT_SOURCE_DIR}/tests/CMakeLists.txt)
72 | 


--------------------------------------------------------------------------------
/tests/clBuildProgram.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import sys
 3 | import re
 4 | import os
 5 | import json
 6 | 
 7 | sys.path.append('../python')
 8 | from prof_parser import prof_parse
 9 | from prof_parser import ptr_regex
10 | from prof_parser import opts_regex
11 | 
12 | # Test info.
13 | call = 'clBuildProgram'
14 | _id  = ''
15 | print '%s%s' % (call, _id)
16 | 
17 | # FIXME: the same helper is copied into several other test scripts - avoid duplication.
18 | # Parse initialisation list of form: lhs = { elem, ... }.
def match_init_list(text, lhs_regex, elem_regex):
    """Collect the elements of an initialiser list of the form 'lhs = { elem, ... }'.

    The scan starts at the first occurrence of lhs_regex followed by '=', an
    opening brace and one element. Further elements are then picked up one at
    a time until a closing brace is the next thing found, or until nothing
    more matches.

    Returns the matched element strings in order of appearance, or an empty
    list when the opening pattern does not occur in text.
    """
    opening = '%s(\s*)=(\s*)\{(\s*)(?P<elem>%s)' % (lhs_regex, elem_regex)
    following = '(?P<elem>%s|\})' % elem_regex

    elems = []
    remaining = text
    found = re.search(opening, remaining)
    while found is not None:
        elem = found.group('elem')
        if elem == '}':
            # Closing brace: the list is complete.
            break
        elems.append(elem)
        # Continue scanning just past the element that was consumed.
        remaining = remaining[found.end():]
        found = re.search(following, remaining)
    return elems
27 | 
28 | # Parse test source.
29 | source = {}
30 | with open(call + _id + '.cpp', 'r') as f:
31 |     source['text'] = f.read()
32 |     source['program'] = re.search('\(cl_program\) (?P<program>%s)' % ptr_regex, source['text']).group('program')
33 |     source['options'] = re.search('options(\s*)=(\s*)\"(?P<options>%s)\"' % opts_regex, source['text']).group('options')
34 |     source['pfn_notify'] = re.search('\(pfn_notify_t \*\) (?P<pfn_notify>%s)' % ptr_regex, source['text']).group('pfn_notify')
35 |     source['user_data'] = re.search('\(void \*\) (?P<user_data>%s)' % ptr_regex, source['text']).group('user_data')
36 |     # Parse device list.
37 |     num_devices = int(re.search('num_devices(\s*)=(\s*)(?P<num_devices>\d+)', source['text']).group('num_devices'))
38 |     cl_device_ptr_list = match_init_list(source['text'], 'device_list\[%d\]' % num_devices, '\(cl_device_id\) %s' % ptr_regex)
39 |     source['device_list'] = [re.match('\(cl_device_id\) (?P<ptr>%s)' % ptr_regex, cl_device_ptr).group('ptr') for cl_device_ptr in cl_device_ptr_list]
40 | 
41 | # Read from stdin (via pipe).
42 | output = sys.stdin.read()
43 | print 'OUTPUT'
44 | print output
45 | 
46 | result = prof_parse(output)[0]
47 | print 'RESULT'
48 | print result
49 | print
50 | 
51 | status = True
52 | status &= (source['program'] == result['program'])
53 | status &= (cmp(source['device_list'], result['device_list']) == 0)
54 | status &= (source['options'] == result['options'])
55 | status &= (source['pfn_notify'].lower() == result['pfn_notify'].lower())
56 | status &= (source['user_data'].lower() == result['user_data'].lower())
57 | status &= (0 == result['errcode'])
58 | 
59 | print '%s%s: %s' % (call, _id, 'PASSED' if status else 'FAILED')
60 | print
61 | 


--------------------------------------------------------------------------------
/tests/clCreateProgramWithBinary.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import sys
 3 | import re
 4 | import os
 5 | import json
 6 | 
 7 | sys.path.append('../python')
 8 | from prof_parser import prof_parse
 9 | from prof_parser import ptr_regex
10 | 
11 | # Test info.
12 | call = 'clCreateProgramWithBinary'
13 | _id  = ''
14 | print '%s%s' % (call, _id)
15 | 
16 | # FIXME: taken from clBuildProgram.py - avoid duplication.
17 | # Parse initialisation list of form: lhs = { elem, ... }.
def match_init_list(text, lhs_regex, elem_regex):
    """Collect the elements of an initialiser list of the form 'lhs = { elem, ... }'.

    The scan starts at the first occurrence of lhs_regex followed by '=', an
    opening brace and one element. Further elements are then picked up one at
    a time until a closing brace is the next thing found, or until nothing
    more matches.

    Returns the matched element strings in order of appearance, or an empty
    list when the opening pattern does not occur in text.
    """
    opening = '%s(\s*)=(\s*)\{(\s*)(?P<elem>%s)' % (lhs_regex, elem_regex)
    following = '(?P<elem>%s|\})' % elem_regex

    elems = []
    remaining = text
    found = re.search(opening, remaining)
    while found is not None:
        elem = found.group('elem')
        if elem == '}':
            # Closing brace: the list is complete.
            break
        elems.append(elem)
        # Continue scanning just past the element that was consumed.
        remaining = remaining[found.end():]
        found = re.search(following, remaining)
    return elems
26 | 
27 | # Parse test source.
28 | source = {}
29 | with open(call + _id + '.cpp', 'r') as f:
30 |     source['text'] = f.read()
31 |     source['context'] = re.search('\(cl_context\) (?P<context>%s)' % ptr_regex, source['text']).group('context')
32 |     source['lengths'] = re.search('\(const size_t \*\) (?P<lengths>%s)' % ptr_regex, source['text']).group('lengths')
33 |     source['binaries'] = re.search('\(const unsigned char \*\*\) (?P<binaries>%s)' % ptr_regex, source['text']).group('binaries')
34 |     source['binary_status'] = re.search('binary_status(\s*)=(\s*)\(cl_int \*\) (?P<binary_status>%s)' % ptr_regex, source['text']).group('binary_status')
35 |     source['errcode_ret'] = re.search('errcode_ret(\s*)=(\s*)\(cl_int \*\) (?P<errcode_ret>%s)' % ptr_regex, source['text']).group('errcode_ret')
36 |     # Parse device list.
37 |     num_devices = int(re.search('num_devices(\s*)=(\s*)(?P<num_devices>\d+)', source['text']).group('num_devices'))
38 |     cl_device_ptr_list = match_init_list(source['text'], 'device_list\[%d\]' % num_devices, '\(cl_device_id\) %s' % ptr_regex)
39 |     source['device_list'] = [re.match('\(cl_device_id\) (?P<ptr>%s)' % ptr_regex, cl_device_ptr).group('ptr') for cl_device_ptr in cl_device_ptr_list]
40 |     # The following should match the assert statement.
41 |     source['program'] = re.search('\(cl_program\) (?P<program>%s)' % ptr_regex, source['text']).group('program')
42 | 
43 | # Read from stdin (via pipe).
44 | output = sys.stdin.read()
45 | print 'OUTPUT'
46 | print output
47 | 
48 | result = prof_parse(output)[0]
49 | print 'RESULT'
50 | print result
51 | print
52 | 
53 | status = True
54 | status &= (source['context'].lower() == result['context'].lower())
55 | status &= (cmp(source['device_list'], result['device_list']) == 0)
56 | status &= (source['lengths'].lower() == result['lengths'].lower())
57 | status &= (source['binaries'].lower() == result['binaries'].lower())
58 | status &= (source['binary_status'].lower() == result['binary_status'].lower())
59 | status &= (source['errcode_ret'].lower() == result['errcode_ret'].lower())
60 | status &= (source['program'].lower()  == result['program'].lower())
61 | 
62 | print '%s%s: %s' % (call, _id, 'PASSED' if status else 'FAILED')
63 | print
64 | 


--------------------------------------------------------------------------------
/python/dvdt_prof_cli/dvdt_parser-v0.1.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2018 cTuning foundation.
 3 | # See CK COPYRIGHT.txt for copyright details.
 4 | #
 5 | # SPDX-License-Identifier: BSD-3-Clause.
 6 | # See CK LICENSE.txt for licensing details.
 7 | #
 8 | 
 9 | #
10 | # Developer(s):
11 | #   - Grigori Fursin, cTuning foundation, 2018
12 | #   - Anton Lokhmotov, dividiti, 2018
13 | #   - Flavio Vella, dividiti, 2018
14 | #
15 | ##############################################################################
16 | # This program provides a friendly interface to parse and visualize information from
17 | # the dividiti OpenCL API profiler (dvdt-prof).
18 | # 
19 | 
20 | # MODULE 
21 | # deps: tabulate. To install: $ pip install tabulate --user
22 | #
23 | import os
24 | import dateutil.parser
25 | import json
26 | import argparse
27 | from pprint import pprint
28 | from tabulate import tabulate
29 | #
30 | import dvdt_function as tools
31 | #
32 | ####
33 | ###  Options todo: --call=NDRange specify opencl operations. NDRange by default
34 | #### Table options... Maybe we want raw data or CSV.
35 | 
# Command-line interface definition.
parser = argparse.ArgumentParser(description='Dividiti Profiler Command Line Interface v0')
#parser.add_argument("--filter_by", action="store", dest="bool_aggregate", help="Show information of unique kernel name")
parser.add_argument("--aggregate", action="store_true",  dest="aggregate_bool", default=None, help="Show information by of unique kernel names")
# type=int is required: without it a level given on the command line stays a
# string, and the '_verbose > N' checks below would compare a str against an
# int (always true on Python 2, a TypeError on Python 3).
parser.add_argument("--verbose", action="store",  dest="verbose_lvl", default=0, type=int, help="verbose level")
parser.add_argument("--files", action="store", dest="files_name", default="tmp-dvdt-prof.json", type=str, help="pass dvdt-prof.json files column separated")
parser.add_argument("--filter-by-name", action="store", dest="filter_kernel_list", help="show information by a given kernel name")
parser.add_argument("--filter-by-percent", action="store", dest="filter_percent", default=0, type=float, help="show information by a given kernel name")
parser.add_argument("--sort", action="store_true",  dest="sort_bool", default=False, help="Show information sorted by time")

args = parser.parse_args()

# Turn the raw command-line values into the settings used below; the
# '*_manager' helpers live in dvdt_function.
_is_aggregate = args.aggregate_bool
_verbose = args.verbose_lvl
_limit = tools.filter_percent_manager(args.filter_percent)
_files_lst = tools.files_manager(args.files_name, "tmp-dvdt-prof.json")
_filters_kernel = tools.fiter_by_name_manager(args.filter_kernel_list )
_is_sorted = args.sort_bool

# Load every requested profile file.
print(_files_lst)
_data_list = [tools.json_manager(file_name) for file_name in _files_lst]

# Only one dvdt-prof file is supported at the moment.
config_dict = {
    'data': _data_list,
    'filter_kernel': _filters_kernel,
    'verbose': _verbose,
    'percent_limit': _limit,
    'aggregate': _is_aggregate,
    'sort': _is_sorted,
    'files_list': _files_lst,
    'call_name': ['clEnqueueNDRangeKernel'],
}
if _verbose > 2:
    # NOTE(review): this only references the attribute and has no effect;
    # presumably a call was intended - confirm the signature of
    # tools.print_args before changing it.
    tools.print_args
if _verbose > 1:
    print("Print configuration")
    tools.print_configuration(config_dict)

# Just one file is supported and NDRange is the only supported call.
_NDRange_lst = tools.get_data_from_call(config_dict['data'][0], config_dict['call_name'][0])
_kernel_stat_lst = tools.get_data_from_ndrange(_NDRange_lst)

if config_dict['sort'] is True:
    # Largest total time first.
    _kernel_stat_lst = sorted(_kernel_stat_lst, key=lambda k: k['total_time'], reverse=True)

_app_stat_lst = tools.get_application_stat(_kernel_stat_lst)

tools.computing_percent(_kernel_stat_lst, _app_stat_lst['total_kernel_time'])
# Other table formats are listed at https://pypi.python.org/pypi/tabulate/
print("===== " + config_dict['files_list'][0] + " =====")

tools.print_table(_kernel_stat_lst, _app_stat_lst, config_dict['percent_limit'], config_dict['filter_kernel'], "simple")
91 | 
92 | 
93 | 
94 | 


--------------------------------------------------------------------------------
/tests/clEnqueueNDRangeKernel.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import sys
 3 | import re
 4 | import os
 5 | import json
 6 | 
 7 | sys.path.append('../python')
 8 | from prof_parser import prof_parse
 9 | from prof_parser import int_regex
10 | from prof_parser import ptr_regex
11 | from prof_parser import opts_regex
12 | 
13 | max_work_dim = 3
14 | default_offset = 0
15 | null_offset = 0
16 | default_gws = 1
17 | default_lws = 1
18 | null_lws = 0
19 | 
20 | # Test info.
21 | call = 'clEnqueueNDRangeKernel'
22 | _id  = ''
23 | print '%s%s' % (call, _id)
24 | 
25 | # Parse initialisation list of form: lhs = { elem, ... }.
def match_init_list(text, lhs_regex, elem_regex):
    """Collect the elements of an initialiser list of the form 'lhs = { elem, ... }'.

    The scan starts at the first occurrence of lhs_regex followed by '=', an
    opening brace and one element. Further elements are then picked up one at
    a time until a closing brace is the next thing found, or until nothing
    more matches.

    Returns the matched element strings in order of appearance, or an empty
    list when the opening pattern does not occur in text.
    """
    opening = '%s(\s*)=(\s*)\{(\s*)(?P<elem>%s)' % (lhs_regex, elem_regex)
    following = '(?P<elem>%s|\})' % elem_regex

    elems = []
    remaining = text
    found = re.search(opening, remaining)
    while found is not None:
        elem = found.group('elem')
        if elem == '}':
            # Closing brace: the list is complete.
            break
        elems.append(elem)
        # Continue scanning just past the element that was consumed.
        remaining = remaining[found.end():]
        found = re.search(following, remaining)
    return elems
34 | 
35 | # Parse test source.
36 | source = {}
37 | with open(call + _id + '.cpp', 'r') as f:
38 |     source['text'] = f.read()
39 |     source['queue'] = re.search('\(cl_command_queue\) (?P<queue>%s)' % ptr_regex, source['text']).group('queue')
40 |     source['kernel'] = re.search('\(cl_kernel\) (?P<kernel>%s)' % ptr_regex, source['text']).group('kernel')
41 |     
42 |     work_dim = int(re.search('work_dim(\s*)=(\s*)(?P<work_dim>\d+)', source['text']).group('work_dim'))
43 |     gwo = match_init_list(source['text'], 'global_work_offset\[%d\]' % work_dim, int_regex)
44 |     source['gwo'] = ([int(i) for i in gwo] + [default_offset] * (max_work_dim - work_dim)) if gwo else [null_offset] * max_work_dim
45 |     gws = match_init_list(source['text'], 'global_work_size\[%d\]' % work_dim, int_regex)
46 |     source['gws'] = [int(i) for i in gws] + [default_gws] * (max_work_dim - work_dim)
47 |     lws = match_init_list(source['text'], 'local_work_size\[%d\]' % work_dim, int_regex)
48 |     source['lws'] = ([int(i) for i in lws] + [default_lws] * (max_work_dim - work_dim)) if lws else [null_lws] * max_work_dim
49 | 
50 |     num_events = int(re.search('num_events_in_wait_list(\s*)=(\s*)(?P<num_events>\d+)', source['text']).group('num_events'))
51 |     cl_event_ptr_list = match_init_list(source['text'], 'event_wait_list\[%d\]' % num_events, '\(cl_event\) %s' % ptr_regex)
52 |     source['event_wait_list'] = [re.match('\(cl_event\) (?P<ptr>%s)' % ptr_regex, cl_event_ptr).group('ptr') for cl_event_ptr in cl_event_ptr_list]
53 | 
54 |     source['event'] = re.search('\(cl_event \*\) (?P<event>%s)' % ptr_regex, source['text']).group('event')
55 | 
56 |     profiling_match = re.search('%s (?P<queued>%s) (?P<submit>%s) (?P<start>%s) (?P<end>%s)' % \
57 |         ('profiling', int_regex, int_regex, int_regex, int_regex), source['text'])
58 |     if profiling_match:
59 |         source['profiling'] = {}
60 |         source['profiling']['queued'] = int(profiling_match.group('queued'))
61 |         source['profiling']['submit'] = int(profiling_match.group('submit'))
62 |         source['profiling']['start']  = int(profiling_match.group('start'))
63 |         source['profiling']['end']    = int(profiling_match.group('end'))
64 | 
65 | 
66 | # Read from stdin (via pipe).
67 | output = sys.stdin.read()
68 | print 'OUTPUT'
69 | print output
70 | 
71 | result = prof_parse(output)[0]
72 | print 'RESULT'
73 | print result
74 | print
75 | 
76 | status = True
77 | status &= ("dvdt_prof_kernel" == result['name'])
78 | status &= (source['queue'] == result['queue'])
79 | status &= (source['kernel'] == result['kernel'])
80 | status &= (cmp(source['gwo'], result['gwo']) == 0)
81 | status &= (cmp(source['gws'], result['gws']) == 0)
82 | status &= (cmp(source['lws'], result['lws']) == 0)
83 | status &= (cmp(source['event_wait_list'], result['event_wait_list']) == 0)
84 | status &= (source['event'] == result['event'])
85 | status &= (cmp(source['profiling'], result['profiling']) == 0)
86 | 
87 | print '%s%s: %s' % (call, _id, 'PASSED' if status else 'FAILED')
88 | print
89 | 


--------------------------------------------------------------------------------
/tests/clEnqueueReadOrWriteBuffer.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import sys
 3 | import re
 4 | import os
 5 | import json
 6 | 
 7 | sys.path.append('../python')
 8 | from prof_parser import prof_parse
 9 | from prof_parser import int_regex
10 | from prof_parser import ptr_regex
11 | from prof_parser import opts_regex
12 | 
13 | # Parse initialisation list of form: lhs = { elem, ... }.
def match_init_list(text, lhs_regex, elem_regex):
    """Collect the elements of an initialiser list of the form 'lhs = { elem, ... }'.

    The scan starts at the first occurrence of lhs_regex followed by '=', an
    opening brace and one element. Further elements are then picked up one at
    a time until a closing brace is the next thing found, or until nothing
    more matches.

    Returns the matched element strings in order of appearance, or an empty
    list when the opening pattern does not occur in text.
    """
    opening = '%s(\s*)=(\s*)\{(\s*)(?P<elem>%s)' % (lhs_regex, elem_regex)
    following = '(?P<elem>%s|\})' % elem_regex

    elems = []
    remaining = text
    found = re.search(opening, remaining)
    while found is not None:
        elem = found.group('elem')
        if elem == '}':
            # Closing brace: the list is complete.
            break
        elems.append(elem)
        # Continue scanning just past the element that was consumed.
        remaining = remaining[found.end():]
        found = re.search(following, remaining)
    return elems
22 | 
23 | # Parse test source.
def get_source(call, _id):
    """Extract the expected call arguments from the test's C++ source file.

    Reads '<call><_id>.cpp' from the current directory and pulls each argument
    out of the source text with a regular expression.

    Returns a dict with the keys 'queue', 'buffer', 'ptr' and 'event'
    (matched strings), 'blocking', 'offset' and 'size' (ints),
    'event_wait_list' (list of matched strings) and, only when a
    'profiling ...' line is found, 'profiling' (dict with the int fields
    'queued', 'submit', 'start' and 'end').

    Raises AttributeError if one of the mandatory patterns is not found.
    """
    with open('%s%s.cpp' % (call, _id), 'r') as f:
        text = f.read()

    def grab(pattern, name):
        # A pattern that does not occur surfaces as AttributeError on None.
        return re.search(pattern, text).group(name)

    # Only the clEnqueueWriteBuffer pattern expects 'const' before 'void *'.
    qualifier = 'const' if call == 'clEnqueueWriteBuffer' else ''

    source = {}
    source['queue'] = grab('\(cl_command_queue\)(\s*)(?P<queue>%s)' % ptr_regex, 'queue')
    source['buffer'] = grab('\(cl_mem\)(\s*)(?P<buffer>%s)' % ptr_regex, 'buffer')
    source['blocking'] = int(grab('blocking(\s*)=(\s*)(?P<blocking>\d)', 'blocking'))
    source['offset'] = int(grab('offset(\s*)=(\s*)(?P<offset>\d+)', 'offset'))
    source['size'] = int(grab('size(\s*)=(\s*)(?P<size>\d+)', 'size'))
    source['ptr'] = grab('\(%s(\s*)void(\s*)\*\)(\s*)(?P<ptr>%s)' % (qualifier, ptr_regex), 'ptr')

    # The wait list is declared as 'event_wait_list[<num_events>] = { ... }'.
    num_events = int(grab('num_events_in_wait_list(\s*)=(\s*)(?P<num_events>\d+)', 'num_events'))
    casts = match_init_list(text, 'event_wait_list\[%d\]' % num_events, '\(cl_event\) %s' % ptr_regex)
    # Strip the '(cl_event)' cast, keeping only the pointer text.
    source['event_wait_list'] = [
        re.match('\(cl_event\) (?P<ptr>%s)' % ptr_regex, cast).group('ptr')
        for cast in casts
    ]
    source['event'] = grab('\(cl_event \*\) (?P<event>%s|0)' % ptr_regex, 'event')

    # Profiling timestamps are optional in the test source.
    timestamps = re.search('%s (?P<queued>%s) (?P<submit>%s) (?P<start>%s) (?P<end>%s)' % \
        ('profiling', int_regex, int_regex, int_regex, int_regex), text)
    if timestamps:
        source['profiling'] = dict(
            (field, int(timestamps.group(field)))
            for field in ('queued', 'submit', 'start', 'end')
        )
    return source
50 | 
51 | # Get result.
def get_result(output):
    """Parse the test executable's output and return the first parsed entry.

    output is the text captured from the test executable; it is handed to
    prof_parse and element 0 of what prof_parse returns is passed back.
    """
    entries = prof_parse(output)
    return entries[0]
55 | 
56 | # Test source and result for comparison.
def cmp_source_and_result(source, result):
    """Check that the parsed profiler result agrees with the test source.

    source is the dict built from the C++ test source and result the dict
    parsed from the test executable's output. The fields 'queue', 'buffer',
    'blocking', 'offset', 'size', 'ptr', 'event_wait_list' and 'profiling'
    are compared for equality.

    Returns True only if every compared field matches, False otherwise.
    Raises KeyError if either dict lacks one of the compared keys.
    """
    compared = (
        'queue', 'buffer', 'blocking', 'offset', 'size', 'ptr',
        'event_wait_list', 'profiling',
    )
    # FIXME: watch for NULL pointers (0 != '0x00000000').
    # 'event' is left out of the comparison for that reason.

    # Plain equality replaces cmp(a, b) == 0: it gives the same answer for the
    # lists and dicts compared here and does not rely on the cmp() builtin,
    # which no longer exists in Python 3.
    # The list is built in full (no short-circuit) so that every key is looked
    # up even after a mismatch, as before.
    return all([source[key] == result[key] for key in compared])
70 | 
def run(call, _id):
    """Run one buffer read/write test and print its verdict.

    Parses the expected values from the test source file named by call and
    _id, reads the test executable's output from stdin, parses it, and prints
    the intermediate data followed by '<call><_id>: PASSED' or
    '<call><_id>: FAILED'.
    """
    print('%s%s' % (call, _id))

    # Expected values come from the test's C++ source file.
    source = get_source(call, _id)
    print('SOURCE')
    print(source)

    # The test executable's output arrives on stdin (via pipe).
    output = sys.stdin.read()
    print('OUTPUT')
    print(output)

    # Actual values are parsed from that output.
    result = get_result(output)
    print('RESULT')
    print(result)

    verdict = 'PASSED' if cmp_source_and_result(source, result) else 'FAILED'
    print('%s%s: %s' % (call, _id, verdict))
    print('')
91 | 


--------------------------------------------------------------------------------
/tests/clEnqueueNDRangeKernel_LWS_NULL.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import os
 3 | import sys
 4 | import re
 5 | import json
 6 | 
 7 | sys.path.append('../python')
 8 | from prof_parser import prof_parse
 9 | from prof_parser import int_regex
10 | from prof_parser import ptr_regex
11 | from prof_parser import opts_regex
12 | 
13 | max_work_dim = 3
14 | default_offset = 0
15 | null_offset = 0
16 | default_gws = 1
17 | default_lws = 1
18 | null_lws = 0
19 | 
20 | # Test info.
21 | call = 'clEnqueueNDRangeKernel'
22 | _id  = '_LWS_NULL'
23 | print '%s%s' % (call, _id)
24 | print
25 | 
26 | # Environment.
27 | env = dict(os.environ)
28 | print 'DVDT_PROF_LWS_NULL=%s' % env['DVDT_PROF_LWS_NULL']
29 | print 'LD_PRELOAD=%s' % env['LD_PRELOAD']
30 | print
31 | 
32 | # Parse initialisation list of form: lhs = { elem, ... }.
def match_init_list(text, lhs_regex, elem_regex):
    """Collect the elements of an initialiser list of the form 'lhs = { elem, ... }'.

    The scan starts at the first occurrence of lhs_regex followed by '=', an
    opening brace and one element. Further elements are then picked up one at
    a time until a closing brace is the next thing found, or until nothing
    more matches.

    Returns the matched element strings in order of appearance, or an empty
    list when the opening pattern does not occur in text.
    """
    opening = '%s(\s*)=(\s*)\{(\s*)(?P<elem>%s)' % (lhs_regex, elem_regex)
    following = '(?P<elem>%s|\})' % elem_regex

    elems = []
    remaining = text
    found = re.search(opening, remaining)
    while found is not None:
        elem = found.group('elem')
        if elem == '}':
            # Closing brace: the list is complete.
            break
        elems.append(elem)
        # Continue scanning just past the element that was consumed.
        remaining = remaining[found.end():]
        found = re.search(following, remaining)
    return elems
41 | 
42 | # Parse test source.
43 | source = {}
44 | with open(call + _id + '.cpp', 'r') as f:
45 |     source['text'] = f.read()
46 |     source['queue'] = re.search('\(cl_command_queue\) (?P<queue>%s)' % ptr_regex, source['text']).group('queue')
47 |     source['kernel'] = re.search('\(cl_kernel\) (?P<kernel>%s)' % ptr_regex, source['text']).group('kernel')
48 |     
49 |     work_dim = int(re.search('work_dim(\s*)=(\s*)(?P<work_dim>\d+)', source['text']).group('work_dim'))
50 |     gwo = match_init_list(source['text'], 'global_work_offset\[%d\]' % work_dim, int_regex)
51 |     source['gwo'] = ([int(i) for i in gwo] + [default_offset] * (max_work_dim - work_dim)) if gwo else [null_offset] * max_work_dim
52 |     gws = match_init_list(source['text'], 'global_work_size\[%d\]' % work_dim, int_regex)
53 |     source['gws'] = [int(i) for i in gws] + [default_gws] * (max_work_dim - work_dim)
54 | 
55 |     # The interceptor sets the local work size to NULL when DVDT_PROF_LWS_NULL is defined, so ignore the test source here.
56 |     lws = None
57 |     source['lws'] = ([int(i) for i in lws] + [default_lws] * (max_work_dim - work_dim)) if lws else [null_lws] * max_work_dim
58 | 
59 |     num_events = int(re.search('num_events_in_wait_list(\s*)=(\s*)(?P<num_events>\d+)', source['text']).group('num_events'))
60 |     cl_event_ptr_list = match_init_list(source['text'], 'event_wait_list\[%d\]' % num_events, '\(cl_event\) %s' % ptr_regex)
61 |     source['event_wait_list'] = [re.match('\(cl_event\) (?P<ptr>%s)' % ptr_regex, cl_event_ptr).group('ptr') for cl_event_ptr in cl_event_ptr_list]
62 | 
63 |     source['event'] = re.search('\(cl_event \*\) (?P<event>%s)' % ptr_regex, source['text']).group('event')
64 | 
65 |     profiling_match = re.search('%s (?P<queued>%s) (?P<submit>%s) (?P<start>%s) (?P<end>%s)' % \
66 |         ('profiling', int_regex, int_regex, int_regex, int_regex), source['text'])
67 |     if profiling_match:
68 |         source['profiling'] = {}
69 |         source['profiling']['queued'] = int(profiling_match.group('queued'))
70 |         source['profiling']['submit'] = int(profiling_match.group('submit'))
71 |         source['profiling']['start']  = int(profiling_match.group('start'))
72 |         source['profiling']['end']    = int(profiling_match.group('end'))
73 | 
74 | 
75 | # Read from stdin (via pipe).
76 | output = sys.stdin.read()
77 | print 'OUTPUT'
78 | print output
79 | 
80 | result = prof_parse(output)[0]
81 | print 'RESULT'
82 | print result
83 | print
84 | 
85 | status = True
86 | status &= ("dvdt_prof_kernel" == result['name'])
87 | status &= (source['queue'] == result['queue'])
88 | status &= (source['kernel'] == result['kernel'])
89 | status &= (cmp(source['gwo'], result['gwo']) == 0)
90 | status &= (cmp(source['gws'], result['gws']) == 0)
91 | status &= (cmp(source['lws'], result['lws']) == 0)
92 | status &= (cmp(source['event_wait_list'], result['event_wait_list']) == 0)
93 | status &= (source['event'] == result['event'])
94 | status &= (cmp(source['profiling'], result['profiling']) == 0)
95 | 
96 | print '%s%s: %s' % (call, _id, 'PASSED' if status else 'FAILED')
97 | print
98 | 


--------------------------------------------------------------------------------
/tests/clEnqueueNDRangeKernel_LWS.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import os
 3 | import sys
 4 | import re
 5 | import json
 6 | 
 7 | sys.path.append('../python')
 8 | from prof_parser import prof_parse
 9 | from prof_parser import int_regex
10 | from prof_parser import ptr_regex
11 | from prof_parser import opts_regex
12 | 
# Sizes and fill values used further down to build the expected
# three-element gwo/gws/lws lists.
max_work_dim = 3
default_offset = 0
null_offset = 0
default_gws = 1
default_lws = 1
null_lws = 0

# Test info.
# call + _id is both the label printed in the report and the stem of the
# C++ source file that is opened below.
call = 'clEnqueueNDRangeKernel'
_id  = '_LWS'
print '%s%s' % (call, _id)
print

# Environment.
# Both variables are looked up with [] and therefore must be set:
# a missing one raises KeyError here.
env = dict(os.environ)
print 'DVDT_PROF_LWS=%s' % env['DVDT_PROF_LWS']
print 'LD_PRELOAD=%s' % env['LD_PRELOAD']
print
31 | 
32 | # Parse initialisation list of form: lhs = { elem, ... }.
def match_init_list(text, lhs_regex, elem_regex):
    """Collect the elements of an initialiser of the form 'lhs = { elem, ... }'.

    text: the text to scan.
    lhs_regex: regular expression for the left-hand side of the '='.
    elem_regex: regular expression for a single list element.

    Returns the matched elements as strings, in order of appearance.  The
    result is empty when the left-hand side is not found or when no element
    directly follows the opening brace.
    """
    head_pattern = r'%s(\s*)=(\s*)\{(\s*)(?P<elem>%s)' % (lhs_regex, elem_regex)
    # After the first element, look for either another element or the
    # closing brace, whichever comes first in the remaining text.
    next_pattern = r'(?P<elem>%s|\})' % elem_regex

    elements = []
    remaining = text
    found = re.search(head_pattern, remaining)
    while found is not None:
        token = found.group('elem')
        if token == '}':
            break
        elements.append(token)
        remaining = remaining[found.end():]
        found = re.search(next_pattern, remaining)
    return elements
41 | 
42 | # Parse test source.
# Expected values, scraped from the C++ test source named call + _id + '.cpp'.
source = {}
with open(call + _id + '.cpp', 'r') as f:
    source['text'] = f.read()
    # Each re.search(...).group(...) below raises AttributeError if the C++
    # source does not contain the expected text.
    source['queue'] = re.search('\(cl_command_queue\) (?P<queue>%s)' % ptr_regex, source['text']).group('queue')
    source['kernel'] = re.search('\(cl_kernel\) (?P<kernel>%s)' % ptr_regex, source['text']).group('kernel')

    work_dim = int(re.search('work_dim(\s*)=(\s*)(?P<work_dim>\d+)', source['text']).group('work_dim'))
    # Offsets found in the source are padded with default_offset; if no
    # initialiser list is found, every entry is null_offset.
    gwo = match_init_list(source['text'], 'global_work_offset\[%d\]' % work_dim, int_regex)
    source['gwo'] = ([int(i) for i in gwo] + [default_offset] * (max_work_dim - work_dim)) if gwo else [null_offset] * max_work_dim
    gws = match_init_list(source['text'], 'global_work_size\[%d\]' % work_dim, int_regex)
    source['gws'] = [int(i) for i in gws] + [default_gws] * (max_work_dim - work_dim)
    # Interceptor updates local work size from DVDT_PROF_LWS, so ignore test source and parse DVDT_PROF_LWS instead.
    # Only the text after the first ':' and before any second ':' is used.
    lws = (env['DVDT_PROF_LWS'].strip('"').split(':')[1]).split(',')
    # NOTE(review): str.split(',') never returns an empty list, so the
    # null_lws branch looks unreachable - confirm whether it is needed.
    source['lws'] = ([int(i) for i in lws] + [default_lws] * (max_work_dim - work_dim)) if lws else [null_lws] * max_work_dim

    num_events = int(re.search('num_events_in_wait_list(\s*)=(\s*)(?P<num_events>\d+)', source['text']).group('num_events'))
    cl_event_ptr_list = match_init_list(source['text'], 'event_wait_list\[%d\]' % num_events, '\(cl_event\) %s' % ptr_regex)
    # Strip the '(cl_event) ' cast and keep only the pointer text.
    source['event_wait_list'] = [re.match('\(cl_event\) (?P<ptr>%s)' % ptr_regex, cl_event_ptr).group('ptr') for cl_event_ptr in cl_event_ptr_list]

    source['event'] = re.search('\(cl_event \*\) (?P<event>%s)' % ptr_regex, source['text']).group('event')

    # source['profiling'] is only set when the C++ source contains a
    # 'profiling <queued> <submit> <start> <end>' line.
    # NOTE(review): the comparison below reads source['profiling']
    # unconditionally, so a source without that line ends in KeyError.
    profiling_match = re.search('%s (?P<queued>%s) (?P<submit>%s) (?P<start>%s) (?P<end>%s)' % \
        ('profiling', int_regex, int_regex, int_regex, int_regex), source['text'])
    if profiling_match:
        source['profiling'] = {}
        source['profiling']['queued'] = int(profiling_match.group('queued'))
        source['profiling']['submit'] = int(profiling_match.group('submit'))
        source['profiling']['start']  = int(profiling_match.group('start'))
        source['profiling']['end']    = int(profiling_match.group('end'))


# Read from stdin (via pipe).
output = sys.stdin.read()
print 'OUTPUT'
print output

# Only the first parsed call is checked.
result = prof_parse(output)[0]
print 'RESULT'
print result
print

# Every comparison is evaluated; status stays True only if all of them hold.
status = True
status &= ("dvdt_prof_kernel" == result['name'])
status &= (source['queue'] == result['queue'])
status &= (source['kernel'] == result['kernel'])
status &= (cmp(source['gwo'], result['gwo']) == 0)
status &= (cmp(source['gws'], result['gws']) == 0)
status &= (cmp(source['lws'], result['lws']) == 0)
status &= (cmp(source['event_wait_list'], result['event_wait_list']) == 0)
status &= (source['event'] == result['event'])
status &= (cmp(source['profiling'], result['profiling']) == 0)

print '%s%s: %s' % (call, _id, 'PASSED' if status else 'FAILED')
print
97 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # dv/dt prof: OpenCL API profiler
  2 | 
  3 | The `dv/dt prof` profiler (`libprof.so`) intercepts some OpenCL API calls and
  4 | logs their arguments before invoking the underlying OpenCL implementation.
  5 | 
  6 | As of v0.2, the profiler supports two modes:
  7 | 
  8 | 1. The `ostream` mode logs to `stdout` blocks of text like the following:
  9 | ```
 10 | [dv/dt] clEnqueueNDRangeKernel
 11 | [dv/dt] clEnqueueNDRangeKernel name im2col_float
 12 | [dv/dt] clEnqueueNDRangeKernel queue 0x5d3240
 13 | [dv/dt] clEnqueueNDRangeKernel kernel 0xbb0300
 14 | [dv/dt] clEnqueueNDRangeKernel gwo 0 0 0
 15 | [dv/dt] clEnqueueNDRangeKernel gws 16384 1 1
 16 | [dv/dt] clEnqueueNDRangeKernel lws 128 1 1
 17 | [dv/dt] clEnqueueNDRangeKernel event_wait_list
 18 | [dv/dt] clEnqueueNDRangeKernel event 0
 19 | [dv/dt] clEnqueueNDRangeKernel start 2016-10-11T20:41:18.041468
 20 | [dv/dt] clEnqueueNDRangeKernel profiling 52910121520869 52910121595577 52910130751092 52910132647472
 21 | [dv/dt] clEnqueueNDRangeKernel end 2016-10-11T20:41:18.054802
 22 | [dv/dt] clEnqueueNDRangeKernel errcode 0
 23 | ```
 24 | 
 25 | In an offline post-processing step, the Python parser (`prof_parser.py`)
 26 | converts the profiler's output into JSON like the following:
 27 | ```
 28 | {
 29 |   "kernel": "0x7f8700",
 30 |   "profiling": {
 31 |     "start": 46559873667079,
 32 |     "end": 46559875636796,
 33 |     "queued": 46559863661412,
 34 |     "submit": 46559863742203
 35 |   },
 36 |   "name": "im2col_float",
 37 |   "lws": [
 38 |     128,
 39 |     1,
 40 |     1
 41 |   ],
 42 |   "gwo": [
 43 |     0,
 44 |     0,
 45 |     0
 46 |   ],
 47 |   "errcode": 0,
 48 |   "queue": "0x1cd240",
 49 |   "call": "clEnqueueNDRangeKernel",
 50 |   "gws": [
 51 |     16384,
 52 |     1,
 53 |     1
 54 |   ],
 55 |   "timestamp": {
 56 |     "start": "2016-10-11T15:50:45.883538",
 57 |     "end": "2016-10-11T15:50:45.897364"
 58 |   },
 59 |   "enqueue_id": 24,
 60 |   "event_wait_list": [],
 61 |   "event": "0"
 62 | }
 63 | ```
 64 | 
 65 | 2. The `cjson` mode uses the [cJSON](https://github.com/DaveGamble/cJSON/)
 66 | library to build JSON online, which then gets logged to `stdout`. In an offline
 67 | post-processing step, the Python parser (`prof_parser.py`) simply loads JSON
 68 | between the `[dv/dt] <<` and `[dv/dt] >>` markers.
 69 | 
 70 | # Effect on execution time
 71 | 
 72 | Using the profiler can slow down the program for several reasons:
 73 | 
 74 | - To make parsing robust, the profiler uses formatted printing which is
 75 |   relatively expensive.
 76 | 
 77 | - To time non-blocking calls accurately, the profiler makes them blocking.
 78 | 
 79 | - Optionally, the profiler can alter the program behaviour in other ways, for
 80 |   example, by changing the local work size for one or more kernels in the
 81 |   program. This functionality requires keeping additional state.
 82 | 
 83 | The good news is that the kernel execution time and memory copy time are not
 84 | affected.
 85 | 
 86 | [OpenCL page at the Khronos Group](https://www.khronos.org/opencl)
 87 | 
 88 | # Installing the profiler
 89 | 
 90 | **NB:** The easiest way to install the profiler is by using
 91 | [CK-Caffe](http://github.com/dividiti/ck-caffe) packages:
 92 | ```
 93 | $ ck pull repo:ck-caffe --url=https://github.com/dividiti/ck-caffe
 94 | $ ck install ck-caffe:package:tool-dvdt-prof
 95 | $ ck install ck-caffe:package:tool-dvdt-prof-cjson
 96 | ```
 97 | 
 98 | ## Prerequisites
 99 | 
100 | - CMake 3.0.
101 | 
102 | - OpenCL headers and library.
103 | 
104 | ## Building the profiler.
105 | 
106 | Place the source into `${SRC_DIR}`. Create `${BUILD_DIR}`.
107 | 
108 | ```
109 | $ cd ${BUILD_DIR}
110 | $ cmake ${SRC_DIR}
111 | $ make prof
112 | ```
113 | 
114 | `${SRC_DIR}/lib` now contains `libprof.so`.
115 | 
116 | To build and run tests:
117 | 
118 | ```
119 | $ make check
120 | ```
121 | 
122 | `${SRC_DIR}/lib` now contains `libprof_test.so` which is only useful for testing.
123 | 
124 | ### Build options.
125 | 
126 | By default, the profiler uses `boost::chrono` to measure wall-clock time. This
127 | can be disabled by setting the `WALLCLOCK` option as follows:
128 | 
129 | ```
130 | cmake ${SRC_DIR} -DWALLCLOCK=timeofday
131 | ```
132 | 
133 | (This is particularly handy for Android platforms.)
134 | 
135 | Alternative mechanisms like `gettimeofday()` are not supported at the moment.
136 | 
137 | # Using the profiler
138 | 
139 | ## Collecting runtime information
140 | ```
141 | $ LD_PRELOAD=<path to profiler e.g. "${BUILD_DIR}/lib/libprof.so"> <path to program to be profiled>
142 | ```
143 | 
144 | ## Changing the program behaviour
145 | 
146 | Several environment variables can be defined when launching the program.
147 | 
148 | ### DVDT_PROF_LWS
149 | 
150 | `DVDT_PROF_LWS` specifies changes that should be made to the local work size
151 | when launching one or more kernels in the program.
152 | 
153 | ```
154 | DVDT_PROF_LWS="kernel_A:lws_A0,lws_A1,lws_A2 kernel_B:lws_B0,lws_B1,lws_B2 ..."
155 | ```
156 | For example:
157 | ```
158 | DVDT_PROF_LWS="transpose:8,8 gemm:4,16" LD_PRELOAD=<path to profiler> <path to program>
159 | ```
160 | 
161 | Namely, the per-kernel list elements are separated by spaces; the kernel names
162 | (strings) are separated from the local work size tuple by colons; the tuple
163 | elements (unsigned integers) are delimited by commas. The number of elements in
164 | a tuple must match the number of work-group dimensions specified in the
165 | program. Alternatively, a tuple that starts with the value `0` makes the
166 | profiler use `NULL` as the local work size for this kernel.
167 | 
168 | Note that the profiler cannot check the correctness of any given specification.
169 | In particular, the usual execution constraints hold: the global work size
170 | dimensions must be divisible by the local work size dimensions; the total
171 | work-group size (the product of all the dimensions) cannot exceed
172 | `CL_KERNEL_WORK_GROUP_SIZE`.
173 | 
174 | ### DVDT_PROF_LWS_NULL
175 | 
176 | For convenience, if the environment defines `DVDT_PROF_LWS_NULL` then `NULL` is
177 | used when launching any kernel in the program. (In this case, `DVDT_PROF_LWS`
178 | gets ignored.)
179 | 


--------------------------------------------------------------------------------
/python/dvdt_prof_cli/dvdt_function.py:
--------------------------------------------------------------------------------
  1 | import os.path
  2 | import json
  3 | from tabulate import tabulate
  4 | 
  5 | 
  6 | 
  7 | def print_args(args):  
  8 |     if args.aggregate_bool is not None:
  9 |          print args.aggregate_bool
 10 |     if args.filter_kernel_list is not None:
 11 |          print args.filter_kernel_list
 12 |     print args.files_name
 13 |     print args.filter_percent
 14 |     print args.verbose_lvl
 15 | 
 16 | def print_configuration(conf_dict):
 17 |     for i in conf_dict:
 18 |         if i != 'data':
 19 |             print i + ": ", conf_dict[i]
 20 |  
 21 | def files_manager(files_str, file_default, verbosity=0):
 22 |     files_lst = []
 23 |     ### manage default case 
 24 |     if files_str.lower() == file_default:
 25 |         return files_lst.append(files_str.lower())
 26 |     ### manage string seperated by columns
 27 |     file_list_all = files_str.split(',')
 28 |     for f in file_list_all:
 29 |         if os.path.isfile(f) is True:
 30 |            files_lst.append(f) 
 31 |     ### here all the file should exist 
 32 |     return files_lst
 33 | 
 34 | 
 35 | def json_manager(fp):
 36 |     with open(fp, 'r') as f:
 37 |        dvp = json.load(f)
 38 |     ## return dictionary
 39 |     return dvp
 40 | 
 41 | 
 42 | def filter_percent_manager(percent, verbosity=0):
 43 |     # silent 
 44 |     new_percent = percent
 45 |     if percent > 100.0:
 46 |         new_percent = 0.0
 47 |     elif percent < 0.0:
 48 |         new_percent = 0.0    
 49 |     return new_percent
 50 | 
 51 | def fiter_by_name_manager(filter_kernel_str, verbosity=0):
 52 |     filter_kernel_lst = []
 53 |     if filter_kernel_str is None:
 54 |         return filter_kernel_lst
 55 |     else:
 56 |         filter_kernel_lst = filter_kernel_str.split(',')
 57 |     return filter_kernel_lst
 58 | 
 59 | 
 60 | def get_data_from_call(dvp, call_name="clEnqueueNDRangeKernel"):
 61 |     call_list = []
 62 |     for opencl_function in dvp:
 63 |         if opencl_function['call'] == call_name:
 64 |             call_list.append(opencl_function)
 65 |     return call_list
 66 | 
 67 | 
 68 | 
 69 | ##### Function per call/applcation and general function
 70 | 
 71 | ### !!!! THE Function modifies k
 72 | def computing_percent(k_lst, total_time):
 73 |     for i in k_lst:
 74 |         t_i = float(i['total_time'])
 75 |         percent = (t_i*100.0)/total_time
 76 |         i['percent'] = percent
 77 | 
 78 | 
 79 | 
 80 |  
 81 | def get_application_stat(kernel_stat_lst):
 82 |     application_statistics = {}
 83 |     application_statistics['total_kernel_num'] = len(kernel_stat_lst)
 84 |     total_time = 0.0 
 85 |     ## compute total time 
 86 |     for i in kernel_stat_lst:
 87 |         total_time += i['total_time']
 88 |         application_statistics['total_kernel_time'] = total_time
 89 |     application_statistics['unit'] = kernel_stat_lst[0]['unit']
 90 |     return application_statistics
 91 | ### NDRange
 92 | 
 93 | def get_data_from_ndrange(NDRange_list, unit="ms"):
 94 |     stat_lst = []
 95 |     ## Select unit
 96 |     ## To Do. Put in scientific format
 97 |     ms = 1000000.0
 98 |     sec = 1000000000.0
 99 |     if unit == "ms":
100 |        unit_scale = ms
101 |     else:
102 |        unit_scale = sec
103 | 
104 |     for k in NDRange_list:
105 |         ## kernel is in nano seconds
106 |         tmp = {}
107 |         tmp['kernel_id'] = str(k['kernel'])
108 |         tmp['kernel_name'] = k['name']
109 |         total_kernel_time = int(k['profiling']['end']) -int (k['profiling']['start'])
110 |         t_scale = total_kernel_time/unit_scale
111 |         tmp ['total_time'] = t_scale
112 |         tmp ['configuration'] = {'gws': k['gws'], 'lws':k['lws']}
113 |         tmp ['unit'] = unit
114 |         stat_lst.append(tmp)
115 |     return stat_lst
116 | 
117 | 
118 | 
119 | 
120 | ##### VISUALIZATION 
121 | 
122 | # add option to save format 
123 | # add regex per file name
def print_table(k_lst, app_lst, limit=0, by_kernel=[], view="simple"):
    """Print a per-enqueue table followed by a one-row summary table.

    k_lst: entries carrying 'kernel_name', 'total_time', 'percent' and a
        'configuration' mapping with 'gws' and 'lws'.
    app_lst: mapping whose 'unit' value is shown in the time column header.
    limit: only entries whose 'percent' is strictly greater are listed.
    by_kernel: kernel names to keep; an empty list keeps every kernel.
        The default list is shared between calls but is never modified here.
    view: tabulate table format used for the first table.
    """
    columns = ["Kernel_id", "Kernel_name", "Time ("+ app_lst['unit']+ ")", "Percent (%)", "GWS", "LWS"]
    keep_all = len(by_kernel) == 0
    rows = []
    listed_calls = 0
    listed_time = 0.0
    listed_percent = 0.0

    # The first column is the 1-based position in k_lst, counted over all
    # entries and not only over the listed ones.
    for position, entry in enumerate(k_lst, 1):
        if not (entry["percent"] > limit):
            continue
        if not keep_all and entry["kernel_name"] not in by_kernel:
            continue
        listed_calls = listed_calls + 1
        gws = entry['configuration']['gws']
        lws = entry['configuration']['lws']
        # The position is an int when no name filter is given and a string
        # when one is.
        label = position if keep_all else str(position)
        rows.append([label, entry["kernel_name"], format(entry['total_time'],'.2f'), format(entry['percent'],'.2f'), gws, lws])
        listed_time += entry['total_time']
        listed_percent += entry["percent"]

    ## APP INFO
    filter_text = ''.join(str(name) + '\n' for name in by_kernel)
    summary_columns = ["Filter_by", "Threshold % > ", "Calls", "Partial time", "Partial percent" ]
    summary_rows = [[filter_text, limit, listed_calls, format(listed_time,'.2f'), format(listed_percent,'.2f') ]]
    print("\n")
    print(tabulate(rows, columns, tablefmt=view))
    print("\n")
    print(tabulate(summary_rows, summary_columns, tablefmt="rst"))
167 | 
168 | 


--------------------------------------------------------------------------------
/python/prof_wrangler.py:
--------------------------------------------------------------------------------
  1 | #
  2 | # 2015-2017 (c) dividiti
  3 | #
  4 | 
  5 | import prof_common
  6 | 
  7 | import dateutil.parser
  8 | 
  9 | import pandas as pd
 10 | 
 11 | # Check that definitions from this file are available.
def test():
    """Print this module's file name, showing that its definitions are loaded."""
    print ("prof_wrangler.py")
 14 | 
 15 | # Return the difference between the end and start timestamps in seconds.
 16 | def ts_delta_s(ts_end, ts_start):
 17 |     delta = dateutil.parser.parse(ts_end) - dateutil.parser.parse(ts_start)
 18 |     delta_s = delta.total_seconds()
 19 |     return delta_s
 20 | 
 21 | # Return the difference between the end and start timestamps in nanoseconds.
 22 | def ts_delta_ns(ts_end, ts_start):
 23 |     delta_s = ts_delta_s(ts_end, ts_start)
 24 |     delta_ns = int(delta_s * 1e9)
 25 |     return delta_ns
 26 | 
 27 | # For each call in the trace, add its index to the call dictionary.
 28 | def index_calls(trace):
 29 |     indexed_trace = [
 30 |         dict(trace, call_index=index)
 31 |         for trace, index in zip(trace, range(len(trace)))
 32 |     ]
 33 |     return indexed_trace
 34 | 
 35 | # Return calls in the trace whose names are in the call_names list.
 36 | # For example, when calls_names=['clEnqueueNDRangeKernel'], return
 37 | # only kernel enqueues.
 38 | def filter_calls(trace, call_names):
 39 |     filtered_trace = [
 40 |         call for call in trace if call['call'] in call_names
 41 |     ]
 42 |     return filtered_trace
 43 | 
 44 | # Return a DataFrame containing the differences between the profiling markers
 45 | # and the timestamps in nanoseconds for a trace (nqs) with any enqueues
 46 | # (clEnqueueNDRangeKernel, clEnqueueReadBuffer, clEnqueueWriteBuffer, etc).
 47 | def df_enqueues_ns(nqs,
 48 |                    label_fn = lambda nq: '%s' % str(nq['call_index']).zfill(6)) :
 49 |     def _df_data():
 50 | 	data = [
 51 |             {
 52 |                 'p1 - p0' : nq['profiling']['submit'] - nq['profiling']['queued'], # command queueing time
 53 |                 'p2 - p1' : nq['profiling']['start']  - nq['profiling']['submit'], # job queueing time
 54 |                 'p3 - p2' : nq['profiling']['end']    - nq['profiling']['start'],  # kernel execution time
 55 |                 'p3 - p0' : nq['profiling']['end']    - nq['profiling']['queued'], # total execution time
 56 |                 't1 - t0' : ts_delta_ns(ts_end=nq['timestamp']['end'], ts_start=nq['timestamp']['start']), # chrono time
 57 |                 '(t1 - t0) - (p3 - p0)' :
 58 |                             ts_delta_ns(ts_end=nq['timestamp']['end'], ts_start=nq['timestamp']['start']) -
 59 |                             nq['profiling']['end'] + nq['profiling']['queued'],    # chrono overhead
 60 |             }
 61 |             for nq in nqs
 62 |         ]
 63 |         return data
 64 | 
 65 |     def _df_index():
 66 |         index = pd.MultiIndex.from_tuples(
 67 |             names=('label', 'call'),
 68 |             tuples=[ (label_fn(nq), nq['call']) for nq in nqs ]
 69 |         )
 70 |         return index
 71 | 
 72 |     df = pd.DataFrame(data=_df_data(),index=_df_index())
 73 |     return df
 74 | 
 75 | # Return a DataFrame containing kernel enqueue info.
 76 | def df_kernel_enqueues(nqs, unit='ms'):
 77 |     multiplier = {
 78 |         'ns' : { 'profiling' : 1e-0, 'timestamp' : 1e+9 },
 79 |         'us' : { 'profiling' : 1e-3, 'timestamp' : 1e+6 },
 80 |         'ms' : { 'profiling' : 1e-6, 'timestamp' : 1e+3 },
 81 |         's'  : { 'profiling' : 1e-9, 'timestamp' : 1e+0 }
 82 |     }
 83 | 
 84 |     df_kernel_enqueues = pd.DataFrame()
 85 |     df_kernel_enqueues_tmp = pd.DataFrame(nqs)
 86 | 
 87 |     # Flatten work size and offset lists.
 88 |     df_kernel_enqueues[['lws0','lws1','lws2']] = df_kernel_enqueues_tmp['lws'].apply(pd.Series)
 89 |     df_kernel_enqueues[['gws0','gws1','gws2']] = df_kernel_enqueues_tmp['gws'].apply(pd.Series)
 90 |     df_kernel_enqueues[['gwo0','gwo1','gwo2']] = df_kernel_enqueues_tmp['gwo'].apply(pd.Series)
 91 | 
 92 |     # Flatten timestamp dictionaries
 93 |     df_kernel_enqueues_tmp[['t0','t1']] = df_kernel_enqueues_tmp['timestamp'].apply(pd.Series)
 94 |     # Compute the timestamp difference.
 95 |     df_kernel_enqueues['t1 - t0 (%s)' % unit] = df_kernel_enqueues_tmp[['t0','t1']] \
 96 |         .apply(lambda x: multiplier[unit]['timestamp'] * ts_delta_s(x[0],x[1]), axis=1)
 97 | 
 98 |     # Flatten profiling dictionaries
 99 |     # NB: Note this approach is different from the one used for timestamps
100 |     # due to non-intuitive order of flattening via .apply(pd.Series).
101 |     df_kernel_enqueues_tmp['p0'] = df_kernel_enqueues_tmp['profiling'].apply(lambda x: x['queued'])
102 |     df_kernel_enqueues_tmp['p1'] = df_kernel_enqueues_tmp['profiling'].apply(lambda x: x['submit'])
103 |     df_kernel_enqueues_tmp['p2'] = df_kernel_enqueues_tmp['profiling'].apply(lambda x: x['start'])
104 |     df_kernel_enqueues_tmp['p3'] = df_kernel_enqueues_tmp['profiling'].apply(lambda x: x['end'])
105 |     # Compute the profiling differences.
106 |     df_kernel_enqueues['p3 - p0 (%s)' % unit] = \
107 |         multiplier[unit]['profiling'] * (df_kernel_enqueues_tmp['p3'] - df_kernel_enqueues_tmp['p0'])
108 |     df_kernel_enqueues['p3 - p2 (%s)' % unit] = \
109 |         multiplier[unit]['profiling'] * (df_kernel_enqueues_tmp['p3'] - df_kernel_enqueues_tmp['p2'])
110 |     df_kernel_enqueues['p2 - p1 (%s)' % unit] = \
111 |         multiplier[unit]['profiling'] * (df_kernel_enqueues_tmp['p2'] - df_kernel_enqueues_tmp['p1'])
112 |     df_kernel_enqueues['p1 - p0 (%s)' % unit] = \
113 |         multiplier[unit]['profiling'] * (df_kernel_enqueues_tmp['p1'] - df_kernel_enqueues_tmp['p0'])
114 | 
115 |     # Set the index.
116 |     df_kernel_enqueues[['call_index','name']] = df_kernel_enqueues_tmp[['call_index','name']]
117 |     df_kernel_enqueues.set_index(['call_index', 'name'], inplace=True)
118 | 
119 |     return df_kernel_enqueues
120 | 
121 | 
def df_kernel_enqueues_cumulative_time_num(df_kernel_enqueues_all, unit):
    """Aggregate kernel execution time and enqueue count per kernel name.

    df_kernel_enqueues_all: DataFrame with a 'p3 - p2 (<unit>)' column and
        an index level called 'name'.
    unit: the unit text used in that column's label.

    Returns one row per kernel name with the number of enqueues, the summed
    execution time and its percentage of the overall total, sorted by the
    percentage in descending order.
    """
    time_label = '** Execution time (%s) **' % unit
    count_label = '** Number of enqueues **'
    percent_label = '** Execution time (%) **'

    # One row per enqueue: its execution time plus a constant 1, so that a
    # single sum yields both the cumulative time and the enqueue count.
    per_enqueue = df_kernel_enqueues_all[['p3 - p2 (%s)' % unit]].copy()
    per_enqueue['1'] = 1

    per_kernel = per_enqueue.groupby(level='name').sum()
    per_kernel.columns = [time_label, count_label]
    per_kernel.index.name = '** Kernel name **'

    # Share of each kernel in the total execution time.
    per_kernel[percent_label] = 100 * (per_kernel[time_label] / per_kernel[time_label].sum())

    # Number of enqueues first, then time, then percentage.
    ordered = per_kernel[[count_label, time_label, percent_label]]
    return ordered.sort_values(percent_label, ascending=False)
143 | 


--------------------------------------------------------------------------------
/python/prof_parser.py:
--------------------------------------------------------------------------------
  1 | #
  2 | # 2015-2017 (c) dividiti
  3 | #
  4 | 
  5 | import re
  6 | import json
  7 | 
  8 | #
  9 | # Common definitions.
 10 | #
 11 | 
 12 | prefix = '(\[dv\/dt\])'
 13 | call_regex = '(cl[a-zA-Z]*)'
 14 | opts_regex = '([ \-\w_=]*)'
 15 | iso_regex  = '(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6})'
 16 | ptr_regex  = '((0x[0-9a-fA-F]{1,8})|(0))'
 17 | int_regex  = '(\d+)'
 18 | hex_regex  = '([a-fA-F\d]+)'
 19 | bool_regex = '(\d)'
 20 | 
 21 | #
 22 | # Parsers for API calls.
 23 | #
 24 | 
 25 | def match_clBuildProgram(output, result):
 26 |     call = 'clBuildProgram'
 27 | 
 28 |     # Arguments.
 29 |     result['program'] = re.search('%s %s %s (?P<program>%s)' % \
 30 |         (prefix, call, 'program', ptr_regex), output).group('program')
 31 |     result['device_list'] = re.search('%s %s %s(?P<device_list>( %s)*)' % \
 32 |         (prefix, call, 'device_list', ptr_regex), output).group('device_list').split()
 33 |     result['options'] = re.search('%s %s %s (?P<options>%s)' % \
 34 |         (prefix, call, 'options', opts_regex), output).group('options')
 35 |     result['pfn_notify']  = re.search('%s %s %s (?P<pfn_notify>%s)' % \
 36 |         (prefix, call, 'pfn_notify', ptr_regex), output).group('pfn_notify')
 37 |     result['user_data']  = re.search('%s %s %s (?P<user_data>%s)' % \
 38 |         (prefix, call, 'user_data', ptr_regex), output).group('user_data')
 39 | 
 40 |     # Return value.
 41 |     return_match = re.search('%s %s %s (?P<errcode>%s)' % \
 42 |         (prefix, call, 'errcode', int_regex), output)
 43 |     result['errcode'] = int(return_match.group('errcode'))
 44 | 
 45 |     return (output[return_match.end():], result)
 46 | 
 47 | 
 48 | def match_clCreateBuffer(output, result):
 49 |     call = 'clCreateBuffer'
 50 | 
 51 |     # Arguments.
 52 |     result['context'] = re.search('%s %s %s (?P<context>%s)' % \
 53 |         (prefix, call, 'context', ptr_regex), output).group('context')
 54 |     result['flags'] = int(re.search('%s %s %s (?P<flags>%s)'   % \
 55 |         (prefix, call, 'flags', int_regex), output).group('flags'))
 56 |     result['size']  = int(re.search('%s %s %s (?P<size>%s)' % \
 57 |         (prefix, call, 'size', int_regex), output).group('size'))
 58 |     result['host_ptr'] = re.search('%s %s %s (?P<host_ptr>%s)' % \
 59 |         (prefix, call, 'host_ptr', ptr_regex), output).group('host_ptr')
 60 |     result['errcode_ret'] = re.search('%s %s %s (?P<errcode_ret>%s)' % \
 61 |         (prefix, call, 'errcode_ret', ptr_regex), output).group('errcode_ret')
 62 | 
 63 |     # Return value.
 64 |     return_match = re.search('%s %s %s (?P<buffer>%s)' % \
 65 |         (prefix, call, 'buffer', ptr_regex), output)
 66 |     result['queue'] = return_match.group('buffer')
 67 | 
 68 |     return (output[return_match.end():], result)
 69 | 
 70 | 
 71 | def match_clCreateCommandQueue(output, result):
 72 |     call = 'clCreateCommandQueue'
 73 | 
 74 |     # Arguments.
 75 |     result['context'] = re.search('%s %s %s (?P<context>%s)' % \
 76 |         (prefix, call, 'context', ptr_regex), output).group('context')
 77 |     result['device'] = re.search('%s %s %s (?P<device>%s)'  % \
 78 |         (prefix, call, 'device', ptr_regex), output).group('device')
 79 |     result['properties'] = int(re.search('%s %s %s (?P<properties>%s)' % \
 80 |         (prefix, call, 'properties', int_regex), output).group('properties'))
 81 |     result['errcode_ret'] = re.search('%s %s %s (?P<errcode_ret>%s)' % \
 82 |         (prefix, call, 'errcode_ret', ptr_regex), output).group('errcode_ret')
 83 | 
 84 |     # Return value.
 85 |     return_match = re.search('%s %s %s (?P<queue>%s)' % \
 86 |         (prefix, call, 'queue', ptr_regex), output)
 87 |     result['queue'] = return_match.group('queue')
 88 | 
 89 |     return (output[return_match.end():], result)
 90 | 
 91 | 
 92 | def match_clCreateKernel(output, result):
 93 |     call = 'clCreateKernel'
 94 | 
 95 |     # Arguments.
 96 |     result['program'] = re.search('%s %s %s (?P<program>%s)' % \
 97 |         (prefix, call, 'program', ptr_regex), output).group('program')
 98 |     result['name']    = re.search('%s %s %s (?P<name>%s)' % \
 99 |         (prefix, call, 'name', opts_regex), output).group('name')
100 |     result['errcode_ret'] = re.search('%s %s %s (?P<errcode_ret>%s)' % \
101 |         (prefix, call, 'errcode_ret', ptr_regex), output).group('errcode_ret')
102 | 
103 |     # Return value.
104 |     return_match = re.search('%s %s %s (?P<kernel>%s)' % \
105 |         (prefix, call, 'kernel', ptr_regex), output)
106 |     result['kernel'] = return_match.group('kernel')
107 | 
108 |     return (output[return_match.end():], result)
109 | 
110 | 
def match_clCreateKernelsInProgram(output, result):
    """Parse the logged arguments and return value of clCreateKernelsInProgram.

    output: profiler text; each field is taken from the first line of the
        form '[dv/dt] clCreateKernelsInProgram <field> <value>' found in it.
    result: dictionary that receives 'program', the integer 'num_kernels',
        'kernels', 'num_kernels_ret_ptr', the integer 'num_kernels_ret' and
        the integer 'errcode'.

    Returns (text following the errcode line, result).  A missing field
    raises AttributeError, because re.search returns None.
    """
    call = 'clCreateKernelsInProgram'

    def field(name, value_regex):
        # Match '<prefix> <call> <name> <value>', capturing the value as <name>.
        return re.search('%s %s %s (?P<%s>%s)' % (prefix, call, name, name, value_regex), output)

    # Arguments.
    result['program'] = field('program', ptr_regex).group('program')
    result['num_kernels'] = int(field('num_kernels', int_regex).group('num_kernels'))
    result['kernels'] = field('kernels', ptr_regex).group('kernels')
    result['num_kernels_ret_ptr'] = field('num_kernels_ret_ptr', ptr_regex).group('num_kernels_ret_ptr')
    result['num_kernels_ret'] = int(field('num_kernels_ret', int_regex).group('num_kernels_ret'))

    # Return value.
    errcode_match = field('errcode', int_regex)
    result['errcode'] = int(errcode_match.group('errcode'))

    return (output[errcode_match.end():], result)
132 | 
133 | 
def match_clCreateProgramWithBinary(output, result):
    """Parse the logged arguments and return value of clCreateProgramWithBinary.

    output: profiler text; each field is taken from the first line of the
        form '[dv/dt] clCreateProgramWithBinary <field> <value>' found in it.
    result: dictionary that receives 'context', 'device_list' (a list of
        pointer strings), 'lengths', 'binaries', 'binary_status',
        'errcode_ret' and the returned 'program'.

    Returns (text following the program line, result).  A missing field
    raises AttributeError, because re.search returns None.
    """
    call = 'clCreateProgramWithBinary'

    def field(name, value_regex):
        # Match '<prefix> <call> <name> <value>', capturing the value as <name>.
        return re.search('%s %s %s (?P<%s>%s)' % (prefix, call, name, name, value_regex), output)

    # Arguments.
    result['context'] = field('context', ptr_regex).group('context')
    # The device list is zero or more space-separated pointers.
    devices_match = re.search('%s %s %s(?P<device_list>( %s)*)' % \
        (prefix, call, 'device_list', ptr_regex), output)
    result['device_list'] = devices_match.group('device_list').split()
    result['lengths'] = field('lengths', ptr_regex).group('lengths')
    result['binaries'] = field('binaries', ptr_regex).group('binaries')
    result['binary_status'] = field('binary_status', ptr_regex).group('binary_status')
    result['errcode_ret'] = field('errcode_ret', ptr_regex).group('errcode_ret')

    # Return value.
    program_match = field('program', ptr_regex)
    result['program'] = program_match.group('program')

    return (output[program_match.end():], result)
157 | 
158 | 
def match_clCreateProgramWithSource(output, result):
    """Parse one clCreateProgramWithSource block of ostream-style output.

    Fills `result` with the logged arguments ('context', 'count', 'strings',
    'lengths', 'errcode_ret'), a 'source' dict mapping str(k) to the text of
    the k-th source string, and the returned 'program' pointer string.

    Returns a tuple (output remaining after the 'program' line, result).
    Raises AttributeError if an expected line is not found in `output`.
    """
    call = 'clCreateProgramWithSource'

    # Arguments.
    result['context'] = re.search('%s %s %s (?P<context>%s)' % \
        (prefix, call, 'context', ptr_regex), output).group('context')
    result['count'] = int(re.search('%s %s %s (?P<count>%s)' % \
        (prefix, call, 'count', int_regex), output).group('count'))
    result['strings'] = re.search('%s %s %s (?P<strings>%s)' % \
        (prefix, call, 'strings', ptr_regex), output).group('strings')
    result['lengths'] = re.search('%s %s %s (?P<lengths>%s)' % \
        (prefix, call, 'lengths', ptr_regex), output).group('lengths')
    result['errcode_ret'] = re.search('%s %s %s (?P<errcode_ret>%s)' % \
        (prefix, call, 'errcode_ret', ptr_regex), output).group('errcode_ret')

    result['source'] = {}
    for k in range(result['count']):
        # Raw string: '\[' and '\]' are regex escapes for literal brackets,
        # not string escapes (a non-raw literal triggers an invalid-escape
        # warning on recent Python versions).
        prefix_call_string_k = '%s %s %s' % \
                               (prefix, call, r'sources\[%d\]' % k)
        # NB: '.*?' matches any characters between the markers
        # in a non-greedy fashion.
        result['source'][str(k)] = \
            re.search('%s <<\n(?P<string>.*?)\n%s >>\n' % \
            (prefix_call_string_k, prefix_call_string_k), \
            output, re.DOTALL).group('string')

    # Return value.
    return_match = re.search('%s %s %s (?P<program>%s)' % \
        (prefix, call, 'program', ptr_regex), output)
    result['program'] = return_match.group('program')

    return (output[return_match.end():], result)
191 | 
192 | 
def match_clEnqueueNDRangeKernel(output, result):
    """Parse one clEnqueueNDRangeKernel block of ostream-style output.

    Fills `result` with the kernel 'name', the logged arguments ('queue',
    'kernel', 'gwo', 'gws', 'lws' as lists of ints, 'event_wait_list' as a
    list of pointer strings, 'event'), the 'errcode' and, when a profiling
    line is present, a 'profiling' dict with 'queued', 'submit', 'start'
    and 'end' as ints.

    Returns a tuple (remaining output, result); the remaining output starts
    after the profiling line if there is one, otherwise after the errcode
    line. Raises AttributeError if a mandatory line is not found.
    """
    call = 'clEnqueueNDRangeKernel'

    def find(name, regex):
        # First occurrence of '<prefix> <call> <name> <value>' in output.
        return re.search('%s %s %s (?P<%s>%s)' % \
            (prefix, call, name, name, regex), output)

    # Name.
    result['name'] = find('name', opts_regex).group('name')

    # Arguments.
    result['queue'] = find('queue', ptr_regex).group('queue')
    result['kernel'] = find('kernel', ptr_regex).group('kernel')
    # Work offsets/sizes are whitespace-separated integers up to end of line.
    for dims in ('gwo', 'gws', 'lws'):
        result[dims] = [int(i) for i in find(dims, '.*').group(dims).split()]
    wait_list = re.search('%s %s %s(?P<event_wait_list>( %s)*)' % \
        (prefix, call, 'event_wait_list', ptr_regex), output)
    result['event_wait_list'] = wait_list.group('event_wait_list').split()
    result['event'] = find('event', ptr_regex).group('event')

    # Return value.
    errcode_match = find('errcode', int_regex)
    result['errcode'] = int(errcode_match.group('errcode'))

    # Profiling info (optional).
    timings = re.search('%s %s %s (?P<queued>%s) (?P<submit>%s) (?P<start>%s) (?P<end>%s)' % \
        (prefix, call, 'profiling', int_regex, int_regex, int_regex, int_regex), output)
    if timings:
        result['profiling'] = {}
        for stage in ('queued', 'submit', 'start', 'end'):
            result['profiling'][stage] = int(timings.group(stage))

    tail_match = timings if timings else errcode_match

    return (output[tail_match.end():], result)
234 | 
235 | 
# Shared parser behind match_clEnqueueReadBuffer and match_clEnqueueWriteBuffer.
def _match_clEnqueueReadOrWriteBuffer(call, output, result):
    """Parse one clEnqueueReadBuffer / clEnqueueWriteBuffer block.

    `call` is the API call name used to build the line patterns. Fills
    `result` with the logged arguments ('queue', 'buffer', 'blocking',
    'offset', 'size', 'ptr', 'event_wait_list', 'event'), the 'errcode' and,
    when a profiling line is present, a 'profiling' dict with 'queued',
    'submit', 'start' and 'end' as ints.

    Returns a tuple (remaining output, result); the remaining output starts
    after the profiling line if there is one, otherwise after the errcode
    line. Raises AttributeError if a mandatory line is not found.
    """
    def find(name, regex):
        # First occurrence of '<prefix> <call> <name> <value>' in output.
        return re.search('%s %s %s (?P<%s>%s)' % \
            (prefix, call, name, name, regex), output)

    # Arguments.
    for name in ('queue', 'buffer'):
        result[name] = find(name, ptr_regex).group(name)
    result['blocking'] = int(find('blocking', bool_regex).group('blocking'))
    for name in ('offset', 'size'):
        result[name] = int(find(name, int_regex).group(name))
    result['ptr'] = find('ptr', ptr_regex).group('ptr')
    wait_list = re.search('%s %s %s(?P<event_wait_list>( %s)*)' % \
        (prefix, call, 'event_wait_list', ptr_regex), output)
    result['event_wait_list'] = wait_list.group('event_wait_list').split()
    result['event'] = find('event', ptr_regex).group('event')

    # Return value.
    errcode_match = find('errcode', int_regex)
    result['errcode'] = int(errcode_match.group('errcode'))

    # Profiling info (optional).
    timings = re.search('%s %s %s (?P<queued>%s) (?P<submit>%s) (?P<start>%s) (?P<end>%s)' % \
        (prefix, call, 'profiling', int_regex, int_regex, int_regex, int_regex), output)
    if timings:
        result['profiling'] = {}
        for stage in ('queued', 'submit', 'start', 'end'):
            result['profiling'][stage] = int(timings.group(stage))

    tail_match = timings if timings else errcode_match

    return (output[tail_match.end():], result)
274 | 
def match_clEnqueueReadBuffer(output, result):
    """Parse a clEnqueueReadBuffer block via the shared read/write parser."""
    return _match_clEnqueueReadOrWriteBuffer('clEnqueueReadBuffer', output, result)
277 | 
def match_clEnqueueWriteBuffer(output, result):
    """Parse a clEnqueueWriteBuffer block via the shared read/write parser."""
    return _match_clEnqueueReadOrWriteBuffer('clEnqueueWriteBuffer', output, result)
280 | 
281 | 
def match_clSetKernelArg(output, result):
    """Parse one clSetKernelArg block of ostream-style output.

    Fills `result` with 'kernel' (pointer string), 'arg_index' and
    'arg_size' (ints), 'arg_value' (string matched by hex_regex) and
    'errcode' (int).

    Returns a tuple (output remaining after the errcode line, result).
    Raises AttributeError if an expected line is not found in `output`.
    """
    call = 'clSetKernelArg'

    def find(name, regex):
        # First occurrence of '<prefix> <call> <name> <value>' in output.
        return re.search('%s %s %s (?P<%s>%s)' % \
            (prefix, call, name, name, regex), output)

    # Arguments.
    result['kernel'] = find('kernel', ptr_regex).group('kernel')
    for name in ('arg_index', 'arg_size'):
        result[name] = int(find(name, int_regex).group(name))
    result['arg_value'] = find('arg_value', hex_regex).group('arg_value')

    # Return value.
    errcode_match = find('errcode', int_regex)
    result['errcode'] = int(errcode_match.group('errcode'))

    return (output[errcode_match.end():], result)
301 | 
302 | 
# Dispatch table: OpenCL API call name -> function parsing that call's block.
map_call_to_parser = dict(
    clBuildProgram=match_clBuildProgram,
    clCreateBuffer=match_clCreateBuffer,
    clCreateCommandQueue=match_clCreateCommandQueue,
    clCreateKernel=match_clCreateKernel,
    clCreateKernelsInProgram=match_clCreateKernelsInProgram,
    clCreateProgramWithBinary=match_clCreateProgramWithBinary,
    clCreateProgramWithSource=match_clCreateProgramWithSource,
    clEnqueueNDRangeKernel=match_clEnqueueNDRangeKernel,
    clEnqueueReadBuffer=match_clEnqueueReadBuffer,
    clEnqueueWriteBuffer=match_clEnqueueWriteBuffer,
    clSetKernelArg=match_clSetKernelArg,
)
317 | 
318 | 
def next_match(output):
    """Parse the first API call block found in `output`.

    Locates the first '<prefix> <call>' line, picks the parser registered
    for that call in map_call_to_parser, records the optional start/end
    timestamps and delegates the remaining fields to the parser.

    Returns the parser's (remaining output, result) tuple, or ('', {}) when
    no further call block is present.
    Raises Exception if the call has no registered parser.
    """
    result = {}

    # For robustness, a new block starts with just an API call name.
    match = re.search('%s (?P<call>%s)\n' % (prefix, call_regex), output)
    if not match:
        return ('', {})

    result['call'] = match.group('call')
    # .get() rather than indexing: an unknown call must reach the explicit
    # "not supported" error below instead of failing with a bare KeyError.
    parser = map_call_to_parser.get(result['call'])
    if not parser:
        raise Exception('OpenCL API call %s not supported!' % result['call'])

    # Start and end timestamps are optional (especially in tests) but common to all calls.
    result['timestamp'] = {}
    start_match = re.search('%s %s start (?P<start>%s)' % (prefix, result['call'], iso_regex), output[match.end():])
    if start_match:
        result['timestamp']['start'] = start_match.group('start')
    end_match = re.search('%s %s end (?P<end>%s)' % (prefix, result['call'], iso_regex), output[match.end():])
    if end_match:
        result['timestamp']['end'] = end_match.group('end')

    return parser(output, result)
342 | 
343 | 
def prof_parse_ostream(output):
    """Parse ostream-style profiler output into a list of per-call dicts.

    Repeatedly calls next_match() on the remaining output until it yields
    an empty result.
    """
    results = []
    while True:
        (output, parsed_call) = next_match(output)
        if not parsed_call:
            break
        results.append(parsed_call)
    return results
351 | 
352 | 
def prof_parse_cjson(output):
    """Extract and decode the JSON payload from cjson-style profiler output.

    The payload is the text between a '<prefix> <<' line and a
    '<prefix> >>' line (greedy match across newlines). Returns the decoded
    JSON value, or an empty list when no such section is found.
    """
    pattern = '%s <<\n(?P<json>.*)\n%s >>\n' % (prefix, prefix)
    found = re.search(pattern, output, re.DOTALL)
    if not found:
        return []
    return json.loads(found.group('json'))
361 | 
362 | 
def prof_parse(output):
    """Parse profiler output, trying the cjson format first.

    Falls back to the ostream parser when the cjson parser yields an empty
    (falsy) result.
    """
    return prof_parse_cjson(output) or prof_parse_ostream(output)
368 | 


--------------------------------------------------------------------------------
/cpp/prof.cpp:
--------------------------------------------------------------------------------
  1 | //
  2 | // 2015-2017 (c) dividiti
  3 | //
  4 | 
  5 | 
  6 | #include "prof.hpp"
  7 | 
// Static container for profiler's methods and data.
// Its 'interceptor' member caches the original OpenCL entry points resolved
// by the wrappers below and the first OpenCL context they observe.
static dvdt::Prof prof;

// Static container for profiler's logger.
// The logger type is chosen at compile time: cjsonLogger when
// DVDT_PROF_CJSON is 1, ostreamLogger otherwise.
#if (1 == DVDT_PROF_CJSON)
static dvdt::cjsonLogger logger;
#else
static dvdt::ostreamLogger logger;
#endif
 17 | 
 18 | //
 19 | // Table of contents: OpenCL API functions in the alphabetical order.
 20 | //
 21 | // - clBuildProgram()
 22 | // - clCreateBuffer()
 23 | // - clCreateCommandQueue()
 24 | // - clCreateKernel()
 25 | // - clCreateKernelsInProgram()
 26 | // - clCreateProgramWithBinary()
 27 | // - clCreateProgramWithSource()
 28 | // - clEnqueueNDRangeKernel()
 29 | // - clEnqueueReadBuffer()
 30 | // - clEnqueueWriteBuffer()
 31 | // - clSetKernelArg()
 32 | //
 33 | 
// https://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clBuildProgram.html
//
// Interposed clBuildProgram(): logs the call name and arguments, forwards
// the call to the original implementation (looked up once via
// dlsym(RTLD_NEXT, ...) and cached in prof.interceptor), brackets it with
// start/end timestamps, then logs and returns the error code.
//
// When DVDT_PROF_TEST is defined, the original function is not invoked and
// no timestamps are logged; CL_SUCCESS is logged and returned.
extern CL_API_ENTRY cl_int CL_API_CALL
clBuildProgram(
    cl_program program,
    cl_uint num_devices,
    const cl_device_id * device_list,
    const char * options,
    void (CL_CALLBACK * pfn_notify)(cl_program program, void * user_data),
    void * user_data) CL_API_SUFFIX__VERSION_1_0
{
    // Return value.
    cl_int errcode = CL_SUCCESS;

    // API call.
    const char * call = "clBuildProgram";
    logger.log_call(call);

    // Resolve the original entry point on first use.
    if (NULL == prof.interceptor.clBuildProgram_original)
    {
        prof.interceptor.clBuildProgram_original = (dvdt::Prof::Interceptor::clBuildProgram_type) dlsym(RTLD_NEXT, call);
    }

    // Arguments.
    logger.log_ptr(call, "program", program);
    logger.log_list<cl_device_id>(call, "device_list", device_list, num_devices);
    // A NULL options pointer is logged as an empty string.
    logger.log_str(call, "options", options ? options : "");
    logger.log_ptr(call, "pfn_notify", (const void *) pfn_notify);
    logger.log_ptr(call, "user_data", user_data);

#ifndef DVDT_PROF_TEST
    logger.log_timestamp_start(call);

    // Original call.
    errcode = prof.interceptor.clBuildProgram_original(\
        program, num_devices, device_list, options, pfn_notify, user_data);
    // TODO: When pfn_notify is not NULL, still make the call blocking so that
    // (timestamp_end - timestamp_start) represents the actual build time.

    logger.log_timestamp_end(call);
#endif

    // Return value.
    logger.log_num<cl_int>(call, "errcode", errcode); logger.log_lf();

    return errcode;

} // clBuildProgram()
 81 | 
 82 | 
// https://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clCreateBuffer.html
//
// Interposed clCreateBuffer(): logs the call name and arguments, forwards
// the call to the original implementation (looked up once via
// dlsym(RTLD_NEXT, ...) and cached in prof.interceptor), brackets it with
// start/end timestamps, then logs and returns the created buffer handle.
//
// When DVDT_PROF_TEST is defined, the original function is not invoked,
// neither timestamps nor the error value are logged, and a null handle is
// logged and returned.
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateBuffer(
    cl_context context,
    cl_mem_flags flags,
    size_t size,
    void *host_ptr,
    cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0
{
    // Return value.
    cl_mem buffer = (cl_mem) 0x0;

    // API call.
    const char * call = "clCreateBuffer";
    logger.log_call(call);

    // Resolve the original entry point on first use.
    if (NULL == prof.interceptor.clCreateBuffer_original)
    {
        prof.interceptor.clCreateBuffer_original = (dvdt::Prof::Interceptor::clCreateBuffer_type) dlsym(RTLD_NEXT, call);
    }

    // Remember the first context seen by the interceptor.
    if (NULL == prof.interceptor.context)
    {
        prof.interceptor.context = context;
    }

    // Arguments.
    logger.log_ptr(call, "context", context);
    logger.log_num<cl_mem_flags>(call, "flags", flags);
    logger.log_num<size_t>(call, "size", size);
    logger.log_ptr(call, "host_ptr", host_ptr);
    logger.log_ptr(call, "errcode_ret", errcode_ret);

#ifndef DVDT_PROF_TEST
    logger.log_timestamp_start(call);

    // Original call.
    buffer = prof.interceptor.clCreateBuffer_original(\
        context, flags, size, host_ptr, errcode_ret);

    logger.log_timestamp_end(call);

    // Error value (-1 is logged when the caller passed no errcode_ret).
    logger.log_num<cl_int>(call, "errcode", errcode_ret ? *errcode_ret : -1);
#endif

    // Return value.
    logger.log_ptr(call, "buffer", buffer); logger.log_lf();

    return buffer;

} // clCreateBuffer()
135 | 
136 | 
// https://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clCreateCommandQueue.html
//
// Interposed clCreateCommandQueue(): logs the call name and arguments,
// forwards the call to the original implementation (looked up once via
// dlsym(RTLD_NEXT, ...) and cached in prof.interceptor) with
// CL_QUEUE_PROFILING_ENABLE OR-ed into the properties, brackets it with
// start/end timestamps, then logs and returns the created queue handle.
//
// When DVDT_PROF_TEST is defined, the original function is not invoked,
// neither timestamps nor the error value are logged, and a null handle is
// logged and returned.
extern CL_API_ENTRY cl_command_queue CL_API_CALL
clCreateCommandQueue(
    cl_context context,
    cl_device_id device,
    cl_command_queue_properties properties,
    cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0
{
    // Return value.
    cl_command_queue queue = (cl_command_queue) 0x0;

    // API call.
    const char * call = "clCreateCommandQueue";
    logger.log_call(call);

    // Resolve the original entry point on first use.
    if (NULL == prof.interceptor.clCreateCommandQueue_original)
    {
        prof.interceptor.clCreateCommandQueue_original = (dvdt::Prof::Interceptor::clCreateCommandQueue_type) dlsym(RTLD_NEXT, call);
    }

    // Remember the first context seen by the interceptor.
    if (NULL == prof.interceptor.context)
    {
        prof.interceptor.context = context;
    }

    // Arguments.
    // NB: 'properties' is logged as passed by the caller, i.e. before
    // CL_QUEUE_PROFILING_ENABLE is added below.
    logger.log_ptr(call, "context", context);
    logger.log_ptr(call, "device", device);
    logger.log_num<cl_command_queue_properties>(call, "properties", properties);
    logger.log_ptr(call, "errcode_ret", errcode_ret);

#ifndef DVDT_PROF_TEST
    logger.log_timestamp_start(call);

    // Original call, with profiling always enabled on the created queue.
    queue = prof.interceptor.clCreateCommandQueue_original(\
        context, device, properties | CL_QUEUE_PROFILING_ENABLE, errcode_ret);

    logger.log_timestamp_end(call);

    // Error value (-1 is logged when the caller passed no errcode_ret).
    logger.log_num<cl_int>(call, "errcode", errcode_ret ? *errcode_ret : -1);
#endif

    // Return value.
    logger.log_ptr(call, "queue", queue); logger.log_lf();

    return queue;

} // clCreateCommandQueue()
187 | 
188 | 
189 | // https://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clCreateKernel.html
190 | extern CL_API_ENTRY cl_kernel CL_API_CALL
191 | clCreateKernel(
192 |     cl_program program,
193 |     const char * kernel_name,
194 |     cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0
195 | {
196 |     // Return value.
197 |     cl_kernel kernel = (cl_kernel) 0x0;
198 | 
199 |     // API call.
200 |     const char * call = "clCreateKernel";
201 |     logger.log_call(call);
202 | 
203 |     if (NULL == prof.interceptor.clCreateKernel_original)
204 |     {
205 |         prof.interceptor.clCreateKernel_original = (dvdt::Prof::Interceptor::clCreateKernel_type) dlsym(RTLD_NEXT, call);
206 |     }
207 | 
208 |     // Arguments.
209 |     logger.log_ptr(call, "program", program);
210 |     logger.log_str(call, "name", kernel_name);
211 |     logger.log_ptr(call, "errcode_ret", errcode_ret);
212 | 
213 | #ifndef DVDT_PROF_TEST
214 |     logger.log_timestamp_start(call);
215 | 
216 |     // Original call.
217 |     kernel = prof.interceptor.clCreateKernel_original(
218 |         program, kernel_name, errcode_ret);
219 | 
220 |     logger.log_timestamp_end(call);
221 | 
222 |     // Error value.
223 |     logger.log_num<cl_int>(call, "errcode", errcode_ret ? *errcode_ret : -1);
224 | #endif
225 | 
226 |     // Return value.
227 |     logger.log_ptr(call, "kernel", kernel); logger.log_lf();
228 | 
229 |     return kernel;
230 | 
231 | } // clCreateKernel()
232 | 
233 | 
// https://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clCreateKernelsInProgram.html
//
// Interposed clCreateKernelsInProgram(): logs the call name and arguments,
// forwards the call to the original implementation (looked up once via
// dlsym(RTLD_NEXT, ...) and cached in prof.interceptor), brackets it with
// start/end timestamps, logs the number of kernels reported by the call,
// then logs and returns the error code.
//
// When DVDT_PROF_TEST is defined, the original function is not invoked;
// "num_kernels_ret" is logged as 0 and CL_SUCCESS is logged and returned.
extern CL_API_ENTRY cl_int CL_API_CALL
clCreateKernelsInProgram(
    cl_program program,
    cl_uint num_kernels,
    cl_kernel *kernels,
    cl_uint *num_kernels_ret) CL_API_SUFFIX__VERSION_1_0
{
    // Return value.
    cl_int errcode = CL_SUCCESS;

    // API call.
    const char * call = "clCreateKernelsInProgram";
    logger.log_call(call);

    // Resolve the original entry point on first use.
    if (NULL == prof.interceptor.clCreateKernelsInProgram_original)
    {
        prof.interceptor.clCreateKernelsInProgram_original = (dvdt::Prof::Interceptor::clCreateKernelsInProgram_type) dlsym(RTLD_NEXT, call);
    }

    // Arguments.
    logger.log_ptr(call, "program", program);
    logger.log_num<cl_uint>(call, "num_kernels", num_kernels);
    logger.log_ptr(call, "kernels", kernels);
    // TODO: log list of kernels.
    // The pointer is logged as "num_kernels_ret_ptr"; the value it points to
    // is logged separately as "num_kernels_ret" after the call.
    logger.log_ptr(call, "num_kernels_ret_ptr", num_kernels_ret);

#ifndef DVDT_PROF_TEST
    logger.log_timestamp_start(call);

    // Original call.
    errcode = prof.interceptor.clCreateKernelsInProgram_original(
        program, num_kernels, kernels, num_kernels_ret);

    logger.log_timestamp_end(call);

    // Actual number of kernels in program.
    // NOTE(review): the -1 fallback is passed to log_num<cl_uint>, so it is
    // presumably logged as the maximum cl_uint value - confirm.
    logger.log_num<cl_uint>(call, "num_kernels_ret", num_kernels_ret ? *num_kernels_ret : -1);
#else
    logger.log_num<cl_uint>(call, "num_kernels_ret", 0);
#endif

    // Return value.
    logger.log_num<cl_int>(call, "errcode", errcode); logger.log_lf();

    return errcode;

} // clCreateKernelsInProgram()
282 | 
283 | 
// https://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clCreateProgramWithBinary.html
//
// Interposed clCreateProgramWithBinary(): logs the call name and arguments,
// forwards the call to the original implementation (looked up once via
// dlsym(RTLD_NEXT, ...) and cached in prof.interceptor), brackets it with
// start/end timestamps, then logs and returns the created program handle.
//
// When DVDT_PROF_TEST is defined, the original function is not invoked,
// neither timestamps nor the error value are logged, and a null handle is
// logged and returned.
extern CL_API_ENTRY cl_program CL_API_CALL
clCreateProgramWithBinary(
    cl_context context,
    cl_uint num_devices,
    const cl_device_id *device_list,
    const size_t *lengths,
    const unsigned char **binaries,
    cl_int *binary_status,
    cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0
{
    // Return value.
    cl_program program = (cl_program) 0x0;

    // API call.
    const char * call = "clCreateProgramWithBinary";
    logger.log_call(call);

    // Resolve the original entry point on first use.
    if (NULL == prof.interceptor.clCreateProgramWithBinary_original)
    {
        prof.interceptor.clCreateProgramWithBinary_original = (dvdt::Prof::Interceptor::clCreateProgramWithBinary_type) dlsym(RTLD_NEXT, call);
    }

    // Remember the first context seen by the interceptor.
    if (NULL == prof.interceptor.context)
    {
        prof.interceptor.context = context;
    }

    // Arguments (lengths, binaries and binary_status are logged as pointers
    // only; their contents are not logged).
    logger.log_ptr(call, "context", context);
    logger.log_list<cl_device_id>(call, "device_list", device_list, num_devices);
    logger.log_ptr(call, "lengths", lengths);
    logger.log_ptr(call, "binaries", binaries);
    logger.log_ptr(call, "binary_status", binary_status);
    logger.log_ptr(call, "errcode_ret", errcode_ret);

#ifndef DVDT_PROF_TEST
    logger.log_timestamp_start(call);

    // Original call.
    program = prof.interceptor.clCreateProgramWithBinary_original(\
        context, num_devices, device_list, lengths, binaries, binary_status, errcode_ret);

    logger.log_timestamp_end(call);

    // Error value (-1 is logged when the caller passed no errcode_ret).
    logger.log_num<cl_int>(call, "errcode", errcode_ret ? *errcode_ret : -1);
#endif

    // Return value.
    logger.log_ptr(call, "program", program); logger.log_lf();

    return program;

} // clCreateProgramWithBinary()
339 | 
340 | 
// https://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clCreateProgramWithSource.html
//
// Interposed clCreateProgramWithSource(): logs the call name, arguments and
// program sources, forwards the call to the original implementation (looked
// up once via dlsym(RTLD_NEXT, ...) and cached in prof.interceptor),
// brackets it with start/end timestamps, then logs and returns the created
// program handle.
//
// When DVDT_PROF_TEST is defined, the original function is not invoked,
// neither timestamps nor the error value are logged, and a null handle is
// logged and returned.
extern CL_API_ENTRY cl_program CL_API_CALL
clCreateProgramWithSource(
    cl_context context,
    cl_uint count,
    const char **strings,
    const size_t *lengths,
    cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0
{
    // Return value.
    cl_program program = (cl_program) 0x0;

    // API call.
    const char * call = "clCreateProgramWithSource";
    logger.log_call(call);

    // Resolve the original entry point on first use.
    if (NULL == prof.interceptor.clCreateProgramWithSource_original)
    {
        prof.interceptor.clCreateProgramWithSource_original = (dvdt::Prof::Interceptor::clCreateProgramWithSource_type) dlsym(RTLD_NEXT, call);
    }

    // Remember the first context seen by the interceptor.
    if (NULL == prof.interceptor.context)
    {
        prof.interceptor.context = context;
    }

    // Arguments.
    logger.log_ptr(call, "context", context);
    logger.log_num<cl_uint>(call, "count", count);
    logger.log_ptr(call, "strings", strings);
    logger.log_ptr(call, "lengths", lengths);
    // Source text is logged in addition to the raw strings/lengths pointers.
    logger.log_src(call, count, strings, lengths);
    logger.log_ptr(call, "errcode_ret", errcode_ret);

#ifndef DVDT_PROF_TEST
    logger.log_timestamp_start(call);

    // Original call.
    program = prof.interceptor.clCreateProgramWithSource_original(\
        context, count, strings, lengths, errcode_ret);

    logger.log_timestamp_end(call);

    // Error value (-1 is logged when the caller passed no errcode_ret).
    logger.log_num<cl_int>(call, "errcode", errcode_ret ? *errcode_ret : -1);
#endif

    // Return value.
    logger.log_ptr(call, "program", program); logger.log_lf();

    return program;

} // clCreateProgramWithSource()
394 | 
395 | 
// https://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clEnqueueNDRangeKernel.html
//
// Interposed clEnqueueNDRangeKernel(): logs the call name, the kernel's
// function name and the arguments, forwards the call to the original
// implementation (looked up once via dlsym(RTLD_NEXT, ...) and cached in
// prof.interceptor), logs profiling info for the enqueued command, then
// logs and returns the error code.
//
// The local work size that is logged and forwarded is the value returned by
// prof.interceptor.update_lws(), which may differ from the caller's.
//
// When DVDT_PROF_TEST is defined, the original function is not invoked, a
// fixed kernel name is used, profiling info is logged with a NULL event,
// and CL_SUCCESS is logged and returned.
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueNDRangeKernel(
    cl_command_queue queue,
    cl_kernel kernel,
    cl_uint work_dim,
    const size_t *global_work_offset,
    const size_t *global_work_size,
    const size_t *local_work_size,
    cl_uint num_events_in_wait_list,
    const cl_event *event_wait_list,
    cl_event *event) CL_API_SUFFIX__VERSION_1_0
{
    // Return value.
    cl_int errcode = CL_SUCCESS;

    // API call.
    const char * call = "clEnqueueNDRangeKernel";
    logger.log_call(call);

    // Resolve the original entry point on first use.
    if (NULL == prof.interceptor.clEnqueueNDRangeKernel_original)
    {
        prof.interceptor.clEnqueueNDRangeKernel_original = (dvdt::Prof::Interceptor::clEnqueueNDRangeKernel_type) dlsym(RTLD_NEXT, call);
    }

    // Kernel name.
#ifndef DVDT_PROF_TEST
    // The name is queried into a fixed 80-byte buffer.
    // NOTE(review): failures are only caught by assert(), so with NDEBUG
    // defined 'name' may be used uninitialized - confirm build settings.
    const size_t max_name_length = 80;
    char name[max_name_length];
    {
        size_t name_length;
        cl_int info_errcode = clGetKernelInfo(\
            kernel, CL_KERNEL_FUNCTION_NAME, max_name_length, name, &name_length);
        assert(info_errcode == CL_SUCCESS && "Failed to get kernel name");
        assert(name_length <= max_name_length);
    }
#else
    const char name[] = "dvdt_prof_kernel";
#endif
    logger.log_str(call, "name", name);

    // From here on 'local_work_size' is whatever update_lws() returns for
    // this kernel name.
    local_work_size = prof.interceptor.update_lws(name, local_work_size);

    // Arguments.
    logger.log_ptr(call, "queue", queue);
    logger.log_ptr(call, "kernel", kernel);
    logger.log_gwo(call, work_dim, global_work_offset);
    logger.log_gws(call, work_dim, global_work_size);
    logger.log_lws(call, work_dim, local_work_size);
    logger.log_list<cl_event>(call, "event_wait_list", event_wait_list, num_events_in_wait_list);
    logger.log_ptr(call, "event", event);

#ifndef DVDT_PROF_TEST
    logger.log_timestamp_start(call);

    // Event object needed if 'event' is NULL.
    // NOTE(review): prof_event_obj is not released in this function; unclear
    // from here whether log_profiling_info() releases it - confirm.
    cl_event prof_event_obj;
    cl_event * prof_event = (NULL != event ? event : &prof_event_obj);

    // Original call.
    errcode = prof.interceptor.clEnqueueNDRangeKernel_original(\
        queue, kernel, work_dim, global_work_offset, global_work_size, local_work_size,\
        num_events_in_wait_list, event_wait_list, prof_event);

    // Wait for original call to complete.
    // NOTE(review): called regardless of 'errcode'; presumably
    // log_profiling_info() copes with an event from a failed enqueue - confirm.
    logger.log_profiling_info(call, prof_event);

    logger.log_timestamp_end(call);
#else
    logger.log_profiling_info(call, NULL);
#endif

    // Return value.
    logger.log_num<cl_int>(call, "errcode", errcode); logger.log_lf();

    return errcode;

} // clEnqueueNDRangeKernel()
474 | 
475 | 
// https://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clEnqueueReadBuffer.html
//
// Interposed clEnqueueReadBuffer(): logs the call name and arguments,
// forwards the call to the original implementation (looked up once via
// dlsym(RTLD_NEXT, ...) and cached in prof.interceptor), logs profiling
// info for the enqueued command, then logs and returns the error code.
//
// When DVDT_PROF_TEST is defined, the original function is not invoked,
// profiling info is logged with a NULL event, and CL_SUCCESS is logged and
// returned.
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReadBuffer(
    cl_command_queue queue,
    cl_mem buffer,
    cl_bool blocking,
    size_t offset,
    size_t size,
    void *ptr,
    cl_uint num_events_in_wait_list,
    const cl_event *event_wait_list,
    cl_event *event) CL_API_SUFFIX__VERSION_1_0
{
    // Return value.
    cl_int errcode = CL_SUCCESS;

    // API call.
    const char * call = "clEnqueueReadBuffer";
    logger.log_call(call);

    // Resolve the original entry point on first use.
    if (NULL == prof.interceptor.clEnqueueReadBuffer_original)
    {
        prof.interceptor.clEnqueueReadBuffer_original = (dvdt::Prof::Interceptor::clEnqueueReadBuffer_type) dlsym(RTLD_NEXT, call);
    }

    // Arguments.
    logger.log_ptr(call, "queue", queue);
    logger.log_ptr(call, "buffer", buffer);
    logger.log_num<cl_bool>(call, "blocking", blocking);
    logger.log_num<size_t>(call, "offset", offset);
    logger.log_num<size_t>(call, "size", size);
    logger.log_ptr(call, "ptr", ptr);
    // - event_wait_list
    logger.log_list<cl_event>(call, "event_wait_list", event_wait_list, num_events_in_wait_list);
    // - event
    logger.log_ptr(call, "event", event);

#ifndef DVDT_PROF_TEST
    logger.log_timestamp_start(call);

    // Event object needed if 'event' is NULL.
    // NOTE(review): prof_event_obj is not released in this function; unclear
    // from here whether log_profiling_info() releases it - confirm.
    cl_event prof_event_obj;
    cl_event * prof_event = (NULL != event ? event : &prof_event_obj);

    // Original call.
    errcode = prof.interceptor.clEnqueueReadBuffer_original(queue, buffer, blocking, offset, size, ptr,
        num_events_in_wait_list, event_wait_list, prof_event);

    // Wait for original call to complete.
    logger.log_profiling_info(call, prof_event);

    logger.log_timestamp_end(call);
#else
    logger.log_profiling_info(call, NULL);
#endif

    // Return value.
    logger.log_num<cl_int>(call, "errcode", errcode); logger.log_lf();

    return errcode;

} // clEnqueueReadBuffer()
538 | 
539 | 
// https://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clEnqueueWriteBuffer.html
//
// Interposed clEnqueueWriteBuffer(): logs the call name and arguments,
// forwards the call to the original implementation (looked up once via
// dlsym(RTLD_NEXT, ...) and cached in prof.interceptor), logs profiling
// info for the enqueued command, then logs and returns the error code.
//
// When DVDT_PROF_TEST is defined, the original function is not invoked,
// profiling info is logged with a NULL event, and CL_SUCCESS is logged and
// returned.
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueWriteBuffer(
    cl_command_queue queue,
    cl_mem buffer,
    cl_bool blocking,
    size_t offset,
    size_t size,
    const void *ptr,
    cl_uint num_events_in_wait_list,
    const cl_event *event_wait_list,
    cl_event *event) CL_API_SUFFIX__VERSION_1_0
{
    // Return value.
    cl_int errcode = CL_SUCCESS;

    // API call.
    const char * call = "clEnqueueWriteBuffer";
    logger.log_call(call);

    // Resolve the original entry point on first use.
    if (NULL == prof.interceptor.clEnqueueWriteBuffer_original)
    {
        prof.interceptor.clEnqueueWriteBuffer_original = (dvdt::Prof::Interceptor::clEnqueueWriteBuffer_type) dlsym(RTLD_NEXT, call);
    }

    // Arguments.
    logger.log_ptr(call, "queue", queue);
    logger.log_ptr(call, "buffer", buffer);
    logger.log_num<cl_bool>(call, "blocking", blocking);
    logger.log_num<size_t>(call, "offset", offset);
    logger.log_num<size_t>(call, "size", size);
    logger.log_ptr(call, "ptr", ptr);
    logger.log_list<cl_event>(call, "event_wait_list", event_wait_list, num_events_in_wait_list);
    logger.log_ptr(call, "event", event);

#ifndef DVDT_PROF_TEST
    logger.log_timestamp_start(call);

    // Event object needed if 'event' is NULL.
    // NOTE(review): prof_event_obj is not released in this function; unclear
    // from here whether log_profiling_info() releases it - confirm.
    cl_event prof_event_obj;
    cl_event * prof_event = (NULL != event ? event : &prof_event_obj);

    // Original call.
    errcode = prof.interceptor.clEnqueueWriteBuffer_original(\
        queue, buffer, blocking, offset, size, ptr,
        num_events_in_wait_list, event_wait_list, prof_event);

    // Wait for original call to complete.
    logger.log_profiling_info(call, prof_event);

    logger.log_timestamp_end(call);
#else
    logger.log_profiling_info(call, NULL);
#endif

    // Return value.
    logger.log_num<cl_int>(call, "errcode", errcode); logger.log_lf();

    return errcode;

} // clEnqueueWriteBuffer()
601 | 
602 | 
603 | // https://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clSetKernelArg.html
604 | extern CL_API_ENTRY cl_int CL_API_CALL
605 | clSetKernelArg(
606 |     cl_kernel kernel,
607 |     cl_uint arg_index,
608 |     size_t arg_size,
609 |     const void *arg_value_ptr) CL_API_SUFFIX__VERSION_1_0
610 | {
611 |     // Return value.
612 |     cl_int errcode = CL_SUCCESS;
613 | 
614 |     // API call.
615 |     const char * call = "clSetKernelArg";
616 |     logger.log_call(call);
617 | 
618 |     if (NULL == prof.interceptor.clSetKernelArg_original)
619 |     {
620 |         prof.interceptor.clSetKernelArg_original = (dvdt::Prof::Interceptor::clSetKernelArg_type) dlsym(RTLD_NEXT, call);
621 |     }
622 | 
623 |     // Arguments.
624 |     logger.log_ptr(call, "kernel", kernel);
625 |     logger.log_num<cl_uint>(call, "arg_index", arg_index);
626 |     logger.log_num<size_t>(call, "arg_size", arg_size);
627 |     logger.log_ptr(call, "arg_value_ptr", arg_value_ptr);
628 |     logger.log_hex(call, "arg_value", arg_value_ptr, arg_size);
629 | 
630 | #ifndef DVDT_PROF_TEST
631 |     logger.log_timestamp_start(call);
632 | 
633 |     // Original call.
634 |     errcode = prof.interceptor.clSetKernelArg_original(kernel, arg_index, arg_size, arg_value_ptr);
635 | 
636 |     logger.log_timestamp_end(call);
637 | #endif
638 | 
639 |     // Return value.
640 |     logger.log_num<cl_int>(call, "errcode", errcode); logger.log_lf();
641 | 
642 |     return errcode;
643 | 
644 | } // clSetKernelArg()
645 | 


--------------------------------------------------------------------------------
/cpp/prof.hpp:
--------------------------------------------------------------------------------
  1 | //
  2 | // 2015-2017 (c) dividiti
  3 | //
  4 | 
  5 | #ifndef DVDT_PROF_HPP
  6 | #define DVDT_PROF_HPP
  7 | 
  8 | #include <dlfcn.h>
  9 | 
 10 | #include <cstdlib>
 11 | #include <cassert>
 12 | #include <ostream>
 13 | #include <sstream>
 14 | 
 15 | #include <string>
 16 | #include <vector>
 17 | #include <map>
 18 | 
 19 | #ifdef __APPLE__
 20 | #include <OpenCL/opencl.h>
 21 | #else
 22 | #include <CL/opencl.h>
 23 | #endif
 24 | 
 25 | #if   (1 == DVDT_PROF_WALLCLOCK_BOOST)
 26 | #include <boost/date_time.hpp>
 27 | #elif (1 == DVDT_PROF_WALLCLOCK_TIMEOFDAY)
 28 | #include <sys/time.h>
 29 | #else
 30 | #error "Don't know how to measure wall-clock time"
 31 | #endif
 32 | 
 33 | #if (1 == DVDT_PROF_CJSON)
 34 | #include <cJSON.h>
 35 | #endif
 36 | 
 37 | // Log fixed width pointers.
 38 | #if (1 == DVDT_PROF_TEST)
 39 | #include <iomanip>
 40 | #endif
 41 | 
 42 | // Configure output stream at compile-time.
 43 | #ifndef DVDT_PROF_OSTREAM
 44 | #define DVDT_PROF_OSTREAM std::cout
 45 | #endif
 46 | 
 47 | namespace dvdt
 48 | {
 49 | 
 50 | class Prof
 51 | {
 52 | public:
 53 |     class Interceptor
 54 |     {
 55 |     public:
 56 |         // Types of OpenCL API functions.
 57 |         typedef cl_int (*clBuildProgram_type)\
 58 |             (cl_program, cl_uint, const cl_device_id *, const char *, void (CL_CALLBACK *)(cl_program, void *), void *);
 59 | 
 60 |         typedef cl_mem (*clCreateBuffer_type)\
 61 |             (cl_context, cl_mem_flags, size_t, void *, cl_int *);
 62 | 
 63 |         typedef cl_command_queue (*clCreateCommandQueue_type)\
 64 |             (cl_context, cl_device_id, cl_command_queue_properties, cl_int *errcode_ret);
 65 | 
 66 |         typedef cl_kernel (*clCreateKernel_type)\
 67 |             (cl_program, const char * kernel_name, cl_int * errcode_ret);
 68 | 
 69 |         typedef cl_int (*clCreateKernelsInProgram_type)\
 70 |             (cl_program, cl_uint num_kernels, cl_kernel * kernel, cl_uint * num_kernels_ret);
 71 | 
 72 |         typedef cl_program (*clCreateProgramWithBinary_type)\
 73 |             (cl_context, cl_uint num_devices, const cl_device_id * device_list, const size_t * lengths,\
 74 |              const unsigned char ** binaries, cl_int * binary_status, cl_int * errcode_ret);
 75 | 
 76 |         typedef cl_program (*clCreateProgramWithSource_type)\
 77 |             (cl_context, cl_uint count, const char ** strings, const size_t * lengths, cl_int * errcode_ret);
 78 | 
 79 |         typedef cl_int (*clEnqueueNDRangeKernel_type)\
 80 |             (cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *);
 81 | 
 82 |         typedef cl_int (*clEnqueueReadBuffer_type)\
 83 |             (cl_command_queue, cl_mem, cl_bool, size_t, size_t, void *, cl_uint, const cl_event *, cl_event *);
 84 | 
 85 |         typedef cl_int (*clEnqueueWriteBuffer_type)\
 86 |             (cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *);
 87 | 
 88 |         typedef cl_int (*clSetKernelArg_type)\
 89 |             (cl_kernel, cl_uint, size_t, const void *);
 90 | 
 91 |         // OpenCL API functions from the underlying vendor implementation.
 92 |         clBuildProgram_type            clBuildProgram_original;
 93 |         clCreateBuffer_type            clCreateBuffer_original;
 94 |         clCreateCommandQueue_type      clCreateCommandQueue_original;
 95 |         clCreateKernel_type            clCreateKernel_original;
 96 |         clCreateKernelsInProgram_type  clCreateKernelsInProgram_original;
 97 |         clCreateProgramWithBinary_type clCreateProgramWithBinary_original;
 98 |         clCreateProgramWithSource_type clCreateProgramWithSource_original;
 99 |         clEnqueueNDRangeKernel_type    clEnqueueNDRangeKernel_original;
100 |         clEnqueueReadBuffer_type       clEnqueueReadBuffer_original;
101 |         clEnqueueWriteBuffer_type      clEnqueueWriteBuffer_original;
102 |         clSetKernelArg_type            clSetKernelArg_original;
103 | 
104 |         // Mapping a kernel to a local work size tuple that will be used
105 |         // to override the local work size specified in the program.
106 |         const size_t * update_lws(const char * name, const size_t * program_lws);
107 | 
108 |         // Constructor.
109 |         Interceptor() :
110 |             clBuildProgram_original(NULL),
111 |             clCreateBuffer_original(NULL),
112 |             clCreateCommandQueue_original(NULL),
113 |             clCreateKernel_original(NULL),
114 |             clCreateKernelsInProgram_original(NULL),
115 |             clCreateProgramWithBinary_original(NULL),
116 |             clCreateProgramWithSource_original(NULL),
117 |             clEnqueueNDRangeKernel_original(NULL),
118 |             clEnqueueReadBuffer_original(NULL),
119 |             clEnqueueWriteBuffer_original(NULL),
120 |             clSetKernelArg_original(NULL),
121 |             kernel_lws_null(false),
122 |             context(NULL)
123 |         {
124 |             if (getenv("DVDT_PROF_LWS_NULL"))
125 |             {
126 |                 kernel_lws_null = true;
127 |             }
128 |             else if (const char * kernel_lws_list = getenv("DVDT_PROF_LWS"))
129 |             {
130 |                 update_kernel_lws_map(kernel_lws_list);
131 |             }
132 |         }
133 | 
134 |         // Destructor.
135 |         ~Interceptor()
136 |         {
137 |             // Free local work size values.
138 |             for (std::map<std::string, size_t*>::iterator i = kernel_lws_map.begin(),
139 |                 e = kernel_lws_map.end(); i != e; i++)
140 |             {
141 |                 delete[] i->second;
142 |             }
143 |         }
144 | 
145 |     private:
146 |         // The map is populated by parsing an environment variable DVDT_PROF_LWS
147 |         // in the following format:
148 |         //
149 |         //   "kernel_A:lws_A0,lws_A1,lws_A2 kernel_B:lws_B0,lws_B1,lws_B1 ..."
150 |         //
151 |         // Namely, the list elements are separated by spaces; the kernel names
152 |         // (strings) are separated from the local work size tuple by colons;
153 |         // the tuple elements (unsigned integers) are delimited by commas.
154 |         // The number of elements in a tuple must match the number of work-group
155 |         // dimensions as specified in the program or start with the value of 0
156 |         // to use NULL as the local work size for this kernel.
157 |         // (For convenience, kernel_lws_null allows to use NULL for all kernels
158 |         // in the program.)
159 |         std::map<std::string, size_t*> kernel_lws_map;
160 | 
161 |         // True if NULL is to be used as the local work size for all kernels.
162 |         bool kernel_lws_null;
163 | 
164 |         // Helper method for update_kernel_lws_map().
165 |         std::vector<std::string> split(const std::string & str, char delim)
166 |         {
167 |             std::vector<std::string> elems;
168 |             std::stringstream ss(str);
169 |             std::string elem;
170 |             while (std::getline(ss, elem, delim))
171 |             {
172 |                 elems.push_back(elem);
173 |             }
174 |             return elems;
175 |         }
176 | 
177 |         // See kernel_lws_map.
178 |         void update_kernel_lws_map(const char * kernel_lws_list);
179 | 
180 |     public:
181 |         // Cached OpenCL context. (Currently unused.)
182 |         cl_context context;
183 | 
184 |     }; // inner class Interceptor
185 | 
186 |     class Logger
187 |     {
188 |     public:
189 |         virtual inline void
190 |         log_call(const char * call_name) = 0;
191 | 
192 |         virtual inline void
193 |         log_gws(const char * call_name, cl_uint work_dim, const size_t * global_work_size) = 0;
194 | 
195 |         virtual inline void
196 |         log_gwo(const char * call_name, cl_uint work_dim, const size_t * global_work_offset) = 0;
197 | 
198 |         virtual inline void
199 |         log_hex(const char * call_name, const char * arg_name, const void * arg_value_ptr, size_t arg_size) = 0;
200 | 
201 |         // NB: Templated function cannot be virtual.
202 |         template <typename elem_ty> inline void
203 |         log_list(const char * call_name, const char * list_name, const elem_ty * list, cl_uint list_size) { };
204 | 
205 |         virtual inline void
206 |         log_lws(const char * call_name, cl_uint work_dim, const size_t * local_work_size) = 0;
207 | 
208 |         // NB: Templated function cannot be virtual.
209 |         template <typename num_ty> inline void
210 |         log_num(const char * call_name, const char * arg_name, num_ty arg_value) { };
211 | 
212 |         virtual inline void
213 |         log_profiling_info(const char * call_name, cl_event * prof_event) = 0;
214 | 
215 |         virtual inline void
216 |         log_ptr(const char * call_name, const char * arg_name, const void * arg_value) = 0;
217 | 
218 |         virtual inline void
219 |         log_src(const char * call_name, cl_uint count, const char **strings, const size_t *lengths) = 0;
220 | 
221 |         virtual inline void
222 |         log_str(const char * call_name, const char * arg_name, const char * arg_value) = 0;
223 | 
224 |         virtual inline void
225 |         log_timestamp_end(const char * call_name) = 0;
226 | 
227 |         virtual inline void
228 |         log_timestamp_start(const char * call_name) = 0;
229 | 
230 |         inline std::string
231 |         ptr_to_str(const void * ptr)
232 |         {
233 |             std::stringstream ss;
234 | #if (1 == DVDT_PROF_TEST)
235 |             ss << "0x" << std::hex << std::setw(8) << std::setfill('0') <<
236 |                 reinterpret_cast<uintptr_t>(ptr) << std::dec;
237 | #else
238 |             ss << ptr;
239 | #endif
240 |             return ss.str();
241 |         }
242 |     }; // abstract inner class Logger
243 | 
244 |     // Interceptor object.
245 |     Interceptor interceptor;
246 | 
247 |     // Typical implementation-defined constants.
248 |     // TODO: query the actual implementation.
249 |     static const cl_uint max_work_dim = 3;
250 | 
251 |     // Default values of work size parameters.
252 |     static const size_t default_local_work_size = 1;
253 |     static const size_t null_local_work_size = 0;
254 | 
255 |     static const size_t default_global_work_size = 1;
256 |     // NB: no null_global_work_size
257 | 
258 |     static const size_t default_global_work_offset = 0;
259 |     static const size_t null_global_work_offset = 0;
260 | 
261 | }; // class Prof
262 | 
263 | 
264 | #if (1 != DVDT_PROF_CJSON)
265 | class ostreamLogger : public Prof::Logger
266 | {
267 | private:
268 |     std::ostream & stream;
269 | 
270 |     const char * prefix;
271 |     const char sep;
272 |     const char lf;
273 | 
274 | public:
275 |     // Constructor.
276 |     ostreamLogger(std::ostream & _stream=DVDT_PROF_OSTREAM,
277 |                   const char * _prefix="[dv/dt]",
278 |                   const char _sep=' ',
279 |                   const char _lf='\n') :
280 |         stream(_stream), prefix(_prefix), sep(_sep), lf(_lf)
281 |     {}
282 | 
283 |     inline void log_prefix() { stream << prefix; }
284 |     inline void log_sep()    { stream << sep;    }
285 |     inline void log_lf()     { stream << lf;     }
286 | 
287 | public:
288 |     inline void
289 |     log_call(const char * call_name)
290 |     {
291 |         stream << prefix << sep << call_name << lf;
292 |     } // log_call()
293 | 
294 |     inline void
295 |     log_gws(const char * call_name, cl_uint work_dim, const size_t * global_work_size)
296 |     {
297 |         stream << prefix << sep << call_name << sep << "gws";
298 |         for (cl_uint d = 0; d < dvdt::Prof::max_work_dim; ++d)
299 |         {
300 |             stream << sep << (d < work_dim ? global_work_size[d] : dvdt::Prof::default_global_work_size);
301 |         }
302 |         stream << lf;
303 |     } // log_gws()
304 | 
305 |     inline void
306 |     log_gwo(const char * call_name, cl_uint work_dim, const size_t * global_work_offset)
307 |     {
308 |         stream << prefix << sep << call_name << sep << "gwo";
309 |         for (cl_uint d = 0; d < dvdt::Prof::max_work_dim; ++d)
310 |         {
311 |             if (global_work_offset)
312 |             {
313 |                 stream << sep << (d < work_dim ? global_work_offset[d] : dvdt::Prof::default_global_work_offset);
314 |             }
315 |             else
316 |             {
317 |                 stream << sep << dvdt::Prof::null_global_work_offset;
318 |             }
319 |         }
320 |         stream << lf;
321 |     } // log_gwo()
322 | 
323 |     template <typename elem_ty> inline void
324 |     log_list(const char * call_name, const char * list_name, const elem_ty * list, cl_uint list_size)
325 |     {
326 |         stream << prefix << sep << call_name << sep << list_name;
327 |         for (cl_uint i = 0; i < list_size; ++i)
328 |         {
329 |             stream << sep << list[i];
330 |         }
331 |         stream << lf;
332 |     } // log_list()
333 | 
334 |     inline void
335 |     log_lws(const char * call_name, cl_uint work_dim, const size_t * local_work_size)
336 |     {
337 |         stream << prefix << sep << call_name << sep << "lws";
338 |         for (cl_uint d = 0; d < dvdt::Prof::max_work_dim; ++d)
339 |         {
340 |             if (local_work_size)
341 |             {
342 |                 stream << sep << (d < work_dim ? local_work_size[d] : dvdt::Prof::default_local_work_size);
343 |             }
344 |             else
345 |             {
346 |                 stream << sep << dvdt::Prof::null_local_work_size;
347 |             }
348 |         }
349 |         stream << lf;
350 |     } // log_lws()
351 | 
352 |     template <typename num_ty> inline void
353 |     log_num(const char * call_name, const char * arg_name, num_ty arg_value)
354 |     {
355 |         stream << prefix << sep << call_name << sep << arg_name << sep << arg_value << lf;
356 |     } // log_num()
357 | 
358 |     inline void
359 |     log_hex(const char * call_name, const char * arg_name, const void * arg_value_ptr, size_t arg_size)
360 |     {
361 |         stream << prefix << sep << call_name << sep << arg_name << sep << std::hex;
362 |         for (size_t i = 0; i < arg_size; ++i)
363 |         {
364 |             unsigned int byte = static_cast<unsigned int>(
365 |                 reinterpret_cast<const unsigned char*>(arg_value_ptr)[i]
366 |             );
367 |             stream << std::setfill('0') << std::setw(2) << byte;
368 |         }
369 |         stream << std::dec << lf;
370 |     } // log_hex()
371 | 
372 |     inline void
373 |     log_profiling_info(const char * call_name, cl_event * prof_event)
374 |     {
375 |         cl_ulong queued, submit, start, end;
376 | #ifndef DVDT_PROF_TEST
377 |         cl_int prof_errcode = CL_SUCCESS;
378 |         prof_errcode |= clWaitForEvents(1, prof_event);
379 |         prof_errcode |= clGetEventProfilingInfo(*prof_event, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &queued, NULL);
380 |         prof_errcode |= clGetEventProfilingInfo(*prof_event, CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &submit, NULL);
381 |         prof_errcode |= clGetEventProfilingInfo(*prof_event, CL_PROFILING_COMMAND_START,  sizeof(cl_ulong), &start,  NULL);
382 |         prof_errcode |= clGetEventProfilingInfo(*prof_event, CL_PROFILING_COMMAND_END,    sizeof(cl_ulong), &end,    NULL);
383 |         if (CL_SUCCESS != prof_errcode)
384 |         {
385 |             stream << prefix << sep << call_name << sep << "output profiling info error: " << prof_errcode << lf;
386 |         }
387 | #else
388 |         queued = 100200300400L;
389 |         submit = 100200300500L;
390 |         start  = 100200300600L;
391 |         end    = 100200300700L;
392 | #endif
393 |         stream << prefix << sep << call_name << sep << "profiling" <<
394 |             sep << queued << sep << submit << sep << start << sep << end << lf;
395 |     } // log_profiling_info()
396 | 
397 |     inline void
398 |     log_ptr(const char * call_name, const char * arg_name, const void * arg_value)
399 |     {
400 |         stream << prefix << sep << call_name << sep << arg_name << sep << ptr_to_str(arg_value) << lf;
401 |     } // log_ptr()
402 | 
403 |     inline void
404 |     log_src(const char * call_name, cl_uint count, const char **strings, const size_t *lengths)
405 |     {
406 |         for (cl_uint c = 0; c < count; ++c)
407 |         {
408 |             stream << prefix << sep << call_name << sep << "sources[" << c << "] <<" << lf;
409 |             if (NULL == lengths || 0 == lengths[c])
410 |             {
411 |                 // Program string is null-terminated.
412 |                 stream << strings[c];
413 |             }
414 |             else
415 |             {
416 |                 // When program string it not null-terminated, only
417 |                 // print lengths[c] characters from strings[c].
418 |                 for (cl_uint k = 0; k < lengths[c]; ++ k)
419 |                 {
420 |                     stream << strings[c][k];
421 |                 }
422 |             }
423 |             stream << std::endl;
424 |             stream << prefix << sep << call_name << sep << "sources[" << c << "] >>" << lf;
425 |         }
426 |     } // log_src()
427 | 
428 |     inline void
429 |     log_str(const char * call_name, const char * arg_name, const char * arg_value)
430 |     {
431 |         stream << prefix << sep << call_name << sep << arg_name << sep << arg_value << lf;
432 |     } // log_str()
433 | 
434 | private:
435 |     inline void
436 |     log_timestamp(const char * call_name, const char * timestamp_kind)
437 |     {
438 |     #if   (1 == DVDT_PROF_WALLCLOCK_BOOST)
439 |         const boost::posix_time::ptime time = boost::posix_time::microsec_clock::universal_time();
440 |         const std::string time_str = boost::posix_time::to_iso_extended_string(time);
441 |     #elif (1 == DVDT_PROF_WALLCLOCK_TIMEOFDAY)
442 |         const std::string time_str("1970-01-01 00:00:00.000");
443 |     #endif
444 |         stream << prefix << sep << call_name << sep << timestamp_kind << sep << time_str << lf;
445 |     } // log_timestamp()
446 | public:
447 |     inline void
448 |     log_timestamp_end(const char * call_name)
449 |     {
450 |         log_timestamp(call_name, "end"  );
451 |     } // log_timestamp_end()
452 | 
453 |     inline void
454 |     log_timestamp_start(const char * call_name)
455 |     {
456 |         log_timestamp(call_name, "start");
457 |     } // log_timestamp_start()
458 | }; // class ostreamLogger : Logger
459 | #endif // (1 != DVDT_PROF_CJSON)
460 | 
461 | #if (1 == DVDT_PROF_CJSON)
462 | class cjsonLogger : public Prof::Logger
463 | {
464 | private:
465 |     // Stream to write the final JSON to.
466 |     std::ostream & stream;
467 |     // Prefix for pattern matching the final JSON.
468 |     const char * prefix;
469 |     // Array (list) of call objects.
470 |     cJSON * calls;
471 |     // Currently open call object.
472 |     cJSON * call;
473 | 
474 | public:
475 |     // Constructor.
476 |     cjsonLogger(std::ostream & _stream=DVDT_PROF_OSTREAM,
477 |                 const char * _prefix="[dv/dt]") :
478 |         stream(_stream), prefix(_prefix), calls(NULL), call(NULL)
479 |     {
480 |         calls = cJSON_CreateArray();
481 |     }
482 | 
483 |     // Destructor.
484 |     ~cjsonLogger()
485 |     {
486 |         // Add last call object to calls array.
487 |         if (call)
488 |         {
489 |             cJSON_AddItemToArray(calls, call);
490 |         }
491 |         // Print calls array.
492 |         {
493 |             char * result_ptr = cJSON_Print(calls);
494 |             std::string result_str(result_ptr);
495 |             stream << prefix << " <<\n";
496 |             stream << result_str << "\n";
497 |             stream << prefix << " >>\n";
498 |             stream.flush();
499 |             free(result_ptr);
500 |         }
501 |         // Free calls array.
502 |         cJSON_Delete(calls);
503 |     }
504 | 
505 |     // No-op.
506 |     inline void log_lf() { return; }
507 | 
508 | public:
509 |     inline void
510 |     log_call(const char * call_name)
511 |     {
512 |         // Add previous call object to calls array.
513 |         if (call)
514 |         {
515 |             cJSON_AddItemToArray(calls, call);
516 |         }
517 |         // Create new call object.
518 |         call = cJSON_CreateObject();
519 |         cJSON_AddItemToObject(call, "call", cJSON_CreateString(call_name));
520 |     } // log_call()
521 | 
522 |     inline void
523 |     log_gws(const char * call_name, cl_uint work_dim, const size_t * global_work_size)
524 |     {
525 |         cJSON * gws = cJSON_CreateArray();
526 |         for (cl_uint d = 0; d < dvdt::Prof::max_work_dim; ++d)
527 |         {
528 |             size_t gws_d;
529 |             gws_d = d < work_dim ?
530 |                     global_work_size[d] :
531 |                     dvdt::Prof::default_global_work_size;
532 |             cJSON * gws_d_as_num = cJSON_CreateNumber(gws_d);
533 |             cJSON_AddItemToArray(gws, gws_d_as_num);
534 |         }
535 |         cJSON_AddItemToObject(call, "gws", gws);
536 |     } // log_gws()
537 | 
538 |     inline void
539 |     log_gwo(const char * call_name, cl_uint work_dim, const size_t * global_work_offset)
540 |     {
541 |         cJSON * gwo = cJSON_CreateArray();
542 |         for (cl_uint d = 0; d < dvdt::Prof::max_work_dim; ++d)
543 |         {
544 |             size_t gwo_d;
545 |             if (global_work_offset)
546 |             {
547 |                 gwo_d = d < work_dim ?
548 |                         global_work_offset[d] :
549 |                         dvdt::Prof::default_global_work_offset;
550 |             }
551 |             else
552 |             {
553 |                 gwo_d = dvdt::Prof::null_global_work_offset;
554 |             }
555 |             cJSON * gwo_d_as_num = cJSON_CreateNumber(gwo_d);
556 |             cJSON_AddItemToArray(gwo, gwo_d_as_num);
557 |         }
558 |         cJSON_AddItemToObject(call, "gwo", gwo);
559 |     } // log_gwo()
560 | 
561 |     inline void
562 |     log_hex(const char * call_name, const char * arg_name, const void * arg_value_ptr, size_t arg_size)
563 |     {
564 |         std::stringstream ss;
565 |         ss << std::hex;
566 |         for (size_t i = 0; i < arg_size; ++i)
567 |         {
568 |             unsigned int byte = static_cast<unsigned int>(
569 |                 reinterpret_cast<const unsigned char*>(arg_value_ptr)[i]
570 |             );
571 |             ss << std::setfill('0') << std::setw(2) << byte;
572 |         }
573 |         const std::string arg_value_str = ss.str();
574 |         const char * arg_value_cstr = arg_value_str.c_str();
575 |         cJSON * arg_value_as_str = cJSON_CreateString(arg_value_cstr);
576 |         cJSON_AddItemToObject(call, arg_name, arg_value_as_str);
577 |     } // log_hex()
578 | 
579 |     template <typename elem_ty> inline void
580 |     log_list(const char * call_name, const char * list_name, const elem_ty * list, cl_uint list_size)
581 |     {
582 |         cJSON * list_as_array = cJSON_CreateArray();
583 |         for (cl_uint i = 0; i < list_size; ++i)
584 |         {
585 |             elem_ty list_i = list[i];
586 |             // FIXME: Currently only used for lists of cl_event's
587 |             // and cl_device_id's, which are effectively pointers.
588 |             cJSON_AddItemToArray(list_as_array,
589 |                 cJSON_CreateString(ptr_to_str(list_i).c_str()));
590 |         }
591 |         cJSON_AddItemToObject(call, list_name, list_as_array);
592 |     } // log_list()
593 | 
594 |     inline void
595 |     log_lws(const char * call_name, cl_uint work_dim, const size_t * local_work_size)
596 |     {
597 |         cJSON * lws = cJSON_CreateArray();
598 |         for (cl_uint d = 0; d < dvdt::Prof::max_work_dim; ++d)
599 |         {
600 |             size_t lws_d;
601 |             if (local_work_size)
602 |             {
603 |                 lws_d = d < work_dim ?
604 |                         local_work_size[d] :
605 |                         dvdt::Prof::default_local_work_size;
606 |             }
607 |             else
608 |             {
609 |                 lws_d = dvdt::Prof::null_local_work_size;
610 |             }
611 |             cJSON * lws_d_as_num = cJSON_CreateNumber(lws_d);
612 |             cJSON_AddItemToArray(lws, lws_d_as_num);
613 |         }
614 |         cJSON_AddItemToObject(call, "lws", lws);
615 |     } // log_lws()
616 | 
617 |     template <typename num_ty> inline void
618 |     log_num(const char * call_name, const char * arg_name, num_ty arg_value)
619 |     {
620 |         cJSON * arg_value_as_num = cJSON_CreateNumber(
621 |                                        static_cast<double>(arg_value));
622 |         cJSON_AddItemToObject(call, arg_name, arg_value_as_num);
623 |     } // log_num()
624 | 
625 |     inline void
626 |     log_profiling_info(const char * call_name, cl_event * prof_event)
627 |     {
628 |         cl_ulong queued, submit, start, end;
629 | #ifndef DVDT_PROF_TEST
630 |         cl_int prof_errcode = CL_SUCCESS;
631 |         prof_errcode |= clWaitForEvents(1, prof_event);
632 |         prof_errcode |= clGetEventProfilingInfo(*prof_event, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &queued, NULL);
633 |         prof_errcode |= clGetEventProfilingInfo(*prof_event, CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &submit, NULL);
634 |         prof_errcode |= clGetEventProfilingInfo(*prof_event, CL_PROFILING_COMMAND_START,  sizeof(cl_ulong), &start,  NULL);
635 |         prof_errcode |= clGetEventProfilingInfo(*prof_event, CL_PROFILING_COMMAND_END,    sizeof(cl_ulong), &end,    NULL);
636 |         if (CL_SUCCESS != prof_errcode)
637 |         {
638 |             cJSON * prof_errcode_as_num = cJSON_CreateNumber(prof_errcode);
639 |             cJSON_AddItemToObject(call,
640 |                 "output profiling_error", prof_errcode_as_num);
641 |         }
642 | #else
643 |         queued = 100200300400L;
644 |         submit = 100200300500L;
645 |         start  = 100200300600L;
646 |         end    = 100200300700L;
647 | #endif
648 |         cJSON * profiling = cJSON_CreateObject();
649 |         cJSON * queued_as_num = cJSON_CreateNumber(queued);
650 |         cJSON * submit_as_num = cJSON_CreateNumber(submit);
651 |         cJSON * start_as_num  = cJSON_CreateNumber(start);
652 |         cJSON * end_as_num    = cJSON_CreateNumber(end);
653 |         cJSON_AddItemToObject(profiling, "queued", queued_as_num);
654 |         cJSON_AddItemToObject(profiling, "submit", submit_as_num);
655 |         cJSON_AddItemToObject(profiling, "start",  start_as_num);
656 |         cJSON_AddItemToObject(profiling, "end",    end_as_num);
657 |         cJSON_AddItemToObject(call, "profiling", profiling);
658 |     } // log_profiling_info()
659 | 
660 |     inline void
661 |     log_ptr(const char * call_name, const char * arg_name, const void * arg_value)
662 |     {
663 |         std::string arg_value_as_ptr_str = ptr_to_str(arg_value);
664 |         const char * arg_value_as_ptr_cstr = arg_value_as_ptr_str.c_str();
665 |         cJSON_AddStringToObject(call, arg_name, arg_value_as_ptr_cstr);
666 |     } // log_ptr()
667 | 
668 |     inline void
669 |     log_src(const char * call_name, cl_uint count, const char **strings, const size_t *lengths)
670 |     {
671 |         cJSON * source = cJSON_CreateObject();
672 |         cJSON_AddItemToObject(call, "source", source);
673 |         for (cl_uint c = 0; c < count; ++c)
674 |         {
675 |             std::stringstream string_ss;
676 |             if (NULL == lengths || 0 == lengths[c])
677 |             {
678 |                 // Program string is null-terminated.
679 |                 string_ss << strings[c];
680 |             }
681 |             else
682 |             {
683 |                 // When program string it not null-terminated, only
684 |                 // print lengths[c] characters from strings[c].
685 |                 for (cl_uint k = 0; k < lengths[c]; ++ k)
686 |                 {
687 |                     string_ss << strings[c][k];
688 |                 }
689 |             }
690 |             std::stringstream c_ss;
691 |             c_ss << c;
692 | 
693 |             const std::string string_str = string_ss.str();
694 |             const std::string c_str = c_ss.str();
695 | 
696 |             const char * string_cstr = string_str.c_str();
697 |             const char * c_cstr = c_str.c_str();
698 | 
699 |             cJSON_AddStringToObject(source, c_cstr, string_cstr);
700 |         }
701 |     } // log_src()
702 | 
703 |     inline void
704 |     log_str(const char * call_name, const char * arg_name, const char * arg_value)
705 |     {
706 |         cJSON * arg_value_as_str = cJSON_CreateString(arg_value);
707 |         cJSON_AddItemToObject(call, arg_name, arg_value_as_str);
708 |     } // log_str()
709 | 
710 | private:
711 |     inline void
712 |     log_timestamp(const char * call_name, const char * timestamp_kind)
713 |     {
714 |     #if   (1 == DVDT_PROF_WALLCLOCK_BOOST)
715 |         const boost::posix_time::ptime time = boost::posix_time::microsec_clock::universal_time();
716 |         const std::string time_str = boost::posix_time::to_iso_extended_string(time);
717 |     #elif (1 == DVDT_PROF_WALLCLOCK_TIMEOFDAY)
718 |         const std::string time_str("1970-01-01 00:00:00.000");
719 |     #endif
720 |         const char * time_cstr = time_str.c_str();
721 |         cJSON * timestamp = cJSON_GetObjectItem(call, "timestamp");
722 |         if (NULL == timestamp)
723 |         {
724 |             assert(std::string(timestamp_kind) == std::string("start"));
725 |             timestamp = cJSON_CreateObject();
726 |             cJSON_AddItemToObject(call, "timestamp", timestamp);
727 |         }
728 |         else
729 |         {
730 |             assert(std::string(timestamp_kind) == std::string("end"));
731 |         }
732 |         cJSON_AddStringToObject(
733 |             timestamp, timestamp_kind, time_cstr);
734 |     } // log_timestamp()
735 | public:
736 |     inline void
737 |     log_timestamp_end(const char * call_name)
738 |     {
739 |         log_timestamp(call_name, "end"  );
740 |     } // log_timestamp_end()
741 | 
742 |     inline void
743 |     log_timestamp_start(const char * call_name)
744 |     {
745 |         log_timestamp(call_name, "start");
746 |     } // log_timestamp_start()
747 | }; // class cjsonLogger : Logger
748 | #endif // (1 == DVDT_PROF_CJSON)
749 | 
750 | void
751 | Prof::Interceptor::update_kernel_lws_map(const char * kernel_lws_list)
752 | {
753 |     // Strip surrounding double quotation marks if present.
754 |     std::string kernel_lws_list_str(kernel_lws_list);
755 |     {
756 |         const char double_quote = '\"';
757 |         const std::string::size_type first = kernel_lws_list_str.find(double_quote);
758 |         const std::string::size_type last = kernel_lws_list_str.find_last_of(double_quote);
759 |         kernel_lws_list_str = kernel_lws_list_str.substr(first+1, last-(first+1));
760 |     }
761 | 
762 |     // Split space-separated list of elements into vector of elements.
763 |     const char per_kernel_delim = ' ';
764 |     const std::vector<std::string> per_kernel_elems = split(kernel_lws_list_str, per_kernel_delim);
765 | 
766 |     for (std::vector<std::string>::const_iterator elems_i = per_kernel_elems.begin(),
767 |         elems_e = per_kernel_elems.end(); elems_i != elems_e; ++elems_i)
768 |     {
769 |         const std::string elem(*elems_i);
770 | 
771 |         // Split element into two colon-separated strings: kernel name and lws tuple.
772 |         const char kernel_lws_delim = ':';
773 |         const std::string::size_type pos = elem.find(kernel_lws_delim);
774 |         assert(pos != std::string::npos);
775 |         const std::string kernel = elem.substr(0, pos);
776 |         const std::string lws_list = elem.substr(pos+1);
777 | 
778 |         // Split comma-separated lws tuple string into vector of lws dimensions.
779 |         const char lws_delim = ',';
780 |         const std::vector<std::string> lws_vector = split(lws_list, lws_delim);
781 |         const std::vector<std::string>::size_type n = lws_vector.size();
782 |         assert((1 <= n) && (n <= Prof::max_work_dim));
783 |         size_t * lws = new size_t[n]; // To be deallocated in the destructor.
784 |         for (std::vector<std::string>::size_type i = 0; i < n; ++i)
785 |         {
786 |             std::stringstream(lws_vector[i]) >> lws[i];
787 |         }
788 |         // TODO: allow updating the map (e.g. for runtime adaptation).
789 |         assert(kernel_lws_map.count(kernel) == 0);
790 |         kernel_lws_map.insert(std::pair<std::string, size_t*>(kernel, lws));
791 |     }
792 | 
793 |     return;
794 | 
795 | } // Prof::Interceptor::update_kernel_lws_map()
796 | 
797 | 
798 | const size_t *
799 | Prof::Interceptor::update_lws(const char * name, const size_t * program_lws)
800 | {
801 |     if (kernel_lws_null)
802 |     {
803 |         return NULL;
804 |     }
805 |     std::map<std::string, size_t *>::iterator it = kernel_lws_map.find(std::string(name));
806 |     if (kernel_lws_map.end() != it)
807 |     {
808 |         const size_t * lws = it->second;
809 |         if (0 == lws[0])
810 |         {
811 |             program_lws = NULL;
812 |         }
813 |         else
814 |         {
815 |             program_lws = lws;
816 |         }
817 |     }
818 | 
819 |     return program_lws;
820 | 
821 | } // Prof::Interceptor::update_lws()
822 | 
823 | } // namespace dvdt
824 | 
825 | #endif // #ifndef DVDT_PROF_HPP
826 | 


--------------------------------------------------------------------------------