├── .gitignore
├── README.md
├── app
    ├── Makefile
    └── main.cpp
├── app_template
    ├── .cproject
    ├── .project
    ├── app_template.dxy
    ├── common
    │   ├── gpumemioctl.h
    │   ├── utypes.h
    │   └── utypes_linux.h
    ├── create_doc
    ├── cuda
    │   └── check_counter.cu
    ├── host
    │   ├── cl_cuda.cu
    │   ├── cl_cuda.h
    │   ├── cl_cuda_test.cpp
    │   ├── main.cpp
    │   ├── task_data.h
    │   ├── tf_test.h
    │   ├── tf_testcnt.cpp
    │   ├── tf_testcnt.h
    │   ├── tf_testthread.cpp
    │   └── tf_testthread.h
    ├── run_cycle_1M
    └── run_cycle_64M
└── module
    ├── Makefile
    ├── drvload.sh
    ├── gpumemdrv.c
    ├── gpumemdrv.h
    ├── gpumemioctl.h
    ├── gpumemproc.c
    ├── gpumemproc.h
    ├── ioctlrw.c
    └── ioctlrw.h


/.gitignore:
--------------------------------------------------------------------------------
 1 | #
 2 | # NOTE! Don't add files that are generated in specific
 3 | # subdirectories here. Add them in the ".gitignore" file
 4 | # in that subdirectory instead.
 5 | #
 6 | # Normal rules
 7 | #
 8 | .*
 9 | *.o
10 | *.o.*
11 | *.a
12 | *.s
13 | *.su
14 | *.mod.c
15 | *.i
16 | *.lst
17 | *.order
18 | *.elf
19 | *.swp
20 | *.bin
21 | *.patch
22 | *.cfgtmp
23 | *.orig
24 | *~
25 | \#*#
26 | *.ko
27 | *.symvers
28 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # GPUDirect RDMA example.
  2 | 
  3 | ## Install
  4 | 1 Clone the repo and get the source code of the NVIDIA-Linux-x86_64-X.Y driver,
  5 | the same version as the one installed on your system.
  6 | 
  7 | 2 Extract it in the gpudma project directory and create a symbolic link "nvidia" to the NVIDIA-Linux-x86_64-X.Y driver directory.
  8 | The default location is ~/gpudma. For another location you must set the variable GPUDMA_DIR, for example: GPUDMA_DIR=/xprj/gpudma
  9 | 
 10 | 3 Build NVIDIA driver in nvidia/kernel. We need only Module.symvers file from nvidia/kernel directory.
 11 | 
 12 | 4 Build gpumem module.
 13 | 
 14 | 5 Build application app
 15 | 
 16 | 6 Build application app_template
 17 | 
 18 | **Linux commands:**
 19 | 
 20 | git clone https://github.com/karakozov/gpudma.git
 21 | 
 22 | cp ~/Downloads/NVIDIA-Linux-x86_64-367.57.run ~/gpudma
 23 | 
 24 | ./NVIDIA-Linux-x86_64-367.57.run -x
 25 | 
 26 | ln -svf NVIDIA-Linux-x86_64-367.57 nvidia
 27 | 
 28 | cd ~/gpudma/nvidia/kernel && make
 29 | 
 30 | cd ~/gpudma/module && make
 31 | 
 32 | cd ~/gpudma/app && make
 33 | 
 34 | ## Load driver
 35 | 
 36 | cd ~/gpudma/module && ./drvload.sh
 37 | 
 38 | Check driver: ls -l /dev/gpumem
 39 | 
 40 | crw-rw-rw-. 1 root root 10, 55 Apr  2 21:57 /dev/gpumem
 41 | 
 42 | ## Run app example
 43 | 
 44 | cd ~/gpudma/app && ./gpu_direct
 45 | 
 46 | The application creates a CUDA context and allocates GPU memory.
 47 | The memory pointer is passed to the gpumem module. The gpumem module gets the addresses of all physical
 48 | pages of the allocated area and the GPU page size. The application can get these addresses, mmap() each page,
 49 | fill it with a data pattern and unmap it. Then it unlocks the pages and releases the GPU memory allocation.
 50 | 
 51 | Test must be finished with message: "Test successful"
 52 | 
 53 | ## Build and run app_template
 54 | 
 55 | app_template must be built with Nsight Eclipse Edition from NVIDIA.
 56 | 
 57 | Command line for launch:  **app_template** **-count** ncount **-size** nsize
 58 | * ncount - number of blocks to read, 0 - for an infinite cycle; Default is 16;
 59 | * nsize  - size of one buffer in kbytes. Maximum size is 65536. Default is 256;
 60 | 
 61 | The main mode is the infinite cycle (ncount=0). There are two commands for launching the application:
 62 | * run_cycle_1M - launch with buffers of 1 megabytes
 63 | * run_cycle_64M - launch with buffers of 64 megabytes
 64 | 
 65 | The infinite cycle must be run only from a console. Nsight Eclipse Edition cannot correctly display the status line with the "\r" symbol. If you know how to make it work, please let me know.
 66 | To launch the application from Nsight Eclipse Edition use a non-zero value for the count argument. This is enough for debugging.
 67 | 
 68 | There are main executing stages:
 69 | 
 70 | 1. Create an instance of TF_TestCnt - launch a thread for working with CUDA
 71 | 
 72 | 2. Prepare
 73 |   * Open device
 74 |   * Allocate three buffers with size <nsize> and map in the BAR1 - class CL_Cuda
 75 |   * Allocate 64 kbytes buffer for struct TaskMonitor
 76 |   * Allocate page-locked HOST memory for td->hostBuffer 
 77 |   * Allocate page-locked HOST memory for struct TaskHostMonitor
 78 | 
 79 | 3. Launch main cycle - TF_TestCnt::Run()
 80 |   * Launch thread for filling buffers - TF_TestCnt::FillThreadStart()
 81 |   * Launch kernel for checking data - run_checkCounter()
 82 |   * Check flag in the host memory and start DMA transfer - cudaMemcpyAsync()
 83 |   * Check data: TestCnt::CheckHostData()
 84 |   * Measure the velocity of data transfer
 85 | 
 86 | 4. Periodically call the function TF_TestCnt::StepTable() from the function main() to display status information. It works only in the infinite cycle mode. The function displays several parameters:
 87 |   * CUDA_RD - number of received buffers to CUDA
 88 |   * CUDA_OK - number of correct buffers to CUDA
 89 |   * CUDA_ERR - number of incorrect buffers to CUDA
 90 |   * HOST_RD - number of received buffers to HOST
 91 |   * HOST_OK - number of correct buffers to HOST
 92 |   * HOST_ERR - number of incorrect buffers to HOST
 93 |   * E2C_CUR - current velocity of data transfer from external device to CUDA
 94 |   * E2C_AVR - average velocity of data transfer from external device to CUDA
 95 |   * C2H_CUR - current velocity of data transfer from CUDA to HOST
 96 |   * C2H_AVR - average velocity of data transfer from CUDA to HOST
 97 | 
 98 | 5. Function run_checkCounter() launches a warp of 32 threads for checking data.
 99 | 
100 |   Thread 0 differs from the others:
101 |    * Read ts->irqFlag in the global memory and write it in the local warp memory.
102 |    * Write checking data to output buffers
103 | 
104 |   Thread 0 and the other threads:
105 |    * Check flag ptrMonitor->flagExit and exit if it is set.
106 |    * Check received data  
107 |    * Write first 16 errors to struct "check"
108 | 
109 | 6. Display result after exiting from main cycle - TF_TestCnt::GetResult()
110 | 
111 | 7. Free memory
112 | 
113 | Some notes:
114 | * app_template/create_doc - create documentation via doxygen
115 | * There is class CL_Cuda_private for internal data for CL_Cuda
116 | * There is file task_data.h with structs:
117 |   * TaskData - internal task for TF_TestCnt
118 |   * TaskMonitor - struct for shared memory in the CUDA 
119 |   * TaskHostMonitor - struct for shared memory in the HOST
120 |   * TaskBufferStatus - struct for work with one buffer
121 |   * TaskCheckData - struct for error data
122 |   * const int TaskCounts=32 - number of threads in the warp
123 | 
124 | 
125 | 
126 | 
127 | 


--------------------------------------------------------------------------------
/app/Makefile:
--------------------------------------------------------------------------------
 1 |  
 2 | .PHONY: all clean distclean src
 3 | TARGET_NAME = gpu_direct
 4 | 
 5 | all: $(TARGET_NAME)
 6 | 
 7 | ROOT_DIR = $(shell pwd)
 8 | 
 9 | CC = $(CROSS_COMPILE)gcc
10 | LD = $(CROSS_COMPILE)gcc
11 | 
12 | # CUDA toolkit location; override from the environment or the command line.
13 | CUDADIR ?= /usr/local/cuda
14 | #NVIDIA_DRIVER_PATH := $(HOME)/gpudma/nvidia
15 | NVIDIA_DRIVER_PATH := ../nvidia
16 | 
17 | INCDIR := . $(CUDADIR)/include ../module $(NVIDIA_DRIVER_PATH)/kernel/nvidia
18 | INCLUDE := $(addprefix -I, $(INCDIR))
19 | CFLAGS := -D__LINUX__ -g -Wall $(INCLUDE)
20 | SRCFILE := $(wildcard *.cpp)
21 | OBJFILE := $(patsubst %.cpp,%.o, $(SRCFILE))
22 | 
23 | #LDFLAGS := -Wl,-rpath,$(CUDADIR)/lib64/stubs -L"$(CUDADIR)/lib64/stubs" -lcuda
24 | #LDFLAGS :=  -L/usr/local/cuda/lib64 -lcudart -L/usr/local/cuda/lib64/stubs -lcuda $(LIBS)
25 | LDFLAGS +=  -L$(CUDADIR)/lib64 -lcuda
26 | #LDFLAGS +=  -L$(CUDADIR)/lib64 -lcudart
27 | LDFLAGS +=  -lstdc++
28 | 
29 | # Link the program and publish a copy in ../bin (created on demand).
30 | $(TARGET_NAME): $(OBJFILE)
31 | 	$(LD) $(notdir $^) -o $(TARGET_NAME) $(LDFLAGS)
32 | 	mkdir -p ../bin
33 | 	cp $(TARGET_NAME) ../bin
34 | 
35 | # -MD makes the compiler write a .d dependency file next to each object.
36 | %.o: %.cpp
37 | 	$(CC) $(CFLAGS) -c -MD $<
38 | 
39 | include $(wildcard *.d)
40 | 
41 | clean:
42 | 	rm -f *.o *.d *~ core
43 | 	rm -f $(TARGET_NAME)
44 | 
45 | # Removes the same files as clean.
46 | distclean: clean
47 | 
48 | src:
49 | 	@echo $(SRCFILE)
50 | 	@echo $(OBJFILE)
51 | 


--------------------------------------------------------------------------------
/app/main.cpp:
--------------------------------------------------------------------------------
  1 |   
  2 | #include "cuda.h"
  3 | //#include "cuda_runtime_api.h"
  4 | #include "gpumemioctl.h"
  5 | 
  6 | #include <dirent.h>
  7 | #include <signal.h>
  8 | #include <pthread.h>
  9 | #include <math.h>
 10 | #include <stdint.h>
 11 | #include <stdlib.h>
 12 | #include <stdio.h>
 13 | #include <unistd.h>
 14 | #include <fcntl.h>
 15 | #include <string.h>
 16 | #include <errno.h>
 17 | #include <sys/uio.h>
 18 | #include <sys/ioctl.h>
 19 | #include <sys/types.h>
 20 | #include <sys/mman.h>
 21 | 
 22 | //-----------------------------------------------------------------------------
 23 | 
 24 | void checkError(CUresult status);
 25 | bool wasError(CUresult status);
 26 | 
 27 | //-----------------------------------------------------------------------------
 28 | 
 29 | // Self-test for the gpumem driver: hands a CUDA allocation to /dev/gpumem,
 30 | // fills every page the driver reports through mmap() with a counter pattern,
 31 | // then reads the buffer back with cuMemcpyDtoH() and verifies the pattern.
 32 | // Returns 0 when the pattern matches, -1 on any failure or data mismatch.
 33 | int main(int argc, char *argv[])
 34 | {
 35 |     gpudma_lock_t lock;
 36 |     gpudma_unlock_t unlock;
 37 |     gpudma_state_t *state = 0;
 38 |     size_t statesize = 0;
 39 |     int res = -1;
 40 |     int ret = -1;               // exit status, cleared only by a clean compare
 41 |     unsigned count=0x0A000000;  // first value of the pattern written to the pages
 42 | 
 43 |     // C++11 requires a space between a string literal and a string macro.
 44 |     int fd = open("/dev/" GPUMEM_DRIVER_NAME, O_RDWR, 0);
 45 |     if (fd < 0) {
 46 |         printf("Error open file %s\n", "/dev/" GPUMEM_DRIVER_NAME);
 47 |         return -1;
 48 |     }
 49 | 
 50 |     checkError(cuInit(0));
 51 | 
 52 |     int total = 0;
 53 |     checkError(cuDeviceGetCount(&total));
 54 |     fprintf(stderr, "Total devices: %d\n", total);
 55 | 
 56 |     CUdevice device;
 57 |     checkError(cuDeviceGet(&device, 0));
 58 | 
 59 |     char name[256];
 60 |     checkError(cuDeviceGetName(name, 256, device));
 61 |     fprintf(stderr, "Select device: %s\n", name);
 62 | 
 63 |     // get compute capabilities and the devicename
 64 |     int major = 0, minor = 0;
 65 |     checkError( cuDeviceComputeCapability(&major, &minor, device));
 66 |     fprintf(stderr, "Compute capability: %d.%d\n", major, minor);
 67 | 
 68 |     size_t global_mem = 0;
 69 |     checkError( cuDeviceTotalMem(&global_mem, device));
 70 |     fprintf(stderr, "Global memory: %llu MB\n", (unsigned long long)(global_mem >> 20));
 71 |     if(global_mem > (unsigned long long)4*1024*1024*1024L)
 72 |         fprintf(stderr, "64-bit Memory Address support\n");
 73 | 
 74 |     CUcontext  context;
 75 |     checkError(cuCtxCreate(&context, 0, device));
 76 | 
 77 |     size_t size = 0x100000;
 78 |     CUdeviceptr dptr = 0;
 79 |     unsigned int flag = 1;
 80 |     CUresult status = CUDA_SUCCESS;  // declared before the first goto (C++ jump rule)
 81 |     unsigned char *h_odata = (unsigned char *)malloc(size);
 82 |     if(!h_odata) {
 83 |         fprintf(stderr, "Error allocating host buffer\n");
 84 |         goto do_free_context;
 85 |     }
 86 | 
 87 |     status = cuMemAlloc(&dptr, size);
 88 |     if(wasError(status)) {
 89 |         goto do_free_context;
 90 |     }
 91 | 
 92 |     fprintf(stderr, "Allocate memory address: 0x%llx\n",  (unsigned long long)dptr);
 93 | 
 94 |     status = cuPointerSetAttribute(&flag, CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, dptr);
 95 |     if(wasError(status)) {
 96 |         goto do_free_memory;
 97 |     }
 98 | 
 99 |     fprintf(stderr, "Press enter to lock\n");
100 | 
101 |     // Hand the allocation to the gpumem driver.
102 |     lock.addr = dptr;
103 |     lock.size = size;
104 |     res = ioctl(fd, IOCTL_GPUMEM_LOCK, &lock);
105 |     if(res < 0) {
106 |         fprintf(stderr, "Error in IOCTL_GPUDMA_MEM_LOCK\n");
107 |         goto do_free_attr;
108 |     }
109 | 
110 |     // Field types come from gpumemioctl.h; the casts keep the formats valid.
111 |     fprintf(stderr, "Press enter to get state. We lock %llu pages\n", (unsigned long long)lock.page_count);
112 | 
113 |     statesize = (lock.page_count*sizeof(uint64_t) + sizeof(struct gpudma_state_t));
114 |     state = (struct gpudma_state_t*)malloc(statesize);
115 |     if(!state) {
116 |         // The lock is already held, so leave through the unlock path.
117 |         fprintf(stderr, "Error allocating state buffer\n");
118 |         goto do_unlock;
119 |     }
120 |     memset(state, 0, statesize);
121 |     state->handle = lock.handle;
122 |     state->page_count = lock.page_count;
123 |     res = ioctl(fd, IOCTL_GPUMEM_STATE, state);
124 |     if(res < 0) {
125 |         fprintf(stderr, "Error in IOCTL_GPUDMA_MEM_STATE\n");
126 |         goto do_unlock;
127 |     }
128 | 
129 |     fprintf(stderr, "Page count 0x%llx\n", (unsigned long long)state->page_count);
130 |     fprintf(stderr, "Page size 0x%llx\n", (unsigned long long)state->page_size);
131 | 
132 |     for(unsigned i=0; i<state->page_count; i++) {
133 |         fprintf(stderr, "%02u: 0x%llx\n", i, (unsigned long long)state->pages[i]);
134 |         void* va = mmap(0, state->page_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, (off_t)state->pages[i]);
135 |         if(va == MAP_FAILED ) {
136 |              fprintf(stderr, "%s(): %s\n", __FUNCTION__, strerror(errno));
137 |              continue;
138 |         }
139 |         // Fill the mapped page with consecutive 32-bit counter values.
140 |         unsigned *ptr=(unsigned*)va;
141 |         for( unsigned jj=0; jj<(state->page_size/4); jj++ )
142 |         {
143 |             *ptr++=count++;
144 |         }
145 |         fprintf(stderr, "%s(): Physical Address 0x%llx -> Virtual Address %p\n", __FUNCTION__, (unsigned long long)state->pages[i], va);
146 |         munmap(va, state->page_size);
147 |     }
148 | 
149 |     {
150 |         // Read the buffer back through CUDA and compare it with the pattern.
151 |         if(wasError(cuMemcpyDtoH( h_odata, dptr, size ))) {
152 |             goto do_unlock;
153 |         }
154 |         cuCtxSynchronize();
155 |         unsigned *ptr = (unsigned*)h_odata;
156 |         unsigned val;
157 |         unsigned expect_data=0x0A000000;
158 |         unsigned cnt=size/4;
159 |         unsigned error_cnt=0;
160 |         for( unsigned ii=0; ii<cnt; ii++ )
161 |         {
162 |             val=*ptr++;
163 |             if( val!=expect_data )
164 |             {
165 |                 error_cnt++;
166 |                 if( error_cnt<32 )  // print only the first mismatches
167 |                     fprintf(stderr, "%4u 0x%.8X - Error  expect: 0x%.8X\n", ii, val, expect_data );
168 |             } else if( ii<16 )
169 |             {
170 |                 fprintf(stderr, "%4u 0x%.8X \n", ii, val );
171 |             }
172 |             expect_data++;
173 |         }
174 |         if( 0==error_cnt )
175 |         {
176 |             fprintf(stderr, "\nTest successful\n" );
177 |             ret = 0;
178 |         } else
179 |         {
180 |             fprintf(stderr, "\nTest with error\n" );
181 |         }
182 |     }
183 | 
184 |     fprintf(stderr, "Press enter to unlock\n");
185 | do_unlock:
186 |     unlock.handle = lock.handle;
187 |     res = ioctl(fd, IOCTL_GPUMEM_UNLOCK, &unlock);
188 |     if(res < 0) {
189 |         fprintf(stderr, "Error in IOCTL_GPUDMA_MEM_UNLOCK\n");
190 |         ret = -1;
191 |     }
192 |     free(state);
193 | do_free_attr:
194 |     flag = 0;
195 |     cuPointerSetAttribute(&flag, CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, dptr);
196 | do_free_memory:
197 |     cuMemFree(dptr);
198 | do_free_context:
199 |     free(h_odata);
200 |     cuCtxDestroy(context);
201 |     close(fd);
202 |     return ret;
203 | }
204 | 
205 | // -------------------------------------------------------------------
206 | 
207 | // Terminates the process with a failure status when a CUDA driver call
208 | // did not return CUDA_SUCCESS; the error is reported on stderr first.
209 | void checkError(CUresult status)
210 | {
211 |     if(status == CUDA_SUCCESS)
212 |         return;
213 |     const char *perrstr = 0;
214 |     if(cuGetErrorString(status, &perrstr) == CUDA_SUCCESS && perrstr) {
215 |         fprintf(stderr, "info: %s\n", perrstr);
216 |     } else {
217 |         // No text available: still report the numeric status.
218 |         fprintf(stderr, "info: unknown error (CUresult %d)\n", (int)status);
219 |     }
220 |     exit(EXIT_FAILURE);
221 | }
222 | 
223 | //-----------------------------------------------------------------------------
224 | 
225 | // Reports a failed CUDA driver call on stderr.
226 | // Returns true when status is not CUDA_SUCCESS, false otherwise.
227 | bool wasError(CUresult status)
228 | {
229 |     if(status == CUDA_SUCCESS)
230 |         return false;
231 | 
232 |     const char *perrstr = 0;
233 |     if(cuGetErrorString(status, &perrstr) == CUDA_SUCCESS && perrstr) {
234 |         fprintf(stderr, "info: %s\n", perrstr);
235 |     } else {
236 |         // No text available: still report the numeric status.
237 |         fprintf(stderr, "info: unknown error (CUresult %d)\n", (int)status);
238 |     }
239 |     return true;
240 | }
241 | 
242 | //-----------------------------------------------------------------------------
243 | 


--------------------------------------------------------------------------------
/app_template/.cproject:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8" standalone="no"?>
  2 | <?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
  3 | 	<storageModule moduleId="org.eclipse.cdt.core.settings">
  4 | 		<cconfiguration id="com.nvidia.cuda.ide.eight_zero.configuration.debug.769605203">
  5 | 			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="com.nvidia.cuda.ide.eight_zero.configuration.debug.769605203" moduleId="org.eclipse.cdt.core.settings" name="Debug">
  6 | 				<externalSettings/>
  7 | 				<extensions>
  8 | 					<extension id="com.nvidia.cuda.ide.cubin" point="org.eclipse.cdt.core.BinaryParser"/>
  9 | 					<extension id="com.nvidia.cuda.ide.elf" point="org.eclipse.cdt.core.BinaryParser"/>
 10 | 					<extension id="com.nvidia.cuda.ide.macho" point="org.eclipse.cdt.core.BinaryParser"/>
 11 | 					<extension id="nvcc.errorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 12 | 					<extension id="org.eclipse.cdt.core.VCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 13 | 					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 14 | 					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 15 | 					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 16 | 					<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 17 | 				</extensions>
 18 | 			</storageModule>
 19 | 			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
 20 | 				<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="com.nvidia.cuda.ide.eight_zero.configuration.debug.769605203" name="Debug" parent="com.nvidia.cuda.ide.eight_zero.configuration.debug">
 21 | 					<folderInfo id="com.nvidia.cuda.ide.eight_zero.configuration.debug.769605203." name="/" resourcePath="">
 22 | 						<toolChain id="com.nvidia.cuda.tools.toolchain.eight_zero.exe.debug.1258879585" name="CUDA Toolkit 8.0" superClass="com.nvidia.cuda.tools.toolchain.eight_zero.exe.debug">
 23 | 							<targetPlatform archList="all" binaryParser="com.nvidia.cuda.ide.elf;com.nvidia.cuda.ide.macho;com.nvidia.cuda.ide.cubin" id="com.nvidia.cuda.ide.targetPlatform.1559038096" isAbstract="false" name="Debug Platform" osList="linux,macosx" superClass="com.nvidia.cuda.ide.targetPlatform"/>
 24 | 							<builder buildPath="${workspace_loc:/app_template}/Debug" id="com.nvidia.cuda.ide.builder.1169646752" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="CUDA Toolkit 8.0 Builder" superClass="com.nvidia.cuda.ide.builder"/>
 25 | 							<tool commandLinePattern="${COMMAND} ${FLAGS} ${OUTPUT_FLAG} ${OUTPUT_PREFIX} ${OUTPUT} ${INPUTS} " id="nvcc.compiler.base.1224963947" name="NVCC Compiler" superClass="nvcc.compiler.base">
 26 | 								<option id="nvcc.compiler.deviceDebug.1371846519" name="Generate device debug information (-G)" superClass="nvcc.compiler.deviceDebug" value="true" valueType="boolean"/>
 27 | 								<option id="nvcc.compiler.option.level.1860638282" name="Generate host debug information (-g)" superClass="nvcc.compiler.option.level" value="true" valueType="boolean"/>
 28 | 								<option defaultValue="nvcc.compiler.optimization.level.none" id="nvcc.compiler.optimization.level.925997822" name="Optimization Level" superClass="nvcc.compiler.optimization.level" valueType="enumerated"/>
 29 | 								<option id="nvcc.compiler.pic.1190667854" name="Position Independent Code (-fPIC)" superClass="nvcc.compiler.pic"/>
 30 | 								<option id="nvcc.compiler.include.paths.1965030831" name="Include paths (-I)" superClass="nvcc.compiler.include.paths" valueType="includePath">
 31 | 									<listOptionValue builtIn="false" value="/usr/local/cuda/samples/common/inc"/>
 32 | 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/common}&quot;"/>
 33 | 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/cuda}&quot;"/>
 34 | 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/host}&quot;"/>
 35 | 								</option>
 36 | 								<option id="nvcc.compiler.cpp11option.412176469" name="Enable C++11 support (-std=c++11)" superClass="nvcc.compiler.cpp11option" value="false" valueType="boolean"/>
 37 | 								<inputType id="nvcc.compiler.input.cu.1039250508" superClass="nvcc.compiler.input.cu"/>
 38 | 								<inputType id="nvcc.compiler.input.cpp.2131155639" superClass="nvcc.compiler.input.cpp"/>
 39 | 								<inputType id="nvcc.compiler.input.c.1997590059" superClass="nvcc.compiler.input.c"/>
 40 | 							</tool>
 41 | 							<tool id="nvcc.linker.base.1994144778" name="NVCC Linker" superClass="nvcc.linker.base">
 42 | 								<option id="nvcc.linker.option.libs.354515123" name="Libraries (-l)" superClass="nvcc.linker.option.libs" valueType="libs">
 43 | 									<listOptionValue builtIn="false" value="cuda"/>
 44 | 								</option>
 45 | 								<inputType id="nvcc.linker.input.1328376550" superClass="nvcc.linker.input">
 46 | 									<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
 47 | 									<additionalInput kind="additionalinput" paths="$(LIBS)"/>
 48 | 								</inputType>
 49 | 							</tool>
 50 | 							<tool id="nvcc.archiver.base.157293855" name="NVCC Archiver" superClass="nvcc.archiver.base"/>
 51 | 							<tool id="com.nvidia.host.assembler.1461576937" name="Host Assembler" superClass="com.nvidia.host.assembler">
 52 | 								<option id="gnu.both.asm.option.include.paths.1843042356" name="Include paths (-I)" superClass="gnu.both.asm.option.include.paths" valueType="includePath">
 53 | 									<listOptionValue builtIn="false" value="/usr/local/cuda/samples/common/inc"/>
 54 | 								</option>
 55 | 								<inputType id="cdt.managedbuild.tool.gnu.assembler.input.127007948" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
 56 | 							</tool>
 57 | 						</toolChain>
 58 | 					</folderInfo>
 59 | 					<sourceEntries>
 60 | 						<entry excluding="host|cuda" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
 61 | 						<entry flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name="cuda"/>
 62 | 						<entry flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name="host"/>
 63 | 					</sourceEntries>
 64 | 				</configuration>
 65 | 			</storageModule>
 66 | 			<storageModule moduleId="com.nvidia.cuda.ide.build.project.ICudaProjectConfiguration">
 67 | 				<executable devicelink="false">
 68 | 					<sass major="5" minor="2"/>
 69 | 				</executable>
 70 | 				<editor-arch major="5" minor="2"/>
 71 | 			</storageModule>
 72 | 			<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
 73 | 		</cconfiguration>
 74 | 		<cconfiguration id="com.nvidia.cuda.ide.eight_zero.configuration.release.1091078846">
 75 | 			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="com.nvidia.cuda.ide.eight_zero.configuration.release.1091078846" moduleId="org.eclipse.cdt.core.settings" name="Release">
 76 | 				<externalSettings/>
 77 | 				<extensions>
 78 | 					<extension id="com.nvidia.cuda.ide.cubin" point="org.eclipse.cdt.core.BinaryParser"/>
 79 | 					<extension id="com.nvidia.cuda.ide.elf" point="org.eclipse.cdt.core.BinaryParser"/>
 80 | 					<extension id="com.nvidia.cuda.ide.macho" point="org.eclipse.cdt.core.BinaryParser"/>
 81 | 					<extension id="nvcc.errorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 82 | 					<extension id="org.eclipse.cdt.core.VCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 83 | 					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 84 | 					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 85 | 					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 86 | 					<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 87 | 				</extensions>
 88 | 			</storageModule>
 89 | 			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
 90 | 				<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="com.nvidia.cuda.ide.eight_zero.configuration.release.1091078846" name="Release" parent="com.nvidia.cuda.ide.eight_zero.configuration.release">
 91 | 					<folderInfo id="com.nvidia.cuda.ide.eight_zero.configuration.release.1091078846." name="/" resourcePath="">
 92 | 						<toolChain id="com.nvidia.cuda.ide.toolchain.eight_zero.exe.release.1907575049" name="CUDA Toolkit 8.0" superClass="com.nvidia.cuda.ide.toolchain.eight_zero.exe.release">
 93 | 							<targetPlatform archList="all" binaryParser="com.nvidia.cuda.ide.elf;com.nvidia.cuda.ide.macho;com.nvidia.cuda.ide.cubin" id="com.nvidia.cuda.ide.targetPlatform.2010600415" isAbstract="false" name="Debug Platform" osList="linux,macosx" superClass="com.nvidia.cuda.ide.targetPlatform"/>
 94 | 							<builder buildPath="${workspace_loc:/app_template}/Release" id="com.nvidia.cuda.ide.builder.1664654645" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="CUDA Toolkit 8.0 Builder" superClass="com.nvidia.cuda.ide.builder"/>
 95 | 							<tool id="nvcc.compiler.base.1988295041" name="NVCC Compiler" superClass="nvcc.compiler.base">
 96 | 								<option id="nvcc.compiler.deviceDebug.523326850" name="Generate device debug information (-G)" superClass="nvcc.compiler.deviceDebug"/>
 97 | 								<option id="nvcc.compiler.option.level.1008307181" name="Generate host debug information (-g)" superClass="nvcc.compiler.option.level"/>
 98 | 								<option defaultValue="nvcc.compiler.optimization.level.most" id="nvcc.compiler.optimization.level.1356719978" name="Optimization Level" superClass="nvcc.compiler.optimization.level" valueType="enumerated"/>
 99 | 								<option id="nvcc.compiler.pic.352129615" name="Position Independent Code (-fPIC)" superClass="nvcc.compiler.pic"/>
100 | 								<option id="nvcc.compiler.include.paths.1413735017" name="Include paths (-I)" superClass="nvcc.compiler.include.paths" valueType="includePath">
101 | 									<listOptionValue builtIn="false" value="/usr/local/cuda/samples/common/inc"/>
102 | 								</option>
103 | 								<inputType id="nvcc.compiler.input.cu.214480156" superClass="nvcc.compiler.input.cu"/>
104 | 								<inputType id="nvcc.compiler.input.cpp.592658211" superClass="nvcc.compiler.input.cpp"/>
105 | 								<inputType id="nvcc.compiler.input.c.2070226338" superClass="nvcc.compiler.input.c"/>
106 | 							</tool>
107 | 							<tool id="nvcc.linker.base.550716777" name="NVCC Linker" superClass="nvcc.linker.base">
108 | 								<inputType id="nvcc.linker.input.621028591" superClass="nvcc.linker.input">
109 | 									<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
110 | 									<additionalInput kind="additionalinput" paths="$(LIBS)"/>
111 | 								</inputType>
112 | 							</tool>
113 | 							<tool id="nvcc.archiver.base.1578162796" name="NVCC Archiver" superClass="nvcc.archiver.base"/>
114 | 							<tool id="com.nvidia.host.assembler.2145343875" name="Host Assembler" superClass="com.nvidia.host.assembler">
115 | 								<option id="gnu.both.asm.option.include.paths.1080056675" name="Include paths (-I)" superClass="gnu.both.asm.option.include.paths" valueType="includePath">
116 | 									<listOptionValue builtIn="false" value="/usr/local/cuda/samples/common/inc"/>
117 | 								</option>
118 | 								<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1008941105" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
119 | 							</tool>
120 | 						</toolChain>
121 | 					</folderInfo>
122 | 					<sourceEntries>
123 | 						<entry excluding="host|cuda" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
124 | 						<entry flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name="cuda"/>
125 | 						<entry flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name="host"/>
126 | 					</sourceEntries>
127 | 				</configuration>
128 | 			</storageModule>
129 | 			<storageModule moduleId="com.nvidia.cuda.ide.build.project.ICudaProjectConfiguration">
130 | 				<executable devicelink="false">
131 | 					<sass major="5" minor="2"/>
132 | 				</executable>
133 | 			</storageModule>
134 | 			<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
135 | 		</cconfiguration>
136 | 	</storageModule>
137 | 	<storageModule moduleId="cdtBuildSystem" version="4.0.0">
138 | 		<project id="app_template.com.nvidia.cuda.ide.eight_zero.exe.1518077970" name="Executable" projectType="com.nvidia.cuda.ide.eight_zero.exe"/>
139 | 	</storageModule>
140 | 	<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
141 | 	<storageModule moduleId="refreshScope" versionNumber="2">
142 | 		<configuration configurationName="Release">
143 | 			<resource resourceType="PROJECT" workspacePath="/app_template"/>
144 | 		</configuration>
145 | 		<configuration configurationName="Debug">
146 | 			<resource resourceType="PROJECT" workspacePath="/app_template"/>
147 | 		</configuration>
148 | 	</storageModule>
149 | 	<storageModule moduleId="scannerConfiguration">
150 | 		<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
151 | 		<scannerConfigBuildInfo instanceId="com.nvidia.cuda.ide.eight_zero.configuration.release.1091078846;com.nvidia.cuda.ide.eight_zero.configuration.release.1091078846.;nvcc.compiler.base.1988295041;nvcc.compiler.input.c.2070226338">
152 | 			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="com.nvidia.cuda.ide.build.NVCCPerProjectProfile"/>
153 | 		</scannerConfigBuildInfo>
154 | 		<scannerConfigBuildInfo instanceId="com.nvidia.cuda.ide.eight_zero.configuration.release.1091078846;com.nvidia.cuda.ide.eight_zero.configuration.release.1091078846.;nvcc.compiler.base.1988295041;nvcc.compiler.input.cpp.592658211">
155 | 			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="com.nvidia.cuda.ide.build.NVCCPerProjectProfile"/>
156 | 		</scannerConfigBuildInfo>
157 | 		<scannerConfigBuildInfo instanceId="com.nvidia.cuda.ide.eight_zero.configuration.debug.769605203;com.nvidia.cuda.ide.eight_zero.configuration.debug.769605203.;nvcc.compiler.base.1224963947;nvcc.compiler.input.c.1997590059">
158 | 			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="com.nvidia.cuda.ide.build.NVCCPerProjectProfile"/>
159 | 		</scannerConfigBuildInfo>
160 | 		<scannerConfigBuildInfo instanceId="com.nvidia.cuda.ide.eight_zero.configuration.release.1091078846;com.nvidia.cuda.ide.eight_zero.configuration.release.1091078846.;nvcc.compiler.base.1988295041;nvcc.compiler.input.cu.214480156">
161 | 			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="com.nvidia.cuda.ide.build.NVCCPerProjectProfile"/>
162 | 		</scannerConfigBuildInfo>
163 | 		<scannerConfigBuildInfo instanceId="com.nvidia.cuda.ide.eight_zero.configuration.debug.769605203;com.nvidia.cuda.ide.eight_zero.configuration.debug.769605203.;nvcc.compiler.base.1224963947;nvcc.compiler.input.cpp.2131155639">
164 | 			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="com.nvidia.cuda.ide.build.NVCCPerProjectProfile"/>
165 | 		</scannerConfigBuildInfo>
166 | 		<scannerConfigBuildInfo instanceId="com.nvidia.cuda.ide.eight_zero.configuration.debug.769605203;com.nvidia.cuda.ide.eight_zero.configuration.debug.769605203.;nvcc.compiler.base.1224963947;nvcc.compiler.input.cu.1039250508">
167 | 			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="com.nvidia.cuda.ide.build.NVCCPerProjectProfile"/>
168 | 		</scannerConfigBuildInfo>
169 | 	</storageModule>
170 | </cproject>
171 | 


--------------------------------------------------------------------------------
/app_template/.project:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <projectDescription>
 3 | 	<name>app_template</name>
 4 | 	<comment></comment>
 5 | 	<projects>
 6 | 	</projects>
 7 | 	<buildSpec>
 8 | 		<buildCommand>
 9 | 			<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
10 | 			<triggers>clean,full,incremental,</triggers>
11 | 			<arguments>
12 | 			</arguments>
13 | 		</buildCommand>
14 | 		<buildCommand>
15 | 			<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
16 | 			<triggers>full,incremental,</triggers>
17 | 			<arguments>
18 | 			</arguments>
19 | 		</buildCommand>
20 | 	</buildSpec>
21 | 	<natures>
22 | 		<nature>org.eclipse.cdt.core.cnature</nature>
23 | 		<nature>org.eclipse.cdt.core.ccnature</nature>
24 | 		<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
25 | 		<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
26 | 	</natures>
27 | </projectDescription>
28 | 


--------------------------------------------------------------------------------
/app_template/app_template.dxy:
--------------------------------------------------------------------------------
   1 | # Doxyfile 1.6.1
   2 | 
   3 | # This file describes the settings to be used by the documentation system
   4 | # doxygen (www.doxygen.org) for a project
   5 | #
   6 | # All text after a hash (#) is considered a comment and will be ignored
   7 | # The format is:
   8 | #       TAG = value [value, ...]
   9 | # For lists items can also be appended using:
  10 | #       TAG += value [value, ...]
  11 | # Values that contain spaces should be placed between quotes (" ")
  12 | 
  13 | #---------------------------------------------------------------------------
  14 | # Project related configuration options
  15 | #---------------------------------------------------------------------------
  16 | 
  17 | # This tag specifies the encoding used for all characters in the config file
  18 | # that follow. The default is UTF-8 which is also the encoding used for all
  19 | # text before the first occurrence of this tag. Doxygen uses libiconv (or the
  20 | # iconv built into libc) for the transcoding. See
  21 | # http://www.gnu.org/software/libiconv for the list of possible encodings.
  22 | 
  23 | DOXYFILE_ENCODING      = UTF-8
  24 | 
  25 | # The PROJECT_NAME tag is a single word (or a sequence of words surrounded
  26 | # by quotes) that should identify the project.
  27 | 
  28 | PROJECT_NAME           = APP_TEMPLATE
  29 | 
  30 | # The PROJECT_NUMBER tag can be used to enter a project or revision number.
  31 | # This could be handy for archiving the generated documentation or
  32 | # if some version control system is used.
  33 | 
  34 | PROJECT_NUMBER         = 0.1
  35 | 
  36 | # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
  37 | # base path where the generated documentation will be put.
  38 | # If a relative path is entered, it will be relative to the location
  39 | # where doxygen was started. If left blank the current directory will be used.
  40 | 
  41 | OUTPUT_DIRECTORY       = ./doc
  42 | 
  43 | # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
  44 | # 4096 sub-directories (in 2 levels) under the output directory of each output
  45 | # format and will distribute the generated files over these directories.
  46 | # Enabling this option can be useful when feeding doxygen a huge amount of
  47 | # source files, where putting all generated files in the same directory would
  48 | # otherwise cause performance problems for the file system.
  49 | 
  50 | CREATE_SUBDIRS         = NO
  51 | 
  52 | # The OUTPUT_LANGUAGE tag is used to specify the language in which all
  53 | # documentation generated by doxygen is written. Doxygen will use this
  54 | # information to generate all constant output in the proper language.
  55 | # The default language is English, other supported languages are:
  56 | # Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional,
  57 | # Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German,
  58 | # Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English
  59 | # messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian,
  60 | # Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrilic, Slovak,
  61 | # Slovene, Spanish, Swedish, Ukrainian, and Vietnamese.
  62 | 
  63 | OUTPUT_LANGUAGE        = English
  64 | 
  65 | # If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
  66 | # include brief member descriptions after the members that are listed in
  67 | # the file and class documentation (similar to JavaDoc).
  68 | # Set to NO to disable this.
  69 | 
  70 | BRIEF_MEMBER_DESC      = YES
  71 | 
  72 | # If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
  73 | # the brief description of a member or function before the detailed description.
  74 | # Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
  75 | # brief descriptions will be completely suppressed.
  76 | 
  77 | REPEAT_BRIEF           = YES
  78 | 
  79 | # This tag implements a quasi-intelligent brief description abbreviator
  80 | # that is used to form the text in various listings. Each string
  81 | # in this list, if found as the leading text of the brief description, will be
  82 | # stripped from the text and the result after processing the whole list, is
  83 | # used as the annotated text. Otherwise, the brief description is used as-is.
  84 | # If left blank, the following values are used ("$name" is automatically
  85 | # replaced with the name of the entity): "The $name class" "The $name widget"
  86 | # "The $name file" "is" "provides" "specifies" "contains"
  87 | # "represents" "a" "an" "the"
  88 | 
  89 | ABBREVIATE_BRIEF       =
  90 | 
  91 | # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
  92 | # Doxygen will generate a detailed section even if there is only a brief
  93 | # description.
  94 | 
  95 | ALWAYS_DETAILED_SEC    = NO
  96 | 
  97 | # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
  98 | # inherited members of a class in the documentation of that class as if those
  99 | # members were ordinary class members. Constructors, destructors and assignment
 100 | # operators of the base classes will not be shown.
 101 | 
 102 | INLINE_INHERITED_MEMB  = NO
 103 | 
 104 | # If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
 105 | # path before files name in the file list and in the header files. If set
 106 | # to NO the shortest path that makes the file name unique will be used.
 107 | 
 108 | FULL_PATH_NAMES        = YES
 109 | 
 110 | # If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
 111 | # can be used to strip a user-defined part of the path. Stripping is
 112 | # only done if one of the specified strings matches the left-hand part of
 113 | # the path. The tag can be used to show relative paths in the file list.
 114 | # If left blank the directory from which doxygen is run is used as the
 115 | # path to strip.
 116 | 
 117 | STRIP_FROM_PATH        =
 118 | 
 119 | # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of
 120 | # the path mentioned in the documentation of a class, which tells
 121 | # the reader which header file to include in order to use a class.
 122 | # If left blank only the name of the header file containing the class
 123 | # definition is used. Otherwise one should specify the include paths that
 124 | # are normally passed to the compiler using the -I flag.
 125 | 
 126 | STRIP_FROM_INC_PATH    =
 127 | 
 128 | # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
 129 | # (but less readable) file names. This can be useful if your file system
 130 | # doesn't support long names like on DOS, Mac, or CD-ROM.
 131 | 
 132 | SHORT_NAMES            = NO
 133 | 
 134 | # If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
 135 | # will interpret the first line (until the first dot) of a JavaDoc-style
 136 | # comment as the brief description. If set to NO, the JavaDoc
 137 | # comments will behave just like regular Qt-style comments
 138 | # (thus requiring an explicit @brief command for a brief description.)
 139 | 
 140 | JAVADOC_AUTOBRIEF      = NO
 141 | 
 142 | # If the QT_AUTOBRIEF tag is set to YES then Doxygen will
 143 | # interpret the first line (until the first dot) of a Qt-style
 144 | # comment as the brief description. If set to NO, the comments
 145 | # will behave just like regular Qt-style comments (thus requiring
 146 | # an explicit \brief command for a brief description.)
 147 | 
 148 | QT_AUTOBRIEF           = NO
 149 | 
 150 | # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
 151 | # treat a multi-line C++ special comment block (i.e. a block of //! or ///
 152 | # comments) as a brief description. This used to be the default behaviour.
 153 | # The new default is to treat a multi-line C++ comment block as a detailed
 154 | # description. Set this tag to YES if you prefer the old behaviour instead.
 155 | 
 156 | MULTILINE_CPP_IS_BRIEF = NO
 157 | 
 158 | # If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
 159 | # member inherits the documentation from any documented member that it
 160 | # re-implements.
 161 | 
 162 | INHERIT_DOCS           = YES
 163 | 
 164 | # If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce
 165 | # a new page for each member. If set to NO, the documentation of a member will
 166 | # be part of the file/class/namespace that contains it.
 167 | 
 168 | SEPARATE_MEMBER_PAGES  = NO
 169 | 
 170 | # The TAB_SIZE tag can be used to set the number of spaces in a tab.
 171 | # Doxygen uses this value to replace tabs by spaces in code fragments.
 172 | 
 173 | TAB_SIZE               = 8
 174 | 
 175 | # This tag can be used to specify a number of aliases that act
 176 | # as commands in the documentation. An alias has the form "name=value".
 177 | # For example adding "sideeffect=\par Side Effects:\n" will allow you to
 178 | # put the command \sideeffect (or @sideeffect) in the documentation, which
 179 | # will result in a user-defined paragraph with heading "Side Effects:".
 180 | # You can put \n's in the value part of an alias to insert newlines.
 181 | 
 182 | ALIASES                =
 183 | 
 184 | # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
 185 | # sources only. Doxygen will then generate output that is more tailored for C.
 186 | # For instance, some of the names that are used will be different. The list
 187 | # of all members will be omitted, etc.
 188 | 
 189 | OPTIMIZE_OUTPUT_FOR_C  = NO
 190 | 
 191 | # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java
 192 | # sources only. Doxygen will then generate output that is more tailored for
 193 | # Java. For instance, namespaces will be presented as packages, qualified
 194 | # scopes will look different, etc.
 195 | 
 196 | OPTIMIZE_OUTPUT_JAVA   = NO
 197 | 
 198 | # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
 199 | # sources only. Doxygen will then generate output that is more tailored for
 200 | # Fortran.
 201 | 
 202 | OPTIMIZE_FOR_FORTRAN   = NO
 203 | 
 204 | # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
 205 | # sources. Doxygen will then generate output that is tailored for
 206 | # VHDL.
 207 | 
 208 | OPTIMIZE_OUTPUT_VHDL   = NO
 209 | 
 210 | # Doxygen selects the parser to use depending on the extension of the files it parses.
 211 | # With this tag you can assign which parser to use for a given extension.
 212 | # Doxygen has a built-in mapping, but you can override or extend it using this tag.
 213 | # The format is ext=language, where ext is a file extension, and language is one of
 214 | # the parsers supported by doxygen: IDL, Java, Javascript, C#, C, C++, D, PHP,
 215 | # Objective-C, Python, Fortran, VHDL. For instance, to make doxygen treat
 216 | # .inc files as Fortran files (default is PHP), and .f files as C (default is Fortran),
 217 | # use: inc=Fortran f=C. Note that for custom extensions you also need to set FILE_PATTERNS otherwise the files are not read by doxygen.
 218 | 
 219 | EXTENSION_MAPPING      = C++
 220 | 
 221 | # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
 222 | # to include (a tag file for) the STL sources as input, then you should
 223 | # set this tag to YES in order to let doxygen match functions declarations and
 224 | # definitions whose arguments contain STL classes (e.g. func(std::string); vs.
 225 | # func(std::string) {}). This also makes the inheritance and collaboration
 226 | # diagrams that involve STL classes more complete and accurate.
 227 | 
 228 | BUILTIN_STL_SUPPORT    = YES
 229 | 
 230 | # If you use Microsoft's C++/CLI language, you should set this option to YES to
 231 | # enable parsing support.
 232 | 
 233 | CPP_CLI_SUPPORT        = NO
 234 | 
 235 | # Set the SIP_SUPPORT tag to YES if your project consists of sip sources only.
 236 | # Doxygen will parse them like normal C++ but will assume all classes use public
 237 | # instead of private inheritance when no explicit protection keyword is present.
 238 | 
 239 | SIP_SUPPORT            = NO
 240 | 
 241 | # For Microsoft's IDL there are propget and propput attributes to indicate getter
 242 | # and setter methods for a property. Setting this option to YES (the default)
 243 | # will make doxygen replace the get and set methods by a property in the
 244 | # documentation. This will only work if the methods are indeed getting or
 245 | # setting a simple type. If this is not the case, or you want to show the
 246 | # methods anyway, you should set this option to NO.
 247 | 
 248 | IDL_PROPERTY_SUPPORT   = YES
 249 | 
 250 | # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
 251 | # tag is set to YES, then doxygen will reuse the documentation of the first
 252 | # member in the group (if any) for the other members of the group. By default
 253 | # all members of a group must be documented explicitly.
 254 | 
 255 | DISTRIBUTE_GROUP_DOC   = NO
 256 | 
 257 | # Set the SUBGROUPING tag to YES (the default) to allow class member groups of
 258 | # the same type (for instance a group of public functions) to be put as a
 259 | # subgroup of that type (e.g. under the Public Functions section). Set it to
 260 | # NO to prevent subgrouping. Alternatively, this can be done per class using
 261 | # the \nosubgrouping command.
 262 | 
 263 | SUBGROUPING            = YES
 264 | 
 265 | # When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum
 266 | # is documented as struct, union, or enum with the name of the typedef. So
 267 | # typedef struct TypeS {} TypeT, will appear in the documentation as a struct
 268 | # with name TypeT. When disabled the typedef will appear as a member of a file,
 269 | # namespace, or class. And the struct will be named TypeS. This can typically
 270 | # be useful for C code in case the coding convention dictates that all compound
 271 | # types are typedef'ed and only the typedef is referenced, never the tag name.
 272 | 
 273 | TYPEDEF_HIDES_STRUCT   = NO
 274 | 
 275 | # The SYMBOL_CACHE_SIZE determines the size of the internal cache used to
 276 | # determine which symbols to keep in memory and which to flush to disk.
 277 | # When the cache is full, less often used symbols will be written to disk.
 278 | # For small to medium size projects (<1000 input files) the default value is
 279 | # probably good enough. For larger projects a too small cache size can cause
 280 | # doxygen to be busy swapping symbols to and from disk most of the time
 281 | # causing a significant performance penalty.
 282 | # If the system has enough physical memory increasing the cache will improve the
 283 | # performance by keeping more symbols in memory. Note that the value works on
 284 | # a logarithmic scale so increasing the size by one will roughly double the
 285 | # memory usage. The cache size is given by this formula:
 286 | # 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0,
 287 | # corresponding to a cache size of 2^16 = 65536 symbols
 288 | 
 289 | SYMBOL_CACHE_SIZE      = 0
 290 | 
 291 | #---------------------------------------------------------------------------
 292 | # Build related configuration options
 293 | #---------------------------------------------------------------------------
 294 | 
 295 | # If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
 296 | # documentation are documented, even if no documentation was available.
 297 | # Private class members and static file members will be hidden unless
 298 | # the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
 299 | 
 300 | EXTRACT_ALL            = YES
 301 | 
 302 | # If the EXTRACT_PRIVATE tag is set to YES all private members of a class
 303 | # will be included in the documentation.
 304 | 
 305 | EXTRACT_PRIVATE        = YES
 306 | 
 307 | # If the EXTRACT_STATIC tag is set to YES all static members of a file
 308 | # will be included in the documentation.
 309 | 
 310 | EXTRACT_STATIC         = YES
 311 | 
 312 | # If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
 313 | # defined locally in source files will be included in the documentation.
 314 | # If set to NO only classes defined in header files are included.
 315 | 
 316 | EXTRACT_LOCAL_CLASSES  = YES
 317 | 
 318 | # This flag is only useful for Objective-C code. When set to YES local
 319 | # methods, which are defined in the implementation section but not in
 320 | # the interface are included in the documentation.
 321 | # If set to NO (the default) only methods in the interface are included.
 322 | 
 323 | EXTRACT_LOCAL_METHODS  = NO
 324 | 
 325 | # If this flag is set to YES, the members of anonymous namespaces will be
 326 | # extracted and appear in the documentation as a namespace called
 327 | # 'anonymous_namespace{file}', where file will be replaced with the base
 328 | # name of the file that contains the anonymous namespace. By default
 329 | # anonymous namespaces are hidden.
 330 | 
 331 | EXTRACT_ANON_NSPACES   = NO
 332 | 
 333 | # If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
 334 | # undocumented members of documented classes, files or namespaces.
 335 | # If set to NO (the default) these members will be included in the
 336 | # various overviews, but no documentation section is generated.
 337 | # This option has no effect if EXTRACT_ALL is enabled.
 338 | 
 339 | HIDE_UNDOC_MEMBERS     = NO
 340 | 
 341 | # If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
 342 | # undocumented classes that are normally visible in the class hierarchy.
 343 | # If set to NO (the default) these classes will be included in the various
 344 | # overviews. This option has no effect if EXTRACT_ALL is enabled.
 345 | 
 346 | HIDE_UNDOC_CLASSES     = NO
 347 | 
 348 | # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
 349 | # friend (class|struct|union) declarations.
 350 | # If set to NO (the default) these declarations will be included in the
 351 | # documentation.
 352 | 
 353 | HIDE_FRIEND_COMPOUNDS  = NO
 354 | 
 355 | # If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any
 356 | # documentation blocks found inside the body of a function.
 357 | # If set to NO (the default) these blocks will be appended to the
 358 | # function's detailed documentation block.
 359 | 
 360 | HIDE_IN_BODY_DOCS      = NO
 361 | 
 362 | # The INTERNAL_DOCS tag determines if documentation
 363 | # that is typed after a \internal command is included. If the tag is set
 364 | # to NO (the default) then the documentation will be excluded.
 365 | # Set it to YES to include the internal documentation.
 366 | 
 367 | INTERNAL_DOCS          = NO
 368 | 
 369 | # If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
 370 | # file names in lower-case letters. If set to YES upper-case letters are also
 371 | # allowed. This is useful if you have classes or files whose names only differ
 372 | # in case and if your file system supports case sensitive file names. Windows
 373 | # and Mac users are advised to set this option to NO.
 374 | 
 375 | CASE_SENSE_NAMES       = YES
 376 | 
 377 | # If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
 378 | # will show members with their full class and namespace scopes in the
 379 | # documentation. If set to YES the scope will be hidden.
 380 | 
 381 | HIDE_SCOPE_NAMES       = NO
 382 | 
 383 | # If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
 384 | # will put a list of the files that are included by a file in the documentation
 385 | # of that file.
 386 | 
 387 | SHOW_INCLUDE_FILES     = YES
 388 | 
 389 | # If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
 390 | # is inserted in the documentation for inline members.
 391 | 
 392 | INLINE_INFO            = YES
 393 | 
 394 | # If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
 395 | # will sort the (detailed) documentation of file and class members
 396 | # alphabetically by member name. If set to NO the members will appear in
 397 | # declaration order.
 398 | 
 399 | SORT_MEMBER_DOCS       = YES
 400 | 
 401 | # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
 402 | # brief documentation of file, namespace and class members alphabetically
 403 | # by member name. If set to NO (the default) the members will appear in
 404 | # declaration order.
 405 | 
 406 | SORT_BRIEF_DOCS        = NO
 407 | 
 408 | # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the (brief and detailed) documentation of class members so that constructors and destructors are listed first. If set to NO (the default) the constructors will appear in the respective orders defined by SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO.
 409 | 
 410 | SORT_MEMBERS_CTORS_1ST = NO
 411 | 
 412 | # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the
 413 | # hierarchy of group names into alphabetical order. If set to NO (the default)
 414 | # the group names will appear in their defined order.
 415 | 
 416 | SORT_GROUP_NAMES       = NO
 417 | 
 418 | # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be
 419 | # sorted by fully-qualified names, including namespaces. If set to
 420 | # NO (the default), the class list will be sorted only by class name,
 421 | # not including the namespace part.
 422 | # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
 423 | # Note: This option applies only to the class list, not to the
 424 | # alphabetical list.
 425 | 
 426 | SORT_BY_SCOPE_NAME     = NO
 427 | 
 428 | # The GENERATE_TODOLIST tag can be used to enable (YES) or
 429 | # disable (NO) the todo list. This list is created by putting \todo
 430 | # commands in the documentation.
 431 | 
 432 | GENERATE_TODOLIST      = YES
 433 | 
 434 | # The GENERATE_TESTLIST tag can be used to enable (YES) or
 435 | # disable (NO) the test list. This list is created by putting \test
 436 | # commands in the documentation.
 437 | 
 438 | GENERATE_TESTLIST      = YES
 439 | 
 440 | # The GENERATE_BUGLIST tag can be used to enable (YES) or
 441 | # disable (NO) the bug list. This list is created by putting \bug
 442 | # commands in the documentation.
 443 | 
 444 | GENERATE_BUGLIST       = YES
 445 | 
 446 | # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
 447 | # disable (NO) the deprecated list. This list is created by putting
 448 | # \deprecated commands in the documentation.
 449 | 
 450 | GENERATE_DEPRECATEDLIST= YES
 451 | 
 452 | # The ENABLED_SECTIONS tag can be used to enable conditional
 453 | # documentation sections, marked by \if sectionname ... \endif.
 454 | 
 455 | ENABLED_SECTIONS       =
 456 | 
 457 | # The MAX_INITIALIZER_LINES tag determines the maximum number of lines
 458 | # the initial value of a variable or define consists of for it to appear in
 459 | # the documentation. If the initializer consists of more lines than specified
 460 | # here it will be hidden. Use a value of 0 to hide initializers completely.
 461 | # The appearance of the initializer of individual variables and defines in the
 462 | # documentation can be controlled using \showinitializer or \hideinitializer
 463 | # command in the documentation regardless of this setting.
 464 | 
 465 | MAX_INITIALIZER_LINES  = 30
 466 | 
 467 | # Set the SHOW_USED_FILES tag to NO to disable the list of files generated
 468 | # at the bottom of the documentation of classes and structs. If set to YES the
 469 | # list will mention the files that were used to generate the documentation.
 470 | 
 471 | SHOW_USED_FILES        = YES
 472 | 
 473 | # If the sources in your project are distributed over multiple directories
 474 | # then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy
 475 | # in the documentation. The default is NO.
 476 | 
 477 | SHOW_DIRECTORIES       = NO
 478 | 
 479 | # Set the SHOW_FILES tag to NO to disable the generation of the Files page.
 480 | # This will remove the Files entry from the Quick Index and from the
 481 | # Folder Tree View (if specified). The default is YES.
 482 | 
 483 | SHOW_FILES             = YES
 484 | 
 485 | # Set the SHOW_NAMESPACES tag to NO to disable the generation of the
 486 | # Namespaces page.
 487 | # This will remove the Namespaces entry from the Quick Index
 488 | # and from the Folder Tree View (if specified). The default is YES.
 489 | 
 490 | SHOW_NAMESPACES        = YES
 491 | 
 492 | # The FILE_VERSION_FILTER tag can be used to specify a program or script that
 493 | # doxygen should invoke to get the current version for each file (typically from
 494 | # the version control system). Doxygen will invoke the program by executing (via
 495 | # popen()) the command <command> <input-file>, where <command> is the value of
 496 | # the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file
 497 | # provided by doxygen. Whatever the program writes to standard output
 498 | # is used as the file version. See the manual for examples.
 499 | 
 500 | FILE_VERSION_FILTER    =
 501 | 
 502 | # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed by
 503 | # doxygen. The layout file controls the global structure of the generated output files
 504 | # in an output format independent way. To create the layout file that represents
 505 | # doxygen's defaults, run doxygen with the -l option. You can optionally specify a
 506 | # file name after the option, if omitted DoxygenLayout.xml will be used as the name
 507 | # of the layout file.
 508 | 
 509 | LAYOUT_FILE            =
 510 | 
 511 | #---------------------------------------------------------------------------
 512 | # configuration options related to warning and progress messages
 513 | #---------------------------------------------------------------------------
 514 | 
 515 | # The QUIET tag can be used to turn on/off the messages that are generated
 516 | # by doxygen. Possible values are YES and NO. If left blank NO is used.
 517 | 
 518 | QUIET                  = NO
 519 | 
 520 | # The WARNINGS tag can be used to turn on/off the warning messages that are
 521 | # generated by doxygen. Possible values are YES and NO. If left blank
 522 | # NO is used.
 523 | 
 524 | WARNINGS               = YES
 525 | 
 526 | # If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
 527 | # for undocumented members. If EXTRACT_ALL is set to YES then this flag will
 528 | # automatically be disabled.
 529 | 
 530 | WARN_IF_UNDOCUMENTED   = YES
 531 | 
 532 | # If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
 533 | # potential errors in the documentation, such as not documenting some
 534 | # parameters in a documented function, or documenting parameters that
 535 | # don't exist or using markup commands wrongly.
 536 | 
 537 | WARN_IF_DOC_ERROR      = YES
 538 | 
 539 | # This WARN_NO_PARAMDOC option can be enabled to get warnings for
 540 | # functions that are documented, but have no documentation for their parameters
 541 | # or return value. If set to NO (the default) doxygen will only warn about
 542 | # wrong or incomplete parameter documentation, but not about the absence of
 543 | # documentation.
 544 | 
 545 | WARN_NO_PARAMDOC       = NO
 546 | 
 547 | # The WARN_FORMAT tag determines the format of the warning messages that
 548 | # doxygen can produce. The string should contain the $file, $line, and $text
 549 | # tags, which will be replaced by the file and line number from which the
 550 | # warning originated and the warning text. Optionally the format may contain
 551 | # $version, which will be replaced by the version of the file (if it could
 552 | # be obtained via FILE_VERSION_FILTER)
 553 | 
 554 | WARN_FORMAT            = "$file:$line: $text"
 555 | 
 556 | # The WARN_LOGFILE tag can be used to specify a file to which warning
 557 | # and error messages should be written. If left blank the output is written
 558 | # to stderr.
 559 | 
 560 | WARN_LOGFILE           =
 561 | 
 562 | #---------------------------------------------------------------------------
 563 | # configuration options related to the input files
 564 | #---------------------------------------------------------------------------
 565 | 
 566 | # The INPUT tag can be used to specify the files and/or directories that contain
 567 | # documented source files. You may enter file names like "myfile.cpp" or
 568 | # directories like "/usr/src/myproject". Separate the files or directories
 569 | # with spaces.
 570 | 
 571 | INPUT                  = ./cuda ./host ./common
 572 | 
 573 | # This tag can be used to specify the character encoding of the source files
 574 | # that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is
 575 | # also the default input encoding. Doxygen uses libiconv (or the iconv built
 576 | # into libc) for the transcoding. See http://www.gnu.org/software/libiconv for
 577 | # the list of possible encodings.
 578 | 
 579 | INPUT_ENCODING         = UTF-8
 580 | 
 581 | # If the value of the INPUT tag contains directories, you can use the
 582 | # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp
 583 | # and *.h) to filter out the source-files in the directories. If left
 584 | # blank the following patterns are tested:
 585 | # *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
 586 | # *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
 587 | 
 588 | FILE_PATTERNS          = *.h *.cpp *.cu
 589 | 
 590 | # The RECURSIVE tag can be used to specify whether or not subdirectories
 591 | # should be searched for input files as well. Possible values are YES and NO.
 592 | # If left blank NO is used.
 593 | 
 594 | RECURSIVE              = YES
 595 | 
 596 | # The EXCLUDE tag can be used to specify files and/or directories that should
 597 | # be excluded from the INPUT source files. This way you can easily exclude a
 598 | # subdirectory from a directory tree whose root is specified with the INPUT tag.
 599 | 
 600 | EXCLUDE                =
 601 | 
 602 | # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
 603 | # directories that are symbolic links (a Unix filesystem feature) are excluded
 604 | # from the input.
 605 | 
 606 | EXCLUDE_SYMLINKS       = NO
 607 | 
 608 | # If the value of the INPUT tag contains directories, you can use the
 609 | # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
 610 | # certain files from those directories. Note that the wildcards are matched
 611 | # against the file with absolute path, so to exclude all test directories
 612 | # for example use the pattern */test/*
 613 | 
 614 | EXCLUDE_PATTERNS       =
 615 | 
 616 | # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
 617 | # (namespaces, classes, functions, etc.) that should be excluded from the
 618 | # output. The symbol name can be a fully qualified name, a word, or if the
 619 | # wildcard * is used, a substring. Examples: ANamespace, AClass,
 620 | # AClass::ANamespace, ANamespace::*Test
 621 | 
 622 | EXCLUDE_SYMBOLS        =
 623 | 
 624 | # The EXAMPLE_PATH tag can be used to specify one or more files or
 625 | # directories that contain example code fragments that are included (see
 626 | # the \include command).
 627 | 
 628 | EXAMPLE_PATH           =
 629 | 
 630 | # If the value of the EXAMPLE_PATH tag contains directories, you can use the
 631 | # EXAMPLE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp
 632 | # and *.h) to filter out the source-files in the directories. If left
 633 | # blank all files are included.
 634 | 
 635 | EXAMPLE_PATTERNS       =
 636 | 
 637 | # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
 638 | # searched for input files to be used with the \include or \dontinclude
 639 | # commands irrespective of the value of the RECURSIVE tag.
 640 | # Possible values are YES and NO. If left blank NO is used.
 641 | 
 642 | EXAMPLE_RECURSIVE      = NO
 643 | 
 644 | # The IMAGE_PATH tag can be used to specify one or more files or
 645 | # directories that contain images that are included in the documentation (see
 646 | # the \image command).
 647 | 
 648 | IMAGE_PATH             =
 649 | 
 650 | # The INPUT_FILTER tag can be used to specify a program that doxygen should
 651 | # invoke to filter for each input file. Doxygen will invoke the filter program
 652 | # by executing (via popen()) the command <filter> <input-file>, where <filter>
 653 | # is the value of the INPUT_FILTER tag, and <input-file> is the name of an
 654 | # input file. Doxygen will then use the output that the filter program writes
 655 | # to standard output.
 656 | # If FILTER_PATTERNS is specified, this tag will be
 657 | # ignored.
 658 | 
 659 | INPUT_FILTER           =
 660 | 
 661 | # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
 662 | # basis.
 663 | # Doxygen will compare the file name with each pattern and apply the
 664 | # filter if there is a match.
 665 | # The filters are a list of the form:
 666 | # pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
 667 | # info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER
 668 | # is applied to all files.
 669 | 
 670 | FILTER_PATTERNS        =
 671 | 
 672 | # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
 673 | # INPUT_FILTER) will be used to filter the input files when producing source
 674 | # files to browse (i.e. when SOURCE_BROWSER is set to YES).
 675 | 
 676 | FILTER_SOURCE_FILES    = NO
 677 | 
 678 | #---------------------------------------------------------------------------
 679 | # configuration options related to source browsing
 680 | #---------------------------------------------------------------------------
 681 | 
 682 | # If the SOURCE_BROWSER tag is set to YES then a list of source files will
 683 | # be generated. Documented entities will be cross-referenced with these sources.
 684 | # Note: To get rid of all source code in the generated output, make sure also
 685 | # VERBATIM_HEADERS is set to NO.
 686 | 
 687 | SOURCE_BROWSER         = YES
 688 | 
 689 | # Setting the INLINE_SOURCES tag to YES will include the body
 690 | # of functions and classes directly in the documentation.
 691 | 
 692 | INLINE_SOURCES         = NO
 693 | 
 694 | # Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
 695 | # doxygen to hide any special comment blocks from generated source code
 696 | # fragments. Normal C and C++ comments will always remain visible.
 697 | 
 698 | STRIP_CODE_COMMENTS    = NO
 699 | 
 700 | # If the REFERENCED_BY_RELATION tag is set to YES
 701 | # then for each documented function all documented
 702 | # functions referencing it will be listed.
 703 | 
 704 | REFERENCED_BY_RELATION = NO
 705 | 
 706 | # If the REFERENCES_RELATION tag is set to YES
 707 | # then for each documented function all documented entities
 708 | # called/used by that function will be listed.
 709 | 
 710 | REFERENCES_RELATION    = NO
 711 | 
 712 | # If the REFERENCES_LINK_SOURCE tag is set to YES (the default)
 713 | # and SOURCE_BROWSER tag is set to YES, then the hyperlinks from
 714 | # functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will
 715 | # link to the source code.
 716 | # Otherwise they will link to the documentation.
 717 | 
 718 | REFERENCES_LINK_SOURCE = YES
 719 | 
 720 | # If the USE_HTAGS tag is set to YES then the references to source code
 721 | # will point to the HTML generated by the htags(1) tool instead of doxygen
 722 | # built-in source browser. The htags tool is part of GNU's global source
 723 | # tagging system (see http://www.gnu.org/software/global/global.html). You
 724 | # will need version 4.8.6 or higher.
 725 | 
 726 | USE_HTAGS              = NO
 727 | 
 728 | # If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
 729 | # will generate a verbatim copy of the header file for each class for
 730 | # which an include is specified. Set to NO to disable this.
 731 | 
 732 | VERBATIM_HEADERS       = NO
 733 | 
 734 | #---------------------------------------------------------------------------
 735 | # configuration options related to the alphabetical class index
 736 | #---------------------------------------------------------------------------
 737 | 
 738 | # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
 739 | # of all compounds will be generated. Enable this if the project
 740 | # contains a lot of classes, structs, unions or interfaces.
 741 | 
 742 | ALPHABETICAL_INDEX     = YES
 743 | 
 744 | # If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
 745 | # the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
 746 | # in which this list will be split (can be a number in the range [1..20])
 747 | 
 748 | COLS_IN_ALPHA_INDEX    = 5
 749 | 
 750 | # In case all classes in a project start with a common prefix, all
 751 | # classes will be put under the same header in the alphabetical index.
 752 | # The IGNORE_PREFIX tag can be used to specify one or more prefixes that
 753 | # should be ignored while generating the index headers.
 754 | 
 755 | IGNORE_PREFIX          =
 756 | 
 757 | #---------------------------------------------------------------------------
 758 | # configuration options related to the HTML output
 759 | #---------------------------------------------------------------------------
 760 | 
 761 | # If the GENERATE_HTML tag is set to YES (the default) Doxygen will
 762 | # generate HTML output.
 763 | 
 764 | GENERATE_HTML          = YES
 765 | 
 766 | # The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
 767 | # If a relative path is entered the value of OUTPUT_DIRECTORY will be
 768 | # put in front of it. If left blank `html' will be used as the default path.
 769 | 
 770 | HTML_OUTPUT            = html
 771 | 
 772 | # The HTML_FILE_EXTENSION tag can be used to specify the file extension for
 773 | # each generated HTML page (for example: .htm,.php,.asp). If it is left blank
 774 | # doxygen will generate files with .html extension.
 775 | 
 776 | HTML_FILE_EXTENSION    = .html
 777 | 
 778 | # The HTML_HEADER tag can be used to specify a personal HTML header for
 779 | # each generated HTML page. If it is left blank doxygen will generate a
 780 | # standard header.
 781 | 
 782 | HTML_HEADER            =
 783 | 
 784 | # The HTML_FOOTER tag can be used to specify a personal HTML footer for
 785 | # each generated HTML page. If it is left blank doxygen will generate a
 786 | # standard footer.
 787 | 
 788 | HTML_FOOTER            =
 789 | 
 790 | # If the HTML_TIMESTAMP tag is set to YES then the generated HTML
 791 | # documentation will contain the timestamp.
 792 | 
 793 | HTML_TIMESTAMP         = NO
 794 | 
 795 | # The HTML_STYLESHEET tag can be used to specify a user-defined cascading
 796 | # style sheet that is used by each HTML page. It can be used to
 797 | # fine-tune the look of the HTML output. If the tag is left blank doxygen
 798 | # will generate a default style sheet. Note that doxygen will try to copy
 799 | # the style sheet file to the HTML output directory, so don't put your own
 800 | # stylesheet in the HTML output directory as well, or it will be erased!
 801 | 
 802 | HTML_STYLESHEET        =
 803 | 
 804 | # If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
 805 | # files or namespaces will be aligned in HTML using tables. If set to
 806 | # NO a bullet list will be used.
 807 | 
 808 | HTML_ALIGN_MEMBERS     = YES
 809 | 
 810 | # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
 811 | # documentation will contain sections that can be hidden and shown after the
 812 | # page has loaded. For this to work a browser that supports
 813 | # JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox,
 814 | # Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari).
 815 | 
 816 | HTML_DYNAMIC_SECTIONS  = NO
 817 | 
 818 | # If the GENERATE_DOCSET tag is set to YES, additional index files
 819 | # will be generated that can be used as input for Apple's Xcode 3
 820 | # integrated development environment, introduced with OSX 10.5 (Leopard).
 821 | # To create a documentation set, doxygen will generate a Makefile in the
 822 | # HTML output directory. Running make will produce the docset in that
 823 | # directory and running "make install" will install the docset in
 824 | # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find
 825 | # it at startup.
 826 | # See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html for more information.
 827 | 
 828 | GENERATE_DOCSET        = NO
 829 | 
 830 | # When GENERATE_DOCSET tag is set to YES, this tag determines the name of the
 831 | # feed. A documentation feed provides an umbrella under which multiple
 832 | # documentation sets from a single provider (such as a company or product suite)
 833 | # can be grouped.
 834 | 
 835 | DOCSET_FEEDNAME        = "Doxygen generated docs"
 836 | 
 837 | # When GENERATE_DOCSET tag is set to YES, this tag specifies a string that
 838 | # should uniquely identify the documentation set bundle. This should be a
 839 | # reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen
 840 | # will append .docset to the name.
 841 | 
 842 | DOCSET_BUNDLE_ID       = org.doxygen.Project
 843 | 
 844 | # If the GENERATE_HTMLHELP tag is set to YES, additional index files
 845 | # will be generated that can be used as input for tools like the
 846 | # Microsoft HTML help workshop to generate a compiled HTML help file (.chm)
 847 | # of the generated HTML documentation.
 848 | 
 849 | GENERATE_HTMLHELP      = NO
 850 | 
 851 | # If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
 852 | # be used to specify the file name of the resulting .chm file. You
 853 | # can add a path in front of the file if the result should not be
 854 | # written to the html output directory.
 855 | 
 856 | CHM_FILE               =
 857 | 
 858 | # If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
 859 | # be used to specify the location (absolute path including file name) of
 860 | # the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
 861 | # the HTML help compiler on the generated index.hhp.
 862 | 
 863 | HHC_LOCATION           =
 864 | 
 865 | # If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
 866 | # controls if a separate .chi index file is generated (YES) or that
 867 | # it should be included in the master .chm file (NO).
 868 | 
 869 | GENERATE_CHI           = NO
 870 | 
 871 | # If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING
 872 | # is used to encode HtmlHelp index (hhk), content (hhc) and project file
 873 | # content.
 874 | 
 875 | CHM_INDEX_ENCODING     =
 876 | 
 877 | # If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
 878 | # controls whether a binary table of contents is generated (YES) or a
 879 | # normal table of contents (NO) in the .chm file.
 880 | 
 881 | BINARY_TOC             = NO
 882 | 
 883 | # The TOC_EXPAND flag can be set to YES to add extra items for group members
 884 | # to the contents of the HTML help documentation and to the tree view.
 885 | 
 886 | TOC_EXPAND             = NO
 887 | 
 888 | # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and QHP_VIRTUAL_FOLDER
 889 | # are set, an additional index file will be generated that can be used as input for
 890 | # Qt's qhelpgenerator to generate a Qt Compressed Help (.qch) of the generated
 891 | # HTML documentation.
 892 | 
 893 | GENERATE_QHP           = NO
 894 | 
 895 | # If the QHG_LOCATION tag is specified, the QCH_FILE tag can
 896 | # be used to specify the file name of the resulting .qch file.
 897 | # The path specified is relative to the HTML output folder.
 898 | 
 899 | QCH_FILE               =
 900 | 
 901 | # The QHP_NAMESPACE tag specifies the namespace to use when generating
 902 | # Qt Help Project output. For more information please see
 903 | # http://doc.trolltech.com/qthelpproject.html#namespace
 904 | 
 905 | QHP_NAMESPACE          =
 906 | 
 907 | # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating
 908 | # Qt Help Project output. For more information please see
 909 | # http://doc.trolltech.com/qthelpproject.html#virtual-folders
 910 | 
 911 | QHP_VIRTUAL_FOLDER     = doc
 912 | 
 913 | # If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to add.
 914 | # For more information please see
 915 | # http://doc.trolltech.com/qthelpproject.html#custom-filters
 916 | 
 917 | QHP_CUST_FILTER_NAME   =
 918 | 
 919 | # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the custom filter to add. For more information please see
 920 | # <a href="http://doc.trolltech.com/qthelpproject.html#custom-filters">Qt Help Project / Custom Filters</a>.
 921 | 
 922 | QHP_CUST_FILTER_ATTRS  =
 923 | 
 924 | # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this project's
 925 | # filter section matches.
 926 | # <a href="http://doc.trolltech.com/qthelpproject.html#filter-attributes">Qt Help Project / Filter Attributes</a>.
 927 | 
 928 | QHP_SECT_FILTER_ATTRS  =
 929 | 
 930 | # If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can
 931 | # be used to specify the location of Qt's qhelpgenerator.
 932 | # If non-empty doxygen will try to run qhelpgenerator on the generated
 933 | # .qhp file.
 934 | 
 935 | QHG_LOCATION           =
 936 | 
 937 | # The DISABLE_INDEX tag can be used to turn on/off the condensed index at
 938 | # top of each HTML page. The value NO (the default) enables the index and
 939 | # the value YES disables it.
 940 | 
 941 | DISABLE_INDEX          = NO
 942 | 
 943 | # This tag can be used to set the number of enum values (range [1..20])
 944 | # that doxygen will group on one line in the generated HTML documentation.
 945 | 
 946 | ENUM_VALUES_PER_LINE   = 4
 947 | 
 948 | # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
 949 | # structure should be generated to display hierarchical information.
 950 | # If the tag value is set to YES, a side panel will be generated
 951 | # containing a tree-like index structure (just like the one that
 952 | # is generated for HTML Help). For this to work a browser that supports
 953 | # JavaScript, DHTML, CSS and frames is required (i.e. any modern browser).
 954 | # Windows users are probably better off using the HTML help feature.
 955 | 
 956 | GENERATE_TREEVIEW      = YES
 957 | 
 958 | # By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories,
 959 | # and Class Hierarchy pages using a tree view instead of an ordered list.
 960 | 
 961 | USE_INLINE_TREES       = NO
 962 | 
 963 | # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
 964 | # used to set the initial width (in pixels) of the frame in which the tree
 965 | # is shown.
 966 | 
 967 | TREEVIEW_WIDTH         = 250
 968 | 
 969 | # Use this tag to change the font size of Latex formulas included
 970 | # as images in the HTML documentation. The default is 10. Note that
 971 | # when you change the font size after a successful doxygen run you need
 972 | # to manually remove any form_*.png images from the HTML output directory
 973 | # to force them to be regenerated.
 974 | 
 975 | FORMULA_FONTSIZE       = 10
 976 | 
 977 | # When the SEARCHENGINE tag is enabled doxygen will generate a search box for the HTML output. The underlying search engine uses javascript
 978 | # and DHTML and should work on any modern browser. Note that when using HTML help (GENERATE_HTMLHELP) or Qt help (GENERATE_QHP)
 979 | # there is already a search function so this one should typically
 980 | # be disabled.
 981 | 
 982 | SEARCHENGINE           = YES
 983 | 
 984 | #---------------------------------------------------------------------------
 985 | # configuration options related to the LaTeX output
 986 | #---------------------------------------------------------------------------
 987 | 
 988 | # If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
 989 | # generate Latex output.
 990 | 
 991 | GENERATE_LATEX         = YES
 992 | 
 993 | # The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
 994 | # If a relative path is entered the value of OUTPUT_DIRECTORY will be
 995 | # put in front of it. If left blank `latex' will be used as the default path.
 996 | 
 997 | LATEX_OUTPUT           = latex
 998 | 
 999 | # The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
1000 | # invoked. If left blank `latex' will be used as the default command name.
1001 | 
1002 | LATEX_CMD_NAME         = latex
1003 | 
1004 | # The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
1005 | # generate index for LaTeX. If left blank `makeindex' will be used as the
1006 | # default command name.
1007 | 
1008 | MAKEINDEX_CMD_NAME     = makeindex
1009 | 
1010 | # If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
1011 | # LaTeX documents. This may be useful for small projects and may help to
1012 | # save some trees in general.
1013 | 
1014 | COMPACT_LATEX          = NO
1015 | 
1016 | # The PAPER_TYPE tag can be used to set the paper type that is used
1017 | # by the printer. Possible values are: a4, a4wide, letter, legal and
1018 | # executive. If left blank a4wide will be used.
1019 | 
1020 | PAPER_TYPE             = a4wide
1021 | 
1022 | # The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX
1023 | # packages that should be included in the LaTeX output.
1024 | 
1025 | EXTRA_PACKAGES         =
1026 | 
1027 | # The LATEX_HEADER tag can be used to specify a personal LaTeX header for
1028 | # the generated latex document. The header should contain everything until
1029 | # the first chapter. If it is left blank doxygen will generate a
1030 | # standard header. Notice: only use this tag if you know what you are doing!
1031 | 
1032 | LATEX_HEADER           =
1033 | 
1034 | # If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
1035 | # is prepared for conversion to pdf (using ps2pdf). The pdf file will
1036 | # contain links (just like the HTML output) instead of page references
1037 | # This makes the output suitable for online browsing using a pdf viewer.
1038 | 
1039 | PDF_HYPERLINKS         = YES
1040 | 
1041 | # If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
1042 | # plain latex in the generated Makefile. Set this option to YES to get a
1043 | # higher quality PDF documentation.
1044 | 
1045 | USE_PDFLATEX           = YES
1046 | 
1047 | # If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode
1048 | # command to the generated LaTeX files. This will instruct LaTeX to keep
1049 | # running if errors occur, instead of asking the user for help.
1050 | # This option is also used when generating formulas in HTML.
1051 | 
1052 | LATEX_BATCHMODE        = NO
1053 | 
1054 | # If LATEX_HIDE_INDICES is set to YES then doxygen will not
1055 | # include the index chapters (such as File Index, Compound Index, etc.)
1056 | # in the output.
1057 | 
1058 | LATEX_HIDE_INDICES     = NO
1059 | 
1060 | # If LATEX_SOURCE_CODE is set to YES then doxygen will include source code with syntax highlighting in the LaTeX output. Note that which sources are shown also depends on other settings such as SOURCE_BROWSER.
1061 | 
1062 | LATEX_SOURCE_CODE      = NO
1063 | 
1064 | #---------------------------------------------------------------------------
1065 | # configuration options related to the RTF output
1066 | #---------------------------------------------------------------------------
1067 | 
1068 | # If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output
1069 | # The RTF output is optimized for Word 97 and may not look very pretty with
1070 | # other RTF readers or editors.
1071 | 
1072 | GENERATE_RTF           = NO
1073 | 
1074 | # The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
1075 | # If a relative path is entered the value of OUTPUT_DIRECTORY will be
1076 | # put in front of it. If left blank `rtf' will be used as the default path.
1077 | 
1078 | RTF_OUTPUT             = rtf
1079 | 
1080 | # If the COMPACT_RTF tag is set to YES Doxygen generates more compact
1081 | # RTF documents. This may be useful for small projects and may help to
1082 | # save some trees in general.
1083 | 
1084 | COMPACT_RTF            = NO
1085 | 
1086 | # If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
1087 | # will contain hyperlink fields. The RTF file will
1088 | # contain links (just like the HTML output) instead of page references.
1089 | # This makes the output suitable for online browsing using WORD or other
1090 | # programs which support those fields.
1091 | # Note: wordpad (write) and others do not support links.
1092 | 
1093 | RTF_HYPERLINKS         = NO
1094 | 
1095 | # Load stylesheet definitions from file. Syntax is similar to doxygen's
1096 | # config file, i.e. a series of assignments. You only have to provide
1097 | # replacements, missing definitions are set to their default value.
1098 | 
1099 | RTF_STYLESHEET_FILE    =
1100 | 
1101 | # Set optional variables used in the generation of an rtf document.
1102 | # Syntax is similar to doxygen's config file.
1103 | 
1104 | RTF_EXTENSIONS_FILE    =
1105 | 
1106 | #---------------------------------------------------------------------------
1107 | # configuration options related to the man page output
1108 | #---------------------------------------------------------------------------
1109 | 
1110 | # If the GENERATE_MAN tag is set to YES (the default) Doxygen will
1111 | # generate man pages
1112 | 
1113 | GENERATE_MAN           = NO
1114 | 
1115 | # The MAN_OUTPUT tag is used to specify where the man pages will be put.
1116 | # If a relative path is entered the value of OUTPUT_DIRECTORY will be
1117 | # put in front of it. If left blank `man' will be used as the default path.
1118 | 
1119 | MAN_OUTPUT             = man
1120 | 
1121 | # The MAN_EXTENSION tag determines the extension that is added to
1122 | # the generated man pages (default is the subroutine's section .3)
1123 | 
1124 | MAN_EXTENSION          = .3
1125 | 
1126 | # If the MAN_LINKS tag is set to YES and Doxygen generates man output,
1127 | # then it will generate one additional man file for each entity
1128 | # documented in the real man page(s). These additional files
1129 | # only source the real man page, but without them the man command
1130 | # would be unable to find the correct page. The default is NO.
1131 | 
1132 | MAN_LINKS              = NO
1133 | 
1134 | #---------------------------------------------------------------------------
1135 | # configuration options related to the XML output
1136 | #---------------------------------------------------------------------------
1137 | 
1138 | # If the GENERATE_XML tag is set to YES Doxygen will
1139 | # generate an XML file that captures the structure of
1140 | # the code including all documentation.
1141 | 
1142 | GENERATE_XML           = NO
1143 | 
1144 | # The XML_OUTPUT tag is used to specify where the XML pages will be put.
1145 | # If a relative path is entered the value of OUTPUT_DIRECTORY will be
1146 | # put in front of it. If left blank `xml' will be used as the default path.
1147 | 
1148 | XML_OUTPUT             = xml
1149 | 
1150 | # The XML_SCHEMA tag can be used to specify an XML schema,
1151 | # which can be used by a validating XML parser to check the
1152 | # syntax of the XML files.
1153 | 
1154 | XML_SCHEMA             =
1155 | 
1156 | # The XML_DTD tag can be used to specify an XML DTD,
1157 | # which can be used by a validating XML parser to check the
1158 | # syntax of the XML files.
1159 | 
1160 | XML_DTD                =
1161 | 
1162 | # If the XML_PROGRAMLISTING tag is set to YES Doxygen will
1163 | # dump the program listings (including syntax highlighting
1164 | # and cross-referencing information) to the XML output. Note that
1165 | # enabling this will significantly increase the size of the XML output.
1166 | 
1167 | XML_PROGRAMLISTING     = YES
1168 | 
1169 | #---------------------------------------------------------------------------
1170 | # configuration options for the AutoGen Definitions output
1171 | #---------------------------------------------------------------------------
1172 | 
1173 | # If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
1174 | # generate an AutoGen Definitions (see autogen.sf.net) file
1175 | # that captures the structure of the code including all
1176 | # documentation. Note that this feature is still experimental
1177 | # and incomplete at the moment.
1178 | 
1179 | GENERATE_AUTOGEN_DEF   = NO
1180 | 
1181 | #---------------------------------------------------------------------------
1182 | # configuration options related to the Perl module output
1183 | #---------------------------------------------------------------------------
1184 | 
1185 | # If the GENERATE_PERLMOD tag is set to YES Doxygen will
1186 | # generate a Perl module file that captures the structure of
1187 | # the code including all documentation. Note that this
1188 | # feature is still experimental and incomplete at the
1189 | # moment.
1190 | 
1191 | GENERATE_PERLMOD       = NO
1192 | 
1193 | # If the PERLMOD_LATEX tag is set to YES Doxygen will generate
1194 | # the necessary Makefile rules, Perl scripts and LaTeX code to be able
1195 | # to generate PDF and DVI output from the Perl module output.
1196 | 
1197 | PERLMOD_LATEX          = NO
1198 | 
1199 | # If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
1200 | # nicely formatted so it can be parsed by a human reader.
1201 | # This is useful
1202 | # if you want to understand what is going on.
1203 | # On the other hand, if this
1204 | # tag is set to NO the size of the Perl module output will be much smaller
1205 | # and Perl will parse it just the same.
1206 | 
1207 | PERLMOD_PRETTY         = YES
1208 | 
1209 | # The names of the make variables in the generated doxyrules.make file
1210 | # are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
1211 | # This is useful so different doxyrules.make files included by the same
1212 | # Makefile don't overwrite each other's variables.
1213 | 
1214 | PERLMOD_MAKEVAR_PREFIX =
1215 | 
1216 | #---------------------------------------------------------------------------
1217 | # Configuration options related to the preprocessor
1218 | #---------------------------------------------------------------------------
1219 | 
1220 | # If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
1221 | # evaluate all C-preprocessor directives found in the sources and include
1222 | # files.
1223 | 
1224 | ENABLE_PREPROCESSING   = YES
1225 | 
1226 | # If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
1227 | # names in the source code. If set to NO (the default) only conditional
1228 | # compilation will be performed. Macro expansion can be done in a controlled
1229 | # way by setting EXPAND_ONLY_PREDEF to YES.
1230 | 
1231 | MACRO_EXPANSION        = NO
1232 | 
1233 | # If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
1234 | # then the macro expansion is limited to the macros specified with the
1235 | # PREDEFINED and EXPAND_AS_DEFINED tags.
1236 | 
1237 | EXPAND_ONLY_PREDEF     = NO
1238 | 
1239 | # If the SEARCH_INCLUDES tag is set to YES (the default) the include files
1240 | # in the INCLUDE_PATH (see below) will be searched if a #include is found.
1241 | 
1242 | SEARCH_INCLUDES        = YES
1243 | 
1244 | # The INCLUDE_PATH tag can be used to specify one or more directories that
1245 | # contain include files that are not input files but should be processed by
1246 | # the preprocessor.
1247 | 
1248 | INCLUDE_PATH           =
1249 | 
1250 | # You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
1251 | # patterns (like *.h and *.hpp) to filter out the header-files in the
1252 | # directories. If left blank, the patterns specified with FILE_PATTERNS will
1253 | # be used.
1254 | 
1255 | INCLUDE_FILE_PATTERNS  =
1256 | 
1257 | # The PREDEFINED tag can be used to specify one or more macro names that
1258 | # are defined before the preprocessor is started (similar to the -D option of
1259 | # gcc). The argument of the tag is a list of macros of the form: name
1260 | # or name=definition (no spaces). If the definition and the = are
1261 | # omitted =1 is assumed. To prevent a macro definition from being
1262 | # undefined via #undef or recursively expanded use the := operator
1263 | # instead of the = operator.
1264 | 
1265 | PREDEFINED             =
1266 | 
1267 | # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
1268 | # this tag can be used to specify a list of macro names that should be expanded.
1269 | # The macro definition that is found in the sources will be used.
1270 | # Use the PREDEFINED tag if you want to use a different macro definition.
1271 | 
1272 | EXPAND_AS_DEFINED      =
1273 | 
1274 | # If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
1275 | # doxygen's preprocessor will remove all function-like macros that are alone
1276 | # on a line, have an all uppercase name, and do not end with a semicolon. Such
1277 | # function macros are typically used for boiler-plate code, and will confuse
1278 | # the parser if not removed.
1279 | 
1280 | SKIP_FUNCTION_MACROS   = YES
1281 | 
1282 | #---------------------------------------------------------------------------
1283 | # Configuration::additions related to external references
1284 | #---------------------------------------------------------------------------
1285 | 
1286 | # The TAGFILES option can be used to specify one or more tagfiles.
1287 | # Optionally an initial location of the external documentation
1288 | # can be added for each tagfile. The format of a tag file without
1289 | # this location is as follows:
1290 | #
1291 | # TAGFILES = file1 file2 ...
1292 | # Adding location for the tag files is done as follows:
1293 | #
1294 | # TAGFILES = file1=loc1 "file2 = loc2" ...
1295 | # where "loc1" and "loc2" can be relative or absolute paths or
1296 | # URLs. If a location is present for each tag, the installdox tool
1297 | # does not have to be run to correct the links.
1298 | # Note that each tag file must have a unique name
1299 | # (where the name does NOT include the path)
1300 | # If a tag file is not located in the directory in which doxygen
1301 | # is run, you must also specify the path to the tagfile here.
1302 | 
1303 | TAGFILES               =
1304 | 
1305 | # When a file name is specified after GENERATE_TAGFILE, doxygen will create
1306 | # a tag file that is based on the input files it reads.
1307 | 
1308 | GENERATE_TAGFILE       =
1309 | 
1310 | # If the ALLEXTERNALS tag is set to YES all external classes will be listed
1311 | # in the class index. If set to NO only the inherited external classes
1312 | # will be listed.
1313 | 
1314 | ALLEXTERNALS           = NO
1315 | 
1316 | # If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
1317 | # in the modules index. If set to NO, only the current project's groups will
1318 | # be listed.
1319 | 
1320 | EXTERNAL_GROUPS        = YES
1321 | 
1322 | # The PERL_PATH should be the absolute path and name of the perl script
1323 | # interpreter (i.e. the result of `which perl').
1324 | 
1325 | PERL_PATH              = /usr/bin/perl
1326 | 
1327 | #---------------------------------------------------------------------------
1328 | # Configuration options related to the dot tool
1329 | #---------------------------------------------------------------------------
1330 | 
1331 | # If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
1332 | # generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base
1333 | # or super classes. Setting the tag to NO turns the diagrams off. Note that
1334 | # this option is superseded by the HAVE_DOT option below. This is only a
1335 | # fallback. It is recommended to install and use dot, since it yields more
1336 | # powerful graphs.
1337 | 
1338 | CLASS_DIAGRAMS         = YES
1339 | 
1340 | # You can define message sequence charts within doxygen comments using the \msc
1341 | # command. Doxygen will then run the mscgen tool (see
1342 | # http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
1343 | # documentation. The MSCGEN_PATH tag allows you to specify the directory where
1344 | # the mscgen tool resides. If left empty the tool is assumed to be found in the
1345 | # default search path.
1346 | 
1347 | MSCGEN_PATH            =
1348 | 
1349 | # If set to YES, the inheritance and collaboration graphs will hide
1350 | # inheritance and usage relations if the target is undocumented
1351 | # or is not a class.
1352 | 
1353 | HIDE_UNDOC_RELATIONS   = YES
1354 | 
1355 | # If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
1356 | # available from the path. This tool is part of Graphviz, a graph visualization
1357 | # toolkit from AT&T and Lucent Bell Labs. The other options in this section
1358 | # have no effect if this option is set to NO (the default)
1359 | 
1360 | HAVE_DOT               = NO
1361 | 
1362 | # By default doxygen will write a font called FreeSans.ttf to the output
1363 | # directory and reference it in all dot files that doxygen generates. This
1364 | # font does not include all possible unicode characters however, so when you need
1365 | # these (or just want a differently looking font) you can specify the font name
1366 | # using DOT_FONTNAME. You need to make sure dot is able to find the font,
1367 | # which can be done by putting it in a standard location or by setting the
1368 | # DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory
1369 | # containing the font.
1370 | 
1371 | DOT_FONTNAME           = FreeSans
1372 | 
1373 | # The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs.
1374 | # The default size is 10pt.
1375 | 
1376 | DOT_FONTSIZE           = 10
1377 | 
1378 | # By default doxygen will tell dot to use the output directory to look for the
1379 | # FreeSans.ttf font (which doxygen will put there itself). If you specify a
1380 | # different font using DOT_FONTNAME you can set the path where dot
1381 | # can find it using this tag.
1382 | 
1383 | DOT_FONTPATH           =
1384 | 
1385 | # If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
1386 | # will generate a graph for each documented class showing the direct and
1387 | # indirect inheritance relations. Setting this tag to YES will force the
1388 | # CLASS_DIAGRAMS tag to NO.
1389 | 
1390 | CLASS_GRAPH            = YES
1391 | 
1392 | # If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
1393 | # will generate a graph for each documented class showing the direct and
1394 | # indirect implementation dependencies (inheritance, containment, and
1395 | # class references variables) of the class with other documented classes.
1396 | 
1397 | COLLABORATION_GRAPH    = YES
1398 | 
1399 | # If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen
1400 | # will generate a graph for groups, showing the direct groups dependencies
1401 | 
1402 | GROUP_GRAPHS           = YES
1403 | 
1404 | # If the UML_LOOK tag is set to YES doxygen will generate inheritance and
1405 | # collaboration diagrams in a style similar to the OMG's Unified Modeling
1406 | # Language.
1407 | 
1408 | UML_LOOK               = NO
1409 | 
1410 | # If set to YES, the inheritance and collaboration graphs will show the
1411 | # relations between templates and their instances.
1412 | 
1413 | TEMPLATE_RELATIONS     = NO
1414 | 
1415 | # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
1416 | # tags are set to YES then doxygen will generate a graph for each documented
1417 | # file showing the direct and indirect include dependencies of the file with
1418 | # other documented files.
1419 | 
1420 | INCLUDE_GRAPH          = YES
1421 | 
1422 | # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
1423 | # HAVE_DOT tags are set to YES then doxygen will generate a graph for each
1424 | # documented header file showing the documented files that directly or
1425 | # indirectly include this file.
1426 | 
1427 | INCLUDED_BY_GRAPH      = YES
1428 | 
1429 | # If the CALL_GRAPH and HAVE_DOT options are set to YES then
1430 | # doxygen will generate a call dependency graph for every global function
1431 | # or class method. Note that enabling this option will significantly increase
1432 | # the time of a run. So in most cases it will be better to enable call graphs
1433 | # for selected functions only using the \callgraph command.
1434 | 
1435 | CALL_GRAPH             = NO
1436 | 
1437 | # If the CALLER_GRAPH and HAVE_DOT tags are set to YES then
1438 | # doxygen will generate a caller dependency graph for every global function
1439 | # or class method. Note that enabling this option will significantly increase
1440 | # the time of a run. So in most cases it will be better to enable caller
1441 | # graphs for selected functions only using the \callergraph command.
1442 | 
1443 | CALLER_GRAPH           = NO
1444 | 
1445 | # If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
1446 | # will generate a graphical hierarchy of all classes instead of a textual one.
1447 | 
1448 | GRAPHICAL_HIERARCHY    = YES
1449 | 
1450 | # If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES
1451 | # then doxygen will show the dependencies a directory has on other directories
1452 | # in a graphical way. The dependency relations are determined by the #include
1453 | # relations between the files in the directories.
1454 | 
1455 | DIRECTORY_GRAPH        = YES
1456 | 
1457 | # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
1458 | # generated by dot. Possible values are png, jpg, or gif
1459 | # If left blank png will be used.
1460 | 
1461 | DOT_IMAGE_FORMAT       = png
1462 | 
1463 | # The tag DOT_PATH can be used to specify the path where the dot tool can be
1464 | # found. If left blank, it is assumed the dot tool can be found in the path.
1465 | 
1466 | DOT_PATH               =
1467 | 
1468 | # The DOTFILE_DIRS tag can be used to specify one or more directories that
1469 | # contain dot files that are included in the documentation (see the
1470 | # \dotfile command).
1471 | 
1472 | DOTFILE_DIRS           =
1473 | 
1474 | # The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of
1475 | # nodes that will be shown in the graph. If the number of nodes in a graph
1476 | # becomes larger than this value, doxygen will truncate the graph, which is
1477 | # visualized by representing a node as a red box. Note that if the
1478 | # number of direct children of the root node in a graph is already larger than
1479 | # DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note
1480 | # that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
1481 | 
1482 | DOT_GRAPH_MAX_NODES    = 50
1483 | 
1484 | # The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
1485 | # graphs generated by dot. A depth value of 3 means that only nodes reachable
1486 | # from the root by following a path via at most 3 edges will be shown. Nodes
1487 | # that lie further from the root node will be omitted. Note that setting this
1488 | # option to 1 or 2 may greatly reduce the computation time needed for large
1489 | # code bases. Also note that the size of a graph can be further restricted by
1490 | # DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
1491 | 
1492 | MAX_DOT_GRAPH_DEPTH    = 0
1493 | 
1494 | # Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
1495 | # background. This is disabled by default, because dot on Windows does not
1496 | # seem to support this out of the box. Warning: Depending on the platform used,
1497 | # enabling this option may lead to badly anti-aliased labels on the edges of
1498 | # a graph (i.e. they become hard to read).
1499 | 
1500 | DOT_TRANSPARENT        = NO
1501 | 
1502 | # Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
1503 | # files in one run (i.e. multiple -o and -T options on the command line). This
1504 | # makes dot run faster, but since only newer versions of dot (>1.8.10)
1505 | # support this, this feature is disabled by default.
1506 | 
1507 | DOT_MULTI_TARGETS      = NO
1508 | 
1509 | # If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
1510 | # generate a legend page explaining the meaning of the various boxes and
1511 | # arrows in the dot generated graphs.
1512 | 
1513 | GENERATE_LEGEND        = YES
1514 | 
1515 | # If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
1516 | # remove the intermediate dot files that are used to generate
1517 | # the various graphs.
1518 | 
1519 | DOT_CLEANUP            = YES
1520 | 


--------------------------------------------------------------------------------
/app_template/common/gpumemioctl.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef __GPUDMAIOTCL_H__
 3 | #define __GPUDMAIOTCL_H__
 4 | 
 5 | //-----------------------------------------------------------------------------
 6 | // Name string of the gpumem driver.
 7 | #define GPUMEM_DRIVER_NAME             "gpumem"
 8 | 
 9 | //-----------------------------------------------------------------------------
10 | // Linux only: ioctl numbers use the magic 'g' and _IO(), which encodes neither a direction nor an argument size.
11 | #ifdef __linux__
12 | #include <linux/types.h>
13 | #ifndef __KERNEL__
14 | #include <sys/ioctl.h>
15 | #endif
16 | #define GPUMEM_DEVICE_TYPE             'g'
17 | #define GPUMEM_MAKE_IOCTL(c) _IO(GPUMEM_DEVICE_TYPE, (c))
18 | #endif
19 | // Command codes. GPUMEM_MAKE_IOCTL is defined only under __linux__, so these expand to something usable only there.
20 | #define IOCTL_GPUMEM_LOCK		GPUMEM_MAKE_IOCTL(10)
21 | #define IOCTL_GPUMEM_UNLOCK		GPUMEM_MAKE_IOCTL(11)
22 | #define IOCTL_GPUMEM_STATE		GPUMEM_MAKE_IOCTL(12)
23 | 
24 | //-----------------------------------------------------------------------------
25 | // Boundary alignment helpers. NOTE(review): u64 is not declared by this header itself - the includer must provide it.
26 | #define GPU_BOUND_SHIFT 16                          // log2 of the boundary: 1 << 16 = 65536 bytes
27 | #define GPU_BOUND_SIZE ((u64)1 << GPU_BOUND_SHIFT)  // boundary size as a 64-bit value
28 | #define GPU_BOUND_OFFSET (GPU_BOUND_SIZE-1)         // mask selecting the offset inside one boundary
29 | #define GPU_BOUND_MASK (~GPU_BOUND_OFFSET)          // mask clearing the offset bits (rounds an address down to a boundary)
30 | 
31 | //-----------------------------------------------------------------------------
32 | // Lock record. NOTE(review): presumably the argument block of IOCTL_GPUMEM_LOCK - confirm against the driver.
33 | struct gpudma_lock_t {
34 |     void*    handle;      // pointer-sized handle; its meaning is defined by the driver
35 |     uint64_t addr;        // 64-bit address; presumably the GPU address to lock - TODO confirm
36 |     uint64_t size;        // 64-bit size; presumably in bytes - TODO confirm
37 |     size_t   page_count;  // number of pages; note void* and size_t change width between 32- and 64-bit builds
38 | };
39 | 
40 | //-----------------------------------------------------------------------------
41 | // Unlock record. NOTE(review): presumably the argument block of IOCTL_GPUMEM_UNLOCK - confirm against the driver.
42 | struct gpudma_unlock_t {
43 |     void*    handle;      // pointer-sized handle; presumably the one stored in gpudma_lock_t - TODO confirm
44 | };
45 | 
46 | //-----------------------------------------------------------------------------
47 | // State record. NOTE(review): presumably the argument block of IOCTL_GPUMEM_STATE - confirm against the driver.
48 | struct gpudma_state_t {
49 |     void*       handle;       // pointer-sized handle; its meaning is defined by the driver
50 |     size_t      page_count;   // number of pages
51 |     size_t      page_size;    // size of one page; presumably in bytes - TODO confirm
52 |     uint64_t    pages[1];     // declared with one element; presumably extended to page_count entries by the allocator - TODO confirm
53 | };
54 | 
55 | //-----------------------------------------------------------------------------
56 | 
57 | 
58 | #endif // __GPUDMAIOTCL_H__
59 | 


--------------------------------------------------------------------------------
/app_template/common/utypes.h:
--------------------------------------------------------------------------------
  1 | /***************************************************
  2 | *
  3 | * UTYPES.H - define useful types.
  4 | *
  5 | * (C) Instrumental Systems Corp. Ekkore, Dec. 1997-2001
  6 | *
  7 | ****************************************************/
  8 | 
  9 | 
 10 | #ifndef _UTYPES_H_
 11 | #define _UTYPES_H_
 12 | 
 13 | #include "utypes_linux.h"
 14 | 
 15 | 
 16 | 
 17 | /*************************************
 18 | *  Types for MSDOS (compiled only when __MSDOS__ is defined).
 19 | *  The 32-bit names map to long; no 64-bit names are declared here.
 20 | */
 21 | 
 22 | 
 23 | #ifdef __MSDOS__
 24 | // Long-form fixed-width names.
 25 | typedef	unsigned char	UINT08;
 26 | typedef	signed   char	SINT08;
 27 | typedef	unsigned short	UINT16;
 28 | typedef	signed   short	SINT16;
 29 | typedef	unsigned long	UINT32;
 30 | typedef	signed   long	SINT32;
 31 | // Short names, each with a matching P-prefixed pointer type.
 32 | typedef	unsigned char	U08, *PU08;
 33 | typedef	signed   char	S08, *PS08;
 34 | typedef	unsigned short	U16, *PU16;
 35 | typedef	signed   short	S16, *PS16;
 36 | typedef	unsigned long	U32, *PU32;
 37 | typedef	signed   long	S32, *PS32;
 38 | // Floating-point names.
 39 | typedef float			REAL32, *PREAL32;
 40 | typedef double			REAL64, *PREAL64;
 41 | // BOOL is intentionally left undeclared here (the enum below is disabled).
 42 | //typedef enum{ FALSE=0, TRUE=1}        BOOL;
 43 | typedef unsigned char   UCHAR;
 44 | typedef unsigned short  USHORT;
 45 | typedef unsigned long   ULONG;
 46 | typedef unsigned long   HANDLE;   // HANDLE is an unsigned integer in this section
 47 | 
 48 | #endif /* __MSDOS__ */
 49 | 
 50 | 
 51 | /*************************************
 52 | *  Types for Windows (compiled only when _WIN32 is defined).
 53 | *  The 32-bit names map to int; BRDCHAR is wchar_t under _WIN64 and char otherwise.
 54 | */
 55 | 
 56 | 
 57 | #ifdef _WIN32
 58 | // Long-form fixed-width names.
 59 | typedef	unsigned char	UINT08;
 60 | typedef	signed   char	SINT08;
 61 | typedef	unsigned short	UINT16;
 62 | typedef	signed   short	SINT16;
 63 | typedef	unsigned int	UINT32;
 64 | typedef	signed   int	SINT32;
 65 | // Short names, each with a matching P-prefixed pointer type.
 66 | typedef	unsigned char	U08, *PU08;
 67 | typedef	signed   char	S08, *PS08;
 68 | typedef	unsigned short	U16, *PU16;
 69 | typedef	signed   short	S16, *PS16;
 70 | typedef	unsigned int	U32, *PU32;
 71 | typedef	signed   int	S32, *PS32;
 72 | // U64/S64 are skipped for _MSC_VER == 1200 (Visual C++ 6.0), presumably because it lacks long long.
 73 | #if _MSC_VER == 1200
 74 | #else
 75 | typedef	unsigned long long int	U64, *PU64;
 76 | typedef	signed   long long int	S64, *PS64;
 77 | #endif
 78 | 
 79 | typedef float			REAL32, *PREAL32;
 80 | typedef double			REAL64, *PREAL64;
 81 | // Character abstraction: BRDC_* names resolve to the wide-character C runtime on 64-bit Windows ...
 82 | #ifdef _WIN64
 83 | typedef wchar_t			BRDCHAR;
 84 | #define _BRDC(x)		L ## x
 85 | #define BRDC_strlen		wcslen
 86 | #define BRDC_strcpy		wcscpy
 87 | #define BRDC_strncpy	wcsncpy
 88 | #define BRDC_strcmp		wcscmp
 89 | #define BRDC_stricmp	_wcsicmp
 90 | #define BRDC_strnicmp	wcsnicmp
 91 | #define BRDC_strcat		wcscat
 92 | #define BRDC_strchr		wcschr
 93 | #define BRDC_strstr		wcsstr
 94 | #define BRDC_strtol		wcstol
 95 | #define BRDC_strtoul	wcstoul
 96 | #define BRDC_strtod		wcstod
 97 | #define BRDC_atol		_wtol
 98 | #define BRDC_atoi		_wtoi
 99 | #define BRDC_atoi64		_wtoi64
100 | #define BRDC_atof		_wtof
101 | #define BRDC_printf		wprintf
102 | #define BRDC_fprintf	fwprintf
103 | #define BRDC_sprintf	swprintf
104 | #define BRDC_vsprintf	vswprintf
105 | #define BRDC_sscanf		swscanf
106 | #define BRDC_fopen		_wfopen
107 | #define BRDC_sopen		_wsopen
108 | #define BRDC_fgets		fgetws
109 | #define BRDC_fputs		fputws
110 | #define BRDC_getenv		_wgetenv
111 | #define BRDC_main		wmain
112 | #else	// ... and to the narrow-character functions on 32-bit Windows
113 | typedef char			BRDCHAR;
114 | #define _BRDC(x)		x
115 | #define BRDC_strlen		strlen
116 | #define BRDC_strcpy		strcpy
117 | #define BRDC_strncpy	strncpy
118 | #define BRDC_strcmp		strcmp
119 | #define BRDC_stricmp	_stricmp
120 | #define BRDC_strnicmp	_strnicmp
121 | #define BRDC_strcat		strcat
122 | #define BRDC_strchr		strchr
123 | #define BRDC_strstr		strstr
124 | #define BRDC_strtol		strtol
125 | #define BRDC_strtoul	strtoul
126 | #define BRDC_strtod		strtod
127 | #define BRDC_atol		atol
128 | #define BRDC_atoi		atoi
129 | #define BRDC_atoi64		_atoi64
130 | #define BRDC_atof		atof
131 | #define BRDC_printf		printf
132 | #define BRDC_fprintf	fprintf
133 | #define BRDC_sprintf	sprintf
134 | #define BRDC_vsprintf	vsprintf
135 | #define BRDC_sscanf		sscanf
136 | #define BRDC_fopen		fopen
137 | #define BRDC_sopen		sopen
138 | #define BRDC_fgets		fgets
139 | #define BRDC_fputs		fputs
140 | #define BRDC_getenv		getenv
141 | #define BRDC_main		main
142 | #endif
143 | 
144 | #endif /* _WIN32 */
145 | 
146 | 
147 | /*************************************
148 | *  Types for TMS320C3x/C4x (compiled when _TMS320C30 or _TMS320C40 is defined).
149 | *  Only 32-bit names are declared; UCHAR, USHORT and ULONG all alias UINT32.
150 | */
151 | 
152 | 
153 | #if defined(_TMS320C30) || defined(_TMS320C40 )
154 | // The 8- and 16-bit names are disabled; presumably these DSPs have no type narrower than 32 bits - TODO confirm.
155 | /*
156 | typedef	unsigned char	UINT08;
157 | typedef	signed   char	SINT08;
158 | typedef	unsigned short	UINT16;
159 | typedef	signed   short	SINT16;
160 | */
161 | typedef	unsigned int	UINT32;
162 | typedef	signed   int	SINT32;
163 | 
164 | typedef	unsigned int	U32, *PU32;
165 | typedef	signed   int	S32, *PS32;
166 | 
167 | typedef float			REAL32, *PREAL32;
168 | typedef double			REAL64, *PREAL64;
169 | // Windows-style names: every one of them is the 32-bit unsigned type here.
170 | typedef UINT32	ULONG;
171 | typedef UINT32	USHORT;
172 | typedef UINT32	UCHAR;
173 | 
174 | #endif /* _TMS320C30 || _TMS320C40 */
175 | 
176 | 
177 | /*************************************
178 | *  Types for TMS320C6x (compiled only when _TMS320C6X is defined).
179 | *  Full 8/16/32/64-bit set; the 32-bit names map to int, the 64-bit names to long long.
180 | */
181 | 
182 | 
183 | #ifdef _TMS320C6X
184 | // Long-form fixed-width names.
185 | typedef	unsigned char	UINT08;
186 | typedef	signed   char	SINT08;
187 | typedef	unsigned short	UINT16;
188 | typedef	signed   short	SINT16;
189 | typedef	unsigned int	UINT32;
190 | typedef	signed   int	SINT32;
191 | // Short names, each with a matching P-prefixed pointer type.
192 | typedef	unsigned char	U08, *PU08;
193 | typedef	signed   char	S08, *PS08;
194 | typedef	unsigned short	U16, *PU16;
195 | typedef	signed   short	S16, *PS16;
196 | typedef	unsigned int	U32, *PU32;
197 | typedef	signed   int	S32, *PS32;
198 | 
199 | typedef	unsigned long long int	U64, *PU64;
200 | typedef	signed   long long int	S64, *PS64;
201 | 
202 | typedef float			REAL32, *PREAL32;
203 | typedef double			REAL64, *PREAL64;
204 | // Windows-style names built from the fixed-width names above.
205 | typedef UINT32	ULONG;
206 | typedef UINT16	USHORT;
207 | typedef UINT08	UCHAR;
208 | 
209 | #endif /* _TMS320C6X */
210 | 
211 | 
212 | /*************************************
213 | *  Types for ADSP2106x (compiled when __ADSP21060__, __ADSP21061__, __ADSP21062__ or __ADSP21065L__ is defined).
214 | *  Only 32-bit integer names are declared; UCHAR, USHORT and ULONG all alias UINT32.
215 | */
216 | 
217 | 
218 | #if defined(__ADSP21060__) || defined(__ADSP21061__) || defined(__ADSP21062__)|| defined(__ADSP21065L__)
219 | 
220 | typedef	unsigned int	UINT32;
221 | typedef	signed   int	SINT32;
222 | 
223 | typedef	unsigned int	U32, *PU32;
224 | typedef	signed   int	S32, *PS32;
225 | 
226 | typedef float	 REAL32, *PREAL32;
227 | typedef double	 REAL64, *PREAL64;
228 | // Windows-style names: every one of them is the 32-bit unsigned type here.
229 | typedef UINT32	ULONG;
230 | typedef UINT32	USHORT;
231 | typedef UINT32	UCHAR;
232 | 
233 | #endif /* __ADSP2106x__ */
234 | 
235 | /*************************************
236 | *  Types for ADSP2116x (compiled when __ADSP21160__ or __ADSP21161__ is defined).
237 | *  Same declarations as the ADSP2106x section: 32-bit integer names only.
238 | */
239 | 
240 | 
241 | #if defined(__ADSP21160__) || defined(__ADSP21161__)
242 | 
243 | typedef	unsigned int	UINT32;
244 | typedef	signed   int	SINT32;
245 | 
246 | typedef	unsigned int	U32, *PU32;
247 | typedef	signed   int	S32, *PS32;
248 | 
249 | typedef float	 REAL32, *PREAL32;
250 | typedef double	 REAL64, *PREAL64;
251 | // Windows-style names: every one of them is the 32-bit unsigned type here.
252 | typedef UINT32	ULONG;
253 | typedef UINT32	USHORT;
254 | typedef UINT32	UCHAR;
255 | 
256 | #endif /* __ADSP2116x__ */
257 | 
258 | /*************************************
259 | *  Types for ADSP-TS101 (compiled only when __ADSPTS__ is defined).
260 | *  32- and 64-bit integer names; unlike the other sections REAL64 is long double here.
261 | */
262 | 
263 | 
264 | #if defined(__ADSPTS__)
265 | 
266 | typedef	unsigned int	UINT32;
267 | typedef	signed   int	SINT32;
268 | 
269 | typedef	unsigned int	U32, *PU32;
270 | typedef	signed   int	S32, *PS32;
271 | 
272 | typedef	unsigned long long int	U64, *PU64;
273 | typedef	signed   long long int	S64, *PS64;
274 | 
275 | typedef float	 REAL32, *PREAL32;
276 | typedef long double	 REAL64, *PREAL64;
277 | // Windows-style names: every one of them is the 32-bit unsigned type here.
278 | typedef UINT32	ULONG;
279 | typedef UINT32	USHORT;
280 | typedef UINT32	UCHAR;
281 | 
282 | #endif /* __ADSPTS__ */
283 | 
284 | /*************************************
285 | *  Types for MC24: the section is selected for ANY GCC-compatible compiler that is not targeting Linux.
286 | *  NOTE(review): UINT32/U32 map to long here and the guard does not exclude _WIN32 - confirm it cannot overlap the Windows section.
287 | */
288 | 
289 | #if defined(__GNUC__) && !defined(__linux__)
290 | // Long-form fixed-width names.
291 | typedef unsigned char   UINT08;
292 | typedef signed   char   SINT08;
293 | typedef unsigned short  UINT16;
294 | typedef signed   short  SINT16;
295 | typedef unsigned long   UINT32;
296 | typedef signed   long   SINT32;
297 | // Short names, each with a matching P-prefixed pointer type.
298 | typedef unsigned char   U08, *PU08;
299 | typedef signed   char   S08, *PS08;
300 | typedef unsigned short  U16, *PU16;
301 | typedef signed   short  S16, *PS16;
302 | typedef unsigned long   U32, *PU32;
303 | typedef signed   long   S32, *PS32;
304 | 
305 | typedef unsigned long long int	U64, *PU64;
306 | typedef signed   long long int	S64, *PS64;
307 | 
308 | typedef float           REAL32, *PREAL32;
309 | typedef double          REAL64, *PREAL64;
310 | // Windows-style names; HANDLE is an unsigned integer in this section.
311 | typedef unsigned char   UCHAR;
312 | typedef unsigned short  USHORT;
313 | typedef unsigned long   ULONG;
314 | typedef unsigned long   HANDLE;
315 | 
316 | #endif /* __GNUC__ */
317 | 
318 | /*************************************
319 | *  Type Aliasing
320 | *  Uns is another name for UINT32, so UINT32 must already be declared by utypes_linux.h or by one of the sections above.
321 | */
322 | 
323 | typedef UINT32	Uns;
324 | 
325 | /*************************************************
326 | * FENTRY and STDCALL decorate entry points; both expand to nothing unless WIN32 or __WIN32__ is defined.
327 | * Entry Point types
328 | * NOTE(review): the type section of this file tests _WIN32 while this one tests WIN32/__WIN32__ - confirm both are set on Windows builds.
329 | */
330 | #if !defined(WIN32) && !defined(__WIN32__)
331 | //#ifndef WIN32
332 | #define FENTRY
333 | #define STDCALL
334 | #else
335 | #include <windows.h>
336 | #define DllImport	__declspec( dllimport )
337 | #define DllExport	__declspec( dllexport )
338 | #define FENTRY		DllExport	// entry points are exported from the DLL
339 | #define STDCALL		__stdcall
340 | #define	huge	// the identifier 'huge' is defined away
341 | #endif  // WIN32
342 | 
343 | 
344 | #endif /* _UTYPES_H_ */
345 | 
346 | /*
347 | *  End of File
348 | */
349 | 
350 | 
351 | 
352 | 


--------------------------------------------------------------------------------
/app_template/common/utypes_linux.h:
--------------------------------------------------------------------------------
  1 | 
  2 | #ifndef _UTYPES_LINUX_H_
  3 | #define _UTYPES_LINUX_H_
  4 | 
  5 | #if defined (__LINUX__) || defined(__linux__)
  6 | 
  7 | #include <limits.h>
  8 | #include <stdint.h>
  9 | #include <strings.h>
 10 | typedef uint8_t   u8,   UINT08, U08, *PU08, BYTE, U8;     // fixed-width names come straight from <stdint.h>
 11 | typedef uint16_t  u16,  UINT16, U16, *PU16, WORD;
 12 | typedef uint32_t  u32,  UINT32, U32, *PU32;
 13 | typedef uint64_t  u64,  UINT64, U64, *PU64;
 14 | typedef int8_t    s8,   SINT08, S08, *PS08;
 15 | typedef int16_t   s16,  SINT16, S16, *PS16;
 16 | typedef int32_t   s32,  SINT32, S32, *PS32;
 17 | typedef int64_t   s64,  SINT64, S64, *PS64, __int64;      // the only declaration of __int64 (MSVC spelling of a 64-bit int)
 18 | 
 19 | //typedef int32_t     LONG;
 20 | typedef long        LONG;   // NOTE(review): long follows the platform (64-bit on LP64) while ULONG below is fixed at 32 bits
 21 | // __int64 was re-declared here; repeating a typedef is a constraint violation before C11, so the duplicate is gone.
 22 | 
 23 | typedef float		REAL32, *PREAL32;
 24 | typedef double		REAL64, *PREAL64;
 25 | // BOOL is declared only when neither TRUE nor FALSE is already a macro.
 26 | #if !defined(TRUE) && !defined(FALSE)
 27 | typedef enum { FALSE=0, TRUE=1 } BOOL;
 28 | #endif
 29 | // Windows-style names.
 30 | typedef uint8_t   UCHAR, *PUCHAR;
 31 | typedef uint16_t  USHORT, *PUSHORT;
 32 | typedef uint32_t  ULONG, *PULONG, *PUINT, UINT;
 33 | typedef int       HANDLE;      // HANDLE is a plain int in this section
 34 | typedef void*     HINSTANCE;
 35 | typedef void*     PVOID;
 36 | typedef void      VOID;
 37 | typedef uint32_t  DWORD;
 38 | // (a third declaration of __int64 was removed from this line; see the s64 typedef above)
 39 | 
 40 | typedef int 		SOCKET;
 41 | typedef char   		TCHAR;
 42 | typedef char*  		PTCHAR;
 43 | typedef char*  		LPTSTR;
 44 | 
 45 | // BRDC_* character abstraction, narrow-char flavour; same set of names that utypes.h defines for Windows.
 46 | typedef char                    BRDCHAR;
 47 | #define _BRDC(x)                x
 48 | #define BRDC_strlen             strlen
 49 | #define BRDC_strcpy             strcpy
 50 | #define BRDC_strncpy            strncpy
 51 | #define BRDC_strcmp             strcmp
 52 | #define BRDC_stricmp            strcasecmp      // case-insensitive, as the name promises (<strings.h>)
 53 | #define BRDC_strnicmp           strncasecmp     // case-insensitive, length-limited (<strings.h>)
 54 | #define BRDC_strcat             strcat
 55 | #define BRDC_strchr             strchr
 56 | #define BRDC_strstr             strstr
 57 | #define BRDC_strtol             strtol
 58 | #define BRDC_strtoul            strtoul         // was missing here although the Windows flavour provides it
 59 | #define BRDC_strtod             strtod
 60 | #define BRDC_atol               atol
 61 | #define BRDC_atoi               atoi
 62 | #define BRDC_atoi64             atoll
 63 | #define BRDC_atof               atof
 64 | #define BRDC_printf             printf
 65 | #define BRDC_fprintf            fprintf
 66 | #define BRDC_sprintf            sprintf
 67 | #define BRDC_vsprintf           vsprintf
 68 | #define BRDC_sscanf             sscanf
 69 | #define BRDC_fopen              fopen
 70 | #define BRDC_sopen              sopen
 71 | #define BRDC_fgets              fgets
 72 | #define BRDC_getenv             getenv
 73 | #define BRDC_main               main
 74 | #define BRDC_fputs              fputs
 75 | //-------------------------------------
 76 | // Win32 / tchar names mapped onto the C library. These are plain token replacements, so argument lists are not adapted.
 77 | #define lstrcpy strcpy
 78 | #define lstrcpyA strcpy
 79 | #define lstrcat strcat
 80 | #define lstrcatA strcat
 81 | #define lstrlen strlen
 82 | #define lstrlenA strlen
 83 | #define lstrcmpi strcasecmp
 84 | #define _tcsstr strstr
 85 | #define _tcscpy_s strcpy
 86 | #define _tcscpy strcpy
 87 | #define _tcschr strchr
 88 | #define sprintf_s sprintf
 89 | #define _tcscat_s strcat
 90 | #define _tcslen strlen
 91 | // NOTE(review): the *_s originals take a buffer-size argument that strcpy/strcat/sprintf do not accept - confirm how callers use them.
 92 | 
 93 | #define _T(x)       x
 94 | #define _TEXT(x)    x
 95 | #define INFINITE    (-1)
 96 | // Case-insensitive compares must stay case-insensitive on Linux (they used to map to strcmp/strncmp); needs <strings.h>.
 97 | #define _stricmp strcasecmp
 98 | #define stricmp strcasecmp
 99 | #define _strnicmp strncasecmp
100 | // Windows pointer-type names.
101 | typedef const char*	LPCTSTR;
102 | typedef char*		PCTSTR;
103 | typedef char*		PTSTR;
104 | typedef void*		LPVOID;
105 | typedef int             LPOVERLAPPED;
106 | // MAX_PATH falls back to PATH_MAX from <limits.h> unless it is already defined.
107 | #ifndef MAX_PATH
108 | #define MAX_PATH PATH_MAX
109 | #endif
110 | 
111 | #endif /* __linux__ */
112 | 
113 | #endif /* _UTYPES_LINUX_H_ */
114 | 
115 | /*
116 | *  End of File
117 | */
118 | 


--------------------------------------------------------------------------------
/app_template/create_doc:
--------------------------------------------------------------------------------
1 | doxygen  app_template.dxy  # run Doxygen with the app_template.dxy configuration (path is relative to the current directory)
2 | 


--------------------------------------------------------------------------------
/app_template/cuda/check_counter.cu:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | 
  4 | // System includes
  5 | #include <stdio.h>
  6 | #include <assert.h>
  7 | 
  8 | // CUDA runtime
  9 | #include <cuda_runtime.h>
 10 | 
 11 | // helper functions and utilities to work with CUDA
 12 | #include <helper_functions.h>
 13 | #include <helper_cuda.h>
 14 |  
 15 | #include "task_data.h"
 16 | 
 17 | 
 18 | 
 19 | /**
 20 |  * 		\brief	CUDA kernel that checks a buffer filled with a 64-bit counter sequence
 21 |  *
 22 |  * 		\param	sharedMemory 	area for exchange status information with host (read as TaskMonitor and as an array of TaskBufferStatus)
 23 |  * 		\param	nbuf			number of buffer: selects the TaskBufferStatus entry and the first expected value
 24 |  *
 25 |  * 		Runs until ptrMonitor->flagExit is 1; one checking pass is done each time irqFlag of the buffer reads 1.
 26 |  */
 27 | __global__ void checkCounterKernel( long *sharedMemory, int nbuf )
 28 | {
 29 | 	// Block-wide copy of the buffer's irqFlag; inside the loop it is written by thread 0 only.
 30 | 	__shared__ int shFlagIrq;
 31 | 	// The same memory is viewed through two types; ts is then advanced to entry nbuf.
 32 | 	TaskMonitor *ptrMonitor = (TaskMonitor*)sharedMemory;
 33 | 	TaskBufferStatus *ts=(TaskBufferStatus *)sharedMemory;
 34 | 	ts+=nbuf;
 35 | 	// Threads interleave: thread t handles words t, t+step, t+2*step, ...
 36 | 	uint64_t step = TaskCounts;
 37 | 	int size=ts->sizeOfKBytes;
 38 | 	int cnt=1024/8*size/step;		// 8-byte words per thread: (size*1024/8) / step
 39 | 	// NOTE(review): the product below is evaluated in int before it is widened - check the range of nbuf*size.
 40 | 	uint64_t expect_data=nbuf*1024*size/8;
 41 | 	expect_data += threadIdx.x;
 42 | 
 43 | 	uint64_t *src = (uint64_t*)(ts->ptrCudaIn);
 44 | 	src+=threadIdx.x;
 45 | 
 46 | 	uint64_t *dst;
 47 | 	// Per-thread error record inside the buffer status.
 48 | 	TaskCheckData* check= &(ts->check[threadIdx.x]);
 49 | 
 50 | 	unsigned int totalErrorForBuf=0;	// not referenced anywhere below
 51 | 	unsigned int errorCnt=0;			// mismatches seen by this thread; never reset, so it accumulates over all passes
 52 | 	unsigned int block_rd=0;			// passes completed
 53 | 	unsigned int block_ok=0;			// passes without any error (updated by thread 0 only)
 54 | 	unsigned int block_error=0;			// passes with at least one error (updated by thread 0 only)
 55 | 
 56 | 	unsigned int flagError=0;			// mismatches of this thread in the current pass
 57 | 
 58 | 	TaskHostStatus *ptrHostStatus = ts->ptrHostStatus;
 59 | 	shFlagIrq=0;
 60 | 
 61 | 
 62 | 	//printf( "src=%p  x=%d y=%d z=%d expect_data=0x%.8lX\n", src, threadIdx.x, threadIdx.y, threadIdx.z, expect_data );
 63 | 
 64 | 	// Polling loop; the only exit is flagExit == 1.
 65 | 	for( int loop=0; ; loop++ )
 66 | 	{
 67 | 		if( 1==ptrMonitor->flagExit )
 68 | 		{
 69 | 			break;
 70 | 		}
 71 | 
 72 | 		if( 0==threadIdx.x )
 73 | 			shFlagIrq=ts->irqFlag;
 74 | 		// NOTE(review): the other threads read shFlagIrq without a barrier after thread 0's store - confirm this is intended.
 75 | 
 76 | 		if( 1!=shFlagIrq )
 77 | 		{
 78 | 			for( volatile int jj=0; jj<1000; jj++ );	// short busy-wait before polling again
 79 | 
 80 | 			continue;	// NOTE(review): this path skips the __syncthreads() calls that other threads may already be waiting in
 81 | 		}
 82 | 		// Restart from the beginning of the input buffer for this pass.
 83 | 		src = (uint64_t*)(ts->ptrCudaIn);
 84 | 		src+=threadIdx.x;
 85 | 
 86 | 		__syncthreads();
 87 | 
 88 | 		// The record's flagError is preset to 1 and overwritten with the pass result after the compare loop.
 89 | 		flagError=0;
 90 | 		check->flagError=1;
 91 | 
 92 | 		if( 0==threadIdx.x )
 93 | 		{
 94 | 			// Thread 0 also copies every word it reads into slot indexWr of the output area (cnt words per slot).
 95 | 			dst=(uint64_t*)(ts->ptrCudaOut);
 96 | 			dst+= ts->indexWr * cnt;
 97 | 
 98 | 			for( int ii=0; ii<cnt; ii++ )
 99 | 			{
100 | 				uint64_t	val;
101 | 				val = *src; src+=step;
102 | 
103 | 				*dst++ = val;
104 | 
105 | 				if( val!=expect_data )
106 | 				{
107 | 					if( errorCnt<16 )	// details are stored for the first 16 mismatches only
108 | 					{
109 | 						check->nblock[errorCnt]=block_rd;
110 | 						check->adr[errorCnt]=ii;
111 | 						check->expect_data[errorCnt]=expect_data;
112 | 						check->receive_data[errorCnt]=val;
113 | 					}
114 | 					errorCnt++;
115 | 					flagError++;
116 | 				}
117 | 				expect_data+=step;
118 | 			}
119 | 			// Advance the slot index, wrapping at indexMax, and mirror it into the host status.
120 | 			{
121 | 				int n=ts->indexWr+1;
122 | 				if( n==ts->indexMax )
123 | 					n=0;
124 | 				ts->indexWr=n;
125 | 				ptrHostStatus->indexWr=n;
126 | 			}
127 | 
128 | 		} else
129 | 		{	// All other threads: the same comparison, without the copy to the output area.
130 | 			for( int ii=0; ii<cnt; ii++ )
131 | 			{
132 | 				uint64_t	val;
133 | 				val = *src; src+=step;
134 | 
135 | 				if( val!=expect_data )
136 | 				{
137 | 					if( errorCnt<16 )	// details are stored for the first 16 mismatches only
138 | 					{
139 | 						check->nblock[errorCnt]=block_rd;
140 | 						check->adr[errorCnt]=ii;
141 | 						check->expect_data[errorCnt]=expect_data;
142 | 						check->receive_data[errorCnt]=val;
143 | 					}
144 | 					errorCnt++;
145 | 					flagError++;
146 | 				}
147 | 				expect_data+=step;
148 | 			}
149 | 
150 | 		}
151 | 
152 | 		// Publish this thread's result for the pass and its cumulative mismatch count.
153 | 		check->flagError=flagError;
154 | 		check->cntError=errorCnt;
155 | 
156 | 		if( 0==threadIdx.x )
157 | 		  ptrMonitor->block[nbuf].irqFlag=0;	// clear the request; presumably the same field as ts->irqFlag - confirm in task_data.h
158 | 		// Skip two more buffers' worth of words (the compare loop already advanced by cnt*step). NOTE(review): presumably three buffers rotate - confirm.
159 | 		expect_data += 2*1024*size/8;
160 | 
161 | 		__syncthreads();
162 | 
163 | 		block_rd++;
164 | 
165 | 		if( 0==threadIdx.x )
166 | 		{
167 | 			// Check all tasks: the pass counts as bad if any of the TaskCounts records has flagError set.
168 | 			unsigned int flagErr=0;
169 | 			for( int ii=0; ii<TaskCounts; ii++ )
170 | 			{
171 | 				if( ts->check[ii].flagError )
172 | 				{
173 | 					flagErr=1;
174 | 				}
175 | 			}
176 | 			if( 0==flagErr)
177 | 			{
178 | 				block_ok++;
179 | 			} else
180 | 			{
181 | 				block_error++;
182 | 			}
183 | 			// Publish the pass counters in the buffer status.
184 | 			ts->blockRd=block_rd;
185 | 			ts->blockOk=block_ok;
186 | 			ts->blockError=block_error;
187 | 			//printf( "buf: %d  expect_data= 0x%.8lX \n", nbuf, expect_data );
188 | 		}
189 | 
190 | 	}
191 | 
192 | 
193 | }
194 | 
/**
 * 		\brief	Launch checkCounterKernel for one buffer
 *
 * 		\param	sharedMemory	pointer in CUDA memory of shared data
 * 		\param	nbuf			number of buffer
 * 		\param	stream			CUDA stream for this kernel
 *
 * 		\return	always 0; the launch status is not checked here
 */
int run_checkCounter( long *sharedMemory, int nbuf, cudaStream_t& stream  )
{
	// Launch configuration: a single-block grid, the block holds TaskCounts threads.
	const dim3 gridShape( 1, 1 );
	const dim3 blockShape( TaskCounts, 1, 1 );

	checkCounterKernel<<< gridShape, blockShape, 0, stream >>>( sharedMemory, nbuf );

	return 0;
}
214 | 
215 | 
216 | //__global__ void MonitorKernel( long* sharedMemory,  int nbuf, unsigned int index_rd  )
217 | //{
218 | //
219 | //	TaskMonitor *ptrMonitor = (TaskMonitor*)sharedMemory;
220 | //	TaskBufferStatus *ts=(TaskBufferStatus *)sharedMemory;
221 | //	ts+=nbuf;
222 | //
223 | //	for( int loop=0; ; loop++ )
224 | //	{
225 | //		if( 1==ptrMonitor->flagExit )
226 | //		{
227 | //			break;
228 | //		}
229 | //
230 | //		if( index_rd!=ptrMonitor->block[0].indexWr )
231 | //			break;
232 | //
233 | //		for( volatile int jj=0; jj<10000; jj++ );
234 | //	}
235 | //
236 | //
237 | //}
238 | //
239 | //int run_Monitor(  long* sharedMemory, int nbuf, unsigned int index_rd, cudaStream_t stream )
240 | //{
241 | //
242 | //    //Kernel configuration, where a two-dimensional grid and
243 | //    //three-dimensional blocks are configured.
244 | //    dim3 dimGrid(1, 1);
245 | //    dim3 dimBlock(1, 1, 1);
246 | //    MonitorKernel<<<dimGrid, dimBlock, 0, stream>>>(sharedMemory, nbuf, index_rd );
247 | //
248 | //
249 | //}
250 | 


--------------------------------------------------------------------------------
/app_template/host/cl_cuda.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * cl_cuda.cpp
  3 |  *
  4 |  *  Created on: Feb 4, 2017
  5 |  *      Author: Dmitry Smekhov
  6 |  */
  7 | 
  8 | #include "cl_cuda.h"
  9 | 
 10 | // System includes
 11 | #include <stdio.h>
 12 | #include <assert.h>
 13 | #include <stdint.h>
 14 | #include <stdlib.h>
 15 | #include <stdio.h>
 16 | #include <unistd.h>
 17 | #include <fcntl.h>
 18 | #include <string.h>
 19 | #include <errno.h>
 20 | #include <sys/uio.h>
 21 | #include <sys/ioctl.h>
 22 | #include <sys/types.h>
 23 | #include <sys/mman.h>
 24 | 
 25 | // CUDA runtime
 26 | #include <cuda.h>
 27 | #include <cuda_runtime.h>
 28 | 
 29 | //#include "cuda.h"
 30 | //#include "cuda_runtime_api.h"
 31 | 
 32 | #include "gpumemioctl.h"
 33 | 
 34 | // helper functions and utilities to work with CUDA
 35 | #include <helper_functions.h>
 36 | #include <helper_cuda.h>
 37 | 
//! Print the CUDA error text for a failed status and throw int 0; no-op on CUDA_SUCCESS
void checkError(CUresult status);
//! Print the CUDA error text for a failed status; returns true on failure, false on CUDA_SUCCESS
bool wasError(CUresult status);
 40 | 
 41 | /**
 42 |  * 	\brief	Private data for CL_Cuda class
 43 |  */
 44 | class CL_Cuda_private
 45 | {
 46 | 	public:
 47 | 
 48 |     int 			devID;	//!< Id for CUDA device
 49 |     cudaDeviceProp 	props;	//!< attributes for CUDA device
 50 | 
 51 |     int				fd;		//!< description of gpumem driver
 52 | 
 53 |     CUdevice 		device;		//!< Descriptor CUDA device
 54 |     char 			name[256];	//!< Name of CUDA device
 55 |     int 			major, minor;	//!< Capability numbers;
 56 |     size_t 			global_mem;		//!< Size of memory on CUDA device
 57 |     CUcontext  		context;		//!< Contex for all cuda functions
 58 | 
 59 | };
 60 | 
 61 | /**
 62 |  * 	\brief	Constructor
 63 |  *
 64 |  * 	\param	argc	argc from main function
 65 |  * 	\param	argv	argv from main function
 66 |  */
 67 | CL_Cuda::CL_Cuda( int argc, char** argv )
 68 | {
 69 | 
 70 | 
 71 | 	pd = new CL_Cuda_private();
 72 | 
 73 | 	cudaDeviceReset();
 74 | 
 75 | 	checkError(cuInit(0));
 76 | 
 77 | //	int total = 0;
 78 | //	cudaGetDeviceCount( &total );
 79 | //	fprintf(stderr, "Total devices: %d\n", total);
 80 | //
 81 | 	pd->devID=0;
 82 |  	cudaSetDevice(pd->devID);
 83 | 
 84 |     int total = 0;
 85 |     checkError(cuDeviceGetCount(&total));
 86 |     fprintf(stderr, "Total devices: %d\n", total);
 87 | 
 88 | 
 89 |     checkError(cuDeviceGet(&pd->device, 0));
 90 | 
 91 | 
 92 |     checkError(cuDeviceGetName( pd->name, 256, pd->device));
 93 |     fprintf(stderr, "Select device: %s\n", pd->name);
 94 | 
 95 |     // get compute capabilities and the devicename
 96 |     pd->major = 0; pd->minor = 0;
 97 |     checkError( cuDeviceComputeCapability(&pd->major, &pd->minor, pd->device));
 98 |     fprintf(stderr, "Compute capability: %d.%d\n", pd->major, pd->minor);
 99 | 
100 |     pd->global_mem = 0;
101 |     checkError( cuDeviceTotalMem(&pd->global_mem, pd->device));
102 |     fprintf(stderr, "Global memory: %llu MB\n", (unsigned long long)(pd->global_mem >> 20));
103 |     if(pd->global_mem > (unsigned long long)4*1024*1024*1024L)
104 |         fprintf(stderr, "64-bit Memory Address support\n");
105 | 
106 | 
107 | 
108 |     checkError(cuCtxCreate(&pd->context, 0, pd->device));
109 | 	//checkError(cuCtxGetCurrent(&pd->context));
110 | 
111 |     pd->devID=0;
112 |     //cudaSetDevice(pd->devID);
113 | 
114 | 
115 |     pd->fd = open("/dev/"GPUMEM_DRIVER_NAME, O_RDWR, 0);
116 |     if(pd->fd < 0)
117 |     {
118 |         printf("Error open file %s\n", "/dev/"GPUMEM_DRIVER_NAME);
119 |         throw( "Error /dev/gpumem");
120 |     }
121 | 
122 | }
123 | 
124 | 
125 | CL_Cuda::~CL_Cuda()
126 | {
127 | 	// TODO Auto-generated destructor stub
128 | 	delete pd; pd=NULL;
129 | }
130 | 
131 | 
132 | //! Allocate buffer in CUDA memory and map it in BAR1 space
133 | void CL_Cuda::AllocateBar1Buffer( int sizeOfKb, BAR1_BUF *pAdr )
134 | {
135 | 
136 | 	size_t size = sizeOfKb * 1024;
137 |     gpudma_lock_t lock;
138 |     gpudma_state_t *state = 0;
139 |     unsigned int flag = 1;
140 | 	CUdeviceptr dptr = 0;
141 | 	int statesize = 0;
142 | 	int res = -1;
143 | 
144 | 	int thLevel=0; // Level of local throw
145 | 
146 |     try
147 | 	{
148 | 
149 | 	if( 0xA00!=pAdr->state)
150 | 	{
151 | 		fprintf(stderr, "BAR1_BUF is busy. state=0x%.3X != 0xA00\n",  pAdr->state );
152 | 		throw(0);
153 | 	}
154 | 	pAdr->state=0xA01;
155 | 
156 |     CUresult status = cuMemAlloc(&dptr, size);
157 |     if(wasError(status)) {
158 |         throw(thLevel);
159 |     }
160 |     thLevel++;
161 | 
162 |     fprintf(stderr, "Allocate memory address: 0x%llx\n",  (unsigned long long)dptr);
163 | 
164 |     status = cuPointerSetAttribute(&flag, CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, dptr);
165 |     if(wasError(status)) {
166 |         throw(thLevel); //goto do_free_memory;
167 |     }
168 | 
169 | 
170 |     // TODO: add kernel driver interaction...
171 |     lock.addr = dptr;
172 |     lock.size = size;
173 |     res = ioctl(pd->fd, IOCTL_GPUMEM_LOCK, &lock);
174 |     if(res < 0) {
175 |         fprintf(stderr, "Error in IOCTL_GPUDMA_MEM_LOCK\n");
176 |         throw(thLevel); // goto do_free_attr;
177 |     }
178 | 
179 | 
180 |     pAdr->phy_addr= new uint64_t[lock.page_count];
181 |     pAdr->app_addr= new void*[lock.page_count];
182 |     thLevel++;
183 | 
184 |     statesize = (lock.page_count*sizeof(uint64_t) + sizeof(struct gpudma_state_t));
185 |     state = (struct gpudma_state_t*)malloc(statesize);
186 |     if(!state) {
187 |         throw(thLevel); // goto do_free_attr;
188 |     }
189 |     memset(state, 0, statesize);
190 |     state->handle = lock.handle;
191 |     state->page_count = lock.page_count;
192 |     res = ioctl(pd->fd, IOCTL_GPUMEM_STATE, state);
193 |     if(res < 0) {
194 |         fprintf(stderr, "Error in IOCTL_GPUDMA_MEM_UNLOCK\n");
195 |         throw(thLevel);// goto do_unlock;
196 |     }
197 | 
198 |     fprintf(stderr, "Page count 0x%lx\n", state->page_count);
199 |     fprintf(stderr, "Page size 0x%lx\n", state->page_size);
200 | 
201 |     pAdr->page_count=state->page_count;
202 |     pAdr->page_size=state->page_size;
203 |     pAdr->cuda_addr=(void*)dptr;
204 |     pAdr->sizeOfBytes=size;
205 | 
206 | 
207 |     for(unsigned ii=0; ii<state->page_count; ii++) {
208 |     	if( state->page_count<16 )
209 |          fprintf(stderr, "%02d: 0x%lx\n", ii, state->pages[ii]);
210 |         void* va = mmap(0, state->page_size, PROT_READ|PROT_WRITE, MAP_SHARED, pd->fd, (off_t)state->pages[ii]);
211 |         if(va == MAP_FAILED ) {
212 |              fprintf(stderr, "%s(): %s\n", __FUNCTION__, strerror(errno));
213 |              va = 0;
214 |              throw(thLevel);
215 |         } else {
216 |             //fprintf(stderr, "%s(): Physical Address 0x%lx -> Virtual Address %p\n", __FUNCTION__, state->pages[i], va);
217 |         	pAdr->app_addr[ii]=va;
218 |         	pAdr->phy_addr[ii]=state->pages[ii];
219 |         }
220 |     }
221 |     pAdr->state=0xA05; // Success
222 |     fprintf(stderr, "CL_Cuda::AllocateBar1Buffer() - buffer id=%d is allocated, size=%d kB \n",  pAdr->id, sizeOfKb );
223 | 	} catch( int n )
224 | 	{
225 | 		switch( n )
226 | 		{
227 | 
228 | 		case 2:
229 | 			delete pAdr->phy_addr; pAdr->phy_addr=NULL;
230 | 			delete pAdr->app_addr; pAdr->app_addr=NULL;
231 | 		case 1:
232 | 			cuMemFree(dptr);
233 | 		default:
234 | 			pAdr->state=0xA00;
235 | 			break;
236 | 		}
237 | 		throw(0);
238 | 	} catch( ... )
239 | 	{
240 | 		throw( 0 );
241 | 	}
242 | }
243 | 
244 | //! Release buffer from BAR1 space and from CUDA memory
245 | void CL_Cuda::FreeBar1Buffer( BAR1_BUF *pAdr )
246 | {
247 | 
248 | 	if( 0xA05!=pAdr->state)
249 | 	{
250 | 		fprintf(stderr, "BAR1_BUF is not allocate. state=0x%.3X != 0xA05\n",  pAdr->state );
251 | 		throw(0);
252 | 	}
253 | 	pAdr->state = 0xA10;
254 | 
255 | 	// unmap virtual address
256 | 	void *va;
257 | 	for(unsigned ii=0; ii<pAdr->page_count; ii++)
258 | 	{
259 | 		va=pAdr->app_addr[ii];
260 | 		munmap(va, pAdr->page_size);
261 | 		pAdr->app_addr[ii]=NULL;
262 | 	}
263 | 
264 | 	// free CUDA memory
265 | 	cuMemFree((CUdeviceptr)(pAdr->cuda_addr));
266 | 
267 | 	// free array
268 | 	delete pAdr->app_addr; pAdr->app_addr=NULL;
269 | 	delete pAdr->phy_addr; pAdr->phy_addr=NULL;
270 | 
271 | 	// Set empty state of pAdr
272 | 	pAdr->state = 0xA00;
273 | 	fprintf(stderr, "CL_Cuda::FreeBar1Buffer() - buffer id=%d is cleared \n",  pAdr->id);
274 | 
275 | }
276 | 
277 | 
278 | void checkError(CUresult status)
279 | {
280 |     if(status != CUDA_SUCCESS) {
281 |         const char *perrstr = 0;
282 |         CUresult ok = cuGetErrorString(status,&perrstr);
283 |         if(ok == CUDA_SUCCESS) {
284 |             if(perrstr) {
285 |                 fprintf(stderr, "info: %s\n", perrstr);
286 |             } else {
287 |                 fprintf(stderr, "info: unknown error\n");
288 |             }
289 |         }
290 |         throw(0);
291 |     }
292 | }
293 | 
294 | bool wasError(CUresult status)
295 | {
296 |     if(status != CUDA_SUCCESS) {
297 |         const char *perrstr = 0;
298 |         CUresult ok = cuGetErrorString(status,&perrstr);
299 |         if(ok == CUDA_SUCCESS) {
300 |             if(perrstr) {
301 |                 fprintf(stderr, "info: %s\n", perrstr);
302 |             } else {
303 |                 fprintf(stderr, "info: unknown error\n");
304 |             }
305 |         }
306 |         return true;
307 |     }
308 |     return false;
309 | }
310 | 


--------------------------------------------------------------------------------
/app_template/host/cl_cuda.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * cl_cuda.h
 3 |  *
 4 |  *  Created on: Feb 4, 2017
 5 |  *      Author: Dmitry Smekhov
 6 |  */
 7 | 
 8 | #ifndef CL_CUDA_H_
 9 | #define CL_CUDA_H_
10 | 
11 | #include <stdint.h>
12 | #include <cuda.h>
13 | 
class CL_Cuda_private;

/**
 * 	\brief	Common actions for CUDA device
 *
 * 	Owns the private device data and provides allocation and release of
 * 	buffers which are described by BAR1_BUF.
 */
class CL_Cuda
{

public:
	CL_Cuda( int argc, char** argv );
	virtual ~CL_Cuda();

	//! Description buffer in BAR1 space
	struct BAR1_BUF
	{
		int			id;			//!< User id for buffer
		int			state;		//!< Status of buffer
		size_t		sizeOfBytes;//!< Size buffer of bytes
		int 	    page_count;	//!< Count of pages
		int	      	page_size;	//!< Size of page
		void*		cuda_addr;	//!< address in CUDA memory
		uint64_t*   phy_addr;	//!< Array of physical addresses of pages
		void**		app_addr; 	//!< Array of virtual addresses of pages in the application address space

		//! New descriptor: no id, state 0xA00, no memory attached
		BAR1_BUF()
			: id( -1 ),
			  state( 0xA00 ),
			  sizeOfBytes( 0 ),
			  page_count( 0 ),
			  page_size( 0 ),
			  cuda_addr( 0 ),
			  phy_addr( 0 ),
			  app_addr( 0 )
		{
		}
	};

	//! Allocate buffer in CUDA memory and map it in BAR1 space
	void AllocateBar1Buffer( int sizeOfKb, BAR1_BUF *pAdr );

	//! Release buffer from BAR1 space and from CUDA memory
	void FreeBar1Buffer( BAR1_BUF *pAdr );

private:
	CL_Cuda_private	*pd;	//!< Private data, see CL_Cuda_private

};
61 | 
62 | #endif /* CL_CUDA_H_ */
63 | 


--------------------------------------------------------------------------------
/app_template/host/cl_cuda_test.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * cl_cuda_test.cpp
 3 |  *
 4 |  *  Created on: Feb 4, 2017
 5 |  *      Author: user52
 6 |  */
 7 | 
 8 | #include "cl_cuda.h"
 9 | 
10 | 


--------------------------------------------------------------------------------
/app_template/host/main.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | // System includes
 4 | #include <stdio.h>
 5 | #include <signal.h>
 6 | #include <unistd.h>
 7 | 
 8 | #include <assert.h>
 9 | 
10 | 
11 | #include "tf_testcnt.h"
12 | 
13 | 
//! Set to 1 by the SIGINT handler, polled by main().
//! sig_atomic_t is the integer type that may be written from a signal handler.
static volatile sig_atomic_t exit_flag = 0;

//! Signal handler: only records that termination was requested
void signa_handler(int signo)
{
    (void)signo;	// the handler is installed for SIGINT only
    exit_flag = 1;
}
20 | 
21 | 
22 | //int run_cuda(int argc, char **argv);
23 | 
24 | int main(int argc, char **argv)
25 | {
26 | 
27 | 	int ret;
28 | 
29 |     signal(SIGINT, signa_handler);
30 | 
31 | 	try
32 | 	{
33 | 
34 | 		TF_TestCnt	*pTest = new TF_TestCnt( argc, argv );
35 | 
36 | 		for( int ii=0; ; ii++)
37 | 		{
38 | 			if( pTest->Prepare(ii) )
39 | 				break;
40 | 		}
41 | 
42 | 		pTest->Start();
43 | 
44 | 		for( ; ; )
45 | 		{
46 | 
47 | 			if( pTest->isComplete() )
48 | 				break;
49 | 
50 | 			if( exit_flag )
51 | 			{
52 | 				pTest->Stop();
53 | 			}
54 | 
55 | 			pTest->StepTable();
56 | 
57 | 			usleep( 10000 ); // 100 ms
58 | 
59 | 		}
60 | 
61 | 		//pTest->GetResult();
62 | 
63 | 		delete pTest; pTest=NULL;
64 | 
65 | 	} catch( ... )
66 | 	{
67 | 
68 | 	}
69 | 
70 | 	//fprintf( stderr, "\nPress any key for exit\n" );
71 | 	//getchar();
72 | 
73 |     return ret;
74 | }
75 | 
76 | 


--------------------------------------------------------------------------------
/app_template/host/task_data.h:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "cl_cuda.h"
  3 | 
//! Number of checking tasks per buffer: size of TaskBufferStatus::check[]
//! and the thread count of the block launched by run_checkCounter()
const int TaskCounts=32;
  6 | 
  7 | 
  8 | /**
  9 |  * 	\brief	Struct for check data in one task for one buffer
 10 |  */
 11 | struct TaskCheckData
 12 | {
 13 | 	unsigned int	flagError;		//!< 1 - error in current runs
 14 | 	unsigned int	cntError;		//!< number of errors for all runs
 15 | 
 16 | 	unsigned int	nblock[16];			//!< number block
 17 | 	unsigned int	adr[16];			//!< address into block
 18 | 	uint64_t		expect_data[16];	//!< expect data
 19 | 	uint64_t		receive_data[16];	//!< receive data
 20 | 
 21 | 	TaskCheckData()
 22 | 	{
 23 | 		for( int ii=0; ii<16; ii++ )
 24 | 		{
 25 | 			nblock[ii]=0;
 26 | 			adr[ii]=0;
 27 | 			expect_data[ii]=0;
 28 | 			receive_data[ii]=0;
 29 | 		}
 30 | 		flagError=0;
 31 | 		cntError=0;
 32 | 	}
 33 | 
 34 | 
 35 | };
 36 | 
struct TaskHostStatus;

/**
 * 	\brief	Status of processing for one BAR1 buffer
 *
 * 	The structure is accessed both by the host (through the mapped monitor
 * 	buffer) and by the CUDA kernel, so the order and types of the fields
 * 	must stay the same on both sides.
 */
struct TaskBufferStatus
{
	unsigned int irqFlag;		//!< 1 - ready data in bar1 buffer; the kernel waits for 1 and clears it after the check
	unsigned int res0;			//!< reserved
	unsigned int res1;			//!< reserved
	unsigned int blockRd;		//!< count of read buffer
	unsigned int blockOk;		//!< count of correct buffers
	unsigned int blockError;	//!< count of buffer with errors
	unsigned int sizeOfKBytes;	//!< size of buffers in kilobytes

	void*	ptrCudaIn;			//!< pointer on bar1 buffer in the Cuda memory

	void*	ptrCudaOut;			//!< pointer on output buffer in the Cuda memory

	TaskHostStatus	*ptrHostStatus;		//!< pointer on TaskHostStatus in the Host memory

	unsigned int	indexWr;	//!< block number for next write; wraps to 0 at indexMax
	unsigned int	indexRd;	//!< block number for read
	unsigned int	indexMax;	//!< count blocks in output buffer

	TaskCheckData	check[ TaskCounts ]; //!< current results for test one buffer, one entry per task
};
 64 | 
 65 | 
/**
 *  \brief	Struct of data in monitor area in BAR1
 *
 *  The host reaches it through the first mapped page of the monitor buffer;
 *  the kernel receives the CUDA address of the same buffer.
 */
struct TaskMonitor
{
	TaskBufferStatus	block[3];	//!< Status of each of the three buffers
	int		sig;					//!< signature: 0xAA24
	int		flagExit;				//!< 1 - exit from program; polled by the kernel loop
	int		res0;					//!< reserved
	int		res1;					//!< reserved

};
 79 | 
/**
 * 	\brief	Struct for process status in the host memory
 *
 * 	indexWr is written by the kernel when it advances its write index,
 * 	indexRd is advanced by the host after it has read a block.
 */
struct TaskHostStatus
{

	unsigned int	indexWr;	//!< block number for next write
	unsigned int	indexRd;	//!< block number for read
	//unsigned int	indexMax;	//!< count blocks in output buffer

};
 91 | 
/**
 *  \brief	Struct of data in monitor area in the host memory
 *
 *  Holds one TaskHostStatus per buffer; TaskBufferStatus::ptrHostStatus
 *  of buffer N points to status[N].
 */
struct TaskHostMonitor
{
	TaskHostStatus	status[3];	//!< Status of process, one entry per buffer

};
101 | 
102 | 
103 | 
104 | /**
105 |  * 	\brief	collection data for TF_TestCnt
106 |  */
107 | struct TaskData
108 | {
109 | 	TaskMonitor*		ptrMonitor;	//!< address monitor struct in the HOST memory
110 | 	CL_Cuda::BAR1_BUF	monitor;	//!< description of monitor buffer in BAR1
111 | 	CL_Cuda::BAR1_BUF	bar1[3];	//!< description of buffer in BAR1
112 | 
113 | 	uint64_t			currentCounter;	//!< Current value for fill buffers
114 | 
115 | 	int					cycleCnt;
116 | 
117 | 	int 	sizeBufferOfKb;		//!< Size buffer [kbytes]. Must be n*64
118 | 	int 	countOfCycle;		//!< Number of cycle. 0 - infinitely
119 | 
120 | 	int		sizeBufferOfBytes;	//!< Size of BAR1 buffer in bytes
121 | 	int		countOfBuffers;		//!< Conunt of buffers, from 1 to 3
122 | 
123 | 	//void*				decimationBuffers[3];	//!< Buffer in the CUDA memory for
124 | 
125 | 
126 | 	size_t 	outputSizeBuffer; 	//!< size of output buffer [bytes]
127 | 	size_t 	outputSizeBlock;  	//!< size of output block  [bytes]
128 | 	size_t 	outputCountBlock; 	//!< count blocks in the output buffer
129 | 
130 | 
131 | 	uint64_t*	hostBuffer;			//!< data from device
132 | 
133 | 	TaskHostMonitor* hostMonitor;	//!< monitor data in the host memory
134 | 
135 | 	unsigned int hostBlockRd;		//!< count blocks which host received
136 | 	unsigned int hostBlockOk;		//!< block without errors
137 | 	unsigned int hostBlockError;	//!< block with errors;
138 | 
139 | 	uint64_t	hostExpectData;		//!< expect data for checking
140 | 
141 | 	TaskCheckData	hostCheck;		//!< result of checking host data
142 | 
143 | 	double velosityExtToCudaCurrent;	//!< velosity data transfer from external device to Cuda for last 4 sec
144 | 	double velosityExtToCudaAvr;	    //!< velosity data transfer from external device to Cuda from start
145 | 
146 | 	double velosityCudaToHostCurrent;	//!< velosity data transfer from external device to Cuda for last 4 sec
147 | 	double velosityCudaToHostAvr;	    //!< velosity data transfer from external device to Cuda from start
148 | 
149 | 	TaskData()
150 | 	{
151 | 		cycleCnt=0;
152 | 		sizeBufferOfBytes=0;
153 | 		countOfBuffers=3;
154 | 		currentCounter=0;
155 | 
156 | 		hostBuffer=NULL;
157 | 		hostMonitor=NULL;
158 | 
159 | 		hostBlockRd=0;
160 | 		hostBlockOk=0;
161 | 		hostBlockError=0;
162 | 		hostExpectData=0;
163 | 
164 | 		velosityExtToCudaCurrent=0;
165 | 		velosityExtToCudaAvr=0;
166 | 		velosityCudaToHostCurrent=0;
167 | 		velosityCudaToHostAvr=0;
168 | 
169 | 	}
170 | };
171 | 


--------------------------------------------------------------------------------
/app_template/host/tf_test.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * tf_test.h
 3 |  *
 4 |  *  Created on: Jan 29, 2017
 5 |  *      Author: Dmitry Smekhov
 6 |  */
 7 | 
 8 | #ifndef TF_TEST_H_
 9 | #define TF_TEST_H_
10 | 
11 | 
/**
 * 	\brief	Base class for testing device
 *
 * 	Prepare() and Start() must be implemented by a test; the other steps
 * 	have default implementations which do nothing.
 */
class TF_Test
{

public:

	//! Virtual destructor: a test may be destroyed through a TF_Test pointer
	virtual ~TF_Test() {}

	//! One preparation step with number cnt; main() repeats it until non-zero is returned
	virtual int 	Prepare( int cnt )=0;

	//! Start of the test
	virtual void	Start( void )=0;

	//! Request to stop the test
	virtual void 	Stop( void ) {}

	//! Non-zero when the test is finished; the default is never finished
	virtual int		isComplete( void ) { return 0; }

	//! Periodic call for displaying the current status
	virtual void	StepTable( void ) {}

	//! Display the result of the test
	virtual void	GetResult( void ) {}
};
32 | 
33 | 
34 | 
35 | 
36 | #endif /* TF_TEST_H_ */
37 | 


--------------------------------------------------------------------------------
/app_template/host/tf_testcnt.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * TF_TestCnt.cpp
  3 |  *
  4 |  *  Created on: Jan 29, 2017
  5 |  *      Author: Dmitry Smekhov
  6 |  */
  7 | 
  8 | #include <sys/types.h>
  9 | #include <sys/stat.h>
 10 | #include "stdio.h"
 11 | 
 12 | #include <unistd.h>
 13 | 
 14 | #include "tf_testcnt.h"
 15 | #include "cl_cuda.h"
 16 | #include <cuda.h>
 17 | #include <cuda_runtime_api.h>
 18 | 
 19 | #include "task_data.h"
 20 | #include <time.h>
 21 | 
 22 | double getTime( void )
 23 | {
 24 | 	clock_t t=clock();
 25 | 	double ret= t / (double)CLOCKS_PER_SEC;
 26 | 	return ret;
 27 | }
 28 | 
 29 | TF_TestCnt::TF_TestCnt( int argc, char **argv ) : TF_TestThread( argc, argv )
 30 | {
 31 | 
 32 | 
 33 | 	td = new TaskData;
 34 | 
 35 | 	td->countOfCycle   = GetFromCommnadLine( argc, argv, "-count", 16 );
 36 | 	td->sizeBufferOfKb = GetFromCommnadLine( argc, argv, "-size", 256);
 37 | 
 38 | 
 39 | 	m_pCuda=NULL;
 40 | 
 41 | 	m_argc=argc;
 42 | 	m_argv=argv;
 43 | 
 44 | }
 45 | 
 46 | TF_TestCnt::~TF_TestCnt() {
 47 | 
 48 | 	delete m_pCuda; m_pCuda=NULL;
 49 | 	delete td; 		td=NULL;
 50 | }
 51 | 
 52 | 
 53 | /**
 54 |  * 		\brief	Display current information about cheking buffers
 55 |  *
 56 |  * 		Function display information if 0==td->countOfCycle
 57 |  * 		function is called from main with interval of 100 ms
 58 |  */
 59 | void	TF_TestCnt::StepTable( void )
 60 | {
 61 | 
 62 | 
 63 | 	if( 0!=td->countOfCycle )
 64 | 		return;
 65 | 
 66 | 	unsigned blockRd=0;
 67 | 	unsigned blockOk=0;
 68 | 	unsigned blockError=0;
 69 | 
 70 | 	for( int ii=0; ii<3; ii++ )
 71 | 	{
 72 | 		blockRd+=td->ptrMonitor->block[ii].blockRd;
 73 | 		blockOk+=td->ptrMonitor->block[ii].blockOk;
 74 | 		blockError+=td->ptrMonitor->block[ii].blockError;
 75 | 
 76 | 	}
 77 | 
 78 | 	printf( " %7d %7d %8d %7d %7d %8d %7.1lf %7.1lf %7.1lf %7.1lf\r", blockRd, blockOk, blockError,
 79 | 			td->hostBlockRd,
 80 | 			//td->ptrMonitor->block[0].indexWr, td->hostMonitor->status[0].indexRd
 81 | 			td->hostBlockOk,
 82 | 			td->hostBlockError,
 83 | 			td->velosityExtToCudaCurrent,
 84 | 			td->velosityExtToCudaAvr,
 85 | 			td->velosityCudaToHostCurrent,
 86 | 			td->velosityCudaToHostAvr
 87 | 
 88 | 			//td->hostMonitor->status[0].indexWr,
 89 | 			//td->hostMonitor->status[0].indexRd
 90 | 			);
 91 | }
 92 | 
 93 | /**
 94 |  * 		\brief	Prepare CUDA and buffers
 95 |  *
 96 |  * 		Open CUDA device
 97 |  * 		Allocate three buffers and buffer for monitor
 98 |  */
 99 | void TF_TestCnt::PrepareInThread( void )
100 | {
101 | 
102 | 	m_pCuda = new CL_Cuda( m_argc, m_argv );
103 | 
104 | 	td->monitor.id=100;
105 | 
106 | 
107 | 	td->countOfBuffers=3;
108 | 	int size=td->sizeBufferOfKb;
109 | 	td->sizeBufferOfBytes=size*1024;
110 | 
111 | 
112 | 	for( int ii=0; ii<td->countOfBuffers ; ii++ )
113 | 	{
114 | 		td->bar1[ii].id=ii;
115 | 		m_pCuda->AllocateBar1Buffer( size, &(td->bar1[ii]) );
116 | 	}
117 | 	m_pCuda->AllocateBar1Buffer( 256, &(td->monitor) );
118 | 
119 | 
120 | 	td->ptrMonitor=(TaskMonitor*)td->monitor.app_addr[0];
121 | 
122 | 
123 | 	size_t outSizeBlock=size*1024/TaskCounts; // size of output buffer
124 | 	int n=512*1024*1024 / outSizeBlock; // count blocks in 512 MB buffer
125 | 
126 | 	size_t outSizeBuffer=n*outSizeBlock;
127 | 
128 | 	td->outputSizeBuffer = outSizeBuffer;
129 | 	td->outputSizeBlock  = outSizeBlock;
130 | 	td->outputCountBlock = n;
131 | 
132 | 	cudaError_t ret;
133 | 
134 | 	void *ptr=NULL;
135 | 	ret=cudaMallocHost( &ptr, outSizeBlock );
136 | 	if( cudaSuccess!=ret )
137 | 		throw( "Error page-locked memory allocate for hostBuffer" );
138 | 	td->hostBuffer=(uint64_t*)ptr;
139 | 
140 | 	ptr=NULL;
141 | 	ret=cudaMallocHost( &ptr, 4096 );
142 | 	if( cudaSuccess!=ret )
143 | 		throw( "Error page-locked memory allocate for hostMonitor" );
144 | 	td->hostMonitor=(TaskHostMonitor*)ptr;
145 | 
146 | 
147 | 	for( int ii=0; ii<td->countOfBuffers; ii++ )
148 | 	{
149 | 		td->ptrMonitor->block[ii].ptrCudaIn=(void*)(td->bar1[ii].cuda_addr);
150 | 
151 | 		td->ptrMonitor->block[ii].sizeOfKBytes=size;
152 | 
153 | 		td->ptrMonitor->block[ii].irqFlag=0;
154 | 		td->ptrMonitor->block[ii].blockOk=0;
155 | 		td->ptrMonitor->block[ii].blockError=0;
156 | 		td->ptrMonitor->block[ii].blockRd=0;
157 | 		for( int jj=0; jj<TaskCounts; jj++ )
158 | 		{
159 | 		 td->ptrMonitor->block[ii].check[jj].cntError=0;
160 | 		 td->ptrMonitor->block[ii].check[jj].flagError=0;
161 | 		}
162 | 
163 | 		 td->ptrMonitor->block[ii].indexRd=0;
164 | 		 td->ptrMonitor->block[ii].indexWr=0;
165 | 
166 | 		 td->ptrMonitor->block[ii].indexMax = td->outputCountBlock;
167 | 		 ptr=NULL;
168 | 		 ret= cudaMalloc( &ptr, outSizeBuffer );
169 | 		 if( cudaSuccess != ret )
170 | 			 throw( "Error memory allocation for output buffer" );
171 | 		 td->ptrMonitor->block[ii].ptrCudaOut=ptr;
172 | 
173 | 		 td->ptrMonitor->block[ii].ptrHostStatus=&td->hostMonitor->status[ii];
174 | 
175 | 
176 | 	}
177 | 
178 | 	td->ptrMonitor->flagExit=0;
179 | 	td->ptrMonitor->sig=0xAA24;
180 | 
181 | 
182 | 	printf( "td->countOfCycle=%d\n", td->countOfCycle );
183 | 	printf( "td->sizeBufferOfKb=%d [kB]\n\n", td->sizeBufferOfKb );
184 | 
185 | 	if( 0==td->countOfCycle )
186 | 		printf( "\n CUDA_RD CUDA_OK CUDA_ERR HOST_RD HOST_OK HOST_ERR E2C_CUR E2C_AVR C2H_CUR C2H_AVR \n" );
187 | 
188 | }
189 | 
190 | /**
191 |  * 		\brief		Free buffers and close device
192 |  *
193 |  */
194 | void TF_TestCnt::CleanupInThread( void )
195 | {
196 | 
197 | 	for( int ii=0; ii<td->countOfBuffers; ii++ )
198 | 	{
199 | 		m_pCuda->FreeBar1Buffer( &(td->bar1[ii]) );
200 | 	}
201 | 	m_pCuda->FreeBar1Buffer( &(td->monitor) );
202 | 
203 | 	cudaFreeHost( td->hostBuffer );
204 | 	cudaFreeHost( td->hostMonitor );
205 | 
206 | 	delete m_pCuda; m_pCuda=NULL;
207 | 
208 | 	fprintf( stderr, "%s - Ok\n", __FUNCTION__ );
209 | }
210 | 
211 | /**
212 |  * 	\brief	fill buffer
213 |  *
214 |  * 	\param	pBar1	description of buffer
215 |  *
216 |  * 	function fill bar1 buffer via pBar1->app_addr[]
217 |  *
218 |  */
219 | void TF_TestCnt::FillCounter( CL_Cuda::BAR1_BUF *pBar1 )
220 | {
221 | 	if( 0xA05 != pBar1->state )
222 | 		throw(0);
223 | 
224 | 	int size64=pBar1->page_size/8;
225 | 	uint64_t *dst;
226 | 	uint64_t val=td->currentCounter;
227 | 
228 | 	for( int page=0; page<pBar1->page_count; page++ )
229 | 	{
230 | 		dst=(uint64_t*) (pBar1->app_addr[page]);
231 | 		for( int ii=size64; ii; ii--)
232 | 			*dst++=val++;
233 | 
234 | 	}
235 | 	td->currentCounter=val;
236 | }
237 | 
238 | 
//! Launch of the checking kernel for buffer nbuf in the given stream (defined in the CUDA source)
int run_checkCounter( long *sharedMemory, int nbuf, cudaStream_t& stream );
//! NOTE(review): the only call of run_Monitor in this file is commented out - confirm that a definition still exists
int run_Monitor( long* sharedMemory, int nbuf, unsigned int index_rd, cudaStream_t stream );
241 | 
242 | /**
243 |  * 	\brief	Main working cycle
244 |  *
245 |  * 	It is main working cycle.
246 |  * 	Function FillCounter  simulate to work external DMA channel.
247 |  *
248 |  */
249 | void TF_TestCnt::Run( void )
250 | {
251 | 
252 | 
253 | 	FillThreadStart();
254 | 
255 | 	long *ptrCudaMonitor=(long*)(td->monitor.cuda_addr);
256 | 
257 | 	cudaStream_t	streamBuf0;
258 | 	cudaStream_t	streamBuf1;
259 | 	cudaStream_t	streamBuf2;
260 | 	//cudaStream_t	streamMonitor;
261 | 	cudaStream_t	streamDMA;
262 | 
263 | 	cudaStreamCreate( &streamBuf0 );
264 | 	cudaStreamCreate( &streamBuf1 );
265 | 	cudaStreamCreate( &streamBuf2 );
266 | 	//cudaStreamCreate( &streamMonitor );
267 | 	cudaStreamCreate( &streamDMA );
268 | 
269 | 
270 | 	run_checkCounter(  ptrCudaMonitor, 0, streamBuf0 );
271 | 	run_checkCounter(  ptrCudaMonitor, 1, streamBuf1 );
272 | 	run_checkCounter(  ptrCudaMonitor, 2, streamBuf2 );
273 | 
274 | 
275 | 	int val;
276 | 	int blockRd;
277 | 
278 | 	int nbuf;
279 | 	//unsigned int indexRd[3]={ 0, 0, 0 };
280 | 	td->hostMonitor->status[0].indexRd=0;
281 | 	td->hostMonitor->status[1].indexRd=0;
282 | 	td->hostMonitor->status[2].indexRd=0;
283 | 
284 | 	cudaError_t ret;
285 | 
286 | 	int status=1;
287 | 
288 | 	volatile unsigned int index_wr;
289 | 	unsigned int index_rd;
290 | 
291 | 	double time_start = getTime();
292 | 	double time_last=time_start;
293 | 	double time_current;
294 | 	double velosity;
295 | 
296 | 	unsigned int blockRdLast=0;
297 | 	unsigned int blockHostRdLast=0;
298 | 
299 | 	for( int kk=0; ; kk++ )
300 | 	{
301 | 
302 | 
303 | 		time_current=getTime();
304 | 		if( time_current-time_last>4 )
305 | 		{
306 | 			blockRd=td->ptrMonitor->block[0].blockRd + td->ptrMonitor->block[1].blockRd + td->ptrMonitor->block[2].blockRd;
307 | 
308 | 			velosity = (double)1.0*td->sizeBufferOfKb*1024*(blockRd-blockRdLast)/(time_current-time_last);
309 | 			td->velosityExtToCudaCurrent=velosity/1024/1024;
310 | 
311 | 			velosity = (double)1.0*td->sizeBufferOfKb*blockRd;
312 | 			velosity/=(time_current-time_start);
313 | 			td->velosityExtToCudaAvr=velosity/1024;
314 | 
315 | 			blockRdLast=blockRd;
316 | 
317 | 
318 | 			blockRd = td->hostBlockRd;
319 | 			velosity = (double)1.0*td->outputSizeBlock*(blockRd-blockHostRdLast)/(time_current-time_last);
320 | 			td->velosityCudaToHostCurrent=velosity/1024/1024;
321 | 
322 | 			velosity = (double)1.0*td->outputSizeBlock*blockRd;
323 | 			velosity/=(time_current-time_start);
324 | 			td->velosityCudaToHostAvr=velosity/1024/1024;
325 | 
326 | 
327 | 			time_last=time_current;
328 | 			blockHostRdLast=blockRd;
329 | 
330 | 		}
331 | 
332 | 
333 | 		if( m_isTerminate || (td->countOfCycle>0 && td->countOfCycle==blockRd ))
334 | 		{
335 | 			td->ptrMonitor->flagExit=1;
336 | 			break;
337 | 		}
338 | 
339 | 		switch( status )
340 | 		{
341 | //			case 0: // run monitor
342 | //				run_Monitor(  ptrCudaMonitor, nbuf, td->hostMonitor->status[nbuf].indexRd, streamMonitor );
343 | //
344 | //				status=1;
345 | //				break;
346 | 
347 | 			case 1: // wait for ready current buffer and start DMA  read
348 | //				ret=cudaStreamQuery( streamMonitor );
349 | //				if( cudaSuccess==ret )
350 | 				index_wr = td->hostMonitor->status[nbuf].indexWr;
351 | 				index_rd = td->hostMonitor->status[nbuf].indexRd;
352 | 				if( index_wr!=index_rd )
353 | 				{
354 | 
355 | 					//hostBlockRdprintf( "status 1: nbuf=%d index_rd=%d\n", nbuf, indexRd[nbuf]);
356 | 					uint64_t* d_src=(uint64_t*)(td->ptrMonitor->block[nbuf].ptrCudaOut);
357 | 					d_src+=td->hostMonitor->status[nbuf].indexRd * td->outputSizeBlock/8;
358 | 
359 | 					cudaMemcpyAsync( td->hostBuffer, d_src, td->outputSizeBlock, cudaMemcpyDeviceToHost, streamDMA );
360 | 					//cudaMemcpy( td->hostBuffer, d_src, td->outputSizeBlock, cudaMemcpyDeviceToHost );
361 | 					//usleep( 1 );
362 | 					status=2;
363 | 				}
364 | 				break;
365 | 			case 2: // wait for data transfer complete
366 | 				//ret=cudaStreamQuery( streamDMA );
367 | 				//if( cudaSuccess==ret )
368 | 				{
369 | 					cudaStreamSynchronize( streamDMA );
370 | 					CheckHostData( td->hostBuffer );
371 | 
372 | 					td->hostBlockRd++;
373 | 					int n=td->hostMonitor->status[nbuf].indexRd+1;
374 | 					if( n==td->outputCountBlock )
375 | 						n=0;
376 | 					td->hostMonitor->status[nbuf].indexRd=n;
377 | 
378 | 					n=nbuf+1;
379 | 					if( td->countOfBuffers==n )
380 | 						n=0;
381 | 					nbuf=n;
382 | 
383 | 					status=1;
384 | 				}
385 | 				break;
386 | 		}
387 | 
388 | 
389 | 
390 | 		usleep( 1000 );
391 | 
392 | 	}
393 | 
394 | 	usleep( 10000 );
395 | 
396 | 	td->ptrMonitor->flagExit=1;
397 | 
398 | 
399 | 	cudaStreamSynchronize( streamBuf0 );
400 | 	cudaStreamSynchronize( streamBuf1 );
401 | 	cudaStreamSynchronize( streamBuf2 );
402 | 
403 | 
404 | 	GetResult();
405 | 
406 | 	FillThreadDestroy();
407 | 
408 | 	return;
409 | 
410 | 
411 | 
412 | }
413 | 
414 | /**
415 |  * 		\brief	Display result for all buffers
416 |  *
417 |  */
418 | void TF_TestCnt::GetResult( void )
419 | {
420 | 	GetResultBuffer( 0 );
421 | 	GetResultBuffer( 1 );
422 | 	GetResultBuffer( 2 );
423 | 
424 | 	GetHostResult();
425 | }
426 | 
427 | /**
428 |  * 		\brief	Display result for one buffers
429 |  *
430 |  * 		\param	nbuf	number of buffer
431 |  *
432 |  */
433 | void TF_TestCnt::GetResultBuffer( int nbuf )
434 | {
435 | 
436 | 	TaskBufferStatus *ts=&(td->ptrMonitor->block[nbuf]);
437 | 	printf( "\nBuffer %d\n", nbuf );
438 | 	printf( "block_rd=%d\n", ts->blockRd );
439 | 	printf( "block_ok=%d\n", ts->blockOk );
440 | 	printf( "block_error=%d\n", ts->blockError );
441 | 
442 | 	int flag_ok=1;
443 | 	for( int ii=0; ii<TaskCounts;ii++)
444 | 	{
445 | 		if( 0!=ts->check[ii].cntError )
446 | 		{
447 | 			flag_ok=0;
448 | 			break;
449 | 		}
450 | 	}
451 | 
452 | 	if( 1==flag_ok )
453 | 	{
454 | 		printf( "Task 0:%d  - Ok\n",  TaskCounts-1 );
455 | 
456 | 	} else
457 | 	{
458 | 
459 | 
460 | 		for( int ii=0; ii<TaskCounts; ii++ )
461 | 		{
462 | 			unsigned int cntError=ts->check[ii].cntError;
463 | 			if( 0==cntError )
464 | 			{
465 | 				printf( "Task %d -Ok\n", ii );
466 | 			} else
467 | 			{
468 | 				printf( "\nTask %d \n", ii );
469 | 				printf( "   cntError=%d\n", cntError);
470 | 				if( cntError>16 )
471 | 					cntError=16;
472 | 				for( int jj=0; jj<cntError; jj++ )
473 | 				{
474 | 				 printf( "%2d block: %4d  addr: 0x%.4X  receive: 0x%.8lX  expect: 0x%.8lX\n",
475 | 						 jj,
476 | 						 ts->check[ii].nblock[jj],
477 | 						 ts->check[ii].adr[jj],
478 | 						 ts->check[ii].receive_data[jj],
479 | 						 ts->check[ii].expect_data[jj]
480 | 					 );
481 | 				}
482 | 			}
483 | 
484 | 		}
485 | 	}
486 | 
487 | 
488 | 
489 | }
490 | 
491 | void TF_TestCnt::FillThreadStart( void )
492 | {
493 |     int res = pthread_attr_init(&m_attrFillThread);
494 |     if(res != 0) {
495 |         fprintf(stderr, "%s\n", "Stream not started");
496 |         throw( "Stream not started" );
497 |     }
498 | 
499 |     res = pthread_attr_setdetachstate(&m_attrFillThread, PTHREAD_CREATE_JOINABLE);
500 |     if(res != 0) {
501 |         fprintf(stderr, "%s\n", "Stream not started");
502 |         throw( "Stream not started" );
503 |     }
504 | 
505 |     res = pthread_create(&m_hFillThread, &m_attrFillThread, FillThreadFunc, this);
506 |     if(res != 0) {
507 |         fprintf(stderr, "%s\n", "Stream not started");
508 |         throw( "Stream not started" );
509 |     }
510 | }
511 | 
512 | void TF_TestCnt::FillThreadDestroy( void )
513 | {
514 | 
515 | }
516 | 
517 | 
518 | void* TF_TestCnt::FillThreadFunc( void* lpvThreadParm )
519 | {
520 | 	TF_TestCnt *test=(TF_TestCnt*)lpvThreadParm;
521 |     void* ret;
522 |     if( !test )
523 |         return 0;
524 |     ret=test->FillExecute();
525 |     return ret;
526 | }
527 | 
528 | void* TF_TestCnt::FillExecute( void )
529 | {
530 | 
531 | 	//printf( "\nFillCounter Start\n");
532 | 	for( ; ; )
533 | 	{
534 | 
535 | //		Check for checkCounter finished checking buffer 0
536 | //		for( ; ; )
537 | //		{
538 | //		  val = td->ptrMonitor->block[0].irqFlag;
539 | //		  if( 0==val )
540 | //			  break;
541 | //		}
542 | 		FillCounter( &td->bar1[0]);
543 | 		td->ptrMonitor->block[0].irqFlag=1;
544 | 
545 | 		usleep( 1 );
546 | 		if( td->ptrMonitor->flagExit )
547 | 			break;
548 | 
549 | 
550 | //		Check for checkCounter finished checking buffer 1
551 | //		for( ; ; )
552 | //		{
553 | //		  val = td->ptrMonitor->block[1].irqFlag;
554 | //		  if( 0==val )
555 | //			  break;
556 | //		}
557 | 		FillCounter( &td->bar1[1]);
558 | 		td->ptrMonitor->block[1].irqFlag=1;
559 | 
560 | 		usleep( 1 );
561 | 		if( td->ptrMonitor->flagExit )
562 | 			break;
563 | 
564 | //		Check for checkCounter finished checking buffer 2
565 | //		for( ; ; )
566 | //		{
567 | //		  val = td->ptrMonitor->block[2].irqFlag;
568 | //		  if( 0==val )
569 | //			  break;
570 | //		}
571 | 		FillCounter( &td->bar1[2]);
572 | 		td->ptrMonitor->block[2].irqFlag=1;
573 | 
574 | 		usleep( 1 );
575 | 		if( td->ptrMonitor->flagExit )
576 | 			break;
577 | 
578 | 	}
579 | 	//printf( "\nFillCounter Stop\n");
580 | 
581 | 	return NULL;
582 | }
583 | 
584 | 
585 | //! Check received data
586 | void TF_TestCnt::CheckHostData( uint64_t* src )
587 | {
588 | 	//printf( "CheckHostData: 0x%.8lX \n", *src );
589 | 
590 | 	int cnt=td->outputSizeBlock/8;
591 | 
592 | 	uint64_t step=TaskCounts;
593 | 	uint64_t val;
594 | 	uint64_t expect_data = td->hostExpectData;
595 | 
596 | 	unsigned int errorCnt=td->hostCheck.cntError;
597 | 
598 | 	int flagError=0;
599 | 	for( int ii=0; ii<cnt; ii++ )
600 | 	{
601 | 		uint64_t	val;
602 | 		val = *src++;
603 | 
604 | 		if( val!=expect_data )
605 | 		{
606 | 			if( errorCnt<16 )
607 | 			{
608 | 				td->hostCheck.nblock[errorCnt]=td->hostBlockRd;
609 | 				td->hostCheck.adr[errorCnt]=ii;
610 | 				td->hostCheck.expect_data[errorCnt]=expect_data;
611 | 				td->hostCheck.receive_data[errorCnt]=val;
612 | 			}
613 | 			errorCnt++;
614 | 			flagError=1;
615 | 		}
616 | 		expect_data+=step;
617 | 	}
618 | 	td->hostExpectData=expect_data;
619 | 	td->hostCheck.cntError=errorCnt;
620 | 	if( flagError )
621 | 	{
622 | 		td->hostBlockError++;
623 | 	}
624 | 	else
625 | 	{
626 | 		td->hostBlockOk++;
627 | 	}
628 | 
629 | }
630 | 
631 | //! Print results for host buffer
632 | void TF_TestCnt::GetHostResult( void )
633 | {
634 | 	printf( "\nHost \n" );
635 | 	printf( "block_rd=%d\n", td->hostBlockRd );
636 | 	printf( "block_ok=%d\n", td->hostBlockOk);
637 | 	printf( "block_error=%d\n", td->hostBlockError );
638 | 
639 | 	unsigned int cntError=td->hostCheck.cntError;
640 | 	if( 0==cntError )
641 | 	{
642 | 		printf( "Host - Ok\n" );
643 | 	} else
644 | 	{
645 | 		printf( "   cntError=%d\n", cntError);
646 | 		if( cntError>16 )
647 | 			cntError=16;
648 | 		for( int jj=0; jj<cntError; jj++ )
649 | 		{
650 | 		 printf( "%2d block: %4d  addr: 0x%.4X  receive: 0x%.8lX  expect: 0x%.8lX\n",
651 | 				 jj,
652 | 				 td->hostCheck.nblock[jj],
653 | 				 td->hostCheck.adr[jj],
654 | 				 td->hostCheck.receive_data[jj],
655 | 				 td->hostCheck.expect_data[jj]
656 | 			 );
657 | 		}
658 | 	}
659 | 
660 | 
661 | }
662 | 


--------------------------------------------------------------------------------
/app_template/host/tf_testcnt.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TF_TestCnt.h
 3 |  *
 4 |  *  Created on: Jan 29, 2017
 5 |  *      Author: Dmitry Smekhov
 6 |  */
 7 | 
 8 | #ifndef TF_TESTCNT_H_
 9 | #define TF_TESTCNT_H_
10 | 
11 | #include <pthread.h>
12 | 
13 | #include "tf_testthread.h"
14 | 
15 | //class CL_Cuda;
16 | //struct CL_Cuda::BAR1_BUF;
17 | #include "cl_cuda.h"
18 | 
19 | 
20 | struct TaskData;
21 | struct TaskBufferStatus;
22 | 
23 | /**
24 |  *	\brief	Checking the transmission counter at CUDA device
25 |  *
26 |  *	Key actions:
27 |  *		-# Open CUDA device
28 |  *		-# Open gpumem driver
29 |  *		-# Allocate three buffers in the CUDA memory
30 |  *		-# Mapping buffers in the BAR1 space on CUDA device
31 |  *		-# Filling the buffer 64-bit counter via BAR1
32 |  *		-# Checking buffer in the CUDA device
33 |  *		-# Decimation buffer and transfer to the HOST
34 |  *		-# Transfer result of checking to HOST
35 |  *
36 |  *
37 |  *		Steps 5-8 are carried out in a loop
38 |  *
39 |  *
40 |  */
41 | class TF_TestCnt: public TF_TestThread
42 | {
43 | public:
44 | 	TF_TestCnt( int argc, char **argv );
45 | 	virtual ~TF_TestCnt();
46 | 
47 | 
48 | 	virtual void StepTable( void );
49 | 
50 | 	virtual void PrepareInThread( void );
51 | 
52 | 	virtual void CleanupInThread( void );
53 | 
54 | 	virtual void Run( void );
55 | 
56 | 	virtual void GetResult( void );
57 | 
58 | 	//! Number of arguments
59 | 	int	m_argc;
60 | 
61 | 	//! Pointers to arguments
62 | 	char** m_argv;
63 | 
64 | 
65 | 	struct TaskData		*td;		//!< Local data for test
66 | 
67 |     CL_Cuda				*m_pCuda;	//!< Cuda device
68 | 
69 | 
70 |     //! Fill buffer in Cuda memory via BAR1
71 |     void FillCounter( CL_Cuda::BAR1_BUF *pBar1 );
72 | 
73 |     //! Print results for buffer
74 |     void GetResultBuffer( int nbuf );
75 | 
76 | 
77 |     pthread_t 			m_hFillThread;
78 |     pthread_attr_t  	m_attrFillThread;
79 | 
80 | 
81 |     void FillThreadStart( void );
82 | 
83 |     void FillThreadDestroy( void );
84 | 
85 | 	static void* FillThreadFunc( void* lpvThreadParm );
86 | 
87 | 	void* FillExecute( void );
88 | 
89 | 
90 | 
91 | 	//! Check received data
92 | 	void CheckHostData( uint64_t* src );
93 | 
94 | 	//! Print results for host buffer
95 | 	void GetHostResult( void );
96 | };
97 | 
98 | #endif /* TF_TESTCNT_H_ */
99 | 


--------------------------------------------------------------------------------
/app_template/host/tf_testthread.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * TF_TestThread.cpp
  3 |  *
  4 |  *  Created on: Jan 29, 2017
  5 |  *      Author: Dmitry Smekhov
  6 |  */
  7 | 
  8 | #include <sys/types.h>
  9 | #include <sys/stat.h>
 10 | #include <stdio.h>
 11 | #include <string.h>
 12 | #include <stdlib.h>
 13 | 
 14 | #include "tf_testthread.h"
 15 | 
 16 | 
 17 | 
 18 | TF_TestThread::TF_TestThread( int argc, char **argv )
 19 | {
 20 | 	// TODO Auto-generated constructor stub
 21 | 
 22 | 	m_isPrepareComplete=0;
 23 | 	m_isComplete=0;
 24 | 	m_isTerminate=0;
 25 | 	m_CycleCnt=0;
 26 | 
 27 | 	pthread_mutex_t		m_StartMutex = PTHREAD_MUTEX_INITIALIZER;
 28 | 	pthread_cond_t		m_StartCond  = PTHREAD_COND_INITIALIZER;
 29 | 
 30 | }
 31 | 
 32 | TF_TestThread::~TF_TestThread()
 33 | {
 34 | 
 35 | }
 36 | 
 37 | 
 38 | 
 39 | int 	TF_TestThread::Prepare( int cnt )
 40 | {
 41 | 		if( 0==cnt )
 42 | 		{
 43 | 		    int res = pthread_attr_init(&m_attrThread);
 44 | 		    if(res != 0) {
 45 | 		        fprintf(stderr, "%s\n", "Stream not started");
 46 | 		        throw( "Stream not started" );
 47 | 		    }
 48 | 
 49 | 		    res = pthread_attr_setdetachstate(&m_attrThread, PTHREAD_CREATE_JOINABLE);
 50 | 		    if(res != 0) {
 51 | 		        fprintf(stderr, "%s\n", "Stream not started");
 52 | 		        throw( "Stream not started" );
 53 | 		    }
 54 | 
 55 | 		    res = pthread_create(&m_hThread, &m_attrThread, ThreadFunc, this);
 56 | 		    if(res != 0) {
 57 | 		        fprintf(stderr, "%s\n", "Stream not started");
 58 | 		        throw( "Stream not started" );
 59 | 		    }
 60 | 		}
 61 | 
 62 | 		int ret=m_isPrepareComplete;
 63 | 
 64 | 		return ret;
 65 | }
 66 | 
 67 | void* TF_TestThread::ThreadFunc( void* lpvThreadParm )
 68 | {
 69 | 	TF_TestThread *test=(TF_TestThread*)lpvThreadParm;
 70 |     void* ret;
 71 |     if( !test )
 72 |         return 0;
 73 |     ret=test->Execute();
 74 |     return ret;
 75 | }
 76 | 
 77 | void* TF_TestThread::Execute( void )
 78 | {
 79 | 		PrepareInThread();
 80 | 		m_isPrepareComplete=1;
 81 | 
 82 | 		// Wait for Start function
 83 | 		pthread_mutex_lock( &m_StartMutex );
 84 | 		pthread_cond_wait( &m_StartCond, &m_StartMutex );
 85 | 		pthread_mutex_unlock( &m_StartMutex );
 86 | 
 87 | 		Run();
 88 | 
 89 | 		CleanupInThread();
 90 | 
 91 | 		m_isComplete=1;
 92 | 		return NULL;
 93 | }
 94 | 
 95 | void	TF_TestThread::Start( void )
 96 | {
 97 | 
 98 | 	// Start Thread
 99 | 	pthread_mutex_lock( &m_StartMutex );
100 | 	pthread_cond_signal( &m_StartCond );
101 | 	pthread_mutex_unlock( &m_StartMutex );
102 | }
103 | 
104 | void 	TF_TestThread::Stop( void )
105 | {
106 | 	m_isTerminate=1;
107 | 	//fprintf( stderr, "%s - Ok\n", __FUNCTION__ );
108 | }
109 | 
110 | int		TF_TestThread::isComplete( void )
111 | {
112 | 		return m_isComplete;
113 | }
114 | 
115 | /**
116 |  * 	\brief 	get value from command line
117 |  *
118 |  * 	format command line:
119 |  * 	<name1> <value1> <name2> <value2>
120 |  *
121 |  * 	\param	argc		number of argument
122 |  * 	\param	argv		pointers to arguments
123 |  * 	\param	name		key of argument
124 |  * 	\parma	defValue	default value for arguments
125 |  *
126 |  * 	\return   value of argument or default value of argument
127 |  */
128 | int TF_TestThread::GetFromCommnadLine( int argc, char **argv, char* name, int defValue )
129 | {
130 | 	int ret=defValue;
131 | 	for( int ii=1; ii<argc-1; ii++ )
132 | 	{
133 | 		if( 0==strcmp( argv[ii], name) )
134 | 		{
135 | 			ret=atoi( argv[ii+1] );
136 | 		}
137 | 	}
138 | 	return ret;
139 | }
140 | 
141 | 
142 | 


--------------------------------------------------------------------------------
/app_template/host/tf_testthread.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TF_TestThread.h
 3 |  *
 4 |  *  Created on: Jan 29, 2017
 5 |  *      Author: Dmitry Smekhov
 6 |  */
 7 | 
 8 | #ifndef TF_TestThread_H_
 9 | #define TF_TestThread_H_
10 | 
11 | #include <pthread.h>
12 | #include "tf_test.h"
13 | 
14 | 
15 | /**
16 |  *	\brief	Base class for application with thread
17 |  *
18 |  *
19 |  *
20 |  */
21 | class TF_TestThread: public TF_Test {
22 | public:
23 | 	TF_TestThread( int argc, char **argv );
24 | 	virtual ~TF_TestThread();
25 | 
26 | 
27 | 	virtual int 	Prepare( int cnt );
28 | 
29 | 	virtual void	Start( void );
30 | 
31 | 	virtual void 	Stop( void );
32 | 
33 | 	virtual int		isComplete( void );
34 | 
35 | 	virtual void	StepTable( void ) {};
36 | 
37 | 
38 | 	static void* ThreadFunc( void* lpvThreadParm );
39 | 
40 | 	void* Execute( void );
41 | 
42 | 	virtual void PrepareInThread( void ) {};
43 | 
44 | 	virtual void CleanupInThread( void ) {};
45 | 
46 | 	virtual void Run( void ) {};
47 | 
48 | 
49 | 	int	m_isPrepareComplete;
50 | 	int	m_isComplete;
51 | 	int m_isTerminate;
52 | 
53 | 	int	m_CycleCnt;
54 | 
55 | 	pthread_mutex_t		m_StartMutex;
56 | 	pthread_cond_t		m_StartCond;
57 | 
58 |     pthread_t 			m_hThread;
59 |     pthread_attr_t  	m_attrThread;
60 | 
61 |     int GetFromCommnadLine( int argc, char **argv, char* name, int defValue );
62 | 
63 | };
64 | 
65 | #endif /* TF_TestThread_H_ */
66 | 


--------------------------------------------------------------------------------
/app_template/run_cycle_1M:
--------------------------------------------------------------------------------
1 | ./Debug/app_template  -count 0  -size 1024
2 | 


--------------------------------------------------------------------------------
/app_template/run_cycle_64M:
--------------------------------------------------------------------------------
1 | ./Debug/app_template  -count 0  -size 65536
2 | 


--------------------------------------------------------------------------------
/module/Makefile:
--------------------------------------------------------------------------------
 1 | 
 2 | KERNELVER := $(shell uname -r)
 3 | 
 4 | ifndef GPUDMA_DIR
 5 | 
 6 | NVIDIA_DRIVER_PATH := $(HOME)/gpudma/nvidia
 7 | 
 8 | else
 9 | 
10 | NVIDIA_DRIVER_PATH := $(GPUDMA_DIR)/nvidia
11 | 
12 | endif
13 | 
14 | KBUILD_EXTRA_SYMBOLS := $(NVIDIA_DRIVER_PATH)/kernel/Module.symvers
15 | EXTRA_CFLAGS += -fno-stack-protector
16 | #EXTRA_CFLAGS += -fno-stack-protector -fno-stack-protector-strong
17 | #EXTRA_CFLAGS += -fno-pie
18 | EXTRA_CFLAGS += -O2
19 | EXTRA_CFLAGS += -I$(NVIDIA_DRIVER_PATH)/kernel/nvidia
20 | 
21 | ccflags-y += $(EXTRA_CFLAGS)
22 | 
23 | ifneq ($(KERNELRELEASE),)
24 | 
25 | obj-m += gpumem.o
26 | gpumem-objs := gpumemproc.o ioctlrw.o gpumemdrv.o
27 | 
28 | else
29 | 
30 | endif
31 | 
32 | KERNELDIR ?= /lib/modules/$(shell uname -r)/build
33 | 
34 | 
35 | #PWD := $(shell pwd)
36 | #PWD := $(PWD)
37 | 
38 | 
39 | 
40 | all:
41 | 	$(MAKE) -C $(KERNELDIR) M=$(PWD) modules
42 | 
43 | 
44 | 
45 | distclean:
46 | 	rm -rf *.o *~ core .depend .*.cmd *.ko *.mod.c .tmp_versions *.bak .*.cache *.d
47 | clean:
48 | 	rm -rf *.o *~ core .depend .*.cmd *.ko *.mod.c .tmp_versions *.bak .*.cache *.d *.markers *.symvers *.order
49 | 


--------------------------------------------------------------------------------
/module/drvload.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Load the gpumem kernel module and make /dev/gpumem accessible to all users.

# Stop at the first failing command, so that chmod is not attempted (and the
# script does not hide the real error) when the module could not be inserted.
set -e

sudo insmod ./gpumem.ko
sudo chmod 666 /dev/gpumem
5 | 


--------------------------------------------------------------------------------
/module/gpumemdrv.c:
--------------------------------------------------------------------------------
  1 | 
  2 | #include <linux/kernel.h>
  3 | #include <linux/module.h>
  4 | #include <linux/version.h>
  5 | #include <linux/init.h>
  6 | #include <linux/fs.h>
  7 | #include <linux/ioport.h>
  8 | #include <linux/list.h>
  9 | #include <linux/pci.h>
 10 | #include <linux/proc_fs.h>
 11 | #include <linux/interrupt.h>
 12 | #include <linux/miscdevice.h>
 13 | #include <linux/platform_device.h>
 14 | //#include <linux/of.h>
 15 | //#include <linux/of_platform.h>
 16 | #include <asm/io.h>
 17 | 
 18 | #include <asm/uaccess.h>
 19 | #include <linux/types.h>
 20 | #include <linux/ioport.h>
 21 | #include <linux/poll.h>
 22 | #include <linux/slab.h>
 23 | #include <linux/interrupt.h>
 24 | 
 25 | #include "gpumemdrv.h"
 26 | #include "ioctlrw.h"
 27 | #include "gpumemioctl.h"
 28 | #include "gpumemproc.h"
 29 | 
 30 | //-----------------------------------------------------------------------------
 31 | 
 32 | MODULE_AUTHOR("Vladimir Karakozov. karakozov@gmail.com");
 33 | MODULE_LICENSE("GPL");
 34 | 
 35 | //-----------------------------------------------------------------------------
 36 | static struct gpumem dev;
 37 | //-----------------------------------------------------------------------------
 38 | 
 39 | static struct gpumem *file_to_device( struct file *file )
 40 | {
 41 |     return (struct gpumem*)file->private_data;
 42 | }
 43 | 
 44 | //--------------------------------------------------------------------
 45 | 
 46 | static int gpumem_open( struct inode *inode, struct file *file )
 47 | {
 48 |     file->private_data = (void*)&dev;
 49 |     return 0;
 50 | }
 51 | 
 52 | //-----------------------------------------------------------------------------
 53 | 
 54 | static int gpumem_close( struct inode *inode, struct file *file )
 55 | {
 56 |     file->private_data = 0;
 57 |     return 0;
 58 | }
 59 | 
 60 | //-----------------------------------------------------------------------------
 61 | 
 62 | static long gpumem_ioctl( struct file *file, unsigned int cmd, unsigned long arg )
 63 | {
 64 |     int error = 0;
 65 |     struct gpumem *dev = file_to_device(file);
 66 |     if(!dev) {
 67 |         printk(KERN_ERR"%s(): ioctl driver failed\n", __FUNCTION__);
 68 |         return -ENODEV;
 69 |     }
 70 | 
 71 |     switch(cmd) {
 72 | 
 73 |     case IOCTL_GPUMEM_LOCK: error = ioctl_mem_lock(dev, arg); break;
 74 |     case IOCTL_GPUMEM_UNLOCK: error = ioctl_mem_unlock(dev, arg); break;
 75 |     case IOCTL_GPUMEM_STATE: error = ioctl_mem_state(dev, arg); break;
 76 |     default:
 77 |         printk(KERN_DEBUG"%s(): Unknown ioctl command\n", __FUNCTION__);
 78 |         error = -EINVAL;
 79 |         break;
 80 |     }
 81 | 
 82 |     return error;
 83 | }
 84 | 
 85 | //-----------------------------------------------------------------------------
 86 | 
 87 | int gpumem_mmap(struct file *file, struct vm_area_struct *vma)
 88 | {
 89 |     size_t size = vma->vm_end - vma->vm_start;
 90 | 
 91 |     if (!(vma->vm_flags & VM_MAYSHARE))
 92 |         return -EINVAL;
 93 | 
 94 |     vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 95 | 
 96 |     if (remap_pfn_range(vma,
 97 |                         vma->vm_start,
 98 |                         vma->vm_pgoff,
 99 |                         size,
100 |                         vma->vm_page_prot)) {
101 |         pr_err("%s(): error in remap_page_range.\n", __func__ );
102 |         return -EAGAIN;
103 |     }
104 | 
105 |     return 0;
106 | }
107 | 
108 | //-----------------------------------------------------------------------------
109 | 
110 | struct file_operations gpumem_fops = {
111 | 
112 |     .owner = THIS_MODULE,
113 |     .unlocked_ioctl = gpumem_ioctl,
114 |     .compat_ioctl = gpumem_ioctl,
115 |     .open = gpumem_open,
116 |     .release = gpumem_close,
117 |     .mmap = gpumem_mmap,
118 | };
119 | 
120 | //-----------------------------------------------------------------------------
121 | 
122 | static struct miscdevice gpumem_dev = {
123 | 
124 |     MISC_DYNAMIC_MINOR,
125 |     GPUMEM_DRIVER_NAME,
126 |     &gpumem_fops
127 | };
128 | 
129 | //-----------------------------------------------------------------------------
130 | 
131 | static int __init gpumem_init(void)
132 | {
133 |     pr_info(GPUMEM_DRIVER_NAME ": %s()\n", __func__);
134 |     dev.proc = 0;
135 |     sema_init(&dev.sem, 1);
136 |     INIT_LIST_HEAD(&dev.table_list);
137 |     gpumem_register_proc(GPUMEM_DRIVER_NAME, 0, &dev);
138 |     misc_register(&gpumem_dev);
139 |     return 0;
140 | }
141 | 
142 | //-----------------------------------------------------------------------------
143 | 
144 | static void __exit gpumem_cleanup(void)
145 | {
146 |     pr_info(GPUMEM_DRIVER_NAME ": %s()\n", __func__);
147 |     gpumem_remove_proc(GPUMEM_DRIVER_NAME);
148 |     misc_deregister(&gpumem_dev);
149 | }
150 | 
151 | //-----------------------------------------------------------------------------
152 | 
153 | module_init(gpumem_init);
154 | module_exit(gpumem_cleanup);
155 | 
156 | //-----------------------------------------------------------------------------
157 | 


--------------------------------------------------------------------------------
/module/gpumemdrv.h:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #ifndef GPUMEM_H
 4 | #define GPUMEM_H
 5 | 
 6 | //-----------------------------------------------------------------------------
 7 | 
 8 | #include <linux/cdev.h>
 9 | #include <linux/sched.h>
10 | #include <linux/version.h>
11 | #include <linux/semaphore.h>
12 | 
13 | #include "nv-p2p.h"
14 | 
15 | //-----------------------------------------------------------------------------
16 | 
17 | struct gpumem_t {
18 |     struct list_head list;
19 |     void *handle;
20 |     u64 virt_start;
21 |     nvidia_p2p_page_table_t* page_table;
22 | };
23 | 
24 | //-----------------------------------------------------------------------------
25 | 
26 | struct gpumem {
27 |     struct semaphore         sem;
28 |     struct proc_dir_entry*   proc;
29 |     struct list_head         table_list;
30 | };
31 | 
32 | //-----------------------------------------------------------------------------
33 | 
34 | int get_nv_page_size(int val);
35 | 
36 | //-----------------------------------------------------------------------------
37 | 
38 | #endif
39 | 


--------------------------------------------------------------------------------
/module/gpumemioctl.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef __GPUDMAIOTCL_H__
 3 | #define __GPUDMAIOTCL_H__
 4 | 
 5 | //-----------------------------------------------------------------------------
 6 | 
 7 | #define GPUMEM_DRIVER_NAME             "gpumem"
 8 | 
 9 | //-----------------------------------------------------------------------------
10 | 
11 | #ifdef __linux__
12 | #include <linux/types.h>
13 | #ifndef __KERNEL__
14 | #include <sys/ioctl.h>
15 | #endif
16 | #define GPUMEM_DEVICE_TYPE             'g'
17 | #define GPUMEM_MAKE_IOCTL(c) _IO(GPUMEM_DEVICE_TYPE, (c))
18 | #endif
19 | 
20 | #define IOCTL_GPUMEM_LOCK		GPUMEM_MAKE_IOCTL(10)
21 | #define IOCTL_GPUMEM_UNLOCK		GPUMEM_MAKE_IOCTL(11)
22 | #define IOCTL_GPUMEM_STATE		GPUMEM_MAKE_IOCTL(12)
23 | 
24 | //-----------------------------------------------------------------------------
25 | // for boundary alignment requirement
26 | #define GPU_BOUND_SHIFT 16
27 | #define GPU_BOUND_SIZE ((u64)1 << GPU_BOUND_SHIFT)
28 | #define GPU_BOUND_OFFSET (GPU_BOUND_SIZE-1)
29 | #define GPU_BOUND_MASK (~GPU_BOUND_OFFSET)
30 | 
31 | //-----------------------------------------------------------------------------
32 | 
/* Parameter block used by ioctl_mem_lock() (IOCTL_GPUMEM_LOCK). */
struct gpudma_lock_t {
    void*    handle;        /* presumably filled in by the driver - TODO confirm */
    uint64_t addr;          /* presumably the GPU virtual address to lock - TODO confirm */
    uint64_t size;          /* presumably the size of the region in bytes - TODO confirm */
    size_t   page_count;    /* presumably the number of pages - TODO confirm */
};
39 | 
40 | //-----------------------------------------------------------------------------
41 | 
/* Parameter block, presumably for IOCTL_GPUMEM_UNLOCK - TODO confirm. */
struct gpudma_unlock_t {
    void*    handle;        /* presumably the handle obtained from the lock call - TODO confirm */
};
45 | 
46 | //-----------------------------------------------------------------------------
47 | 
/* Parameter block, presumably for IOCTL_GPUMEM_STATE - TODO confirm. */
struct gpudma_state_t {
    void*       handle;         /* presumably the handle obtained from the lock call - TODO confirm */
    size_t      page_count;     /* presumably the number of entries in pages[] - TODO confirm */
    size_t      page_size;      /* presumably the size of one page in bytes - TODO confirm */
    uint64_t    pages[1];       /* declared with one element; presumably a variable-length tail - TODO confirm */
};
54 | 
55 | //-----------------------------------------------------------------------------
56 | 
57 | 
58 | #endif //_GPUDMAIOTCL_H_
59 | 


--------------------------------------------------------------------------------
/module/gpumemproc.c:
--------------------------------------------------------------------------------
  1 | 
  2 | #include <linux/kernel.h>
  3 | #define __NO_VERSION__
  4 | #include <linux/module.h>
  5 | #include <linux/types.h>
  6 | #include <linux/version.h>
  7 | #include <linux/ioport.h>
  8 | #include <linux/pci.h>
  9 | #include <linux/pagemap.h>
 10 | #include <linux/interrupt.h>
 11 | #include <linux/proc_fs.h>
 12 | #include <linux/seq_file.h>
 13 | #include <linux/semaphore.h>
 14 | #include <asm/io.h>
 15 | 
 16 | #include "gpumemdrv.h"
 17 | #include "gpumemproc.h"
 18 | 
 19 | //--------------------------------------------------------------------
 20 | 
 21 | struct log_buf_t {
 22 |     struct seq_file *param;
 23 | };
 24 | 
 25 | //--------------------------------------------------------------------
 26 | 
 27 | #define print_info(S...) seq_printf(S)
 28 | 
 29 | //--------------------------------------------------------------------
 30 | 
 31 | static void show_mem_info( struct gpumem *drv, struct seq_file *m )
 32 | {
 33 |     struct list_head *pos, *n;
 34 |     int i=0, idx=0;
 35 |     if(!drv || !m) {
 36 |         printk(KERN_DEBUG"%s(): EINVAL\n", __FUNCTION__ );
 37 |         return;
 38 |     }
 39 | 
 40 |     print_info(m, "%s\n", "Pinned memory info:");
 41 | 
 42 |     list_for_each_safe(pos, n, &drv->table_list) {
 43 | 
 44 |         struct gpumem_t *entry = list_entry(pos, struct gpumem_t, list);
 45 |         if(entry) {
 46 |             if(entry->virt_start) {
 47 | 
 48 |                 print_info(m, "%d: Entry - %p\n", idx, entry);
 49 |                 print_info(m, "Virtual GPU address - 0x%llx\n", entry->virt_start);
 50 |                 print_info(m, "Number of pages - %d\n", entry->page_table->entries);
 51 |                 print_info(m, "Page size - 0x%x\n", get_nv_page_size(entry->page_table->page_size));
 52 | 
 53 |                 for(i=0; i<entry->page_table->entries; i++) {
 54 |                     struct nvidia_p2p_page *nvp = entry->page_table->pages[i];
 55 |                     if(nvp) {
 56 |                         print_info(m, "%02d: - 0x%llx\n", i, nvp->physical_address);
 57 |                     }
 58 |                 }
 59 | 
 60 |                 print_info(m, "\n");
 61 | 
 62 |                 ++idx;
 63 |             }
 64 |         }
 65 |     }
 66 | }
 67 | 
 68 | //--------------------------------------------------------------------
 69 | 
 70 | static int gpumem_proc_show(struct seq_file *m, void *v)
 71 | {
 72 |     struct gpumem *p = m->private;
 73 | 
 74 |     show_mem_info( p, m );
 75 | 
 76 |     return 0;
 77 | }
 78 | 
 79 | //--------------------------------------------------------------------
 80 | 
 81 | static int gpumem_proc_open(struct inode *inode, struct file *file)
 82 | {
 83 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)
 84 |     struct gpumem *p = (struct gpumem *)PDE_DATA(inode);
 85 | #else
 86 |     struct gpumem *p = (struct gpumem *)PDE(inode)->data;
 87 | #endif
 88 |     return single_open(file, gpumem_proc_show, p);
 89 | }
 90 | 
 91 | //--------------------------------------------------------------------
 92 | 
 93 | static int gpumem_proc_release(struct inode *inode, struct file *file)
 94 | {
 95 |     return single_release(inode, file);
 96 | }
 97 | 
 98 | //--------------------------------------------------------------------
 99 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0)
100 | static const struct proc_ops gpumem_proc_fops = {
101 |     .proc_open           = gpumem_proc_open,
102 |     .proc_read           = seq_read,
103 |     .proc_lseek          = seq_lseek,
104 |     .proc_release        = gpumem_proc_release,
105 | };
106 | #else
107 | static const struct file_operations gpumem_proc_fops = {
108 |     .owner          = THIS_MODULE,
109 |     .open           = gpumem_proc_open,
110 |     .read           = seq_read,
111 |     .llseek         = seq_lseek,
112 |     .release        = gpumem_proc_release,
113 | };
114 | #endif
115 | 
116 | //--------------------------------------------------------------------
117 | 
118 | void gpumem_register_proc( char *name, void *fptr, void *data )
119 | {
120 |     struct gpumem *p = (struct gpumem*)data;
121 | 
122 |     if(!data) {
123 |         printk(KERN_DEBUG"%s(): Invalid driver pointer\n", __FUNCTION__ );
124 |         return;
125 |     }
126 | 
127 |     p->proc = proc_create_data(name, S_IRUGO, NULL, &gpumem_proc_fops, p);
128 |     if(!p->proc) {
129 |         printk(KERN_DEBUG"%s(): Error register /proc entry\n", __FUNCTION__);
130 |     }
131 | }
132 | 
133 | //--------------------------------------------------------------------
134 | 
135 | void gpumem_remove_proc( char *name )
136 | {
137 |     remove_proc_entry(name, NULL);
138 | }
139 | 
140 | //--------------------------------------------------------------------
141 | 
142 | 


--------------------------------------------------------------------------------
/module/gpumemproc.h:
--------------------------------------------------------------------------------
1 | 
#if !defined(__GPUDMAPROC_H__)
#define __GPUDMAPROC_H__

/*
 * Create the /proc entry `name`. `data` must point to the driver's
 * struct gpumem; `fptr` is accepted but not used by the implementation.
 * Failures are only reported through the kernel log.
 */
void gpumem_register_proc(char *name, void *fptr, void *data);

/* Remove the /proc entry `name` created by gpumem_register_proc(). */
void gpumem_remove_proc(char *name);

#endif /* __GPUDMAPROC_H__ */
9 | 


--------------------------------------------------------------------------------
/module/ioctlrw.c:
--------------------------------------------------------------------------------
  1 | 
  2 | #include <linux/kernel.h>
  3 | #define __NO_VERSION__
  4 | #include <linux/module.h>
  5 | #include <linux/types.h>
  6 | #include <linux/ioport.h>
  7 | #include <linux/pci.h>
  8 | #include <linux/pagemap.h>
  9 | #include <linux/interrupt.h>
 10 | #include <linux/proc_fs.h>
 11 | #include <asm/io.h>
 12 | 
 13 | #include "gpumemdrv.h"
 14 | #include "gpumemioctl.h"
 15 | 
 16 | //-----------------------------------------------------------------------------
 17 | 
 18 | int get_nv_page_size(int val)
 19 | {
 20 |     switch(val) {
 21 |     case NVIDIA_P2P_PAGE_SIZE_4KB: return 4*1024;
 22 |     case NVIDIA_P2P_PAGE_SIZE_64KB: return 64*1024;
 23 |     case NVIDIA_P2P_PAGE_SIZE_128KB: return 128*1024;
 24 |     }
 25 |     return 0;
 26 | }
 27 | 
 28 | //--------------------------------------------------------------------
 29 | 
 30 | void free_nvp_callback(void *data)
 31 | {
 32 |     int res;
 33 |     struct gpumem_t *entry = (struct gpumem_t*)data;
 34 |     if(entry) {
 35 |         res = nvidia_p2p_free_page_table(entry->page_table);
 36 |         if(res == 0) {
 37 |             printk(KERN_ERR"%s(): nvidia_p2p_free_page_table() - OK!\n", __FUNCTION__);
 38 |             //entry->virt_start = 0ULL;
 39 |             //entry->page_table = 0;
 40 |         } else {
 41 |             printk(KERN_ERR"%s(): Error in nvidia_p2p_free_page_table()\n", __FUNCTION__);
 42 |         }
 43 |     }
 44 | }
 45 | 
 46 | //-----------------------------------------------------------------------------
 47 | 
 48 | int ioctl_mem_lock(struct gpumem *drv, unsigned long arg)
 49 | {
 50 |     int error = 0;
 51 |     size_t pin_size = 0ULL;
 52 |     struct gpumem_t *entry = 0;
 53 |     struct gpudma_lock_t param;
 54 | 
 55 |     if(copy_from_user(&param, (void *)arg, sizeof(struct gpudma_lock_t))) {
 56 |         printk(KERN_ERR"%s(): Error in copy_from_user()\n", __FUNCTION__);
 57 |         error = -EFAULT;
 58 |         goto do_exit;
 59 |     }
 60 | 
 61 |     entry = (struct gpumem_t*)kzalloc(sizeof(struct gpumem_t), GFP_KERNEL);
 62 |     if(!entry) {
 63 |         printk(KERN_ERR"%s(): Error allocate memory to mapping struct\n", __FUNCTION__);
 64 |         error = -ENOMEM;
 65 |         goto do_exit;
 66 |     }
 67 | 
 68 |     INIT_LIST_HEAD(&entry->list);
 69 |     entry->handle = entry;
 70 | 
 71 |     entry->virt_start = (param.addr & GPU_BOUND_MASK);
 72 |     pin_size = (param.addr + param.size - entry->virt_start);
 73 |     if(!pin_size) {
 74 |         printk(KERN_ERR"%s(): Error invalid memory size!\n", __FUNCTION__);
 75 |         error = -EINVAL;
 76 |         goto do_free_mem;
 77 |     }
 78 | 
 79 |     error = nvidia_p2p_get_pages(0, 0, entry->virt_start, pin_size, &entry->page_table, free_nvp_callback, entry);
 80 |     if(error != 0) {
 81 |         printk(KERN_ERR"%s(): Error in nvidia_p2p_get_pages()\n", __FUNCTION__);
 82 |         error = -EINVAL;
 83 |         goto do_free_mem;
 84 |     }
 85 | 
 86 |     param.page_count = entry->page_table->entries;
 87 |     param.handle = entry;
 88 | 
 89 |     printk(KERN_ERR"%s(): param.handle: %p\n", __FUNCTION__, param.handle);
 90 | 
 91 |     if(copy_to_user((void *)arg, &param, sizeof(struct gpudma_lock_t))) {
 92 |         printk(KERN_ERR"%s(): Error in copy_from_user()\n", __FUNCTION__);
 93 |         error = -EFAULT;
 94 |         goto do_unlock_pages;
 95 |     }
 96 | 
 97 |     list_add_tail(&entry->list, &drv->table_list);
 98 | 
 99 |     printk(KERN_ERR"%s(): Add new entry. handle: %p\n", __FUNCTION__, entry->handle);
100 | 
101 |     return 0;
102 | 
103 | do_unlock_pages:
104 |     nvidia_p2p_put_pages(0, 0, entry->virt_start, entry->page_table);
105 | do_free_mem:
106 |     kfree(entry);
107 | do_exit:
108 |     return error;
109 | }
110 | 
111 | //-----------------------------------------------------------------------------
112 | 
113 | int ioctl_mem_unlock(struct gpumem *drv, unsigned long arg)
114 | {
115 |     int error = -EINVAL;
116 |     struct gpumem_t *entry = 0;
117 |     struct gpudma_unlock_t param;
118 |     struct list_head *pos, *n;
119 | 
120 |     if(copy_from_user(&param, (void *)arg, sizeof(struct gpudma_unlock_t))) {
121 |         printk(KERN_ERR"%s(): Error in copy_from_user()\n", __FUNCTION__);
122 |         error = -EFAULT;
123 |         goto do_exit;
124 |     }
125 | 
126 |     list_for_each_safe(pos, n, &drv->table_list) {
127 | 
128 |         entry = list_entry(pos, struct gpumem_t, list);
129 |         if(entry) {
130 |             if(entry->handle == param.handle) {
131 | 
132 |                 printk(KERN_ERR"%s(): param.handle = %p\n", __FUNCTION__, param.handle);
133 |                 printk(KERN_ERR"%s(): entry.handle = %p\n", __FUNCTION__, entry->handle);
134 | 
135 |                 if(entry->virt_start && entry->page_table) {
136 |                     error = nvidia_p2p_put_pages(0, 0, entry->virt_start, entry->page_table);
137 |                     if(error != 0) {
138 |                         printk(KERN_ERR"%s(): Error in nvidia_p2p_put_pages()\n", __FUNCTION__);
139 |                         goto do_exit;
140 |                     }
141 |                     //entry->virt_start = 0ULL;
142 |                     //entry->page_table = 0;
143 |                     printk(KERN_ERR"%s(): nvidia_p2p_put_pages() - Ok!\n", __FUNCTION__);
144 |                 }
145 | 
146 |                 list_del(pos);
147 |                 kfree(entry);
148 |                 break;
149 |             } else {
150 |                 printk(KERN_ERR"%s(): Skip entry: %p\n", __FUNCTION__, entry->handle);
151 |             }
152 |         }
153 |     }
154 | 
155 | do_exit:
156 |     return error;
157 | }
158 | 
159 | //-----------------------------------------------------------------------------
160 | 
161 | int ioctl_mem_state(struct gpumem *drv, unsigned long arg)
162 | {
163 |     int error = 0;
164 |     int size = 0;
165 |     int i=0;
166 |     struct gpumem_t *entry = 0;
167 |     struct gpudma_state_t header;
168 |     struct gpudma_state_t *param;
169 |     struct list_head *pos, *n;
170 | 
171 |     if(copy_from_user(&header, (void *)arg, sizeof(struct gpudma_state_t))) {
172 |         printk(KERN_ERR"%s(): Error in copy_from_user()\n", __FUNCTION__);
173 |         error = -EFAULT;
174 |         goto do_exit;
175 |     }
176 | 
177 |     list_for_each_safe(pos, n, &drv->table_list) {
178 | 
179 |         entry = list_entry(pos, struct gpumem_t, list);
180 |         if(entry) {
181 |             if(entry->handle == header.handle) {
182 | 
183 |                 printk(KERN_ERR"%s(): param.handle = %p\n", __FUNCTION__, header.handle);
184 |                 printk(KERN_ERR"%s(): entry.handle = %p\n", __FUNCTION__, entry->handle);
185 | 
186 |                 if(!entry->page_table) {
187 |                     printk(KERN_ERR"%s(): Error - memory not pinned!\n", __FUNCTION__);
188 |                     return -EINVAL;
189 |                 }
190 | 
191 |                 if((entry->page_table->entries != header.page_count) || (entry->handle != header.handle)) {
192 |                     printk(KERN_ERR"%s(): Error - page counters or handle invalid!\n", __FUNCTION__);
193 |                     return -EINVAL;
194 |                 }
195 | 
196 |                 size = (sizeof(uint64_t)*header.page_count) + sizeof(struct gpudma_state_t);
197 |                 param = kzalloc(size, GFP_KERNEL);
198 |                 if(!param) {
199 |                     printk(KERN_ERR"%s(): Error allocate memory!\n", __FUNCTION__);
200 |                     return -ENOMEM;
201 |                 }
202 |                 param->page_size = get_nv_page_size(entry->page_table->page_size);
203 |                 for(i=0; i<entry->page_table->entries; i++) {
204 |                     struct nvidia_p2p_page *nvp = entry->page_table->pages[i];
205 |                     if(nvp) {
206 |                         param->pages[i] = nvp->physical_address;
207 |                         param->page_count++;
208 |                         printk(KERN_ERR"%s(): %02d - 0x%llx\n", __FUNCTION__, i, param->pages[i]);
209 |                     }
210 |                 }
211 |                 printk(KERN_ERR"%s(): page_count = %ld\n", __FUNCTION__, (long int)param->page_count);
212 |                 param->handle = header.handle;
213 |                 if(copy_to_user((void *)arg, param, size)) {
214 |                     printk(KERN_DEBUG"%s(): Error in copy_to_user()\n", __FUNCTION__);
215 |                     error = -EFAULT;
216 |                 }
217 | 
218 |                 kfree(param);
219 |             } else {
220 |                 printk(KERN_ERR"%s(): Skip entry: %p\n", __FUNCTION__, entry->handle);
221 |             }
222 |         }
223 |     }
224 | 
225 | do_exit:
226 |     return error;
227 | }
228 | 
229 | //-----------------------------------------------------------------------------
230 | 


--------------------------------------------------------------------------------
/module/ioctlrw.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef _IOCTLRW_H_
 3 | #define _IOCTLRW_H_
 4 | 
 5 | //-----------------------------------------------------------------------------
 6 | 
 7 | int ioctl_mem_lock(struct gpumem *drv, unsigned long arg);
 8 | int ioctl_mem_unlock(struct gpumem *drv, unsigned long arg);
 9 | int ioctl_mem_state(struct gpumem *drv, unsigned long arg);
10 | 
11 | //-----------------------------------------------------------------------------
12 | 
13 | #endif //_IOCTLRW_H_
14 | 


--------------------------------------------------------------------------------