├── .gitignore ├── README.md ├── app ├── Makefile └── main.cpp ├── app_template ├── .cproject ├── .project ├── app_template.dxy ├── common │ ├── gpumemioctl.h │ ├── utypes.h │ └── utypes_linux.h ├── create_doc ├── cuda │ └── check_counter.cu ├── host │ ├── cl_cuda.cu │ ├── cl_cuda.h │ ├── cl_cuda_test.cpp │ ├── main.cpp │ ├── task_data.h │ ├── tf_test.h │ ├── tf_testcnt.cpp │ ├── tf_testcnt.h │ ├── tf_testthread.cpp │ └── tf_testthread.h ├── run_cycle_1M └── run_cycle_64M └── module ├── Makefile ├── drvload.sh ├── gpumemdrv.c ├── gpumemdrv.h ├── gpumemioctl.h ├── gpumemproc.c ├── gpumemproc.h ├── ioctlrw.c └── ioctlrw.h /.gitignore: -------------------------------------------------------------------------------- 1 | # 2 | # NOTE! Don't add files that are generated in specific 3 | # subdirectories here. Add them in the ".gitignore" file 4 | # in that subdirectory instead. 5 | # 6 | # Normal rules 7 | # 8 | .* 9 | *.o 10 | *.o.* 11 | *.a 12 | *.s 13 | *.su 14 | *.mod.c 15 | *.i 16 | *.lst 17 | *.order 18 | *.elf 19 | *.swp 20 | *.bin 21 | *.patch 22 | *.cfgtmp 23 | *.orig 24 | *~ 25 | \#*# 26 | *.ko 27 | *.symvers 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GPUDirect RDMA example. 2 | 3 | ## Install 4 | 1 Clone the repo and get the source code of the NVIDIA-Linux-x86_64-X.Y driver, 5 | the same version as the one installed on your system. 6 | 7 | 2 Extract it in the gpudma project directory and create a symbolic link "nvidia" to the NVIDIA-Linux-x86_64-X.Y driver directory. 8 | The default location is ~/gpudma; for another location you must set the variable GPUDMA_DIR, for example: GPUDMA_DIR=/xprj/gpudma 9 | 10 | 3 Build the NVIDIA driver in nvidia/kernel. Only the Module.symvers file from the nvidia/kernel directory is needed. 11 | 12 | 4 Build the gpumem module. 
13 | 14 | 5 Build application app 15 | 16 | 6 Build application app_template 17 | 18 | **Linux commands:** 19 | 20 | git clone https://github.com/karakozov/gpudma.git 21 | 22 | cp ~/Downloads/NVIDIA-Linux-x86_64-367.57.run ~/gpudma 23 | 24 | ./NVIDIA-Linux-x86_64-367.57.run -x 25 | 26 | ln -svf NVIDIA-Linux-x86_64-367.57 nvidia 27 | 28 | cd ~/gpudma/nvidia/kernel && make 29 | 30 | cd ~/gpudma/module && make 31 | 32 | cd ~/gpudma/app && make 33 | 34 | ## Load driver 35 | 36 | cd ~/gpudma/module && ./drvload.sh 37 | 38 | Check driver: ls -l /dev/gpumem 39 | 40 | crw-rw-rw-. 1 root root 10, 55 Apr 2 21:57 /dev/gpumem 41 | 42 | ## Run app example 43 | 44 | cd ~/gpudma/app && ./gpu_direct 45 | 46 | The application creates a CUDA context and allocates GPU memory. 47 | The memory pointer is passed to the gpumem module. The gpumem module gets the addresses of all physical 48 | pages of the allocated area and the GPU page size. The application can get the addresses, mmap() them, 49 | fill them with a data pattern and unmap all of them. Then it releases the GPU memory allocation and unlocks the pages. 50 | 51 | The test must finish with the message: "Test successful" 52 | 53 | ## Build and run app_template 54 | 55 | app_template must be built with Nsight Eclipse Edition from NVIDIA. 56 | 57 | Command line for launch: **app_template** **-count** ncount **-size** nsize 58 | * ncount - number of blocks to read, 0 - for an infinite cycle. Default is 16; 59 | * nsize - size of one buffer in kbytes. Maximum size is 65536. Default is 256; 60 | 61 | The main mode is the infinite cycle (ncount=0). There are two commands for launching the application: 62 | * run_cycle_1M - launch with buffers of 1 megabyte 63 | * run_cycle_64M - launch with buffers of 64 megabytes 64 | 65 | The infinite cycle must be run only from a console. Nsight Eclipse Edition cannot correctly display a status line that uses the "\r" symbol. If you know how to make it work, please let me know. 66 | To launch the application from Nsight Eclipse Edition, use a non-zero value for the count argument. This is enough for debugging. 
67 | 68 | The main execution stages are: 69 | 70 | 1. Create an instance of TF_TestCnt - launch a thread for working with CUDA 71 | 72 | 2. Prepare 73 | * Open device 74 | * Allocate three buffers of the requested size and map them into BAR1 - class CL_Cuda 75 | * Allocate a 64 kbyte buffer for struct TaskMonitor 76 | * Allocate page-locked HOST memory for td->hostBuffer 77 | * Allocate page-locked HOST memory for struct TaskHostMonitor 78 | 79 | 3. Launch the main cycle - TF_TestCnt::Run() 80 | * Launch a thread for filling buffers - TF_TestCnt::FillThreadStart() 81 | * Launch a kernel for checking data - run_checkCounter() 82 | * Check the flag in the host memory and start a DMA transfer - cudaMemcpyAsync() 83 | * Check data: TestCnt::CheckHostData() 84 | * Measure the speed of data transfer 85 | 86 | 4. Periodically call function TF_TestCnt::StepTable() from function main() to display status information. This works only in infinite cycle mode. The function displays several parameters: 87 | * CUDA_RD - number of received buffers to CUDA 88 | * CUDA_OK - number of correct buffers to CUDA 89 | * CUDA_ERR - number of incorrect buffers to CUDA 90 | * HOST_RD - number of received buffers to HOST 91 | * HOST_OK - number of correct buffers to HOST 92 | * HOST_ERR - number of incorrect buffers to HOST 93 | * E2C_CUR - current speed of data transfer from external device to CUDA 94 | * E2C_AVR - average speed of data transfer from external device to CUDA 95 | * C2H_CUR - current speed of data transfer from CUDA to HOST 96 | * C2H_AVR - average speed of data transfer from CUDA to HOST 97 | 98 | 5. Function run_checkCounter() launches a warp of 32 threads to check the data. 99 | 100 | Thread 0 differs from the others: 101 | * Read ts->irqFlag from the global memory and write it to the local warp memory. 102 | * Write checking data to output buffers 103 | 104 | Thread 0 and the other threads: 105 | * Check flag ptrMonitor->flagExit and exit if it is set. 
106 | * Check received data 107 | * Write first 16 errors to struct "check" 108 | 109 | 6. Display the result after exiting the main cycle - TF_TestCnt::GetResult() 110 | 111 | 7. Free memory 112 | 113 | Some notes: 114 | * app_template/create_doc - creates documentation via doxygen 115 | * Class CL_Cuda_private holds the internal data of CL_Cuda 116 | * File task_data.h contains the structs: 117 | * TaskData - internal task for TF_TestCnt 118 | * TaskMonitor - struct for shared memory in CUDA 119 | * TaskHostMonitor - struct for shared memory in the HOST 120 | * TaskBufferStatus - struct for working with one buffer 121 | * TaskCheckData - struct for error data 122 | * const int TaskCounts=32 - number of threads in the warp 123 | 124 | 125 | 126 | 127 | -------------------------------------------------------------------------------- /app/Makefile: -------------------------------------------------------------------------------- 1 | 2 | PHONY = clean all 3 | TARGET_NAME = gpu_direct 4 | 5 | all: $(TARGET_NAME) 6 | 7 | ROOT_DIR = $(shell pwd) 8 | 9 | CC = $(CROSS_COMPILE)gcc 10 | LD = $(CROSS_COMPILE)gcc 11 | 12 | #NVIDIA_DRIVER_PATH := $(HOME)/gpudma/nvidia 13 | NVIDIA_DRIVER_PATH := ../nvidia 14 | 15 | 16 | INCDIR := . 
$(CUDADIR)/include ../module $(NVIDIA_DRIVER_PATH)/kernel/nvidia 17 | INCLUDE := $(addprefix -I, $(INCDIR)) 18 | CFLAGS := -D__LINUX__ -g -Wall $(INCLUDE) 19 | SRCFILE := $(wildcard *.cpp) 20 | OBJFILE := $(patsubst %.cpp,%.o, $(SRCFILE)) 21 | 22 | #LDFLAGS := -Wl,-rpath,$(CUDADIR)/lib64/stubs -L"$(CUDADIR)/lib64/stubs" -lcuda 23 | #LDFLAGS := -L/usr/local/cuda/lib64 -lcudart -L/usr/local/cuda/lib64/stubs -lcuda $(LIBS) 24 | LDFLAGS += -L$(CUDADIR)/lib64 -lcuda 25 | #LDFLAGS += -L$(CUDADIR)/lib64 -lcudart 26 | LDFLAGS += -lstdc++ 27 | 28 | $(TARGET_NAME): $(OBJFILE) 29 | $(LD) $(notdir $^) -o $(TARGET_NAME) $(LDFLAGS) 30 | cp $(TARGET_NAME) ../bin 31 | 32 | %.o: %.cpp 33 | $(CC) $(CFLAGS) -c -MD $< 34 | 35 | include $(wildcard *.d) 36 | 37 | 38 | clean: 39 | rm -f *.o *~ core 40 | rm -f *.d *~ core 41 | rm -f $(TARGET_NAME) 42 | 43 | distclean: 44 | rm -f *.o *~ core 45 | rm -f *.d *~ core 46 | rm -f $(TARGET_NAME) 47 | 48 | src: 49 | @echo $(SRCFILE) 50 | @echo $(OBJFILE) 51 | -------------------------------------------------------------------------------- /app/main.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "cuda.h" 3 | //#include "cuda_runtime_api.h" 4 | #include "gpumemioctl.h" 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | //----------------------------------------------------------------------------- 23 | 24 | void checkError(CUresult status); 25 | bool wasError(CUresult status); 26 | 27 | //----------------------------------------------------------------------------- 28 | 29 | int main(int argc, char *argv[]) 30 | { 31 | gpudma_lock_t lock; 32 | gpudma_unlock_t unlock; 33 | gpudma_state_t *state = 0; 34 | int statesize = 0; 35 | int res = -1; 36 | unsigned count=0x0A000000; 37 | 38 | int fd = open("/dev/"GPUMEM_DRIVER_NAME, 
O_RDWR, 0); 39 | if (fd < 0) { 40 | printf("Error open file %s\n", "/dev/"GPUMEM_DRIVER_NAME); 41 | return -1; 42 | } 43 | 44 | checkError(cuInit(0)); 45 | 46 | int total = 0; 47 | checkError(cuDeviceGetCount(&total)); 48 | fprintf(stderr, "Total devices: %d\n", total); 49 | 50 | CUdevice device; 51 | checkError(cuDeviceGet(&device, 0)); 52 | 53 | char name[256]; 54 | checkError(cuDeviceGetName(name, 256, device)); 55 | fprintf(stderr, "Select device: %s\n", name); 56 | 57 | // get compute capabilities and the devicename 58 | int major = 0, minor = 0; 59 | checkError( cuDeviceComputeCapability(&major, &minor, device)); 60 | fprintf(stderr, "Compute capability: %d.%d\n", major, minor); 61 | 62 | size_t global_mem = 0; 63 | checkError( cuDeviceTotalMem(&global_mem, device)); 64 | fprintf(stderr, "Global memory: %llu MB\n", (unsigned long long)(global_mem >> 20)); 65 | if(global_mem > (unsigned long long)4*1024*1024*1024L) 66 | fprintf(stderr, "64-bit Memory Address support\n"); 67 | 68 | CUcontext context; 69 | checkError(cuCtxCreate(&context, 0, device)); 70 | 71 | size_t size = 0x100000; 72 | CUdeviceptr dptr = 0; 73 | unsigned int flag = 1; 74 | unsigned char *h_odata = NULL; 75 | h_odata = (unsigned char *)malloc(size); 76 | 77 | CUresult status = cuMemAlloc(&dptr, size); 78 | if(wasError(status)) { 79 | goto do_free_context; 80 | } 81 | 82 | fprintf(stderr, "Allocate memory address: 0x%llx\n", (unsigned long long)dptr); 83 | 84 | status = cuPointerSetAttribute(&flag, CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, dptr); 85 | if(wasError(status)) { 86 | goto do_free_memory; 87 | } 88 | 89 | fprintf(stderr, "Press enter to lock\n"); 90 | //getchar(); 91 | 92 | // TODO: add kernel driver interaction... 93 | lock.addr = dptr; 94 | lock.size = size; 95 | res = ioctl(fd, IOCTL_GPUMEM_LOCK, &lock); 96 | if(res < 0) { 97 | fprintf(stderr, "Error in IOCTL_GPUDMA_MEM_LOCK\n"); 98 | goto do_free_attr; 99 | } 100 | 101 | fprintf(stderr, "Press enter to get state. 
We lock %ld pages\n", lock.page_count); 102 | //getchar(); 103 | 104 | statesize = (lock.page_count*sizeof(uint64_t) + sizeof(struct gpudma_state_t)); 105 | state = (struct gpudma_state_t*)malloc(statesize); 106 | if(!state) { 107 | goto do_free_attr; 108 | } 109 | memset(state, 0, statesize); 110 | state->handle = lock.handle; 111 | state->page_count = lock.page_count; 112 | res = ioctl(fd, IOCTL_GPUMEM_STATE, state); 113 | if(res < 0) { 114 | fprintf(stderr, "Error in IOCTL_GPUDMA_MEM_UNLOCK\n"); 115 | goto do_unlock; 116 | } 117 | 118 | fprintf(stderr, "Page count 0x%lx\n", state->page_count); 119 | fprintf(stderr, "Page size 0x%lx\n", state->page_size); 120 | 121 | for(unsigned i=0; ipage_count; i++) { 122 | fprintf(stderr, "%02d: 0x%lx\n", i, state->pages[i]); 123 | void* va = mmap(0, state->page_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, (off_t)state->pages[i]); 124 | if(va == MAP_FAILED ) { 125 | fprintf(stderr, "%s(): %s\n", __FUNCTION__, strerror(errno)); 126 | va = 0; 127 | } else { 128 | //memset(va, 0x55, state->page_size); 129 | unsigned *ptr=(unsigned*)va; 130 | for( unsigned jj=0; jj<(state->page_size/4); jj++ ) 131 | { 132 | *ptr++=count++; 133 | } 134 | 135 | fprintf(stderr, "%s(): Physical Address 0x%lx -> Virtual Address %p\n", __FUNCTION__, state->pages[i], va); 136 | munmap(va, state->page_size); 137 | } 138 | } 139 | 140 | { 141 | //const void* d_idata = (const void*)dptr; 142 | //cudaMemcpy(h_odata, d_idata, size, cudaMemcpyDeviceToHost); 143 | //cudaDeviceSynchronize(); 144 | 145 | cuMemcpyDtoH( h_odata, dptr, size ); 146 | cuCtxSynchronize(); 147 | 148 | unsigned *ptr = (unsigned*)h_odata; 149 | unsigned val; 150 | unsigned expect_data=0x0A000000; 151 | unsigned cnt=size/4; 152 | unsigned error_cnt=0; 153 | for( unsigned ii=0; ii 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 36 | 41 | 42 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 
63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | -------------------------------------------------------------------------------- /app_template/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | app_template 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.cdt.managedbuilder.core.genmakebuilder 10 | clean,full,incremental, 11 | 12 | 13 | 14 | 15 | org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder 16 | full,incremental, 17 | 18 | 19 | 20 | 21 | 22 | org.eclipse.cdt.core.cnature 23 | org.eclipse.cdt.core.ccnature 24 | org.eclipse.cdt.managedbuilder.core.managedBuildNature 25 | org.eclipse.cdt.managedbuilder.core.ScannerConfigNature 26 | 27 | 28 | -------------------------------------------------------------------------------- /app_template/app_template.dxy: -------------------------------------------------------------------------------- 1 | # Doxyfile 1.6.1 2 | 3 | # This file describes the settings to be used by the documentation system 4 | # doxygen (www.doxygen.org) for a project 5 | # 6 | # All text after a hash (#) is considered a comment and will be ignored 7 | # The format is: 8 | # TAG = value [value, ...] 9 | # For lists items can also be appended using: 10 | # TAG += value [value, ...] 
11 | # Values that contain spaces should be placed between quotes (" ") 12 | 13 | #--------------------------------------------------------------------------- 14 | # Project related configuration options 15 | #--------------------------------------------------------------------------- 16 | 17 | # This tag specifies the encoding used for all characters in the config file 18 | # that follow. The default is UTF-8 which is also the encoding used for all 19 | # text before the first occurrence of this tag. Doxygen uses libiconv (or the 20 | # iconv built into libc) for the transcoding. See 21 | # http://www.gnu.org/software/libiconv for the list of possible encodings. 22 | 23 | DOXYFILE_ENCODING = UTF-8 24 | 25 | # The PROJECT_NAME tag is a single word (or a sequence of words surrounded 26 | # by quotes) that should identify the project. 27 | 28 | PROJECT_NAME = APP_TEMPLATE 29 | 30 | # The PROJECT_NUMBER tag can be used to enter a project or revision number. 31 | # This could be handy for archiving the generated documentation or 32 | # if some version control system is used. 33 | 34 | PROJECT_NUMBER = 0.1 35 | 36 | # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) 37 | # base path where the generated documentation will be put. 38 | # If a relative path is entered, it will be relative to the location 39 | # where doxygen was started. If left blank the current directory will be used. 40 | 41 | OUTPUT_DIRECTORY = ./doc 42 | 43 | # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 44 | # 4096 sub-directories (in 2 levels) under the output directory of each output 45 | # format and will distribute the generated files over these directories. 46 | # Enabling this option can be useful when feeding doxygen a huge amount of 47 | # source files, where putting all generated files in the same directory would 48 | # otherwise cause performance problems for the file system. 
49 | 50 | CREATE_SUBDIRS = NO 51 | 52 | # The OUTPUT_LANGUAGE tag is used to specify the language in which all 53 | # documentation generated by doxygen is written. Doxygen will use this 54 | # information to generate all constant output in the proper language. 55 | # The default language is English, other supported languages are: 56 | # Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, 57 | # Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German, 58 | # Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English 59 | # messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, 60 | # Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrilic, Slovak, 61 | # Slovene, Spanish, Swedish, Ukrainian, and Vietnamese. 62 | 63 | OUTPUT_LANGUAGE = English 64 | 65 | # If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will 66 | # include brief member descriptions after the members that are listed in 67 | # the file and class documentation (similar to JavaDoc). 68 | # Set to NO to disable this. 69 | 70 | BRIEF_MEMBER_DESC = YES 71 | 72 | # If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend 73 | # the brief description of a member or function before the detailed description. 74 | # Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the 75 | # brief descriptions will be completely suppressed. 76 | 77 | REPEAT_BRIEF = YES 78 | 79 | # This tag implements a quasi-intelligent brief description abbreviator 80 | # that is used to form the text in various listings. Each string 81 | # in this list, if found as the leading text of the brief description, will be 82 | # stripped from the text and the result after processing the whole list, is 83 | # used as the annotated text. Otherwise, the brief description is used as-is. 
84 | # If left blank, the following values are used ("$name" is automatically 85 | # replaced with the name of the entity): "The $name class" "The $name widget" 86 | # "The $name file" "is" "provides" "specifies" "contains" 87 | # "represents" "a" "an" "the" 88 | 89 | ABBREVIATE_BRIEF = 90 | 91 | # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then 92 | # Doxygen will generate a detailed section even if there is only a brief 93 | # description. 94 | 95 | ALWAYS_DETAILED_SEC = NO 96 | 97 | # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all 98 | # inherited members of a class in the documentation of that class as if those 99 | # members were ordinary class members. Constructors, destructors and assignment 100 | # operators of the base classes will not be shown. 101 | 102 | INLINE_INHERITED_MEMB = NO 103 | 104 | # If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full 105 | # path before files name in the file list and in the header files. If set 106 | # to NO the shortest path that makes the file name unique will be used. 107 | 108 | FULL_PATH_NAMES = YES 109 | 110 | # If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag 111 | # can be used to strip a user-defined part of the path. Stripping is 112 | # only done if one of the specified strings matches the left-hand part of 113 | # the path. The tag can be used to show relative paths in the file list. 114 | # If left blank the directory from which doxygen is run is used as the 115 | # path to strip. 116 | 117 | STRIP_FROM_PATH = 118 | 119 | # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of 120 | # the path mentioned in the documentation of a class, which tells 121 | # the reader which header file to include in order to use a class. 122 | # If left blank only the name of the header file containing the class 123 | # definition is used. 
Otherwise one should specify the include paths that 124 | # are normally passed to the compiler using the -I flag. 125 | 126 | STRIP_FROM_INC_PATH = 127 | 128 | # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter 129 | # (but less readable) file names. This can be useful is your file systems 130 | # doesn't support long names like on DOS, Mac, or CD-ROM. 131 | 132 | SHORT_NAMES = NO 133 | 134 | # If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen 135 | # will interpret the first line (until the first dot) of a JavaDoc-style 136 | # comment as the brief description. If set to NO, the JavaDoc 137 | # comments will behave just like regular Qt-style comments 138 | # (thus requiring an explicit @brief command for a brief description.) 139 | 140 | JAVADOC_AUTOBRIEF = NO 141 | 142 | # If the QT_AUTOBRIEF tag is set to YES then Doxygen will 143 | # interpret the first line (until the first dot) of a Qt-style 144 | # comment as the brief description. If set to NO, the comments 145 | # will behave just like regular Qt-style comments (thus requiring 146 | # an explicit \brief command for a brief description.) 147 | 148 | QT_AUTOBRIEF = NO 149 | 150 | # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen 151 | # treat a multi-line C++ special comment block (i.e. a block of //! or /// 152 | # comments) as a brief description. This used to be the default behaviour. 153 | # The new default is to treat a multi-line C++ comment block as a detailed 154 | # description. Set this tag to YES if you prefer the old behaviour instead. 155 | 156 | MULTILINE_CPP_IS_BRIEF = NO 157 | 158 | # If the INHERIT_DOCS tag is set to YES (the default) then an undocumented 159 | # member inherits the documentation from any documented member that it 160 | # re-implements. 161 | 162 | INHERIT_DOCS = YES 163 | 164 | # If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce 165 | # a new page for each member. 
If set to NO, the documentation of a member will 166 | # be part of the file/class/namespace that contains it. 167 | 168 | SEPARATE_MEMBER_PAGES = NO 169 | 170 | # The TAB_SIZE tag can be used to set the number of spaces in a tab. 171 | # Doxygen uses this value to replace tabs by spaces in code fragments. 172 | 173 | TAB_SIZE = 8 174 | 175 | # This tag can be used to specify a number of aliases that acts 176 | # as commands in the documentation. An alias has the form "name=value". 177 | # For example adding "sideeffect=\par Side Effects:\n" will allow you to 178 | # put the command \sideeffect (or @sideeffect) in the documentation, which 179 | # will result in a user-defined paragraph with heading "Side Effects:". 180 | # You can put \n's in the value part of an alias to insert newlines. 181 | 182 | ALIASES = 183 | 184 | # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C 185 | # sources only. Doxygen will then generate output that is more tailored for C. 186 | # For instance, some of the names that are used will be different. The list 187 | # of all members will be omitted, etc. 188 | 189 | OPTIMIZE_OUTPUT_FOR_C = NO 190 | 191 | # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java 192 | # sources only. Doxygen will then generate output that is more tailored for 193 | # Java. For instance, namespaces will be presented as packages, qualified 194 | # scopes will look different, etc. 195 | 196 | OPTIMIZE_OUTPUT_JAVA = NO 197 | 198 | # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran 199 | # sources only. Doxygen will then generate output that is more tailored for 200 | # Fortran. 201 | 202 | OPTIMIZE_FOR_FORTRAN = NO 203 | 204 | # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL 205 | # sources. Doxygen will then generate output that is tailored for 206 | # VHDL. 
207 | 208 | OPTIMIZE_OUTPUT_VHDL = NO 209 | 210 | # Doxygen selects the parser to use depending on the extension of the files it parses. 211 | # With this tag you can assign which parser to use for a given extension. 212 | # Doxygen has a built-in mapping, but you can override or extend it using this tag. 213 | # The format is ext=language, where ext is a file extension, and language is one of 214 | # the parsers supported by doxygen: IDL, Java, Javascript, C#, C, C++, D, PHP, 215 | # Objective-C, Python, Fortran, VHDL, C, C++. For instance to make doxygen treat 216 | # .inc files as Fortran files (default is PHP), and .f files as C (default is Fortran), 217 | # use: inc=Fortran f=C. Note that for custom extensions you also need to set FILE_PATTERNS otherwise the files are not read by doxygen. 218 | 219 | EXTENSION_MAPPING = C++ 220 | 221 | # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want 222 | # to include (a tag file for) the STL sources as input, then you should 223 | # set this tag to YES in order to let doxygen match functions declarations and 224 | # definitions whose arguments contain STL classes (e.g. func(std::string); v.s. 225 | # func(std::string) {}). This also make the inheritance and collaboration 226 | # diagrams that involve STL classes more complete and accurate. 227 | 228 | BUILTIN_STL_SUPPORT = YES 229 | 230 | # If you use Microsoft's C++/CLI language, you should set this option to YES to 231 | # enable parsing support. 232 | 233 | CPP_CLI_SUPPORT = NO 234 | 235 | # Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. 236 | # Doxygen will parse them like normal C++ but will assume all classes use public 237 | # instead of private inheritance when no explicit protection keyword is present. 238 | 239 | SIP_SUPPORT = NO 240 | 241 | # For Microsoft's IDL there are propget and propput attributes to indicate getter 242 | # and setter methods for a property. 
Setting this option to YES (the default) 243 | # will make doxygen to replace the get and set methods by a property in the 244 | # documentation. This will only work if the methods are indeed getting or 245 | # setting a simple type. If this is not the case, or you want to show the 246 | # methods anyway, you should set this option to NO. 247 | 248 | IDL_PROPERTY_SUPPORT = YES 249 | 250 | # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC 251 | # tag is set to YES, then doxygen will reuse the documentation of the first 252 | # member in the group (if any) for the other members of the group. By default 253 | # all members of a group must be documented explicitly. 254 | 255 | DISTRIBUTE_GROUP_DOC = NO 256 | 257 | # Set the SUBGROUPING tag to YES (the default) to allow class member groups of 258 | # the same type (for instance a group of public functions) to be put as a 259 | # subgroup of that type (e.g. under the Public Functions section). Set it to 260 | # NO to prevent subgrouping. Alternatively, this can be done per class using 261 | # the \nosubgrouping command. 262 | 263 | SUBGROUPING = YES 264 | 265 | # When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum 266 | # is documented as struct, union, or enum with the name of the typedef. So 267 | # typedef struct TypeS {} TypeT, will appear in the documentation as a struct 268 | # with name TypeT. When disabled the typedef will appear as a member of a file, 269 | # namespace, or class. And the struct will be named TypeS. This can typically 270 | # be useful for C code in case the coding convention dictates that all compound 271 | # types are typedef'ed and only the typedef is referenced, never the tag name. 272 | 273 | TYPEDEF_HIDES_STRUCT = NO 274 | 275 | # The SYMBOL_CACHE_SIZE determines the size of the internal cache use to 276 | # determine which symbols to keep in memory and which to flush to disk. 
277 | # When the cache is full, less often used symbols will be written to disk. 278 | # For small to medium size projects (<1000 input files) the default value is 279 | # probably good enough. For larger projects a too small cache size can cause 280 | # doxygen to be busy swapping symbols to and from disk most of the time 281 | # causing a significant performance penalty. 282 | # If the system has enough physical memory increasing the cache will improve the 283 | # performance by keeping more symbols in memory. Note that the value works on 284 | # a logarithmic scale so increasing the size by one will roughly double the 285 | # memory usage. The cache size is given by this formula: 286 | # 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0, 287 | # corresponding to a cache size of 2^16 = 65536 symbols 288 | 289 | SYMBOL_CACHE_SIZE = 0 290 | 291 | #--------------------------------------------------------------------------- 292 | # Build related configuration options 293 | #--------------------------------------------------------------------------- 294 | 295 | # If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in 296 | # documentation are documented, even if no documentation was available. 297 | # Private class members and static file members will be hidden unless 298 | # the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES 299 | 300 | EXTRACT_ALL = YES 301 | 302 | # If the EXTRACT_PRIVATE tag is set to YES all private members of a class 303 | # will be included in the documentation. 304 | 305 | EXTRACT_PRIVATE = YES 306 | 307 | # If the EXTRACT_STATIC tag is set to YES all static members of a file 308 | # will be included in the documentation. 309 | 310 | EXTRACT_STATIC = YES 311 | 312 | # If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) 313 | # defined locally in source files will be included in the documentation. 314 | # If set to NO only classes defined in header files are included. 
315 | 316 | EXTRACT_LOCAL_CLASSES = YES 317 | 318 | # This flag is only useful for Objective-C code. When set to YES local 319 | # methods, which are defined in the implementation section but not in 320 | # the interface are included in the documentation. 321 | # If set to NO (the default) only methods in the interface are included. 322 | 323 | EXTRACT_LOCAL_METHODS = NO 324 | 325 | # If this flag is set to YES, the members of anonymous namespaces will be 326 | # extracted and appear in the documentation as a namespace called 327 | # 'anonymous_namespace{file}', where file will be replaced with the base 328 | # name of the file that contains the anonymous namespace. By default 329 | # anonymous namespace are hidden. 330 | 331 | EXTRACT_ANON_NSPACES = NO 332 | 333 | # If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all 334 | # undocumented members of documented classes, files or namespaces. 335 | # If set to NO (the default) these members will be included in the 336 | # various overviews, but no documentation section is generated. 337 | # This option has no effect if EXTRACT_ALL is enabled. 338 | 339 | HIDE_UNDOC_MEMBERS = NO 340 | 341 | # If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all 342 | # undocumented classes that are normally visible in the class hierarchy. 343 | # If set to NO (the default) these classes will be included in the various 344 | # overviews. This option has no effect if EXTRACT_ALL is enabled. 345 | 346 | HIDE_UNDOC_CLASSES = NO 347 | 348 | # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all 349 | # friend (class|struct|union) declarations. 350 | # If set to NO (the default) these declarations will be included in the 351 | # documentation. 352 | 353 | HIDE_FRIEND_COMPOUNDS = NO 354 | 355 | # If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any 356 | # documentation blocks found inside the body of a function. 
357 | # If set to NO (the default) these blocks will be appended to the 358 | # function's detailed documentation block. 359 | 360 | HIDE_IN_BODY_DOCS = NO 361 | 362 | # The INTERNAL_DOCS tag determines if documentation 363 | # that is typed after a \internal command is included. If the tag is set 364 | # to NO (the default) then the documentation will be excluded. 365 | # Set it to YES to include the internal documentation. 366 | 367 | INTERNAL_DOCS = NO 368 | 369 | # If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate 370 | # file names in lower-case letters. If set to YES upper-case letters are also 371 | # allowed. This is useful if you have classes or files whose names only differ 372 | # in case and if your file system supports case sensitive file names. Windows 373 | # and Mac users are advised to set this option to NO. 374 | 375 | CASE_SENSE_NAMES = YES 376 | 377 | # If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen 378 | # will show members with their full class and namespace scopes in the 379 | # documentation. If set to YES the scope will be hidden. 380 | 381 | HIDE_SCOPE_NAMES = NO 382 | 383 | # If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen 384 | # will put a list of the files that are included by a file in the documentation 385 | # of that file. 386 | 387 | SHOW_INCLUDE_FILES = YES 388 | 389 | # If the INLINE_INFO tag is set to YES (the default) then a tag [inline] 390 | # is inserted in the documentation for inline members. 391 | 392 | INLINE_INFO = YES 393 | 394 | # If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen 395 | # will sort the (detailed) documentation of file and class members 396 | # alphabetically by member name. If set to NO the members will appear in 397 | # declaration order. 
398 | 399 | SORT_MEMBER_DOCS = YES 400 | 401 | # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the 402 | # brief documentation of file, namespace and class members alphabetically 403 | # by member name. If set to NO (the default) the members will appear in 404 | # declaration order. 405 | 406 | SORT_BRIEF_DOCS = NO 407 | 408 | # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the (brief and detailed) documentation of class members so that constructors and destructors are listed first. If set to NO (the default) the constructors will appear in the respective orders defined by SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO. 409 | 410 | SORT_MEMBERS_CTORS_1ST = NO 411 | 412 | # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the 413 | # hierarchy of group names into alphabetical order. If set to NO (the default) 414 | # the group names will appear in their defined order. 415 | 416 | SORT_GROUP_NAMES = NO 417 | 418 | # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be 419 | # sorted by fully-qualified names, including namespaces. If set to 420 | # NO (the default), the class list will be sorted only by class name, 421 | # not including the namespace part. 422 | # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. 423 | # Note: This option applies only to the class list, not to the 424 | # alphabetical list. 425 | 426 | SORT_BY_SCOPE_NAME = NO 427 | 428 | # The GENERATE_TODOLIST tag can be used to enable (YES) or 429 | # disable (NO) the todo list. This list is created by putting \todo 430 | # commands in the documentation. 431 | 432 | GENERATE_TODOLIST = YES 433 | 434 | # The GENERATE_TESTLIST tag can be used to enable (YES) or 435 | # disable (NO) the test list. This list is created by putting \test 436 | # commands in the documentation. 
437 | 438 | GENERATE_TESTLIST = YES 439 | 440 | # The GENERATE_BUGLIST tag can be used to enable (YES) or 441 | # disable (NO) the bug list. This list is created by putting \bug 442 | # commands in the documentation. 443 | 444 | GENERATE_BUGLIST = YES 445 | 446 | # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or 447 | # disable (NO) the deprecated list. This list is created by putting 448 | # \deprecated commands in the documentation. 449 | 450 | GENERATE_DEPRECATEDLIST= YES 451 | 452 | # The ENABLED_SECTIONS tag can be used to enable conditional 453 | # documentation sections, marked by \if sectionname ... \endif. 454 | 455 | ENABLED_SECTIONS = 456 | 457 | # The MAX_INITIALIZER_LINES tag determines the maximum number of lines 458 | # the initial value of a variable or define consists of for it to appear in 459 | # the documentation. If the initializer consists of more lines than specified 460 | # here it will be hidden. Use a value of 0 to hide initializers completely. 461 | # The appearance of the initializer of individual variables and defines in the 462 | # documentation can be controlled using \showinitializer or \hideinitializer 463 | # command in the documentation regardless of this setting. 464 | 465 | MAX_INITIALIZER_LINES = 30 466 | 467 | # Set the SHOW_USED_FILES tag to NO to disable the list of files generated 468 | # at the bottom of the documentation of classes and structs. If set to YES the 469 | # list will mention the files that were used to generate the documentation. 470 | 471 | SHOW_USED_FILES = YES 472 | 473 | # If the sources in your project are distributed over multiple directories 474 | # then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy 475 | # in the documentation. The default is NO. 476 | 477 | SHOW_DIRECTORIES = NO 478 | 479 | # Set the SHOW_FILES tag to NO to disable the generation of the Files page. 
480 | # This will remove the Files entry from the Quick Index and from the 481 | # Folder Tree View (if specified). The default is YES. 482 | 483 | SHOW_FILES = YES 484 | 485 | # Set the SHOW_NAMESPACES tag to NO to disable the generation of the 486 | # Namespaces page. 487 | # This will remove the Namespaces entry from the Quick Index 488 | # and from the Folder Tree View (if specified). The default is YES. 489 | 490 | SHOW_NAMESPACES = YES 491 | 492 | # The FILE_VERSION_FILTER tag can be used to specify a program or script that 493 | # doxygen should invoke to get the current version for each file (typically from 494 | # the version control system). Doxygen will invoke the program by executing (via 495 | # popen()) the command <command> <input-file>, where <command> is the value of 496 | # the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file 497 | # provided by doxygen. Whatever the program writes to standard output 498 | # is used as the file version. See the manual for examples. 499 | 500 | FILE_VERSION_FILTER = 501 | 502 | # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed by 503 | # doxygen. The layout file controls the global structure of the generated output files 504 | # in an output format independent way. To create the layout file that represents 505 | # doxygen's defaults, run doxygen with the -l option. You can optionally specify a 506 | # file name after the option, if omitted DoxygenLayout.xml will be used as the name 507 | # of the layout file. 508 | 509 | LAYOUT_FILE = 510 | 511 | #--------------------------------------------------------------------------- 512 | # configuration options related to warning and progress messages 513 | #--------------------------------------------------------------------------- 514 | 515 | # The QUIET tag can be used to turn on/off the messages that are generated 516 | # by doxygen. Possible values are YES and NO. If left blank NO is used.
517 | 518 | QUIET = NO 519 | 520 | # The WARNINGS tag can be used to turn on/off the warning messages that are 521 | # generated by doxygen. Possible values are YES and NO. If left blank 522 | # NO is used. 523 | 524 | WARNINGS = YES 525 | 526 | # If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings 527 | # for undocumented members. If EXTRACT_ALL is set to YES then this flag will 528 | # automatically be disabled. 529 | 530 | WARN_IF_UNDOCUMENTED = YES 531 | 532 | # If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for 533 | # potential errors in the documentation, such as not documenting some 534 | # parameters in a documented function, or documenting parameters that 535 | # don't exist or using markup commands wrongly. 536 | 537 | WARN_IF_DOC_ERROR = YES 538 | 539 | # This WARN_NO_PARAMDOC option can be enabled to get warnings for 540 | # functions that are documented, but have no documentation for their parameters 541 | # or return value. If set to NO (the default) doxygen will only warn about 542 | # wrong or incomplete parameter documentation, but not about the absence of 543 | # documentation. 544 | 545 | WARN_NO_PARAMDOC = NO 546 | 547 | # The WARN_FORMAT tag determines the format of the warning messages that 548 | # doxygen can produce. The string should contain the $file, $line, and $text 549 | # tags, which will be replaced by the file and line number from which the 550 | # warning originated and the warning text. Optionally the format may contain 551 | # $version, which will be replaced by the version of the file (if it could 552 | # be obtained via FILE_VERSION_FILTER) 553 | 554 | WARN_FORMAT = "$file:$line: $text" 555 | 556 | # The WARN_LOGFILE tag can be used to specify a file to which warning 557 | # and error messages should be written. If left blank the output is written 558 | # to stderr.
559 | 560 | WARN_LOGFILE = 561 | 562 | #--------------------------------------------------------------------------- 563 | # configuration options related to the input files 564 | #--------------------------------------------------------------------------- 565 | 566 | # The INPUT tag can be used to specify the files and/or directories that contain 567 | # documented source files. You may enter file names like "myfile.cpp" or 568 | # directories like "/usr/src/myproject". Separate the files or directories 569 | # with spaces. 570 | 571 | INPUT = ./cuda ./host ./common 572 | 573 | # This tag can be used to specify the character encoding of the source files 574 | # that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is 575 | # also the default input encoding. Doxygen uses libiconv (or the iconv built 576 | # into libc) for the transcoding. See http://www.gnu.org/software/libiconv for 577 | # the list of possible encodings. 578 | 579 | INPUT_ENCODING = UTF-8 580 | 581 | # If the value of the INPUT tag contains directories, you can use the 582 | # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp 583 | # and *.h) to filter out the source-files in the directories. If left 584 | # blank the following patterns are tested: 585 | # *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx 586 | # *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90 587 | 588 | FILE_PATTERNS = *.h *.cpp *.cu 589 | 590 | # The RECURSIVE tag can be used to specify whether or not subdirectories 591 | # should be searched for input files as well. Possible values are YES and NO. 592 | # If left blank NO is used. 593 | 594 | RECURSIVE = YES 595 | 596 | # The EXCLUDE tag can be used to specify files and/or directories that should 597 | # be excluded from the INPUT source files. This way you can easily exclude a 598 | # subdirectory from a directory tree whose root is specified with the INPUT tag.
599 | 600 | EXCLUDE = 601 | 602 | # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or 603 | # directories that are symbolic links (a Unix filesystem feature) are excluded 604 | # from the input. 605 | 606 | EXCLUDE_SYMLINKS = NO 607 | 608 | # If the value of the INPUT tag contains directories, you can use the 609 | # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude 610 | # certain files from those directories. Note that the wildcards are matched 611 | # against the file with absolute path, so to exclude all test directories 612 | # for example use the pattern */test/* 613 | 614 | EXCLUDE_PATTERNS = 615 | 616 | # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names 617 | # (namespaces, classes, functions, etc.) that should be excluded from the 618 | # output. The symbol name can be a fully qualified name, a word, or if the 619 | # wildcard * is used, a substring. Examples: ANamespace, AClass, 620 | # AClass::ANamespace, ANamespace::*Test 621 | 622 | EXCLUDE_SYMBOLS = 623 | 624 | # The EXAMPLE_PATH tag can be used to specify one or more files or 625 | # directories that contain example code fragments that are included (see 626 | # the \include command). 627 | 628 | EXAMPLE_PATH = 629 | 630 | # If the value of the EXAMPLE_PATH tag contains directories, you can use the 631 | # EXAMPLE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp 632 | # and *.h) to filter out the source-files in the directories. If left 633 | # blank all files are included. 634 | 635 | EXAMPLE_PATTERNS = 636 | 637 | # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be 638 | # searched for input files to be used with the \include or \dontinclude 639 | # commands irrespective of the value of the RECURSIVE tag. 640 | # Possible values are YES and NO. If left blank NO is used.
641 | 642 | EXAMPLE_RECURSIVE = NO 643 | 644 | # The IMAGE_PATH tag can be used to specify one or more files or 645 | # directories that contain images that are included in the documentation (see 646 | # the \image command). 647 | 648 | IMAGE_PATH = 649 | 650 | # The INPUT_FILTER tag can be used to specify a program that doxygen should 651 | # invoke to filter for each input file. Doxygen will invoke the filter program 652 | # by executing (via popen()) the command <filter> <input-file>, where <filter> 653 | # is the value of the INPUT_FILTER tag, and <input-file> is the name of an 654 | # input file. Doxygen will then use the output that the filter program writes 655 | # to standard output. 656 | # If FILTER_PATTERNS is specified, this tag will be 657 | # ignored. 658 | 659 | INPUT_FILTER = 660 | 661 | # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern 662 | # basis. 663 | # Doxygen will compare the file name with each pattern and apply the 664 | # filter if there is a match. 665 | # The filters are a list of the form: 666 | # pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further 667 | # info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER 668 | # is applied to all files. 669 | 670 | FILTER_PATTERNS = 671 | 672 | # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using 673 | # INPUT_FILTER) will be used to filter the input files when producing source 674 | # files to browse (i.e. when SOURCE_BROWSER is set to YES). 675 | 676 | FILTER_SOURCE_FILES = NO 677 | 678 | #--------------------------------------------------------------------------- 679 | # configuration options related to source browsing 680 | #--------------------------------------------------------------------------- 681 | 682 | # If the SOURCE_BROWSER tag is set to YES then a list of source files will 683 | # be generated. Documented entities will be cross-referenced with these sources.
684 | # Note: To get rid of all source code in the generated output, make sure also 685 | # VERBATIM_HEADERS is set to NO. 686 | 687 | SOURCE_BROWSER = YES 688 | 689 | # Setting the INLINE_SOURCES tag to YES will include the body 690 | # of functions and classes directly in the documentation. 691 | 692 | INLINE_SOURCES = NO 693 | 694 | # Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct 695 | # doxygen to hide any special comment blocks from generated source code 696 | # fragments. Normal C and C++ comments will always remain visible. 697 | 698 | STRIP_CODE_COMMENTS = NO 699 | 700 | # If the REFERENCED_BY_RELATION tag is set to YES 701 | # then for each documented function all documented 702 | # functions referencing it will be listed. 703 | 704 | REFERENCED_BY_RELATION = NO 705 | 706 | # If the REFERENCES_RELATION tag is set to YES 707 | # then for each documented function all documented entities 708 | # called/used by that function will be listed. 709 | 710 | REFERENCES_RELATION = NO 711 | 712 | # If the REFERENCES_LINK_SOURCE tag is set to YES (the default) 713 | # and SOURCE_BROWSER tag is set to YES, then the hyperlinks from 714 | # functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will 715 | # link to the source code. 716 | # Otherwise they will link to the documentation. 717 | 718 | REFERENCES_LINK_SOURCE = YES 719 | 720 | # If the USE_HTAGS tag is set to YES then the references to source code 721 | # will point to the HTML generated by the htags(1) tool instead of doxygen 722 | # built-in source browser. The htags tool is part of GNU's global source 723 | # tagging system (see http://www.gnu.org/software/global/global.html). You 724 | # will need version 4.8.6 or higher. 725 | 726 | USE_HTAGS = NO 727 | 728 | # If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen 729 | # will generate a verbatim copy of the header file for each class for 730 | # which an include is specified. Set to NO to disable this. 
731 | 732 | VERBATIM_HEADERS = NO 733 | 734 | #--------------------------------------------------------------------------- 735 | # configuration options related to the alphabetical class index 736 | #--------------------------------------------------------------------------- 737 | 738 | # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index 739 | # of all compounds will be generated. Enable this if the project 740 | # contains a lot of classes, structs, unions or interfaces. 741 | 742 | ALPHABETICAL_INDEX = YES 743 | 744 | # If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then 745 | # the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns 746 | # in which this list will be split (can be a number in the range [1..20]) 747 | 748 | COLS_IN_ALPHA_INDEX = 5 749 | 750 | # In case all classes in a project start with a common prefix, all 751 | # classes will be put under the same header in the alphabetical index. 752 | # The IGNORE_PREFIX tag can be used to specify one or more prefixes that 753 | # should be ignored while generating the index headers. 754 | 755 | IGNORE_PREFIX = 756 | 757 | #--------------------------------------------------------------------------- 758 | # configuration options related to the HTML output 759 | #--------------------------------------------------------------------------- 760 | 761 | # If the GENERATE_HTML tag is set to YES (the default) Doxygen will 762 | # generate HTML output. 763 | 764 | GENERATE_HTML = YES 765 | 766 | # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. 767 | # If a relative path is entered the value of OUTPUT_DIRECTORY will be 768 | # put in front of it. If left blank `html' will be used as the default path. 769 | 770 | HTML_OUTPUT = html 771 | 772 | # The HTML_FILE_EXTENSION tag can be used to specify the file extension for 773 | # each generated HTML page (for example: .htm,.php,.asp). 
If it is left blank 774 | # doxygen will generate files with .html extension. 775 | 776 | HTML_FILE_EXTENSION = .html 777 | 778 | # The HTML_HEADER tag can be used to specify a personal HTML header for 779 | # each generated HTML page. If it is left blank doxygen will generate a 780 | # standard header. 781 | 782 | HTML_HEADER = 783 | 784 | # The HTML_FOOTER tag can be used to specify a personal HTML footer for 785 | # each generated HTML page. If it is left blank doxygen will generate a 786 | # standard footer. 787 | 788 | HTML_FOOTER = 789 | 790 | # If the HTML_TIMESTAMP tag is set to YES then the generated HTML 791 | # documentation will contain the timestamp. 792 | 793 | HTML_TIMESTAMP = NO 794 | 795 | # The HTML_STYLESHEET tag can be used to specify a user-defined cascading 796 | # style sheet that is used by each HTML page. It can be used to 797 | # fine-tune the look of the HTML output. If the tag is left blank doxygen 798 | # will generate a default style sheet. Note that doxygen will try to copy 799 | # the style sheet file to the HTML output directory, so don't put your own 800 | # stylesheet in the HTML output directory as well, or it will be erased! 801 | 802 | HTML_STYLESHEET = 803 | 804 | # If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, 805 | # files or namespaces will be aligned in HTML using tables. If set to 806 | # NO a bullet list will be used. 807 | 808 | HTML_ALIGN_MEMBERS = YES 809 | 810 | # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML 811 | # documentation will contain sections that can be hidden and shown after the 812 | # page has loaded. For this to work a browser that supports 813 | # JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox, 814 | # Netscape 6.0+, Internet Explorer 5.0+, Konqueror, or Safari).
815 | 816 | HTML_DYNAMIC_SECTIONS = NO 817 | 818 | # If the GENERATE_DOCSET tag is set to YES, additional index files 819 | # will be generated that can be used as input for Apple's Xcode 3 820 | # integrated development environment, introduced with OSX 10.5 (Leopard). 821 | # To create a documentation set, doxygen will generate a Makefile in the 822 | # HTML output directory. Running make will produce the docset in that 823 | # directory and running "make install" will install the docset in 824 | # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find 825 | # it at startup. 826 | # See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html for more information. 827 | 828 | GENERATE_DOCSET = NO 829 | 830 | # When GENERATE_DOCSET tag is set to YES, this tag determines the name of the 831 | # feed. A documentation feed provides an umbrella under which multiple 832 | # documentation sets from a single provider (such as a company or product suite) 833 | # can be grouped. 834 | 835 | DOCSET_FEEDNAME = "Doxygen generated docs" 836 | 837 | # When GENERATE_DOCSET tag is set to YES, this tag specifies a string that 838 | # should uniquely identify the documentation set bundle. This should be a 839 | # reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen 840 | # will append .docset to the name. 841 | 842 | DOCSET_BUNDLE_ID = org.doxygen.Project 843 | 844 | # If the GENERATE_HTMLHELP tag is set to YES, additional index files 845 | # will be generated that can be used as input for tools like the 846 | # Microsoft HTML help workshop to generate a compiled HTML help file (.chm) 847 | # of the generated HTML documentation. 848 | 849 | GENERATE_HTMLHELP = NO 850 | 851 | # If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can 852 | # be used to specify the file name of the resulting .chm file. You 853 | # can add a path in front of the file if the result should not be 854 | # written to the html output directory. 
855 | 856 | CHM_FILE = 857 | 858 | # If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can 859 | # be used to specify the location (absolute path including file name) of 860 | # the HTML help compiler (hhc.exe). If non-empty doxygen will try to run 861 | # the HTML help compiler on the generated index.hhp. 862 | 863 | HHC_LOCATION = 864 | 865 | # If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag 866 | # controls if a separate .chi index file is generated (YES) or that 867 | # it should be included in the master .chm file (NO). 868 | 869 | GENERATE_CHI = NO 870 | 871 | # If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING 872 | # is used to encode HtmlHelp index (hhk), content (hhc) and project file 873 | # content. 874 | 875 | CHM_INDEX_ENCODING = 876 | 877 | # If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag 878 | # controls whether a binary table of contents is generated (YES) or a 879 | # normal table of contents (NO) in the .chm file. 880 | 881 | BINARY_TOC = NO 882 | 883 | # The TOC_EXPAND flag can be set to YES to add extra items for group members 884 | # to the contents of the HTML help documentation and to the tree view. 885 | 886 | TOC_EXPAND = NO 887 | 888 | # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and QHP_VIRTUAL_FOLDER 889 | # are set, an additional index file will be generated that can be used as input for 890 | # Qt's qhelpgenerator to generate a Qt Compressed Help (.qch) of the generated 891 | # HTML documentation. 892 | 893 | GENERATE_QHP = NO 894 | 895 | # If the QHG_LOCATION tag is specified, the QCH_FILE tag can 896 | # be used to specify the file name of the resulting .qch file. 897 | # The path specified is relative to the HTML output folder. 898 | 899 | QCH_FILE = 900 | 901 | # The QHP_NAMESPACE tag specifies the namespace to use when generating 902 | # Qt Help Project output. 
For more information please see 903 | # http://doc.trolltech.com/qthelpproject.html#namespace 904 | 905 | QHP_NAMESPACE = 906 | 907 | # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating 908 | # Qt Help Project output. For more information please see 909 | # http://doc.trolltech.com/qthelpproject.html#virtual-folders 910 | 911 | QHP_VIRTUAL_FOLDER = doc 912 | 913 | # If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to add. 914 | # For more information please see 915 | # http://doc.trolltech.com/qthelpproject.html#custom-filters 916 | 917 | QHP_CUST_FILTER_NAME = 918 | 919 | # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the custom filter to add. For more information please see 920 | # Qt Help Project / Custom Filters. 921 | 922 | QHP_CUST_FILTER_ATTRS = 923 | 924 | # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this project's 925 | # filter section matches. 926 | # Qt Help Project / Filter Attributes. 927 | 928 | QHP_SECT_FILTER_ATTRS = 929 | 930 | # If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can 931 | # be used to specify the location of Qt's qhelpgenerator. 932 | # If non-empty doxygen will try to run qhelpgenerator on the generated 933 | # .qhp file. 934 | 935 | QHG_LOCATION = 936 | 937 | # The DISABLE_INDEX tag can be used to turn on/off the condensed index at 938 | # top of each HTML page. The value NO (the default) enables the index and 939 | # the value YES disables it. 940 | 941 | DISABLE_INDEX = NO 942 | 943 | # This tag can be used to set the number of enum values (range [1..20]) 944 | # that doxygen will group on one line in the generated HTML documentation. 945 | 946 | ENUM_VALUES_PER_LINE = 4 947 | 948 | # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index 949 | # structure should be generated to display hierarchical information.
950 | # If the tag value is set to YES, a side panel will be generated 951 | # containing a tree-like index structure (just like the one that 952 | # is generated for HTML Help). For this to work a browser that supports 953 | # JavaScript, DHTML, CSS and frames is required (i.e. any modern browser). 954 | # Windows users are probably better off using the HTML help feature. 955 | 956 | GENERATE_TREEVIEW = YES 957 | 958 | # By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories, 959 | # and Class Hierarchy pages using a tree view instead of an ordered list. 960 | 961 | USE_INLINE_TREES = NO 962 | 963 | # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be 964 | # used to set the initial width (in pixels) of the frame in which the tree 965 | # is shown. 966 | 967 | TREEVIEW_WIDTH = 250 968 | 969 | # Use this tag to change the font size of Latex formulas included 970 | # as images in the HTML documentation. The default is 10. Note that 971 | # when you change the font size after a successful doxygen run you need 972 | # to manually remove any form_*.png images from the HTML output directory 973 | # to force them to be regenerated. 974 | 975 | FORMULA_FONTSIZE = 10 976 | 977 | # When the SEARCHENGINE tag is enabled doxygen will generate a search box for the HTML output. The underlying search engine uses JavaScript 978 | # and DHTML and should work on any modern browser. Note that when using HTML help (GENERATE_HTMLHELP) or Qt help (GENERATE_QHP) 979 | # there is already a search function so this one should typically 980 | # be disabled. 981 | 982 | SEARCHENGINE = YES 983 | 984 | #--------------------------------------------------------------------------- 985 | # configuration options related to the LaTeX output 986 | #--------------------------------------------------------------------------- 987 | 988 | # If the GENERATE_LATEX tag is set to YES (the default) Doxygen will 989 | # generate Latex output.
990 | 991 | GENERATE_LATEX = YES 992 | 993 | # The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. 994 | # If a relative path is entered the value of OUTPUT_DIRECTORY will be 995 | # put in front of it. If left blank `latex' will be used as the default path. 996 | 997 | LATEX_OUTPUT = latex 998 | 999 | # The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be 1000 | # invoked. If left blank `latex' will be used as the default command name. 1001 | 1002 | LATEX_CMD_NAME = latex 1003 | 1004 | # The MAKEINDEX_CMD_NAME tag can be used to specify the command name to 1005 | # generate index for LaTeX. If left blank `makeindex' will be used as the 1006 | # default command name. 1007 | 1008 | MAKEINDEX_CMD_NAME = makeindex 1009 | 1010 | # If the COMPACT_LATEX tag is set to YES Doxygen generates more compact 1011 | # LaTeX documents. This may be useful for small projects and may help to 1012 | # save some trees in general. 1013 | 1014 | COMPACT_LATEX = NO 1015 | 1016 | # The PAPER_TYPE tag can be used to set the paper type that is used 1017 | # by the printer. Possible values are: a4, a4wide, letter, legal and 1018 | # executive. If left blank a4wide will be used. 1019 | 1020 | PAPER_TYPE = a4wide 1021 | 1022 | # The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX 1023 | # packages that should be included in the LaTeX output. 1024 | 1025 | EXTRA_PACKAGES = 1026 | 1027 | # The LATEX_HEADER tag can be used to specify a personal LaTeX header for 1028 | # the generated latex document. The header should contain everything until 1029 | # the first chapter. If it is left blank doxygen will generate a 1030 | # standard header. Notice: only use this tag if you know what you are doing! 1031 | 1032 | LATEX_HEADER = 1033 | 1034 | # If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated 1035 | # is prepared for conversion to pdf (using ps2pdf).
The pdf file will 1036 | # contain links (just like the HTML output) instead of page references 1037 | # This makes the output suitable for online browsing using a pdf viewer. 1038 | 1039 | PDF_HYPERLINKS = YES 1040 | 1041 | # If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of 1042 | # plain latex in the generated Makefile. Set this option to YES to get a 1043 | # higher quality PDF documentation. 1044 | 1045 | USE_PDFLATEX = YES 1046 | 1047 | # If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. 1048 | # command to the generated LaTeX files. This will instruct LaTeX to keep 1049 | # running if errors occur, instead of asking the user for help. 1050 | # This option is also used when generating formulas in HTML. 1051 | 1052 | LATEX_BATCHMODE = NO 1053 | 1054 | # If LATEX_HIDE_INDICES is set to YES then doxygen will not 1055 | # include the index chapters (such as File Index, Compound Index, etc.) 1056 | # in the output. 1057 | 1058 | LATEX_HIDE_INDICES = NO 1059 | 1060 | # If LATEX_SOURCE_CODE is set to YES then doxygen will include source code with syntax highlighting in the LaTeX output. Note that which sources are shown also depends on other settings such as SOURCE_BROWSER. 1061 | 1062 | LATEX_SOURCE_CODE = NO 1063 | 1064 | #--------------------------------------------------------------------------- 1065 | # configuration options related to the RTF output 1066 | #--------------------------------------------------------------------------- 1067 | 1068 | # If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output 1069 | # The RTF output is optimized for Word 97 and may not look very pretty with 1070 | # other RTF readers or editors. 1071 | 1072 | GENERATE_RTF = NO 1073 | 1074 | # The RTF_OUTPUT tag is used to specify where the RTF docs will be put. 1075 | # If a relative path is entered the value of OUTPUT_DIRECTORY will be 1076 | # put in front of it. If left blank `rtf' will be used as the default path. 
1077 | 1078 | RTF_OUTPUT = rtf 1079 | 1080 | # If the COMPACT_RTF tag is set to YES Doxygen generates more compact 1081 | # RTF documents. This may be useful for small projects and may help to 1082 | # save some trees in general. 1083 | 1084 | COMPACT_RTF = NO 1085 | 1086 | # If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated 1087 | # will contain hyperlink fields. The RTF file will 1088 | # contain links (just like the HTML output) instead of page references. 1089 | # This makes the output suitable for online browsing using WORD or other 1090 | # programs which support those fields. 1091 | # Note: wordpad (write) and others do not support links. 1092 | 1093 | RTF_HYPERLINKS = NO 1094 | 1095 | # Load stylesheet definitions from file. Syntax is similar to doxygen's 1096 | # config file, i.e. a series of assignments. You only have to provide 1097 | # replacements, missing definitions are set to their default value. 1098 | 1099 | RTF_STYLESHEET_FILE = 1100 | 1101 | # Set optional variables used in the generation of an rtf document. 1102 | # Syntax is similar to doxygen's config file. 1103 | 1104 | RTF_EXTENSIONS_FILE = 1105 | 1106 | #--------------------------------------------------------------------------- 1107 | # configuration options related to the man page output 1108 | #--------------------------------------------------------------------------- 1109 | 1110 | # If the GENERATE_MAN tag is set to YES (the default) Doxygen will 1111 | # generate man pages 1112 | 1113 | GENERATE_MAN = NO 1114 | 1115 | # The MAN_OUTPUT tag is used to specify where the man pages will be put. 1116 | # If a relative path is entered the value of OUTPUT_DIRECTORY will be 1117 | # put in front of it. If left blank `man' will be used as the default path. 
1118 | 1119 | MAN_OUTPUT = man 1120 | 1121 | # The MAN_EXTENSION tag determines the extension that is added to 1122 | # the generated man pages (default is the subroutine's section .3) 1123 | 1124 | MAN_EXTENSION = .3 1125 | 1126 | # If the MAN_LINKS tag is set to YES and Doxygen generates man output, 1127 | # then it will generate one additional man file for each entity 1128 | # documented in the real man page(s). These additional files 1129 | # only source the real man page, but without them the man command 1130 | # would be unable to find the correct page. The default is NO. 1131 | 1132 | MAN_LINKS = NO 1133 | 1134 | #--------------------------------------------------------------------------- 1135 | # configuration options related to the XML output 1136 | #--------------------------------------------------------------------------- 1137 | 1138 | # If the GENERATE_XML tag is set to YES Doxygen will 1139 | # generate an XML file that captures the structure of 1140 | # the code including all documentation. 1141 | 1142 | GENERATE_XML = NO 1143 | 1144 | # The XML_OUTPUT tag is used to specify where the XML pages will be put. 1145 | # If a relative path is entered the value of OUTPUT_DIRECTORY will be 1146 | # put in front of it. If left blank `xml' will be used as the default path. 1147 | 1148 | XML_OUTPUT = xml 1149 | 1150 | # The XML_SCHEMA tag can be used to specify an XML schema, 1151 | # which can be used by a validating XML parser to check the 1152 | # syntax of the XML files. 1153 | 1154 | XML_SCHEMA = 1155 | 1156 | # The XML_DTD tag can be used to specify an XML DTD, 1157 | # which can be used by a validating XML parser to check the 1158 | # syntax of the XML files. 1159 | 1160 | XML_DTD = 1161 | 1162 | # If the XML_PROGRAMLISTING tag is set to YES Doxygen will 1163 | # dump the program listings (including syntax highlighting 1164 | # and cross-referencing information) to the XML output. 
Note that 1165 | # enabling this will significantly increase the size of the XML output. 1166 | 1167 | XML_PROGRAMLISTING = YES 1168 | 1169 | #--------------------------------------------------------------------------- 1170 | # configuration options for the AutoGen Definitions output 1171 | #--------------------------------------------------------------------------- 1172 | 1173 | # If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will 1174 | # generate an AutoGen Definitions (see autogen.sf.net) file 1175 | # that captures the structure of the code including all 1176 | # documentation. Note that this feature is still experimental 1177 | # and incomplete at the moment. 1178 | 1179 | GENERATE_AUTOGEN_DEF = NO 1180 | 1181 | #--------------------------------------------------------------------------- 1182 | # configuration options related to the Perl module output 1183 | #--------------------------------------------------------------------------- 1184 | 1185 | # If the GENERATE_PERLMOD tag is set to YES Doxygen will 1186 | # generate a Perl module file that captures the structure of 1187 | # the code including all documentation. Note that this 1188 | # feature is still experimental and incomplete at the 1189 | # moment. 1190 | 1191 | GENERATE_PERLMOD = NO 1192 | 1193 | # If the PERLMOD_LATEX tag is set to YES Doxygen will generate 1194 | # the necessary Makefile rules, Perl scripts and LaTeX code to be able 1195 | # to generate PDF and DVI output from the Perl module output. 1196 | 1197 | PERLMOD_LATEX = NO 1198 | 1199 | # If the PERLMOD_PRETTY tag is set to YES the Perl module output will be 1200 | # nicely formatted so it can be parsed by a human reader. 1201 | # This is useful 1202 | # if you want to understand what is going on. 1203 | # On the other hand, if this 1204 | # tag is set to NO the size of the Perl module output will be much smaller 1205 | # and Perl will parse it just the same. 
1206 | 1207 | PERLMOD_PRETTY = YES 1208 | 1209 | # The names of the make variables in the generated doxyrules.make file 1210 | # are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. 1211 | # This is useful so different doxyrules.make files included by the same 1212 | # Makefile don't overwrite each other's variables. 1213 | 1214 | PERLMOD_MAKEVAR_PREFIX = 1215 | 1216 | #--------------------------------------------------------------------------- 1217 | # Configuration options related to the preprocessor 1218 | #--------------------------------------------------------------------------- 1219 | 1220 | # If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will 1221 | # evaluate all C-preprocessor directives found in the sources and include 1222 | # files. 1223 | 1224 | ENABLE_PREPROCESSING = YES 1225 | 1226 | # If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro 1227 | # names in the source code. If set to NO (the default) only conditional 1228 | # compilation will be performed. Macro expansion can be done in a controlled 1229 | # way by setting EXPAND_ONLY_PREDEF to YES. 1230 | 1231 | MACRO_EXPANSION = NO 1232 | 1233 | # If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES 1234 | # then the macro expansion is limited to the macros specified with the 1235 | # PREDEFINED and EXPAND_AS_DEFINED tags. 1236 | 1237 | EXPAND_ONLY_PREDEF = NO 1238 | 1239 | # If the SEARCH_INCLUDES tag is set to YES (the default) the include files 1240 | # in the INCLUDE_PATH (see below) will be searched if a #include is found. 1241 | 1242 | SEARCH_INCLUDES = YES 1243 | 1244 | # The INCLUDE_PATH tag can be used to specify one or more directories that 1245 | # contain include files that are not input files but should be processed by 1246 | # the preprocessor.
1247 | 1248 | INCLUDE_PATH = 1249 | 1250 | # You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard 1251 | # patterns (like *.h and *.hpp) to filter out the header-files in the 1252 | # directories. If left blank, the patterns specified with FILE_PATTERNS will 1253 | # be used. 1254 | 1255 | INCLUDE_FILE_PATTERNS = 1256 | 1257 | # The PREDEFINED tag can be used to specify one or more macro names that 1258 | # are defined before the preprocessor is started (similar to the -D option of 1259 | # gcc). The argument of the tag is a list of macros of the form: name 1260 | # or name=definition (no spaces). If the definition and the = are 1261 | # omitted =1 is assumed. To prevent a macro definition from being 1262 | # undefined via #undef or recursively expanded use the := operator 1263 | # instead of the = operator. 1264 | 1265 | PREDEFINED = 1266 | 1267 | # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then 1268 | # this tag can be used to specify a list of macro names that should be expanded. 1269 | # The macro definition that is found in the sources will be used. 1270 | # Use the PREDEFINED tag if you want to use a different macro definition. 1271 | 1272 | EXPAND_AS_DEFINED = 1273 | 1274 | # If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then 1275 | # doxygen's preprocessor will remove all function-like macros that are alone 1276 | # on a line, have an all uppercase name, and do not end with a semicolon. Such 1277 | # function macros are typically used for boiler-plate code, and will confuse 1278 | # the parser if not removed. 1279 | 1280 | SKIP_FUNCTION_MACROS = YES 1281 | 1282 | #--------------------------------------------------------------------------- 1283 | # Configuration::additions related to external references 1284 | #--------------------------------------------------------------------------- 1285 | 1286 | # The TAGFILES option can be used to specify one or more tagfiles. 
1287 | # Optionally an initial location of the external documentation 1288 | # can be added for each tagfile. The format of a tag file without 1289 | # this location is as follows: 1290 | # 1291 | # TAGFILES = file1 file2 ... 1292 | # Adding location for the tag files is done as follows: 1293 | # 1294 | # TAGFILES = file1=loc1 "file2 = loc2" ... 1295 | # where "loc1" and "loc2" can be relative or absolute paths or 1296 | # URLs. If a location is present for each tag, the installdox tool 1297 | # does not have to be run to correct the links. 1298 | # Note that each tag file must have a unique name 1299 | # (where the name does NOT include the path) 1300 | # If a tag file is not located in the directory in which doxygen 1301 | # is run, you must also specify the path to the tagfile here. 1302 | 1303 | TAGFILES = 1304 | 1305 | # When a file name is specified after GENERATE_TAGFILE, doxygen will create 1306 | # a tag file that is based on the input files it reads. 1307 | 1308 | GENERATE_TAGFILE = 1309 | 1310 | # If the ALLEXTERNALS tag is set to YES all external classes will be listed 1311 | # in the class index. If set to NO only the inherited external classes 1312 | # will be listed. 1313 | 1314 | ALLEXTERNALS = NO 1315 | 1316 | # If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed 1317 | # in the modules index. If set to NO, only the current project's groups will 1318 | # be listed. 1319 | 1320 | EXTERNAL_GROUPS = YES 1321 | 1322 | # The PERL_PATH should be the absolute path and name of the perl script 1323 | # interpreter (i.e. the result of `which perl'). 
1324 | 1325 | PERL_PATH = /usr/bin/perl 1326 | 1327 | #--------------------------------------------------------------------------- 1328 | # Configuration options related to the dot tool 1329 | #--------------------------------------------------------------------------- 1330 | 1331 | # If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will 1332 | # generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base 1333 | # or super classes. Setting the tag to NO turns the diagrams off. Note that 1334 | # this option is superseded by the HAVE_DOT option below. This is only a 1335 | # fallback. It is recommended to install and use dot, since it yields more 1336 | # powerful graphs. 1337 | 1338 | CLASS_DIAGRAMS = YES 1339 | 1340 | # You can define message sequence charts within doxygen comments using the \msc 1341 | # command. Doxygen will then run the mscgen tool (see 1342 | # http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the 1343 | # documentation. The MSCGEN_PATH tag allows you to specify the directory where 1344 | # the mscgen tool resides. If left empty the tool is assumed to be found in the 1345 | # default search path. 1346 | 1347 | MSCGEN_PATH = 1348 | 1349 | # If set to YES, the inheritance and collaboration graphs will hide 1350 | # inheritance and usage relations if the target is undocumented 1351 | # or is not a class. 1352 | 1353 | HIDE_UNDOC_RELATIONS = YES 1354 | 1355 | # If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is 1356 | # available from the path. This tool is part of Graphviz, a graph visualization 1357 | # toolkit from AT&T and Lucent Bell Labs. The other options in this section 1358 | # have no effect if this option is set to NO (the default) 1359 | 1360 | HAVE_DOT = NO 1361 | 1362 | # By default doxygen will write a font called FreeSans.ttf to the output 1363 | # directory and reference it in all dot files that doxygen generates.
This 1364 | # font does not include all possible unicode characters however, so when you need 1365 | # these (or just want a differently looking font) you can specify the font name 1366 | # using DOT_FONTNAME. You need to make sure dot is able to find the font, 1367 | # which can be done by putting it in a standard location or by setting the 1368 | # DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory 1369 | # containing the font. 1370 | 1371 | DOT_FONTNAME = FreeSans 1372 | 1373 | # The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. 1374 | # The default size is 10pt. 1375 | 1376 | DOT_FONTSIZE = 10 1377 | 1378 | # By default doxygen will tell dot to use the output directory to look for the 1379 | # FreeSans.ttf font (which doxygen will put there itself). If you specify a 1380 | # different font using DOT_FONTNAME you can set the path where dot 1381 | # can find it using this tag. 1382 | 1383 | DOT_FONTPATH = 1384 | 1385 | # If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen 1386 | # will generate a graph for each documented class showing the direct and 1387 | # indirect inheritance relations. Setting this tag to YES will force the 1388 | # CLASS_DIAGRAMS tag to NO. 1389 | 1390 | CLASS_GRAPH = YES 1391 | 1392 | # If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen 1393 | # will generate a graph for each documented class showing the direct and 1394 | # indirect implementation dependencies (inheritance, containment, and 1395 | # class references variables) of the class with other documented classes.
1396 | 1397 | COLLABORATION_GRAPH = YES 1398 | 1399 | # If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen 1400 | # will generate a graph for groups, showing the direct groups dependencies 1401 | 1402 | GROUP_GRAPHS = YES 1403 | 1404 | # If the UML_LOOK tag is set to YES doxygen will generate inheritance and 1405 | # collaboration diagrams in a style similar to the OMG's Unified Modeling 1406 | # Language. 1407 | 1408 | UML_LOOK = NO 1409 | 1410 | # If set to YES, the inheritance and collaboration graphs will show the 1411 | # relations between templates and their instances. 1412 | 1413 | TEMPLATE_RELATIONS = NO 1414 | 1415 | # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT 1416 | # tags are set to YES then doxygen will generate a graph for each documented 1417 | # file showing the direct and indirect include dependencies of the file with 1418 | # other documented files. 1419 | 1420 | INCLUDE_GRAPH = YES 1421 | 1422 | # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and 1423 | # HAVE_DOT tags are set to YES then doxygen will generate a graph for each 1424 | # documented header file showing the documented files that directly or 1425 | # indirectly include this file. 1426 | 1427 | INCLUDED_BY_GRAPH = YES 1428 | 1429 | # If the CALL_GRAPH and HAVE_DOT options are set to YES then 1430 | # doxygen will generate a call dependency graph for every global function 1431 | # or class method. Note that enabling this option will significantly increase 1432 | # the time of a run. So in most cases it will be better to enable call graphs 1433 | # for selected functions only using the \callgraph command. 1434 | 1435 | CALL_GRAPH = NO 1436 | 1437 | # If the CALLER_GRAPH and HAVE_DOT tags are set to YES then 1438 | # doxygen will generate a caller dependency graph for every global function 1439 | # or class method. Note that enabling this option will significantly increase 1440 | # the time of a run. 
So in most cases it will be better to enable caller 1441 | # graphs for selected functions only using the \callergraph command. 1442 | 1443 | CALLER_GRAPH = NO 1444 | 1445 | # If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen 1446 | # will show a graphical hierarchy of all classes instead of a textual one. 1447 | 1448 | GRAPHICAL_HIERARCHY = YES 1449 | 1450 | # If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES 1451 | # then doxygen will show the dependencies a directory has on other directories 1452 | # in a graphical way. The dependency relations are determined by the #include 1453 | # relations between the files in the directories. 1454 | 1455 | DIRECTORY_GRAPH = YES 1456 | 1457 | # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images 1458 | # generated by dot. Possible values are png, jpg, or gif 1459 | # If left blank png will be used. 1460 | 1461 | DOT_IMAGE_FORMAT = png 1462 | 1463 | # The tag DOT_PATH can be used to specify the path where the dot tool can be 1464 | # found. If left blank, it is assumed the dot tool can be found in the path. 1465 | 1466 | DOT_PATH = 1467 | 1468 | # The DOTFILE_DIRS tag can be used to specify one or more directories that 1469 | # contain dot files that are included in the documentation (see the 1470 | # \dotfile command). 1471 | 1472 | DOTFILE_DIRS = 1473 | 1474 | # The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of 1475 | # nodes that will be shown in the graph. If the number of nodes in a graph 1476 | # becomes larger than this value, doxygen will truncate the graph, which is 1477 | # visualized by representing a node as a red box. Note that if the 1478 | # number of direct children of the root node in a graph is already larger than 1479 | # DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note 1480 | # that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
1481 | 1482 | DOT_GRAPH_MAX_NODES = 50 1483 | 1484 | # The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the 1485 | # graphs generated by dot. A depth value of 3 means that only nodes reachable 1486 | # from the root by following a path via at most 3 edges will be shown. Nodes 1487 | # that lie further from the root node will be omitted. Note that setting this 1488 | # option to 1 or 2 may greatly reduce the computation time needed for large 1489 | # code bases. Also note that the size of a graph can be further restricted by 1490 | # DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. 1491 | 1492 | MAX_DOT_GRAPH_DEPTH = 0 1493 | 1494 | # Set the DOT_TRANSPARENT tag to YES to generate images with a transparent 1495 | # background. This is disabled by default, because dot on Windows does not 1496 | # seem to support this out of the box. Warning: Depending on the platform used, 1497 | # enabling this option may lead to badly anti-aliased labels on the edges of 1498 | # a graph (i.e. they become hard to read). 1499 | 1500 | DOT_TRANSPARENT = NO 1501 | 1502 | # Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output 1503 | # files in one run (i.e. multiple -o and -T options on the command line). This 1504 | # makes dot run faster, but since only newer versions of dot (>1.8.10) 1505 | # support this, this feature is disabled by default. 1506 | 1507 | DOT_MULTI_TARGETS = NO 1508 | 1509 | # If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will 1510 | # generate a legend page explaining the meaning of the various boxes and 1511 | # arrows in the dot generated graphs. 1512 | 1513 | GENERATE_LEGEND = YES 1514 | 1515 | # If the DOT_CLEANUP tag is set to YES (the default) Doxygen will 1516 | # remove the intermediate dot files that are used to generate 1517 | # the various graphs.
1518 | 1519 | DOT_CLEANUP = YES 1520 | -------------------------------------------------------------------------------- /app_template/common/gpumemioctl.h: --------------------------------------------------------------------------------
/*
 * ioctl interface of the "gpumem" driver: request codes and the argument
 * structures passed with them.
 */
#ifndef __GPUDMAIOTCL_H__
#define __GPUDMAIOTCL_H__

//-----------------------------------------------------------------------------

/* Driver name; the README shows the device node as /dev/gpumem. */
#define GPUMEM_DRIVER_NAME "gpumem"

//-----------------------------------------------------------------------------

/*
 * NOTE(review): both #include directives had lost their targets in this copy
 * of the file. The headers below are the ones the declarations in this file
 * need (fixed-width integers, size_t and the _IO() macro) -- confirm them
 * against module/gpumemioctl.h.
 */
#ifdef __linux__
#ifdef __KERNEL__
#include <linux/types.h>
#include <linux/ioctl.h>
#else
#include <stddef.h>
#include <stdint.h>
#include <sys/ioctl.h>
#endif
#define GPUMEM_DEVICE_TYPE 'g'
/* Builds a request code from the 'g' type and a command number. */
#define GPUMEM_MAKE_IOCTL(c) _IO(GPUMEM_DEVICE_TYPE, (c))
#endif

/*
 * Request codes. GPUMEM_MAKE_IOCTL is only defined when __linux__ is set,
 * so these macros can only be expanded on Linux builds.
 */
#define IOCTL_GPUMEM_LOCK GPUMEM_MAKE_IOCTL(10)
#define IOCTL_GPUMEM_UNLOCK GPUMEM_MAKE_IOCTL(11)
#define IOCTL_GPUMEM_STATE GPUMEM_MAKE_IOCTL(12)

//-----------------------------------------------------------------------------
// for boundary alignment requirement
#define GPU_BOUND_SHIFT 16
/* uint64_t (not the kernel-only u64) so the macro matches the struct fields
 * below and also expands in user space. */
#define GPU_BOUND_SIZE ((uint64_t)1 << GPU_BOUND_SHIFT)
#define GPU_BOUND_OFFSET (GPU_BOUND_SIZE-1)
#define GPU_BOUND_MASK (~GPU_BOUND_OFFSET)

//-----------------------------------------------------------------------------

/* NOTE(review): presumably the argument of IOCTL_GPUMEM_LOCK -- confirm
 * against the driver. */
struct gpudma_lock_t {
    void *handle;
    uint64_t addr;
    uint64_t size;
    size_t page_count;
};

//-----------------------------------------------------------------------------

/* NOTE(review): presumably the argument of IOCTL_GPUMEM_UNLOCK -- confirm
 * against the driver. */
struct gpudma_unlock_t {
    void *handle;
};

//-----------------------------------------------------------------------------

/* NOTE(review): presumably the argument of IOCTL_GPUMEM_STATE -- confirm
 * against the driver. */
struct gpudma_state_t {
    void *handle;
    size_t page_count;
    size_t page_size;
    /* NOTE(review): looks like a variable-length tail with page_count
     * entries; kept as [1] so sizeof() stays identical to the copy of this
     * header used by the kernel module. */
    uint64_t pages[1];
};

//-----------------------------------------------------------------------------


#endif // __GPUDMAIOTCL_H__
-------------------------------------------------------------------------------- /app_template/common/utypes.h: -------------------------------------------------------------------------------- 1 | /*************************************************** 2 | * 3 | * UTYPES.H - define useful types. 4 | * 5 | * (C) Instrumental Systems Corp. Ekkore, Dec. 1997-2001 6 | * 7 | ****************************************************/ 8 | 9 | 10 | #ifndef _UTYPES_H_ 11 | #define _UTYPES_H_ 12 | 13 | #include "utypes_linux.h" 14 | 15 | 16 | 17 | /************************************* 18 | * 19 | * Types for MSDOS 20 | */ 21 | 22 | 23 | #ifdef __MSDOS__ 24 | 25 | typedef unsigned char UINT08; 26 | typedef signed char SINT08; 27 | typedef unsigned short UINT16; 28 | typedef signed short SINT16; 29 | typedef unsigned long UINT32; 30 | typedef signed long SINT32; 31 | 32 | typedef unsigned char U08, *PU08; 33 | typedef signed char S08, *PS08; 34 | typedef unsigned short U16, *PU16; 35 | typedef signed short S16, *PS16; 36 | typedef unsigned long U32, *PU32; 37 | typedef signed long S32, *PS32; 38 | 39 | typedef float REAL32, *PREAL32; 40 | typedef double REAL64, *PREAL64; 41 | 42 | //typedef enum{ FALSE=0, TRUE=1} BOOL; 43 | typedef unsigned char UCHAR; 44 | typedef unsigned short USHORT; 45 | typedef unsigned long ULONG; 46 | typedef unsigned long HANDLE; 47 | 48 | #endif /* __MSDOS__ */ 49 | 50 | 51 | /************************************* 52 | * 53 | * Types for Windows 54 | */ 55 | 56 | 57 | #ifdef _WIN32 58 | 59 | typedef unsigned char UINT08; 60 | typedef signed char SINT08; 61 | typedef unsigned short UINT16; 62 | typedef signed short SINT16; 63 | typedef unsigned int UINT32; 64 | typedef signed int SINT32; 65 | 66 | typedef unsigned char U08, *PU08; 67 | typedef signed char S08, *PS08; 68 | typedef unsigned short U16, *PU16; 69 | typedef signed short S16, *PS16; 70 | typedef unsigned int U32, *PU32; 71 | typedef signed int S32, *PS32; 72 | 73 | #if _MSC_VER == 1200 74 | #else
75 | typedef unsigned long long int U64, *PU64; 76 | typedef signed long long int S64, *PS64; 77 | #endif 78 | 79 | typedef float REAL32, *PREAL32; 80 | typedef double REAL64, *PREAL64; 81 | 82 | #ifdef _WIN64 83 | typedef wchar_t BRDCHAR; 84 | #define _BRDC(x) L ## x 85 | #define BRDC_strlen wcslen 86 | #define BRDC_strcpy wcscpy 87 | #define BRDC_strncpy wcsncpy 88 | #define BRDC_strcmp wcscmp 89 | #define BRDC_stricmp _wcsicmp 90 | #define BRDC_strnicmp wcsnicmp 91 | #define BRDC_strcat wcscat 92 | #define BRDC_strchr wcschr 93 | #define BRDC_strstr wcsstr 94 | #define BRDC_strtol wcstol 95 | #define BRDC_strtoul wcstoul 96 | #define BRDC_strtod wcstod 97 | #define BRDC_atol _wtol 98 | #define BRDC_atoi _wtoi 99 | #define BRDC_atoi64 _wtoi64 100 | #define BRDC_atof _wtof 101 | #define BRDC_printf wprintf 102 | #define BRDC_fprintf fwprintf 103 | #define BRDC_sprintf swprintf 104 | #define BRDC_vsprintf vswprintf 105 | #define BRDC_sscanf swscanf 106 | #define BRDC_fopen _wfopen 107 | #define BRDC_sopen _wsopen 108 | #define BRDC_fgets fgetws 109 | #define BRDC_fputs fputws 110 | #define BRDC_getenv _wgetenv 111 | #define BRDC_main wmain 112 | #else 113 | typedef char BRDCHAR; 114 | #define _BRDC(x) x 115 | #define BRDC_strlen strlen 116 | #define BRDC_strcpy strcpy 117 | #define BRDC_strncpy strncpy 118 | #define BRDC_strcmp strcmp 119 | #define BRDC_stricmp _stricmp 120 | #define BRDC_strnicmp _strnicmp 121 | #define BRDC_strcat strcat 122 | #define BRDC_strchr strchr 123 | #define BRDC_strstr strstr 124 | #define BRDC_strtol strtol 125 | #define BRDC_strtoul strtoul 126 | #define BRDC_strtod strtod 127 | #define BRDC_atol atol 128 | #define BRDC_atoi atoi 129 | #define BRDC_atoi64 _atoi64 130 | #define BRDC_atof atof 131 | #define BRDC_printf printf 132 | #define BRDC_fprintf fprintf 133 | #define BRDC_sprintf sprintf 134 | #define BRDC_vsprintf vsprintf 135 | #define BRDC_sscanf sscanf 136 | #define BRDC_fopen fopen 137 | #define BRDC_sopen sopen 138 | 
#define BRDC_fgets fgets 139 | #define BRDC_fputs fputs 140 | #define BRDC_getenv getenv 141 | #define BRDC_main main 142 | #endif 143 | 144 | #endif /* _WIN32 */ 145 | 146 | 147 | /************************************* 148 | * 149 | * Types for TMS320C3x/C4x 150 | */ 151 | 152 | 153 | #if defined(_TMS320C30) || defined(_TMS320C40 ) 154 | 155 | /* 156 | typedef unsigned char UINT08; 157 | typedef signed char SINT08; 158 | typedef unsigned short UINT16; 159 | typedef signed short SINT16; 160 | */ 161 | typedef unsigned int UINT32; 162 | typedef signed int SINT32; 163 | 164 | typedef unsigned int U32, *PU32; 165 | typedef signed int S32, *PS32; 166 | 167 | typedef float REAL32, *PREAL32; 168 | typedef double REAL64, *PREAL64; 169 | 170 | typedef UINT32 ULONG; 171 | typedef UINT32 USHORT; 172 | typedef UINT32 UCHAR; 173 | 174 | #endif /* _TMS320C30 || _TMS320C40 */ 175 | 176 | 177 | /************************************* 178 | * 179 | * Types for TMS320C6x 180 | */ 181 | 182 | 183 | #ifdef _TMS320C6X 184 | 185 | typedef unsigned char UINT08; 186 | typedef signed char SINT08; 187 | typedef unsigned short UINT16; 188 | typedef signed short SINT16; 189 | typedef unsigned int UINT32; 190 | typedef signed int SINT32; 191 | 192 | typedef unsigned char U08, *PU08; 193 | typedef signed char S08, *PS08; 194 | typedef unsigned short U16, *PU16; 195 | typedef signed short S16, *PS16; 196 | typedef unsigned int U32, *PU32; 197 | typedef signed int S32, *PS32; 198 | 199 | typedef unsigned long long int U64, *PU64; 200 | typedef signed long long int S64, *PS64; 201 | 202 | typedef float REAL32, *PREAL32; 203 | typedef double REAL64, *PREAL64; 204 | 205 | typedef UINT32 ULONG; 206 | typedef UINT16 USHORT; 207 | typedef UINT08 UCHAR; 208 | 209 | #endif /* _TMS320C6X */ 210 | 211 | 212 | /************************************* 213 | * 214 | * Types for ADSP2106x 215 | */ 216 | 217 | 218 | #if defined(__ADSP21060__) || defined(__ADSP21061__) || defined(__ADSP21062__)|| 
defined(__ADSP21065L__) 219 | 220 | typedef unsigned int UINT32; 221 | typedef signed int SINT32; 222 | 223 | typedef unsigned int U32, *PU32; 224 | typedef signed int S32, *PS32; 225 | 226 | typedef float REAL32, *PREAL32; 227 | typedef double REAL64, *PREAL64; 228 | 229 | typedef UINT32 ULONG; 230 | typedef UINT32 USHORT; 231 | typedef UINT32 UCHAR; 232 | 233 | #endif /* __ADSP2106x__ */ 234 | 235 | /************************************* 236 | * 237 | * Types for ADSP2116x 238 | */ 239 | 240 | 241 | #if defined(__ADSP21160__) || defined(__ADSP21161__) 242 | 243 | typedef unsigned int UINT32; 244 | typedef signed int SINT32; 245 | 246 | typedef unsigned int U32, *PU32; 247 | typedef signed int S32, *PS32; 248 | 249 | typedef float REAL32, *PREAL32; 250 | typedef double REAL64, *PREAL64; 251 | 252 | typedef UINT32 ULONG; 253 | typedef UINT32 USHORT; 254 | typedef UINT32 UCHAR; 255 | 256 | #endif /* __ADSP2116x__ */ 257 | 258 | /************************************* 259 | * 260 | * Types for ADSP-TS101 261 | */ 262 | 263 | 264 | #if defined(__ADSPTS__) 265 | 266 | typedef unsigned int UINT32; 267 | typedef signed int SINT32; 268 | 269 | typedef unsigned int U32, *PU32; 270 | typedef signed int S32, *PS32; 271 | 272 | typedef unsigned long long int U64, *PU64; 273 | typedef signed long long int S64, *PS64; 274 | 275 | typedef float REAL32, *PREAL32; 276 | typedef long double REAL64, *PREAL64; 277 | 278 | typedef UINT32 ULONG; 279 | typedef UINT32 USHORT; 280 | typedef UINT32 UCHAR; 281 | 282 | #endif /* __ADSPTS__ */ 283 | 284 | /************************************* 285 | * 286 | * Types for MC24 287 | */ 288 | 289 | #if defined(__GNUC__) && !defined(__linux__) 290 | 291 | typedef unsigned char UINT08; 292 | typedef signed char SINT08; 293 | typedef unsigned short UINT16; 294 | typedef signed short SINT16; 295 | typedef unsigned long UINT32; 296 | typedef signed long SINT32; 297 | 298 | typedef unsigned char U08, *PU08; 299 | typedef signed char S08, *PS08; 300 | 
typedef unsigned short U16, *PU16; 301 | typedef signed short S16, *PS16; 302 | typedef unsigned long U32, *PU32; 303 | typedef signed long S32, *PS32; 304 | 305 | typedef unsigned long long int U64, *PU64; 306 | typedef signed long long int S64, *PS64; 307 | 308 | typedef float REAL32, *PREAL32; 309 | typedef double REAL64, *PREAL64; 310 | 311 | typedef unsigned char UCHAR; 312 | typedef unsigned short USHORT; 313 | typedef unsigned long ULONG; 314 | typedef unsigned long HANDLE; 315 | 316 | #endif /* __GNUC__ */ 317 | 318 | /************************************* 319 | * 320 | * Type Aliasing 321 | */ 322 | 323 | typedef UINT32 Uns; 324 | 325 | /************************************************* 326 | * 327 | * Entry Point types 328 | * 329 | */ 330 | #if !defined(WIN32) && !defined(__WIN32__) 331 | //#ifndef WIN32 332 | #define FENTRY 333 | #define STDCALL 334 | #else 335 | #include 336 | #define DllImport __declspec( dllimport ) 337 | #define DllExport __declspec( dllexport ) 338 | #define FENTRY DllExport 339 | #define STDCALL __stdcall 340 | #define huge 341 | #endif // WIN32 342 | 343 | 344 | #endif /* _UTYPES_H_ */ 345 | 346 | /* 347 | * End of File 348 | */ 349 | 350 | 351 | 352 | -------------------------------------------------------------------------------- /app_template/common/utypes_linux.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _UTYPES_LINUX_H_ 3 | #define _UTYPES_LINUX_H_ 4 | 5 | #if defined (__LINUX__) || defined(__linux__) 6 | 7 | #include 8 | #include 9 | 10 | typedef uint8_t u8, UINT08, U08, *PU08, BYTE, U8; 11 | typedef uint16_t u16, UINT16, U16, *PU16, WORD; 12 | typedef uint32_t u32, UINT32, U32, *PU32; 13 | typedef uint64_t u64, UINT64, U64, *PU64; 14 | typedef int8_t s8, SINT08, S08, *PS08; 15 | typedef int16_t s16, SINT16, S16, *PS16; 16 | typedef int32_t s32, SINT32, S32, *PS32; 17 | typedef int64_t s64, SINT64, S64, *PS64, __int64; 18 | 19 | //typedef int32_t LONG; 20 | typedef 
long LONG; 21 | typedef int64_t __int64; 22 | 23 | typedef float REAL32, *PREAL32; 24 | typedef double REAL64, *PREAL64; 25 | 26 | #if !defined(TRUE) && !defined(FALSE) 27 | typedef enum { FALSE=0, TRUE=1 } BOOL; 28 | #endif 29 | 30 | typedef uint8_t UCHAR, *PUCHAR; 31 | typedef uint16_t USHORT, *PUSHORT; 32 | typedef uint32_t ULONG, *PULONG, *PUINT, UINT; 33 | typedef int HANDLE; 34 | typedef void* HINSTANCE; 35 | typedef void* PVOID; 36 | typedef void VOID; 37 | typedef uint32_t DWORD; 38 | typedef int64_t __int64; 39 | 40 | typedef int SOCKET; 41 | typedef char TCHAR; 42 | typedef char* PTCHAR; 43 | typedef char* LPTSTR; 44 | 45 | // added for 64-bit windows driver compatibility 46 | typedef char BRDCHAR; 47 | #define _BRDC(x) x 48 | #define BRDC_strlen strlen 49 | #define BRDC_strcpy strcpy 50 | #define BRDC_strncpy strncpy 51 | #define BRDC_strcmp strcmp 52 | #define BRDC_stricmp _stricmp 53 | #define BRDC_strnicmp _strnicmp 54 | #define BRDC_strcat strcat 55 | #define BRDC_strchr strchr 56 | #define BRDC_strstr strstr 57 | #define BRDC_strtol strtol 58 | #define BRDC_strtod strtod 59 | #define BRDC_atol atol 60 | #define BRDC_atoi atoi 61 | #define BRDC_atoi64 atoll 62 | #define BRDC_atof atof 63 | #define BRDC_printf printf 64 | #define BRDC_fprintf fprintf 65 | #define BRDC_sprintf sprintf 66 | #define BRDC_vsprintf vsprintf 67 | #define BRDC_sscanf sscanf 68 | #define BRDC_fopen fopen 69 | #define BRDC_sopen sopen 70 | #define BRDC_fgets fgets 71 | #define BRDC_getenv getenv 72 | #define BRDC_main main 73 | #define BRDC_fputs fputs 74 | 75 | //------------------------------------- 76 | 77 | #define lstrcpy strcpy 78 | #define lstrcpyA strcpy 79 | #define lstrcat strcat 80 | #define lstrcatA strcat 81 | #define lstrlen strlen 82 | #define lstrlenA strlen 83 | #define lstrcmpi strcasecmp 84 | #define _tcsstr strstr 85 | #define _tcscpy_s strcpy 86 | #define _tcscpy strcpy 87 | #define _tcschr strchr 88 | #define sprintf_s sprintf 89 | #define _tcscat_s 
strcat 90 | #define _tcslen strlen 91 | #define _tcscpy strcpy 92 | 93 | #define _T(x) x 94 | #define _TEXT(x) x 95 | #define INFINITE (-1) 96 | 97 | #define _stricmp strcmp 98 | #define stricmp strcmp 99 | #define _strnicmp strncmp 100 | 101 | typedef const char* LPCTSTR; 102 | typedef char* PCTSTR; 103 | typedef char* PTSTR; 104 | typedef void* LPVOID; 105 | typedef int LPOVERLAPPED; 106 | 107 | #ifndef MAX_PATH 108 | #define MAX_PATH PATH_MAX 109 | #endif 110 | 111 | #endif /* __linux__ */ 112 | 113 | #endif /* _UTYPES_LINUX_H_ */ 114 | 115 | /* 116 | * End of File 117 | */ 118 | -------------------------------------------------------------------------------- /app_template/create_doc: -------------------------------------------------------------------------------- 1 | doxygen app_template.dxy 2 | -------------------------------------------------------------------------------- /app_template/cuda/check_counter.cu: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | // System includes 5 | #include 6 | #include 7 | 8 | // CUDA runtime 9 | #include 10 | 11 | // helper functions and utilities to work with CUDA 12 | #include 13 | #include 14 | 15 | #include "task_data.h" 16 | 17 | 18 | 19 | /** 20 | * \brief CUDA kernel for check buffer 21 | * 22 | * \param sharedMemory area for exchange status information with host 23 | * \param nbuf number of buffer 24 | * 25 | * 26 | */ 27 | __global__ void checkCounterKernel( long *sharedMemory, int nbuf ) 28 | { 29 | 30 | __shared__ int shFlagIrq; 31 | 32 | TaskMonitor *ptrMonitor = (TaskMonitor*)sharedMemory; 33 | TaskBufferStatus *ts=(TaskBufferStatus *)sharedMemory; 34 | ts+=nbuf; 35 | 36 | uint64_t step = TaskCounts; 37 | int size=ts->sizeOfKBytes; 38 | int cnt=1024/8*size/step; 39 | 40 | uint64_t expect_data=nbuf*1024*size/8; 41 | expect_data += threadIdx.x; 42 | 43 | uint64_t *src = (uint64_t*)(ts->ptrCudaIn); 44 | src+=threadIdx.x; 45 | 46 | uint64_t *dst; 47 | 48 | 
TaskCheckData* check= &(ts->check[threadIdx.x]); 49 | 50 | unsigned int totalErrorForBuf=0; 51 | unsigned int errorCnt=0; 52 | unsigned int block_rd=0; 53 | unsigned int block_ok=0; 54 | unsigned int block_error=0; 55 | 56 | unsigned int flagError=0; 57 | 58 | TaskHostStatus *ptrHostStatus = ts->ptrHostStatus; 59 | shFlagIrq=0; 60 | 61 | 62 | //printf( "src=%p x=%d y=%d z=%d expect_data=0x%.8lX\n", src, threadIdx.x, threadIdx.y, threadIdx.z, expect_data ); 63 | 64 | 65 | for( int loop=0; ; loop++ ) 66 | { 67 | if( 1==ptrMonitor->flagExit ) 68 | { 69 | break; 70 | } 71 | 72 | if( 0==threadIdx.x ) 73 | shFlagIrq=ts->irqFlag; 74 | 75 | 76 | if( 1!=shFlagIrq ) 77 | { 78 | for( volatile int jj=0; jj<1000; jj++ ); 79 | 80 | continue; 81 | } 82 | 83 | src = (uint64_t*)(ts->ptrCudaIn); 84 | src+=threadIdx.x; 85 | 86 | __syncthreads(); 87 | 88 | 89 | flagError=0; 90 | check->flagError=1; 91 | 92 | if( 0==threadIdx.x ) 93 | { 94 | 95 | dst=(uint64_t*)(ts->ptrCudaOut); 96 | dst+= ts->indexWr * cnt; 97 | 98 | for( int ii=0; iinblock[errorCnt]=block_rd; 110 | check->adr[errorCnt]=ii; 111 | check->expect_data[errorCnt]=expect_data; 112 | check->receive_data[errorCnt]=val; 113 | } 114 | errorCnt++; 115 | flagError++; 116 | } 117 | expect_data+=step; 118 | } 119 | 120 | { 121 | int n=ts->indexWr+1; 122 | if( n==ts->indexMax ) 123 | n=0; 124 | ts->indexWr=n; 125 | ptrHostStatus->indexWr=n; 126 | } 127 | 128 | } else 129 | { 130 | for( int ii=0; iinblock[errorCnt]=block_rd; 140 | check->adr[errorCnt]=ii; 141 | check->expect_data[errorCnt]=expect_data; 142 | check->receive_data[errorCnt]=val; 143 | } 144 | errorCnt++; 145 | flagError++; 146 | } 147 | expect_data+=step; 148 | } 149 | 150 | } 151 | 152 | 153 | check->flagError=flagError; 154 | check->cntError=errorCnt; 155 | 156 | if( 0==threadIdx.x ) 157 | ptrMonitor->block[nbuf].irqFlag=0; 158 | 159 | expect_data += 2*1024*size/8; 160 | 161 | __syncthreads(); 162 | 163 | block_rd++; 164 | 165 | if( 0==threadIdx.x ) 166 | { 167 | // 
Check all task 168 | unsigned int flagErr=0; 169 | for( int ii=0; iicheck[ii].flagError ) 172 | { 173 | flagErr=1; 174 | } 175 | } 176 | if( 0==flagErr) 177 | { 178 | block_ok++; 179 | } else 180 | { 181 | block_error++; 182 | } 183 | 184 | ts->blockRd=block_rd; 185 | ts->blockOk=block_ok; 186 | ts->blockError=block_error; 187 | //printf( "buf: %d expect_data= 0x%.8lX \n", nbuf, expect_data ); 188 | } 189 | 190 | } 191 | 192 | 193 | } 194 | 195 | /** 196 | * \brief start checkCounterKernel 197 | * 198 | * \param sharedMemory pointer in CUDA memory of shared data 199 | * \param nbuf number of buffer 200 | * \param stream CUDA stream for this kernel 201 | * 202 | */ 203 | int run_checkCounter( long *sharedMemory, int nbuf, cudaStream_t& stream ) 204 | { 205 | 206 | //Kernel configuration, where a two-dimensional grid and 207 | //three-dimensional blocks are configured. 208 | dim3 dimGrid(1, 1); 209 | dim3 dimBlock(TaskCounts, 1, 1); 210 | checkCounterKernel<<>>( sharedMemory, nbuf ); 211 | 212 | return 0; 213 | } 214 | 215 | 216 | //__global__ void MonitorKernel( long* sharedMemory, int nbuf, unsigned int index_rd ) 217 | //{ 218 | // 219 | // TaskMonitor *ptrMonitor = (TaskMonitor*)sharedMemory; 220 | // TaskBufferStatus *ts=(TaskBufferStatus *)sharedMemory; 221 | // ts+=nbuf; 222 | // 223 | // for( int loop=0; ; loop++ ) 224 | // { 225 | // if( 1==ptrMonitor->flagExit ) 226 | // { 227 | // break; 228 | // } 229 | // 230 | // if( index_rd!=ptrMonitor->block[0].indexWr ) 231 | // break; 232 | // 233 | // for( volatile int jj=0; jj<10000; jj++ ); 234 | // } 235 | // 236 | // 237 | //} 238 | // 239 | //int run_Monitor( long* sharedMemory, int nbuf, unsigned int index_rd, cudaStream_t stream ) 240 | //{ 241 | // 242 | // //Kernel configuration, where a two-dimensional grid and 243 | // //three-dimensional blocks are configured. 
244 | // dim3 dimGrid(1, 1); 245 | // dim3 dimBlock(1, 1, 1); 246 | // MonitorKernel<<>>(sharedMemory, nbuf, index_rd ); 247 | // 248 | // 249 | //} 250 | -------------------------------------------------------------------------------- /app_template/host/cl_cuda.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * cl_cuda.cpp 3 | * 4 | * Created on: Feb 4, 2017 5 | * Author: Dmitry Smekhov 6 | */ 7 | 8 | #include "cl_cuda.h" 9 | 10 | // System includes 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | // CUDA runtime 26 | #include 27 | #include 28 | 29 | //#include "cuda.h" 30 | //#include "cuda_runtime_api.h" 31 | 32 | #include "gpumemioctl.h" 33 | 34 | // helper functions and utilities to work with CUDA 35 | #include 36 | #include 37 | 38 | void checkError(CUresult status); 39 | bool wasError(CUresult status); 40 | 41 | /** 42 | * \brief Private data for CL_Cuda class 43 | */ 44 | class CL_Cuda_private 45 | { 46 | public: 47 | 48 | int devID; //!< Id for CUDA device 49 | cudaDeviceProp props; //!< attributes for CUDA device 50 | 51 | int fd; //!< description of gpumem driver 52 | 53 | CUdevice device; //!< Descriptor CUDA device 54 | char name[256]; //!< Name of CUDA device 55 | int major, minor; //!< Capability numbers; 56 | size_t global_mem; //!< Size of memory on CUDA device 57 | CUcontext context; //!< Contex for all cuda functions 58 | 59 | }; 60 | 61 | /** 62 | * \brief Constructor 63 | * 64 | * \param argc argc from main function 65 | * \param argv argv from main function 66 | */ 67 | CL_Cuda::CL_Cuda( int argc, char** argv ) 68 | { 69 | 70 | 71 | pd = new CL_Cuda_private(); 72 | 73 | cudaDeviceReset(); 74 | 75 | checkError(cuInit(0)); 76 | 77 | // int total = 0; 78 | // cudaGetDeviceCount( &total ); 79 | // fprintf(stderr, "Total devices: %d\n", total); 80 | // 81 | 
pd->devID=0; 82 | cudaSetDevice(pd->devID); 83 | 84 | int total = 0; 85 | checkError(cuDeviceGetCount(&total)); 86 | fprintf(stderr, "Total devices: %d\n", total); 87 | 88 | 89 | checkError(cuDeviceGet(&pd->device, 0)); 90 | 91 | 92 | checkError(cuDeviceGetName( pd->name, 256, pd->device)); 93 | fprintf(stderr, "Select device: %s\n", pd->name); 94 | 95 | // get compute capabilities and the devicename 96 | pd->major = 0; pd->minor = 0; 97 | checkError( cuDeviceComputeCapability(&pd->major, &pd->minor, pd->device)); 98 | fprintf(stderr, "Compute capability: %d.%d\n", pd->major, pd->minor); 99 | 100 | pd->global_mem = 0; 101 | checkError( cuDeviceTotalMem(&pd->global_mem, pd->device)); 102 | fprintf(stderr, "Global memory: %llu MB\n", (unsigned long long)(pd->global_mem >> 20)); 103 | if(pd->global_mem > (unsigned long long)4*1024*1024*1024L) 104 | fprintf(stderr, "64-bit Memory Address support\n"); 105 | 106 | 107 | 108 | checkError(cuCtxCreate(&pd->context, 0, pd->device)); 109 | //checkError(cuCtxGetCurrent(&pd->context)); 110 | 111 | pd->devID=0; 112 | //cudaSetDevice(pd->devID); 113 | 114 | 115 | pd->fd = open("/dev/"GPUMEM_DRIVER_NAME, O_RDWR, 0); 116 | if(pd->fd < 0) 117 | { 118 | printf("Error open file %s\n", "/dev/"GPUMEM_DRIVER_NAME); 119 | throw( "Error /dev/gpumem"); 120 | } 121 | 122 | } 123 | 124 | 125 | CL_Cuda::~CL_Cuda() 126 | { 127 | // TODO Auto-generated destructor stub 128 | delete pd; pd=NULL; 129 | } 130 | 131 | 132 | //! Allocate buffer in CUDA memory and map it in BAR1 space 133 | void CL_Cuda::AllocateBar1Buffer( int sizeOfKb, BAR1_BUF *pAdr ) 134 | { 135 | 136 | size_t size = sizeOfKb * 1024; 137 | gpudma_lock_t lock; 138 | gpudma_state_t *state = 0; 139 | unsigned int flag = 1; 140 | CUdeviceptr dptr = 0; 141 | int statesize = 0; 142 | int res = -1; 143 | 144 | int thLevel=0; // Level of local throw 145 | 146 | try 147 | { 148 | 149 | if( 0xA00!=pAdr->state) 150 | { 151 | fprintf(stderr, "BAR1_BUF is busy. 
state=0x%.3X != 0xA00\n", pAdr->state ); 152 | throw(0); 153 | } 154 | pAdr->state=0xA01; 155 | 156 | CUresult status = cuMemAlloc(&dptr, size); 157 | if(wasError(status)) { 158 | throw(thLevel); 159 | } 160 | thLevel++; 161 | 162 | fprintf(stderr, "Allocate memory address: 0x%llx\n", (unsigned long long)dptr); 163 | 164 | status = cuPointerSetAttribute(&flag, CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, dptr); 165 | if(wasError(status)) { 166 | throw(thLevel); //goto do_free_memory; 167 | } 168 | 169 | 170 | // TODO: add kernel driver interaction... 171 | lock.addr = dptr; 172 | lock.size = size; 173 | res = ioctl(pd->fd, IOCTL_GPUMEM_LOCK, &lock); 174 | if(res < 0) { 175 | fprintf(stderr, "Error in IOCTL_GPUDMA_MEM_LOCK\n"); 176 | throw(thLevel); // goto do_free_attr; 177 | } 178 | 179 | 180 | pAdr->phy_addr= new uint64_t[lock.page_count]; 181 | pAdr->app_addr= new void*[lock.page_count]; 182 | thLevel++; 183 | 184 | statesize = (lock.page_count*sizeof(uint64_t) + sizeof(struct gpudma_state_t)); 185 | state = (struct gpudma_state_t*)malloc(statesize); 186 | if(!state) { 187 | throw(thLevel); // goto do_free_attr; 188 | } 189 | memset(state, 0, statesize); 190 | state->handle = lock.handle; 191 | state->page_count = lock.page_count; 192 | res = ioctl(pd->fd, IOCTL_GPUMEM_STATE, state); 193 | if(res < 0) { 194 | fprintf(stderr, "Error in IOCTL_GPUDMA_MEM_UNLOCK\n"); 195 | throw(thLevel);// goto do_unlock; 196 | } 197 | 198 | fprintf(stderr, "Page count 0x%lx\n", state->page_count); 199 | fprintf(stderr, "Page size 0x%lx\n", state->page_size); 200 | 201 | pAdr->page_count=state->page_count; 202 | pAdr->page_size=state->page_size; 203 | pAdr->cuda_addr=(void*)dptr; 204 | pAdr->sizeOfBytes=size; 205 | 206 | 207 | for(unsigned ii=0; iipage_count; ii++) { 208 | if( state->page_count<16 ) 209 | fprintf(stderr, "%02d: 0x%lx\n", ii, state->pages[ii]); 210 | void* va = mmap(0, state->page_size, PROT_READ|PROT_WRITE, MAP_SHARED, pd->fd, (off_t)state->pages[ii]); 211 | if(va == 
MAP_FAILED ) { 212 | fprintf(stderr, "%s(): %s\n", __FUNCTION__, strerror(errno)); 213 | va = 0; 214 | throw(thLevel); 215 | } else { 216 | //fprintf(stderr, "%s(): Physical Address 0x%lx -> Virtual Address %p\n", __FUNCTION__, state->pages[i], va); 217 | pAdr->app_addr[ii]=va; 218 | pAdr->phy_addr[ii]=state->pages[ii]; 219 | } 220 | } 221 | pAdr->state=0xA05; // Success 222 | fprintf(stderr, "CL_Cuda::AllocateBar1Buffer() - buffer id=%d is allocated, size=%d kB \n", pAdr->id, sizeOfKb ); 223 | } catch( int n ) 224 | { 225 | switch( n ) 226 | { 227 | 228 | case 2: 229 | delete pAdr->phy_addr; pAdr->phy_addr=NULL; 230 | delete pAdr->app_addr; pAdr->app_addr=NULL; 231 | case 1: 232 | cuMemFree(dptr); 233 | default: 234 | pAdr->state=0xA00; 235 | break; 236 | } 237 | throw(0); 238 | } catch( ... ) 239 | { 240 | throw( 0 ); 241 | } 242 | } 243 | 244 | //! Release buffer from BAR1 space and from CUDA memory 245 | void CL_Cuda::FreeBar1Buffer( BAR1_BUF *pAdr ) 246 | { 247 | 248 | if( 0xA05!=pAdr->state) 249 | { 250 | fprintf(stderr, "BAR1_BUF is not allocate. 
state=0x%.3X != 0xA05\n", pAdr->state ); 251 | throw(0); 252 | } 253 | pAdr->state = 0xA10; 254 | 255 | // unmap virtual address 256 | void *va; 257 | for(unsigned ii=0; iipage_count; ii++) 258 | { 259 | va=pAdr->app_addr[ii]; 260 | munmap(va, pAdr->page_size); 261 | pAdr->app_addr[ii]=NULL; 262 | } 263 | 264 | // free CUDA memory 265 | cuMemFree((CUdeviceptr)(pAdr->cuda_addr)); 266 | 267 | // free array 268 | delete pAdr->app_addr; pAdr->app_addr=NULL; 269 | delete pAdr->phy_addr; pAdr->phy_addr=NULL; 270 | 271 | // Set empty state of pAdr 272 | pAdr->state = 0xA00; 273 | fprintf(stderr, "CL_Cuda::FreeBar1Buffer() - buffer id=%d is cleared \n", pAdr->id); 274 | 275 | } 276 | 277 | 278 | void checkError(CUresult status) 279 | { 280 | if(status != CUDA_SUCCESS) { 281 | const char *perrstr = 0; 282 | CUresult ok = cuGetErrorString(status,&perrstr); 283 | if(ok == CUDA_SUCCESS) { 284 | if(perrstr) { 285 | fprintf(stderr, "info: %s\n", perrstr); 286 | } else { 287 | fprintf(stderr, "info: unknown error\n"); 288 | } 289 | } 290 | throw(0); 291 | } 292 | } 293 | 294 | bool wasError(CUresult status) 295 | { 296 | if(status != CUDA_SUCCESS) { 297 | const char *perrstr = 0; 298 | CUresult ok = cuGetErrorString(status,&perrstr); 299 | if(ok == CUDA_SUCCESS) { 300 | if(perrstr) { 301 | fprintf(stderr, "info: %s\n", perrstr); 302 | } else { 303 | fprintf(stderr, "info: unknown error\n"); 304 | } 305 | } 306 | return true; 307 | } 308 | return false; 309 | } 310 | -------------------------------------------------------------------------------- /app_template/host/cl_cuda.h: -------------------------------------------------------------------------------- 1 | /* 2 | * cl_cuda.h 3 | * 4 | * Created on: Feb 4, 2017 5 | * Author: Dmitry Smekhov 6 | */ 7 | 8 | #ifndef CL_CUDA_H_ 9 | #define CL_CUDA_H_ 10 | 11 | #include 12 | #include 13 | 14 | class CL_Cuda_private; 15 | 16 | /** 17 | * \brief Common actions for CUDA device 18 | */ 19 | class CL_Cuda 20 | { 21 | 22 | private: 23 | 
CL_Cuda_private *pd; 24 | 25 | public: 26 | CL_Cuda( int argc, char** argv ); 27 | virtual ~CL_Cuda(); 28 | 29 | //! Description buffer in BAR1 space 30 | struct BAR1_BUF 31 | { 32 | int id; //!< User id for buffer 33 | int state; //!< Status of buffer 34 | size_t sizeOfBytes;//!< Size buffer of bytes 35 | int page_count; //!< Count of pages 36 | int page_size; //!< Size of page 37 | void* cuda_addr; //!< address in CUDA memory 38 | uint64_t* phy_addr; //!< Array of physical addresses of pages 39 | void** app_addr; //!< Array of virtual addresses of pages in the application address space 40 | 41 | BAR1_BUF() 42 | { 43 | id=-1; 44 | state=0xA00; 45 | sizeOfBytes=0; 46 | page_count=0; 47 | page_size=0; 48 | phy_addr=0; 49 | app_addr=0; 50 | cuda_addr=0; 51 | } 52 | }; 53 | 54 | //! Allocate buffer in CUDA memory and map it in BAR1 space 55 | void AllocateBar1Buffer( int sizeOfKb, BAR1_BUF *pAdr ); 56 | 57 | //! Release buffer from BAR1 space and from CUDA memory 58 | void FreeBar1Buffer( BAR1_BUF *pAdr ); 59 | 60 | }; 61 | 62 | #endif /* CL_CUDA_H_ */ 63 | -------------------------------------------------------------------------------- /app_template/host/cl_cuda_test.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * cl_cuda_test.cpp 3 | * 4 | * Created on: Feb 4, 2017 5 | * Author: user52 6 | */ 7 | 8 | #include "cl_cuda.h" 9 | 10 | -------------------------------------------------------------------------------- /app_template/host/main.cpp: -------------------------------------------------------------------------------- 1 | 2 | 3 | // System includes 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | 11 | #include "tf_testcnt.h" 12 | 13 | 14 | static volatile int exit_flag = 0; 15 | 16 | void signa_handler(int signo) 17 | { 18 | exit_flag = 1; 19 | } 20 | 21 | 22 | //int run_cuda(int argc, char **argv); 23 | 24 | int main(int argc, char **argv) 25 | { 26 | 27 | int ret; 28 | 29 | signal(SIGINT, 
signa_handler); 30 | 31 | try 32 | { 33 | 34 | TF_TestCnt *pTest = new TF_TestCnt( argc, argv ); 35 | 36 | for( int ii=0; ; ii++) 37 | { 38 | if( pTest->Prepare(ii) ) 39 | break; 40 | } 41 | 42 | pTest->Start(); 43 | 44 | for( ; ; ) 45 | { 46 | 47 | if( pTest->isComplete() ) 48 | break; 49 | 50 | if( exit_flag ) 51 | { 52 | pTest->Stop(); 53 | } 54 | 55 | pTest->StepTable(); 56 | 57 | usleep( 10000 ); // 100 ms 58 | 59 | } 60 | 61 | //pTest->GetResult(); 62 | 63 | delete pTest; pTest=NULL; 64 | 65 | } catch( ... ) 66 | { 67 | 68 | } 69 | 70 | //fprintf( stderr, "\nPress any key for exit\n" ); 71 | //getchar(); 72 | 73 | return ret; 74 | } 75 | 76 | -------------------------------------------------------------------------------- /app_template/host/task_data.h: -------------------------------------------------------------------------------- 1 | 2 | #include "cl_cuda.h" 3 | 4 | //! Number of task for check one buffer 5 | const int TaskCounts=32; 6 | 7 | 8 | /** 9 | * \brief Struct for check data in one task for one buffer 10 | */ 11 | struct TaskCheckData 12 | { 13 | unsigned int flagError; //!< 1 - error in current runs 14 | unsigned int cntError; //!< number of errors for all runs 15 | 16 | unsigned int nblock[16]; //!< number block 17 | unsigned int adr[16]; //!< address into block 18 | uint64_t expect_data[16]; //!< expect data 19 | uint64_t receive_data[16]; //!< receive data 20 | 21 | TaskCheckData() 22 | { 23 | for( int ii=0; ii<16; ii++ ) 24 | { 25 | nblock[ii]=0; 26 | adr[ii]=0; 27 | expect_data[ii]=0; 28 | receive_data[ii]=0; 29 | } 30 | flagError=0; 31 | cntError=0; 32 | } 33 | 34 | 35 | }; 36 | 37 | struct TaskHostStatus; 38 | 39 | /** 40 | * \brief Struct for status calculate 41 | */ 42 | struct TaskBufferStatus 43 | { 44 | unsigned int irqFlag; //!< 1 - ready data in bar1 buffer 45 | unsigned int res0; 46 | unsigned int res1; 47 | unsigned int blockRd; //!< count of read buffer 48 | unsigned int blockOk; //!< count of correct buffers 49 | unsigned int 
blockError; //!< count of buffer with errors 50 | unsigned int sizeOfKBytes; //!< size of buffers in kilobytes 51 | 52 | void* ptrCudaIn; //!< pointer on bar1 buffer in the Cuda memory 53 | 54 | void* ptrCudaOut; //!< pointer on output buffer in the Cuda memory 55 | 56 | TaskHostStatus *ptrHostStatus; //!< pointer on TaskHostStatus in the Host memory 57 | 58 | unsigned int indexWr; //!< block number for next write 59 | unsigned int indexRd; //!< block number for read 60 | unsigned int indexMax; //!< count blocks in output buffer 61 | 62 | TaskCheckData check[ TaskCounts ]; //!< current results for test one buffer 63 | }; 64 | 65 | 66 | /** 67 | * \brief Struct of data in monitor area in BAR1 68 | * 69 | */ 70 | struct TaskMonitor 71 | { 72 | TaskBufferStatus block[3]; //!< Status of buffer0 73 | int sig; //!< signature: 0xAA24 74 | int flagExit; //!< 1 - exit from programm 75 | int res0; 76 | int res1; 77 | 78 | }; 79 | 80 | /** 81 | * \brief Struct for process status in the host memory 82 | */ 83 | struct TaskHostStatus 84 | { 85 | 86 | unsigned int indexWr; //!< block number for next write 87 | unsigned int indexRd; //!< block number for read 88 | //unsigned int indexMax; //!< count blocks in output buffer 89 | 90 | }; 91 | 92 | /** 93 | * \brief Struct of data in monitor area in the host memory 94 | * 95 | */ 96 | struct TaskHostMonitor 97 | { 98 | TaskHostStatus status[3]; //!< Status of process 99 | 100 | }; 101 | 102 | 103 | 104 | /** 105 | * \brief collection data for TF_TestCnt 106 | */ 107 | struct TaskData 108 | { 109 | TaskMonitor* ptrMonitor; //!< address monitor struct in the HOST memory 110 | CL_Cuda::BAR1_BUF monitor; //!< description of monitor buffer in BAR1 111 | CL_Cuda::BAR1_BUF bar1[3]; //!< description of buffer in BAR1 112 | 113 | uint64_t currentCounter; //!< Current value for fill buffers 114 | 115 | int cycleCnt; 116 | 117 | int sizeBufferOfKb; //!< Size buffer [kbytes]. Must be n*64 118 | int countOfCycle; //!< Number of cycle. 
0 - infinitely 119 | 120 | int sizeBufferOfBytes; //!< Size of BAR1 buffer in bytes 121 | int countOfBuffers; //!< Conunt of buffers, from 1 to 3 122 | 123 | //void* decimationBuffers[3]; //!< Buffer in the CUDA memory for 124 | 125 | 126 | size_t outputSizeBuffer; //!< size of output buffer [bytes] 127 | size_t outputSizeBlock; //!< size of output block [bytes] 128 | size_t outputCountBlock; //!< count blocks in the output buffer 129 | 130 | 131 | uint64_t* hostBuffer; //!< data from device 132 | 133 | TaskHostMonitor* hostMonitor; //!< monitor data in the host memory 134 | 135 | unsigned int hostBlockRd; //!< count blocks which host received 136 | unsigned int hostBlockOk; //!< block without errors 137 | unsigned int hostBlockError; //!< block with errors; 138 | 139 | uint64_t hostExpectData; //!< expect data for checking 140 | 141 | TaskCheckData hostCheck; //!< result of checking host data 142 | 143 | double velosityExtToCudaCurrent; //!< velosity data transfer from external device to Cuda for last 4 sec 144 | double velosityExtToCudaAvr; //!< velosity data transfer from external device to Cuda from start 145 | 146 | double velosityCudaToHostCurrent; //!< velosity data transfer from external device to Cuda for last 4 sec 147 | double velosityCudaToHostAvr; //!< velosity data transfer from external device to Cuda from start 148 | 149 | TaskData() 150 | { 151 | cycleCnt=0; 152 | sizeBufferOfBytes=0; 153 | countOfBuffers=3; 154 | currentCounter=0; 155 | 156 | hostBuffer=NULL; 157 | hostMonitor=NULL; 158 | 159 | hostBlockRd=0; 160 | hostBlockOk=0; 161 | hostBlockError=0; 162 | hostExpectData=0; 163 | 164 | velosityExtToCudaCurrent=0; 165 | velosityExtToCudaAvr=0; 166 | velosityCudaToHostCurrent=0; 167 | velosityCudaToHostAvr=0; 168 | 169 | } 170 | }; 171 | -------------------------------------------------------------------------------- /app_template/host/tf_test.h: -------------------------------------------------------------------------------- 1 | /* 2 | * 
tf_test.h 3 | * 4 | * Created on: Jan 29, 2017 5 | * Author: Dmitry Smekhov 6 | */ 7 | 8 | #ifndef TF_TEST_H_ 9 | #define TF_TEST_H_ 10 | 11 | 12 | /** 13 | * \brief Base class for testing device 14 | */ 15 | class TF_Test 16 | { 17 | 18 | public: 19 | 20 | virtual int Prepare( int cnt )=0; 21 | 22 | virtual void Start( void )=0; 23 | 24 | virtual void Stop( void ) {}; 25 | 26 | virtual int isComplete( void ) { return 0; }; 27 | 28 | virtual void StepTable( void ) {}; 29 | 30 | virtual void GetResult( void ) {}; 31 | }; 32 | 33 | 34 | 35 | 36 | #endif /* TF_TEST_H_ */ 37 | -------------------------------------------------------------------------------- /app_template/host/tf_testcnt.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * TF_TestCnt.cpp 3 | * 4 | * Created on: Jan 29, 2017 5 | * Author: Dmitry Smekhov 6 | */ 7 | 8 | #include 9 | #include 10 | #include "stdio.h" 11 | 12 | #include 13 | 14 | #include "tf_testcnt.h" 15 | #include "cl_cuda.h" 16 | #include 17 | #include 18 | 19 | #include "task_data.h" 20 | #include 21 | 22 | double getTime( void ) 23 | { 24 | clock_t t=clock(); 25 | double ret= t / (double)CLOCKS_PER_SEC; 26 | return ret; 27 | } 28 | 29 | TF_TestCnt::TF_TestCnt( int argc, char **argv ) : TF_TestThread( argc, argv ) 30 | { 31 | 32 | 33 | td = new TaskData; 34 | 35 | td->countOfCycle = GetFromCommnadLine( argc, argv, "-count", 16 ); 36 | td->sizeBufferOfKb = GetFromCommnadLine( argc, argv, "-size", 256); 37 | 38 | 39 | m_pCuda=NULL; 40 | 41 | m_argc=argc; 42 | m_argv=argv; 43 | 44 | } 45 | 46 | TF_TestCnt::~TF_TestCnt() { 47 | 48 | delete m_pCuda; m_pCuda=NULL; 49 | delete td; td=NULL; 50 | } 51 | 52 | 53 | /** 54 | * \brief Display current information about cheking buffers 55 | * 56 | * Function display information if 0==td->countOfCycle 57 | * function is called from main with interval of 100 ms 58 | */ 59 | void TF_TestCnt::StepTable( void ) 60 | { 61 | 62 | 63 | if( 0!=td->countOfCycle ) 64 | 
return; 65 | 66 | unsigned blockRd=0; 67 | unsigned blockOk=0; 68 | unsigned blockError=0; 69 | 70 | for( int ii=0; ii<3; ii++ ) 71 | { 72 | blockRd+=td->ptrMonitor->block[ii].blockRd; 73 | blockOk+=td->ptrMonitor->block[ii].blockOk; 74 | blockError+=td->ptrMonitor->block[ii].blockError; 75 | 76 | } 77 | 78 | printf( " %7d %7d %8d %7d %7d %8d %7.1lf %7.1lf %7.1lf %7.1lf\r", blockRd, blockOk, blockError, 79 | td->hostBlockRd, 80 | //td->ptrMonitor->block[0].indexWr, td->hostMonitor->status[0].indexRd 81 | td->hostBlockOk, 82 | td->hostBlockError, 83 | td->velosityExtToCudaCurrent, 84 | td->velosityExtToCudaAvr, 85 | td->velosityCudaToHostCurrent, 86 | td->velosityCudaToHostAvr 87 | 88 | //td->hostMonitor->status[0].indexWr, 89 | //td->hostMonitor->status[0].indexRd 90 | ); 91 | } 92 | 93 | /** 94 | * \brief Prepare CUDA and buffers 95 | * 96 | * Open CUDA device 97 | * Allocate three buffers and buffer for monitor 98 | */ 99 | void TF_TestCnt::PrepareInThread( void ) 100 | { 101 | 102 | m_pCuda = new CL_Cuda( m_argc, m_argv ); 103 | 104 | td->monitor.id=100; 105 | 106 | 107 | td->countOfBuffers=3; 108 | int size=td->sizeBufferOfKb; 109 | td->sizeBufferOfBytes=size*1024; 110 | 111 | 112 | for( int ii=0; iicountOfBuffers ; ii++ ) 113 | { 114 | td->bar1[ii].id=ii; 115 | m_pCuda->AllocateBar1Buffer( size, &(td->bar1[ii]) ); 116 | } 117 | m_pCuda->AllocateBar1Buffer( 256, &(td->monitor) ); 118 | 119 | 120 | td->ptrMonitor=(TaskMonitor*)td->monitor.app_addr[0]; 121 | 122 | 123 | size_t outSizeBlock=size*1024/TaskCounts; // size of output buffer 124 | int n=512*1024*1024 / outSizeBlock; // count blocks in 512 MB buffer 125 | 126 | size_t outSizeBuffer=n*outSizeBlock; 127 | 128 | td->outputSizeBuffer = outSizeBuffer; 129 | td->outputSizeBlock = outSizeBlock; 130 | td->outputCountBlock = n; 131 | 132 | cudaError_t ret; 133 | 134 | void *ptr=NULL; 135 | ret=cudaMallocHost( &ptr, outSizeBlock ); 136 | if( cudaSuccess!=ret ) 137 | throw( "Error page-locked memory allocate for 
hostBuffer" ); 138 | td->hostBuffer=(uint64_t*)ptr; 139 | 140 | ptr=NULL; 141 | ret=cudaMallocHost( &ptr, 4096 ); 142 | if( cudaSuccess!=ret ) 143 | throw( "Error page-locked memory allocate for hostMonitor" ); 144 | td->hostMonitor=(TaskHostMonitor*)ptr; 145 | 146 | 147 | for( int ii=0; iicountOfBuffers; ii++ ) 148 | { 149 | td->ptrMonitor->block[ii].ptrCudaIn=(void*)(td->bar1[ii].cuda_addr); 150 | 151 | td->ptrMonitor->block[ii].sizeOfKBytes=size; 152 | 153 | td->ptrMonitor->block[ii].irqFlag=0; 154 | td->ptrMonitor->block[ii].blockOk=0; 155 | td->ptrMonitor->block[ii].blockError=0; 156 | td->ptrMonitor->block[ii].blockRd=0; 157 | for( int jj=0; jjptrMonitor->block[ii].check[jj].cntError=0; 160 | td->ptrMonitor->block[ii].check[jj].flagError=0; 161 | } 162 | 163 | td->ptrMonitor->block[ii].indexRd=0; 164 | td->ptrMonitor->block[ii].indexWr=0; 165 | 166 | td->ptrMonitor->block[ii].indexMax = td->outputCountBlock; 167 | ptr=NULL; 168 | ret= cudaMalloc( &ptr, outSizeBuffer ); 169 | if( cudaSuccess != ret ) 170 | throw( "Error memory allocation for output buffer" ); 171 | td->ptrMonitor->block[ii].ptrCudaOut=ptr; 172 | 173 | td->ptrMonitor->block[ii].ptrHostStatus=&td->hostMonitor->status[ii]; 174 | 175 | 176 | } 177 | 178 | td->ptrMonitor->flagExit=0; 179 | td->ptrMonitor->sig=0xAA24; 180 | 181 | 182 | printf( "td->countOfCycle=%d\n", td->countOfCycle ); 183 | printf( "td->sizeBufferOfKb=%d [kB]\n\n", td->sizeBufferOfKb ); 184 | 185 | if( 0==td->countOfCycle ) 186 | printf( "\n CUDA_RD CUDA_OK CUDA_ERR HOST_RD HOST_OK HOST_ERR E2C_CUR E2C_AVR C2H_CUR C2H_AVR \n" ); 187 | 188 | } 189 | 190 | /** 191 | * \brief Free buffers and close device 192 | * 193 | */ 194 | void TF_TestCnt::CleanupInThread( void ) 195 | { 196 | 197 | for( int ii=0; iicountOfBuffers; ii++ ) 198 | { 199 | m_pCuda->FreeBar1Buffer( &(td->bar1[ii]) ); 200 | } 201 | m_pCuda->FreeBar1Buffer( &(td->monitor) ); 202 | 203 | cudaFreeHost( td->hostBuffer ); 204 | cudaFreeHost( td->hostMonitor ); 205 | 206 | 
delete m_pCuda; m_pCuda=NULL; 207 | 208 | fprintf( stderr, "%s - Ok\n", __FUNCTION__ ); 209 | } 210 | 211 | /** 212 | * \brief fill buffer 213 | * 214 | * \param pBar1 description of buffer 215 | * 216 | * function fill bar1 buffer via pBar1->app_addr[] 217 | * 218 | */ 219 | void TF_TestCnt::FillCounter( CL_Cuda::BAR1_BUF *pBar1 ) 220 | { 221 | if( 0xA05 != pBar1->state ) 222 | throw(0); 223 | 224 | int size64=pBar1->page_size/8; 225 | uint64_t *dst; 226 | uint64_t val=td->currentCounter; 227 | 228 | for( int page=0; pagepage_count; page++ ) 229 | { 230 | dst=(uint64_t*) (pBar1->app_addr[page]); 231 | for( int ii=size64; ii; ii--) 232 | *dst++=val++; 233 | 234 | } 235 | td->currentCounter=val; 236 | } 237 | 238 | 239 | int run_checkCounter( long *sharedMemory, int nbuf, cudaStream_t& stream ); 240 | int run_Monitor( long* sharedMemory, int nbuf, unsigned int index_rd, cudaStream_t stream ); 241 | 242 | /** 243 | * \brief Main working cycle 244 | * 245 | * It is main working cycle. 246 | * Function FillCounter simulate to work external DMA channel. 
247 | * 248 | */ 249 | void TF_TestCnt::Run( void ) 250 | { 251 | 252 | 253 | FillThreadStart(); 254 | 255 | long *ptrCudaMonitor=(long*)(td->monitor.cuda_addr); 256 | 257 | cudaStream_t streamBuf0; 258 | cudaStream_t streamBuf1; 259 | cudaStream_t streamBuf2; 260 | //cudaStream_t streamMonitor; 261 | cudaStream_t streamDMA; 262 | 263 | cudaStreamCreate( &streamBuf0 ); 264 | cudaStreamCreate( &streamBuf1 ); 265 | cudaStreamCreate( &streamBuf2 ); 266 | //cudaStreamCreate( &streamMonitor ); 267 | cudaStreamCreate( &streamDMA ); 268 | 269 | 270 | run_checkCounter( ptrCudaMonitor, 0, streamBuf0 ); 271 | run_checkCounter( ptrCudaMonitor, 1, streamBuf1 ); 272 | run_checkCounter( ptrCudaMonitor, 2, streamBuf2 ); 273 | 274 | 275 | int val; 276 | int blockRd; 277 | 278 | int nbuf; 279 | //unsigned int indexRd[3]={ 0, 0, 0 }; 280 | td->hostMonitor->status[0].indexRd=0; 281 | td->hostMonitor->status[1].indexRd=0; 282 | td->hostMonitor->status[2].indexRd=0; 283 | 284 | cudaError_t ret; 285 | 286 | int status=1; 287 | 288 | volatile unsigned int index_wr; 289 | unsigned int index_rd; 290 | 291 | double time_start = getTime(); 292 | double time_last=time_start; 293 | double time_current; 294 | double velosity; 295 | 296 | unsigned int blockRdLast=0; 297 | unsigned int blockHostRdLast=0; 298 | 299 | for( int kk=0; ; kk++ ) 300 | { 301 | 302 | 303 | time_current=getTime(); 304 | if( time_current-time_last>4 ) 305 | { 306 | blockRd=td->ptrMonitor->block[0].blockRd + td->ptrMonitor->block[1].blockRd + td->ptrMonitor->block[2].blockRd; 307 | 308 | velosity = (double)1.0*td->sizeBufferOfKb*1024*(blockRd-blockRdLast)/(time_current-time_last); 309 | td->velosityExtToCudaCurrent=velosity/1024/1024; 310 | 311 | velosity = (double)1.0*td->sizeBufferOfKb*blockRd; 312 | velosity/=(time_current-time_start); 313 | td->velosityExtToCudaAvr=velosity/1024; 314 | 315 | blockRdLast=blockRd; 316 | 317 | 318 | blockRd = td->hostBlockRd; 319 | velosity = 
(double)1.0*td->outputSizeBlock*(blockRd-blockHostRdLast)/(time_current-time_last); 320 | td->velosityCudaToHostCurrent=velosity/1024/1024; 321 | 322 | velosity = (double)1.0*td->outputSizeBlock*blockRd; 323 | velosity/=(time_current-time_start); 324 | td->velosityCudaToHostAvr=velosity/1024/1024; 325 | 326 | 327 | time_last=time_current; 328 | blockHostRdLast=blockRd; 329 | 330 | } 331 | 332 | 333 | if( m_isTerminate || (td->countOfCycle>0 && td->countOfCycle==blockRd )) 334 | { 335 | td->ptrMonitor->flagExit=1; 336 | break; 337 | } 338 | 339 | switch( status ) 340 | { 341 | // case 0: // run monitor 342 | // run_Monitor( ptrCudaMonitor, nbuf, td->hostMonitor->status[nbuf].indexRd, streamMonitor ); 343 | // 344 | // status=1; 345 | // break; 346 | 347 | case 1: // wait for ready current buffer and start DMA read 348 | // ret=cudaStreamQuery( streamMonitor ); 349 | // if( cudaSuccess==ret ) 350 | index_wr = td->hostMonitor->status[nbuf].indexWr; 351 | index_rd = td->hostMonitor->status[nbuf].indexRd; 352 | if( index_wr!=index_rd ) 353 | { 354 | 355 | //hostBlockRdprintf( "status 1: nbuf=%d index_rd=%d\n", nbuf, indexRd[nbuf]); 356 | uint64_t* d_src=(uint64_t*)(td->ptrMonitor->block[nbuf].ptrCudaOut); 357 | d_src+=td->hostMonitor->status[nbuf].indexRd * td->outputSizeBlock/8; 358 | 359 | cudaMemcpyAsync( td->hostBuffer, d_src, td->outputSizeBlock, cudaMemcpyDeviceToHost, streamDMA ); 360 | //cudaMemcpy( td->hostBuffer, d_src, td->outputSizeBlock, cudaMemcpyDeviceToHost ); 361 | //usleep( 1 ); 362 | status=2; 363 | } 364 | break; 365 | case 2: // wait for data transfer complete 366 | //ret=cudaStreamQuery( streamDMA ); 367 | //if( cudaSuccess==ret ) 368 | { 369 | cudaStreamSynchronize( streamDMA ); 370 | CheckHostData( td->hostBuffer ); 371 | 372 | td->hostBlockRd++; 373 | int n=td->hostMonitor->status[nbuf].indexRd+1; 374 | if( n==td->outputCountBlock ) 375 | n=0; 376 | td->hostMonitor->status[nbuf].indexRd=n; 377 | 378 | n=nbuf+1; 379 | if( td->countOfBuffers==n ) 
380 | n=0; 381 | nbuf=n; 382 | 383 | status=1; 384 | } 385 | break; 386 | } 387 | 388 | 389 | 390 | usleep( 1000 ); 391 | 392 | } 393 | 394 | usleep( 10000 ); 395 | 396 | td->ptrMonitor->flagExit=1; 397 | 398 | 399 | cudaStreamSynchronize( streamBuf0 ); 400 | cudaStreamSynchronize( streamBuf1 ); 401 | cudaStreamSynchronize( streamBuf2 ); 402 | 403 | 404 | GetResult(); 405 | 406 | FillThreadDestroy(); 407 | 408 | return; 409 | 410 | 411 | 412 | } 413 | 414 | /** 415 | * \brief Display result for all buffers 416 | * 417 | */ 418 | void TF_TestCnt::GetResult( void ) 419 | { 420 | GetResultBuffer( 0 ); 421 | GetResultBuffer( 1 ); 422 | GetResultBuffer( 2 ); 423 | 424 | GetHostResult(); 425 | } 426 | 427 | /** 428 | * \brief Display result for one buffers 429 | * 430 | * \param nbuf number of buffer 431 | * 432 | */ 433 | void TF_TestCnt::GetResultBuffer( int nbuf ) 434 | { 435 | 436 | TaskBufferStatus *ts=&(td->ptrMonitor->block[nbuf]); 437 | printf( "\nBuffer %d\n", nbuf ); 438 | printf( "block_rd=%d\n", ts->blockRd ); 439 | printf( "block_ok=%d\n", ts->blockOk ); 440 | printf( "block_error=%d\n", ts->blockError ); 441 | 442 | int flag_ok=1; 443 | for( int ii=0; iicheck[ii].cntError ) 446 | { 447 | flag_ok=0; 448 | break; 449 | } 450 | } 451 | 452 | if( 1==flag_ok ) 453 | { 454 | printf( "Task 0:%d - Ok\n", TaskCounts-1 ); 455 | 456 | } else 457 | { 458 | 459 | 460 | for( int ii=0; iicheck[ii].cntError; 463 | if( 0==cntError ) 464 | { 465 | printf( "Task %d -Ok\n", ii ); 466 | } else 467 | { 468 | printf( "\nTask %d \n", ii ); 469 | printf( " cntError=%d\n", cntError); 470 | if( cntError>16 ) 471 | cntError=16; 472 | for( int jj=0; jjcheck[ii].nblock[jj], 477 | ts->check[ii].adr[jj], 478 | ts->check[ii].receive_data[jj], 479 | ts->check[ii].expect_data[jj] 480 | ); 481 | } 482 | } 483 | 484 | } 485 | } 486 | 487 | 488 | 489 | } 490 | 491 | void TF_TestCnt::FillThreadStart( void ) 492 | { 493 | int res = pthread_attr_init(&m_attrFillThread); 494 | if(res != 0) { 495 | 
fprintf(stderr, "%s\n", "Stream not started"); 496 | throw( "Stream not started" ); 497 | } 498 | 499 | res = pthread_attr_setdetachstate(&m_attrFillThread, PTHREAD_CREATE_JOINABLE); 500 | if(res != 0) { 501 | fprintf(stderr, "%s\n", "Stream not started"); 502 | throw( "Stream not started" ); 503 | } 504 | 505 | res = pthread_create(&m_hFillThread, &m_attrFillThread, FillThreadFunc, this); 506 | if(res != 0) { 507 | fprintf(stderr, "%s\n", "Stream not started"); 508 | throw( "Stream not started" ); 509 | } 510 | } 511 | 512 | void TF_TestCnt::FillThreadDestroy( void ) 513 | { 514 | 515 | } 516 | 517 | 518 | void* TF_TestCnt::FillThreadFunc( void* lpvThreadParm ) 519 | { 520 | TF_TestCnt *test=(TF_TestCnt*)lpvThreadParm; 521 | void* ret; 522 | if( !test ) 523 | return 0; 524 | ret=test->FillExecute(); 525 | return ret; 526 | } 527 | 528 | void* TF_TestCnt::FillExecute( void ) 529 | { 530 | 531 | //printf( "\nFillCounter Start\n"); 532 | for( ; ; ) 533 | { 534 | 535 | // Check for checkCounter finished checking buffer 0 536 | // for( ; ; ) 537 | // { 538 | // val = td->ptrMonitor->block[0].irqFlag; 539 | // if( 0==val ) 540 | // break; 541 | // } 542 | FillCounter( &td->bar1[0]); 543 | td->ptrMonitor->block[0].irqFlag=1; 544 | 545 | usleep( 1 ); 546 | if( td->ptrMonitor->flagExit ) 547 | break; 548 | 549 | 550 | // Check for checkCounter finished checking buffer 1 551 | // for( ; ; ) 552 | // { 553 | // val = td->ptrMonitor->block[1].irqFlag; 554 | // if( 0==val ) 555 | // break; 556 | // } 557 | FillCounter( &td->bar1[1]); 558 | td->ptrMonitor->block[1].irqFlag=1; 559 | 560 | usleep( 1 ); 561 | if( td->ptrMonitor->flagExit ) 562 | break; 563 | 564 | // Check for checkCounter finished checking buffer 2 565 | // for( ; ; ) 566 | // { 567 | // val = td->ptrMonitor->block[2].irqFlag; 568 | // if( 0==val ) 569 | // break; 570 | // } 571 | FillCounter( &td->bar1[2]); 572 | td->ptrMonitor->block[2].irqFlag=1; 573 | 574 | usleep( 1 ); 575 | if( td->ptrMonitor->flagExit ) 
576 | break; 577 | 578 | } 579 | //printf( "\nFillCounter Stop\n"); 580 | 581 | return NULL; 582 | } 583 | 584 | 585 | //! Check received data 586 | void TF_TestCnt::CheckHostData( uint64_t* src ) 587 | { 588 | //printf( "CheckHostData: 0x%.8lX \n", *src ); 589 | 590 | int cnt=td->outputSizeBlock/8; 591 | 592 | uint64_t step=TaskCounts; 593 | uint64_t val; 594 | uint64_t expect_data = td->hostExpectData; 595 | 596 | unsigned int errorCnt=td->hostCheck.cntError; 597 | 598 | int flagError=0; 599 | for( int ii=0; iihostCheck.nblock[errorCnt]=td->hostBlockRd; 609 | td->hostCheck.adr[errorCnt]=ii; 610 | td->hostCheck.expect_data[errorCnt]=expect_data; 611 | td->hostCheck.receive_data[errorCnt]=val; 612 | } 613 | errorCnt++; 614 | flagError=1; 615 | } 616 | expect_data+=step; 617 | } 618 | td->hostExpectData=expect_data; 619 | td->hostCheck.cntError=errorCnt; 620 | if( flagError ) 621 | { 622 | td->hostBlockError++; 623 | } 624 | else 625 | { 626 | td->hostBlockOk++; 627 | } 628 | 629 | } 630 | 631 | //! 
Print results for host buffer 632 | void TF_TestCnt::GetHostResult( void ) 633 | { 634 | printf( "\nHost \n" ); 635 | printf( "block_rd=%d\n", td->hostBlockRd ); 636 | printf( "block_ok=%d\n", td->hostBlockOk); 637 | printf( "block_error=%d\n", td->hostBlockError ); 638 | 639 | unsigned int cntError=td->hostCheck.cntError; 640 | if( 0==cntError ) 641 | { 642 | printf( "Host - Ok\n" ); 643 | } else 644 | { 645 | printf( " cntError=%d\n", cntError); 646 | if( cntError>16 ) 647 | cntError=16; 648 | for( int jj=0; jjhostCheck.nblock[jj], 653 | td->hostCheck.adr[jj], 654 | td->hostCheck.receive_data[jj], 655 | td->hostCheck.expect_data[jj] 656 | ); 657 | } 658 | } 659 | 660 | 661 | } 662 | -------------------------------------------------------------------------------- /app_template/host/tf_testcnt.h: -------------------------------------------------------------------------------- 1 | /* 2 | * TF_TestCnt.h 3 | * 4 | * Created on: Jan 29, 2017 5 | * Author: Dmitry Smekhov 6 | */ 7 | 8 | #ifndef TF_TESTCNT_H_ 9 | #define TF_TESTCNT_H_ 10 | 11 | #include 12 | 13 | #include "tf_testthread.h" 14 | 15 | //class CL_Cuda; 16 | //struct CL_Cuda::BAR1_BUF; 17 | #include "cl_cuda.h" 18 | 19 | 20 | struct TaskData; 21 | struct TaskBufferStatus; 22 | 23 | /** 24 | * \brief Checking the transmission counter at CUDA device 25 | * 26 | * Key actions: 27 | * -# Open CUDA device 28 | * -# Open gpumem driver 29 | * -# Allocate three buffers in the CUDA memory 30 | * -# Mapping buffers in the BAR1 space on CUDA device 31 | * -# Filling the buffer 64-bit counter via BAR1 32 | * -# Checking buffer in the CUDA device 33 | * -# Decimation buffer and transfer to the HOST 34 | * -# Transfer result of checking to HOST 35 | * 36 | * 37 | * Steps 5-8 are carried out in a loop 38 | * 39 | * 40 | */ 41 | class TF_TestCnt: public TF_TestThread 42 | { 43 | public: 44 | TF_TestCnt( int argc, char **argv ); 45 | virtual ~TF_TestCnt(); 46 | 47 | 48 | virtual void StepTable( void ); 49 | 50 | virtual void 
PrepareInThread( void ); 51 | 52 | virtual void CleanupInThread( void ); 53 | 54 | virtual void Run( void ); 55 | 56 | virtual void GetResult( void ); 57 | 58 | //! Number of arguments 59 | int m_argc; 60 | 61 | //! Pointers to arguments 62 | char** m_argv; 63 | 64 | 65 | struct TaskData *td; //!< Local data for test 66 | 67 | CL_Cuda *m_pCuda; //!< Cuda device 68 | 69 | 70 | //! Fill buffer in Cuda memory via BAR1 71 | void FillCounter( CL_Cuda::BAR1_BUF *pBar1 ); 72 | 73 | //! Print results for buffer 74 | void GetResultBuffer( int nbuf ); 75 | 76 | 77 | pthread_t m_hFillThread; 78 | pthread_attr_t m_attrFillThread; 79 | 80 | 81 | void FillThreadStart( void ); 82 | 83 | void FillThreadDestroy( void ); 84 | 85 | static void* FillThreadFunc( void* lpvThreadParm ); 86 | 87 | void* FillExecute( void ); 88 | 89 | 90 | 91 | //! Check received data 92 | void CheckHostData( uint64_t* src ); 93 | 94 | //! Print results for host buffer 95 | void GetHostResult( void ); 96 | }; 97 | 98 | #endif /* TF_TESTCNT_H_ */ 99 | -------------------------------------------------------------------------------- /app_template/host/tf_testthread.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * TF_TestThread.cpp 3 | * 4 | * Created on: Jan 29, 2017 5 | * Author: Dmitry Smekhov 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "tf_testthread.h" 15 | 16 | 17 | 18 | TF_TestThread::TF_TestThread( int argc, char **argv ) 19 | { 20 | // TODO Auto-generated constructor stub 21 | 22 | m_isPrepareComplete=0; 23 | m_isComplete=0; 24 | m_isTerminate=0; 25 | m_CycleCnt=0; 26 | 27 | pthread_mutex_t m_StartMutex = PTHREAD_MUTEX_INITIALIZER; 28 | pthread_cond_t m_StartCond = PTHREAD_COND_INITIALIZER; 29 | 30 | } 31 | 32 | TF_TestThread::~TF_TestThread() 33 | { 34 | 35 | } 36 | 37 | 38 | 39 | int TF_TestThread::Prepare( int cnt ) 40 | { 41 | if( 0==cnt ) 42 | { 43 | int res = pthread_attr_init(&m_attrThread); 
/**
 * 	\brief	Body of the working thread
 *
 * 	Called from ThreadFunc(). Sequence:
 * 	-# PrepareInThread(), then m_isPrepareComplete is set (Prepare() returns this flag)
 * 	-# wait on m_StartCond, which is signalled by Start()
 * 	-# Run()
 * 	-# CleanupInThread(), then m_isComplete is set (isComplete() returns this flag)
 *
 * 	\return	always NULL
 */
void* TF_TestThread::Execute( void )
{
	PrepareInThread();
	m_isPrepareComplete=1;

	// Wait for Start function
	// NOTE(review): the wait has no predicate. If Start() signals after
	// m_isPrepareComplete is set but before pthread_cond_wait() is entered,
	// the signal is lost and the thread waits forever; a spurious wakeup
	// would start Run() early. A "started" flag checked in a loop would fix
	// both, but it needs a new class member.
	pthread_mutex_lock( &m_StartMutex );
	pthread_cond_wait( &m_StartCond, &m_StartMutex );
	pthread_mutex_unlock( &m_StartMutex );

	Run();

	CleanupInThread();

	m_isComplete=1;
	return NULL;
}
# Build of the gpumem kernel module.
#
# The NVIDIA driver sources are expected in $(GPUDMA_DIR)/nvidia
# (default: $(HOME)/gpudma/nvidia). Only kernel/Module.symvers and the
# kernel/nvidia headers are used from there.

KERNELVER := $(shell uname -r)

ifndef GPUDMA_DIR

NVIDIA_DRIVER_PATH := $(HOME)/gpudma/nvidia

else

NVIDIA_DRIVER_PATH := $(GPUDMA_DIR)/nvidia

endif

KBUILD_EXTRA_SYMBOLS := $(NVIDIA_DRIVER_PATH)/kernel/Module.symvers
EXTRA_CFLAGS += -fno-stack-protector
#EXTRA_CFLAGS += -fno-stack-protector -fno-stack-protector-strong
#EXTRA_CFLAGS += -fno-pie
EXTRA_CFLAGS += -O2
EXTRA_CFLAGS += -I$(NVIDIA_DRIVER_PATH)/kernel/nvidia

ccflags-y += $(EXTRA_CFLAGS)

# This part is read by kbuild only
ifneq ($(KERNELRELEASE),)

obj-m += gpumem.o
gpumem-objs := gpumemproc.o ioctlrw.o gpumemdrv.o

endif

KERNELDIR ?= /lib/modules/$(KERNELVER)/build

.PHONY: all distclean clean

# CURDIR is set by make itself; PWD comes from the environment and can be
# missing or stale (sudo, make -C).
all:
	$(MAKE) -C $(KERNELDIR) M=$(CURDIR) modules

distclean:
	rm -rf *.o *~ core .depend .*.cmd *.ko *.mod.c .tmp_versions *.bak .*.cache *.d
clean:
	rm -rf *.o *~ core .depend .*.cmd *.ko *.mod.c .tmp_versions *.bak .*.cache *.d *.markers *.symvers *.order
MODULE_AUTHOR("Vladimir Karakozov. karakozov@gmail.com"); 33 | MODULE_LICENSE("GPL"); 34 | 35 | //----------------------------------------------------------------------------- 36 | static struct gpumem dev; 37 | //----------------------------------------------------------------------------- 38 | 39 | static struct gpumem *file_to_device( struct file *file ) 40 | { 41 | return (struct gpumem*)file->private_data; 42 | } 43 | 44 | //-------------------------------------------------------------------- 45 | 46 | static int gpumem_open( struct inode *inode, struct file *file ) 47 | { 48 | file->private_data = (void*)&dev; 49 | return 0; 50 | } 51 | 52 | //----------------------------------------------------------------------------- 53 | 54 | static int gpumem_close( struct inode *inode, struct file *file ) 55 | { 56 | file->private_data = 0; 57 | return 0; 58 | } 59 | 60 | //----------------------------------------------------------------------------- 61 | 62 | static long gpumem_ioctl( struct file *file, unsigned int cmd, unsigned long arg ) 63 | { 64 | int error = 0; 65 | struct gpumem *dev = file_to_device(file); 66 | if(!dev) { 67 | printk(KERN_ERR"%s(): ioctl driver failed\n", __FUNCTION__); 68 | return -ENODEV; 69 | } 70 | 71 | switch(cmd) { 72 | 73 | case IOCTL_GPUMEM_LOCK: error = ioctl_mem_lock(dev, arg); break; 74 | case IOCTL_GPUMEM_UNLOCK: error = ioctl_mem_unlock(dev, arg); break; 75 | case IOCTL_GPUMEM_STATE: error = ioctl_mem_state(dev, arg); break; 76 | default: 77 | printk(KERN_DEBUG"%s(): Unknown ioctl command\n", __FUNCTION__); 78 | error = -EINVAL; 79 | break; 80 | } 81 | 82 | return error; 83 | } 84 | 85 | //----------------------------------------------------------------------------- 86 | 87 | int gpumem_mmap(struct file *file, struct vm_area_struct *vma) 88 | { 89 | size_t size = vma->vm_end - vma->vm_start; 90 | 91 | if (!(vma->vm_flags & VM_MAYSHARE)) 92 | return -EINVAL; 93 | 94 | vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); 95 | 96 
| if (remap_pfn_range(vma, 97 | vma->vm_start, 98 | vma->vm_pgoff, 99 | size, 100 | vma->vm_page_prot)) { 101 | pr_err("%s(): error in remap_page_range.\n", __func__ ); 102 | return -EAGAIN; 103 | } 104 | 105 | return 0; 106 | } 107 | 108 | //----------------------------------------------------------------------------- 109 | 110 | struct file_operations gpumem_fops = { 111 | 112 | .owner = THIS_MODULE, 113 | .unlocked_ioctl = gpumem_ioctl, 114 | .compat_ioctl = gpumem_ioctl, 115 | .open = gpumem_open, 116 | .release = gpumem_close, 117 | .mmap = gpumem_mmap, 118 | }; 119 | 120 | //----------------------------------------------------------------------------- 121 | 122 | static struct miscdevice gpumem_dev = { 123 | 124 | MISC_DYNAMIC_MINOR, 125 | GPUMEM_DRIVER_NAME, 126 | &gpumem_fops 127 | }; 128 | 129 | //----------------------------------------------------------------------------- 130 | 131 | static int __init gpumem_init(void) 132 | { 133 | pr_info(GPUMEM_DRIVER_NAME ": %s()\n", __func__); 134 | dev.proc = 0; 135 | sema_init(&dev.sem, 1); 136 | INIT_LIST_HEAD(&dev.table_list); 137 | gpumem_register_proc(GPUMEM_DRIVER_NAME, 0, &dev); 138 | misc_register(&gpumem_dev); 139 | return 0; 140 | } 141 | 142 | //----------------------------------------------------------------------------- 143 | 144 | static void __exit gpumem_cleanup(void) 145 | { 146 | pr_info(GPUMEM_DRIVER_NAME ": %s()\n", __func__); 147 | gpumem_remove_proc(GPUMEM_DRIVER_NAME); 148 | misc_deregister(&gpumem_dev); 149 | } 150 | 151 | //----------------------------------------------------------------------------- 152 | 153 | module_init(gpumem_init); 154 | module_exit(gpumem_cleanup); 155 | 156 | //----------------------------------------------------------------------------- 157 | -------------------------------------------------------------------------------- /module/gpumemdrv.h: -------------------------------------------------------------------------------- 1 | 2 | 3 | #ifndef GPUMEM_H 4 | #define 
// One pinned GPU memory region. Created by ioctl_mem_lock().
struct gpumem_t {
    struct list_head list;                  // link in gpumem.table_list
    void *handle;                           // set to the address of this entry; returned to user space
                                            // and compared with gpudma_unlock_t.handle on unlock
    u64 virt_start;                         // GPU virtual address, rounded down with GPU_BOUND_MASK
    nvidia_p2p_page_table_t* page_table;    // page table filled by nvidia_p2p_get_pages()
};
#define GPU_BOUND_MASK (~GPU_BOUND_OFFSET) 30 | 31 | //----------------------------------------------------------------------------- 32 | 33 | struct gpudma_lock_t { 34 | void* handle; 35 | uint64_t addr; 36 | uint64_t size; 37 | size_t page_count; 38 | }; 39 | 40 | //----------------------------------------------------------------------------- 41 | 42 | struct gpudma_unlock_t { 43 | void* handle; 44 | }; 45 | 46 | //----------------------------------------------------------------------------- 47 | 48 | struct gpudma_state_t { 49 | void* handle; 50 | size_t page_count; 51 | size_t page_size; 52 | uint64_t pages[1]; 53 | }; 54 | 55 | //----------------------------------------------------------------------------- 56 | 57 | 58 | #endif //_GPUDMAIOTCL_H_ 59 | -------------------------------------------------------------------------------- /module/gpumemproc.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #define __NO_VERSION__ 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include "gpumemdrv.h" 17 | #include "gpumemproc.h" 18 | 19 | //-------------------------------------------------------------------- 20 | 21 | struct log_buf_t { 22 | struct seq_file *param; 23 | }; 24 | 25 | //-------------------------------------------------------------------- 26 | 27 | #define print_info(S...) 
seq_printf(S) 28 | 29 | //-------------------------------------------------------------------- 30 | 31 | static void show_mem_info( struct gpumem *drv, struct seq_file *m ) 32 | { 33 | struct list_head *pos, *n; 34 | int i=0, idx=0; 35 | if(!drv || !m) { 36 | printk(KERN_DEBUG"%s(): EINVAL\n", __FUNCTION__ ); 37 | return; 38 | } 39 | 40 | print_info(m, "%s\n", "Pinned memory info:"); 41 | 42 | list_for_each_safe(pos, n, &drv->table_list) { 43 | 44 | struct gpumem_t *entry = list_entry(pos, struct gpumem_t, list); 45 | if(entry) { 46 | if(entry->virt_start) { 47 | 48 | print_info(m, "%d: Entry - %p\n", idx, entry); 49 | print_info(m, "Virtual GPU address - 0x%llx\n", entry->virt_start); 50 | print_info(m, "Number of pages - %d\n", entry->page_table->entries); 51 | print_info(m, "Page size - 0x%x\n", get_nv_page_size(entry->page_table->page_size)); 52 | 53 | for(i=0; ipage_table->entries; i++) { 54 | struct nvidia_p2p_page *nvp = entry->page_table->pages[i]; 55 | if(nvp) { 56 | print_info(m, "%02d: - 0x%llx\n", i, nvp->physical_address); 57 | } 58 | } 59 | 60 | print_info(m, "\n"); 61 | 62 | ++idx; 63 | } 64 | } 65 | } 66 | } 67 | 68 | //-------------------------------------------------------------------- 69 | 70 | static int gpumem_proc_show(struct seq_file *m, void *v) 71 | { 72 | struct gpumem *p = m->private; 73 | 74 | show_mem_info( p, m ); 75 | 76 | return 0; 77 | } 78 | 79 | //-------------------------------------------------------------------- 80 | 81 | static int gpumem_proc_open(struct inode *inode, struct file *file) 82 | { 83 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0) 84 | struct gpumem *p = (struct gpumem *)PDE_DATA(inode); 85 | #else 86 | struct gpumem *p = (struct gpumem *)PDE(inode)->data; 87 | #endif 88 | return single_open(file, gpumem_proc_show, p); 89 | } 90 | 91 | //-------------------------------------------------------------------- 92 | 93 | static int gpumem_proc_release(struct inode *inode, struct file *file) 94 | { 95 | return 
single_release(inode, file); 96 | } 97 | 98 | //-------------------------------------------------------------------- 99 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0) 100 | static const struct proc_ops gpumem_proc_fops = { 101 | .proc_open = gpumem_proc_open, 102 | .proc_read = seq_read, 103 | .proc_lseek = seq_lseek, 104 | .proc_release = gpumem_proc_release, 105 | }; 106 | #else 107 | static const struct file_operations gpumem_proc_fops = { 108 | .owner = THIS_MODULE, 109 | .open = gpumem_proc_open, 110 | .read = seq_read, 111 | .llseek = seq_lseek, 112 | .release = gpumem_proc_release, 113 | }; 114 | #endif 115 | 116 | //-------------------------------------------------------------------- 117 | 118 | void gpumem_register_proc( char *name, void *fptr, void *data ) 119 | { 120 | struct gpumem *p = (struct gpumem*)data; 121 | 122 | if(!data) { 123 | printk(KERN_DEBUG"%s(): Invalid driver pointer\n", __FUNCTION__ ); 124 | return; 125 | } 126 | 127 | p->proc = proc_create_data(name, S_IRUGO, NULL, &gpumem_proc_fops, p); 128 | if(!p->proc) { 129 | printk(KERN_DEBUG"%s(): Error register /proc entry\n", __FUNCTION__); 130 | } 131 | } 132 | 133 | //-------------------------------------------------------------------- 134 | 135 | void gpumem_remove_proc( char *name ) 136 | { 137 | remove_proc_entry(name, NULL); 138 | } 139 | 140 | //-------------------------------------------------------------------- 141 | 142 | -------------------------------------------------------------------------------- /module/gpumemproc.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef __GPUDMAPROC_H__ 3 | #define __GPUDMAPROC_H__ 4 | 5 | void gpumem_register_proc(char *name, void *fptr, void *data); 6 | void gpumem_remove_proc(char *name); 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /module/ioctlrw.c: -------------------------------------------------------------------------------- 1 | 2 | 
#include 3 | #define __NO_VERSION__ 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "gpumemdrv.h" 14 | #include "gpumemioctl.h" 15 | 16 | //----------------------------------------------------------------------------- 17 | 18 | int get_nv_page_size(int val) 19 | { 20 | switch(val) { 21 | case NVIDIA_P2P_PAGE_SIZE_4KB: return 4*1024; 22 | case NVIDIA_P2P_PAGE_SIZE_64KB: return 64*1024; 23 | case NVIDIA_P2P_PAGE_SIZE_128KB: return 128*1024; 24 | } 25 | return 0; 26 | } 27 | 28 | //-------------------------------------------------------------------- 29 | 30 | void free_nvp_callback(void *data) 31 | { 32 | int res; 33 | struct gpumem_t *entry = (struct gpumem_t*)data; 34 | if(entry) { 35 | res = nvidia_p2p_free_page_table(entry->page_table); 36 | if(res == 0) { 37 | printk(KERN_ERR"%s(): nvidia_p2p_free_page_table() - OK!\n", __FUNCTION__); 38 | //entry->virt_start = 0ULL; 39 | //entry->page_table = 0; 40 | } else { 41 | printk(KERN_ERR"%s(): Error in nvidia_p2p_free_page_table()\n", __FUNCTION__); 42 | } 43 | } 44 | } 45 | 46 | //----------------------------------------------------------------------------- 47 | 48 | int ioctl_mem_lock(struct gpumem *drv, unsigned long arg) 49 | { 50 | int error = 0; 51 | size_t pin_size = 0ULL; 52 | struct gpumem_t *entry = 0; 53 | struct gpudma_lock_t param; 54 | 55 | if(copy_from_user(¶m, (void *)arg, sizeof(struct gpudma_lock_t))) { 56 | printk(KERN_ERR"%s(): Error in copy_from_user()\n", __FUNCTION__); 57 | error = -EFAULT; 58 | goto do_exit; 59 | } 60 | 61 | entry = (struct gpumem_t*)kzalloc(sizeof(struct gpumem_t), GFP_KERNEL); 62 | if(!entry) { 63 | printk(KERN_ERR"%s(): Error allocate memory to mapping struct\n", __FUNCTION__); 64 | error = -ENOMEM; 65 | goto do_exit; 66 | } 67 | 68 | INIT_LIST_HEAD(&entry->list); 69 | entry->handle = entry; 70 | 71 | entry->virt_start = (param.addr & GPU_BOUND_MASK); 72 | pin_size = (param.addr + param.size - 
entry->virt_start); 73 | if(!pin_size) { 74 | printk(KERN_ERR"%s(): Error invalid memory size!\n", __FUNCTION__); 75 | error = -EINVAL; 76 | goto do_free_mem; 77 | } 78 | 79 | error = nvidia_p2p_get_pages(0, 0, entry->virt_start, pin_size, &entry->page_table, free_nvp_callback, entry); 80 | if(error != 0) { 81 | printk(KERN_ERR"%s(): Error in nvidia_p2p_get_pages()\n", __FUNCTION__); 82 | error = -EINVAL; 83 | goto do_free_mem; 84 | } 85 | 86 | param.page_count = entry->page_table->entries; 87 | param.handle = entry; 88 | 89 | printk(KERN_ERR"%s(): param.handle: %p\n", __FUNCTION__, param.handle); 90 | 91 | if(copy_to_user((void *)arg, ¶m, sizeof(struct gpudma_lock_t))) { 92 | printk(KERN_ERR"%s(): Error in copy_from_user()\n", __FUNCTION__); 93 | error = -EFAULT; 94 | goto do_unlock_pages; 95 | } 96 | 97 | list_add_tail(&entry->list, &drv->table_list); 98 | 99 | printk(KERN_ERR"%s(): Add new entry. handle: %p\n", __FUNCTION__, entry->handle); 100 | 101 | return 0; 102 | 103 | do_unlock_pages: 104 | nvidia_p2p_put_pages(0, 0, entry->virt_start, entry->page_table); 105 | do_free_mem: 106 | kfree(entry); 107 | do_exit: 108 | return error; 109 | } 110 | 111 | //----------------------------------------------------------------------------- 112 | 113 | int ioctl_mem_unlock(struct gpumem *drv, unsigned long arg) 114 | { 115 | int error = -EINVAL; 116 | struct gpumem_t *entry = 0; 117 | struct gpudma_unlock_t param; 118 | struct list_head *pos, *n; 119 | 120 | if(copy_from_user(¶m, (void *)arg, sizeof(struct gpudma_unlock_t))) { 121 | printk(KERN_ERR"%s(): Error in copy_from_user()\n", __FUNCTION__); 122 | error = -EFAULT; 123 | goto do_exit; 124 | } 125 | 126 | list_for_each_safe(pos, n, &drv->table_list) { 127 | 128 | entry = list_entry(pos, struct gpumem_t, list); 129 | if(entry) { 130 | if(entry->handle == param.handle) { 131 | 132 | printk(KERN_ERR"%s(): param.handle = %p\n", __FUNCTION__, param.handle); 133 | printk(KERN_ERR"%s(): entry.handle = %p\n", __FUNCTION__, 
entry->handle); 134 | 135 | if(entry->virt_start && entry->page_table) { 136 | error = nvidia_p2p_put_pages(0, 0, entry->virt_start, entry->page_table); 137 | if(error != 0) { 138 | printk(KERN_ERR"%s(): Error in nvidia_p2p_put_pages()\n", __FUNCTION__); 139 | goto do_exit; 140 | } 141 | //entry->virt_start = 0ULL; 142 | //entry->page_table = 0; 143 | printk(KERN_ERR"%s(): nvidia_p2p_put_pages() - Ok!\n", __FUNCTION__); 144 | } 145 | 146 | list_del(pos); 147 | kfree(entry); 148 | break; 149 | } else { 150 | printk(KERN_ERR"%s(): Skip entry: %p\n", __FUNCTION__, entry->handle); 151 | } 152 | } 153 | } 154 | 155 | do_exit: 156 | return error; 157 | } 158 | 159 | //----------------------------------------------------------------------------- 160 | 161 | int ioctl_mem_state(struct gpumem *drv, unsigned long arg) 162 | { 163 | int error = 0; 164 | int size = 0; 165 | int i=0; 166 | struct gpumem_t *entry = 0; 167 | struct gpudma_state_t header; 168 | struct gpudma_state_t *param; 169 | struct list_head *pos, *n; 170 | 171 | if(copy_from_user(&header, (void *)arg, sizeof(struct gpudma_state_t))) { 172 | printk(KERN_ERR"%s(): Error in copy_from_user()\n", __FUNCTION__); 173 | error = -EFAULT; 174 | goto do_exit; 175 | } 176 | 177 | list_for_each_safe(pos, n, &drv->table_list) { 178 | 179 | entry = list_entry(pos, struct gpumem_t, list); 180 | if(entry) { 181 | if(entry->handle == header.handle) { 182 | 183 | printk(KERN_ERR"%s(): param.handle = %p\n", __FUNCTION__, header.handle); 184 | printk(KERN_ERR"%s(): entry.handle = %p\n", __FUNCTION__, entry->handle); 185 | 186 | if(!entry->page_table) { 187 | printk(KERN_ERR"%s(): Error - memory not pinned!\n", __FUNCTION__); 188 | return -EINVAL; 189 | } 190 | 191 | if((entry->page_table->entries != header.page_count) || (entry->handle != header.handle)) { 192 | printk(KERN_ERR"%s(): Error - page counters or handle invalid!\n", __FUNCTION__); 193 | return -EINVAL; 194 | } 195 | 196 | size = (sizeof(uint64_t)*header.page_count) + 
sizeof(struct gpudma_state_t); 197 | param = kzalloc(size, GFP_KERNEL); 198 | if(!param) { 199 | printk(KERN_ERR"%s(): Error allocate memory!\n", __FUNCTION__); 200 | return -ENOMEM; 201 | } 202 | param->page_size = get_nv_page_size(entry->page_table->page_size); 203 | for(i=0; ipage_table->entries; i++) { 204 | struct nvidia_p2p_page *nvp = entry->page_table->pages[i]; 205 | if(nvp) { 206 | param->pages[i] = nvp->physical_address; 207 | param->page_count++; 208 | printk(KERN_ERR"%s(): %02d - 0x%llx\n", __FUNCTION__, i, param->pages[i]); 209 | } 210 | } 211 | printk(KERN_ERR"%s(): page_count = %ld\n", __FUNCTION__, (long int)param->page_count); 212 | param->handle = header.handle; 213 | if(copy_to_user((void *)arg, param, size)) { 214 | printk(KERN_DEBUG"%s(): Error in copy_to_user()\n", __FUNCTION__); 215 | error = -EFAULT; 216 | } 217 | 218 | kfree(param); 219 | } else { 220 | printk(KERN_ERR"%s(): Skip entry: %p\n", __FUNCTION__, entry->handle); 221 | } 222 | } 223 | } 224 | 225 | do_exit: 226 | return error; 227 | } 228 | 229 | //----------------------------------------------------------------------------- 230 | -------------------------------------------------------------------------------- /module/ioctlrw.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _IOCTLRW_H_ 3 | #define _IOCTLRW_H_ 4 | 5 | //----------------------------------------------------------------------------- 6 | 7 | int ioctl_mem_lock(struct gpumem *drv, unsigned long arg); 8 | int ioctl_mem_unlock(struct gpumem *drv, unsigned long arg); 9 | int ioctl_mem_state(struct gpumem *drv, unsigned long arg); 10 | 11 | //----------------------------------------------------------------------------- 12 | 13 | #endif //_IOCTLRW_H_ 14 | --------------------------------------------------------------------------------