├── .DS_Store
├── LICENSE
├── README.md
├── all_build.sh
├── demo.py
├── develop.sh
├── extra
├── cudpp
│ ├── .gitattributes
│ ├── .gitignore
│ ├── .gitmodules
│ ├── CMakeLists.txt
│ ├── README.md
│ ├── cmake
│ │ └── FindGLEW.cmake
│ ├── cudpp-config-version.cmake.in
│ ├── cudpp-config.cmake.in
│ ├── doc
│ │ ├── CUDPP_slides.pdf
│ │ ├── bib
│ │ │ ├── README.txt
│ │ │ ├── bib.py
│ │ │ ├── cudpp.bib
│ │ │ ├── cudpp.bst
│ │ │ ├── cudpp_refs.html
│ │ │ └── cudpp_refs_bib.html
│ │ ├── building-cudpp.md
│ │ ├── changelog.md
│ │ ├── changelog.txt
│ │ ├── cudpp.doxygen
│ │ ├── cudpp_refs.md
│ │ ├── cudpp_refs_bib.md
│ │ ├── example_simpleCUDPP.dox
│ │ └── license.md
│ ├── ext
│ │ ├── cub
│ │ │ ├── .cproject
│ │ │ ├── .project
│ │ │ ├── .settings
│ │ │ │ ├── language.settings.xml
│ │ │ │ ├── org.eclipse.cdt.codan.core.prefs
│ │ │ │ ├── org.eclipse.cdt.core.prefs
│ │ │ │ ├── org.eclipse.cdt.ui.prefs
│ │ │ │ └── org.eclipse.core.runtime.prefs
│ │ │ ├── CHANGE_LOG.TXT
│ │ │ ├── LICENSE.TXT
│ │ │ ├── README.md
│ │ │ ├── common.mk
│ │ │ ├── cub
│ │ │ │ ├── agent
│ │ │ │ │ ├── agent_histogram.cuh
│ │ │ │ │ ├── agent_radix_sort_downsweep.cuh
│ │ │ │ │ ├── agent_radix_sort_upsweep.cuh
│ │ │ │ │ ├── agent_reduce.cuh
│ │ │ │ │ ├── agent_reduce_by_key.cuh
│ │ │ │ │ ├── agent_rle.cuh
│ │ │ │ │ ├── agent_scan.cuh
│ │ │ │ │ ├── agent_segment_fixup.cuh
│ │ │ │ │ ├── agent_select_if.cuh
│ │ │ │ │ ├── agent_spmv_csrt.cuh
│ │ │ │ │ ├── agent_spmv_orig.cuh
│ │ │ │ │ ├── agent_spmv_row_based.cuh
│ │ │ │ │ └── single_pass_scan_operators.cuh
│ │ │ │ ├── block
│ │ │ │ │ ├── block_adjacent_difference.cuh
│ │ │ │ │ ├── block_discontinuity.cuh
│ │ │ │ │ ├── block_exchange.cuh
│ │ │ │ │ ├── block_histogram.cuh
│ │ │ │ │ ├── block_load.cuh
│ │ │ │ │ ├── block_radix_rank.cuh
│ │ │ │ │ ├── block_radix_sort.cuh
│ │ │ │ │ ├── block_raking_layout.cuh
│ │ │ │ │ ├── block_reduce.cuh
│ │ │ │ │ ├── block_scan.cuh
│ │ │ │ │ ├── block_shuffle.cuh
│ │ │ │ │ ├── block_store.cuh
│ │ │ │ │ └── specializations
│ │ │ │ │ │ ├── block_histogram_atomic.cuh
│ │ │ │ │ │ ├── block_histogram_sort.cuh
│ │ │ │ │ │ ├── block_reduce_raking.cuh
│ │ │ │ │ │ ├── block_reduce_raking_commutative_only.cuh
│ │ │ │ │ │ ├── block_reduce_warp_reductions.cuh
│ │ │ │ │ │ ├── block_scan_raking.cuh
│ │ │ │ │ │ ├── block_scan_warp_scans.cuh
│ │ │ │ │ │ ├── block_scan_warp_scans2.cuh
│ │ │ │ │ │ └── block_scan_warp_scans3.cuh
│ │ │ │ ├── cub.cuh
│ │ │ │ ├── device
│ │ │ │ │ ├── device_histogram.cuh
│ │ │ │ │ ├── device_partition.cuh
│ │ │ │ │ ├── device_radix_sort.cuh
│ │ │ │ │ ├── device_reduce.cuh
│ │ │ │ │ ├── device_run_length_encode.cuh
│ │ │ │ │ ├── device_scan.cuh
│ │ │ │ │ ├── device_segmented_radix_sort.cuh
│ │ │ │ │ ├── device_segmented_reduce.cuh
│ │ │ │ │ ├── device_select.cuh
│ │ │ │ │ ├── device_spmv.cuh
│ │ │ │ │ └── dispatch
│ │ │ │ │ │ ├── dispatch_histogram.cuh
│ │ │ │ │ │ ├── dispatch_radix_sort.cuh
│ │ │ │ │ │ ├── dispatch_reduce.cuh
│ │ │ │ │ │ ├── dispatch_reduce_by_key.cuh
│ │ │ │ │ │ ├── dispatch_rle.cuh
│ │ │ │ │ │ ├── dispatch_scan.cuh
│ │ │ │ │ │ ├── dispatch_select_if.cuh
│ │ │ │ │ │ ├── dispatch_spmv_csrt.cuh
│ │ │ │ │ │ ├── dispatch_spmv_orig.cuh
│ │ │ │ │ │ └── dispatch_spmv_row_based.cuh
│ │ │ │ ├── grid
│ │ │ │ │ ├── grid_barrier.cuh
│ │ │ │ │ ├── grid_even_share.cuh
│ │ │ │ │ ├── grid_mapping.cuh
│ │ │ │ │ └── grid_queue.cuh
│ │ │ │ ├── host
│ │ │ │ │ └── mutex.cuh
│ │ │ │ ├── iterator
│ │ │ │ │ ├── arg_index_input_iterator.cuh
│ │ │ │ │ ├── cache_modified_input_iterator.cuh
│ │ │ │ │ ├── cache_modified_output_iterator.cuh
│ │ │ │ │ ├── constant_input_iterator.cuh
│ │ │ │ │ ├── counting_input_iterator.cuh
│ │ │ │ │ ├── discard_output_iterator.cuh
│ │ │ │ │ ├── tex_obj_input_iterator.cuh
│ │ │ │ │ ├── tex_ref_input_iterator.cuh
│ │ │ │ │ └── transform_input_iterator.cuh
│ │ │ │ ├── thread
│ │ │ │ │ ├── thread_load.cuh
│ │ │ │ │ ├── thread_operators.cuh
│ │ │ │ │ ├── thread_reduce.cuh
│ │ │ │ │ ├── thread_scan.cuh
│ │ │ │ │ ├── thread_search.cuh
│ │ │ │ │ └── thread_store.cuh
│ │ │ │ ├── util_allocator.cuh
│ │ │ │ ├── util_arch.cuh
│ │ │ │ ├── util_debug.cuh
│ │ │ │ ├── util_device.cuh
│ │ │ │ ├── util_macro.cuh
│ │ │ │ ├── util_namespace.cuh
│ │ │ │ ├── util_ptx.cuh
│ │ │ │ ├── util_type.cuh
│ │ │ │ └── warp
│ │ │ │ │ ├── specializations
│ │ │ │ │ ├── warp_reduce_shfl.cuh
│ │ │ │ │ ├── warp_reduce_smem.cuh
│ │ │ │ │ ├── warp_scan_shfl.cuh
│ │ │ │ │ └── warp_scan_smem.cuh
│ │ │ │ │ ├── warp_reduce.cuh
│ │ │ │ │ └── warp_scan.cuh
│ │ │ ├── eclipse code style profile.xml
│ │ │ ├── examples
│ │ │ │ ├── block
│ │ │ │ │ ├── .gitignore
│ │ │ │ │ ├── Makefile
│ │ │ │ │ ├── example_block_radix_sort.cu
│ │ │ │ │ ├── example_block_reduce.cu
│ │ │ │ │ ├── example_block_scan.cu
│ │ │ │ │ └── reduce_by_key.cu
│ │ │ │ └── device
│ │ │ │ │ ├── .gitignore
│ │ │ │ │ ├── Makefile
│ │ │ │ │ ├── example_device_partition_flagged.cu
│ │ │ │ │ ├── example_device_partition_if.cu
│ │ │ │ │ ├── example_device_radix_sort.cu
│ │ │ │ │ ├── example_device_reduce.cu
│ │ │ │ │ ├── example_device_scan.cu
│ │ │ │ │ ├── example_device_select_flagged.cu
│ │ │ │ │ ├── example_device_select_if.cu
│ │ │ │ │ ├── example_device_select_unique.cu
│ │ │ │ │ └── example_device_sort_find_non_trivial_runs.cu
│ │ │ ├── experimental
│ │ │ │ ├── .gitignore
│ │ │ │ ├── Makefile
│ │ │ │ ├── defunct
│ │ │ │ │ ├── example_coo_spmv.cu
│ │ │ │ │ └── test_device_seg_reduce.cu
│ │ │ │ ├── histogram
│ │ │ │ │ ├── histogram_cub.h
│ │ │ │ │ ├── histogram_gmem_atomics.h
│ │ │ │ │ └── histogram_smem_atomics.h
│ │ │ │ ├── histogram_compare.cu
│ │ │ │ ├── sparse_matrix.h
│ │ │ │ ├── spmv_compare.cu
│ │ │ │ └── spmv_script.sh
│ │ │ ├── test
│ │ │ │ ├── .gitignore
│ │ │ │ ├── Makefile
│ │ │ │ ├── link_a.cu
│ │ │ │ ├── link_b.cu
│ │ │ │ ├── link_main.cpp
│ │ │ │ ├── mersenne.h
│ │ │ │ ├── test_allocator.cu
│ │ │ │ ├── test_block_histogram.cu
│ │ │ │ ├── test_block_load_store.cu
│ │ │ │ ├── test_block_radix_sort.cu
│ │ │ │ ├── test_block_reduce.cu
│ │ │ │ ├── test_block_scan.cu
│ │ │ │ ├── test_device_histogram.cu
│ │ │ │ ├── test_device_radix_sort.cu
│ │ │ │ ├── test_device_reduce.cu
│ │ │ │ ├── test_device_reduce_by_key.cu
│ │ │ │ ├── test_device_run_length_encode.cu
│ │ │ │ ├── test_device_scan.cu
│ │ │ │ ├── test_device_select_if.cu
│ │ │ │ ├── test_device_select_unique.cu
│ │ │ │ ├── test_grid_barrier.cu
│ │ │ │ ├── test_iterator.cu
│ │ │ │ ├── test_util.h
│ │ │ │ ├── test_warp_reduce.cu
│ │ │ │ └── test_warp_scan.cu
│ │ │ └── tune
│ │ │ │ ├── .gitignore
│ │ │ │ ├── Makefile
│ │ │ │ └── tune_device_reduce.cu
│ │ └── moderngpu
│ │ │ ├── README.md
│ │ │ ├── benchmarkinsert
│ │ │ ├── Makefile
│ │ │ ├── benchmarkinsert.cu
│ │ │ ├── benchmarkinsert.vcxproj
│ │ │ └── benchmarkinsert.vcxproj.filters
│ │ │ ├── benchmarkintervalmove
│ │ │ ├── Makefile
│ │ │ ├── benchmarkintervalmove.cu
│ │ │ ├── benchmarkintervalmove.vcxproj
│ │ │ └── benchmarkintervalmove.vcxproj.filters
│ │ │ ├── benchmarkjoin
│ │ │ ├── Makefile
│ │ │ ├── benchmarkjoin.cu
│ │ │ ├── benchmarkjoin.vcxproj
│ │ │ └── benchmarkjoin.vcxproj.filters
│ │ │ ├── benchmarklaunchbox
│ │ │ ├── Makefile
│ │ │ ├── benchmarklaunchbox.cu
│ │ │ ├── benchmarklaunchbox.vcxproj
│ │ │ └── benchmarklaunchbox.vcxproj.filters
│ │ │ ├── benchmarkloadbalance
│ │ │ ├── Makefile
│ │ │ ├── benchmarkloadbalance.cu
│ │ │ ├── benchmarkloadbalance.vcxproj
│ │ │ └── benchmarkloadbalance.vcxproj.filters
│ │ │ ├── benchmarklocalitysort
│ │ │ ├── Makefile
│ │ │ ├── benchmarklocalitysort.cu
│ │ │ ├── benchmarklocalitysort.vcxproj
│ │ │ └── benchmarklocalitysort.vcxproj.filters
│ │ │ ├── benchmarkmerge
│ │ │ ├── Makefile
│ │ │ ├── benchmarkmerge.cu
│ │ │ ├── benchmarkmerge.vcxproj
│ │ │ └── benchmarkmerge.vcxproj.filters
│ │ │ ├── benchmarkreducebykey
│ │ │ ├── Makefile
│ │ │ ├── benchmarkreducebykey.cu
│ │ │ ├── benchmarkreducebykey.vcxproj
│ │ │ └── benchmarkreducebykey.vcxproj.filters
│ │ │ ├── benchmarkscan
│ │ │ ├── Makefile
│ │ │ ├── benchmarkscan.cu
│ │ │ ├── benchmarkscan.vcxproj
│ │ │ └── benchmarkscan.vcxproj.filters
│ │ │ ├── benchmarksegreduce
│ │ │ ├── Makefile
│ │ │ ├── benchmarksegreduce.cu
│ │ │ ├── benchmarksegreduce.vcxproj
│ │ │ └── benchmarksegreduce.vcxproj.filters
│ │ │ ├── benchmarksegsort
│ │ │ ├── Makefile
│ │ │ ├── benchmarksegsort.cu
│ │ │ ├── benchmarksegsort.vcxproj
│ │ │ └── benchmarksegsort.vcxproj.filters
│ │ │ ├── benchmarksets
│ │ │ ├── Makefile
│ │ │ ├── benchmarksets.cu
│ │ │ ├── benchmarksets.vcxproj
│ │ │ └── benchmarksets.vcxproj.filters
│ │ │ ├── benchmarksort
│ │ │ ├── Makefile
│ │ │ ├── benchmarksort.cu
│ │ │ ├── benchmarksort.vcxproj
│ │ │ └── benchmarksort.vcxproj.filters
│ │ │ ├── benchmarksortedsearch
│ │ │ ├── Makefile
│ │ │ ├── benchmarksortedsearch.cu
│ │ │ ├── benchmarksortedsearch.vcxproj
│ │ │ └── benchmarksortedsearch.vcxproj.filters
│ │ │ ├── benchmarkspmvcsr
│ │ │ ├── Makefile
│ │ │ ├── benchmarkspmvcsr.cu
│ │ │ ├── benchmarkspmvcsr.vcxproj
│ │ │ └── benchmarkspmvcsr.vcxproj.filters
│ │ │ ├── common.mk
│ │ │ ├── demo
│ │ │ ├── Makefile
│ │ │ ├── demo.cu
│ │ │ ├── demo.vcxproj
│ │ │ └── demo.vcxproj.filters
│ │ │ ├── include
│ │ │ ├── device
│ │ │ │ ├── ctaloadbalance.cuh
│ │ │ │ ├── ctamerge.cuh
│ │ │ │ ├── ctascan.cuh
│ │ │ │ ├── ctasearch.cuh
│ │ │ │ ├── ctasegreduce.cuh
│ │ │ │ ├── ctasegscan.cuh
│ │ │ │ ├── ctasegsort.cuh
│ │ │ │ ├── ctasortedsearch.cuh
│ │ │ │ ├── devicetypes.cuh
│ │ │ │ ├── deviceutil.cuh
│ │ │ │ ├── intrinsics.cuh
│ │ │ │ ├── launchbox.cuh
│ │ │ │ ├── loadstore.cuh
│ │ │ │ ├── serialsets.cuh
│ │ │ │ └── sortnetwork.cuh
│ │ │ ├── kernels
│ │ │ │ ├── bulkinsert.cuh
│ │ │ │ ├── bulkremove.cuh
│ │ │ │ ├── csrtools.cuh
│ │ │ │ ├── cubradixsort.cuh
│ │ │ │ ├── intervalmove.cuh
│ │ │ │ ├── join.cuh
│ │ │ │ ├── loadbalance.cuh
│ │ │ │ ├── localitysort.cuh
│ │ │ │ ├── merge.cuh
│ │ │ │ ├── mergesort.cuh
│ │ │ │ ├── reduce.cuh
│ │ │ │ ├── reducebykey.cuh
│ │ │ │ ├── scan.cuh
│ │ │ │ ├── search.cuh
│ │ │ │ ├── segmentedsort.cuh
│ │ │ │ ├── segreduce.cuh
│ │ │ │ ├── segreducecsr.cuh
│ │ │ │ ├── sets.cuh
│ │ │ │ ├── sortedsearch.cuh
│ │ │ │ └── spmvcsr.cuh
│ │ │ ├── mgpudevice.cuh
│ │ │ ├── mgpuenums.h
│ │ │ ├── mgpuhost.cuh
│ │ │ ├── mmio.h
│ │ │ ├── moderngpu.cuh
│ │ │ ├── sparsematrix.h
│ │ │ └── util
│ │ │ │ ├── format.h
│ │ │ │ ├── mgpualloc.h
│ │ │ │ ├── mgpucontext.h
│ │ │ │ ├── static.h
│ │ │ │ └── util.h
│ │ │ ├── mgpu_benchmarks.xlsx
│ │ │ ├── moderngpu.sln
│ │ │ ├── parallelmerge
│ │ │ ├── Makefile
│ │ │ ├── parallelmerge.cu
│ │ │ ├── parallelmerge.vcxproj
│ │ │ └── parallelmerge.vcxproj.filters
│ │ │ ├── src
│ │ │ ├── mgpucontext.cu
│ │ │ ├── mgpuutil.cpp
│ │ │ ├── mmio.cpp
│ │ │ └── sparsematrix.cpp
│ │ │ ├── testlaunchbox
│ │ │ ├── Makefile
│ │ │ ├── testlaunchbox.cu
│ │ │ ├── testlaunchbox.vcxproj
│ │ │ └── testlaunchbox.vcxproj.filters
│ │ │ ├── testsegsortbyflags
│ │ │ ├── testsegsortbyflags.cu
│ │ │ ├── testsegsortbyflags.vcxproj
│ │ │ └── testsegsortbyflags.vcxproj.filters
│ │ │ └── vs.props
│ ├── include
│ │ ├── cudpp.h
│ │ ├── cudpp_config.h
│ │ ├── cudpp_config.h.in
│ │ └── cudpp_hash.h
│ ├── license.txt
│ └── src
│ │ ├── cudpp
│ │ ├── CMakeLists.txt
│ │ ├── app
│ │ │ ├── compact_app.cu
│ │ │ ├── compress_app.cu
│ │ │ ├── listrank_app.cu
│ │ │ ├── mergesort_app.cu
│ │ │ ├── multisplit_app.cu
│ │ │ ├── radixsort_app.cu
│ │ │ ├── rand_app.cu
│ │ │ ├── reduce_app.cu
│ │ │ ├── sa_app.cu
│ │ │ ├── scan_app.cu
│ │ │ ├── segmented_scan_app.cu
│ │ │ ├── spmvmult_app.cu
│ │ │ ├── stringsort_app.cu
│ │ │ └── tridiagonal_app.cu
│ │ ├── cta
│ │ │ ├── compress_cta.cuh
│ │ │ ├── mergesort_cta.cuh
│ │ │ ├── radixsort_cta.cuh
│ │ │ ├── rand_cta.cuh
│ │ │ ├── scan_cta.cuh
│ │ │ ├── segmented_scan_cta.cuh
│ │ │ └── stringsort_cta.cuh
│ │ ├── cuda_util.h
│ │ ├── cudpp.cpp
│ │ ├── cudpp_compact.h
│ │ ├── cudpp_compress.h
│ │ ├── cudpp_globals.h
│ │ ├── cudpp_listrank.h
│ │ ├── cudpp_manager.cpp
│ │ ├── cudpp_manager.h
│ │ ├── cudpp_maximal_launch.cpp
│ │ ├── cudpp_maximal_launch.h
│ │ ├── cudpp_mergesort.h
│ │ ├── cudpp_multisplit.h
│ │ ├── cudpp_plan.cpp
│ │ ├── cudpp_plan.h
│ │ ├── cudpp_radixsort.h
│ │ ├── cudpp_rand.h
│ │ ├── cudpp_reduce.h
│ │ ├── cudpp_sa.h
│ │ ├── cudpp_scan.h
│ │ ├── cudpp_segscan.h
│ │ ├── cudpp_spmvmult.h
│ │ ├── cudpp_stringsort.h
│ │ ├── cudpp_tridiagonal.h
│ │ ├── cudpp_util.h
│ │ ├── kernel
│ │ │ ├── compact_kernel.cuh
│ │ │ ├── compress_kernel.cuh
│ │ │ ├── listrank_kernel.cuh
│ │ │ ├── mergesort_kernel.cuh
│ │ │ ├── multisplit_kernel.cuh
│ │ │ ├── radixsort_kernel.cuh
│ │ │ ├── rand_kernel.cuh
│ │ │ ├── reduce_kernel.cuh
│ │ │ ├── sa_kernel.cuh
│ │ │ ├── scan_kernel.cuh
│ │ │ ├── segmented_scan_kernel.cuh
│ │ │ ├── spmvmult_kernel.cuh
│ │ │ ├── stringsort_kernel.cuh
│ │ │ ├── tridiagonal_kernel.cuh
│ │ │ └── vector_kernel.cuh
│ │ └── sharedmem.h
│ │ └── cudpp_hash
│ │ ├── CMakeLists.txt
│ │ ├── cudpp_hash.cpp
│ │ ├── debugging.cpp
│ │ ├── debugging.cu
│ │ ├── debugging.h
│ │ ├── definitions.h
│ │ ├── hash_compacting.cpp
│ │ ├── hash_compacting.cu
│ │ ├── hash_compacting.h
│ │ ├── hash_functions.cu
│ │ ├── hash_functions.h
│ │ ├── hash_multivalue.cpp
│ │ ├── hash_multivalue.cu
│ │ ├── hash_multivalue.h
│ │ ├── hash_table.cpp
│ │ ├── hash_table.cu
│ │ ├── hash_table.cuh
│ │ ├── hash_table.h
│ │ ├── mt19937ar.cpp
│ │ └── mt19937ar.h
└── easy_profiler
│ ├── CMakeLists.txt
│ ├── LICENSE
│ ├── LICENSE.APACHE
│ ├── LICENSE.MIT
│ ├── README.md
│ ├── appveyor.bat
│ ├── appveyor.yml
│ ├── easy_profiler_converter
│ ├── CMakeLists.txt
│ ├── converter.cpp
│ ├── converter.h
│ ├── include
│ │ └── json.hpp
│ ├── main.cpp
│ ├── reader.cpp
│ └── reader.h
│ ├── easy_profiler_core
│ ├── CMakeLists.txt
│ ├── LICENSE.APACHE
│ ├── LICENSE.MIT
│ ├── alignment_helpers.h
│ ├── base_block_descriptor.cpp
│ ├── block.cpp
│ ├── block_descriptor.cpp
│ ├── block_descriptor.h
│ ├── chunk_allocator.h
│ ├── cmake
│ │ └── config.cmake.in
│ ├── current_thread.h
│ ├── current_time.h
│ ├── easy_socket.cpp
│ ├── event_trace_status.h
│ ├── event_trace_win.cpp
│ ├── event_trace_win.h
│ ├── hashed_cstr.h
│ ├── include
│ │ └── easy
│ │ │ ├── arbitrary_value.h
│ │ │ ├── details
│ │ │ ├── arbitrary_value_aux.h
│ │ │ ├── arbitrary_value_public_types.h
│ │ │ ├── easy_compiler_support.h
│ │ │ ├── profiler_aux.h
│ │ │ ├── profiler_colors.h
│ │ │ ├── profiler_in_use.h
│ │ │ └── profiler_public_types.h
│ │ │ ├── easy_net.h
│ │ │ ├── easy_protocol.h
│ │ │ ├── easy_socket.h
│ │ │ ├── profiler.h
│ │ │ ├── reader.h
│ │ │ ├── serialized_block.h
│ │ │ ├── utility.h
│ │ │ └── writer.h
│ ├── nonscoped_block.cpp
│ ├── nonscoped_block.h
│ ├── profile_manager.cpp
│ ├── profile_manager.h
│ ├── profiler.cpp
│ ├── reader.cpp
│ ├── resources.rc
│ ├── serialized_block.cpp
│ ├── spin_lock.h
│ ├── stack_buffer.h
│ ├── thread_storage.cpp
│ ├── thread_storage.h
│ └── writer.cpp
│ ├── profiler_gui
│ ├── CMakeLists.txt
│ ├── arbitrary_value_inspector.cpp
│ ├── arbitrary_value_inspector.h
│ ├── arbitrary_value_tooltip.cpp
│ ├── arbitrary_value_tooltip.h
│ ├── blocks_graphics_view.cpp
│ ├── blocks_graphics_view.h
│ ├── blocks_tree_widget.cpp
│ ├── blocks_tree_widget.h
│ ├── bookmarks_editor.cpp
│ ├── bookmarks_editor.h
│ ├── common_functions.cpp
│ ├── common_functions.h
│ ├── common_types.h
│ ├── complexity_calculator.h
│ ├── descriptors_tree_widget.cpp
│ ├── descriptors_tree_widget.h
│ ├── dialog.cpp
│ ├── dialog.h
│ ├── fps_widget.cpp
│ ├── fps_widget.h
│ ├── globals.cpp
│ ├── globals.h
│ ├── globals_qobjects.cpp
│ ├── globals_qobjects.h
│ ├── graphics_block_item.cpp
│ ├── graphics_block_item.h
│ ├── graphics_image_item.cpp
│ ├── graphics_image_item.h
│ ├── graphics_ruler_item.cpp
│ ├── graphics_ruler_item.h
│ ├── graphics_scrollbar.cpp
│ ├── graphics_scrollbar.h
│ ├── graphics_slider_area.cpp
│ ├── graphics_slider_area.h
│ ├── images
│ │ ├── attribution.txt
│ │ ├── default
│ │ │ ├── arrow-down-disabled.svg
│ │ │ ├── arrow-down-hover.svg
│ │ │ ├── arrow-down-pressed.svg
│ │ │ ├── arrow-down.svg
│ │ │ ├── arrow-left.svg
│ │ │ ├── arrow-right.svg
│ │ │ ├── arrow-up-disabled.svg
│ │ │ ├── arrow-up-hover.svg
│ │ │ ├── arrow-up-pressed.svg
│ │ │ ├── arrow-up.svg
│ │ │ ├── big-o.svg
│ │ │ ├── binoculars.svg
│ │ │ ├── check-disabled.svg
│ │ │ ├── check-partial-disabled.svg
│ │ │ ├── check-partial.svg
│ │ │ ├── check.svg
│ │ │ ├── close-hover.svg
│ │ │ ├── close-white-hover.svg
│ │ │ ├── close-white-pressed.svg
│ │ │ ├── close-white.svg
│ │ │ ├── close.svg
│ │ │ ├── collapse.svg
│ │ │ ├── colors-black.svg
│ │ │ ├── colors.svg
│ │ │ ├── crop.svg
│ │ │ ├── csv.svg
│ │ │ ├── delete-old.svg
│ │ │ ├── delete.svg
│ │ │ ├── expand.svg
│ │ │ ├── lan.svg
│ │ │ ├── lan_on.svg
│ │ │ ├── list.svg
│ │ │ ├── maximize-white-hover.svg
│ │ │ ├── maximize-white-pressed.svg
│ │ │ ├── maximize-white.svg
│ │ │ ├── minimize-white-hover.svg
│ │ │ ├── minimize-white-pressed.svg
│ │ │ ├── minimize-white.svg
│ │ │ ├── minimize.svg
│ │ │ ├── off.svg
│ │ │ ├── open-folder.svg
│ │ │ ├── open-folder2.svg
│ │ │ ├── play.svg
│ │ │ ├── radio-indicator-disabled.svg
│ │ │ ├── radio-indicator.svg
│ │ │ ├── reload-folder2.svg
│ │ │ ├── reload.svg
│ │ │ ├── save.svg
│ │ │ ├── search-next.svg
│ │ │ ├── search-prev.svg
│ │ │ ├── settings.svg
│ │ │ ├── statistics.svg
│ │ │ ├── statistics2.svg
│ │ │ ├── stop.svg
│ │ │ ├── to-fullscreen.svg
│ │ │ ├── to-window.svg
│ │ │ ├── wifi.svg
│ │ │ ├── wifi_on.svg
│ │ │ ├── window.svg
│ │ │ └── yx.svg
│ │ ├── logo.ico
│ │ └── logo.svg
│ ├── main.cpp
│ ├── main_window.cpp
│ ├── main_window.h
│ ├── resources.qrc
│ ├── resources.rc
│ ├── round_progress_widget.cpp
│ ├── round_progress_widget.h
│ ├── themes
│ │ ├── default.css
│ │ └── default.scss
│ ├── thread_pool.cpp
│ ├── thread_pool.h
│ ├── thread_pool_task.cpp
│ ├── thread_pool_task.h
│ ├── timer.cpp
│ ├── timer.h
│ ├── tree_widget_item.cpp
│ ├── tree_widget_item.h
│ ├── tree_widget_loader.cpp
│ ├── tree_widget_loader.h
│ ├── window_header.cpp
│ └── window_header.h
│ ├── reader
│ ├── CMakeLists.txt
│ └── main.cpp
│ ├── sample
│ ├── CMakeLists.txt
│ ├── express_sample.cpp
│ ├── main.cpp
│ └── main_clock.cpp
│ └── scripts
│ ├── context_switch_logger.stp
│ ├── make_style.sh
│ └── test.sh
├── model.py
├── p1.yml
├── setup.py
└── sparseconvnet
├── SCN
├── CPU
│ ├── ActivePooling.cpp
│ ├── AffineReluTrivialConvolution.cpp
│ ├── AveragePooling.cpp
│ ├── BatchNormalization.cpp
│ ├── BatchwiseMultiplicativeDropout.cpp
│ ├── Convolution.cpp
│ ├── Deconvolution.cpp
│ ├── IOLayers.cpp
│ ├── LeakyReLU.cpp
│ ├── MaxPooling.cpp
│ ├── NetworkInNetwork.cpp
│ ├── SparseToDense.cpp
│ └── UnPooling.cpp
├── CUDA
│ ├── ActivePooling.cpp
│ ├── ActivePooling.cu
│ ├── AffineReluTrivialConvolution.cpp
│ ├── AffineReluTrivialConvolution.cu
│ ├── AveragePooling.cpp
│ ├── AveragePooling.cu
│ ├── BatchNormalization.cpp
│ ├── BatchNormalization.cu
│ ├── BatchwiseMultiplicativeDropout.cpp
│ ├── BatchwiseMultiplicativeDropout.cu
│ ├── CUDPPWrapper.cu
│ ├── CUDPPWrapper.hpp
│ ├── Convolution.cpp
│ ├── Convolution.cu
│ ├── Deconvolution.cpp
│ ├── Deconvolution.cu
│ ├── IOLayers.cpp
│ ├── IOLayers.cu
│ ├── LeakyReLU.cpp
│ ├── LeakyReLU.cu
│ ├── MaxPooling.cpp
│ ├── MaxPooling.cu
│ ├── NetworkInNetwork.cpp
│ ├── RuleBookIterator.h
│ ├── SparseToDense.cpp
│ ├── SparseToDense.cu
│ ├── SubmanifoldRules_cuda.cpp
│ ├── SubmanifoldRules_cuda.cu
│ ├── UnPooling.cpp
│ ├── UnPooling.cu
│ └── kernel_hash.cuh
├── Metadata
│ ├── 32bits.h
│ ├── 64bits.h
│ ├── ActivePoolingRules.h
│ ├── ConvolutionRules.h
│ ├── FullConvolutionRules.h
│ ├── IOLayersRules.h
│ ├── Metadata.cpp
│ ├── Metadata.h
│ ├── PermutohedralSubmanifoldConvolutionRules.h
│ ├── RandomizedStrideRules.h
│ ├── RectangularRegions.h
│ ├── SubmanifoldConvolutionRules.h
│ └── resultHash.h
├── cuda.cu
├── misc
│ └── drawCurve.cpp
├── pybind.cpp
├── sparseconvnet.h
├── sparseconvnet_cpu.cpp
└── sparseconvnet_cuda.cpp
├── __init__.py
├── activations.py
├── averagePooling.py
├── batchNormalization.py
├── classificationTrainValidate.py
├── convolution.py
├── deconvolution.py
├── denseToSparse.py
├── dropout.py
├── fullConvolution.py
├── identity.py
├── inputBatch.py
├── ioLayers.py
├── maxPooling.py
├── metadata.py
├── networkArchitectures.py
├── networkInNetwork.py
├── permutohedralSubmanifoldConvolution.py
├── randomizedStrideConvolution.py
├── randomizedStrideMaxPooling.py
├── sequential.py
├── shapeContext.py
├── sparseConvNetTensor.py
├── sparseToDense.py
├── sparsify.py
├── spectral_norm.py
├── submanifoldConvolution.py
├── tables.py
├── unPooling.py
└── utils.py
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THU-luvision/INS-Conv/c1cdd4187803f1d50e00610ae947ed337a7eb93f/.DS_Store
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD License
2 |
3 | For SparseConvNet software
4 |
5 | Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
6 |
7 | Redistribution and use in source and binary forms, with or without modification,
8 | are permitted provided that the following conditions are met:
9 |
10 | * Redistributions of source code must retain the above copyright notice, this
11 | list of conditions and the following disclaimer.
12 |
13 | * Redistributions in binary form must reproduce the above copyright notice,
14 | this list of conditions and the following disclaimer in the documentation
15 | and/or other materials provided with the distribution.
16 |
17 | * Neither the name Facebook nor the names of its contributors may be used to
18 | endorse or promote products derived from this software without specific
19 | prior written permission.
20 |
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
22 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
28 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # INS-Conv: Incremental Sparse Convolution for Online 3D segmentation
2 |
3 | This is the incremental sparse convolution library implemented based on [SparseConvNet](https://github.com/facebookresearch/SparseConvNet) and [Live Semantic 3D Perception for Immersive Augmented Reality](https://ieeexplore.ieee.org/abstract/document/8998140). The latter describes a more efficient GPU implementation of the original submanifold sparse convolution. Our method supports incremental computing of sparse convolution, including SSC, convolution/deconvolution, BN, IO, and residual structure, etc.
4 | ## Environment setup
5 |
6 | ### Preliminary Requirements:
7 | * Ubuntu 16.04
8 | * CUDA 9.0
9 |
16 |
17 | ### Install
18 | ```conda
19 | conda env create -f p1.yml
20 | ```
21 |
22 | ```bash
23 | sh all_build.sh
24 | ```
25 |
26 | ### Demo
27 | For training, you could train an arbitrary model using the original sparseconvnet.
28 |
29 | For incremental inference, demo.py gives an example of the INS-Conv library.
30 |
31 | We also provide the code for the online 3D semantic instance segmentation demo shown in our video; you can download it from the following link:
32 | https://drive.google.com/file/d/1sYpMFc1dVXZSZEDhfqQZbMoabiZZikuI/view?usp=sharing
33 |
--------------------------------------------------------------------------------
/all_build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2016-present, Facebook, Inc.
3 | # All rights reserved.
4 | #
5 | # This source code is licensed under the BSD-style license found in the
6 | # LICENSE file in the root directory of this source tree.
7 | conda activate p1
8 | # Every cd below is guarded: a failed cd must not let the following rm -rf run in the wrong directory.
9 | ## make cudpp
10 | cd extra/cudpp || exit 1
11 | rm -rf build/
12 | mkdir build
13 | cd build || exit 1
14 | cmake ..
15 | make -j32
16 | cd ../../.. || exit 1
17 | #
18 | ## make easy_profiler
19 | #
20 | cd extra/easy_profiler || exit 1
21 | rm -rf build/
22 | mkdir build
23 | cd build || exit 1
24 | cmake ..
25 | make -j32
26 | cd ../../.. || exit 1
27 | #
28 | ## rebuild the sparseconvnet extension from a clean state
29 |
30 | rm -rf build/ dist/ sparseconvnet.egg-info sparseconvnet/*.so
31 | python setup.py develop
32 |
33 |
34 |
--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import copy
3 | import numpy as np
4 | import math
5 | import time
6 | import sparseconvnet as scn
7 | import torch.nn.functional as F
8 | import torch.optim as optim
9 | import torch.nn as nn
10 | import sparseconvnet as scn
11 | import model
12 | import sys
13 |
14 |
15 | config = {'dimension': 3, 'full_scale': 4096}
16 |
17 | Model = model.Naive_UNet(config)
18 |
19 | Model = Model.cuda()
20 |
21 | points_0 = torch.randint(100, 150, size=(10000, 3)).cuda() # frame 0 point xyz
22 | features_0 = torch.rand((10000, 3)).cuda() # frame 0 point features
23 |
24 | # the initial update of the network, use inccrement=False
25 | # this will save a checkpoint of network feataures, do it every 100 frames.
26 | output_0 = Model([points_0, features_0], increment=False)
27 |
28 |
29 | points_1 = torch.randint(100, 150, size=(1000, 3)).cuda() # xyz of incremental points of frame 1
30 | features_1 = torch.rand((1000, 3)).cuda() # features of incremental points of frame 1
31 |
32 |
33 | # incremental update of the network, use inccrement=True
34 | # just need to input the incremental points and their features, the input residuals will be computed automaticlly
35 | # the output is the results of incremental points of frame 1
36 | output_1 = Model([points_1, features_1], increment=True)
37 |
38 | '''
39 | ...
40 | more frame
41 | '''
42 |
--------------------------------------------------------------------------------
/develop.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2016-present, Facebook, Inc.
3 | # All rights reserved.
4 | #
5 | # This source code is licensed under the BSD-style license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 | # Drop the artifacts of any previous build, then install in development mode.
9 | rm -rf \
10 |     build/ \
11 |     dist/ \
12 |     sparseconvnet.egg-info \
13 |     sparseconvnet_SCN*.so
14 | python setup.py develop
15 |
--------------------------------------------------------------------------------
/extra/cudpp/.gitattributes:
--------------------------------------------------------------------------------
1 | # Set default behaviour, in case users don't have core.autocrlf set.
2 | * text=auto
3 |
4 | # Explicitly declare text files we want to always be normalized and converted
5 | # to native line endings on checkout.
6 | *.c text
7 | *.cpp text
8 | *.cu text
9 | *.cuh text
10 | *.h text
11 |
12 | # Denote all files that are truly binary and should not be modified.
13 | *.dll binary
14 | *.lib binary
15 | *.pdf binary
16 |
--------------------------------------------------------------------------------
/extra/cudpp/.gitignore:
--------------------------------------------------------------------------------
1 | build/*
2 | .vscode/*
3 | apps/
4 |
--------------------------------------------------------------------------------
/extra/cudpp/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "ext/cub"]
2 | path = ext/cub
3 | url = git://github.com/NVlabs/cub.git
4 | [submodule "ext/moderngpu"]
5 | path = ext/moderngpu
6 | url = git://github.com/NVlabs/moderngpu.git
7 |
--------------------------------------------------------------------------------
/extra/cudpp/cmake/FindGLEW.cmake:
--------------------------------------------------------------------------------
1 | #
2 | # Try to find GLEW library and include path.
3 | # Once done this will define
4 | #
5 | # GLEW_FOUND (TRUE/FALSE; FOUND_GLEW carries the same result as 1/0)
6 | # GLEW_INCLUDE_PATH
7 | # GLEW_LIBRARY
8 | #
9 | # GLEW_ROOT_DIR may be set to point the search at a GLEW install prefix.
10 | #
11 | IF (WIN32)
12 |   FIND_PATH( GLEW_INCLUDE_PATH GL/glew.h
13 |     ${GLEW_ROOT_DIR}/include
14 |     DOC "The directory where GL/glew.h resides")
15 |   # Choose the candidate library names from the target pointer size.
16 |   if (CMAKE_SIZEOF_VOID_P EQUAL 8)
17 |     set(GLEWNAMES glew GLEW glew64 glew64s)
18 |   else ()
19 |     set(GLEWNAMES glew GLEW glew32 glew32s)
20 |   endif (CMAKE_SIZEOF_VOID_P EQUAL 8)
21 |
22 |   FIND_LIBRARY( GLEW_LIBRARY
23 |     NAMES ${GLEWNAMES}
24 |     PATHS
25 |     ${GLEW_ROOT_DIR}/bin
26 |     ${GLEW_ROOT_DIR}/lib
27 |     DOC "The GLEW library")
28 | ELSE (WIN32)
29 |   FIND_PATH( GLEW_INCLUDE_PATH GL/glew.h
30 |     /usr/include
31 |     /usr/local/include
32 |     /sw/include
33 |     /opt/local/include
34 |     ${GLEW_ROOT_DIR}/include
35 |     DOC "The directory where GL/glew.h resides")
36 |   FIND_LIBRARY( GLEW_LIBRARY
37 |     NAMES GLEW libGLEW
38 |     PATHS
39 |     /usr/lib64
40 |     /usr/lib
41 |     /usr/local/lib64
42 |     /usr/local/lib
43 |     /sw/lib
44 |     /opt/local/lib
45 |     ${GLEW_ROOT_DIR}/lib
46 |     DOC "The GLEW library")
47 | ENDIF (WIN32)
48 |
49 | # Report the result under both names: GLEW_FOUND is the variable promised by
50 | # the header above, FOUND_GLEW is the name this module has always set.
51 | IF (GLEW_INCLUDE_PATH AND GLEW_LIBRARY)
52 |   SET( FOUND_GLEW 1)
53 |   SET( GLEW_FOUND TRUE)
54 | ELSE (GLEW_INCLUDE_PATH AND GLEW_LIBRARY)
55 |   SET( FOUND_GLEW 0)
56 |   SET( GLEW_FOUND FALSE)
57 | ENDIF (GLEW_INCLUDE_PATH AND GLEW_LIBRARY)
58 |
59 | MARK_AS_ADVANCED( FOUND_GLEW )
--------------------------------------------------------------------------------
/extra/cudpp/cudpp-config-version.cmake.in:
--------------------------------------------------------------------------------
1 | # Version check for the cudpp package; the @...@ placeholders are filled in
2 | # when this template is configured.
3 | set(PACKAGE_VERSION @cudpp_VERSION_MAJOR@.@cudpp_VERSION_MINOR@.@cudpp_VERSION_PATCH@)
4 |
5 | # Compatible when this version is not older than the requested one;
6 | # exact when the two version strings are identical.
7 | if(NOT "${PACKAGE_VERSION}" VERSION_LESS "${PACKAGE_FIND_VERSION}")
8 |   set(PACKAGE_VERSION_COMPATIBLE TRUE)
9 |   if("${PACKAGE_VERSION}" STREQUAL "${PACKAGE_FIND_VERSION}")
10 |     set(PACKAGE_VERSION_EXACT TRUE)
11 |   endif()
12 | else()
13 |   set(PACKAGE_VERSION_COMPATIBLE FALSE)
14 | endif()
15 |
--------------------------------------------------------------------------------
/extra/cudpp/cudpp-config.cmake.in:
--------------------------------------------------------------------------------
1 | # This file should be installed in the lib directory. Find the root directory:
2 | # _dir is the directory holding this file, _install_dir is its parent.
3 | get_filename_component(_dir "${CMAKE_CURRENT_LIST_FILE}" PATH)
4 | get_filename_component(_install_dir "${_dir}/.." ABSOLUTE)
5 |
6 | # Load the targets include.
7 | include("${_install_dir}/lib/cudpp-targets.cmake")
8 |
9 | # Include directories recorded when this template was configured.
10 | set(cudpp_INCLUDE_DIRS "@cudpp_INCLUDE_DIRS_CONFIG@")
11 |
--------------------------------------------------------------------------------
/extra/cudpp/doc/CUDPP_slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THU-luvision/INS-Conv/c1cdd4187803f1d50e00610ae947ed337a7eb93f/extra/cudpp/doc/CUDPP_slides.pdf
--------------------------------------------------------------------------------
/extra/cudpp/doc/bib/README.txt:
--------------------------------------------------------------------------------
1 | Run bib.py in this directory to generate 2 HTML files:
2 | - cudpp_refs.html, a date-sorted list of references that use CUDPP
3 | (every file in cudpp.bib)
4 | - cudpp_refs_bib.html, the BibTeX for each of those refs
5 |
6 | Input files are:
7 | - cudpp.bib. Add new bibtex entries here.
8 | - cudpp.bst. BibTeX style file for how the resulting HTML will look.
9 | Bug JDO if you don't like the format.
10 |
11 | bib.py calls two external programs, both from the bibtex2html package.
12 |
13 | http://www.lri.fr/~filliatr/bibtex2html/
14 |
15 | After you regenerate the two html files, check them into the
16 | repository (also cudpp.bib) so they can be included the next time the
17 | documentation is rebuilt.
18 |
--------------------------------------------------------------------------------
/extra/cudpp/doc/bib/bib.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import sys, os, re
4 |
5 | # Generate all pubs from cudpp.bib; put refs into ref.txt
6 | # This just generates a list of the cite keys from cudpp.bib into ref.txt.
7 | os.system("bib2bib -oc ref.txt cudpp.bib")
8 |
9 | # sort by reverse-date; don't generate keys; use cudpp.bst as bib style file
10 | # writes into cudpp_refs.html and cudpp_refs_bib.html
11 | os.putenv("openout_any", "r")
12 | os.system("bibtex2html -d -r -dl -nokeys -html-entities --no-footer --no-keywords -citefile ref.txt -s cudpp -nodoc -o cudpp_refs cudpp.bib")
13 | html_file = open('cudpp_refs.html')
14 | html = html_file.read()
15 | html_file.close()
16 |
17 | # if we want to munge the resulting text, do it here
18 | # right now this script is only 2 calls and a file remove though
19 |
20 | # write the file back
21 | html_file = open('cudpp_refs.html', 'w')
22 | print >> html_file, html
23 | html_file.close()
24 |
25 | # clean up temp files
26 | os.remove("ref.txt")
27 |
--------------------------------------------------------------------------------
/extra/cudpp/doc/building-cudpp.md:
--------------------------------------------------------------------------------
1 | Building CUDPP {#building-cudpp}
2 | ==============
3 |
4 | CUDPP has currently been tested on Windows, Mac OS X and Linux.
5 | See the [Release Notes](@ref release-notes) for release-specific platform support.
6 |
7 | Initial checkout
8 |
9 | $ git clone https://github.com/cudpp/cudpp.git
10 |
11 | With CUDPP 2.2, we've added suffix_array which has dependencies on [cub](http://nvlabs.github.io/cub/) and [moderngpu](http://nvlabs.github.io/moderngpu/) libraries. They are added as submodules and you will need to pull them in using the following two commands in the cudpp/ root directory:
12 |
13 | $ git submodule init
14 |
15 | $ git submodule update
16 |
17 | Thrust Dependency {#build-thrust}
18 | =================
19 |
20 | Starting with release 2.0, CUDPP uses the [Thrust](http://thrust.github.io)
21 | library for the implementation of cudppRadixSort(). Thrust is included with
22 | the CUDA Toolkit, so if you are using CUDA 4.0 or later, you need to do
23 | nothing else. If you are using an earlier version of CUDA however, you will
24 | need to download the Thrust source distribution and install it in your
25 | CUDA/include path before building CUDPP.
26 |
27 | Building CUDPP using CMake {#build-cmake}
28 | ==========================
29 |
30 | CUDPP 2.0 uses CMake for cross-platform builds. Follow the instructions
31 | [on the CUDPP Wiki](https://github.com/cudpp/cudpp/wiki/BuildingCUDPPwithCMake)
32 | to build CUDPP.
33 |
34 | Warnings {#warnings}
35 | ========
36 |
37 | You may see warnings during compilation of the form "warning: Double is not
38 | supported. Demoting to float". You can safely disregard these warnings.
39 | They are generated by the CUDA compiler when kernels that are specialized
40 | for multiple data types are compiled for CUDA targets that do not support
41 | double precision floating point.
42 |
--------------------------------------------------------------------------------
/extra/cudpp/doc/changelog.md:
--------------------------------------------------------------------------------
1 | CUDPP Change Log {#changelog}
2 | ================
3 |
4 | @include changelog.txt
5 |
6 |
--------------------------------------------------------------------------------
/extra/cudpp/doc/cudpp_refs.md:
--------------------------------------------------------------------------------
1 | Publications that use CUDPP {#cudpp_refs}
2 | ===========================
3 |
4 | @htmlinclude doc/bib/cudpp_refs.html
5 |
6 |
--------------------------------------------------------------------------------
/extra/cudpp/doc/cudpp_refs_bib.md:
--------------------------------------------------------------------------------
1 | Bibtex for publications that use CUDPP {#cudpp_refs_bib}
2 | ======================================
3 |
4 | @htmlinclude doc/bib/cudpp_refs_bib.html
5 |
6 |
--------------------------------------------------------------------------------
/extra/cudpp/doc/license.md:
--------------------------------------------------------------------------------
1 | CUDPP License {#license}
2 | =============
3 |
4 | BSD License
5 | =============
6 |
7 | CUDPP is released under the [BSD license](http://www.opensource.org/licenses/bsd-license.php).
8 |
9 | Non source-code content (such as documentation, web pages, etc.) from CUDPP
10 | is distributed under a [Creative Commons Attribution-ShareAlike 3.0 (CC BY-SA 3.0)](http://creativecommons.org/licenses/by-sa/3.0/) license.
11 |
12 | Note that prior to release 1.1 of CUDPP, the license used was a modified
13 | BSD license. With release 1.1, this license was replaced with the pure BSD
14 | license to facilitate the use of open source hosting of the code.
15 |
16 | CUDPP also includes the [Mersenne twister code](http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html) of [Makoto Matsumoto](http://www.math.sci.hiroshima-u.ac.jp/~m-mat/eindex.html),
17 | also licensed under BSD.
18 |
19 | CUDPP also calls functions in the [Thrust](http://thrust.github.io) template library,
20 | which is included with the CUDA Toolkit and licensed under the Apache 2.0 open source
21 | license.
22 |
23 | CUDPP also includes a modified version of FindGLEW.cmake from
24 | [nvidia-texture-tools](http://code.google.com/p/nvidia-texture-tools/),
25 | licensed under the [MIT license](http://www.opensource.org/licenses/mit-license.php).
26 |
27 | @include license.txt
28 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/cub/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | GIT_CUB
4 |
5 |
6 |
7 |
8 |
9 | org.eclipse.cdt.managedbuilder.core.genmakebuilder
10 | clean,full,incremental,
11 |
12 |
13 |
14 |
15 | org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder
16 | full,incremental,
17 |
18 |
19 |
20 |
21 |
22 | org.eclipse.cdt.core.cnature
23 | org.eclipse.cdt.managedbuilder.core.managedBuildNature
24 | org.eclipse.cdt.managedbuilder.core.ScannerConfigNature
25 | org.eclipse.cdt.core.ccnature
26 |
27 |
28 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/cub/.settings/language.settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/cub/.settings/org.eclipse.cdt.ui.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | formatter_profile=_B40C
3 | formatter_settings_version=1
4 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/cub/.settings/org.eclipse.core.runtime.prefs:
--------------------------------------------------------------------------------
1 | content-types/enabled=true
2 | content-types/org.eclipse.cdt.core.cxxHeader/file-extensions=cuh
3 | content-types/org.eclipse.cdt.core.cxxSource/file-extensions=cu
4 | eclipse.preferences.version=1
5 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/cub/LICENSE.TXT:
--------------------------------------------------------------------------------
1 | Copyright (c) 2010-2011, Duane Merrill. All rights reserved.
2 | Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are met:
6 | * Redistributions of source code must retain the above copyright
7 | notice, this list of conditions and the following disclaimer.
8 | * Redistributions in binary form must reproduce the above copyright
9 | notice, this list of conditions and the following disclaimer in the
10 | documentation and/or other materials provided with the distribution.
11 | * Neither the name of the NVIDIA CORPORATION nor the
12 | names of its contributors may be used to endorse or promote products
13 | derived from this software without specific prior written permission.
14 |
15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 | DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/extra/cudpp/ext/cub/cub/util_namespace.cuh:
--------------------------------------------------------------------------------
1 | /******************************************************************************
2 | * Copyright (c) 2011, Duane Merrill. All rights reserved.
3 | * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved.
4 | *
5 | * Redistribution and use in source and binary forms, with or without
6 | * modification, are permitted provided that the following conditions are met:
7 | * * Redistributions of source code must retain the above copyright
8 | * notice, this list of conditions and the following disclaimer.
9 | * * Redistributions in binary form must reproduce the above copyright
10 | * notice, this list of conditions and the following disclaimer in the
11 | * documentation and/or other materials provided with the distribution.
12 | * * Neither the name of the NVIDIA CORPORATION nor the
13 | * names of its contributors may be used to endorse or promote products
14 | * derived from this software without specific prior written permission.
15 | *
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 | * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | *
27 | ******************************************************************************/
28 |
29 | /**
30 |  * \file
31 |  * Hooks for wrapping the cub namespace in an outer namespace
32 |  */
33 |
34 | #pragma once
35 |
36 | // Example (define both macros before this header is included):
37 | //#define CUB_NS_PREFIX namespace thrust{ namespace detail {
38 | //#define CUB_NS_POSTFIX } }
39 |
40 | #if !defined(CUB_NS_PREFIX)
41 | #define CUB_NS_PREFIX
42 | #endif
43 |
44 | #if !defined(CUB_NS_POSTFIX)
45 | #define CUB_NS_POSTFIX
46 | #endif
46 | #endif
47 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/cub/examples/block/.gitignore:
--------------------------------------------------------------------------------
1 | /bin
2 | /Debug
3 | /Release
4 | /cuda55.sdf
5 | /cuda55.suo
6 | /cuda60.sdf
7 | /cuda60.suo
8 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/cub/examples/block/reduce_by_key.cu:
--------------------------------------------------------------------------------
1 | // Example kernel: flags segment heads over per-thread keys using cub::BlockDiscontinuity.
2 | // NOTE(review): the #include target and the template arguments of KeyValuePair / ReduceBySegmentOp appear to be missing in this copy — confirm against upstream CUB.
3 | #include
4 |
5 | // Each thread builds 3 keys (threadIdx.x / 3) and 3 unit values, flags the segment heads, then synchronizes.
6 | template <
7 | int BLOCK_THREADS, ///< Number of CTA threads
8 | typename KeyT, ///< Key type
9 | typename ValueT> ///< Value type
10 | __global__ void Kernel()
11 | {
12 | // Tuple type for scanning (pairs accumulated segment-value with segment-index)
13 | typedef cub::KeyValuePair OffsetValuePairT;
14 |
15 | // Reduce-value-by-segment scan operator
16 | typedef cub::ReduceBySegmentOp ReduceBySegmentOpT;
17 |
18 | // Parameterized BlockDiscontinuity type for setting head flags
19 | typedef cub::BlockDiscontinuity<
20 | KeyT,
21 | BLOCK_THREADS>
22 | BlockDiscontinuityKeysT;
23 |
24 | // Parameterized BlockScan type
25 | typedef cub::BlockScan<
26 | OffsetValuePairT,
27 | BLOCK_THREADS,
28 | cub::BLOCK_SCAN_WARP_SCANS>
29 | BlockScanT;
30 |
31 | // Shared memory
32 | __shared__ union
33 | {
34 | typename BlockScanT::TempStorage scan; // Scan storage
35 | typename BlockDiscontinuityKeysT::TempStorage discontinuity; // Discontinuity storage
36 | } temp_storage;
37 |
38 | // Keys are threadIdx.x / 3, so three consecutive threads (3 items each) share one key value.
39 | // Read data (each thread gets 3 items each, every 9 items is a segment)
40 | KeyT my_keys[3] = {threadIdx.x / 3, threadIdx.x / 3, threadIdx.x / 3};
41 | ValueT my_values[3] = {1, 1, 1};
42 |
43 | // Set head segment head flags
44 | int my_flags[3];
45 | BlockDiscontinuityKeysT(temp_storage.discontinuity).FlagHeads(
46 | my_flags,
47 | my_keys,
48 | cub::Inequality());
49 |
50 | __syncthreads();
51 |
52 | // BlockScanT, ReduceBySegmentOpT, my_values and temp_storage.scan are declared above but unused in the body shown here.
53 |
54 |
55 |
56 |
57 | }
58 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/cub/examples/device/.gitignore:
--------------------------------------------------------------------------------
1 | /bin
2 | /Debug
3 | /ipch
4 | /Release
5 | /cuda55.sdf
6 | /cuda55.suo
7 | /cuda60.sdf
8 | /cuda60.suo
9 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/cub/experimental/.gitignore:
--------------------------------------------------------------------------------
1 | /bin
2 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/cub/experimental/spmv_script.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Usage: spmv_script.sh <program> [up to six extra arguments]
4 | #
5 | # Runs <program> over a sweep of --dense sizes, then over the .mtx files in two
6 | # fixed directories, echoing one "<date>, <program output>" line per run.
7 |
8 | # run_mtx_dir <dir> <program> [args...]
9 | # Runs "<program> --mtx=<file> <args>" for every .mtx file in <dir>, skipping
10 | # files whose first 50 lines contain "complex" or "array".
11 | run_mtx_dir() {
12 |     local dir=$1 f
13 |     shift
14 |     # Glob directly instead of parsing `ls`, so paths are never word-split.
15 |     for f in "$dir"/*.mtx
16 |     do
17 |         # An unmatched glob stays literal; skip it rather than run on a bogus path.
18 |         [[ -e $f ]] || continue
19 |         if ! head -n 50 "$f" | grep -q complex && ! head -n 50 "$f" | grep -q array
20 |         then
21 |             # Program and extra arguments stay unquoted, as before, so absent
22 |             # arguments vanish; the unquoted $(...) keeps the output on one line.
23 |             echo $(date), $($1 --mtx="$f" $2 $3 $4 $5 $6 $7 2>/dev/null)
24 |         fi
25 |     done
26 | }
27 |
28 | for i in 1 2 4 8 16 32 64 128 256 512 1024 2048 4096 8192 16384 32768 65536 131072 262144 524288 1048576 2097152 4194304 8388608 16777216
29 | do
30 |     echo $(date), $($1 --dense=$i $2 $3 $4 $5 $6 $7)
31 | done
32 |
33 | echo
34 | echo
35 |
36 | run_mtx_dir /home/dumerrill/graphs/spmv "$@"
37 |
38 | echo
39 | echo
40 |
41 | run_mtx_dir /scratch/dumerrill/graphs/mtx "$@"
42 | #run_mtx_dir /cygdrive/w/Dev/UFget/mtx "$@"
30 |
31 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/cub/test/.gitignore:
--------------------------------------------------------------------------------
1 | /bin
2 | /link_main.obj
3 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/cub/test/link_a.cu:
--------------------------------------------------------------------------------
1 | #include
2 | // NOTE(review): the #include target and the DoubleBuffer template arguments appear to be missing in this copy — confirm against upstream CUB.
3 | void a()
4 | {
5 | printf("a() called\n");
6 | // NULL is passed as the temp-storage pointer; presumably a size-only query that fills temp_storage_bytes — confirm against the CUB docs.
7 | cub::DoubleBuffer d_keys;
8 | cub::DoubleBuffer d_values;
9 | size_t temp_storage_bytes = 0;
10 | cub::DeviceRadixSort::SortPairs(NULL, temp_storage_bytes, d_keys, d_values, 1024);
11 | }
12 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/cub/test/link_b.cu:
--------------------------------------------------------------------------------
1 | #include
2 | // NOTE(review): the #include target and the DoubleBuffer template arguments appear to be missing in this copy — confirm against upstream CUB.
3 | void b()
4 | {
5 | printf("b() called\n");
6 | // NULL is passed as the temp-storage pointer; presumably a size-only query that fills temp_storage_bytes — confirm against the CUB docs.
7 | cub::DoubleBuffer d_keys;
8 | cub::DoubleBuffer d_values;
9 | size_t temp_storage_bytes = 0;
10 | cub::DeviceRadixSort::SortPairs(NULL, temp_storage_bytes, d_keys, d_values, 1024);
11 | }
12 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/cub/test/link_main.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | // NOTE(review): the #include target appears to be missing in this copy (printf is used below).
3 | extern void a();
4 | extern void b();
5 | // a() and b() are declared but never called here; main() only prints a greeting and returns 0.
6 | int main()
7 | {
8 | printf("hello world\n");
9 | return 0;
10 | }
11 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/cub/tune/.gitignore:
--------------------------------------------------------------------------------
1 | /bin
2 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/README.md:
--------------------------------------------------------------------------------
1 | moderngpu
2 | =========
3 |
4 | Design patterns for GPU computing
5 |
6 | Modern GPU is code and commentary intended to promote new and productive ways of thinking about GPU computing.
7 |
8 | http://nvlabs.github.io/moderngpu
9 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/benchmarkinsert/Makefile:
--------------------------------------------------------------------------------
1 | # Build, run and clean rules for the benchmarkinsert benchmark. NVCCFLAGS is expected to come from ../common.mk (not visible here).
2 | include ../common.mk
3 |
4 | .PHONY: all build run clean clobber
5 |
6 | all: build
7 |
8 | build: benchmarkinsert
9 |
10 | benchmarkinsert.o: benchmarkinsert.cu
11 | 	nvcc $(NVCCFLAGS) -o $@ -c $<
12 |
13 | mgpucontext.o: ../src/mgpucontext.cu
14 | 	nvcc $(NVCCFLAGS) -o $@ -c $<
15 |
16 | mgpuutil.o: ../src/mgpuutil.cpp
17 | 	nvcc $(NVCCFLAGS) -o $@ -c $<
18 |
19 | benchmarkinsert: benchmarkinsert.o mgpucontext.o mgpuutil.o
20 | 	nvcc $(NVCCFLAGS) -o $@ $+
21 |
22 | run: build
23 | 	./benchmarkinsert
24 |
25 | clean:
26 | 	rm -f benchmarkinsert.o mgpucontext.o mgpuutil.o benchmarkinsert
27 |
28 | # clobber is an alias for clean: a prerequisite, not a shell command named "clean".
29 | clobber: clean
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/benchmarkinsert/benchmarkinsert.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx
7 |
8 |
9 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
10 | h;hpp;hxx;hm;inl;inc;xsd
11 |
12 |
13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 | Source Files
24 |
25 |
26 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/benchmarkintervalmove/Makefile:
--------------------------------------------------------------------------------
1 | # Build, run and clean rules for the benchmarkintervalmove benchmark. NVCCFLAGS is expected to come from ../common.mk (not visible here).
2 | include ../common.mk
3 |
4 | .PHONY: all build run clean clobber
5 |
6 | all: build
7 |
8 | build: benchmarkintervalmove
9 |
10 | benchmarkintervalmove.o: benchmarkintervalmove.cu
11 | 	nvcc $(NVCCFLAGS) -o $@ -c $<
12 |
13 | mgpucontext.o: ../src/mgpucontext.cu
14 | 	nvcc $(NVCCFLAGS) -o $@ -c $<
15 |
16 | mgpuutil.o: ../src/mgpuutil.cpp
17 | 	nvcc $(NVCCFLAGS) -o $@ -c $<
18 |
19 | benchmarkintervalmove: benchmarkintervalmove.o mgpucontext.o mgpuutil.o
20 | 	nvcc $(NVCCFLAGS) -o $@ $+
21 |
22 | run: build
23 | 	./benchmarkintervalmove
24 |
25 | clean:
26 | 	rm -f benchmarkintervalmove.o mgpucontext.o mgpuutil.o benchmarkintervalmove
27 |
28 | # clobber is an alias for clean: a prerequisite, not a shell command named "clean".
29 | clobber: clean
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/benchmarkintervalmove/benchmarkintervalmove.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx
7 |
8 |
9 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
10 | h;hpp;hxx;hm;inl;inc;xsd
11 |
12 |
13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
15 |
16 |
17 |
18 |
19 | Source Files
20 |
21 |
22 |
23 |
24 |
25 | Source Files
26 |
27 |
28 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/benchmarkjoin/Makefile:
--------------------------------------------------------------------------------
1 | # Build, run and clean rules for the benchmarkjoin benchmark. NVCCFLAGS is expected to come from ../common.mk (not visible here).
2 | include ../common.mk
3 |
4 | .PHONY: all build run clean clobber
5 |
6 | all: build
7 |
8 | build: benchmarkjoin
9 |
10 | benchmarkjoin.o: benchmarkjoin.cu
11 | 	nvcc $(NVCCFLAGS) -o $@ -c $<
12 |
13 | mgpucontext.o: ../src/mgpucontext.cu
14 | 	nvcc $(NVCCFLAGS) -o $@ -c $<
15 |
16 | mgpuutil.o: ../src/mgpuutil.cpp
17 | 	nvcc $(NVCCFLAGS) -o $@ -c $<
18 |
19 | benchmarkjoin: benchmarkjoin.o mgpucontext.o mgpuutil.o
20 | 	nvcc $(NVCCFLAGS) -o $@ $+
21 |
22 | run: build
23 | 	./benchmarkjoin
24 |
25 | clean:
26 | 	rm -f benchmarkjoin.o mgpucontext.o mgpuutil.o benchmarkjoin
27 |
28 | # clobber is an alias for clean: a prerequisite, not a shell command named "clean".
29 | clobber: clean
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/benchmarkjoin/benchmarkjoin.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx
7 |
8 |
9 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
10 | h;hpp;hxx;hm;inl;inc;xsd
11 |
12 |
13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 | Source Files
24 |
25 |
26 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/benchmarklaunchbox/Makefile:
--------------------------------------------------------------------------------
1 | # Build, run and clean rules for the benchmarklaunchbox benchmark. NVCCFLAGS is expected to come from ../common.mk (not visible here).
2 | include ../common.mk
3 |
4 | .PHONY: all build run clean clobber
5 |
6 | all: build
7 |
8 | build: benchmarklaunchbox
9 |
10 | benchmarklaunchbox.o: benchmarklaunchbox.cu
11 | 	nvcc $(NVCCFLAGS) -o $@ -c $<
12 |
13 | mgpucontext.o: ../src/mgpucontext.cu
14 | 	nvcc $(NVCCFLAGS) -o $@ -c $<
15 |
16 | mgpuutil.o: ../src/mgpuutil.cpp
17 | 	nvcc $(NVCCFLAGS) -o $@ -c $<
18 |
19 | benchmarklaunchbox: benchmarklaunchbox.o mgpucontext.o mgpuutil.o
20 | 	nvcc $(NVCCFLAGS) -o $@ $+
21 |
22 | run: build
23 | 	./benchmarklaunchbox
24 |
25 | clean:
26 | 	rm -f benchmarklaunchbox.o mgpucontext.o mgpuutil.o benchmarklaunchbox
27 |
28 | # clobber is an alias for clean: a prerequisite, not a shell command named "clean".
29 | clobber: clean
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/benchmarklaunchbox/benchmarklaunchbox.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx
7 |
8 |
9 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
10 | h;hpp;hxx;hm;inl;inc;xsd
11 |
12 |
13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 | Source Files
24 |
25 |
26 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/benchmarkloadbalance/Makefile:
--------------------------------------------------------------------------------
1 | # Build, run and clean rules for the benchmarkloadbalance benchmark. NVCCFLAGS is expected to come from ../common.mk (not visible here).
2 | include ../common.mk
3 |
4 | .PHONY: all build run clean clobber
5 |
6 | all: build
7 |
8 | build: benchmarkloadbalance
9 |
10 | benchmarkloadbalance.o: benchmarkloadbalance.cu
11 | 	nvcc $(NVCCFLAGS) -o $@ -c $<
12 |
13 | mgpucontext.o: ../src/mgpucontext.cu
14 | 	nvcc $(NVCCFLAGS) -o $@ -c $<
15 |
16 | mgpuutil.o: ../src/mgpuutil.cpp
17 | 	nvcc $(NVCCFLAGS) -o $@ -c $<
18 |
19 | benchmarkloadbalance: benchmarkloadbalance.o mgpucontext.o mgpuutil.o
20 | 	nvcc $(NVCCFLAGS) -o $@ $+
21 |
22 | run: build
23 | 	./benchmarkloadbalance
24 |
25 | clean:
26 | 	rm -f benchmarkloadbalance.o mgpucontext.o mgpuutil.o benchmarkloadbalance
27 |
28 | # clobber is an alias for clean: a prerequisite, not a shell command named "clean".
29 | clobber: clean
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/benchmarkloadbalance/benchmarkloadbalance.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx
7 |
8 |
9 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
10 | h;hpp;hxx;hm;inl;inc;xsd
11 |
12 |
13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 | Source Files
24 |
25 |
26 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/benchmarklocalitysort/Makefile:
--------------------------------------------------------------------------------
1 | # Build, run and clean rules for the benchmarklocalitysort benchmark. NVCCFLAGS is expected to come from ../common.mk (not visible here).
2 | include ../common.mk
3 | .PHONY: all build run clean clobber
4 |
5 | all: build
6 |
7 | build: benchmarklocalitysort
8 |
9 | benchmarklocalitysort.o: benchmarklocalitysort.cu
10 | 	nvcc $(NVCCFLAGS) -o $@ -c $<
11 |
12 | mgpucontext.o: ../src/mgpucontext.cu
13 | 	nvcc $(NVCCFLAGS) -o $@ -c $<
14 |
15 | mgpuutil.o: ../src/mgpuutil.cpp
16 | 	nvcc $(NVCCFLAGS) -o $@ -c $<
17 |
18 | benchmarklocalitysort: benchmarklocalitysort.o mgpucontext.o mgpuutil.o
19 | 	nvcc $(NVCCFLAGS) -o $@ $+
20 |
21 | run: build
22 | 	./benchmarklocalitysort
23 |
24 | clean:
25 | 	rm -f benchmarklocalitysort.o mgpucontext.o mgpuutil.o benchmarklocalitysort
26 |
27 | # clobber is an alias for clean: a prerequisite, not a shell command named "clean".
28 | clobber: clean
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/benchmarklocalitysort/benchmarklocalitysort.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx
7 |
8 |
9 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
10 | h;hpp;hxx;hm;inl;inc;xsd
11 |
12 |
13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 | Source Files
24 |
25 |
26 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/benchmarkmerge/Makefile:
--------------------------------------------------------------------------------
1 | # Build, run and clean rules for the benchmarkmerge benchmark. NVCCFLAGS is expected to come from ../common.mk (not visible here).
2 | include ../common.mk
3 |
4 | .PHONY: all build run clean clobber
5 |
6 | all: build
7 |
8 | build: benchmarkmerge
9 |
10 | benchmarkmerge.o: benchmarkmerge.cu
11 | 	nvcc $(NVCCFLAGS) -o $@ -c $<
12 |
13 | mgpucontext.o: ../src/mgpucontext.cu
14 | 	nvcc $(NVCCFLAGS) -o $@ -c $<
15 |
16 | mgpuutil.o: ../src/mgpuutil.cpp
17 | 	nvcc $(NVCCFLAGS) -o $@ -c $<
18 |
19 | benchmarkmerge: benchmarkmerge.o mgpucontext.o mgpuutil.o
20 | 	nvcc $(NVCCFLAGS) -o $@ $+
21 |
22 | run: build
23 | 	./benchmarkmerge
24 |
25 | clean:
26 | 	rm -f benchmarkmerge.o mgpucontext.o mgpuutil.o benchmarkmerge
27 |
28 | # clobber is an alias for clean: a prerequisite, not a shell command named "clean".
29 | clobber: clean
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/benchmarkmerge/benchmarkmerge.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/benchmarkreducebykey/Makefile:
--------------------------------------------------------------------------------
1 | # Build, run and clean rules for the benchmarkreducebykey benchmark. NVCCFLAGS is expected to come from ../common.mk (not visible here).
2 | include ../common.mk
3 |
4 | .PHONY: all build run clean clobber
5 |
6 | all: build
7 |
8 | build: benchmarkreducebykey
9 |
10 | benchmarkreducebykey.o: benchmarkreducebykey.cu
11 | 	nvcc $(NVCCFLAGS) -o $@ -c $<
12 |
13 | mgpucontext.o: ../src/mgpucontext.cu
14 | 	nvcc $(NVCCFLAGS) -o $@ -c $<
15 |
16 | mgpuutil.o: ../src/mgpuutil.cpp
17 | 	nvcc $(NVCCFLAGS) -o $@ -c $<
18 |
19 | benchmarkreducebykey: benchmarkreducebykey.o mgpucontext.o mgpuutil.o
20 | 	nvcc $(NVCCFLAGS) -o $@ $+
21 |
22 | run: build
23 | 	./benchmarkreducebykey
24 |
25 | clean:
26 | 	rm -f benchmarkreducebykey.o mgpucontext.o mgpuutil.o benchmarkreducebykey
27 |
28 | # clobber is an alias for clean: a prerequisite, not a shell command named "clean".
29 | clobber: clean
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/benchmarkreducebykey/benchmarkreducebykey.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx
7 |
8 |
9 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
10 | h;hpp;hxx;hm;inl;inc;xsd
11 |
12 |
13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 | Source Files
24 |
25 |
26 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/benchmarkscan/Makefile:
--------------------------------------------------------------------------------
1 | # Build, run and clean rules for the benchmarkscan benchmark. NVCCFLAGS is expected to come from ../common.mk (not visible here).
2 | include ../common.mk
3 |
4 | .PHONY: all build run clean clobber
5 |
6 | all: build
7 |
8 | build: benchmarkscan
9 |
10 | benchmarkscan.o: benchmarkscan.cu
11 | 	nvcc $(NVCCFLAGS) -o $@ -c $<
12 |
13 | mgpucontext.o: ../src/mgpucontext.cu
14 | 	nvcc $(NVCCFLAGS) -o $@ -c $<
15 |
16 | mgpuutil.o: ../src/mgpuutil.cpp
17 | 	nvcc $(NVCCFLAGS) -o $@ -c $<
18 |
19 | benchmarkscan: benchmarkscan.o mgpucontext.o mgpuutil.o
20 | 	nvcc $(NVCCFLAGS) -o $@ $+
21 |
22 | run: build
23 | 	./benchmarkscan
24 |
25 | clean:
26 | 	rm -f benchmarkscan.o mgpucontext.o mgpuutil.o benchmarkscan
27 |
28 | # clobber is an alias for clean: a prerequisite, not a shell command named "clean".
29 | clobber: clean
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/benchmarkscan/benchmarkscan.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/benchmarksegreduce/Makefile:
--------------------------------------------------------------------------------
1 | # Build, run and clean rules for the benchmarksegreduce benchmark. NVCCFLAGS is expected to come from ../common.mk (not visible here).
2 | include ../common.mk
3 |
4 | .PHONY: all build run clean clobber
5 |
6 | all: build
7 |
8 | build: benchmarksegreduce
9 |
10 | benchmarksegreduce.o: benchmarksegreduce.cu
11 | 	nvcc $(NVCCFLAGS) -o $@ -c $<
12 |
13 | mgpucontext.o: ../src/mgpucontext.cu
14 | 	nvcc $(NVCCFLAGS) -o $@ -c $<
15 |
16 | mgpuutil.o: ../src/mgpuutil.cpp
17 | 	nvcc $(NVCCFLAGS) -o $@ -c $<
18 |
19 | benchmarksegreduce: benchmarksegreduce.o mgpucontext.o mgpuutil.o
20 | 	nvcc $(NVCCFLAGS) -o $@ $+
21 |
22 | run: build
23 | 	./benchmarksegreduce
24 |
25 | clean:
26 | 	rm -f benchmarksegreduce.o mgpucontext.o mgpuutil.o benchmarksegreduce
27 |
28 | # clobber is an alias for clean: a prerequisite, not a shell command named "clean".
29 | clobber: clean
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/benchmarksegreduce/benchmarksegreduce.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx
7 |
8 |
9 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
10 | h;hpp;hxx;hm;inl;inc;xsd
11 |
12 |
13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
15 |
16 |
17 |
18 |
19 | Source Files
20 |
21 |
22 |
23 |
24 |
25 |
26 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/benchmarksegsort/Makefile:
--------------------------------------------------------------------------------
1 | # Build, run and clean rules for the benchmarksegsort benchmark. NVCCFLAGS is expected to come from ../common.mk (not visible here).
2 | include ../common.mk
3 |
4 | .PHONY: all build run clean clobber
5 |
6 | all: build
7 |
8 | build: benchmarksegsort
9 |
10 | benchmarksegsort.o: benchmarksegsort.cu
11 | 	nvcc $(NVCCFLAGS) -o $@ -c $<
12 |
13 | mgpucontext.o: ../src/mgpucontext.cu
14 | 	nvcc $(NVCCFLAGS) -o $@ -c $<
15 |
16 | mgpuutil.o: ../src/mgpuutil.cpp
17 | 	nvcc $(NVCCFLAGS) -o $@ -c $<
18 |
19 | benchmarksegsort: benchmarksegsort.o mgpucontext.o mgpuutil.o
20 | 	nvcc $(NVCCFLAGS) -o $@ $+
21 |
22 | run: build
23 | 	./benchmarksegsort
24 |
25 | clean:
26 | 	rm -f benchmarksegsort.o mgpucontext.o mgpuutil.o benchmarksegsort
27 |
28 | # clobber is an alias for clean: a prerequisite, not a shell command named "clean".
29 | clobber: clean
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/benchmarksegsort/benchmarksegsort.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx
7 |
8 |
9 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
10 | h;hpp;hxx;hm;inl;inc;xsd
11 |
12 |
13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 | Source Files
24 |
25 |
26 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/benchmarksets/Makefile:
--------------------------------------------------------------------------------
1 | # Builds and runs benchmarksets with nvcc; NVCCFLAGS is assembled by ../common.mk.
2 | include ../common.mk
3 |
4 | all: build
5 |
6 | build: benchmarksets
7 | # One compile rule per object; $@ is the target and $< the first prerequisite.
8 | benchmarksets.o: benchmarksets.cu
9 | nvcc $(NVCCFLAGS) -o $@ -c $<
10 |
11 | mgpucontext.o: ../src/mgpucontext.cu
12 | nvcc $(NVCCFLAGS) -o $@ -c $<
13 |
14 | mgpuutil.o: ../src/mgpuutil.cpp
15 | nvcc $(NVCCFLAGS) -o $@ -c $<
16 | # Link step: $+ expands to every prerequisite, in the order listed.
17 | benchmarksets: benchmarksets.o mgpucontext.o mgpuutil.o
18 | nvcc $(NVCCFLAGS) -o $@ $+
19 |
20 | run: build
21 | ./benchmarksets
22 |
23 | clean:
24 | rm -f benchmarksets.o mgpucontext.o mgpuutil.o benchmarksets
25 | # clobber is an alias for clean; it names clean as a prerequisite rather than running it as a shell command.
26 | clobber: clean
27 |
28 | .PHONY: all build run clean clobber
29 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/benchmarksets/benchmarksets.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx
7 |
8 |
9 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
10 | h;hpp;hxx;hm;inl;inc;xsd
11 |
12 |
13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 | Source Files
24 |
25 |
26 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/benchmarksort/Makefile:
--------------------------------------------------------------------------------
1 | # Builds and runs benchmarksort with nvcc; NVCCFLAGS is assembled by ../common.mk.
2 | include ../common.mk
3 |
4 | all: build
5 |
6 | build: benchmarksort
7 | # One compile rule per object; $@ is the target and $< the first prerequisite.
8 | benchmarksort.o: benchmarksort.cu
9 | nvcc $(NVCCFLAGS) -o $@ -c $<
10 |
11 | mgpucontext.o: ../src/mgpucontext.cu
12 | nvcc $(NVCCFLAGS) -o $@ -c $<
13 |
14 | mgpuutil.o: ../src/mgpuutil.cpp
15 | nvcc $(NVCCFLAGS) -o $@ -c $<
16 | # Link step: $+ expands to every prerequisite, in the order listed.
17 | benchmarksort: benchmarksort.o mgpucontext.o mgpuutil.o
18 | nvcc $(NVCCFLAGS) -o $@ $+
19 |
20 | run: build
21 | ./benchmarksort
22 |
23 | clean:
24 | rm -f benchmarksort.o mgpucontext.o mgpuutil.o benchmarksort
25 | # clobber is an alias for clean; it names clean as a prerequisite rather than running it as a shell command.
26 | clobber: clean
27 |
28 | .PHONY: all build run clean clobber
29 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/benchmarksort/benchmarksort.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/benchmarksortedsearch/Makefile:
--------------------------------------------------------------------------------
1 | include ../common.mk
2 | # Builds and runs benchmarksortedsearch with nvcc; NVCCFLAGS is assembled by ../common.mk.
3 | all: build
4 |
5 | build: benchmarksortedsearch
6 | # One compile rule per object; $@ is the target and $< the first prerequisite.
7 | benchmarksortedsearch.o: benchmarksortedsearch.cu
8 | nvcc $(NVCCFLAGS) -o $@ -c $<
9 |
10 | mgpucontext.o: ../src/mgpucontext.cu
11 | nvcc $(NVCCFLAGS) -o $@ -c $<
12 |
13 | mgpuutil.o: ../src/mgpuutil.cpp
14 | nvcc $(NVCCFLAGS) -o $@ -c $<
15 | # Link step: $+ expands to every prerequisite, in the order listed.
16 | benchmarksortedsearch: benchmarksortedsearch.o mgpucontext.o mgpuutil.o
17 | nvcc $(NVCCFLAGS) -o $@ $+
18 |
19 | run: build
20 | ./benchmarksortedsearch
21 |
22 | clean:
23 | rm -f benchmarksortedsearch.o mgpucontext.o mgpuutil.o benchmarksortedsearch
24 | # clobber is an alias for clean; it names clean as a prerequisite rather than running it as a shell command.
25 | clobber: clean
26 |
27 | .PHONY: all build run clean clobber
28 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/benchmarksortedsearch/benchmarksortedsearch.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx
7 |
8 |
9 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
10 | h;hpp;hxx;hm;inl;inc;xsd
11 |
12 |
13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 | Source Files
24 |
25 |
26 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/benchmarkspmvcsr/Makefile:
--------------------------------------------------------------------------------
1 | # Builds and runs benchmarkspmvcsr with nvcc; NVCCFLAGS is assembled by ../common.mk.
2 | include ../common.mk
3 |
4 | all: build
5 |
6 | build: benchmarkspmvcsr
7 | # One compile rule per object; $@ is the target and $< the first prerequisite.
8 | benchmarkspmvcsr.o: benchmarkspmvcsr.cu
9 | nvcc $(NVCCFLAGS) -o $@ -c $<
10 |
11 | mgpucontext.o: ../src/mgpucontext.cu
12 | nvcc $(NVCCFLAGS) -o $@ -c $<
13 |
14 | mgpuutil.o: ../src/mgpuutil.cpp
15 | nvcc $(NVCCFLAGS) -o $@ -c $<
16 |
17 | mmio.o: ../src/mmio.cpp
18 | nvcc $(NVCCFLAGS) -o $@ -c $<
19 |
20 | sparsematrix.o: ../src/sparsematrix.cpp
21 | nvcc $(NVCCFLAGS) -o $@ -c $<
22 | # Link step: $+ expands to every prerequisite, in the order listed; cusparse is linked in as well.
23 | benchmarkspmvcsr: benchmarkspmvcsr.o mgpucontext.o mgpuutil.o mmio.o sparsematrix.o
24 | nvcc $(NVCCFLAGS) -o $@ $+ -lcusparse
25 |
26 | run: build
27 | ./benchmarkspmvcsr
28 | # clean removes every object built above, including mmio.o and sparsematrix.o.
29 | clean:
30 | rm -f benchmarkspmvcsr.o mgpucontext.o mgpuutil.o mmio.o sparsematrix.o benchmarkspmvcsr
31 | # clobber is an alias for clean; it names clean as a prerequisite rather than running it as a shell command.
32 | clobber: clean
33 |
34 | .PHONY: all build run clean clobber
35 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/benchmarkspmvcsr/benchmarkspmvcsr.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx
7 |
8 |
9 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
10 | h;hpp;hxx;hm;inl;inc;xsd
11 |
12 |
13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 | Source Files
24 |
25 |
26 | Source Files
27 |
28 |
29 | Source Files
30 |
31 |
32 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/common.mk:
--------------------------------------------------------------------------------
1 | # Shared nvcc flag setup included by the per-program Makefiles; dbg=1 adds -g -G.
2 | ifeq ($(dbg),1)
3 | NVCCFLAGS += -g -G
4 | endif
5 | # NVCC_BITS, when defined, is passed through as the argument of nvcc's -m option.
6 | ifdef NVCC_BITS
7 | NVCCFLAGS += -m $(NVCC_BITS)
8 | endif
9 | # NVCC_VERBOSE, when defined, forwards -v to ptxas via -Xptxas.
10 | ifdef NVCC_VERBOSE
11 | NVCCFLAGS += -Xptxas="-v"
12 | endif
13 | # Relative include path; the including Makefiles sit one directory below the include folder.
14 | INCLUDES := -I ../include
15 | # One -gencode option per target architecture.
16 | GENCODE_SM20 := -gencode arch=compute_20,code=sm_20
17 | GENCODE_SM30 := -gencode arch=compute_30,code=sm_30
18 | GENCODE_SM35 := -gencode arch=compute_35,code=sm_35
19 | # Only SM20 and SM35 are enabled; GENCODE_SM30 is defined above but not referenced here.
20 | GENCODE_FLAGS := $(GENCODE_SM20) $(GENCODE_SM35)
21 | # Architecture and include flags are appended after any debug/bits/verbose flags set above.
22 | NVCCFLAGS += $(GENCODE_FLAGS) $(INCLUDES)
23 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/demo/Makefile:
--------------------------------------------------------------------------------
1 | # Builds and runs the demo program with nvcc; NVCCFLAGS is assembled by ../common.mk.
2 | include ../common.mk
3 |
4 | all: build
5 |
6 | build: demo
7 | # One compile rule per object; $@ is the target and $< the first prerequisite.
8 | mgpucontext.o: ../src/mgpucontext.cu
9 | nvcc $(NVCCFLAGS) -o $@ -c $<
10 |
11 | mgpuutil.o: ../src/mgpuutil.cpp
12 | nvcc $(NVCCFLAGS) -o $@ -c $<
13 |
14 | demo.o: demo.cu
15 | nvcc $(NVCCFLAGS) -o $@ -c $<
16 | # Link step: $+ expands to every prerequisite, in the order listed.
17 | demo: demo.o mgpucontext.o mgpuutil.o
18 | nvcc $(NVCCFLAGS) -o $@ $+
19 |
20 | run: build
21 | ./demo
22 |
23 | clean:
24 | rm -f demo.o mgpucontext.o mgpuutil.o demo
25 | # clobber is an alias for clean; it names clean as a prerequisite rather than running it as a shell command.
26 | clobber: clean
27 |
28 | .PHONY: all build run clean clobber
29 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/include/kernels/cubradixsort.cuh:
--------------------------------------------------------------------------------
1 | #include "device/deviceutil.cuh"
2 | #include "cub/cub.cuh"
3 |
4 | namespace mgpu {
5 | // Keys-only radix sort through cub::DeviceRadixSort::SortKeys; returns true when the DoubleBuffer selector ends at 1 (presumably keys2_global holds the result - confirm against CUB's DoubleBuffer docs).
6 | template<typename Key>
7 | bool CubRadixSort(Key* keys_global, Key* keys2_global, int count, int beginBit,
8 | int endBit, CudaContext& context) {
9 |
10 | cub::DoubleBuffer<Key> keys(keys_global, keys2_global);
11 | // Size query: called with a null temp-storage pointer so that tempBytes receives the required size.
12 | size_t tempBytes = 0;
13 | cub::DeviceRadixSort::SortKeys(0, tempBytes, keys, count, beginBit, endBit,
14 | context.Stream());
15 |
16 | MGPU_MEM(byte) tempDevice = context.Malloc<byte>(tempBytes);
17 | // Second call performs the sort using the allocated temporary storage.
18 | cub::DeviceRadixSort::SortKeys(tempDevice->get(), tempBytes, keys, count,
19 | beginBit, endBit, context.Stream());
20 | MGPU_SYNC_CHECK("cub::DeviceRadixSort::SortKeys");
21 |
22 | return 1 == keys.selector;
23 | }
24 | // Key/value variant using cub::DeviceRadixSort::SortPairs; the return value is derived from the key buffer's selector only.
25 | template<typename Key, typename Value>
26 | bool CubRadixSort(Key* keys_global, Key* keys2_global, Value* values_global,
27 | Value* values2_global, int count, int beginBit, int endBit,
28 | CudaContext& context) {
29 |
30 | cub::DoubleBuffer<Key> keys(keys_global, keys2_global);
31 | cub::DoubleBuffer<Value> values(values_global, values2_global);
32 | // Size query: called with a null temp-storage pointer so that tempBytes receives the required size.
33 | size_t tempBytes = 0;
34 | cub::DeviceRadixSort::SortPairs(0, tempBytes, keys, values, count,
35 | beginBit, endBit, context.Stream());
36 |
37 | MGPU_MEM(byte) tempDevice = context.Malloc<byte>(tempBytes);
38 | // Second call performs the sort using the allocated temporary storage.
39 | cub::DeviceRadixSort::SortPairs(tempDevice->get(), tempBytes, keys, values,
40 | count, beginBit, endBit, context.Stream());
41 | MGPU_SYNC_CHECK("cub::DeviceRadixSort::SortPairs");
42 |
43 | return 1 == keys.selector;
44 | }
45 |
46 | } // namespace mgpu
47 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/mgpu_benchmarks.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THU-luvision/INS-Conv/c1cdd4187803f1d50e00610ae947ed337a7eb93f/extra/cudpp/ext/moderngpu/mgpu_benchmarks.xlsx
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/parallelmerge/Makefile:
--------------------------------------------------------------------------------
1 | # Builds and runs parallelmerge with nvcc; NVCCFLAGS is assembled by ../common.mk.
2 | include ../common.mk
3 |
4 | all: build
5 |
6 | build: parallelmerge
7 | # One compile rule per object; $@ is the target and $< the first prerequisite.
8 | parallelmerge.o: parallelmerge.cu
9 | nvcc $(NVCCFLAGS) -o $@ -c $<
10 |
11 | mgpucontext.o: ../src/mgpucontext.cu
12 | nvcc $(NVCCFLAGS) -o $@ -c $<
13 |
14 | mgpuutil.o: ../src/mgpuutil.cpp
15 | nvcc $(NVCCFLAGS) -o $@ -c $<
16 | # Link step: $+ expands to every prerequisite, in the order listed.
17 | parallelmerge: parallelmerge.o mgpucontext.o mgpuutil.o
18 | nvcc $(NVCCFLAGS) -o $@ $+
19 |
20 | run: build
21 | ./parallelmerge
22 |
23 | clean:
24 | rm -f parallelmerge.o mgpucontext.o mgpuutil.o parallelmerge
25 | # clobber is an alias for clean; it names clean as a prerequisite rather than running it as a shell command.
26 | clobber: clean
27 |
28 | .PHONY: all build run clean clobber
29 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/parallelmerge/parallelmerge.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx
7 |
8 |
9 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
10 | h;hpp;hxx;hm;inl;inc;xsd
11 |
12 |
13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 | Source Files
24 |
25 |
26 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/testlaunchbox/Makefile:
--------------------------------------------------------------------------------
1 | # Builds and runs testlaunchbox with nvcc; NVCCFLAGS is assembled by ../common.mk.
2 | include ../common.mk
3 |
4 | all: build
5 |
6 | build: testlaunchbox
7 | # One compile rule per object; $@ is the target and $< the first prerequisite.
8 | testlaunchbox.o: testlaunchbox.cu
9 | nvcc $(NVCCFLAGS) -o $@ -c $<
10 |
11 | mgpucontext.o: ../src/mgpucontext.cu
12 | nvcc $(NVCCFLAGS) -o $@ -c $<
13 |
14 | mgpuutil.o: ../src/mgpuutil.cpp
15 | nvcc $(NVCCFLAGS) -o $@ -c $<
16 | # Link step: $+ expands to every prerequisite, in the order listed.
17 | testlaunchbox: testlaunchbox.o mgpucontext.o mgpuutil.o
18 | nvcc $(NVCCFLAGS) -o $@ $+
19 |
20 | run: build
21 | ./testlaunchbox
22 |
23 | clean:
24 | rm -f testlaunchbox.o mgpucontext.o mgpuutil.o testlaunchbox
25 | # clobber is an alias for clean; it names clean as a prerequisite rather than running it as a shell command.
26 | clobber: clean
27 |
28 | .PHONY: all build run clean clobber
29 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/testlaunchbox/testlaunchbox.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx
7 |
8 |
9 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
10 | h;hpp;hxx;hm;inl;inc;xsd
11 |
12 |
13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 | Source Files
24 |
25 |
26 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/testsegsortbyflags/testsegsortbyflags.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx
7 |
8 |
9 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
10 | h;hpp;hxx;hm;inl;inc;xsd
11 |
12 |
13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 | Source Files
24 |
25 |
26 |
--------------------------------------------------------------------------------
/extra/cudpp/ext/moderngpu/vs.props:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | ../include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir)
9 | _CRT_SECURE_NO_WARNINGS;_UNICODE;UNICODE;%(PreprocessorDefinitions)
10 |
11 |
12 | compute_20,sm_20;compute_35,sm_35
13 |
14 |
15 | true
16 | false
17 | true
18 | true
19 |
20 |
21 | cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
22 |
23 |
24 |
25 |
--------------------------------------------------------------------------------
/extra/cudpp/include/cudpp_config.h:
--------------------------------------------------------------------------------
1 | // This file is automatically generated. DO NOT EDIT
2 |
3 | /* #undef CUDPP_STATIC_LIB */
4 |
--------------------------------------------------------------------------------
/extra/cudpp/include/cudpp_config.h.in:
--------------------------------------------------------------------------------
1 | // This file is automatically generated. DO NOT EDIT
2 |
3 | #cmakedefine CUDPP_STATIC_LIB
--------------------------------------------------------------------------------
/extra/cudpp/license.txt:
--------------------------------------------------------------------------------
1 | Copyright (c) 2007-2010 The Regents of the University of California, Davis
2 | campus ("The Regents") and NVIDIA Corporation ("NVIDIA"). All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without modification,
5 | are permitted provided that the following conditions are met:
6 |
7 | * Redistributions of source code must retain the above copyright notice,
8 | this list of conditions and the following disclaimer.
9 | * Redistributions in binary form must reproduce the above copyright notice,
10 | this list of conditions and the following disclaimer in the documentation
11 | and/or other materials provided with the distribution.
12 | * Neither the name of the The Regents, nor NVIDIA, nor the names of its
13 | contributors may be used to endorse or promote products derived from this
14 | software without specific prior written permission.
15 |
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
20 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
24 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
25 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 |
--------------------------------------------------------------------------------
/extra/cudpp/src/cudpp/cudpp_compact.h:
--------------------------------------------------------------------------------
1 | // -------------------------------------------------------------
2 | // cuDPP -- CUDA Data Parallel Primitives library
3 | // -------------------------------------------------------------
4 | // $Revision$
5 | // $Date$
6 | // -------------------------------------------------------------
7 | // This source code is distributed under the terms of license.txt in
8 | // the root directory of this source distribution.
9 | // -------------------------------------------------------------
10 |
11 | /**
12 | * @file
13 | * cudpp_compact.h
14 | *
15 | * @brief Compact functionality header file - contains CUDPP interface (not public)
16 | */
17 |
18 | #ifndef _CUDPP_COMPACT_H_
19 | #define _CUDPP_COMPACT_H_
20 |
21 | class CUDPPCompactPlan;
22 |
23 | extern "C"
24 | void allocCompactStorage(CUDPPCompactPlan* plan);
25 |
26 | extern "C"
27 | void freeCompactStorage(CUDPPCompactPlan* plan);
28 |
29 | extern "C"
30 | void cudppCompactDispatch(void *d_out,
31 | size_t *d_numValidElements,
32 | const void *d_in,
33 | const unsigned int *d_isValid,
34 | size_t numElements,
35 | const CUDPPCompactPlan *plan);
36 |
37 | #endif // _CUDPP_COMPACT_H_
38 |
--------------------------------------------------------------------------------
/extra/cudpp/src/cudpp/cudpp_compress.h:
--------------------------------------------------------------------------------
1 | // -------------------------------------------------------------
2 | // CUDPP -- CUDA Data Parallel Primitives library
3 | // -------------------------------------------------------------
4 | // $Revision$
5 | // $Date$
6 | // -------------------------------------------------------------
7 | // This source code is distributed under the terms of license.txt
8 | // in the root directory of this source distribution.
9 | // -------------------------------------------------------------
10 |
11 | /**
12 | * @file
13 | * cudpp_compress.h
14 | *
15 | * @brief Compress functionality header file - contains CUDPP interface (not public)
16 | */
17 |
18 | #ifndef _CUDPP_COMPRESS_H_
19 | #define _CUDPP_COMPRESS_H_
20 |
21 | class CUDPPCompressPlan;
22 | class CUDPPBwtPlan;
23 | class CUDPPMtfPlan;
24 |
25 | // Compress
26 | extern "C"
27 | void allocCompressStorage(CUDPPCompressPlan* plan);
28 |
29 | extern "C"
30 | void freeCompressStorage(CUDPPCompressPlan* plan);
31 |
32 | extern "C"
33 | void cudppCompressDispatch(unsigned char *d_uncompressed,
34 | int *d_bwtIndex,
35 | unsigned int *d_histSize,
36 | unsigned int *d_hist,
37 | unsigned int *d_encodeOffset,
38 | unsigned int *d_compressedSize,
39 | unsigned int *d_compressed,
40 | size_t numElements,
41 | const CUDPPCompressPlan *plan);
42 |
43 | // BWT
44 | extern "C"
45 | void allocBwtStorage(CUDPPBwtPlan* plan);
46 |
47 | extern "C"
48 | void freeBwtStorage(CUDPPBwtPlan* plan);
49 |
50 | extern "C"
51 | void cudppBwtDispatch(unsigned char *d_in,
52 | unsigned char *d_out,
53 | int *d_index,
54 | size_t numElements,
55 | const CUDPPBwtPlan *plan);
56 |
57 | // MTF
58 | extern "C"
59 | void allocMtfStorage(CUDPPMtfPlan* plan);
60 |
61 | extern "C"
62 | void freeMtfStorage(CUDPPMtfPlan* plan);
63 |
64 | extern "C"
65 | void cudppMtfDispatch(unsigned char *d_in,
66 | unsigned char *d_out,
67 | size_t numElements,
68 | const CUDPPMtfPlan *plan);
69 |
70 | #endif // _CUDPP_COMPRESS_H_
71 |
--------------------------------------------------------------------------------
/extra/cudpp/src/cudpp/cudpp_listrank.h:
--------------------------------------------------------------------------------
1 | // -------------------------------------------------------------
2 | // CUDPP -- CUDA Data Parallel Primitives library
3 | // -------------------------------------------------------------
4 | // $Revision$
5 | // $Date$
6 | // -------------------------------------------------------------
7 | // This source code is distributed under the terms of license.txt
8 | // in the root directory of this source distribution.
9 | // -------------------------------------------------------------
10 |
11 | /**
12 | * @file
13 | * cudpp_listrank.h
14 | *
15 | * @brief ListRank functionality header file - contains CUDPP interface (not public)
16 | */
17 |
18 | #ifndef _CUDPP_LISTRANK_H_
19 | #define _CUDPP_LISTRANK_H_
20 |
21 | class CUDPPListRankPlan;
22 |
23 | // ListRank
24 | extern "C"
25 | void allocListRankStorage(CUDPPListRankPlan* plan);
26 |
27 | extern "C"
28 | void freeListRankStorage(CUDPPListRankPlan* plan);
29 |
30 | extern "C"
31 | CUDPPResult cudppListRankDispatch(void *d_ranked_values,
32 | void *d_unranked_values,
33 | void *d_next_indices,
34 | size_t head,
35 | size_t numElements,
36 | const CUDPPListRankPlan *plan);
37 |
38 | #endif // _CUDPP_LISTRANK_H_
--------------------------------------------------------------------------------
/extra/cudpp/src/cudpp/cudpp_manager.h:
--------------------------------------------------------------------------------
1 | // -------------------------------------------------------------
2 | // cuDPP -- CUDA Data Parallel Primitives library
3 | // -------------------------------------------------------------
4 | // $Revision: 3572$
5 | // $Date: 2007-11-19 13:58:06 +0000 (Mon, 19 Nov 2007) $
6 | // -------------------------------------------------------------
7 | // This source code is distributed under the terms of license.txt
8 | // in the root directory of this source distribution.
9 | // -------------------------------------------------------------
10 | #ifndef __CUDPP_MANAGER_H__
11 | #define __CUDPP_MANAGER_H__
12 |
13 | #include
14 |
15 | /** @brief Internal manager class for CUDPP resources
16 | * Stores a cudaDeviceProp and converts between a manager pointer and its opaque CUDPPHandle.
17 | */
18 | class CUDPPManager
19 | {
20 | public:
21 |
22 | CUDPPManager();
23 | ~CUDPPManager();
24 |
25 | //! @internal Convert an opaque handle to a pointer to a manager
26 | //! @param [in] cudppHandle Handle to the Manager object
27 | //! @returns Pointer to CUDPP manager
28 | static CUDPPManager* getManagerFromHandle(CUDPPHandle cudppHandle)
29 | {
30 | return reinterpret_cast<CUDPPManager*>(cudppHandle);
31 | }
32 | //! @internal Copy the stored device properties into @a props
33 | void getDeviceProps(cudaDeviceProp & props) { props = m_deviceProps; }
34 |
35 | //! @internal Get an opaque handle for this manager
36 | //! @returns CUDPP handle for this manager
37 | CUDPPHandle getHandle()
38 | {
39 | return reinterpret_cast<CUDPPHandle>(this);
40 | }
41 |
42 | private:
43 | cudaDeviceProp m_deviceProps;
44 | };
45 |
46 | #endif // __CUDPP_MANAGER_H__
47 |
--------------------------------------------------------------------------------
/extra/cudpp/src/cudpp/cudpp_maximal_launch.h:
--------------------------------------------------------------------------------
1 | // -------------------------------------------------------------
2 | // cuDPP -- CUDA Data Parallel Primitives library
3 | // -------------------------------------------------------------
4 | // $Revision$
5 | // $Date$
6 | // -------------------------------------------------------------
7 | // This source code is distributed under the terms of license.txt
8 | // in the root directory of this source distribution.
9 | // -------------------------------------------------------------
10 | #ifndef _MAXIMAL_LAUNCH_H_
11 | #define _MAXIMAL_LAUNCH_H_
12 |
13 | #include "cuda_runtime.h"
14 |
15 | extern "C"
16 | size_t maxBlocks(cudaFuncAttributes &attribs,
17 | cudaDeviceProp &devprop,
18 | size_t bytesDynamicSharedMem,
19 | size_t threadsPerBlock);
20 |
21 | extern "C"
22 | size_t maxBlocksFromPointer(void* kernel,
23 | size_t bytesDynamicSharedMem,
24 | size_t threadsPerBlock);
25 |
26 | #ifdef __cplusplus
27 | // Convenience overload: accepts any kernel pointer type and forwards it, cast to void*, to maxBlocksFromPointer.
28 | template <typename T>
29 | size_t maxBlocks(T kernel,
30 | size_t bytesDynamicSharedMem,
31 | size_t threadsPerBlock)
32 | {
33 | return maxBlocksFromPointer((void*)kernel, bytesDynamicSharedMem, threadsPerBlock);
34 | }
35 | #endif
36 |
37 | #endif // _MAXIMAL_LAUNCH_H_
38 |
--------------------------------------------------------------------------------
/extra/cudpp/src/cudpp/cudpp_mergesort.h:
--------------------------------------------------------------------------------
1 | // -------------------------------------------------------------
2 | // cuDPP -- CUDA Data Parallel Primitives library
3 | // -------------------------------------------------------------
4 | // $Revision$
5 | // $Date$
6 | // -------------------------------------------------------------
7 | // This source code is distributed under the terms of license.txt
8 | // in the root directory of this source distribution.
9 | // -------------------------------------------------------------
10 | #ifndef __MERGESORT_H__
11 | #define __MERGESORT_H__
12 |
13 | #include "cudpp_globals.h"
14 | #include "cudpp.h"
15 | #include "cudpp_plan.h"
16 |
17 | extern "C"
18 | void allocMergeSortStorage(CUDPPMergeSortPlan* plan);
19 |
20 | extern "C"
21 | void freeMergeSortStorage(CUDPPMergeSortPlan* plan);
22 |
23 | extern "C"
24 | void cudppMergeSortDispatch(void *keys,
25 | void *values,
26 | size_t numElements,
27 | const CUDPPMergeSortPlan *plan);
28 |
29 |
30 | #endif // __MERGESORT_H__
31 |
--------------------------------------------------------------------------------
/extra/cudpp/src/cudpp/cudpp_multisplit.h:
--------------------------------------------------------------------------------
1 | // -------------------------------------------------------------
2 | // cuDPP -- CUDA Data Parallel Primitives library
3 | // -------------------------------------------------------------
4 | // $Revision$
5 | // $Date$
6 | // -------------------------------------------------------------
7 | // This source code is distributed under the terms of license.txt
8 | // in the root directory of this source distribution.
9 | // -------------------------------------------------------------
10 | #ifndef __MULTISPLIT_H__
11 | #define __MULTISPLIT_H__
12 |
13 | #include
14 | #include "cudpp_globals.h"
15 | #include "cudpp.h"
16 | #include "cudpp_plan.h"
17 |
18 |
19 | extern "C"
20 | void allocMultiSplitStorage(CUDPPMultiSplitPlan* plan);
21 |
22 | extern "C"
23 | void freeMultiSplitStorage(CUDPPMultiSplitPlan* plan);
24 |
25 | extern "C"
26 | void cudppMultiSplitDispatch(unsigned int *d_keys,
27 | unsigned int *d_values,
28 | size_t numElements,
29 | size_t numBuckets,
30 | BucketMappingFunc bucketMappingFunc,
31 | const CUDPPMultiSplitPlan *plan);
32 |
33 | #endif // __MULTISPLIT_H__
34 |
--------------------------------------------------------------------------------
/extra/cudpp/src/cudpp/cudpp_radixsort.h:
--------------------------------------------------------------------------------
1 | // -------------------------------------------------------------
2 | // cuDPP -- CUDA Data Parallel Primitives library
3 | // -------------------------------------------------------------
4 | // $Revision$
5 | // $Date$
6 | // -------------------------------------------------------------
7 | // This source code is distributed under the terms of license.txt
8 | // in the root directory of this source distribution.
9 | // -------------------------------------------------------------
10 | #ifndef __RADIXSORT_H__
11 | #define __RADIXSORT_H__
12 |
13 | #include "cudpp_globals.h"
14 | #include "cudpp.h"
15 | #include "cudpp_plan.h"
16 |
17 |
18 | void allocRadixSortStorage(CUDPPRadixSortPlan* plan);
19 |
20 | void freeRadixSortStorage(CUDPPRadixSortPlan* plan);
21 |
22 | void cudppRadixSortDispatch(void *keys,
23 | void *values,
24 | size_t numElements,
25 | const CUDPPRadixSortPlan *plan);
26 |
27 |
28 | #endif // __RADIXSORT_H__
29 |
--------------------------------------------------------------------------------
/extra/cudpp/src/cudpp/cudpp_rand.h:
--------------------------------------------------------------------------------
1 | // -------------------------------------------------------------
2 | // cuDPP -- CUDA Data Parallel Primitives library
3 | // -------------------------------------------------------------
4 | // $Revision$
5 | // $Date$
6 | // -------------------------------------------------------------
7 | // This source code is distributed under the terms of license.txt in
8 | // the root directory of this source distribution.
9 | // -------------------------------------------------------------
10 |
11 | /**
12 | * @file
13 | * cudpp_rand.h
14 | *
15 | * @brief rand functionality header file - contains CUDPP interface (not public)
16 | */
17 |
18 | #ifndef __CUDPP_RAND_H__
19 | #define __CUDPP_RAND_H__
20 |
21 | #include "cudpp_globals.h"
22 | #include "cudpp.h"
23 | #include "cudpp_plan.h"
24 |
25 | extern "C"
26 | void cudppRandDispatch(void * d_out, size_t num_elements, const CUDPPRandPlan * plan);
27 |
28 | #endif //__CUDPP_RAND_H__
29 |
30 |
31 |
--------------------------------------------------------------------------------
/extra/cudpp/src/cudpp/cudpp_reduce.h:
--------------------------------------------------------------------------------
1 | // -------------------------------------------------------------
2 | // cuDPP -- CUDA Data Parallel Primitives library
3 | // -------------------------------------------------------------
4 | // $Revision$
5 | // $Date$
6 | // -------------------------------------------------------------
7 | // This source code is distributed under the terms of license.txt
8 | // in the root directory of this source distribution.
9 | // -------------------------------------------------------------
10 |
11 | /**
12 | * @file
13 | * cudpp_reduce.h
14 | *
15 | * @brief Reduce functionality header file - contains CUDPP interface (not public)
16 | */
17 |
18 | #ifndef _CUDPP_REDUCE_H_
19 | #define _CUDPP_REDUCE_H_
20 |
21 | class CUDPPReducePlan;
22 |
23 |
24 | void allocReduceStorage(CUDPPReducePlan *plan);
25 |
26 | void freeReduceStorage(CUDPPReducePlan *plan);
27 |
28 | void cudppReduceDispatch(void *d_out,
29 | const void *d_in,
30 | size_t numElements,
31 | const CUDPPReducePlan *plan);
32 |
33 | #endif // _CUDPP_REDUCE_H_
34 |
--------------------------------------------------------------------------------
/extra/cudpp/src/cudpp/cudpp_sa.h:
--------------------------------------------------------------------------------
1 | // -------------------------------------------------------------
2 | // cuDPP -- CUDA Data Parallel Primitives library
3 | // -------------------------------------------------------------
4 | // $Revision$
5 | // $Date$
6 | // -------------------------------------------------------------
7 | // This source code is distributed under the terms of license.txt in
8 | // the root directory of this source distribution.
9 | // -------------------------------------------------------------
10 |
11 | /**
12 | * @file
13 | * cudpp_sa.h
14 | *
15 | * @brief Suffix Array functionality header file - contains CUDPP interface (not public)
16 | */
17 |
18 | #ifndef _CUDPP_SA_H_
19 | #define _CUDPP_SA_H_
20 |
21 | class CUDPPSaPlan;
22 |
23 | extern "C"
24 | void allocSaStorage(CUDPPSaPlan* plan);
25 |
26 | extern "C"
27 | void freeSaStorage(CUDPPSaPlan* plan);
28 |
29 | extern "C"
30 | void cudppSuffixArrayDispatch(unsigned char* d_str,
31 | unsigned int* d_keys_sa,
32 | size_t d_str_length,
33 | const CUDPPSaPlan *plan);
34 |
35 | #endif // _CUDPP_SA_H_
36 |
--------------------------------------------------------------------------------
/extra/cudpp/src/cudpp/cudpp_scan.h:
--------------------------------------------------------------------------------
1 | // -------------------------------------------------------------
2 | // cuDPP -- CUDA Data Parallel Primitives library
3 | // -------------------------------------------------------------
4 | // $Revision$
5 | // $Date$
6 | // -------------------------------------------------------------
7 | // This source code is distributed under the terms of license.txt
8 | // in the root directory of this source distribution.
9 | // -------------------------------------------------------------
10 |
11 | /**
12 | * @file
13 | * cudpp_scan.h
14 | *
15 | * @brief Scan functionality header file - contains CUDPP interface (not public)
16 | */
17 |
18 | #ifndef _CUDPP_SCAN_H_
19 | #define _CUDPP_SCAN_H_
20 |
21 | class CUDPPScanPlan;
22 |
23 | extern "C"
24 | void allocScanStorage(CUDPPScanPlan *plan);
25 |
26 | extern "C"
27 | void freeScanStorage(CUDPPScanPlan *plan);
28 |
29 | extern "C"
30 | void cudppScanDispatch(void *d_out,
31 | const void *d_in,
32 | size_t numElements,
33 | size_t numRows,
34 | const CUDPPScanPlan *plan);
35 |
36 | #endif // _CUDPP_SCAN_H_
37 |
--------------------------------------------------------------------------------
/extra/cudpp/src/cudpp/cudpp_segscan.h:
--------------------------------------------------------------------------------
1 | // -------------------------------------------------------------
2 | // cuDPP -- CUDA Data Parallel Primitives library
3 | // -------------------------------------------------------------
4 | // $Revision$
5 | // $Date$
6 | // -------------------------------------------------------------
7 | // This source code is distributed under the terms of license.txt in
8 | // the root directory of this source distribution.
9 | // -------------------------------------------------------------
10 |
11 | /**
12 | * @file
13 | * cudpp_segscan.h
14 | *
15 | * @brief Segmented scan functionality header file - contains CUDPP interface (not public)
16 | */
17 |
18 | #ifndef _CUDPP_SEGMENTEDSCAN_H_
19 | #define _CUDPP_SEGMENTEDSCAN_H_
20 |
21 | class CUDPPSegmentedScanPlan;
22 |
23 | extern "C"
24 | void allocSegmentedScanStorage(CUDPPSegmentedScanPlan *plan);
25 |
26 | extern "C"
27 | void freeSegmentedScanStorage(CUDPPSegmentedScanPlan *plan);
28 |
29 | extern "C"
30 | void cudppSegmentedScanDispatch(void *d_out,
31 | const void *d_idata,
32 | const unsigned int *d_iflags,
33 | size_t numElements,
34 | const CUDPPSegmentedScanPlan *plan);
35 |
36 | #endif // _CUDPP_SEGMENTEDSCAN_H_
37 |
--------------------------------------------------------------------------------
/extra/cudpp/src/cudpp/cudpp_spmvmult.h:
--------------------------------------------------------------------------------
1 | // -------------------------------------------------------------
2 | // cuDPP -- CUDA Data Parallel Primitives library
3 | // -------------------------------------------------------------
4 | // $Revision$
5 | // $Date$
6 | // -------------------------------------------------------------
7 | // This source code is distributed under the terms of license.txt in
8 | // the root directory of this source distribution.
9 | // -------------------------------------------------------------
10 |
11 | /**
12 | * @file
13 | * cudpp_spmvmult.h
14 | *
15 | * @brief Sparse matrix-vector multiply functionality header file - contains CUDPP interface (not public)
16 | */
17 |
18 | #ifndef _CUDPP_SPMVMULT_H_
19 | #define _CUDPP_SPMVMULT_H_
20 |
21 | class CUDPPSparseMatrixVectorMultiplyPlan;
22 |
23 | extern "C"
24 | void allocSparseMatrixVectorMultiplyStorage(CUDPPSparseMatrixVectorMultiplyPlan *plan,
25 | const void *A,
26 | const unsigned int *rowindx,
27 | const unsigned int *indx);
28 |
29 | extern "C"
30 | void freeSparseMatrixVectorMultiplyStorage(CUDPPSparseMatrixVectorMultiplyPlan *plan);
31 |
32 | extern "C"
33 | void cudppSparseMatrixVectorMultiplyDispatch(void *d_y,
34 | const void *d_x,
35 | const CUDPPSparseMatrixVectorMultiplyPlan *plan);
36 |
37 | #endif // _CUDPP_SPMVMULT_H_
38 |
--------------------------------------------------------------------------------
/extra/cudpp/src/cudpp/cudpp_stringsort.h:
--------------------------------------------------------------------------------
1 | // -------------------------------------------------------------
2 | // cuDPP -- CUDA Data Parallel Primitives library
3 | // -------------------------------------------------------------
4 | // $Revision$
5 | // $Date$
6 | // -------------------------------------------------------------
7 | // This source code is distributed under the terms of license.txt
8 | // in the root directory of this source distribution.
9 | // -------------------------------------------------------------
10 | #ifndef __STRINGSORT_H__
11 | #define __STRINGSORT_H__
12 |
13 | #include "cudpp_globals.h"
14 | #include "cudpp.h"
15 | #include "cudpp_plan.h"
16 |
17 | extern "C"
18 | void allocStringSortStorage(CUDPPStringSortPlan* plan);
19 |
20 | extern "C"
21 | void freeStringSortStorage(CUDPPStringSortPlan* plan);
22 |
23 |
24 |
25 | extern "C"
26 | void cudppStringSortDispatch(unsigned int *keys,
27 | unsigned int *values,
28 | unsigned int *stringVals,
29 | size_t numElements,
30 | size_t stringArrayLength,
31 | unsigned char termC,
32 | const CUDPPStringSortPlan *plan);
33 |
34 | //Some helper functions needed to transform data
35 | extern "C"
36 | void dotAdd(unsigned int* d_address,
37 | unsigned int* numSpaces,
38 | unsigned int* packedAddress,
39 | size_t numElements,
40 | size_t stringArrayLength);
41 |
42 | extern "C"
43 | void calculateAlignedOffsets(unsigned int* d_address,
44 | unsigned int* numSpaces,
45 | unsigned char* d_stringVals,
46 | unsigned char termC,
47 | size_t numElements,
48 | size_t stringArrayLength);
49 | extern "C"
50 | void packStrings(unsigned int* packedStrings,
51 | unsigned char* d_stringVals,
52 | unsigned int* d_keys,
53 | unsigned int* packedAddress,
54 | unsigned int* address,
55 | size_t numElements,
56 | size_t stringArrayLength,
57 | unsigned char termC);
58 |
59 | extern "C"
60 | void unpackStrings(unsigned int* packedAddress,
61 | unsigned int* packedAddressRef,
62 | unsigned int* address,
63 | unsigned int* addressRef,
64 | size_t numElements);
65 |
66 | #endif // __STRINGSORT_H__
67 |
--------------------------------------------------------------------------------
/extra/cudpp/src/cudpp/cudpp_tridiagonal.h:
--------------------------------------------------------------------------------
1 | // -------------------------------------------------------------
2 | // cuDPP -- CUDA Data Parallel Primitives library
3 | // -------------------------------------------------------------
4 | // $Revision$
5 | // $Date$
6 | // -------------------------------------------------------------
7 | // This source code is distributed under the terms of license.txt in
8 | // the root directory of this source distribution.
9 | // -------------------------------------------------------------
10 |
11 | /**
12 | * @file
13 | * cudpp_tridiagonal.h
14 | *
15 | * @brief tridiagonal functionality header file - contains CUDPP interface (not public)
16 | */
17 |
18 | #ifndef __CUDPP_TRIDIAGONAL_H__
19 | #define __CUDPP_TRIDIAGONAL_H__
20 |
21 | #include "cudpp_globals.h"
22 | #include "cudpp.h"
23 | #include "cudpp_plan.h"
24 |
25 | CUDPPResult cudppTridiagonalDispatch(void *d_a,
26 | void *d_b,
27 | void *d_c,
28 | void *d_d,
29 | void *d_x,
30 | int systemSize,
31 | int numSystems,
32 | const CUDPPTridiagonalPlan * plan);
33 |
34 | #endif //__CUDPP_TRIDIAGONAL_H__
35 |
--------------------------------------------------------------------------------
/extra/cudpp/src/cudpp_hash/hash_functions.cu:
--------------------------------------------------------------------------------
1 | #include "hash_table.h"
2 | #include "debugging.h"
3 |
4 | #include
5 |
6 | #include
7 |
8 | namespace CudaHT {
9 | namespace CuckooHashing {
10 | // Fills constants[0..N) with random (x, y) pairs; every value is genrand_int32() % kPrimeDivisor and x is forced to be at least 1.
11 | void GenerateFunctions(const unsigned N, // number of (x, y) pairs written to `constants`
12 | const unsigned num_keys, // only forwarded to the optional check/statistics helpers below
13 | const unsigned *d_keys, // only forwarded to the optional check/statistics helpers below
14 | const unsigned table_size, // only forwarded to the optional check/statistics helpers below
15 | uint2 *constants) { // output array; must hold at least N entries, all of which are overwritten
16 | bool regenerate = true;
17 | // Unless FORCEFULLY_GENERATE_NO_CYCLES is defined nothing sets `regenerate` back to true, so this loop body runs exactly once.
18 | while (regenerate) {
19 | regenerate = false;
20 |
21 | // Generate a set of hash function constants for this build attempt.
22 | for (unsigned i = 0 ; i < N; ++i) {
23 | unsigned new_a = genrand_int32() % kPrimeDivisor;
24 | constants[i].x = (1 > new_a ? 1 : new_a); // clamp: a zero remainder is replaced by 1
25 | constants[i].y = genrand_int32() % kPrimeDivisor;
26 | }
27 |
28 | #ifdef FORCEFULLY_GENERATE_NO_CYCLES
29 | // Ensure that every key gets N different slots.
30 | regenerate = CheckAssignedSameSlot(N, num_keys, d_keys, table_size, constants); // a true result triggers another round of constant generation
31 | #endif
32 | }
33 |
34 |
35 | #ifdef TAKE_HASH_FUNCTION_STATISTICS
36 | // Examine how well distributed the items are.
37 | TakeHashFunctionStatistics(num_keys, d_keys, table_size, constants, N);
38 | #endif
39 | }
40 |
41 | }; // namespace CuckooHashing
42 | }; // namespace CudaHT
43 |
--------------------------------------------------------------------------------
/extra/cudpp/src/cudpp_hash/mt19937ar.h:
--------------------------------------------------------------------------------
1 | void init_genrand(unsigned long s);
2 | void init_by_array(unsigned long init_key[], int key_length);
3 | unsigned long genrand_int32(void);
4 | long genrand_int31(void);
5 | double genrand_real1(void);
6 | double genrand_real2(void);
7 | double genrand_real3(void);
8 | double genrand_res53(void);
9 |
--------------------------------------------------------------------------------
/extra/easy_profiler/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.0)
2 | project(easy_profiler CXX)
3 |
4 | set_property(GLOBAL PROPERTY USE_FOLDERS ON)
5 |
6 | if (CMAKE_VERSION VERSION_LESS "3.1")
7 | if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
8 | set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++11")
9 | endif ()
10 | else ()
11 | set(CMAKE_CXX_STANDARD 11)
12 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
13 | endif ()
14 |
15 | option(EASY_PROFILER_NO_GUI "Build easy_profiler without the GUI application (required Qt)" OFF)
16 |
17 | set(EASY_PROGRAM_VERSION_MAJOR 2)
18 | set(EASY_PROGRAM_VERSION_MINOR 1)
19 | set(EASY_PROGRAM_VERSION_PATCH 0)
20 | set(EASY_PRODUCT_VERSION_STRING "${EASY_PROGRAM_VERSION_MAJOR}.${EASY_PROGRAM_VERSION_MINOR}.${EASY_PROGRAM_VERSION_PATCH}")
21 |
22 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin)
23 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin)
24 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin)
25 |
26 | # set(CMAKE_INSTALL_PREFIX ${CMAKE_CURRENT_LIST_DIR}/sdk)
27 | # easy_define_target_option(TARGET SOURCE_OPTION TARGET_DEFINITION): adds -D<TARGET_DEFINITION>=1 to TARGET's PUBLIC compile options when the option named by SOURCE_OPTION evaluates to true, and -D<TARGET_DEFINITION>=0 otherwise.
28 | macro(easy_define_target_option TARGET SOURCE_OPTION TARGET_DEFINITION)
29 | if (${SOURCE_OPTION})
30 | set(_VALUE 1) # a macro runs in the caller's variable scope, so _VALUE remains set after the call
31 | else ()
32 | set(_VALUE 0)
33 | endif ()
34 | target_compile_options(${TARGET} PUBLIC -D${TARGET_DEFINITION}=${_VALUE}) # PUBLIC: the flag is also passed on to targets that link against TARGET
35 | endmacro()
36 |
37 | SET(CMAKE_INSTALL_RPATH "$ORIGIN")
38 |
39 | add_subdirectory(easy_profiler_core)
40 | if (NOT EASY_PROFILER_NO_GUI)
41 | add_subdirectory(profiler_gui)
42 | endif()
43 | add_subdirectory(easy_profiler_converter)
44 |
45 | if (NOT EASY_PROFILER_NO_SAMPLES)
46 | add_subdirectory(sample)
47 | add_subdirectory(reader)
48 | endif ()
49 |
--------------------------------------------------------------------------------
/extra/easy_profiler/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2016-2018 Sergey Yagovtsev, Victor Zarubkin
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/extra/easy_profiler/LICENSE.MIT:
--------------------------------------------------------------------------------
1 | Copyright (c) 2016-2018 Sergey Yagovtsev, Victor Zarubkin
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
7 | of the Software, and to permit persons to whom the Software is furnished
8 | to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
14 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
15 | PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
16 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
17 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
18 | USE OR OTHER DEALINGS IN THE SOFTWARE.
--------------------------------------------------------------------------------
/extra/easy_profiler/appveyor.bat:
--------------------------------------------------------------------------------
1 | mkdir build_msvc
2 | cd build_msvc
3 | cmake -G "%GENERATOR%" ../
4 | cmake --build . --config Release
5 |
6 | goto :EOF
7 |
--------------------------------------------------------------------------------
/extra/easy_profiler/appveyor.yml:
--------------------------------------------------------------------------------
1 | platform:
2 | - Win64
3 |
4 | configuration:
5 | - Release
6 |
7 | environment:
8 | matrix:
9 | - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2013
10 | Qt5Widgets_DIR: "C:\\Qt\\5.5\\msvc2013_64\\lib\\cmake\\Qt5Widgets"
11 | GENERATOR: "Visual Studio 12 2013 Win64"
12 |
13 | test: off
14 |
15 | build_script:
16 | - CALL appveyor.bat
17 |
18 | skip_commits:
19 | message: /.*\[skip appveyor\].*/
20 |
--------------------------------------------------------------------------------
/extra/easy_profiler/easy_profiler_converter/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | set(CPP_FILES
2 | converter.cpp
3 | reader.cpp)
4 |
5 | set(HEADER_FILES
6 | converter.h
7 | reader.h)
8 |
9 | include_directories(../easy_profiler_core/)
10 | include_directories(./include)
11 |
12 | add_executable(profiler_converter ${HEADER_FILES} ${CPP_FILES} main.cpp)
13 | target_link_libraries(profiler_converter easy_profiler)
14 |
15 | install(
16 | TARGETS
17 | profiler_converter
18 | RUNTIME
19 | DESTINATION
20 | bin
21 | )
22 |
23 | set_property(TARGET profiler_converter PROPERTY INSTALL_RPATH_USE_LINK_PATH TRUE)
24 |
--------------------------------------------------------------------------------
/extra/easy_profiler/easy_profiler_converter/main.cpp:
--------------------------------------------------------------------------------
1 | ///std
2 | #include
3 | #include
4 | #include "converter.h"
5 |
6 | using namespace profiler::reader;
7 | // Converter entry point: takes INPUT_PROF_FILE (required) and OUTPUT_JSON_FILE (optional) from argv and hands both to JsonExporter::convert.
8 | int main(int argc, char* argv[])
9 | {
10 | std::string filename, output_json_filename;
11 |
12 | if (argc > 1 && argv[1])
13 | {
14 | filename = argv[1];
15 | }
16 | else
17 | {
18 | std::cout << "Usage: " << argv[0] << " INPUT_PROF_FILE [OUTPUT_JSON_FILE]\n"
19 | "where:\n"
20 | "INPUT_PROF_FILE // Required\n"
21 | "OUTPUT_JSON_FILE (if not specified output will be print in stdout) // Optional\n";
22 | return 1; // no input file given: usage was printed, exit with a non-zero status
23 | }
24 |
25 | if (argc > 2 && argv[2])
26 | {
27 | output_json_filename = argv[2]; // otherwise the string stays empty
28 | }
29 |
30 | JsonExporter js;
31 | js.convert(filename, output_json_filename); // any result of convert() is not inspected here
32 |
33 | return 0; // reached whenever an input file name was supplied
34 | }
35 |
--------------------------------------------------------------------------------
/extra/easy_profiler/easy_profiler_core/LICENSE.MIT:
--------------------------------------------------------------------------------
1 | Copyright (c) 2016-2018 Sergey Yagovtsev, Victor Zarubkin
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
7 | of the Software, and to permit persons to whom the Software is furnished
8 | to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
14 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
15 | PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
16 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
17 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
18 | USE OR OTHER DEALINGS IN THE SOFTWARE.
--------------------------------------------------------------------------------
/extra/easy_profiler/easy_profiler_core/cmake/config.cmake.in:
--------------------------------------------------------------------------------
1 | @PACKAGE_INIT@
2 |
3 | include("${CMAKE_CURRENT_LIST_DIR}/@targets_export_name@.cmake")
4 | check_required_components("@PROJECT_NAME@")
5 |
--------------------------------------------------------------------------------
/extra/easy_profiler/easy_profiler_core/event_trace_status.h:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | #ifndef EASY_PROFILER__EVENT_TRACE_STATUS__H_
5 | #define EASY_PROFILER__EVENT_TRACE_STATUS__H_
6 |
7 | //////////////////////////////////////////////////////////////////////////
8 | //////////////////////////////////////////////////////////////////////////
9 |
10 | enum class EventTracingEnableStatus : unsigned char
11 | {
12 | LaunchedSuccessfully = 0,
13 | PermissionDenied,
14 | AlreadyLaunched,
15 | BadPropertiesSize,
16 | OpenTraceFailed,
17 | UnknownError,
18 | };
19 |
20 | //////////////////////////////////////////////////////////////////////////
21 | //////////////////////////////////////////////////////////////////////////
22 |
23 | #endif // EASY_PROFILER__EVENT_TRACE_STATUS__H_
24 |
--------------------------------------------------------------------------------
/extra/easy_profiler/easy_profiler_core/include/easy/easy_protocol.h:
--------------------------------------------------------------------------------
1 | #ifndef EASY_PROPROTOCOL_H
2 | #define EASY_PROPROTOCOL_H
3 | ///C++
4 | #include
5 | #include
6 | #include
7 |
8 | ///this
9 | #include
10 | //#include
11 |
12 | ///for the current version, visit https://github.com/yse/easy_profiler/wiki/.prof-file-format-v1.3.0
13 |
14 | namespace profiler {
15 |
16 | namespace reader {
17 |
18 | struct BlockDescriptor;
19 |
20 | struct BlocksInfo //12 = 4 + 8, the sum of the field widths. NOTE(review): no packing directive is visible here, so sizeof may be 16 on ABIs that align uint64_t to 8 bytes - confirm readers do not rely on sizeof.
21 | {
22 | uint32_t totalBlocksCount; //4 bytes
23 | uint64_t totalBlocksMemory; //8 bytes
24 | };
25 |
26 | struct DescriptorsInfo //12 = 4 + 8, the sum of the field widths. NOTE(review): no packing directive is visible here, so sizeof may be 16 on ABIs that align uint64_t to 8 bytes - confirm readers do not rely on sizeof.
27 | {
28 | uint32_t allDescriptorsCount; //4 bytes
29 | uint64_t allDescriptorsMemory; //8 bytes
30 | };
31 |
32 | struct FileHeader //64 = sum of the per-field byte counts below. NOTE(review): with alignment padding in the two nested structs sizeof may be larger than 64 - confirm how the header is read.
33 | {
34 | uint32_t signature; //4
35 | uint32_t version; //4
36 | uint64_t processId; //8
37 | int64_t cpuFrequency; //8 (signed, unlike the other 64-bit fields)
38 | uint64_t beginTime; //8
39 | uint64_t endTime; //8
40 | BlocksInfo serializedBlocksInfo; //12 (see BlocksInfo)
41 | DescriptorsInfo blocksDescriptorInfo; //12 (see DescriptorsInfo)
42 | };
43 |
44 | struct BlockInfo // per-block record: two timestamps, a descriptor pointer and an index
45 | {
46 | uint64_t beginTime;
47 | uint64_t endTime;
48 | const BlockDescriptor* descriptor; // pointer to a type that is only forward-declared at this point; the full definition follows later in this header
49 | uint32_t blockIndex;
50 | };
51 |
52 | struct ContextSwitchEvent
53 | {
54 | uint64_t beginTime;
55 | uint64_t endTime;
56 | uint64_t targetThreadId;
57 | std::string targetProcess; ///< Contains process id and process name
58 | };
59 |
60 | struct BlockDescriptor
61 | {
62 | uint32_t parentId; ///< This will differ from id if this descriptor was created from runtime named block
63 | uint32_t id;
64 | int lineNumber;
65 | uint32_t argbColor;
66 | uint8_t blockType;
67 | uint8_t status;
68 | std::string blockName;
69 | std::string fileName;
70 | };
71 |
72 | } //namespace reader
73 |
74 | } //namespace profiler
75 |
76 | #endif
77 |
--------------------------------------------------------------------------------
/extra/easy_profiler/easy_profiler_core/resources.rc:
--------------------------------------------------------------------------------
1 | 1 VERSIONINFO
2 |
3 | # define EASY_STRINGIFY(a) #a
4 | # define EASY_STRINGIFICATION(a) EASY_STRINGIFY(a)
5 |
6 | #define EASY_PROFILER_PRODUCT_VERSION "v" EASY_STRINGIFICATION(EASY_PROFILER_VERSION_MAJOR) "." \
7 | EASY_STRINGIFICATION(EASY_PROFILER_VERSION_MINOR) "." \
8 | EASY_STRINGIFICATION(EASY_PROFILER_VERSION_PATCH)
9 |
10 | FILEVERSION EASY_PROFILER_VERSION_MAJOR, EASY_PROFILER_VERSION_MINOR, EASY_PROFILER_VERSION_PATCH
11 | PRODUCTVERSION EASY_PROFILER_VERSION_MAJOR, EASY_PROFILER_VERSION_MINOR, EASY_PROFILER_VERSION_PATCH
12 | BEGIN
13 | BLOCK "StringFileInfo"
14 | BEGIN
15 | BLOCK "080904b0"
16 | BEGIN
17 | VALUE "CompanyName", "EasySolutions"
18 | VALUE "FileDescription", "Lightweight profiler library for C++"
19 | VALUE "LegalCopyright", "Copyright (C) 2016-2018 Victor Zarubkin, Sergey Yagovtsev"
20 | VALUE "LegalTrademarks1", "All Rights Reserved"
21 | VALUE "LegalTrademarks2", "All Rights Reserved"
22 | VALUE "ProductName", "easy_profiler lib"
23 | VALUE "ProductVersion", EASY_PROFILER_PRODUCT_VERSION
24 | END
25 | END
26 | BLOCK "VarFileInfo"
27 | BEGIN
28 | VALUE "Translation", 0x809, 1200
29 | END
30 | END
31 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/arrow-down-disabled.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
13 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/arrow-down-hover.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
13 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/arrow-down-pressed.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
13 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/arrow-down.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
13 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/arrow-left.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
13 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/arrow-right.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
13 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/arrow-up-disabled.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
13 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/arrow-up-hover.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
13 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/arrow-up-pressed.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
13 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/arrow-up.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
13 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/big-o.svg:
--------------------------------------------------------------------------------
1 |
2 |
4 |
22 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/binoculars.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
21 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/check-disabled.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
9 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/check-partial-disabled.svg:
--------------------------------------------------------------------------------
1 |
2 |
8 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/check-partial.svg:
--------------------------------------------------------------------------------
1 |
2 |
8 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/check.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
9 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/close-hover.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
13 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/close-white-hover.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
13 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/close-white-pressed.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
13 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/close-white.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
13 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/close.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
13 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/collapse.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
17 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/crop.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
16 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/csv.svg:
--------------------------------------------------------------------------------
1 |
16 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/delete-old.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
16 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/delete.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
11 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/expand.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
16 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/lan.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
26 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/lan_on.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
26 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/maximize-white-hover.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
17 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/maximize-white-pressed.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
17 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/maximize-white.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
17 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/minimize-white-hover.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
17 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/minimize-white-pressed.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
17 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/minimize-white.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
17 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/minimize.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
10 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/off.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
13 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/open-folder.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
15 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/open-folder2.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
17 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/play.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
12 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/radio-indicator-disabled.svg:
--------------------------------------------------------------------------------
1 |
2 |
8 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/radio-indicator.svg:
--------------------------------------------------------------------------------
1 |
2 |
8 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/reload-folder2.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
25 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/reload.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
14 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/search-next.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
24 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/search-prev.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
24 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/statistics.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
15 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/statistics2.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
17 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/stop.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
12 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/to-fullscreen.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
10 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/to-window.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
19 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/wifi.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
19 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/wifi_on.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
19 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/window.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
10 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/default/yx.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
17 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/logo.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THU-luvision/INS-Conv/c1cdd4187803f1d50e00610ae947ed337a7eb93f/extra/easy_profiler/profiler_gui/images/logo.ico
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/images/logo.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
19 |
--------------------------------------------------------------------------------
/extra/easy_profiler/profiler_gui/resources.rc:
--------------------------------------------------------------------------------
// Windows resource script for the profiler GUI: application icon plus version metadata.
IDI_ICON1 ICON DISCARDABLE "images/logo.ico"
// Version numbers come from the EASY_PROFILER_VERSION_* macros, which are not defined
// in this file (presumably supplied by the build system -- TODO confirm).
1 VERSIONINFO
FILEVERSION EASY_PROFILER_VERSION_MAJOR, EASY_PROFILER_VERSION_MINOR, EASY_PROFILER_VERSION_PATCH
PRODUCTVERSION EASY_PROFILER_VERSION_MAJOR, EASY_PROFILER_VERSION_MINOR, EASY_PROFILER_VERSION_PATCH

// Two-level stringification so the version macros are expanded before being quoted.
# define EASY_STRINGIFY(a) #a
# define EASY_STRINGIFICATION(a) EASY_STRINGIFY(a)

// Product version string of the form "v<major>.<minor>.<patch>".
#define EASY_PROFILER_PRODUCT_VERSION "v" EASY_STRINGIFICATION(EASY_PROFILER_VERSION_MAJOR) "." \
EASY_STRINGIFICATION(EASY_PROFILER_VERSION_MINOR) "." \
EASY_STRINGIFICATION(EASY_PROFILER_VERSION_PATCH)

BEGIN
BLOCK "StringFileInfo"
BEGIN
// Block name is the hex language/code-page pair 0809 + 04b0 (0x04b0 == 1200),
// matching the "Translation" value in VarFileInfo below.
BLOCK "080904b0"
BEGIN
VALUE "CompanyName", "EasySolutions"
VALUE "FileDescription", "EasyProfiler"
VALUE "InternalName", "profiler_gui"
VALUE "LegalCopyright", "Copyright (C) 2016-2018 Victor Zarubkin, Sergey Yagovtsev"
VALUE "LegalTrademarks1", "All Rights Reserved"
VALUE "LegalTrademarks2", "All Rights Reserved"
VALUE "OriginalFilename", "profiler_gui.exe"
VALUE "ProductName", "easy_profiler gui application"
VALUE "ProductVersion", EASY_PROFILER_PRODUCT_VERSION
END
END
BLOCK "VarFileInfo"
BEGIN
// Language id 0x809 with code page 1200; must stay in sync with the block name above.
VALUE "Translation", 0x809, 1200
END
END
34 |
--------------------------------------------------------------------------------
/extra/easy_profiler/reader/CMakeLists.txt:
--------------------------------------------------------------------------------
1 |
# Builds the profiler_reader executable from main.cpp and links it against easy_profiler.
add_executable(profiler_reader main.cpp)
target_link_libraries(profiler_reader easy_profiler)
4 |
--------------------------------------------------------------------------------
/extra/easy_profiler/sample/CMakeLists.txt:
--------------------------------------------------------------------------------
# Source list shared by both sample executables (only main.cpp is listed here).
set(CPP_FILES main.cpp)
set(SOURCES ${CPP_FILES})

link_directories(${CMAKE_SOURCE_DIR}/../bin)

# Sample built with the profiler enabled.
add_executable(profiler_sample ${SOURCES})
target_link_libraries(profiler_sample easy_profiler)

# Same sources, compiled with DISABLE_EASY_PROFILER defined for this target only.
add_executable(profiler_sample_disabled_profiler ${SOURCES})
target_compile_definitions(profiler_sample_disabled_profiler PRIVATE DISABLE_EASY_PROFILER)
target_link_libraries(profiler_sample_disabled_profiler easy_profiler)
17 |
--------------------------------------------------------------------------------
/extra/easy_profiler/sample/express_sample.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include
8 |
9 | #include
10 | #include
11 |
// Number of simulated objects; main() overwrites it with argv[1] when an argument is given.
int OBJECTS = 500;
13 |
14 | void modellingThread(){
15 | EASY_THREAD("Modelling");
16 |
17 | static const int N = OBJECTS;
18 |
19 | volatile double *pos[N];
20 | for (int i = 0; i < N; ++i)
21 | {
22 | pos[i] = new volatile double[3];
23 | }
24 |
25 | {
26 | EASY_BLOCK("Collisions");
27 | volatile int i, j;
28 | volatile double dist;
29 | for (i = 0; i < N; ++i)
30 | {
31 | for (j = i + 1; j < N; ++j)
32 | {
33 | EASY_BLOCK("Check");
34 | volatile double v[3];
35 | v[0] = pos[i][0] - pos[j][0];
36 | v[1] = pos[i][1] - pos[j][1];
37 | v[2] = pos[i][2] - pos[j][2];
38 | dist = v[0] * v[0] + v[1] * v[1] + v[2] * v[2];
39 | if (dist < 10000)
40 | {
41 | dist *= dist;
42 | }
43 | }
44 | }
45 | }
46 |
47 | for (int i = 0; i < N; ++i)
48 | {
49 | delete [] pos[i];
50 | }
51 | }
52 |
53 | //////////////////////////////////////////////////////////////////////////
54 |
55 | int main(int argc, char* argv[])
56 | {
57 | if (argc > 1 && argv[1]){
58 | OBJECTS = std::atoi(argv[1]);
59 | }
60 |
61 | std::cout << "Objects count: " << OBJECTS << std::endl;
62 |
63 | auto start = std::chrono::system_clock::now();
64 |
65 |
66 | EASY_PROFILER_ENABLE;
67 | EASY_MAIN_THREAD;
68 |
69 |
70 | modellingThread();
71 |
72 | auto end = std::chrono::system_clock::now();
73 | auto elapsed =
74 | std::chrono::duration_cast(end - start);
75 |
76 | std::cout << "Elapsed time: " << elapsed.count() << " usec" << std::endl;
77 |
78 | auto blocks_count = profiler::dumpBlocksToFile("test.prof");
79 |
80 | std::cout << "Blocks count: " << blocks_count << std::endl;
81 |
82 | return 0;
83 | }
84 |
--------------------------------------------------------------------------------
/extra/easy_profiler/scripts/context_switch_logger.stp:
--------------------------------------------------------------------------------
// Optional filters, set in "probe begin": 0 / "" mean "no filtering".
global target_pid
global target_name

// Logs one line per scheduler context switch that passes the active filters.
probe scheduler.ctxswitch {

// With a pid filter set, skip switches where neither side is the target pid.
if (target_pid != 0
&& next_pid != target_pid
&& prev_pid != target_pid)
next

// With a name filter set, skip switches where neither task has the target name.
if (target_name != ""
&& prev_task_name != target_name
&& next_task_name != target_name)
next

//printf("Switch from %d(%s) to %d(%s) at %d\n",prev_tid, prev_task_name,next_tid,next_task_name, gettimeofday_ns())
// Output columns: get_cycles() value, previous tid, next tid, next task name, next pid.
printf("%d %d %d %s %d\n", get_cycles(), prev_tid, next_tid, next_task_name, next_pid)
//printf("%d %d %d\n",gettimeofday_ns(),prev_tid, next_tid )
}
20 |
// Start-up probe: resets the filters and parses the script arguments.
// Accepted forms: no arguments, "pid <nr>" or "name <proc>".
probe begin
{
target_pid = 0
target_name = ""

// Preprocessor conditional: one argument or more than two is a usage error.
%( $# == 1 || $# > 2 %?
log("Wrong number of arguments, use none, 'pid nr' or 'name proc'")
exit()
%)

// Exactly two arguments: the first selects the filter kind, the second is its value.
// A first argument other than "pid" or "name" leaves both filters unset.
%( $# == 2 %?
if(@1 == "pid")
target_pid = strtol(@2, 10)
if(@1 == "name")
target_name = @2
%)
}
38 |
--------------------------------------------------------------------------------
/extra/easy_profiler/scripts/make_style.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Reformat every *.h and *.cpp file below DIRECTORY in place with clang-format.
#
# Usage: make_style.sh DIRECTORY

if [ "$#" -ne 1 ]; then
    # printf instead of `echo -e`: the latter is not portable across sh implementations.
    printf 'Usage: \n%s DIRECTORY\n\twhere DIRECTORY is a directory with sources for styling\n' "$0"
    exit 1
fi

if ! [ -x "$(command -v clang-format)" ]; then
    echo 'Error: clang-format is not installed. Please install clang-format with minimal version 3.8' >&2
    exit 1
fi

DIR=$1

# Let find pass the paths as arguments instead of word-splitting its output,
# so file and directory names containing whitespace are handled correctly.
find "$DIR" \( -name "*.h" -o -name "*.cpp" \) -exec sh -c '
    for FILE do
        echo "Set style for $FILE"
        clang-format -i "$FILE"
    done
' sh {} +
20 |
--------------------------------------------------------------------------------
/extra/easy_profiler/scripts/test.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Runs the sample with the profiler enabled and disabled over a grid of
# (objects, render, modelling) counts and writes the per-block overhead to
# result.csv: a header line followed by the sorted measurement rows.

unamestr=$(uname)
SUBDIR="./bin"
if [[ "$unamestr" != 'Linux' ]]; then
    SUBDIR="./bin/Release"
fi

DISABLED_PROF="$SUBDIR/profiler_sample_disabled_profiler"
ENABLED_PROF="$SUBDIR/profiler_sample"

TEMP_FILE_ENABLE="enable.info"
TEMP_FILE_DISABLE="disable.info"
RESULT_FILE="result.csv"
RESULT_FILE_TMP="result.csv.tmp"

HEADER="Blocks count, dT prof enabled usec, dT prof disabled usec,delta, usec/block"

# Scratch files are removed on every exit path, including interruption.
trap 'rm -f "$TEMP_FILE_ENABLE" "$TEMP_FILE_DISABLE" "$RESULT_FILE_TMP"' EXIT

rm -f "$RESULT_FILE"
: > "$RESULT_FILE_TMP"

for i in {1..9}
do
    OBJECTS_COUNT=$((i*100))
    for j in {10..15}
    do
        RENDER_COUNT=$((j*100))
        for k in {10..15}
        do
            MODELLING_COUNT=$((k*100))
            "$ENABLED_PROF" "$OBJECTS_COUNT" "$RENDER_COUNT" "$MODELLING_COUNT" > "$TEMP_FILE_ENABLE"
            "$DISABLED_PROF" "$OBJECTS_COUNT" "$RENDER_COUNT" "$MODELLING_COUNT" > "$TEMP_FILE_DISABLE"

            # Third field of the "Elapsed ..." / "Blocks ..." lines printed by the sample.
            DT_ENA=$(awk '/Elapsed/ {print $3}' "$TEMP_FILE_ENABLE")
            N_ENA=$(awk '/Blocks/ {print $3}' "$TEMP_FILE_ENABLE")
            DT_DIS=$(awk '/Elapsed/ {print $3}' "$TEMP_FILE_DISABLE")

            # A failed run yields empty or non-numeric fields; skip it rather than
            # hitting an arithmetic error or a division by zero below.
            if ! [[ "$DT_ENA" =~ ^[0-9]+$ && "$DT_DIS" =~ ^[0-9]+$ && "$N_ENA" =~ ^[1-9][0-9]*$ ]]; then
                echo "Skipping $OBJECTS_COUNT/$RENDER_COUNT/$MODELLING_COUNT: could not parse sample output" >&2
                continue
            fi

            DELTA=$((DT_ENA-DT_DIS))
            USEC_BLOCK=$(awk -v d="$DELTA" -v n="$N_ENA" 'BEGIN{print d/n}')

            echo "$N_ENA,$DT_ENA,$DT_DIS,$DELTA,$USEC_BLOCK" >> "$RESULT_FILE_TMP"
        done
    done
    # Progress indicator: one line per completed objects-count step.
    echo "$i"

done

# Header first, then the rows in sort's default order.
{
    echo "$HEADER"
    sort "$RESULT_FILE_TMP"
} > "$RESULT_FILE"

echo "See result in $RESULT_FILE"
58 |
--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
1 | import sparseconvnet as scn
2 | import torch
3 | import torch.nn as nn
4 | import torch.optim as optim
5 | import sys, os, time
6 |
7 |
class Naive_UNet(nn.Module):
    """Sparse U-Net followed by a linear layer producing 20 outputs per row.

    ``config`` must provide the keys ``'dimension'`` and ``'full_scale'``,
    which are forwarded to the sparseconvnet layers.
    """

    def __init__(self, config):
        super().__init__()
        width = 32  # 16 or 32
        use_residual_blocks = True  # True or False
        repetitions = 2  # Conv block repetition factor: 1 or 2
        dimension = config['dimension']
        # Feature widths of the U-Net levels: width, 2*width, ..., 7*width.
        level_planes = [factor * width for factor in range(1, 8)]

        model = scn.Sequential()
        model = model.add(scn.InputLayer(dimension, config['full_scale'], mode=4))
        model = model.add(scn.SubmanifoldConvolution(dimension, 3, width, 3, False))
        model = model.add(scn.UNet(dimension, repetitions, level_planes, use_residual_blocks))
        model = model.add(scn.BatchNormReLU(width))
        model = model.add(scn.OutputLayer(dimension))
        self.sparseModel = model
        self.linear = nn.Linear(width, 20)

    def forward(self, x, increment=False):
        """Run the sparse model (forwarding ``increment``), then the linear layer."""
        features = self.sparseModel(x, increment)
        return self.linear(features)
27 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016-present, Facebook, Inc.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the BSD-style license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | import torch, os
8 | from torch.utils.cpp_extension import BuildExtension, CppExtension, CUDAExtension
9 | from setuptools import setup, find_packages
10 |
this_dir = os.path.dirname(os.path.realpath(__file__))
torch_dir = os.path.dirname(torch.__file__)
# Four levels above the torch package directory, plus "include".
# NOTE(review): assumes a <env>/lib/pythonX.Y/site-packages/torch layout -- confirm.
conda_include_dir = os.path.normpath(
    os.path.join(torch_dir, os.pardir, os.pardir, os.pardir, os.pardir, 'include'))

scn_dir = os.path.join(this_dir, 'sparseconvnet', 'SCN')
easy_profiler_dir = os.path.join(this_dir, 'extra', 'easy_profiler')
cudpp_dir = os.path.join(this_dir, 'extra', 'cudpp')

# Both extension variants are compiled with -DBUILD_WITH_EASY_PROFILER and
# linked against the same libraries, so they share header and library paths.
include_dirs = [
    conda_include_dir,
    scn_dir,
    os.path.join(easy_profiler_dir, 'easy_profiler_core', 'include'),
    os.path.join(cudpp_dir, 'include'),
]
library_dirs = [
    '/usr/local/lib',
    os.path.join(cudpp_dir, 'build', 'lib'),
    os.path.join(easy_profiler_dir, 'build', 'bin'),
]
libraries = ['easy_profiler', 'cudpp', 'cudpp_hash']

extra = {
    'cxx': ['-std=c++11', '-g', '-fopenmp', '-DBUILD_WITH_EASY_PROFILER'],
    'nvcc': ['-std=c++11', '-Xcompiler', '-fopenmp', '-DBUILD_WITH_EASY_PROFILER'],
}
print(this_dir)

if torch.cuda.is_available():
    scn_extension = CUDAExtension(
        'sparseconvnet.SCN',
        [
            'sparseconvnet/SCN/cuda.cu',
            'sparseconvnet/SCN/sparseconvnet_cuda.cpp',
            'sparseconvnet/SCN/pybind.cpp',
        ],
        include_dirs=include_dirs,
        library_dirs=library_dirs,
        libraries=libraries,
        extra_compile_args=extra)
else:
    # The header directories used to be listed under library_dirs here, which
    # left them off the compiler's include path for the CPU-only build.
    scn_extension = CppExtension(
        'sparseconvnet.SCN',
        ['sparseconvnet/SCN/pybind.cpp', 'sparseconvnet/SCN/sparseconvnet_cpu.cpp'],
        include_dirs=include_dirs,
        library_dirs=library_dirs,
        libraries=libraries,
        extra_compile_args=extra['cxx'])

setup(
    name='sparseconvnet',
    version='0.2',
    description='Submanifold (Spatially) Sparse Convolutional Networks https://arxiv.org/abs/1706.01307',
    author='Facebook AI Research',
    author_email='benjamingraham@fb.com',
    url='https://github.com/facebookresearch/SparseConvNet',
    packages=['sparseconvnet', 'sparseconvnet.SCN'],
    ext_modules=[scn_extension],
    cmdclass={'build_ext': BuildExtension},
    zip_safe=False,
)
45 |
--------------------------------------------------------------------------------
/sparseconvnet/SCN/CPU/BatchwiseMultiplicativeDropout.cpp:
--------------------------------------------------------------------------------
1 | // Copyright 2016-present, Facebook, Inc.
2 | // All rights reserved.
3 | //
4 | // This source code is licensed under the BSD-style license found in the
5 | // LICENSE file in the root directory of this source tree.
6 |
7 | template
8 | void cpu_BatchwiseMultiplicativeDropout_updateOutput(
9 | /*float*/ at::Tensor input_features, /*float*/ at::Tensor output_features,
10 | /*float*/ at::Tensor noise, T alpha) {
11 | output_features.resize_as_(input_features);
12 | auto nActive = input_features.size(0);
13 | auto nPlanes = input_features.size(1);
14 | auto iF = input_features.data();
15 | auto oF = output_features.data();
16 | auto nz = noise.data();
17 | for (Int row = 0; row < nActive; row++)
18 | for (Int plane = 0, o = row * nPlanes, i = row * nPlanes; plane < nPlanes;
19 | plane++, o++, i++)
20 | oF[o] = (iF[i] > 0) ? iF[i] * nz[plane] : iF[i] * nz[plane] * alpha;
21 | }
22 | template
23 | void cpu_BatchwiseMultiplicativeDropout_updateGradInput(
24 | /*float*/ at::Tensor input_features, /*float*/ at::Tensor d_input_features,
25 | /*float*/ at::Tensor d_output_features, /*float*/ at::Tensor noise,
26 | T alpha) {
27 | d_input_features.resize_as_(d_output_features);
28 | auto nActive = input_features.size(0);
29 | auto nPlanes = input_features.size(1);
30 | auto iF = input_features.data();
31 | auto diF = d_input_features.data();
32 | auto doF = d_output_features.data();
33 | auto nz = noise.data();
34 | for (Int row = 0; row < nActive; row++)
35 | for (Int plane = 0, o = row * nPlanes, i = row * nPlanes; plane < nPlanes;
36 | plane++, o++, i++)
37 | diF[i] = (iF[i] > 0) ? doF[o] * nz[plane] : doF[o] * nz[plane] * alpha;
38 | }
39 |
--------------------------------------------------------------------------------
/sparseconvnet/SCN/CPU/LeakyReLU.cpp:
--------------------------------------------------------------------------------
1 | // Copyright 2016-present, Facebook, Inc.
2 | // All rights reserved.
3 | //
4 | // This source code is licensed under the BSD-style license found in the
5 | // LICENSE file in the root directory of this source tree.
6 |
7 | template
8 | void cpu_LeakyReLU_updateOutput(/*float*/ at::Tensor input_features,
9 | /*float*/ at::Tensor output_features, T alpha) {
10 | output_features.resize_as_(input_features);
11 | auto iF = input_features.data();
12 | auto oF = output_features.data();
13 | auto n = input_features.numel();
14 |
15 | for (Int i = 0; i < n; i++) {
16 | const T x = iF[i];
17 | const T r = (x > 0) ? 1 : alpha;
18 | oF[i] = x * r;
19 | }
20 | }
21 | template
22 | void cpu_LeakyReLU_updateGradInput(/*float*/ at::Tensor input_features,
23 | /*float*/ at::Tensor d_input_features,
24 | /*float*/ at::Tensor d_output_features,
25 | T alpha) {
26 | d_input_features.resize_as_(d_output_features);
27 | auto iF = input_features.data();
28 | auto diF = d_input_features.data();
29 | auto doF = d_output_features.data();
30 | auto n = d_input_features.numel();
31 |
32 | for (Int i = 0; i < n; i++) {
33 | const T r = (iF[i] > 0) ? 1 : alpha;
34 | diF[i] = doF[i] * r;
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/sparseconvnet/SCN/CPU/NetworkInNetwork.cpp:
--------------------------------------------------------------------------------
1 | // Copyright 2016-present, Facebook, Inc.
2 | // All rights reserved.
3 | //
4 | // This source code is licensed under the BSD-style license found in the
5 | // LICENSE file in the root directory of this source tree.
6 |
7 | template
8 | double cpu_NetworkInNetwork_updateOutput(/*float*/ at::Tensor input_features,
9 | /*float*/ at::Tensor output_features,
10 | /*float*/ at::Tensor weight,
11 | /*float*/ at::Tensor bias) {
12 | auto nActive = input_features.size(0);
13 | auto input_nPlanes = weight.size(0);
14 | auto output_nPlanes = weight.size(1);
15 | output_features.resize_({nActive, output_nPlanes});
16 | if (bias.numel())
17 | output_features.copy_(bias);
18 | else
19 | output_features.zero_();
20 | if (nActive)
21 | output_features.addmm_(input_features, weight);
22 | return nActive * input_nPlanes * output_nPlanes;
23 | }
24 | template
25 | void cpu_NetworkInNetwork_updateGradInput(
26 | /*float*/ at::Tensor d_input_features,
27 | /*float*/ at::Tensor d_output_features,
28 | /*float*/ at::Tensor weight) {
29 |
30 | int nActive = d_output_features.size(0);
31 | d_input_features.resize_({nActive, weight.size(0)});
32 | d_input_features.zero_();
33 | if (nActive)
34 | at::mm_out(d_input_features, d_output_features, weight.t());
35 | }
36 | template
37 | void cpu_NetworkInNetwork_accGradParameters(
38 | /*float*/ at::Tensor input_features,
39 | /*float*/ at::Tensor d_output_features,
40 | /*float*/ at::Tensor d_weight, /*float*/ at::Tensor d_bias) {
41 | auto nActive = input_features.size(0);
42 | if (nActive and d_bias.numel())
43 | at::sum_out(d_bias, d_output_features, {0}, false);
44 | if (nActive)
45 | at::mm_out(d_weight, input_features.t(), d_output_features);
46 | }
47 |
--------------------------------------------------------------------------------
/sparseconvnet/SCN/CUDA/ActivePooling.cpp:
--------------------------------------------------------------------------------
1 | // Copyright 2016-present, Facebook, Inc.
2 | // All rights reserved.
3 | //
4 | // This source code is licensed under the BSD-style license found in the
5 | // LICENSE file in the root directory of this source tree.
6 |
7 | template
8 | void ActivePooling_ForwardPass(T *input_features, T *output_features,
9 | Int batchSize, Int maxActive, Int nPlanes,
10 | Int *rules, bool average);
11 |
12 | template
13 | void ActivePooling_BackwardPass(T *d_input_features, T *d_output_features,
14 | Int batchSize, Int maxActive, Int nPlanes,
15 | Int *rules, bool average);
16 |
17 | template
18 | void cuda_ActivePooling_updateOutput(
19 | /*long*/ at::Tensor inputSize, Metadata &m,
20 | /*cuda float*/ at::Tensor input_features,
21 | /*cuda float*/ at::Tensor output_features, bool average) {
22 |
23 | Int nPlanes = input_features.size(1);
24 | auto _rules = m.getActivePoolingRuleBook(inputSize);
25 | Int batchSize = _rules[1][0];
26 | Int maxActive = _rules[1][1];
27 | output_features.resize_({batchSize, nPlanes});
28 | output_features.zero_();
29 |
30 | auto iF = input_features.data();
31 | auto oF = output_features.data();
32 | ActivePooling_ForwardPass(iF, oF, batchSize, maxActive, nPlanes,
33 | &_rules[0][0], average);
34 | }
35 | template
36 | void cuda_ActivePooling_updateGradInput(
37 | /*long*/ at::Tensor inputSize, Metadata &m,
38 | /*cuda float*/ at::Tensor input_features,
39 | /*cuda float*/ at::Tensor d_input_features,
40 | /*cuda float*/ at::Tensor d_output_features, bool average) {
41 |
42 | Int nPlanes = input_features.size(1);
43 | auto _rules = m.getActivePoolingRuleBook(inputSize);
44 | Int batchSize = _rules[1][0];
45 | Int maxActive = _rules[1][1];
46 | d_input_features.resize_as_(input_features);
47 | d_input_features.zero_();
48 |
49 | auto diF = d_input_features.data();
50 | auto doF = d_output_features.data();
51 |
52 | ActivePooling_BackwardPass(diF, doF, batchSize, maxActive, nPlanes,
53 | &_rules[0][0], average);
54 | }
55 |
--------------------------------------------------------------------------------
/sparseconvnet/SCN/CUDA/BatchwiseMultiplicativeDropout.cpp:
--------------------------------------------------------------------------------
1 | // Copyright 2016-present, Facebook, Inc.
2 | // All rights reserved.
3 | //
4 | // This source code is licensed under the BSD-style license found in the
5 | // LICENSE file in the root directory of this source tree.
6 |
7 | template
8 | void bmd_f(T *input_features, T *output_features, T *noise, Int nActive,
9 | Int nPlanes, T alpha);
10 | template
11 | void bmd_b(T *input_features, T *d_input_features, T *d_output_features,
12 | T *noise, Int nActive, Int nPlanes, T alpha);
13 |
14 | template
15 | void cuda_BatchwiseMultiplicativeDropout_updateOutput(
16 | /*cuda float*/ at::Tensor input_features,
17 | /*cuda float*/ at::Tensor output_features, /*cuda float*/ at::Tensor noise,
18 | T alpha) {
19 | output_features.resize_as_(input_features);
20 | auto nActive = input_features.size(0);
21 | auto nPlanes = input_features.size(1);
22 | bmd_f(input_features.data(), output_features.data(), noise.data(),
23 | nActive, nPlanes, alpha);
24 | }
25 |
26 | template
27 | void cuda_BatchwiseMultiplicativeDropout_updateGradInput(
28 | /*cuda float*/ at::Tensor input_features,
29 | /*cuda float*/ at::Tensor d_input_features,
30 | /*cuda float*/ at::Tensor d_output_features,
31 | /*cuda float*/ at::Tensor noise, T alpha) {
32 | d_input_features.resize_as_(d_output_features);
33 | auto nActive = input_features.size(0);
34 | auto nPlanes = input_features.size(1);
35 | bmd_b(input_features.data(), d_input_features.data(),
36 | d_output_features.data(), noise.data(), nActive, nPlanes, alpha);
37 | }
38 |
--------------------------------------------------------------------------------
/sparseconvnet/SCN/CUDA/LeakyReLU.cpp:
--------------------------------------------------------------------------------
1 | // Copyright 2016-present, Facebook, Inc.
2 | // All rights reserved.
3 | //
4 | // This source code is licensed under the BSD-style license found in the
5 | // LICENSE file in the root directory of this source tree.
6 |
7 | template
8 | void LeakyReLU_fp(T *input_features, T *output_features, Int n, T alpha);
9 | template
10 | void LeakyReLU_bp(T *input_features, T *d_input_features, T *output_features,
11 | Int n, T alpha);
12 |
13 | template
14 | void cuda_LeakyReLU_updateOutput(/*cuda float*/ at::Tensor input_features,
15 | /*cuda float*/ at::Tensor output_features,
16 | T alpha) {
17 | output_features.resize_as_(input_features);
18 | auto n = input_features.numel();
19 | LeakyReLU_fp(input_features.data(), output_features.data(), n,
20 | alpha);
21 | }
22 |
23 | template
24 | void cuda_LeakyReLU_updateGradInput(
25 | /*cuda float*/ at::Tensor input_features,
26 | /*cuda float*/ at::Tensor d_input_features,
27 | /*cuda float*/ at::Tensor d_output_features, T alpha) {
28 | d_input_features.resize_as_(d_output_features);
29 | auto n = d_input_features.numel();
30 | LeakyReLU_bp(input_features.data(), d_input_features.data(),
31 | d_output_features.data(), n, alpha);
32 | }
33 |
--------------------------------------------------------------------------------
/sparseconvnet/SCN/CUDA/LeakyReLU.cu:
--------------------------------------------------------------------------------
1 | // Copyright 2016-present, Facebook, Inc.
2 | // All rights reserved.
3 | //
4 | // This source code is licensed under the BSD-style license found in the
5 | // LICENSE file in the root directory of this source tree.
6 |
// Leaky ReLU forward kernel: out[i] = in[i] if in[i] > 0, else in[i] * alpha.
// Each thread starts at its global index and advances by the total number of
// launched threads, so any launch configuration covers all n elements.
template <typename T>
__global__ void LeakyReLU_fp_(T *input_features, T *output_features, Int n,
                              T alpha) {
  // Derived from the launch configuration instead of a hard-coded 16 * 1024.
  const Int stride = (Int)gridDim.x * blockDim.x;
  for (Int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += stride)
    output_features[i] = (input_features[i] > 0) ? input_features[i]
                                                 : (input_features[i] * alpha);
}
// Host launcher for the forward kernel: 16 blocks of 1024 threads.
template <typename T>
void LeakyReLU_fp(T *input_features, T *output_features, Int n, T alpha) {
  LeakyReLU_fp_<T><<<16, 1024>>>(input_features, output_features, n, alpha);
}
// Leaky ReLU backward kernel: d_in[i] = d_out[i] where in[i] > 0,
// else d_out[i] * alpha.
// Each thread starts at its global index and advances by the total number of
// launched threads, so any launch configuration covers all n elements.
template <typename T>
__global__ void LeakyReLU_bp_(T *input_features, T *d_input_features,
                              T *d_output_features, Int n, T alpha) {
  // Derived from the launch configuration instead of a hard-coded 16 * 1024.
  const Int stride = (Int)gridDim.x * blockDim.x;
  for (Int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += stride)
    d_input_features[i] = (input_features[i] > 0)
                              ? d_output_features[i]
                              : (d_output_features[i] * alpha);
}
// Host launcher for the backward kernel: 16 blocks of 1024 threads.
// The third pointer is the gradient w.r.t. the output; it is forwarded to the
// kernel's d_output_features parameter and named to match.
template <typename T>
void LeakyReLU_bp(T *input_features, T *d_input_features, T *d_output_features,
                  Int n, T alpha) {
  LeakyReLU_bp_<T><<<16, 1024>>>(input_features, d_input_features,
                                 d_output_features, n, alpha);
}
32 |
--------------------------------------------------------------------------------
/sparseconvnet/SCN/CUDA/NetworkInNetwork.cpp:
--------------------------------------------------------------------------------
1 | // Copyright 2016-present, Facebook, Inc.
2 | // All rights reserved.
3 | //
4 | // This source code is licensed under the BSD-style license found in the
5 | // LICENSE file in the root directory of this source tree.
6 |
7 | #include
8 |
9 | template
10 | double cuda_NetworkInNetwork_updateOutput(
11 | /*cuda float*/ at::Tensor input_features,
12 | /*cuda float*/ at::Tensor output_features,
13 | /*cuda float*/ at::Tensor weight, /*cuda float*/ at::Tensor bias) {
14 | auto nActive = input_features.size(0);
15 | auto input_nPlanes = weight.size(0);
16 | auto output_nPlanes = weight.size(1);
17 | output_features.resize_({nActive, output_nPlanes});
18 | if (bias.numel())
19 | output_features.copy_(bias);
20 | else
21 | output_features.zero_();
22 | if (nActive)
23 | output_features.addmm_(input_features, weight);
24 | return nActive * input_nPlanes * output_nPlanes;
25 | }
26 |
27 | template
28 | void cuda_NetworkInNetwork_updateGradInput(
29 | /*cuda float*/ at::Tensor d_input_features,
30 | /*cuda float*/ at::Tensor d_output_features,
31 | /*cuda float*/ at::Tensor weight) {
32 |
33 | int nActive = d_output_features.size(0);
34 | d_input_features.resize_({nActive, weight.size(0)});
35 | d_input_features.zero_();
36 | if (nActive)
37 | at::mm_out(d_input_features, d_output_features, weight.t());
38 | }
39 |
40 | template
41 | void cuda_NetworkInNetwork_accGradParameters(
42 | /*cuda float*/ at::Tensor input_features,
43 | /*cuda float*/ at::Tensor d_output_features,
44 | /*cuda float*/ at::Tensor d_weight, /*cuda float*/ at::Tensor d_bias) {
45 | auto nActive = input_features.size(0);
46 | if (nActive and d_bias.numel())
47 | at::sum_out(d_bias, d_output_features, {0}, false);
48 | if (nActive)
49 | at::mm_out(d_weight, input_features.t(), d_output_features);
50 | }
51 |
--------------------------------------------------------------------------------
/sparseconvnet/SCN/CUDA/SparseToDense.cpp:
--------------------------------------------------------------------------------
1 | // Copyright 2016-present, Facebook, Inc.
2 | // All rights reserved.
3 | //
4 | // This source code is licensed under the BSD-style license found in the
5 | // LICENSE file in the root directory of this source tree.
6 |
7 | template
8 | void cuda_SparseToDense_ForwardPass(T *input_features, T *output_features,
9 | Int nPlanes, Int spatialVolume,
10 | RuleBook _rules);
11 | template
12 | void cuda_SparseToDense_BackwardPass(T *d_input_features, T *d_output_features,
13 | Int nPlanes, Int spatialVolume,
14 | RuleBook _rules);
15 |
// Forward pass of SparseToDense.
// Resizes output_features to (batch size, nPlanes, inputSize[0], ...,
// inputSize[Dimension - 1]) and zeroes it. If input_features is 2-dimensional
// the sparse-to-dense rule book is fetched from the metadata and
// cuda_SparseToDense_ForwardPass is invoked on the raw data pointers.
16 | template
17 | void cuda_SparseToDense_updateOutput(
18 | /*long*/ at::Tensor inputSize, Metadata &m,
19 | /*cuda float*/ at::Tensor input_features,
20 | /*cuda float*/ at::Tensor output_features, long nPlanes) {
21 |
// Scoped block: build the dense output shape, then resize and clear the output.
22 | {
23 | std::array sz;
24 | sz[0] = m.grids.begin()->second.size(); // batch size
25 | sz[1] = nPlanes;
26 | long *in_sz = inputSize.data();
// The spatial extents follow the batch and plane dimensions.
27 | for (Int i = 0; i < Dimension; ++i)
28 | sz[i + 2] = in_sz[i];
29 | output_features.resize_(sz);
30 | output_features.zero_();
31 | }
// Any input that is not 2-dimensional leaves the zeroed output untouched.
32 | if (input_features.ndimension() == 2) {
33 | auto _rules = m.getSparseToDenseRuleBook(inputSize, true);
// The plane count handed to the forward pass comes from the input tensor, not
// from the nPlanes argument used for the output shape above.
34 | Int _nPlanes = input_features.size(1);
35 | auto iF = input_features.data();
36 | auto oF = output_features.data();
// spatialVolume is the product of all entries of inputSize.
37 | long spatialVolume = inputSize.prod().data()[0];
38 | cuda_SparseToDense_ForwardPass(iF, oF, _nPlanes, spatialVolume, _rules);
39 | }
40 | }
// Backward pass of SparseToDense.
// Resizes d_input_features to the shape of input_features and zeroes it. If
// input_features is 2-dimensional the sparse-to-dense rule book is fetched
// from the metadata and cuda_SparseToDense_BackwardPass is invoked on the raw
// gradient pointers.
41 | template
42 | void cuda_SparseToDense_updateGradInput(
43 | /*long*/ at::Tensor inputSize, Metadata &m,
44 | /*cuda float*/ at::Tensor input_features,
45 | /*cuda float*/ at::Tensor d_input_features,
46 | /*cuda float*/ at::Tensor d_output_features) {
47 |
48 | d_input_features.resize_as_(input_features);
49 | d_input_features.zero_();
50 |
// Any input that is not 2-dimensional leaves the zeroed gradient untouched.
51 | if (input_features.ndimension() == 2) {
52 | auto _rules = m.getSparseToDenseRuleBook(inputSize, true);
// spatialVolume is the product of all entries of inputSize.
53 | long spatialVolume = inputSize.prod().data()[0];
54 | Int _nPlanes = d_input_features.size(1);
55 | auto diF = d_input_features.data();
56 | auto doF = d_output_features.data();
57 | cuda_SparseToDense_BackwardPass(diF, doF, _nPlanes, spatialVolume,
58 | _rules);
59 | }
60 | }
61 |
--------------------------------------------------------------------------------
/sparseconvnet/SCN/Metadata/64bits.h:
--------------------------------------------------------------------------------
1 | // Copyright 2016-present, Facebook, Inc.
2 | // All rights reserved.
3 | //
4 | // This source code is licensed under the BSD-style license found in the
5 | // LICENSE file in the root directory of this source tree.
6 |
7 | #include
8 |
9 | // Using 64 bit integers for coordinates and memory calculations.
10 |
11 | using Int = int64_t;
12 |
13 | // Point is a point in the d-dimensional integer lattice
14 | // (i.e. square-grid/cubic-grid, ...)
15 | template using Point = std::array;
16 |
// Copies the first `dimension` values of t's data buffer into a Point and
// returns it.
// NOTE(review): assumes t holds at least `dimension` contiguous long values
// in host memory - confirm against callers.
17 | template
18 | Point LongTensorToPoint(/*long*/ at::Tensor &t) {
19 | Point p;
20 | long *td = t.data();
21 | for (Int i = 0; i < dimension; i++)
22 | p[i] = td[i];
23 | return p;
24 | }
// Concatenates the first `dimension` values of t0 and of t1 into a single
// Point of length 2 * dimension: t0 fills entries [0, dimension), t1 fills
// entries [dimension, 2 * dimension).
// NOTE(review): assumes each tensor holds at least `dimension` contiguous
// long values in host memory - confirm against callers.
25 | template
26 | Point<2 * dimension> TwoLongTensorsToPoint(/*long*/ at::Tensor &t0,
27 | /*long*/ at::Tensor &t1) {
28 | Point<2 * dimension> p;
29 | long *td;
30 | td = t0.data();
31 | for (Int i = 0; i < dimension; i++)
32 | p[i] = td[i];
33 | td = t1.data();
34 | for (Int i = 0; i < dimension; i++)
35 | p[i + dimension] = td[i];
36 | return p;
37 | }
// Concatenates the first `dimension` values of t0, t1 and t2 into a single
// Point of length 3 * dimension, in that order: t0 fills [0, dimension),
// t1 fills [dimension, 2 * dimension), t2 fills [2 * dimension, 3 * dimension).
// NOTE(review): assumes each tensor holds at least `dimension` contiguous
// long values in host memory - confirm against callers.
38 | template
39 | Point<3 * dimension> ThreeLongTensorsToPoint(/*long*/ at::Tensor &t0,
40 | /*long*/ at::Tensor &t1,
41 | /*long*/ at::Tensor &t2) {
42 | Point<3 * dimension> p;
43 | long *td;
44 | td = t0.data();
45 | for (Int i = 0; i < dimension; i++)
46 | p[i] = td[i];
47 | td = t1.data();
48 | for (Int i = 0; i < dimension; i++)
49 | p[i + dimension] = td[i];
50 | td = t2.data();
51 | for (Int i = 0; i < dimension; i++)
52 | p[i + 2 * dimension] = td[i];
53 | return p;
54 | }
55 |
56 | // FNV Hash function for Point
57 | template struct IntArrayHash {
58 | std::size_t operator()(Point const &p) const {
59 | Int hash = -3750763034362895579; // 14695981039346656037;
60 | for (auto x : p) {
61 | hash *= 1099511628211;
62 | hash ^= x;
63 | }
64 | return hash;
65 | }
66 | };
67 |
// ATen scalar-type constant used for tensors of Int; Int is int64_t in this
// header, hence at::kLong.
68 | #define at_kINT at::kLong
69 |
--------------------------------------------------------------------------------
/sparseconvnet/SCN/Metadata/ActivePoolingRules.h:
--------------------------------------------------------------------------------
1 | // Copyright 2016-present, Facebook, Inc.
2 | // All rights reserved.
3 | //
4 | // This source code is licensed under the BSD-style license found in the
5 | // LICENSE file in the root directory of this source tree.
6 |
7 | #ifndef ACTIVEPOOLING_H
8 | #define ACTIVEPOOLING_H
9 |
10 | // Return the maximum number of active sites in the batch
11 | // rules has size 1.
12 | // rules[0] is a batchSize x (maxActive + 1) matrix.
13 | // First column is number of active sites for that sample (<= maxActive)
14 | // Remaining maxActive columns give the active sites, zero padded.
15 |
16 | template
17 | void activePoolingRules(SparseGrids &SGs, RuleBook &rules) {
18 | rules.clear();
19 | rules.resize(2);
20 | auto &r = rules[0];
21 | Int maxActive = 0;
22 | for (auto &sg : SGs)
23 | maxActive = std::max(maxActive, (Int)sg.mp.size());
24 | for (auto &sg : SGs) {
25 | r.push_back(sg.mp.size());
26 | for (auto &iter : sg.mp)
27 | r.push_back(sg.ctr + iter.second);
28 | while (rules.size() % (maxActive + 1) != 0)
29 | r.push_back(0); // padding
30 | }
31 | rules[1].push_back(SGs.size());
32 | rules[1].push_back(maxActive);
33 | }
34 | #endif /* ACTIVEPOOLING_H */
35 |
--------------------------------------------------------------------------------
/sparseconvnet/SCN/misc/drawCurve.cpp:
--------------------------------------------------------------------------------
1 | // Copyright 2016-present, Facebook, Inc.
2 | // All rights reserved.
3 | //
4 | // This source code is licensed under the BSD-style license found in the
5 | // LICENSE file in the root directory of this source tree.
6 |
7 | // Helper function to draw pen strokes with
8 | // nPlanes = 3, feature vector = (1,dx,dy)
// Walks the polyline stored in `stroke` and, for every sampled point along
// each segment, calls m.setInputSpatialLocation with the point's (truncated)
// coordinates and the 3-element feature vector (1, dx, dy), where (dx, dy) is
// the segment direction scaled by inverse_length.
// NOTE(review): the first point is read before any size check, so a stroke
// with zero rows would be read out of bounds - confirm callers always pass at
// least one point.
9 | void cpu_float_DrawCurve_2(Metadata<2> &m,
10 | /*float*/ at::Tensor features,
11 | /*float*/ at::Tensor stroke) {
// Scratch tensors reused for every sampled point: a 2-element long location
// and a 3-element float feature vector.
12 | at::Tensor location = at::zeros(at::CPU(at::kLong), {2});
13 | auto location_ = location.data();
14 |
15 | auto vec = at::zeros(at::CPU(at::kFloat), {3});
16 | auto vec_ = vec.data();
17 |
18 | int n = stroke.size(0) - 1;
19 | float *s = stroke.data(); // stroke is a [n+1,2] array
20 | long idx = 0;
21 | float x1, y1, x2, y2; // n line segments (x1,y1) to (x2,y2)
// Prime (x2, y2) with the first point; each iteration shifts it into (x1, y1).
22 | x2 = s[idx++];
23 | y2 = s[idx++];
24 | for (int i = 0; i < n; ++i) {
25 | x1 = x2;
26 | y1 = y2;
27 | x2 = s[idx++];
28 | y2 = s[idx++];
// 1 / segment length; the 1e-10 term keeps the power finite when both end
// points coincide.
29 | float inverse_length =
30 | powf(1e-10 + (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1), -0.5);
31 | vec_[0] = 1;
32 | vec_[1] = (x2 - x1) * inverse_length;
33 | vec_[2] = (y2 - y1) * inverse_length;
// Stepping a by inverse_length gives roughly one sample per unit of segment
// length. a = 0 maps to (x2, y2); larger a moves towards (x1, y1).
34 | for (float a = 0; a < 1; a += inverse_length) {
// Storing the float result into the long location truncates it.
35 | location_[0] = x1 * a + x2 * (1 - a);
36 | location_[1] = y1 * a + y2 * (1 - a);
// NOTE(review): meaning of the trailing `false` flag is not visible here.
37 | m.setInputSpatialLocation(features, location, vec, false);
38 | }
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/sparseconvnet/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016-present, Facebook, Inc.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the BSD-style license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
# Package-level counters, both initialised to zero before the submodules are
# imported.
# NOTE(review): presumably updated by layers during forward passes to tally
# multiply-add operations and hidden-state sizes - confirm against the layers.
7 | forward_pass_multiplyAdd_count = 0
8 | forward_pass_hidden_states = 0
9 | from .activations import Tanh, Sigmoid, ReLU, LeakyReLU, ELU, SELU, BatchNormELU
10 | from .averagePooling import AveragePooling
11 | from .batchNormalization import BatchNormalization, BatchNormReLU, BatchNormLeakyReLU, MeanOnlyBNLeakyReLU
12 | from .classificationTrainValidate import ClassificationTrainValidate
13 | from .convolution import Convolution
14 | from .deconvolution import Deconvolution
15 | from .denseToSparse import DenseToSparse
16 | from .dropout import Dropout, BatchwiseDropout
17 | from .fullConvolution import FullConvolution, TransposeConvolution
18 | from .identity import Identity
19 | from .inputBatch import InputBatch
20 | from .ioLayers import InputLayer, OutputLayer, BLInputLayer, BLOutputLayer, InputLayerInput
21 | from .maxPooling import MaxPooling
22 | from .metadata import Metadata
23 | from .networkArchitectures import *
24 | from .networkInNetwork import NetworkInNetwork
25 | from .permutohedralSubmanifoldConvolution import PermutohedralSubmanifoldConvolution, permutohedral_basis
26 | from .randomizedStrideConvolution import RandomizedStrideConvolution
27 | from .randomizedStrideMaxPooling import RandomizedStrideMaxPooling
28 | from .sequential import Sequential
29 | from .sparseConvNetTensor import SparseConvNetTensor
30 | from .sparseToDense import SparseToDense
31 | from .sparsify import Sparsify
32 | from .spectral_norm import spectral_norm
33 | from .submanifoldConvolution import SubmanifoldConvolution, ValidConvolution
34 | from .tables import *
35 | from .unPooling import UnPooling
36 | from .utils import append_tensors, AddCoords, add_feature_planes, concatenate_feature_planes, compare_sparse
37 | from .shapeContext import ShapeContext, MultiscaleShapeContext
38 |
--------------------------------------------------------------------------------
/sparseconvnet/dropout.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016-present, Facebook, Inc.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the BSD-style license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | from torch.autograd import Function
8 | from torch.nn import Module
9 | from .utils import *
10 | from .sparseConvNetTensor import SparseConvNetTensor
11 |
12 |
class Dropout(Module):
    """Element-wise dropout on the features of a SparseConvNetTensor.

    In training mode every feature entry is multiplied by an independent
    Bernoulli sample that is 1 with probability ``1 - p``; no rescaling is
    applied. In evaluation mode every feature is multiplied by ``1 - p``.
    Metadata and spatial size are passed through unchanged.
    """

    def __init__(self, p=0.5):
        """Store the drop probability ``p``."""
        super(Dropout, self).__init__()
        self.p = p

    def forward(self, input, increment=False):
        """Return a new SparseConvNetTensor with dropout applied.

        ``increment`` is accepted and ignored so that the layer can be called
        as ``module(input, increment)``, which is how ``Sequential.forward``
        invokes its children.
        """
        output = SparseConvNetTensor()
        features = input.features
        keep_prob = 1 - self.p
        if self.training:
            # One keep-probability per feature entry; bernoulli turns it into
            # a 0/1 mask of the same shape as the features.
            probs = features.new().resize_(1).expand_as(features).fill_(keep_prob)
            output.features = features * torch.bernoulli(probs)
        else:
            output.features = features * keep_prob
        output.metadata = input.metadata
        output.spatial_size = input.spatial_size
        return output

    def input_spatial_size(self, out_size):
        """Dropout does not change the spatial size."""
        return out_size
32 |
33 |
class BatchwiseDropout(Module):
    """Dropout that uses one mask entry per feature column.

    In training mode a single row of Bernoulli samples (1 with probability
    ``1 - p``), one per feature column, is drawn and broadcast over all rows
    of the feature matrix; no rescaling is applied. In evaluation mode every
    feature is multiplied by ``1 - p``. Metadata and spatial size are passed
    through unchanged.
    """

    def __init__(self, p=0.5):
        """Store the drop probability ``p``."""
        super(BatchwiseDropout, self).__init__()
        self.p = p

    def forward(self, input, increment=False):
        """Return a new SparseConvNetTensor with column-wise dropout applied.

        ``increment`` is accepted and ignored so that the layer can be called
        as ``module(input, increment)``, which is how ``Sequential.forward``
        invokes its children.
        """
        output = SparseConvNetTensor()
        features = input.features
        keep_prob = 1 - self.p
        if self.training:
            # Shape (1, number of columns): the same mask row multiplies
            # every row of the features through broadcasting.
            probs = features.new().resize_(1).expand(
                1, features.shape[1]).fill_(keep_prob)
            output.features = features * torch.bernoulli(probs)
        else:
            output.features = features * keep_prob
        output.metadata = input.metadata
        output.spatial_size = input.spatial_size
        return output

    def input_spatial_size(self, out_size):
        """Dropout does not change the spatial size."""
        return out_size
53 |
--------------------------------------------------------------------------------
/sparseconvnet/identity.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016-present, Facebook, Inc.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the BSD-style license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | from torch.nn import Module
8 |
9 |
class Identity(Module):
    """Layer that hands its input back untouched."""

    def forward(self, input, increment=False):
        """Return ``input`` as is; ``increment`` is ignored."""
        return input

    def input_spatial_size(self, out_size):
        """The input spatial size equals the requested output size."""
        return out_size
16 |
--------------------------------------------------------------------------------
/sparseconvnet/metadata.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016-present, Facebook, Inc.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the BSD-style license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | """
8 | Store Metadata relating to which spatial locations are active at each scale.
9 | Convolutions, submanifold convolutions and 'convolution reversing' deconvolutions
10 | all coexist within the same MetaData object as long as each spatial size
11 | only occurs once.
12 | """
13 |
14 | import sparseconvnet.SCN
15 |
def Metadata(dim):
    """Create a new instance of ``sparseconvnet.SCN.Metadata_<dim>``."""
    class_name = 'Metadata_%d' % dim
    metadata_class = getattr(sparseconvnet.SCN, class_name)
    return metadata_class()
18 |
--------------------------------------------------------------------------------
/sparseconvnet/sequential.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016-present, Facebook, Inc.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the BSD-style license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | import torch
8 |
class Sequential(torch.nn.Sequential):
    """``torch.nn.Sequential`` variant whose children are called as
    ``module(input, increment)`` and which can propagate spatial sizes
    backwards through its children.
    """

    def input_spatial_size(self, out_size):
        """Return the input size needed to obtain ``out_size`` at the output.

        The children are visited from last to first, each one mapping the
        size required at its output to the size required at its input.
        """
        for module in reversed(list(self._modules.values())):
            out_size = module.input_spatial_size(out_size)
        return out_size

    def add(self, module):
        """Append ``module`` under the next integer key and return ``self``."""
        self._modules[str(len(self._modules))] = module
        return self

    def forward(self, input, increment=False):
        """Feed ``input`` through every child, forwarding ``increment``."""
        for module in self:
            input = module(input, increment)
        return input

    def reweight(self, input):
        """Rescale weights and reset biases of the children using ``input``.

        Nested ``Sequential`` children are processed recursively. For a child
        that has both ``weight`` and ``bias`` (and while the running input has
        ``features``), the child's output features minus its bias are divided
        by their standard deviation over dimension 0; the weight is divided by
        that same standard deviation and the bias is replaced by minus the
        mean (over dimension 0) of the normalised features. The child is then
        re-run with its new parameters to produce the input of the next child.
        Other children are simply applied. Returns the final output.
        """
        for module in self._modules.values():
            if isinstance(module, Sequential):
                input = module.reweight(input)
            elif (hasattr(input, 'features') and hasattr(module, 'weight')
                  and hasattr(module, 'bias')):
                f = module(input).features
                f = f - module.bias
                s = f.std(0)
                f = f / s
                module.weight = torch.nn.Parameter(module.weight / s)
                module.bias = torch.nn.Parameter(-f.mean(0))
                input = module(input)
            else:
                input = module(input)
        return input

    def rebias(self, input):
        """Reset the biases of the children using ``input``; weights are kept.

        Nested ``Sequential`` children are processed recursively with
        ``rebias``. For a child that has a ``bias`` (and while the running
        input has ``features``), the bias is replaced by minus the mean (over
        dimension 0) of the child's output features with the old bias
        removed. The child is then re-run to produce the input of the next
        child. Other children are simply applied. Returns the final output.
        """
        for module in self._modules.values():
            if isinstance(module, Sequential):
                # Recurse with rebias (not reweight) so nested containers get
                # a bias-only update as well.
                input = module.rebias(input)
            elif hasattr(input, 'features') and hasattr(module, 'bias'):
                f = module(input).features
                f = f - module.bias
                module.bias = torch.nn.Parameter(-f.mean(0))
                input = module(input)
            else:
                input = module(input)
        return input
56 |
--------------------------------------------------------------------------------
/sparseconvnet/sparsify.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016-present, Facebook, Inc.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the BSD-style license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | import sparseconvnet
8 | from torch.autograd import Function, Variable
9 | from torch.nn import Module, Parameter
10 | from .utils import *
11 | from .sparseConvNetTensor import SparseConvNetTensor
12 | from .metadata import Metadata
13 |
class Sparsify(Module):
    """Keep only the rows whose first feature is strictly positive.

    The surviving rows are placed in a new SparseConvNetTensor with a fresh
    Metadata object, which is filled in by the input metadata's
    ``sparsifyMetadata``. An input without any feature elements is returned
    unchanged.
    """

    def __init__(self, dimension):
        super(Sparsify, self).__init__()
        self.dimension = dimension

    def forward(self, input, increment=False):
        """Return the sparsified tensor; ``increment`` is ignored."""
        if not input.features.numel():
            # Nothing to filter.
            return input

        result = SparseConvNetTensor()
        result.metadata = Metadata(self.dimension)
        result.spatial_size = input.spatial_size

        keep = input.features[:, 0] > 0
        result.features = input.features[keep]

        # sparsifyMetadata receives the mask as bytes together with its
        # running sum along dimension 0.
        keep = keep.type('torch.LongTensor')
        mask_bytes = keep.byte()
        running_count = keep.cumsum(0)
        input.metadata.sparsifyMetadata(
            result.metadata, input.spatial_size, mask_bytes, running_count)
        return result
34 |
--------------------------------------------------------------------------------
/sparseconvnet/tables.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016-present, Facebook, Inc.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the BSD-style license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | from torch.autograd import Function
8 | from torch.nn import Module
9 | from .utils import *
10 | from .sparseConvNetTensor import SparseConvNetTensor
11 |
12 |
class JoinTable(torch.nn.Sequential):
    """Merge a list of SparseConvNetTensors by concatenating their features
    along dimension 1. Metadata and spatial size come from the first entry.
    """

    def __init__(self, *args):
        super(JoinTable, self).__init__(*args)

    def forward(self, input, increment=False):
        """Return the concatenated tensor; ``increment`` is ignored."""
        first = input[0]
        joined = SparseConvNetTensor()
        joined.metadata = first.metadata
        joined.spatial_size = first.spatial_size
        feature_blocks = [tensor.features for tensor in input]
        joined.features = torch.cat(feature_blocks, 1)
        return joined

    def input_spatial_size(self, out_size):
        """Joining does not change the spatial size."""
        return out_size
26 |
27 |
class AddTable(torch.nn.Sequential):
    """Merge a list of SparseConvNetTensors by summing their features.
    Metadata and spatial size come from the first entry.
    """

    def __init__(self, *args):
        super(AddTable, self).__init__(*args)

    def forward(self, input, increment=False):
        """Return the summed tensor; ``increment`` is ignored."""
        first = input[0]
        summed = SparseConvNetTensor()
        summed.metadata = first.metadata
        summed.spatial_size = first.spatial_size
        # Same accumulation as the built-in sum(): start from 0, add in order.
        total = 0
        for tensor in input:
            total = total + tensor.features
        summed.features = total
        return summed

    def input_spatial_size(self, out_size):
        """Adding does not change the spatial size."""
        return out_size
41 |
42 |
class ConcatTable(torch.nn.Sequential):
    """Apply every child to the same input and collect the results in a list."""

    def __init__(self, *args):
        super(ConcatTable, self).__init__(*args)

    def forward(self, input, increment=False):
        """Return ``[child(input, increment), ...]`` in child order."""
        outputs = []
        for child in self._modules.values():
            outputs.append(child(input, increment))
        return outputs

    def add(self, module):
        """Register ``module`` under the next integer key and return ``self``."""
        next_key = str(len(self._modules))
        self._modules[next_key] = module
        return self

    def input_spatial_size(self, out_size):
        """Delegate to the child stored under key ``'0'``."""
        first_child = self._modules['0']
        return first_child.input_spatial_size(out_size)
56 |
--------------------------------------------------------------------------------