├── .DS_Store ├── LICENSE ├── README.md ├── all_build.sh ├── demo.py ├── develop.sh ├── extra ├── cudpp │ ├── .gitattributes │ ├── .gitignore │ ├── .gitmodules │ ├── CMakeLists.txt │ ├── README.md │ ├── cmake │ │ └── FindGLEW.cmake │ ├── cudpp-config-version.cmake.in │ ├── cudpp-config.cmake.in │ ├── doc │ │ ├── CUDPP_slides.pdf │ │ ├── bib │ │ │ ├── README.txt │ │ │ ├── bib.py │ │ │ ├── cudpp.bib │ │ │ ├── cudpp.bst │ │ │ ├── cudpp_refs.html │ │ │ └── cudpp_refs_bib.html │ │ ├── building-cudpp.md │ │ ├── changelog.md │ │ ├── changelog.txt │ │ ├── cudpp.doxygen │ │ ├── cudpp_refs.md │ │ ├── cudpp_refs_bib.md │ │ ├── example_simpleCUDPP.dox │ │ └── license.md │ ├── ext │ │ ├── cub │ │ │ ├── .cproject │ │ │ ├── .project │ │ │ ├── .settings │ │ │ │ ├── language.settings.xml │ │ │ │ ├── org.eclipse.cdt.codan.core.prefs │ │ │ │ ├── org.eclipse.cdt.core.prefs │ │ │ │ ├── org.eclipse.cdt.ui.prefs │ │ │ │ └── org.eclipse.core.runtime.prefs │ │ │ ├── CHANGE_LOG.TXT │ │ │ ├── LICENSE.TXT │ │ │ ├── README.md │ │ │ ├── common.mk │ │ │ ├── cub │ │ │ │ ├── agent │ │ │ │ │ ├── agent_histogram.cuh │ │ │ │ │ ├── agent_radix_sort_downsweep.cuh │ │ │ │ │ ├── agent_radix_sort_upsweep.cuh │ │ │ │ │ ├── agent_reduce.cuh │ │ │ │ │ ├── agent_reduce_by_key.cuh │ │ │ │ │ ├── agent_rle.cuh │ │ │ │ │ ├── agent_scan.cuh │ │ │ │ │ ├── agent_segment_fixup.cuh │ │ │ │ │ ├── agent_select_if.cuh │ │ │ │ │ ├── agent_spmv_csrt.cuh │ │ │ │ │ ├── agent_spmv_orig.cuh │ │ │ │ │ ├── agent_spmv_row_based.cuh │ │ │ │ │ └── single_pass_scan_operators.cuh │ │ │ │ ├── block │ │ │ │ │ ├── block_adjacent_difference.cuh │ │ │ │ │ ├── block_discontinuity.cuh │ │ │ │ │ ├── block_exchange.cuh │ │ │ │ │ ├── block_histogram.cuh │ │ │ │ │ ├── block_load.cuh │ │ │ │ │ ├── block_radix_rank.cuh │ │ │ │ │ ├── block_radix_sort.cuh │ │ │ │ │ ├── block_raking_layout.cuh │ │ │ │ │ ├── block_reduce.cuh │ │ │ │ │ ├── block_scan.cuh │ │ │ │ │ ├── block_shuffle.cuh │ │ │ │ │ ├── block_store.cuh │ │ │ │ │ └── specializations │ │ │ 
│ │ │ ├── block_histogram_atomic.cuh │ │ │ │ │ │ ├── block_histogram_sort.cuh │ │ │ │ │ │ ├── block_reduce_raking.cuh │ │ │ │ │ │ ├── block_reduce_raking_commutative_only.cuh │ │ │ │ │ │ ├── block_reduce_warp_reductions.cuh │ │ │ │ │ │ ├── block_scan_raking.cuh │ │ │ │ │ │ ├── block_scan_warp_scans.cuh │ │ │ │ │ │ ├── block_scan_warp_scans2.cuh │ │ │ │ │ │ └── block_scan_warp_scans3.cuh │ │ │ │ ├── cub.cuh │ │ │ │ ├── device │ │ │ │ │ ├── device_histogram.cuh │ │ │ │ │ ├── device_partition.cuh │ │ │ │ │ ├── device_radix_sort.cuh │ │ │ │ │ ├── device_reduce.cuh │ │ │ │ │ ├── device_run_length_encode.cuh │ │ │ │ │ ├── device_scan.cuh │ │ │ │ │ ├── device_segmented_radix_sort.cuh │ │ │ │ │ ├── device_segmented_reduce.cuh │ │ │ │ │ ├── device_select.cuh │ │ │ │ │ ├── device_spmv.cuh │ │ │ │ │ └── dispatch │ │ │ │ │ │ ├── dispatch_histogram.cuh │ │ │ │ │ │ ├── dispatch_radix_sort.cuh │ │ │ │ │ │ ├── dispatch_reduce.cuh │ │ │ │ │ │ ├── dispatch_reduce_by_key.cuh │ │ │ │ │ │ ├── dispatch_rle.cuh │ │ │ │ │ │ ├── dispatch_scan.cuh │ │ │ │ │ │ ├── dispatch_select_if.cuh │ │ │ │ │ │ ├── dispatch_spmv_csrt.cuh │ │ │ │ │ │ ├── dispatch_spmv_orig.cuh │ │ │ │ │ │ └── dispatch_spmv_row_based.cuh │ │ │ │ ├── grid │ │ │ │ │ ├── grid_barrier.cuh │ │ │ │ │ ├── grid_even_share.cuh │ │ │ │ │ ├── grid_mapping.cuh │ │ │ │ │ └── grid_queue.cuh │ │ │ │ ├── host │ │ │ │ │ └── mutex.cuh │ │ │ │ ├── iterator │ │ │ │ │ ├── arg_index_input_iterator.cuh │ │ │ │ │ ├── cache_modified_input_iterator.cuh │ │ │ │ │ ├── cache_modified_output_iterator.cuh │ │ │ │ │ ├── constant_input_iterator.cuh │ │ │ │ │ ├── counting_input_iterator.cuh │ │ │ │ │ ├── discard_output_iterator.cuh │ │ │ │ │ ├── tex_obj_input_iterator.cuh │ │ │ │ │ ├── tex_ref_input_iterator.cuh │ │ │ │ │ └── transform_input_iterator.cuh │ │ │ │ ├── thread │ │ │ │ │ ├── thread_load.cuh │ │ │ │ │ ├── thread_operators.cuh │ │ │ │ │ ├── thread_reduce.cuh │ │ │ │ │ ├── thread_scan.cuh │ │ │ │ │ ├── thread_search.cuh │ │ │ │ │ └── 
thread_store.cuh │ │ │ │ ├── util_allocator.cuh │ │ │ │ ├── util_arch.cuh │ │ │ │ ├── util_debug.cuh │ │ │ │ ├── util_device.cuh │ │ │ │ ├── util_macro.cuh │ │ │ │ ├── util_namespace.cuh │ │ │ │ ├── util_ptx.cuh │ │ │ │ ├── util_type.cuh │ │ │ │ └── warp │ │ │ │ │ ├── specializations │ │ │ │ │ ├── warp_reduce_shfl.cuh │ │ │ │ │ ├── warp_reduce_smem.cuh │ │ │ │ │ ├── warp_scan_shfl.cuh │ │ │ │ │ └── warp_scan_smem.cuh │ │ │ │ │ ├── warp_reduce.cuh │ │ │ │ │ └── warp_scan.cuh │ │ │ ├── eclipse code style profile.xml │ │ │ ├── examples │ │ │ │ ├── block │ │ │ │ │ ├── .gitignore │ │ │ │ │ ├── Makefile │ │ │ │ │ ├── example_block_radix_sort.cu │ │ │ │ │ ├── example_block_reduce.cu │ │ │ │ │ ├── example_block_scan.cu │ │ │ │ │ └── reduce_by_key.cu │ │ │ │ └── device │ │ │ │ │ ├── .gitignore │ │ │ │ │ ├── Makefile │ │ │ │ │ ├── example_device_partition_flagged.cu │ │ │ │ │ ├── example_device_partition_if.cu │ │ │ │ │ ├── example_device_radix_sort.cu │ │ │ │ │ ├── example_device_reduce.cu │ │ │ │ │ ├── example_device_scan.cu │ │ │ │ │ ├── example_device_select_flagged.cu │ │ │ │ │ ├── example_device_select_if.cu │ │ │ │ │ ├── example_device_select_unique.cu │ │ │ │ │ └── example_device_sort_find_non_trivial_runs.cu │ │ │ ├── experimental │ │ │ │ ├── .gitignore │ │ │ │ ├── Makefile │ │ │ │ ├── defunct │ │ │ │ │ ├── example_coo_spmv.cu │ │ │ │ │ └── test_device_seg_reduce.cu │ │ │ │ ├── histogram │ │ │ │ │ ├── histogram_cub.h │ │ │ │ │ ├── histogram_gmem_atomics.h │ │ │ │ │ └── histogram_smem_atomics.h │ │ │ │ ├── histogram_compare.cu │ │ │ │ ├── sparse_matrix.h │ │ │ │ ├── spmv_compare.cu │ │ │ │ └── spmv_script.sh │ │ │ ├── test │ │ │ │ ├── .gitignore │ │ │ │ ├── Makefile │ │ │ │ ├── link_a.cu │ │ │ │ ├── link_b.cu │ │ │ │ ├── link_main.cpp │ │ │ │ ├── mersenne.h │ │ │ │ ├── test_allocator.cu │ │ │ │ ├── test_block_histogram.cu │ │ │ │ ├── test_block_load_store.cu │ │ │ │ ├── test_block_radix_sort.cu │ │ │ │ ├── test_block_reduce.cu │ │ │ │ ├── test_block_scan.cu │ │ │ │ 
├── test_device_histogram.cu │ │ │ │ ├── test_device_radix_sort.cu │ │ │ │ ├── test_device_reduce.cu │ │ │ │ ├── test_device_reduce_by_key.cu │ │ │ │ ├── test_device_run_length_encode.cu │ │ │ │ ├── test_device_scan.cu │ │ │ │ ├── test_device_select_if.cu │ │ │ │ ├── test_device_select_unique.cu │ │ │ │ ├── test_grid_barrier.cu │ │ │ │ ├── test_iterator.cu │ │ │ │ ├── test_util.h │ │ │ │ ├── test_warp_reduce.cu │ │ │ │ └── test_warp_scan.cu │ │ │ └── tune │ │ │ │ ├── .gitignore │ │ │ │ ├── Makefile │ │ │ │ └── tune_device_reduce.cu │ │ └── moderngpu │ │ │ ├── README.md │ │ │ ├── benchmarkinsert │ │ │ ├── Makefile │ │ │ ├── benchmarkinsert.cu │ │ │ ├── benchmarkinsert.vcxproj │ │ │ └── benchmarkinsert.vcxproj.filters │ │ │ ├── benchmarkintervalmove │ │ │ ├── Makefile │ │ │ ├── benchmarkintervalmove.cu │ │ │ ├── benchmarkintervalmove.vcxproj │ │ │ └── benchmarkintervalmove.vcxproj.filters │ │ │ ├── benchmarkjoin │ │ │ ├── Makefile │ │ │ ├── benchmarkjoin.cu │ │ │ ├── benchmarkjoin.vcxproj │ │ │ └── benchmarkjoin.vcxproj.filters │ │ │ ├── benchmarklaunchbox │ │ │ ├── Makefile │ │ │ ├── benchmarklaunchbox.cu │ │ │ ├── benchmarklaunchbox.vcxproj │ │ │ └── benchmarklaunchbox.vcxproj.filters │ │ │ ├── benchmarkloadbalance │ │ │ ├── Makefile │ │ │ ├── benchmarkloadbalance.cu │ │ │ ├── benchmarkloadbalance.vcxproj │ │ │ └── benchmarkloadbalance.vcxproj.filters │ │ │ ├── benchmarklocalitysort │ │ │ ├── Makefile │ │ │ ├── benchmarklocalitysort.cu │ │ │ ├── benchmarklocalitysort.vcxproj │ │ │ └── benchmarklocalitysort.vcxproj.filters │ │ │ ├── benchmarkmerge │ │ │ ├── Makefile │ │ │ ├── benchmarkmerge.cu │ │ │ ├── benchmarkmerge.vcxproj │ │ │ └── benchmarkmerge.vcxproj.filters │ │ │ ├── benchmarkreducebykey │ │ │ ├── Makefile │ │ │ ├── benchmarkreducebykey.cu │ │ │ ├── benchmarkreducebykey.vcxproj │ │ │ └── benchmarkreducebykey.vcxproj.filters │ │ │ ├── benchmarkscan │ │ │ ├── Makefile │ │ │ ├── benchmarkscan.cu │ │ │ ├── benchmarkscan.vcxproj │ │ │ └── 
benchmarkscan.vcxproj.filters │ │ │ ├── benchmarksegreduce │ │ │ ├── Makefile │ │ │ ├── benchmarksegreduce.cu │ │ │ ├── benchmarksegreduce.vcxproj │ │ │ └── benchmarksegreduce.vcxproj.filters │ │ │ ├── benchmarksegsort │ │ │ ├── Makefile │ │ │ ├── benchmarksegsort.cu │ │ │ ├── benchmarksegsort.vcxproj │ │ │ └── benchmarksegsort.vcxproj.filters │ │ │ ├── benchmarksets │ │ │ ├── Makefile │ │ │ ├── benchmarksets.cu │ │ │ ├── benchmarksets.vcxproj │ │ │ └── benchmarksets.vcxproj.filters │ │ │ ├── benchmarksort │ │ │ ├── Makefile │ │ │ ├── benchmarksort.cu │ │ │ ├── benchmarksort.vcxproj │ │ │ └── benchmarksort.vcxproj.filters │ │ │ ├── benchmarksortedsearch │ │ │ ├── Makefile │ │ │ ├── benchmarksortedsearch.cu │ │ │ ├── benchmarksortedsearch.vcxproj │ │ │ └── benchmarksortedsearch.vcxproj.filters │ │ │ ├── benchmarkspmvcsr │ │ │ ├── Makefile │ │ │ ├── benchmarkspmvcsr.cu │ │ │ ├── benchmarkspmvcsr.vcxproj │ │ │ └── benchmarkspmvcsr.vcxproj.filters │ │ │ ├── common.mk │ │ │ ├── demo │ │ │ ├── Makefile │ │ │ ├── demo.cu │ │ │ ├── demo.vcxproj │ │ │ └── demo.vcxproj.filters │ │ │ ├── include │ │ │ ├── device │ │ │ │ ├── ctaloadbalance.cuh │ │ │ │ ├── ctamerge.cuh │ │ │ │ ├── ctascan.cuh │ │ │ │ ├── ctasearch.cuh │ │ │ │ ├── ctasegreduce.cuh │ │ │ │ ├── ctasegscan.cuh │ │ │ │ ├── ctasegsort.cuh │ │ │ │ ├── ctasortedsearch.cuh │ │ │ │ ├── devicetypes.cuh │ │ │ │ ├── deviceutil.cuh │ │ │ │ ├── intrinsics.cuh │ │ │ │ ├── launchbox.cuh │ │ │ │ ├── loadstore.cuh │ │ │ │ ├── serialsets.cuh │ │ │ │ └── sortnetwork.cuh │ │ │ ├── kernels │ │ │ │ ├── bulkinsert.cuh │ │ │ │ ├── bulkremove.cuh │ │ │ │ ├── csrtools.cuh │ │ │ │ ├── cubradixsort.cuh │ │ │ │ ├── intervalmove.cuh │ │ │ │ ├── join.cuh │ │ │ │ ├── loadbalance.cuh │ │ │ │ ├── localitysort.cuh │ │ │ │ ├── merge.cuh │ │ │ │ ├── mergesort.cuh │ │ │ │ ├── reduce.cuh │ │ │ │ ├── reducebykey.cuh │ │ │ │ ├── scan.cuh │ │ │ │ ├── search.cuh │ │ │ │ ├── segmentedsort.cuh │ │ │ │ ├── segreduce.cuh │ │ │ │ ├── segreducecsr.cuh │ │ │ │ 
├── sets.cuh │ │ │ │ ├── sortedsearch.cuh │ │ │ │ └── spmvcsr.cuh │ │ │ ├── mgpudevice.cuh │ │ │ ├── mgpuenums.h │ │ │ ├── mgpuhost.cuh │ │ │ ├── mmio.h │ │ │ ├── moderngpu.cuh │ │ │ ├── sparsematrix.h │ │ │ └── util │ │ │ │ ├── format.h │ │ │ │ ├── mgpualloc.h │ │ │ │ ├── mgpucontext.h │ │ │ │ ├── static.h │ │ │ │ └── util.h │ │ │ ├── mgpu_benchmarks.xlsx │ │ │ ├── moderngpu.sln │ │ │ ├── parallelmerge │ │ │ ├── Makefile │ │ │ ├── parallelmerge.cu │ │ │ ├── parallelmerge.vcxproj │ │ │ └── parallelmerge.vcxproj.filters │ │ │ ├── src │ │ │ ├── mgpucontext.cu │ │ │ ├── mgpuutil.cpp │ │ │ ├── mmio.cpp │ │ │ └── sparsematrix.cpp │ │ │ ├── testlaunchbox │ │ │ ├── Makefile │ │ │ ├── testlaunchbox.cu │ │ │ ├── testlaunchbox.vcxproj │ │ │ └── testlaunchbox.vcxproj.filters │ │ │ ├── testsegsortbyflags │ │ │ ├── testsegsortbyflags.cu │ │ │ ├── testsegsortbyflags.vcxproj │ │ │ └── testsegsortbyflags.vcxproj.filters │ │ │ └── vs.props │ ├── include │ │ ├── cudpp.h │ │ ├── cudpp_config.h │ │ ├── cudpp_config.h.in │ │ └── cudpp_hash.h │ ├── license.txt │ └── src │ │ ├── cudpp │ │ ├── CMakeLists.txt │ │ ├── app │ │ │ ├── compact_app.cu │ │ │ ├── compress_app.cu │ │ │ ├── listrank_app.cu │ │ │ ├── mergesort_app.cu │ │ │ ├── multisplit_app.cu │ │ │ ├── radixsort_app.cu │ │ │ ├── rand_app.cu │ │ │ ├── reduce_app.cu │ │ │ ├── sa_app.cu │ │ │ ├── scan_app.cu │ │ │ ├── segmented_scan_app.cu │ │ │ ├── spmvmult_app.cu │ │ │ ├── stringsort_app.cu │ │ │ └── tridiagonal_app.cu │ │ ├── cta │ │ │ ├── compress_cta.cuh │ │ │ ├── mergesort_cta.cuh │ │ │ ├── radixsort_cta.cuh │ │ │ ├── rand_cta.cuh │ │ │ ├── scan_cta.cuh │ │ │ ├── segmented_scan_cta.cuh │ │ │ └── stringsort_cta.cuh │ │ ├── cuda_util.h │ │ ├── cudpp.cpp │ │ ├── cudpp_compact.h │ │ ├── cudpp_compress.h │ │ ├── cudpp_globals.h │ │ ├── cudpp_listrank.h │ │ ├── cudpp_manager.cpp │ │ ├── cudpp_manager.h │ │ ├── cudpp_maximal_launch.cpp │ │ ├── cudpp_maximal_launch.h │ │ ├── cudpp_mergesort.h │ │ ├── cudpp_multisplit.h │ │ ├── 
cudpp_plan.cpp │ │ ├── cudpp_plan.h │ │ ├── cudpp_radixsort.h │ │ ├── cudpp_rand.h │ │ ├── cudpp_reduce.h │ │ ├── cudpp_sa.h │ │ ├── cudpp_scan.h │ │ ├── cudpp_segscan.h │ │ ├── cudpp_spmvmult.h │ │ ├── cudpp_stringsort.h │ │ ├── cudpp_tridiagonal.h │ │ ├── cudpp_util.h │ │ ├── kernel │ │ │ ├── compact_kernel.cuh │ │ │ ├── compress_kernel.cuh │ │ │ ├── listrank_kernel.cuh │ │ │ ├── mergesort_kernel.cuh │ │ │ ├── multisplit_kernel.cuh │ │ │ ├── radixsort_kernel.cuh │ │ │ ├── rand_kernel.cuh │ │ │ ├── reduce_kernel.cuh │ │ │ ├── sa_kernel.cuh │ │ │ ├── scan_kernel.cuh │ │ │ ├── segmented_scan_kernel.cuh │ │ │ ├── spmvmult_kernel.cuh │ │ │ ├── stringsort_kernel.cuh │ │ │ ├── tridiagonal_kernel.cuh │ │ │ └── vector_kernel.cuh │ │ └── sharedmem.h │ │ └── cudpp_hash │ │ ├── CMakeLists.txt │ │ ├── cudpp_hash.cpp │ │ ├── debugging.cpp │ │ ├── debugging.cu │ │ ├── debugging.h │ │ ├── definitions.h │ │ ├── hash_compacting.cpp │ │ ├── hash_compacting.cu │ │ ├── hash_compacting.h │ │ ├── hash_functions.cu │ │ ├── hash_functions.h │ │ ├── hash_multivalue.cpp │ │ ├── hash_multivalue.cu │ │ ├── hash_multivalue.h │ │ ├── hash_table.cpp │ │ ├── hash_table.cu │ │ ├── hash_table.cuh │ │ ├── hash_table.h │ │ ├── mt19937ar.cpp │ │ └── mt19937ar.h └── easy_profiler │ ├── CMakeLists.txt │ ├── LICENSE │ ├── LICENSE.APACHE │ ├── LICENSE.MIT │ ├── README.md │ ├── appveyor.bat │ ├── appveyor.yml │ ├── easy_profiler_converter │ ├── CMakeLists.txt │ ├── converter.cpp │ ├── converter.h │ ├── include │ │ └── json.hpp │ ├── main.cpp │ ├── reader.cpp │ └── reader.h │ ├── easy_profiler_core │ ├── CMakeLists.txt │ ├── LICENSE.APACHE │ ├── LICENSE.MIT │ ├── alignment_helpers.h │ ├── base_block_descriptor.cpp │ ├── block.cpp │ ├── block_descriptor.cpp │ ├── block_descriptor.h │ ├── chunk_allocator.h │ ├── cmake │ │ └── config.cmake.in │ ├── current_thread.h │ ├── current_time.h │ ├── easy_socket.cpp │ ├── event_trace_status.h │ ├── event_trace_win.cpp │ ├── event_trace_win.h │ ├── hashed_cstr.h │ ├── 
include │ │ └── easy │ │ │ ├── arbitrary_value.h │ │ │ ├── details │ │ │ ├── arbitrary_value_aux.h │ │ │ ├── arbitrary_value_public_types.h │ │ │ ├── easy_compiler_support.h │ │ │ ├── profiler_aux.h │ │ │ ├── profiler_colors.h │ │ │ ├── profiler_in_use.h │ │ │ └── profiler_public_types.h │ │ │ ├── easy_net.h │ │ │ ├── easy_protocol.h │ │ │ ├── easy_socket.h │ │ │ ├── profiler.h │ │ │ ├── reader.h │ │ │ ├── serialized_block.h │ │ │ ├── utility.h │ │ │ └── writer.h │ ├── nonscoped_block.cpp │ ├── nonscoped_block.h │ ├── profile_manager.cpp │ ├── profile_manager.h │ ├── profiler.cpp │ ├── reader.cpp │ ├── resources.rc │ ├── serialized_block.cpp │ ├── spin_lock.h │ ├── stack_buffer.h │ ├── thread_storage.cpp │ ├── thread_storage.h │ └── writer.cpp │ ├── profiler_gui │ ├── CMakeLists.txt │ ├── arbitrary_value_inspector.cpp │ ├── arbitrary_value_inspector.h │ ├── arbitrary_value_tooltip.cpp │ ├── arbitrary_value_tooltip.h │ ├── blocks_graphics_view.cpp │ ├── blocks_graphics_view.h │ ├── blocks_tree_widget.cpp │ ├── blocks_tree_widget.h │ ├── bookmarks_editor.cpp │ ├── bookmarks_editor.h │ ├── common_functions.cpp │ ├── common_functions.h │ ├── common_types.h │ ├── complexity_calculator.h │ ├── descriptors_tree_widget.cpp │ ├── descriptors_tree_widget.h │ ├── dialog.cpp │ ├── dialog.h │ ├── fps_widget.cpp │ ├── fps_widget.h │ ├── globals.cpp │ ├── globals.h │ ├── globals_qobjects.cpp │ ├── globals_qobjects.h │ ├── graphics_block_item.cpp │ ├── graphics_block_item.h │ ├── graphics_image_item.cpp │ ├── graphics_image_item.h │ ├── graphics_ruler_item.cpp │ ├── graphics_ruler_item.h │ ├── graphics_scrollbar.cpp │ ├── graphics_scrollbar.h │ ├── graphics_slider_area.cpp │ ├── graphics_slider_area.h │ ├── images │ │ ├── attribution.txt │ │ ├── default │ │ │ ├── arrow-down-disabled.svg │ │ │ ├── arrow-down-hover.svg │ │ │ ├── arrow-down-pressed.svg │ │ │ ├── arrow-down.svg │ │ │ ├── arrow-left.svg │ │ │ ├── arrow-right.svg │ │ │ ├── arrow-up-disabled.svg │ │ │ ├── 
arrow-up-hover.svg │ │ │ ├── arrow-up-pressed.svg │ │ │ ├── arrow-up.svg │ │ │ ├── big-o.svg │ │ │ ├── binoculars.svg │ │ │ ├── check-disabled.svg │ │ │ ├── check-partial-disabled.svg │ │ │ ├── check-partial.svg │ │ │ ├── check.svg │ │ │ ├── close-hover.svg │ │ │ ├── close-white-hover.svg │ │ │ ├── close-white-pressed.svg │ │ │ ├── close-white.svg │ │ │ ├── close.svg │ │ │ ├── collapse.svg │ │ │ ├── colors-black.svg │ │ │ ├── colors.svg │ │ │ ├── crop.svg │ │ │ ├── csv.svg │ │ │ ├── delete-old.svg │ │ │ ├── delete.svg │ │ │ ├── expand.svg │ │ │ ├── lan.svg │ │ │ ├── lan_on.svg │ │ │ ├── list.svg │ │ │ ├── maximize-white-hover.svg │ │ │ ├── maximize-white-pressed.svg │ │ │ ├── maximize-white.svg │ │ │ ├── minimize-white-hover.svg │ │ │ ├── minimize-white-pressed.svg │ │ │ ├── minimize-white.svg │ │ │ ├── minimize.svg │ │ │ ├── off.svg │ │ │ ├── open-folder.svg │ │ │ ├── open-folder2.svg │ │ │ ├── play.svg │ │ │ ├── radio-indicator-disabled.svg │ │ │ ├── radio-indicator.svg │ │ │ ├── reload-folder2.svg │ │ │ ├── reload.svg │ │ │ ├── save.svg │ │ │ ├── search-next.svg │ │ │ ├── search-prev.svg │ │ │ ├── settings.svg │ │ │ ├── statistics.svg │ │ │ ├── statistics2.svg │ │ │ ├── stop.svg │ │ │ ├── to-fullscreen.svg │ │ │ ├── to-window.svg │ │ │ ├── wifi.svg │ │ │ ├── wifi_on.svg │ │ │ ├── window.svg │ │ │ └── yx.svg │ │ ├── logo.ico │ │ └── logo.svg │ ├── main.cpp │ ├── main_window.cpp │ ├── main_window.h │ ├── resources.qrc │ ├── resources.rc │ ├── round_progress_widget.cpp │ ├── round_progress_widget.h │ ├── themes │ │ ├── default.css │ │ └── default.scss │ ├── thread_pool.cpp │ ├── thread_pool.h │ ├── thread_pool_task.cpp │ ├── thread_pool_task.h │ ├── timer.cpp │ ├── timer.h │ ├── tree_widget_item.cpp │ ├── tree_widget_item.h │ ├── tree_widget_loader.cpp │ ├── tree_widget_loader.h │ ├── window_header.cpp │ └── window_header.h │ ├── reader │ ├── CMakeLists.txt │ └── main.cpp │ ├── sample │ ├── CMakeLists.txt │ ├── express_sample.cpp │ ├── main.cpp │ └── main_clock.cpp 
│ └── scripts │ ├── context_switch_logger.stp │ ├── make_style.sh │ └── test.sh ├── model.py ├── p1.yml ├── setup.py └── sparseconvnet ├── SCN ├── CPU │ ├── ActivePooling.cpp │ ├── AffineReluTrivialConvolution.cpp │ ├── AveragePooling.cpp │ ├── BatchNormalization.cpp │ ├── BatchwiseMultiplicativeDropout.cpp │ ├── Convolution.cpp │ ├── Deconvolution.cpp │ ├── IOLayers.cpp │ ├── LeakyReLU.cpp │ ├── MaxPooling.cpp │ ├── NetworkInNetwork.cpp │ ├── SparseToDense.cpp │ └── UnPooling.cpp ├── CUDA │ ├── ActivePooling.cpp │ ├── ActivePooling.cu │ ├── AffineReluTrivialConvolution.cpp │ ├── AffineReluTrivialConvolution.cu │ ├── AveragePooling.cpp │ ├── AveragePooling.cu │ ├── BatchNormalization.cpp │ ├── BatchNormalization.cu │ ├── BatchwiseMultiplicativeDropout.cpp │ ├── BatchwiseMultiplicativeDropout.cu │ ├── CUDPPWrapper.cu │ ├── CUDPPWrapper.hpp │ ├── Convolution.cpp │ ├── Convolution.cu │ ├── Deconvolution.cpp │ ├── Deconvolution.cu │ ├── IOLayers.cpp │ ├── IOLayers.cu │ ├── LeakyReLU.cpp │ ├── LeakyReLU.cu │ ├── MaxPooling.cpp │ ├── MaxPooling.cu │ ├── NetworkInNetwork.cpp │ ├── RuleBookIterator.h │ ├── SparseToDense.cpp │ ├── SparseToDense.cu │ ├── SubmanifoldRules_cuda.cpp │ ├── SubmanifoldRules_cuda.cu │ ├── UnPooling.cpp │ ├── UnPooling.cu │ └── kernel_hash.cuh ├── Metadata │ ├── 32bits.h │ ├── 64bits.h │ ├── ActivePoolingRules.h │ ├── ConvolutionRules.h │ ├── FullConvolutionRules.h │ ├── IOLayersRules.h │ ├── Metadata.cpp │ ├── Metadata.h │ ├── PermutohedralSubmanifoldConvolutionRules.h │ ├── RandomizedStrideRules.h │ ├── RectangularRegions.h │ ├── SubmanifoldConvolutionRules.h │ └── resultHash.h ├── cuda.cu ├── misc │ └── drawCurve.cpp ├── pybind.cpp ├── sparseconvnet.h ├── sparseconvnet_cpu.cpp └── sparseconvnet_cuda.cpp ├── __init__.py ├── activations.py ├── averagePooling.py ├── batchNormalization.py ├── classificationTrainValidate.py ├── convolution.py ├── deconvolution.py ├── denseToSparse.py ├── dropout.py ├── fullConvolution.py ├── identity.py ├── 
inputBatch.py ├── ioLayers.py ├── maxPooling.py ├── metadata.py ├── networkArchitectures.py ├── networkInNetwork.py ├── permutohedralSubmanifoldConvolution.py ├── randomizedStrideConvolution.py ├── randomizedStrideMaxPooling.py ├── sequential.py ├── shapeContext.py ├── sparseConvNetTensor.py ├── sparseToDense.py ├── sparsify.py ├── spectral_norm.py ├── submanifoldConvolution.py ├── tables.py ├── unPooling.py └── utils.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THU-luvision/INS-Conv/c1cdd4187803f1d50e00610ae947ed337a7eb93f/.DS_Store -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD License 2 | 3 | For SparseConvNet software 4 | 5 | Copyright (c) Facebook, Inc. and its affiliates. All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without modification, 8 | are permitted provided that the following conditions are met: 9 | 10 | * Redistributions of source code must retain the above copyright notice, this 11 | list of conditions and the following disclaimer. 12 | 13 | * Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | * Neither the name Facebook nor the names of its contributors may be used to 18 | endorse or promote products derived from this software without specific 19 | prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 22 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 28 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # INS-Conv: Incremental Sparse Convolution for Online 3D segmentation 2 | 3 | This is the incremental sparse convolution library implemented based on [SparseConvNet](https://github.com/facebookresearch/SparseConvNet) and [Live Semantic 3D Perception for Immersive Augmented Reality](https://ieeexplore.ieee.org/abstract/document/8998140). The latter describes a more efficient GPU implementation of the original submanifold sparse convolution. Our method supports incremental computing of sparse convolution, including SSC, convolution/deconvolution, BN, IO, and residual structure, etc. 4 | ## Environment setup 5 | 6 | ### Preliminary Requirements: 7 | * Ubuntu 16.04 8 | * CUDA 9.0 9 | 16 | 17 | ### Install 18 | ```conda 19 | conda env create -f p1.yml 20 | ``` 21 | 22 | ```bash 23 | sh all_build.sh 24 | ``` 25 | 26 | ### Demo 27 | For training, you can train an arbitrary model using the original sparseconvnet. 28 | 29 | For incremental inference, demo.py gives an example of the INS-Conv library. 
30 | 31 | We also provide the code for the online 3D semantic instance segmentation demo shown in our video; you can download it from the following link: 32 | https://drive.google.com/file/d/1sYpMFc1dVXZSZEDhfqQZbMoabiZZikuI/view?usp=sharing 33 | -------------------------------------------------------------------------------- /all_build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2016-present, Facebook, Inc. 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under the BSD-style license found in the 6 | # LICENSE file in the root directory of this source tree. 7 | conda activate p1 8 | # 9 | ## make cudpp 10 | cd extra/cudpp 11 | rm -rf build/ 12 | mkdir build 13 | cd build 14 | cmake .. 15 | make -j32 16 | cd ../../.. 17 | # 18 | ## make easy_profiler 19 | # 20 | cd extra/easy_profiler 21 | rm -rf build/ 22 | mkdir build 23 | cd build 24 | cmake .. 25 | make -j32 26 | cd ../../.. 27 | # 28 | # 29 | 30 | rm -rf build/ dist/ sparseconvnet.egg-info sparseconvnet/*.so 31 | python setup.py develop 32 | 33 | 34 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import copy 3 | import numpy as np 4 | import math 5 | import time 6 | import sparseconvnet as scn 7 | import torch.nn.functional as F 8 | import torch.optim as optim 9 | import torch.nn as nn 10 | import sparseconvnet as scn 11 | import model 12 | import sys 13 | 14 | 15 | config = {'dimension': 3, 'full_scale': 4096} 16 | 17 | Model = model.Naive_UNet(config) 18 | 19 | Model = Model.cuda() 20 | 21 | points_0 = torch.randint(100, 150, size=(10000, 3)).cuda() # frame 0 point xyz 22 | features_0 = torch.rand((10000, 3)).cuda() # frame 0 point features 23 | 24 | # the initial update of the network, use increment=False 25 | # this will save a checkpoint of network features, do it every 100 
frames. 26 | output_0 = Model([points_0, features_0], increment=False) 27 | 28 | 29 | points_1 = torch.randint(100, 150, size=(1000, 3)).cuda() # xyz of incremental points of frame 1 30 | features_1 = torch.rand((1000, 3)).cuda() # features of incremental points of frame 1 31 | 32 | 33 | # incremental update of the network, use increment=True 34 | # just need to input the incremental points and their features; the input residuals will be computed automatically 35 | # the output is the results of incremental points of frame 1 36 | output_1 = Model([points_1, features_1], increment=True) 37 | 38 | ''' 39 | ... 40 | more frame 41 | ''' 42 | -------------------------------------------------------------------------------- /develop.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2016-present, Facebook, Inc. 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under the BSD-style license found in the 6 | # LICENSE file in the root directory of this source tree. 7 | 8 | rm -rf build/ dist/ sparseconvnet.egg-info sparseconvnet_SCN*.so 9 | python setup.py develop 10 | -------------------------------------------------------------------------------- /extra/cudpp/.gitattributes: -------------------------------------------------------------------------------- 1 | # Set default behaviour, in case users don't have core.autocrlf set. 2 | * text=auto 3 | 4 | # Explicitly declare text files we want to always be normalized and converted 5 | # to native line endings on checkout. 6 | *.c text 7 | *.cpp text 8 | *.cu text 9 | *.cuh text 10 | *.h text 11 | 12 | # Denote all files that are truly binary and should not be modified. 
13 | *.dll binary 14 | *.lib binary 15 | *.pdf binary 16 | -------------------------------------------------------------------------------- /extra/cudpp/.gitignore: -------------------------------------------------------------------------------- 1 | build/* 2 | .vscode/* 3 | apps/ 4 | -------------------------------------------------------------------------------- /extra/cudpp/.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "ext/cub"] 2 | path = ext/cub 3 | url = git://github.com/NVlabs/cub.git 4 | [submodule "ext/moderngpu"] 5 | path = ext/moderngpu 6 | url = git://github.com/NVlabs/moderngpu.git 7 | -------------------------------------------------------------------------------- /extra/cudpp/cmake/FindGLEW.cmake: -------------------------------------------------------------------------------- 1 | # 2 | # Try to find GLEW library and include path. 3 | # Once done this will define 4 | # 5 | # GLEW_FOUND 6 | # GLEW_INCLUDE_PATH 7 | # GLEW_LIBRARY 8 | # 9 | IF (WIN32) 10 | FIND_PATH( GLEW_INCLUDE_PATH GL/glew.h 11 | ${GLEW_ROOT_DIR}/include 12 | DOC "The directory where GL/glew.h resides") 13 | if (CMAKE_SIZEOF_VOID_P EQUAL 8) 14 | set(GLEWNAMES glew GLEW glew64 glew64s) 15 | else () 16 | set(GLEWNAMES glew GLEW glew32 glew32s) 17 | endif (CMAKE_SIZEOF_VOID_P EQUAL 8) 18 | 19 | FIND_LIBRARY( GLEW_LIBRARY 20 | NAMES ${GLEWNAMES} 21 | PATHS 22 | ${GLEW_ROOT_DIR}/bin 23 | ${GLEW_ROOT_DIR}/lib 24 | DOC "The GLEW library") 25 | ELSE (WIN32) 26 | FIND_PATH( GLEW_INCLUDE_PATH GL/glew.h 27 | /usr/include 28 | /usr/local/include 29 | /sw/include 30 | /opt/local/include 31 | ${GLEW_ROOT_DIR}/include 32 | DOC "The directory where GL/glew.h resides") 33 | FIND_LIBRARY( GLEW_LIBRARY 34 | NAMES GLEW libGLEW 35 | PATHS 36 | /usr/lib64 37 | /usr/lib 38 | /usr/local/lib64 39 | /usr/local/lib 40 | /sw/lib 41 | /opt/local/lib 42 | ${GLEW_ROOT_DIR}/lib 43 | DOC "The GLEW library") 44 | ENDIF (WIN32) 45 | 46 | IF 
(GLEW_INCLUDE_PATH AND GLEW_LIBRARY) 47 | SET( FOUND_GLEW 1) 48 | ELSE (GLEW_INCLUDE_PATH AND GLEW_LIBRARY) 49 | SET( FOUND_GLEW 0) 50 | ENDIF (GLEW_INCLUDE_PATH AND GLEW_LIBRARY) 51 | 52 | MARK_AS_ADVANCED( FOUND_GLEW ) -------------------------------------------------------------------------------- /extra/cudpp/cudpp-config-version.cmake.in: -------------------------------------------------------------------------------- 1 | set(PACKAGE_VERSION @cudpp_VERSION_MAJOR@.@cudpp_VERSION_MINOR@.@cudpp_VERSION_PATCH@) 2 | 3 | if("${PACKAGE_VERSION}" VERSION_LESS "${PACKAGE_FIND_VERSION}") 4 | set(PACKAGE_VERSION_COMPATIBLE FALSE) 5 | else() 6 | set(PACKAGE_VERSION_COMPATIBLE TRUE) 7 | if ("${PACKAGE_VERSION}" STREQUAL "${PACKAGE_FIND_VERSION}") 8 | set(PACKAGE_VERSION_EXACT TRUE) 9 | endif() 10 | endif() 11 | -------------------------------------------------------------------------------- /extra/cudpp/cudpp-config.cmake.in: -------------------------------------------------------------------------------- 1 | # This file should be installed in the lib directory. Find the root directory. 2 | get_filename_component(_dir "${CMAKE_CURRENT_LIST_FILE}" PATH) 3 | get_filename_component(_install_dir "${_dir}/.." ABSOLUTE) 4 | 5 | # Load the targets include. 
6 | get_filename_component(_dir "${CMAKE_CURRENT_LIST_FILE}" PATH) 7 | include("${_install_dir}/lib/cudpp-targets.cmake") 8 | 9 | set(cudpp_INCLUDE_DIRS "@cudpp_INCLUDE_DIRS_CONFIG@") 10 | -------------------------------------------------------------------------------- /extra/cudpp/doc/CUDPP_slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THU-luvision/INS-Conv/c1cdd4187803f1d50e00610ae947ed337a7eb93f/extra/cudpp/doc/CUDPP_slides.pdf -------------------------------------------------------------------------------- /extra/cudpp/doc/bib/README.txt: -------------------------------------------------------------------------------- 1 | Run bib.py in this directory to generate 2 HTML files: 2 | - cudpp_refs.html, a date-sorted list of references that use CUDPP 3 | (every file in cudpp.bib) 4 | - cudpp_refs_bib.html, the BibTeX for each of those refs 5 | 6 | Input files are: 7 | - cudpp.bib. Add new bibtex entries here. 8 | - cudpp.bst. BibTeX style file for how the resulting HTML will look. 9 | Bug JDO if you don't like the format. 10 | 11 | bib.py calls two external programs, both from the bibtex2html package. 12 | 13 | http://www.lri.fr/~filliatr/bibtex2html/ 14 | 15 | After you regenerate the two html files, check them into the 16 | repository (also cudpp.bib) so they can be included the next time the 17 | documentation is rebuilt. 18 | -------------------------------------------------------------------------------- /extra/cudpp/doc/bib/bib.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys, os, re 4 | 5 | # Generate all pubs from cudpp.bib; put refs into ref.txt 6 | # This just generates a list of the cite keys from cudpp.bib into ref.txt. 
7 | os.system("bib2bib -oc ref.txt cudpp.bib") 8 | 9 | # sort by reverse-date; don't generate keys; use cudpp.bst as bib style file 10 | # writes into cudpp_refs.html and cudpp_refs_bib.html 11 | os.putenv("openout_any", "r") 12 | os.system("bibtex2html -d -r -dl -nokeys -html-entities --no-footer --no-keywords -citefile ref.txt -s cudpp -nodoc -o cudpp_refs cudpp.bib") 13 | html_file = open('cudpp_refs.html') 14 | html = html_file.read() 15 | html_file.close() 16 | 17 | # if we want to munge the resulting text, do it here 18 | # right now this script is only 2 calls and a file remove though 19 | 20 | # write the file back 21 | html_file = open('cudpp_refs.html', 'w') 22 | print >> html_file, html 23 | html_file.close() 24 | 25 | # clean up temp files 26 | os.remove("ref.txt") 27 | -------------------------------------------------------------------------------- /extra/cudpp/doc/building-cudpp.md: -------------------------------------------------------------------------------- 1 | Building CUDPP {#building-cudpp} 2 | ============== 3 | 4 | CUDPP has currently been tested on Windows, Mac OS X and Linux. 5 | See the [Release Notes](@ref release-notes) for release-specific platform support. 6 | 7 | Initial checkout 8 | 9 | $ git clone https://github.com/cudpp/cudpp.git 10 | 11 | With CUDPP 2.2, we've added suffix_array which has dependencies on [cub](http://nvlabs.github.io/cub/) and [moderngpu](http://nvlabs.github.io/moderngpu/) libraries. They are added as submodules and you will need to pull them in using the following two commands in the cudpp/ root directory: 12 | 13 | $ git submodule init 14 | 15 | $ git submodule update 16 | 17 | Thrust Dependency {#build-thrust} 18 | ================= 19 | 20 | Starting with release 2.0, CUDPP uses the [Thrust](http://thrust.github.io) 21 | library for the implementation of cudppRadixSort(). Thrust is included with 22 | the CUDA Toolkit, so if you are using CUDA 4.0 or later, you need to do 23 | nothing else. 
If you are using an earlier version of CUDA, however, you will 24 | need to download the Thrust source distribution and install it in your 25 | CUDA/include path before building CUDPP. 26 | 27 | Building CUDPP using CMake {#build-cmake} 28 | ========================== 29 | 30 | CUDPP 2.0 uses CMake for cross-platform builds. Follow the instructions 31 | [on the CUDPP Wiki](https://github.com/cudpp/cudpp/wiki/BuildingCUDPPwithCMake) 32 | to build CUDPP. 33 | 34 | Warnings {#warnings} 35 | ======== 36 | 37 | You may see warnings during compilation of the form "warning: Double is not 38 | supported. Demoting to float". You can safely disregard these warnings. 39 | They are generated by the CUDA compiler when kernels that are specialized 40 | for multiple data types are compiled for CUDA targets that do not support 41 | double precision floating point. 42 | -------------------------------------------------------------------------------- /extra/cudpp/doc/changelog.md: -------------------------------------------------------------------------------- 1 | CUDPP Change Log {#changelog} 2 | ================ 3 | 4 | @include changelog.txt 5 | 6 | -------------------------------------------------------------------------------- /extra/cudpp/doc/cudpp_refs.md: -------------------------------------------------------------------------------- 1 | Publications that use CUDPP {#cudpp_refs} 2 | =========================== 3 | 4 | @htmlinclude doc/bib/cudpp_refs.html 5 | 6 | -------------------------------------------------------------------------------- /extra/cudpp/doc/cudpp_refs_bib.md: -------------------------------------------------------------------------------- 1 | Bibtex for publications that use CUDPP {#cudpp_refs_bib} 2 | ====================================== 3 | 4 | @htmlinclude doc/bib/cudpp_refs_bib.html 5 | 6 | -------------------------------------------------------------------------------- /extra/cudpp/doc/license.md: 
-------------------------------------------------------------------------------- 1 | CUDPP License {#license} 2 | ============= 3 | 4 | BSD License 5 | ============= 6 | 7 | CUDPP is released under the [BSD license](http://www.opensource.org/licenses/bsd-license.php). 8 | 9 | Non source-code content (such as documentation, web pages, etc.) from CUDPP 10 | is distributed under a [Creative Commons Attribution-ShareAlike 3.0 (CC BY-SA 3.0)](http://creativecommons.org/licenses/by-sa/3.0/) license. 11 | 12 | Note that prior to release 1.1 of CUDPP, the license used was a modified 13 | BSD license. With release 1.1, this license was replaced with the pure BSD 14 | license to facilitate the use of open source hosting of the code. 15 | 16 | CUDPP also includes the [Mersenne twister code](http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html) of [Makoto Matsumoto](http://www.math.sci.hiroshima-u.ac.jp/~m-mat/eindex.html), 17 | also licensed under BSD. 18 | 19 | CUDPP also calls functions in the [Thrust](http://thrust.github.io) template library, 20 | which is included with the CUDA Toolkit and licensed under the Apache 2.0 open source 21 | license. 22 | 23 | CUDPP also includes a modified version of FindGLEW.cmake from 24 | [nvidia-texture-tools](http://code.google.com/p/nvidia-texture-tools/), 25 | licensed under the [MIT license](http://www.opensource.org/licenses/mit-license.php). 
26 | 27 | @include license.txt 28 | -------------------------------------------------------------------------------- /extra/cudpp/ext/cub/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | GIT_CUB 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.cdt.managedbuilder.core.genmakebuilder 10 | clean,full,incremental, 11 | 12 | 13 | 14 | 15 | org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder 16 | full,incremental, 17 | 18 | 19 | 20 | 21 | 22 | org.eclipse.cdt.core.cnature 23 | org.eclipse.cdt.managedbuilder.core.managedBuildNature 24 | org.eclipse.cdt.managedbuilder.core.ScannerConfigNature 25 | org.eclipse.cdt.core.ccnature 26 | 27 | 28 | -------------------------------------------------------------------------------- /extra/cudpp/ext/cub/.settings/language.settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /extra/cudpp/ext/cub/.settings/org.eclipse.cdt.ui.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | formatter_profile=_B40C 3 | formatter_settings_version=1 4 | -------------------------------------------------------------------------------- /extra/cudpp/ext/cub/.settings/org.eclipse.core.runtime.prefs: -------------------------------------------------------------------------------- 1 | content-types/enabled=true 2 | content-types/org.eclipse.cdt.core.cxxHeader/file-extensions=cuh 3 | content-types/org.eclipse.cdt.core.cxxSource/file-extensions=cu 4 | eclipse.preferences.version=1 5 | -------------------------------------------------------------------------------- /extra/cudpp/ext/cub/LICENSE.TXT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2010-2011, Duane Merrill. All rights reserved. 
2 | Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of the NVIDIA CORPORATION nor the 12 | names of its contributors may be used to endorse or promote products 13 | derived from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY 19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /extra/cudpp/ext/cub/cub/util_namespace.cuh: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2011, Duane Merrill. All rights reserved. 3 | * Copyright (c) 2011-2016, NVIDIA CORPORATION. 
All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * * Redistributions of source code must retain the above copyright 8 | * notice, this list of conditions and the following disclaimer. 9 | * * Redistributions in binary form must reproduce the above copyright 10 | * notice, this list of conditions and the following disclaimer in the 11 | * documentation and/or other materials provided with the distribution. 12 | * * Neither the name of the NVIDIA CORPORATION nor the 13 | * names of its contributors may be used to endorse or promote products 14 | * derived from this software without specific prior written permission. 15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY 20 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 23 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | * 27 | ******************************************************************************/ 28 | 29 | /** 30 | * \file 31 | * Place-holder for prefixing the cub namespace 32 | */ 33 | 34 | #pragma once 35 | 36 | // For example: 37 | //#define CUB_NS_PREFIX namespace thrust{ namespace detail { 38 | //#define CUB_NS_POSTFIX } } 39 | 40 | #ifndef CUB_NS_PREFIX 41 | #define CUB_NS_PREFIX 42 | #endif 43 | 44 | #ifndef CUB_NS_POSTFIX 45 | #define CUB_NS_POSTFIX 46 | #endif 47 | -------------------------------------------------------------------------------- /extra/cudpp/ext/cub/examples/block/.gitignore: -------------------------------------------------------------------------------- 1 | /bin 2 | /Debug 3 | /Release 4 | /cuda55.sdf 5 | /cuda55.suo 6 | /cuda60.sdf 7 | /cuda60.suo 8 | -------------------------------------------------------------------------------- /extra/cudpp/ext/cub/examples/block/reduce_by_key.cu: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include 4 | 5 | 6 | template < 7 | int BLOCK_THREADS, ///< Number of CTA threads 8 | typename KeyT, ///< Key type 9 | typename ValueT> ///< Value type 10 | __global__ void Kernel() 11 | { 12 | // Tuple type for scanning (pairs accumulated segment-value with segment-index) 13 | typedef cub::KeyValuePair OffsetValuePairT; 14 | 15 | // Reduce-value-by-segment scan operator 16 | typedef cub::ReduceBySegmentOp ReduceBySegmentOpT; 17 | 18 | // Parameterized BlockDiscontinuity type for setting head flags 19 | typedef cub::BlockDiscontinuity< 20 | KeyT, 21 | BLOCK_THREADS> 22 | BlockDiscontinuityKeysT; 23 | 24 | // Parameterized BlockScan type 25 | typedef cub::BlockScan< 26 | OffsetValuePairT, 27 | BLOCK_THREADS, 28 | cub::BLOCK_SCAN_WARP_SCANS> 29 | BlockScanT; 30 | 31 | // Shared memory 32 | __shared__ union 33 | { 34 | typename BlockScanT::TempStorage scan; // Scan storage 35 | typename BlockDiscontinuityKeysT::TempStorage discontinuity; // Discontinuity storage 36 | } 
temp_storage; 37 | 38 | 39 | // Read data (each thread gets 3 items each, every 9 items is a segment) 40 | KeyT my_keys[3] = {threadIdx.x / 3, threadIdx.x / 3, threadIdx.x / 3}; 41 | ValueT my_values[3] = {1, 1, 1}; 42 | 43 | // Set head segment head flags 44 | int my_flags[3]; 45 | BlockDiscontinuityKeysT(temp_storage.discontinuity).FlagHeads( 46 | my_flags, 47 | my_keys, 48 | cub::Inequality()); 49 | 50 | __syncthreads(); 51 | 52 | 53 | 54 | 55 | 56 | 57 | } 58 | -------------------------------------------------------------------------------- /extra/cudpp/ext/cub/examples/device/.gitignore: -------------------------------------------------------------------------------- 1 | /bin 2 | /Debug 3 | /ipch 4 | /Release 5 | /cuda55.sdf 6 | /cuda55.suo 7 | /cuda60.sdf 8 | /cuda60.suo 9 | -------------------------------------------------------------------------------- /extra/cudpp/ext/cub/experimental/.gitignore: -------------------------------------------------------------------------------- 1 | /bin 2 | -------------------------------------------------------------------------------- /extra/cudpp/ext/cub/experimental/spmv_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for i in 1 2 4 8 16 32 64 128 256 512 1024 2048 4096 8192 16384 32768 65536 131072 262144 524288 1048576 2097152 4194304 8388608 16777216 4 | do 5 | echo `date`, `$1 --dense=$i $2 $3 $4 $5 $6 $7` 6 | done 7 | 8 | echo 9 | echo 10 | 11 | for i in `ls /home/dumerrill/graphs/spmv/*.mtx` 12 | do 13 | if [[ ( "`head -n 50 $i | grep complex`" = "" ) && ( "`head -n 50 $i | grep array`" = "" ) ]] 14 | then 15 | echo `date`, `$1 --mtx=$i $2 $3 $4 $5 $6 $7 2>/dev/null` 16 | fi 17 | done 18 | 19 | echo 20 | echo 21 | 22 | for i in `ls /scratch/dumerrill/graphs/mtx/*.mtx` 23 | #for i in `ls /cygdrive/w/Dev/UFget/mtx/*.mtx` 24 | do 25 | if [[ ( "`head -n 50 $i | grep complex`" = "" ) && ( "`head -n 50 $i | grep array`" = "" ) ]] 26 | then 27 | echo 
`date`, `$1 --mtx=$i $2 $3 $4 $5 $6 $7 2>/dev/null` 28 | fi 29 | done 30 | 31 | -------------------------------------------------------------------------------- /extra/cudpp/ext/cub/test/.gitignore: -------------------------------------------------------------------------------- 1 | /bin 2 | /link_main.obj 3 | -------------------------------------------------------------------------------- /extra/cudpp/ext/cub/test/link_a.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void a() 4 | { 5 | printf("a() called\n"); 6 | 7 | cub::DoubleBuffer d_keys; 8 | cub::DoubleBuffer d_values; 9 | size_t temp_storage_bytes = 0; 10 | cub::DeviceRadixSort::SortPairs(NULL, temp_storage_bytes, d_keys, d_values, 1024); 11 | } 12 | -------------------------------------------------------------------------------- /extra/cudpp/ext/cub/test/link_b.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void b() 4 | { 5 | printf("b() called\n"); 6 | 7 | cub::DoubleBuffer d_keys; 8 | cub::DoubleBuffer d_values; 9 | size_t temp_storage_bytes = 0; 10 | cub::DeviceRadixSort::SortPairs(NULL, temp_storage_bytes, d_keys, d_values, 1024); 11 | } 12 | -------------------------------------------------------------------------------- /extra/cudpp/ext/cub/test/link_main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern void a(); 4 | extern void b(); 5 | 6 | int main() 7 | { 8 | printf("hello world\n"); 9 | return 0; 10 | } 11 | -------------------------------------------------------------------------------- /extra/cudpp/ext/cub/tune/.gitignore: -------------------------------------------------------------------------------- 1 | /bin 2 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/README.md: -------------------------------------------------------------------------------- 1 | 
moderngpu 2 | ========= 3 | 4 | Design patterns for GPU computing 5 | 6 | Modern GPU is code and commentary intended to promote new and productive ways of thinking about GPU computing. 7 | 8 | http://nvlabs.github.io/moderngpu 9 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/benchmarkinsert/Makefile: -------------------------------------------------------------------------------- 1 | 2 | include ../common.mk 3 | 4 | all: build 5 | 6 | build: benchmarkinsert 7 | 8 | benchmarkinsert.o: benchmarkinsert.cu 9 | nvcc $(NVCCFLAGS) -o $@ -c $< 10 | 11 | mgpucontext.o: ../src/mgpucontext.cu 12 | nvcc $(NVCCFLAGS) -o $@ -c $< 13 | 14 | mgpuutil.o: ../src/mgpuutil.cpp 15 | nvcc $(NVCCFLAGS) -o $@ -c $< 16 | 17 | benchmarkinsert: benchmarkinsert.o mgpucontext.o mgpuutil.o 18 | nvcc $(NVCCFLAGS) -o $@ $+ 19 | 20 | run: build 21 | ./benchmarkinsert 22 | 23 | clean: 24 | rm -f benchmarkinsert.o mgpucontext.o mgpuutil.o benchmarkinsert 25 | 26 | clobber: 27 | clean 28 | 29 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/benchmarkinsert/benchmarkinsert.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | Source Files 24 | 25 | 26 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/benchmarkintervalmove/Makefile: -------------------------------------------------------------------------------- 1 | 2 | include ../common.mk 3 | 4 | all: build 5 | 6 | build: benchmarkintervalmove 7 | 8 | 
benchmarkintervalmove.o: benchmarkintervalmove.cu 9 | nvcc $(NVCCFLAGS) -o $@ -c $< 10 | 11 | mgpucontext.o: ../src/mgpucontext.cu 12 | nvcc $(NVCCFLAGS) -o $@ -c $< 13 | 14 | mgpuutil.o: ../src/mgpuutil.cpp 15 | nvcc $(NVCCFLAGS) -o $@ -c $< 16 | 17 | benchmarkintervalmove: benchmarkintervalmove.o mgpucontext.o mgpuutil.o 18 | nvcc $(NVCCFLAGS) -o $@ $+ 19 | 20 | run: build 21 | ./benchmarkintervalmove 22 | 23 | clean: 24 | rm -f benchmarkintervalmove.o mgpucontext.o mgpuutil.o benchmarkintervalmove 25 | 26 | clobber: 27 | clean 28 | 29 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/benchmarkintervalmove/benchmarkintervalmove.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | 23 | 24 | 25 | Source Files 26 | 27 | 28 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/benchmarkjoin/Makefile: -------------------------------------------------------------------------------- 1 | 2 | include ../common.mk 3 | 4 | all: build 5 | 6 | build: benchmarkjoin 7 | 8 | benchmarkjoin.o: benchmarkjoin.cu 9 | nvcc $(NVCCFLAGS) -o $@ -c $< 10 | 11 | mgpucontext.o: ../src/mgpucontext.cu 12 | nvcc $(NVCCFLAGS) -o $@ -c $< 13 | 14 | mgpuutil.o: ../src/mgpuutil.cpp 15 | nvcc $(NVCCFLAGS) -o $@ -c $< 16 | 17 | benchmarkjoin: benchmarkjoin.o mgpucontext.o mgpuutil.o 18 | nvcc $(NVCCFLAGS) -o $@ $+ 19 | 20 | run: build 21 | ./benchmarkjoin 22 | 23 | clean: 24 | rm -f benchmarkjoin.o mgpucontext.o mgpuutil.o benchmarkjoin 25 | 26 | clobber: 27 | 
clean 28 | 29 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/benchmarkjoin/benchmarkjoin.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | Source Files 24 | 25 | 26 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/benchmarklaunchbox/Makefile: -------------------------------------------------------------------------------- 1 | 2 | include ../common.mk 3 | 4 | all: build 5 | 6 | build: benchmarklaunchbox 7 | 8 | benchmarklaunchbox.o: benchmarklaunchbox.cu 9 | nvcc $(NVCCFLAGS) -o $@ -c $< 10 | 11 | mgpucontext.o: ../src/mgpucontext.cu 12 | nvcc $(NVCCFLAGS) -o $@ -c $< 13 | 14 | mgpuutil.o: ../src/mgpuutil.cpp 15 | nvcc $(NVCCFLAGS) -o $@ -c $< 16 | 17 | benchmarklaunchbox: benchmarklaunchbox.o mgpucontext.o mgpuutil.o 18 | nvcc $(NVCCFLAGS) -o $@ $+ 19 | 20 | run: build 21 | ./benchmarklaunchbox 22 | 23 | clean: 24 | rm -f benchmarklaunchbox.o mgpucontext.o mgpuutil.o benchmarklaunchbox 25 | 26 | clobber: 27 | clean 28 | 29 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/benchmarklaunchbox/benchmarklaunchbox.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | 
rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | Source Files 24 | 25 | 26 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/benchmarkloadbalance/Makefile: -------------------------------------------------------------------------------- 1 | 2 | include ../common.mk 3 | 4 | all: build 5 | 6 | build: benchmarkloadbalance 7 | 8 | benchmarkloadbalance.o: benchmarkloadbalance.cu 9 | nvcc $(NVCCFLAGS) -o $@ -c $< 10 | 11 | mgpucontext.o: ../src/mgpucontext.cu 12 | nvcc $(NVCCFLAGS) -o $@ -c $< 13 | 14 | mgpuutil.o: ../src/mgpuutil.cpp 15 | nvcc $(NVCCFLAGS) -o $@ -c $< 16 | 17 | benchmarkloadbalance: benchmarkloadbalance.o mgpucontext.o mgpuutil.o 18 | nvcc $(NVCCFLAGS) -o $@ $+ 19 | 20 | run: build 21 | ./benchmarkloadbalance 22 | 23 | clean: 24 | rm -f benchmarkloadbalance.o mgpucontext.o mgpuutil.o benchmarkloadbalance 25 | 26 | clobber: 27 | clean 28 | 29 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/benchmarkloadbalance/benchmarkloadbalance.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | Source Files 24 | 25 | 26 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/benchmarklocalitysort/Makefile: -------------------------------------------------------------------------------- 1 | include ../common.mk 2 | 3 | all: build 4 | 5 | build: benchmarklocalitysort 6 | 7 | benchmarklocalitysort.o: 
benchmarklocalitysort.cu 8 | nvcc $(NVCCFLAGS) -o $@ -c $< 9 | 10 | mgpucontext.o: ../src/mgpucontext.cu 11 | nvcc $(NVCCFLAGS) -o $@ -c $< 12 | 13 | mgpuutil.o: ../src/mgpuutil.cpp 14 | nvcc $(NVCCFLAGS) -o $@ -c $< 15 | 16 | benchmarklocalitysort: benchmarklocalitysort.o mgpucontext.o mgpuutil.o 17 | nvcc $(NVCCFLAGS) -o $@ $+ 18 | 19 | run: build 20 | ./benchmarklocalitysort 21 | 22 | clean: 23 | rm -f benchmarklocalitysort.o mgpucontext.o mgpuutil.o benchmarklocalitysort 24 | 25 | clobber: 26 | clean 27 | 28 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/benchmarklocalitysort/benchmarklocalitysort.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | Source Files 24 | 25 | 26 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/benchmarkmerge/Makefile: -------------------------------------------------------------------------------- 1 | 2 | include ../common.mk 3 | 4 | all: build 5 | 6 | build: benchmarkmerge 7 | 8 | benchmarkmerge.o: benchmarkmerge.cu 9 | nvcc $(NVCCFLAGS) -o $@ -c $< 10 | 11 | mgpucontext.o: ../src/mgpucontext.cu 12 | nvcc $(NVCCFLAGS) -o $@ -c $< 13 | 14 | mgpuutil.o: ../src/mgpuutil.cpp 15 | nvcc $(NVCCFLAGS) -o $@ -c $< 16 | 17 | benchmarkmerge: benchmarkmerge.o mgpucontext.o mgpuutil.o 18 | nvcc $(NVCCFLAGS) -o $@ $+ 19 | 20 | run: build 21 | ./benchmarkmerge 22 | 23 | clean: 24 | rm -f benchmarkmerge.o mgpucontext.o mgpuutil.o benchmarkmerge 25 | 26 | clobber: 27 | clean 28 | 29 | 
-------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/benchmarkmerge/benchmarkmerge.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/benchmarkreducebykey/Makefile: -------------------------------------------------------------------------------- 1 | 2 | include ../common.mk 3 | 4 | all: build 5 | 6 | build: benchmarkreducebykey 7 | 8 | benchmarkreducebykey.o: benchmarkreducebykey.cu 9 | nvcc $(NVCCFLAGS) -o $@ -c $< 10 | 11 | mgpucontext.o: ../src/mgpucontext.cu 12 | nvcc $(NVCCFLAGS) -o $@ -c $< 13 | 14 | mgpuutil.o: ../src/mgpuutil.cpp 15 | nvcc $(NVCCFLAGS) -o $@ -c $< 16 | 17 | benchmarkreducebykey: benchmarkreducebykey.o mgpucontext.o mgpuutil.o 18 | nvcc $(NVCCFLAGS) -o $@ $+ 19 | 20 | run: build 21 | ./benchmarkreducebykey 22 | 23 | clean: 24 | rm -f benchmarkreducebykey.o mgpucontext.o mgpuutil.o benchmarkreducebykey 25 | 26 | clobber: 27 | clean 28 | 29 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/benchmarkreducebykey/benchmarkreducebykey.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | Source Files 24 | 25 | 26 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/benchmarkscan/Makefile: -------------------------------------------------------------------------------- 1 | 
2 | include ../common.mk 3 | 4 | all: build 5 | 6 | build: benchmarkscan 7 | 8 | benchmarkscan.o: benchmarkscan.cu 9 | nvcc $(NVCCFLAGS) -o $@ -c $< 10 | 11 | mgpucontext.o: ../src/mgpucontext.cu 12 | nvcc $(NVCCFLAGS) -o $@ -c $< 13 | 14 | mgpuutil.o: ../src/mgpuutil.cpp 15 | nvcc $(NVCCFLAGS) -o $@ -c $< 16 | 17 | benchmarkscan: benchmarkscan.o mgpucontext.o mgpuutil.o 18 | nvcc $(NVCCFLAGS) -o $@ $+ 19 | 20 | run: build 21 | ./benchmarkscan 22 | 23 | clean: 24 | rm -f benchmarkscan.o mgpucontext.o mgpuutil.o benchmarkscan 25 | 26 | clobber: 27 | clean 28 | 29 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/benchmarkscan/benchmarkscan.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/benchmarksegreduce/Makefile: -------------------------------------------------------------------------------- 1 | 2 | include ../common.mk 3 | 4 | all: build 5 | 6 | build: benchmarksegreduce 7 | 8 | benchmarksegreduce.o: benchmarksegreduce.cu 9 | nvcc $(NVCCFLAGS) -o $@ -c $< 10 | 11 | mgpucontext.o: ../src/mgpucontext.cu 12 | nvcc $(NVCCFLAGS) -o $@ -c $< 13 | 14 | mgpuutil.o: ../src/mgpuutil.cpp 15 | nvcc $(NVCCFLAGS) -o $@ -c $< 16 | 17 | benchmarksegreduce: benchmarksegreduce.o mgpucontext.o mgpuutil.o 18 | nvcc $(NVCCFLAGS) -o $@ $+ 19 | 20 | run: build 21 | ./benchmarksegreduce 22 | 23 | clean: 24 | rm -f benchmarksegreduce.o mgpucontext.o mgpuutil.o benchmarksegreduce 25 | 26 | clobber: 27 | clean 28 | 29 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/benchmarksegreduce/benchmarksegreduce.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | 
cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/benchmarksegsort/Makefile: -------------------------------------------------------------------------------- 1 | 2 | include ../common.mk 3 | 4 | all: build 5 | 6 | build: benchmarksegsort 7 | 8 | benchmarksegsort.o: benchmarksegsort.cu 9 | nvcc $(NVCCFLAGS) -o $@ -c $< 10 | 11 | mgpucontext.o: ../src/mgpucontext.cu 12 | nvcc $(NVCCFLAGS) -o $@ -c $< 13 | 14 | mgpuutil.o: ../src/mgpuutil.cpp 15 | nvcc $(NVCCFLAGS) -o $@ -c $< 16 | 17 | benchmarksegsort: benchmarksegsort.o mgpucontext.o mgpuutil.o 18 | nvcc $(NVCCFLAGS) -o $@ $+ 19 | 20 | run: build 21 | ./benchmarksegsort 22 | 23 | clean: 24 | rm -f benchmarksegsort.o mgpucontext.o mgpuutil.o benchmarksegsort 25 | 26 | clobber: 27 | clean 28 | 29 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/benchmarksegsort/benchmarksegsort.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | Source Files 24 | 25 | 26 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/benchmarksets/Makefile: 
-------------------------------------------------------------------------------- 1 | 2 | include ../common.mk 3 | 4 | all: build 5 | 6 | build: benchmarksets 7 | 8 | benchmarksets.o: benchmarksets.cu 9 | nvcc $(NVCCFLAGS) -o $@ -c $< 10 | 11 | mgpucontext.o: ../src/mgpucontext.cu 12 | nvcc $(NVCCFLAGS) -o $@ -c $< 13 | 14 | mgpuutil.o: ../src/mgpuutil.cpp 15 | nvcc $(NVCCFLAGS) -o $@ -c $< 16 | 17 | benchmarksets: benchmarksets.o mgpucontext.o mgpuutil.o 18 | nvcc $(NVCCFLAGS) -o $@ $+ 19 | 20 | run: build 21 | ./benchmarksets 22 | 23 | clean: 24 | rm -f benchmarksets.o mgpucontext.o mgpuutil.o benchmarksets 25 | 26 | clobber: 27 | clean 28 | 29 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/benchmarksets/benchmarksets.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | Source Files 24 | 25 | 26 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/benchmarksort/Makefile: -------------------------------------------------------------------------------- 1 | 2 | include ../common.mk 3 | 4 | all: build 5 | 6 | build: benchmarksort 7 | 8 | benchmarksort.o: benchmarksort.cu 9 | nvcc $(NVCCFLAGS) -o $@ -c $< 10 | 11 | mgpucontext.o: ../src/mgpucontext.cu 12 | nvcc $(NVCCFLAGS) -o $@ -c $< 13 | 14 | mgpuutil.o: ../src/mgpuutil.cpp 15 | nvcc $(NVCCFLAGS) -o $@ -c $< 16 | 17 | benchmarksort: benchmarksort.o mgpucontext.o mgpuutil.o 18 | nvcc $(NVCCFLAGS) -o $@ $+ 19 | 20 | run: build 21 | ./benchmarksort 22 | 23 | clean: 24 | rm -f 
benchmarksort.o mgpucontext.o mgpuutil.o benchmarksort 25 | 26 | clobber: 27 | clean 28 | 29 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/benchmarksort/benchmarksort.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/benchmarksortedsearch/Makefile: -------------------------------------------------------------------------------- 1 | include ../common.mk 2 | 3 | all: build 4 | 5 | build: benchmarksortedsearch 6 | 7 | benchmarksortedsearch.o: benchmarksortedsearch.cu 8 | nvcc $(NVCCFLAGS) -o $@ -c $< 9 | 10 | mgpucontext.o: ../src/mgpucontext.cu 11 | nvcc $(NVCCFLAGS) -o $@ -c $< 12 | 13 | mgpuutil.o: ../src/mgpuutil.cpp 14 | nvcc $(NVCCFLAGS) -o $@ -c $< 15 | 16 | benchmarksortedsearch: benchmarksortedsearch.o mgpucontext.o mgpuutil.o 17 | nvcc $(NVCCFLAGS) -o $@ $+ 18 | 19 | run: build 20 | ./benchmarksortedsearch 21 | 22 | clean: 23 | rm -f benchmarksortedsearch.o mgpucontext.o mgpuutil.o benchmarksortedsearch 24 | 25 | clobber: 26 | clean 27 | 28 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/benchmarksortedsearch/benchmarksortedsearch.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | Source Files 24 | 25 | 26 | -------------------------------------------------------------------------------- 
/extra/cudpp/ext/moderngpu/benchmarkspmvcsr/Makefile: -------------------------------------------------------------------------------- 1 | 2 | include ../common.mk 3 | 4 | all: build 5 | 6 | build: benchmarkspmvcsr 7 | 8 | benchmarkspmvcsr.o: benchmarkspmvcsr.cu 9 | nvcc $(NVCCFLAGS) -o $@ -c $< 10 | 11 | mgpucontext.o: ../src/mgpucontext.cu 12 | nvcc $(NVCCFLAGS) -o $@ -c $< 13 | 14 | mgpuutil.o: ../src/mgpuutil.cpp 15 | nvcc $(NVCCFLAGS) -o $@ -c $< 16 | 17 | mmio.o: ../src/mmio.cpp 18 | nvcc $(NVCCFLAGS) -o $@ -c $< 19 | 20 | sparsematrix.o: ../src/sparsematrix.cpp 21 | nvcc $(NVCCFLAGS) -o $@ -c $< 22 | 23 | benchmarkspmvcsr: benchmarkspmvcsr.o mgpucontext.o mgpuutil.o mmio.o sparsematrix.o 24 | nvcc $(NVCCFLAGS) -o $@ $+ -lcusparse 25 | 26 | run: build 27 | ./benchmarkspmvcsr 28 | 29 | clean: 30 | rm -f benchmarkspmvcsr.o mgpucontext.o mgpuutil.o benchmarkspmvcsr 31 | 32 | clobber: 33 | clean 34 | 35 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/benchmarkspmvcsr/benchmarkspmvcsr.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | Source Files 24 | 25 | 26 | Source Files 27 | 28 | 29 | Source Files 30 | 31 | 32 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/common.mk: -------------------------------------------------------------------------------- 1 | 2 | ifeq ($(dbg),1) 3 | NVCCFLAGS += -g -G 4 | endif 5 | 6 | ifdef NVCC_BITS 7 | NVCCFLAGS += -m $(NVCC_BITS) 8 | endif 9 | 10 | ifdef NVCC_VERBOSE 11 | NVCCFLAGS += 
-Xptxas="-v" 12 | endif 13 | 14 | INCLUDES := -I ../include 15 | 16 | GENCODE_SM20 := -gencode arch=compute_20,code=sm_20 17 | GENCODE_SM30 := -gencode arch=compute_30,code=sm_30 18 | GENCODE_SM35 := -gencode arch=compute_35,code=sm_35 19 | 20 | GENCODE_FLAGS := $(GENCODE_SM20) $(GENCODE_SM35) 21 | 22 | NVCCFLAGS += $(GENCODE_FLAGS) $(INCLUDES) 23 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/demo/Makefile: -------------------------------------------------------------------------------- 1 | 2 | include ../common.mk 3 | 4 | all: build 5 | 6 | build: demo 7 | 8 | mgpucontext.o: ../src/mgpucontext.cu 9 | nvcc $(NVCCFLAGS) -o $@ -c $< 10 | 11 | mgpuutil.o: ../src/mgpuutil.cpp 12 | nvcc $(NVCCFLAGS) -o $@ -c $< 13 | 14 | demo.o: demo.cu 15 | nvcc $(NVCCFLAGS) -o $@ -c $< 16 | 17 | demo: demo.o mgpucontext.o mgpuutil.o 18 | nvcc $(NVCCFLAGS) -o $@ $+ 19 | 20 | run: build 21 | ./demo 22 | 23 | clean: 24 | rm -f demo.o mgpucontext.o mgpuutil.o demo 25 | 26 | clobber: 27 | clean 28 | 29 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/include/kernels/cubradixsort.cuh: -------------------------------------------------------------------------------- 1 | #include "device/deviceutil.cuh" 2 | #include "cub/cub.cuh" 3 | 4 | namespace mgpu { 5 | 6 | template 7 | bool CubRadixSort(Key* keys_global, Key* keys2_global, int count, int beginBit, 8 | int endBit, CudaContext& context) { 9 | 10 | cub::DoubleBuffer keys(keys_global, keys2_global); 11 | 12 | size_t tempBytes = 0; 13 | cub::DeviceRadixSort::SortKeys(0, tempBytes, keys, count, beginBit, endBit, 14 | context.Stream()); 15 | 16 | MGPU_MEM(byte) tempDevice = context.Malloc(tempBytes); 17 | 18 | cub::DeviceRadixSort::SortKeys(tempDevice->get(), tempBytes, keys, count, 19 | beginBit, endBit, context.Stream()); 20 | MGPU_SYNC_CHECK("cub::DeviceRadixSort::SortKeys"); 21 | 22 | return 1 == keys.selector; 23 | } 
24 | 25 | template 26 | bool CubRadixSort(Key* keys_global, Key* keys2_global, Value* values_global, 27 | Value* values2_global, int count, int beginBit, int endBit, 28 | CudaContext& context) { 29 | 30 | cub::DoubleBuffer keys(keys_global, keys2_global); 31 | cub::DoubleBuffer values(values_global, values2_global); 32 | 33 | size_t tempBytes = 0; 34 | cub::DeviceRadixSort::SortPairs(0, tempBytes, keys, values, count, 35 | beginBit, endBit, context.Stream()); 36 | 37 | MGPU_MEM(byte) tempDevice = context.Malloc(tempBytes); 38 | 39 | cub::DeviceRadixSort::SortPairs(tempDevice->get(), tempBytes, keys, values, 40 | count, beginBit, endBit, context.Stream()); 41 | MGPU_SYNC_CHECK("cub::DeviceRadixSort::SortPairs"); 42 | 43 | return 1 == keys.selector; 44 | } 45 | 46 | } // namespace mgpu 47 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/mgpu_benchmarks.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THU-luvision/INS-Conv/c1cdd4187803f1d50e00610ae947ed337a7eb93f/extra/cudpp/ext/moderngpu/mgpu_benchmarks.xlsx -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/parallelmerge/Makefile: -------------------------------------------------------------------------------- 1 | 2 | include ../common.mk 3 | 4 | all: build 5 | 6 | build: parallelmerge 7 | 8 | parallelmerge.o: parallelmerge.cu 9 | nvcc $(NVCCFLAGS) -o $@ -c $< 10 | 11 | mgpucontext.o: ../src/mgpucontext.cu 12 | nvcc $(NVCCFLAGS) -o $@ -c $< 13 | 14 | mgpuutil.o: ../src/mgpuutil.cpp 15 | nvcc $(NVCCFLAGS) -o $@ -c $< 16 | 17 | parallelmerge: parallelmerge.o mgpucontext.o mgpuutil.o 18 | nvcc $(NVCCFLAGS) -o $@ $+ 19 | 20 | run: build 21 | ./parallelmerge 22 | 23 | clean: 24 | rm -f parallelmerge.o mgpucontext.o mgpuutil.o parallelmerge 25 | 26 | clobber: 27 | clean 28 | 29 | 
-------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/parallelmerge/parallelmerge.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | Source Files 24 | 25 | 26 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/testlaunchbox/Makefile: -------------------------------------------------------------------------------- 1 | 2 | include ../common.mk 3 | 4 | all: build 5 | 6 | build: testlaunchbox 7 | 8 | testlaunchbox.o: testlaunchbox.cu 9 | nvcc $(NVCCFLAGS) -o $@ -c $< 10 | 11 | mgpucontext.o: ../src/mgpucontext.cu 12 | nvcc $(NVCCFLAGS) -o $@ -c $< 13 | 14 | mgpuutil.o: ../src/mgpuutil.cpp 15 | nvcc $(NVCCFLAGS) -o $@ -c $< 16 | 17 | testlaunchbox: testlaunchbox.o mgpucontext.o mgpuutil.o 18 | nvcc $(NVCCFLAGS) -o $@ $+ 19 | 20 | run: build 21 | ./testlaunchbox 22 | 23 | clean: 24 | rm -f testlaunchbox.o mgpucontext.o mgpuutil.o testlaunchbox 25 | 26 | clobber: 27 | clean 28 | 29 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/testlaunchbox/testlaunchbox.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 
16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | Source Files 24 | 25 | 26 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/testsegsortbyflags/testsegsortbyflags.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | Source Files 24 | 25 | 26 | -------------------------------------------------------------------------------- /extra/cudpp/ext/moderngpu/vs.props: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | ../include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir) 9 | _CRT_SECURE_NO_WARNINGS;_UNICODE;UNICODE;%(PreprocessorDefinitions) 10 | 11 | 12 | compute_20,sm_20;compute_35,sm_35 13 | 14 | 15 | true 16 | false 17 | true 18 | true 19 | 20 | 21 | cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /extra/cudpp/include/cudpp_config.h: -------------------------------------------------------------------------------- 1 | // This file is automatically generated. DO NOT EDIT 2 | 3 | /* #undef CUDPP_STATIC_LIB */ 4 | -------------------------------------------------------------------------------- /extra/cudpp/include/cudpp_config.h.in: -------------------------------------------------------------------------------- 1 | // This file is automatically generated. 
DO NOT EDIT 2 | 3 | #cmakedefine CUDPP_STATIC_LIB -------------------------------------------------------------------------------- /extra/cudpp/license.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2007-2010 The Regents of the University of California, Davis 2 | campus ("The Regents") and NVIDIA Corporation ("NVIDIA"). All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, 5 | are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | * Neither the name of the The Regents, nor NVIDIA, nor the names of its 13 | contributors may be used to endorse or promote products derived from this 14 | software without specific prior written permission. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 20 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 21 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 23 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 24 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 25 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | -------------------------------------------------------------------------------- /extra/cudpp/src/cudpp/cudpp_compact.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------- 2 | // cuDPP -- CUDA Data Parallel Primitives library 3 | // ------------------------------------------------------------- 4 | // $Revision$ 5 | // $Date$ 6 | // ------------------------------------------------------------- 7 | // This source code is distributed under the terms of license.txt in 8 | // the root directory of this source distribution. 9 | // ------------------------------------------------------------- 10 | 11 | /** 12 | * @file 13 | * cudpp_compact.h 14 | * 15 | * @brief Compact functionality header file - contains CUDPP interface (not public) 16 | */ 17 | 18 | #ifndef _CUDPP_COMPACT_H_ 19 | #define _CUDPP_COMPACT_H_ 20 | 21 | class CUDPPCompactPlan; 22 | 23 | extern "C" 24 | void allocCompactStorage(CUDPPCompactPlan* plan); 25 | 26 | extern "C" 27 | void freeCompactStorage(CUDPPCompactPlan* plan); 28 | 29 | extern "C" 30 | void cudppCompactDispatch(void *d_out, 31 | size_t *d_numValidElements, 32 | const void *d_in, 33 | const unsigned int *d_isValid, 34 | size_t numElements, 35 | const CUDPPCompactPlan *plan); 36 | 37 | #endif // _CUDPP_COMPACT_H_ 38 | -------------------------------------------------------------------------------- /extra/cudpp/src/cudpp/cudpp_compress.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------- 2 | // CUDPP -- CUDA Data Parallel Primitives library 3 | // ------------------------------------------------------------- 4 | // $Revision$ 5 | // $Date$ 6 | // ------------------------------------------------------------- 7 | // This source code is distributed under the terms of license.txt 8 | // in the root directory of this source distribution. 
9 | // ------------------------------------------------------------- 10 | 11 | /** 12 | * @file 13 | * cudpp_compress.h 14 | * 15 | * @brief Compress functionality header file - contains CUDPP interface (not public) 16 | */ 17 | 18 | #ifndef _CUDPP_COMPRESS_H_ 19 | #define _CUDPP_COMPRESS_H_ 20 | 21 | class CUDPPCompressPlan; 22 | class CUDPPBwtPlan; 23 | class CUDPPMtfPlan; 24 | 25 | // Compress 26 | extern "C" 27 | void allocCompressStorage(CUDPPCompressPlan* plan); 28 | 29 | extern "C" 30 | void freeCompressStorage(CUDPPCompressPlan* plan); 31 | 32 | extern "C" 33 | void cudppCompressDispatch(unsigned char *d_uncompressed, 34 | int *d_bwtIndex, 35 | unsigned int *d_histSize, 36 | unsigned int *d_hist, 37 | unsigned int *d_encodeOffset, 38 | unsigned int *d_compressedSize, 39 | unsigned int *d_compressed, 40 | size_t numElements, 41 | const CUDPPCompressPlan *plan); 42 | 43 | // BWT 44 | extern "C" 45 | void allocBwtStorage(CUDPPBwtPlan* plan); 46 | 47 | extern "C" 48 | void freeBwtStorage(CUDPPBwtPlan* plan); 49 | 50 | extern "C" 51 | void cudppBwtDispatch(unsigned char *d_in, 52 | unsigned char *d_out, 53 | int *d_index, 54 | size_t numElements, 55 | const CUDPPBwtPlan *plan); 56 | 57 | // MTF 58 | extern "C" 59 | void allocMtfStorage(CUDPPMtfPlan* plan); 60 | 61 | extern "C" 62 | void freeMtfStorage(CUDPPMtfPlan* plan); 63 | 64 | extern "C" 65 | void cudppMtfDispatch(unsigned char *d_in, 66 | unsigned char *d_out, 67 | size_t numElements, 68 | const CUDPPMtfPlan *plan); 69 | 70 | #endif // _CUDPP_COMPRESS_H_ 71 | -------------------------------------------------------------------------------- /extra/cudpp/src/cudpp/cudpp_listrank.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------- 2 | // CUDPP -- CUDA Data Parallel Primitives library 3 | // ------------------------------------------------------------- 4 | // $Revision$ 5 | // $Date$ 6 | // 
------------------------------------------------------------- 7 | // This source code is distributed under the terms of license.txt 8 | // in the root directory of this source distribution. 9 | // ------------------------------------------------------------- 10 | 11 | /** 12 | * @file 13 | * cudpp_listrank.h 14 | * 15 | * @brief ListRank functionality header file - contains CUDPP interface (not public) 16 | */ 17 | 18 | #ifndef _CUDPP_LISTRANK_H_ 19 | #define _CUDPP_LISTRANK_H_ 20 | 21 | class CUDPPListRankPlan; 22 | 23 | // ListRank 24 | extern "C" 25 | void allocListRankStorage(CUDPPListRankPlan* plan); 26 | 27 | extern "C" 28 | void freeListRankStorage(CUDPPListRankPlan* plan); 29 | 30 | extern "C" 31 | CUDPPResult cudppListRankDispatch(void *d_ranked_values, 32 | void *d_unranked_values, 33 | void *d_next_indices, 34 | size_t head, 35 | size_t numElements, 36 | const CUDPPListRankPlan *plan); 37 | 38 | #endif // _CUDPP_LISTRANK_H_ -------------------------------------------------------------------------------- /extra/cudpp/src/cudpp/cudpp_manager.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------- 2 | // cuDPP -- CUDA Data Parallel Primitives library 3 | // ------------------------------------------------------------- 4 | // $Revision: 3572$ 5 | // $Date: 2007-11-19 13:58:06 +0000 (Mon, 19 Nov 2007) $ 6 | // ------------------------------------------------------------- 7 | // This source code is distributed under the terms of license.txt 8 | // in the root directory of this source distribution. 9 | // ------------------------------------------------------------- 10 | #ifndef __CUDPP_MANAGER_H__ 11 | #define __CUDPP_MANAGER_H__ 12 | 13 | #include 14 | 15 | /** @brief Internal manager class for CUDPP resources 16 | * 17 | */ 18 | class CUDPPManager 19 | { 20 | public: 21 | 22 | CUDPPManager(); 23 | ~CUDPPManager(); 24 | 25 | //! 
@internal Convert an opaque handle to a pointer to a manager 26 | //! @param [in] cudppHandle Handle to the Manager object 27 | //! @returns Pointer to CUDPP manager 28 | static CUDPPManager* getManagerFromHandle(CUDPPHandle cudppHandle) 29 | { 30 | return reinterpret_cast(cudppHandle); 31 | } 32 | 33 | void getDeviceProps(cudaDeviceProp & props) { props = m_deviceProps; } 34 | 35 | //! @internal Get an opaque handle for this manager 36 | //! @returns CUDPP handle for this manager 37 | CUDPPHandle getHandle() 38 | { 39 | return reinterpret_cast(this); 40 | } 41 | 42 | private: 43 | cudaDeviceProp m_deviceProps; 44 | }; 45 | 46 | #endif // __CUDPP_MANAGER_H__ 47 | -------------------------------------------------------------------------------- /extra/cudpp/src/cudpp/cudpp_maximal_launch.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------- 2 | // cuDPP -- CUDA Data Parallel Primitives library 3 | // ------------------------------------------------------------- 4 | // $Revision$ 5 | // $Date$ 6 | // ------------------------------------------------------------- 7 | // This source code is distributed under the terms of license.txt 8 | // in the root directory of this source distribution. 
9 | // ------------------------------------------------------------- 10 | #ifndef _MAXIMAL_LAUNCH_H_ 11 | #define _MAXIMAL_LAUNCH_H_ 12 | 13 | #include "cuda_runtime.h" 14 | 15 | extern "C" 16 | size_t maxBlocks(cudaFuncAttributes &attribs, 17 | cudaDeviceProp &devprop, 18 | size_t bytesDynamicSharedMem, 19 | size_t threadsPerBlock); 20 | 21 | extern "C" 22 | size_t maxBlocksFromPointer(void* kernel, 23 | size_t bytesDynamicSharedMem, 24 | size_t threadsPerBlock); 25 | 26 | #ifdef __cplusplus 27 | 28 | template 29 | size_t maxBlocks(T kernel, 30 | size_t bytesDynamicSharedMem, 31 | size_t threadsPerBlock) 32 | { 33 | return maxBlocksFromPointer((void*)kernel, bytesDynamicSharedMem, threadsPerBlock); 34 | } 35 | #endif 36 | 37 | #endif // _MAXIMAL_LAUNCH_H_ 38 | -------------------------------------------------------------------------------- /extra/cudpp/src/cudpp/cudpp_mergesort.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------- 2 | // cuDPP -- CUDA Data Parallel Primitives library 3 | // ------------------------------------------------------------- 4 | // $Revision$ 5 | // $Date$ 6 | // ------------------------------------------------------------- 7 | // This source code is distributed under the terms of license.txt 8 | // in the root directory of this source distribution. 
9 | // ------------------------------------------------------------- 10 | #ifndef __MERGESORT_H__ 11 | #define __MERGESORT_H__ 12 | 13 | #include "cudpp_globals.h" 14 | #include "cudpp.h" 15 | #include "cudpp_plan.h" 16 | 17 | extern "C" 18 | void allocMergeSortStorage(CUDPPMergeSortPlan* plan); 19 | 20 | extern "C" 21 | void freeMergeSortStorage(CUDPPMergeSortPlan* plan); 22 | 23 | extern "C" 24 | void cudppMergeSortDispatch(void *keys, 25 | void *values, 26 | size_t numElements, 27 | const CUDPPMergeSortPlan *plan); 28 | 29 | 30 | #endif // __MERGESORT_H__ 31 | -------------------------------------------------------------------------------- /extra/cudpp/src/cudpp/cudpp_multisplit.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------- 2 | // cuDPP -- CUDA Data Parallel Primitives library 3 | // ------------------------------------------------------------- 4 | // $Revision$ 5 | // $Date$ 6 | // ------------------------------------------------------------- 7 | // This source code is distributed under the terms of license.txt 8 | // in the root directory of this source distribution. 
9 | // ------------------------------------------------------------- 10 | #ifndef __MULTISPLIT_H__ 11 | #define __MULTISPLIT_H__ 12 | 13 | #include 14 | #include "cudpp_globals.h" 15 | #include "cudpp.h" 16 | #include "cudpp_plan.h" 17 | 18 | 19 | extern "C" 20 | void allocMultiSplitStorage(CUDPPMultiSplitPlan* plan); 21 | 22 | extern "C" 23 | void freeMultiSplitStorage(CUDPPMultiSplitPlan* plan); 24 | 25 | extern "C" 26 | void cudppMultiSplitDispatch(unsigned int *d_keys, 27 | unsigned int *d_values, 28 | size_t numElements, 29 | size_t numBuckets, 30 | BucketMappingFunc bucketMappingFunc, 31 | const CUDPPMultiSplitPlan *plan); 32 | 33 | #endif // __MULTISPLIT_H__ 34 | -------------------------------------------------------------------------------- /extra/cudpp/src/cudpp/cudpp_radixsort.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------- 2 | // cuDPP -- CUDA Data Parallel Primitives library 3 | // ------------------------------------------------------------- 4 | // $Revision$ 5 | // $Date$ 6 | // ------------------------------------------------------------- 7 | // This source code is distributed under the terms of license.txt 8 | // in the root directory of this source distribution. 
9 | // ------------------------------------------------------------- 10 | #ifndef __RADIXSORT_H__ 11 | #define __RADIXSORT_H__ 12 | 13 | #include "cudpp_globals.h" 14 | #include "cudpp.h" 15 | #include "cudpp_plan.h" 16 | 17 | 18 | void allocRadixSortStorage(CUDPPRadixSortPlan* plan); 19 | 20 | void freeRadixSortStorage(CUDPPRadixSortPlan* plan); 21 | 22 | void cudppRadixSortDispatch(void *keys, 23 | void *values, 24 | size_t numElements, 25 | const CUDPPRadixSortPlan *plan); 26 | 27 | 28 | #endif // __RADIXSORT_H__ 29 | -------------------------------------------------------------------------------- /extra/cudpp/src/cudpp/cudpp_rand.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------- 2 | // cuDPP -- CUDA Data Parallel Primitives library 3 | // ------------------------------------------------------------- 4 | // $Revision$ 5 | // $Date$ 6 | // ------------------------------------------------------------- 7 | // This source code is distributed under the terms of license.txt in 8 | // the root directory of this source distribution. 
9 | // ------------------------------------------------------------- 10 | 11 | /** 12 | * @file 13 | * cudpp_rand.h 14 | * 15 | * @brief rand functionality header file - contains CUDPP interface (not public) 16 | */ 17 | 18 | #ifndef __CUDPP_RAND_H__ 19 | #define __CUDPP_RAND_H__ 20 | 21 | #include "cudpp_globals.h" 22 | #include "cudpp.h" 23 | #include "cudpp_plan.h" 24 | 25 | extern "C" 26 | void cudppRandDispatch(void * d_out, size_t num_elements, const CUDPPRandPlan * plan); 27 | 28 | #endif //__CUDPP_RAND_H__ 29 | 30 | 31 | -------------------------------------------------------------------------------- /extra/cudpp/src/cudpp/cudpp_reduce.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------- 2 | // cuDPP -- CUDA Data Parallel Primitives library 3 | // ------------------------------------------------------------- 4 | // $Revision$ 5 | // $Date$ 6 | // ------------------------------------------------------------- 7 | // This source code is distributed under the terms of license.txt 8 | // in the root directory of this source distribution. 
9 | // ------------------------------------------------------------- 10 | 11 | /** 12 | * @file 13 | * cudpp_reduce.h 14 | * 15 | * @brief Reduce functionality header file - contains CUDPP interface (not public) 16 | */ 17 | 18 | #ifndef _CUDPP_REDUCE_H_ 19 | #define _CUDPP_REDUCE_H_ 20 | 21 | class CUDPPReducePlan; 22 | 23 | 24 | void allocReduceStorage(CUDPPReducePlan *plan); 25 | 26 | void freeReduceStorage(CUDPPReducePlan *plan); 27 | 28 | void cudppReduceDispatch(void *d_out, 29 | const void *d_in, 30 | size_t numElements, 31 | const CUDPPReducePlan *plan); 32 | 33 | #endif // _CUDPP_REDUCE_H_ 34 | -------------------------------------------------------------------------------- /extra/cudpp/src/cudpp/cudpp_sa.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------- 2 | // cuDPP -- CUDA Data Parallel Primitives library 3 | // ------------------------------------------------------------- 4 | // $Revision$ 5 | // $Date$ 6 | // ------------------------------------------------------------- 7 | // This source code is distributed under the terms of license.txt in 8 | // the root directory of this source distribution. 
9 | // ------------------------------------------------------------- 10 | 11 | /** 12 | * @file 13 | * cudpp_sa.h 14 | * 15 | * @brief Suffix Array functionality header file - contains CUDPP interface (not public) 16 | */ 17 | 18 | #ifndef _CUDPP_SA_H_ 19 | #define _CUDPP_SA_H_ 20 | 21 | class CUDPPSaPlan; 22 | 23 | extern "C" 24 | void allocSaStorage(CUDPPSaPlan* plan); 25 | 26 | extern "C" 27 | void freeSaStorage(CUDPPSaPlan* plan); 28 | 29 | extern "C" 30 | void cudppSuffixArrayDispatch(unsigned char* d_str, 31 | unsigned int* d_keys_sa, 32 | size_t d_str_length, 33 | const CUDPPSaPlan *plan); 34 | 35 | #endif // _CUDPP_SA_H_ 36 | -------------------------------------------------------------------------------- /extra/cudpp/src/cudpp/cudpp_scan.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------- 2 | // cuDPP -- CUDA Data Parallel Primitives library 3 | // ------------------------------------------------------------- 4 | // $Revision$ 5 | // $Date$ 6 | // ------------------------------------------------------------- 7 | // This source code is distributed under the terms of license.txt 8 | // in the root directory of this source distribution. 
9 | // ------------------------------------------------------------- 10 | 11 | /** 12 | * @file 13 | * cudpp_scan.h 14 | * 15 | * @brief Scan functionality header file - contains CUDPP interface (not public) 16 | */ 17 | 18 | #ifndef _CUDPP_SCAN_H_ 19 | #define _CUDPP_SCAN_H_ 20 | 21 | class CUDPPScanPlan; 22 | 23 | extern "C" 24 | void allocScanStorage(CUDPPScanPlan *plan); 25 | 26 | extern "C" 27 | void freeScanStorage(CUDPPScanPlan *plan); 28 | 29 | extern "C" 30 | void cudppScanDispatch(void *d_out, 31 | const void *d_in, 32 | size_t numElements, 33 | size_t numRows, 34 | const CUDPPScanPlan *plan); 35 | 36 | #endif // _CUDPP_SCAN_H_ 37 | -------------------------------------------------------------------------------- /extra/cudpp/src/cudpp/cudpp_segscan.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------- 2 | // cuDPP -- CUDA Data Parallel Primitives library 3 | // ------------------------------------------------------------- 4 | // $Revision$ 5 | // $Date$ 6 | // ------------------------------------------------------------- 7 | // This source code is distributed under the terms of license.txt in 8 | // the root directory of this source distribution. 
9 | // ------------------------------------------------------------- 10 | 11 | /** 12 | * @file 13 | * cudpp_segscan.h 14 | * 15 | * @brief Segmented scan functionality header file - contains CUDPP interface (not public) 16 | */ 17 | 18 | #ifndef _CUDPP_SEGMENTEDSCAN_H_ 19 | #define _CUDPP_SEGMENTEDSCAN_H_ 20 | 21 | class CUDPPSegmentedScanPlan; 22 | 23 | extern "C" 24 | void allocSegmentedScanStorage(CUDPPSegmentedScanPlan *plan); 25 | 26 | extern "C" 27 | void freeSegmentedScanStorage(CUDPPSegmentedScanPlan *plan); 28 | 29 | extern "C" 30 | void cudppSegmentedScanDispatch(void *d_out, 31 | const void *d_idata, 32 | const unsigned int *d_iflags, 33 | size_t numElements, 34 | const CUDPPSegmentedScanPlan *plan); 35 | 36 | #endif // _CUDPP_SEGMENTEDSCAN_H_ 37 | -------------------------------------------------------------------------------- /extra/cudpp/src/cudpp/cudpp_spmvmult.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------- 2 | // cuDPP -- CUDA Data Parallel Primitives library 3 | // ------------------------------------------------------------- 4 | // $Revision$ 5 | // $Date$ 6 | // ------------------------------------------------------------- 7 | // This source code is distributed under the terms of license.txt in 8 | // the root directory of this source distribution. 
9 | // ------------------------------------------------------------- 10 | 11 | /** 12 | * @file 13 | * cudpp_spmvmult.h 14 | * 15 | * @brief Sparse matrix-vector multiply functionality header file - contains CUDPP interface (not public) 16 | */ 17 | 18 | #ifndef _CUDPP_SPMVMULT_H_ 19 | #define _CUDPP_SPMVMULT_H_ 20 | 21 | class CUDPPSparseMatrixVectorMultiplyPlan; 22 | 23 | extern "C" 24 | void allocSparseMatrixVectorMultiplyStorage(CUDPPSparseMatrixVectorMultiplyPlan *plan, 25 | const void *A, 26 | const unsigned int *rowindx, 27 | const unsigned int *indx); 28 | 29 | extern "C" 30 | void freeSparseMatrixVectorMultiplyStorage(CUDPPSparseMatrixVectorMultiplyPlan *plan); 31 | 32 | extern "C" 33 | void cudppSparseMatrixVectorMultiplyDispatch(void *d_y, 34 | const void *d_x, 35 | const CUDPPSparseMatrixVectorMultiplyPlan *plan); 36 | 37 | #endif // _CUDPP_SPMVMULT_H_ 38 | -------------------------------------------------------------------------------- /extra/cudpp/src/cudpp/cudpp_stringsort.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------- 2 | // cuDPP -- CUDA Data Parallel Primitives library 3 | // ------------------------------------------------------------- 4 | // $Revision$ 5 | // $Date$ 6 | // ------------------------------------------------------------- 7 | // This source code is distributed under the terms of license.txt 8 | // in the root directory of this source distribution. 
9 | // ------------------------------------------------------------- 10 | #ifndef __STRINGSORT_H__ 11 | #define __STRINGSORT_H__ 12 | 13 | #include "cudpp_globals.h" 14 | #include "cudpp.h" 15 | #include "cudpp_plan.h" 16 | 17 | extern "C" 18 | void allocStringSortStorage(CUDPPStringSortPlan* plan); 19 | 20 | extern "C" 21 | void freeStringSortStorage(CUDPPStringSortPlan* plan); 22 | 23 | 24 | 25 | extern "C" 26 | void cudppStringSortDispatch(unsigned int *keys, 27 | unsigned int *values, 28 | unsigned int *stringVals, 29 | size_t numElements, 30 | size_t stringArrayLength, 31 | unsigned char termC, 32 | const CUDPPStringSortPlan *plan); 33 | 34 | //Some helper functions needed to transform data 35 | extern "C" 36 | void dotAdd(unsigned int* d_address, 37 | unsigned int* numSpaces, 38 | unsigned int* packedAddress, 39 | size_t numElements, 40 | size_t stringArrayLength); 41 | 42 | extern "C" 43 | void calculateAlignedOffsets(unsigned int* d_address, 44 | unsigned int* numSpaces, 45 | unsigned char* d_stringVals, 46 | unsigned char termC, 47 | size_t numElements, 48 | size_t stringArrayLength); 49 | extern "C" 50 | void packStrings(unsigned int* packedStrings, 51 | unsigned char* d_stringVals, 52 | unsigned int* d_keys, 53 | unsigned int* packedAddress, 54 | unsigned int* address, 55 | size_t numElements, 56 | size_t stringArrayLength, 57 | unsigned char termC); 58 | 59 | extern "C" 60 | void unpackStrings(unsigned int* packedAddress, 61 | unsigned int* packedAddressRef, 62 | unsigned int* address, 63 | unsigned int* addressRef, 64 | size_t numElements); 65 | 66 | #endif // __STRINGSORT_H__ 67 | -------------------------------------------------------------------------------- /extra/cudpp/src/cudpp/cudpp_tridiagonal.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------- 2 | // cuDPP -- CUDA Data Parallel Primitives library 3 | // 
------------------------------------------------------------- 4 | // $Revision$ 5 | // $Date$ 6 | // ------------------------------------------------------------- 7 | // This source code is distributed under the terms of license.txt in 8 | // the root directory of this source distribution. 9 | // ------------------------------------------------------------- 10 | 11 | /** 12 | * @file 13 | * cudpp_tridiagonal.h 14 | * 15 | * @brief Tridiagonal functionality header file - contains CUDPP interface (not public) 16 | */ 17 | 18 | #ifndef __CUDPP_TRIDIAGONAL_H__ 19 | #define __CUDPP_TRIDIAGONAL_H__ 20 | 21 | #include "cudpp_globals.h" 22 | #include "cudpp.h" 23 | #include "cudpp_plan.h" 24 | 25 | CUDPPResult cudppTridiagonalDispatch(void *d_a, 26 | void *d_b, 27 | void *d_c, 28 | void *d_d, 29 | void *d_x, 30 | int systemSize, 31 | int numSystems, 32 | const CUDPPTridiagonalPlan * plan); 33 | 34 | #endif //__CUDPP_TRIDIAGONAL_H__ 35 | -------------------------------------------------------------------------------- /extra/cudpp/src/cudpp_hash/hash_functions.cu: -------------------------------------------------------------------------------- 1 | #include "hash_table.h" 2 | #include "debugging.h" 3 | 4 | #include 5 | 6 | #include 7 | 8 | namespace CudaHT { 9 | namespace CuckooHashing { 10 | 11 | void GenerateFunctions(const unsigned N, 12 | const unsigned num_keys, 13 | const unsigned *d_keys, 14 | const unsigned table_size, 15 | uint2 *constants) { 16 | bool regenerate = true; 17 | 18 | while (regenerate) { 19 | regenerate = false; 20 | 21 | // Generate a set of hash function constants for this build attempt. 22 | for (unsigned i = 0 ; i < N; ++i) { 23 | unsigned new_a = genrand_int32() % kPrimeDivisor; 24 | constants[i].x = (1 > new_a ? 1 : new_a); 25 | constants[i].y = genrand_int32() % kPrimeDivisor; 26 | } 27 | 28 | #ifdef FORCEFULLY_GENERATE_NO_CYCLES 29 | // Ensure that every key gets N different slots. 
30 | regenerate = CheckAssignedSameSlot(N, num_keys, d_keys, table_size, constants); 31 | #endif 32 | } 33 | 34 | 35 | #ifdef TAKE_HASH_FUNCTION_STATISTICS 36 | // Examine how well distributed the items are. 37 | TakeHashFunctionStatistics(num_keys, d_keys, table_size, constants, N); 38 | #endif 39 | } 40 | 41 | }; // namespace CuckooHashing 42 | }; // namespace CudaHT 43 | -------------------------------------------------------------------------------- /extra/cudpp/src/cudpp_hash/mt19937ar.h: -------------------------------------------------------------------------------- 1 | void init_genrand(unsigned long s); 2 | void init_by_array(unsigned long init_key[], int key_length); 3 | unsigned long genrand_int32(void); 4 | long genrand_int31(void); 5 | double genrand_real1(void); 6 | double genrand_real2(void); 7 | double genrand_real3(void); 8 | double genrand_res53(void); 9 | -------------------------------------------------------------------------------- /extra/easy_profiler/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.0) 2 | project(easy_profiler CXX) 3 | 4 | set_property(GLOBAL PROPERTY USE_FOLDERS ON) 5 | 6 | if (CMAKE_VERSION VERSION_LESS "3.1") 7 | if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") 8 | set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++11") 9 | endif () 10 | else () 11 | set(CMAKE_CXX_STANDARD 11) 12 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 13 | endif () 14 | 15 | option(EASY_PROFILER_NO_GUI "Build easy_profiler without the GUI application (required Qt)" OFF) 16 | 17 | set(EASY_PROGRAM_VERSION_MAJOR 2) 18 | set(EASY_PROGRAM_VERSION_MINOR 1) 19 | set(EASY_PROGRAM_VERSION_PATCH 0) 20 | set(EASY_PRODUCT_VERSION_STRING "${EASY_PROGRAM_VERSION_MAJOR}.${EASY_PROGRAM_VERSION_MINOR}.${EASY_PROGRAM_VERSION_PATCH}") 21 | 22 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY 
${CMAKE_CURRENT_BINARY_DIR}/bin) 23 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin) 24 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin) 25 | 26 | # set(CMAKE_INSTALL_PREFIX ${CMAKE_CURRENT_LIST_DIR}/sdk) 27 | 28 | macro(easy_define_target_option TARGET SOURCE_OPTION TARGET_DEFINITION) 29 | if (${SOURCE_OPTION}) 30 | set(_VALUE 1) 31 | else () 32 | set(_VALUE 0) 33 | endif () 34 | target_compile_options(${TARGET} PUBLIC -D${TARGET_DEFINITION}=${_VALUE}) 35 | endmacro() 36 | 37 | SET(CMAKE_INSTALL_RPATH "$ORIGIN") 38 | 39 | add_subdirectory(easy_profiler_core) 40 | if (NOT EASY_PROFILER_NO_GUI) 41 | add_subdirectory(profiler_gui) 42 | endif() 43 | add_subdirectory(easy_profiler_converter) 44 | 45 | if (NOT EASY_PROFILER_NO_SAMPLES) 46 | add_subdirectory(sample) 47 | add_subdirectory(reader) 48 | endif () 49 | -------------------------------------------------------------------------------- /extra/easy_profiler/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016-2018 Sergey Yagovtsev, Victor Zarubkin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /extra/easy_profiler/LICENSE.MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016-2018 Sergey Yagovtsev, Victor Zarubkin 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 7 | of the Software, and to permit persons to whom the Software is furnished 8 | to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 14 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 15 | PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 16 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 17 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 18 | USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /extra/easy_profiler/appveyor.bat: -------------------------------------------------------------------------------- 1 | mkdir build_msvc 2 | cd build_msvc 3 | cmake -G "%GENERATOR%" ../ 4 | cmake --build . 
--config Release 5 | 6 | goto :EOF 7 | -------------------------------------------------------------------------------- /extra/easy_profiler/appveyor.yml: -------------------------------------------------------------------------------- 1 | platform: 2 | - Win64 3 | 4 | configuration: 5 | - Release 6 | 7 | environment: 8 | matrix: 9 | - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2013 10 | Qt5Widgets_DIR: "C:\\Qt\\5.5\\msvc2013_64\\lib\\cmake\\Qt5Widgets" 11 | GENERATOR: "Visual Studio 12 2013 Win64" 12 | 13 | test: off 14 | 15 | build_script: 16 | - CALL appveyor.bat 17 | 18 | skip_commits: 19 | message: /.*\[skip appveyor\].*/ 20 | -------------------------------------------------------------------------------- /extra/easy_profiler/easy_profiler_converter/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(CPP_FILES 2 | converter.cpp 3 | reader.cpp) 4 | 5 | set(HEADER_FILES 6 | converter.h 7 | reader.h) 8 | 9 | include_directories(../easy_profiler_core/) 10 | include_directories(./include) 11 | 12 | add_executable(profiler_converter ${HEADER_FILES} ${CPP_FILES} main.cpp) 13 | target_link_libraries(profiler_converter easy_profiler) 14 | 15 | install( 16 | TARGETS 17 | profiler_converter 18 | RUNTIME 19 | DESTINATION 20 | bin 21 | ) 22 | 23 | set_property(TARGET profiler_converter PROPERTY INSTALL_RPATH_USE_LINK_PATH TRUE) 24 | -------------------------------------------------------------------------------- /extra/easy_profiler/easy_profiler_converter/main.cpp: -------------------------------------------------------------------------------- 1 | ///std 2 | #include 3 | #include 4 | #include "converter.h" 5 | 6 | using namespace profiler::reader; 7 | 8 | int main(int argc, char* argv[]) 9 | { 10 | std::string filename, output_json_filename; 11 | 12 | if (argc > 1 && argv[1]) 13 | { 14 | filename = argv[1]; 15 | } 16 | else 17 | { 18 | std::cout << "Usage: " << argv[0] << " INPUT_PROF_FILE [OUTPUT_JSON_FILE]\n" 19 | 
"where:\n" 20 | "INPUT_PROF_FILE // Required\n" 21 | "OUTPUT_JSON_FILE (if not specified output will be print in stdout) // Optional\n"; 22 | return 1; 23 | } 24 | 25 | if (argc > 2 && argv[2]) 26 | { 27 | output_json_filename = argv[2]; 28 | } 29 | 30 | JsonExporter js; 31 | js.convert(filename, output_json_filename); 32 | 33 | return 0; 34 | } 35 | -------------------------------------------------------------------------------- /extra/easy_profiler/easy_profiler_core/LICENSE.MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016-2018 Sergey Yagovtsev, Victor Zarubkin 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 7 | of the Software, and to permit persons to whom the Software is furnished 8 | to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 14 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 15 | PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 16 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 17 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 18 | USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /extra/easy_profiler/easy_profiler_core/cmake/config.cmake.in: -------------------------------------------------------------------------------- 1 | @PACKAGE_INIT@ 2 | 3 | include("${CMAKE_CURRENT_LIST_DIR}/@targets_export_name@.cmake") 4 | check_required_components("@PROJECT_NAME@") 5 | -------------------------------------------------------------------------------- /extra/easy_profiler/easy_profiler_core/event_trace_status.h: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | #ifndef EASY_PROFILER__EVENT_TRACE_STATUS__H_ 5 | #define EASY_PROFILER__EVENT_TRACE_STATUS__H_ 6 | 7 | ////////////////////////////////////////////////////////////////////////// 8 | ////////////////////////////////////////////////////////////////////////// 9 | 10 | enum class EventTracingEnableStatus : unsigned char 11 | { 12 | LaunchedSuccessfully = 0, 13 | PermissionDenied, 14 | AlreadyLaunched, 15 | BadPropertiesSize, 16 | OpenTraceFailed, 17 | UnknownError, 18 | }; 19 | 20 | ////////////////////////////////////////////////////////////////////////// 21 | ////////////////////////////////////////////////////////////////////////// 22 | 23 | #endif // EASY_PROFILER__EVENT_TRACE_STATUS__H_ 24 | -------------------------------------------------------------------------------- /extra/easy_profiler/easy_profiler_core/include/easy/easy_protocol.h: -------------------------------------------------------------------------------- 1 | #ifndef EASY_PROPROTOCOL_H 2 | #define EASY_PROPROTOCOL_H 3 | ///C++ 4 | #include 5 | #include 6 | #include 7 | 8 | ///this 9 | #include 10 | //#include 11 | 12 | ///for the actual version, visit https://github.com/yse/easy_profiler/wiki/.prof-file-format-v1.3.0 13 | 14 | namespace profiler { 15 | 16 | namespace reader { 17 | 18 | struct BlockDescriptor; 19 | 20 | struct BlocksInfo //12 21 | { 22 | uint32_t totalBlocksCount; //4 bytes 23 | uint64_t 
totalBlocksMemory; //8 bytes 24 | }; 25 | 26 | struct DescriptorsInfo //12 27 | { 28 | uint32_t allDescriptorsCount; //4 bytes 29 | uint64_t allDescriptorsMemory; //8 bytes 30 | }; 31 | 32 | struct FileHeader //64 33 | { 34 | uint32_t signature; //4 35 | uint32_t version; //4 36 | uint64_t processId; //8 37 | int64_t cpuFrequency; //8 38 | uint64_t beginTime; //8 39 | uint64_t endTime; //8 40 | BlocksInfo serializedBlocksInfo; //12 41 | DescriptorsInfo blocksDescriptorInfo; //12 42 | }; 43 | 44 | struct BlockInfo 45 | { 46 | uint64_t beginTime; 47 | uint64_t endTime; 48 | const BlockDescriptor* descriptor; 49 | uint32_t blockIndex; 50 | }; 51 | 52 | struct ContextSwitchEvent 53 | { 54 | uint64_t beginTime; 55 | uint64_t endTime; 56 | uint64_t targetThreadId; 57 | std::string targetProcess; ///< Contains process id and process name 58 | }; 59 | 60 | struct BlockDescriptor 61 | { 62 | uint32_t parentId; ///< This will differ from id if this descriptor was created from runtime named block 63 | uint32_t id; 64 | int lineNumber; 65 | uint32_t argbColor; 66 | uint8_t blockType; 67 | uint8_t status; 68 | std::string blockName; 69 | std::string fileName; 70 | }; 71 | 72 | } //namespace reader 73 | 74 | } //namespace profiler 75 | 76 | #endif 77 | -------------------------------------------------------------------------------- /extra/easy_profiler/easy_profiler_core/resources.rc: -------------------------------------------------------------------------------- 1 | 1 VERSIONINFO 2 | 3 | # define EASY_STRINGIFY(a) #a 4 | # define EASY_STRINGIFICATION(a) EASY_STRINGIFY(a) 5 | 6 | #define EASY_PROFILER_PRODUCT_VERSION "v" EASY_STRINGIFICATION(EASY_PROFILER_VERSION_MAJOR) "." \ 7 | EASY_STRINGIFICATION(EASY_PROFILER_VERSION_MINOR) "." 
\ 8 | EASY_STRINGIFICATION(EASY_PROFILER_VERSION_PATCH) 9 | 10 | FILEVERSION EASY_PROFILER_VERSION_MAJOR, EASY_PROFILER_VERSION_MINOR, EASY_PROFILER_VERSION_PATCH 11 | PRODUCTVERSION EASY_PROFILER_VERSION_MAJOR, EASY_PROFILER_VERSION_MINOR, EASY_PROFILER_VERSION_PATCH 12 | BEGIN 13 | BLOCK "StringFileInfo" 14 | BEGIN 15 | BLOCK "080904b0" 16 | BEGIN 17 | VALUE "CompanyName", "EasySolutions" 18 | VALUE "FileDescription", "Lightweight profiler library for C++" 19 | VALUE "LegalCopyright", "Copyright (C) 2016-2018 Victor Zarubkin, Sergey Yagovtsev" 20 | VALUE "LegalTrademarks1", "All Rights Reserved" 21 | VALUE "LegalTrademarks2", "All Rights Reserved" 22 | VALUE "ProductName", "easy_profiler lib" 23 | VALUE "ProductVersion", EASY_PROFILER_PRODUCT_VERSION 24 | END 25 | END 26 | BLOCK "VarFileInfo" 27 | BEGIN 28 | VALUE "Translation", 0x809, 1200 29 | END 30 | END 31 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/arrow-down-disabled.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/arrow-down-hover.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/arrow-down-pressed.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/arrow-down.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 11 | 12 | 13 | 
-------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/arrow-left.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/arrow-right.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/arrow-up-disabled.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/arrow-up-hover.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/arrow-up-pressed.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/arrow-up.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/big-o.svg: -------------------------------------------------------------------------------- 1 | 2 | 4 | 7 | 8 | 15 | 20 | 21 | 22 | -------------------------------------------------------------------------------- 
/extra/easy_profiler/profiler_gui/images/default/binoculars.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 8 | 10 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/check-disabled.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/check-partial-disabled.svg: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/check-partial.svg: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/check.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/close-hover.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/close-white-hover.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 9 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/close-white-pressed.svg: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 9 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/close-white.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 9 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/close.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/collapse.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 9 | 11 | 13 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/crop.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 | 10 | 11 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/csv.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/delete-old.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 7 | 9 | 10 | 11 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/delete.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 
| 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/expand.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 8 | 9 | 10 | 12 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/lan.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 11 | 12 | 13 | 15 | 16 | 17 | 18 | 19 | 20 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/lan_on.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 11 | 12 | 13 | 15 | 16 | 17 | 18 | 19 | 20 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/maximize-white-hover.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/maximize-white-pressed.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/maximize-white.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/minimize-white-hover.svg: -------------------------------------------------------------------------------- 1 | 2 | 
3 | 4 | 7 | 8 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/minimize-white-pressed.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/minimize-white.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/minimize.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/off.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 9 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/open-folder.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 10 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/open-folder2.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 11 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/play.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 8 | 10 | 11 | 12 | -------------------------------------------------------------------------------- 
/extra/easy_profiler/profiler_gui/images/default/radio-indicator-disabled.svg: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/radio-indicator.svg: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/reload-folder2.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 10 | 14 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/reload.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 9 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/search-next.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 14 | 18 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/search-prev.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 14 | 18 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/statistics.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 9 | 11 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/statistics2.svg: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 8 | 13 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/stop.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 8 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/to-fullscreen.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/to-window.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 13 | 14 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/wifi.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 9 | 12 | 15 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/wifi_on.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 9 | 12 | 15 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/window.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/default/yx.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 8 | 12 | 14 | 15 | 16 | 17 | 
-------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/logo.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THU-luvision/INS-Conv/c1cdd4187803f1d50e00610ae947ed337a7eb93f/extra/easy_profiler/profiler_gui/images/logo.ico -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/images/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 13 | 15 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /extra/easy_profiler/profiler_gui/resources.rc: -------------------------------------------------------------------------------- 1 | IDI_ICON1 ICON DISCARDABLE "images/logo.ico" 2 | 1 VERSIONINFO 3 | FILEVERSION EASY_PROFILER_VERSION_MAJOR, EASY_PROFILER_VERSION_MINOR, EASY_PROFILER_VERSION_PATCH 4 | PRODUCTVERSION EASY_PROFILER_VERSION_MAJOR, EASY_PROFILER_VERSION_MINOR, EASY_PROFILER_VERSION_PATCH 5 | 6 | # define EASY_STRINGIFY(a) #a 7 | # define EASY_STRINGIFICATION(a) EASY_STRINGIFY(a) 8 | 9 | #define EASY_PROFILER_PRODUCT_VERSION "v" EASY_STRINGIFICATION(EASY_PROFILER_VERSION_MAJOR) "." \ 10 | EASY_STRINGIFICATION(EASY_PROFILER_VERSION_MINOR) "." 
# Source files shared by both sample executables defined below.
set(CPP_FILES
    main.cpp
)

set(SOURCES
    ${CPP_FILES}
)

# Add ../bin (relative to the top-level source directory) to the linker
# search path.
link_directories(${CMAKE_SOURCE_DIR}/../bin)

# Sample application linked against easy_profiler.
add_executable(profiler_sample ${SOURCES})
target_link_libraries(profiler_sample easy_profiler)

# Same sources, but compiled with DISABLE_EASY_PROFILER defined.
# scripts/test.sh runs this binary next to profiler_sample and compares
# their elapsed times.
add_executable(profiler_sample_disabled_profiler ${SOURCES})
target_link_libraries(profiler_sample_disabled_profiler easy_profiler)
target_compile_definitions(profiler_sample_disabled_profiler PRIVATE DISABLE_EASY_PROFILER)
#include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | int OBJECTS = 500; 13 | 14 | void modellingThread(){ 15 | EASY_THREAD("Modelling"); 16 | 17 | static const int N = OBJECTS; 18 | 19 | volatile double *pos[N]; 20 | for (int i = 0; i < N; ++i) 21 | { 22 | pos[i] = new volatile double[3]; 23 | } 24 | 25 | { 26 | EASY_BLOCK("Collisions"); 27 | volatile int i, j; 28 | volatile double dist; 29 | for (i = 0; i < N; ++i) 30 | { 31 | for (j = i + 1; j < N; ++j) 32 | { 33 | EASY_BLOCK("Check"); 34 | volatile double v[3]; 35 | v[0] = pos[i][0] - pos[j][0]; 36 | v[1] = pos[i][1] - pos[j][1]; 37 | v[2] = pos[i][2] - pos[j][2]; 38 | dist = v[0] * v[0] + v[1] * v[1] + v[2] * v[2]; 39 | if (dist < 10000) 40 | { 41 | dist *= dist; 42 | } 43 | } 44 | } 45 | } 46 | 47 | for (int i = 0; i < N; ++i) 48 | { 49 | delete [] pos[i]; 50 | } 51 | } 52 | 53 | ////////////////////////////////////////////////////////////////////////// 54 | 55 | int main(int argc, char* argv[]) 56 | { 57 | if (argc > 1 && argv[1]){ 58 | OBJECTS = std::atoi(argv[1]); 59 | } 60 | 61 | std::cout << "Objects count: " << OBJECTS << std::endl; 62 | 63 | auto start = std::chrono::system_clock::now(); 64 | 65 | 66 | EASY_PROFILER_ENABLE; 67 | EASY_MAIN_THREAD; 68 | 69 | 70 | modellingThread(); 71 | 72 | auto end = std::chrono::system_clock::now(); 73 | auto elapsed = 74 | std::chrono::duration_cast(end - start); 75 | 76 | std::cout << "Elapsed time: " << elapsed.count() << " usec" << std::endl; 77 | 78 | auto blocks_count = profiler::dumpBlocksToFile("test.prof"); 79 | 80 | std::cout << "Blocks count: " << blocks_count << std::endl; 81 | 82 | return 0; 83 | } 84 | -------------------------------------------------------------------------------- /extra/easy_profiler/scripts/context_switch_logger.stp: -------------------------------------------------------------------------------- 1 | global target_pid 2 | global target_name 3 | 4 | probe scheduler.ctxswitch 
#!/bin/bash
# Reformat every *.h and *.cpp file below DIRECTORY in place with clang-format.
#
# Usage: make_style.sh DIRECTORY

if [ "$#" -ne 1 ]; then
    echo -e "Usage: \n$0 DIRECTORY\n\twhere DIRECTORY is a directory with sources for styling"
    exit 1
fi

if ! [ -x "$(command -v clang-format)" ]; then
    echo 'Error: clang-format is not installed. Please install clang-format with minimal version 3.8' >&2
    exit 1
fi

DIR=$1

if [ ! -d "$DIR" ]; then
    echo "Error: '$DIR' is not a directory" >&2
    exit 1
fi

# Stream NUL-delimited paths instead of word-splitting the output of find, so
# file names containing spaces or newlines are handled correctly. The
# parentheses make -type f apply to both name patterns.
find "$DIR" -type f \( -name "*.h" -o -name "*.cpp" \) -print0 |
while IFS= read -r -d '' FILE
do
    echo "Set style for $FILE"
    clang-format -i "$FILE"
done
class Naive_UNet(nn.Module):
    """Sparse U-Net followed by a per-point linear classification head.

    The sparse part is an ``scn.Sequential`` of an input layer, a
    submanifold convolution (3 input planes -> ``m``), an ``scn.UNet`` with
    seven levels of ``m, 2m, ..., 7m`` planes, batch-norm + ReLU and an
    output layer. The result is fed to ``nn.Linear(m, num_classes)``.

    Args:
        config: Mapping providing ``'dimension'`` and ``'full_scale'``.
        m: Base number of feature planes (16 or 32).
        residual_blocks: Whether the U-Net uses residual blocks.
        block_reps: Convolution block repetition factor (1 or 2).
        num_classes: Number of outputs of the final linear layer.

    The keyword defaults reproduce the previously hard-coded configuration,
    so ``Naive_UNet(config)`` builds the same network as before.
    """

    def __init__(self, config, m=32, residual_blocks=True, block_reps=2,
                 num_classes=20):
        super().__init__()
        dimension = config['dimension']
        # Feature widths for the seven U-Net levels: m, 2m, ..., 7m.
        unet_planes = [level * m for level in range(1, 8)]

        self.sparseModel = scn.Sequential().add(
            scn.InputLayer(dimension, config['full_scale'], mode=4)).add(
            scn.SubmanifoldConvolution(dimension, 3, m, 3, False)).add(
            scn.UNet(dimension, block_reps, unet_planes, residual_blocks)).add(
            scn.BatchNormReLU(m)).add(
            scn.OutputLayer(dimension))
        self.linear = nn.Linear(m, num_classes)

    def forward(self, x, increment=False):
        """Run the sparse model, then the linear head.

        ``increment`` is passed through unchanged as the second argument of
        ``self.sparseModel``.
        """
        x = self.sparseModel(x, increment)
        x = self.linear(x)
        return x
import torch, os
from torch.utils.cpp_extension import BuildExtension, CppExtension, CUDAExtension
from setuptools import setup, find_packages

this_dir = os.path.dirname(os.path.realpath(__file__))
torch_dir = os.path.dirname(torch.__file__)
# Four directory levels above the torch package, plus /include.
# NOTE(review): presumably the conda environment's include directory
# (<prefix>/lib/pythonX.Y/site-packages/torch) -- confirm for other layouts.
conda_include_dir = '/'.join(torch_dir.split('/')[:-4]) + '/include'

# Both flag sets define BUILD_WITH_EASY_PROFILER, so every extension variant
# needs the easy_profiler headers on its include path and the library at link
# time. To build without the profiler, drop the -D flag (and '-g') here.
extra = {'cxx': ['-std=c++11','-g', '-fopenmp', '-DBUILD_WITH_EASY_PROFILER'], 'nvcc': ['-std=c++11', '-Xcompiler', '-fopenmp', '-DBUILD_WITH_EASY_PROFILER']}

# Shared by the CUDA and the CPU-only extension so the two cannot drift apart.
scn_include_dirs = [
    conda_include_dir,
    this_dir + '/sparseconvnet/SCN/',
    this_dir + '/extra/easy_profiler/easy_profiler_core/include',
    this_dir + '/extra/cudpp/include',
]
scn_library_dirs = [
    '/usr/local/lib',
    this_dir + '/extra/cudpp/build/lib',
    this_dir + '/extra/easy_profiler/build/bin',
]
scn_libraries = ['easy_profiler', 'cudpp', 'cudpp_hash']

print(this_dir)
setup(
    name='sparseconvnet',
    version='0.2',
    description='Submanifold (Spatially) Sparse Convolutional Networks https://arxiv.org/abs/1706.01307',
    author='Facebook AI Research',
    author_email='benjamingraham@fb.com',
    url='https://github.com/facebookresearch/SparseConvNet',
    packages=['sparseconvnet','sparseconvnet.SCN'],
    ext_modules=[
      CUDAExtension('sparseconvnet.SCN',
        [
         'sparseconvnet/SCN/cuda.cu', 'sparseconvnet/SCN/sparseconvnet_cuda.cpp', 'sparseconvnet/SCN/pybind.cpp'],
        include_dirs=scn_include_dirs,
        library_dirs=scn_library_dirs,
        libraries=scn_libraries,
        extra_compile_args=extra)
      if torch.cuda.is_available()  else
      # The CPU build previously listed the easy_profiler / cudpp header
      # directories under library_dirs, leaving them off the include path.
      CppExtension('sparseconvnet.SCN',
        ['sparseconvnet/SCN/pybind.cpp', 'sparseconvnet/SCN/sparseconvnet_cpu.cpp'],
        include_dirs=scn_include_dirs,
        library_dirs=scn_library_dirs,
        libraries=scn_libraries,
        extra_compile_args=extra['cxx'])],
    cmdclass={'build_ext': BuildExtension},
    zip_safe=False,
)
iF[i] * nz[plane] : iF[i] * nz[plane] * alpha; 21 | } 22 | template 23 | void cpu_BatchwiseMultiplicativeDropout_updateGradInput( 24 | /*float*/ at::Tensor input_features, /*float*/ at::Tensor d_input_features, 25 | /*float*/ at::Tensor d_output_features, /*float*/ at::Tensor noise, 26 | T alpha) { 27 | d_input_features.resize_as_(d_output_features); 28 | auto nActive = input_features.size(0); 29 | auto nPlanes = input_features.size(1); 30 | auto iF = input_features.data(); 31 | auto diF = d_input_features.data(); 32 | auto doF = d_output_features.data(); 33 | auto nz = noise.data(); 34 | for (Int row = 0; row < nActive; row++) 35 | for (Int plane = 0, o = row * nPlanes, i = row * nPlanes; plane < nPlanes; 36 | plane++, o++, i++) 37 | diF[i] = (iF[i] > 0) ? doF[o] * nz[plane] : doF[o] * nz[plane] * alpha; 38 | } 39 | -------------------------------------------------------------------------------- /sparseconvnet/SCN/CPU/LeakyReLU.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2016-present, Facebook, Inc. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | template 8 | void cpu_LeakyReLU_updateOutput(/*float*/ at::Tensor input_features, 9 | /*float*/ at::Tensor output_features, T alpha) { 10 | output_features.resize_as_(input_features); 11 | auto iF = input_features.data(); 12 | auto oF = output_features.data(); 13 | auto n = input_features.numel(); 14 | 15 | for (Int i = 0; i < n; i++) { 16 | const T x = iF[i]; 17 | const T r = (x > 0) ? 
1 : alpha; 18 | oF[i] = x * r; 19 | } 20 | } 21 | template 22 | void cpu_LeakyReLU_updateGradInput(/*float*/ at::Tensor input_features, 23 | /*float*/ at::Tensor d_input_features, 24 | /*float*/ at::Tensor d_output_features, 25 | T alpha) { 26 | d_input_features.resize_as_(d_output_features); 27 | auto iF = input_features.data(); 28 | auto diF = d_input_features.data(); 29 | auto doF = d_output_features.data(); 30 | auto n = d_input_features.numel(); 31 | 32 | for (Int i = 0; i < n; i++) { 33 | const T r = (iF[i] > 0) ? 1 : alpha; 34 | diF[i] = doF[i] * r; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /sparseconvnet/SCN/CPU/NetworkInNetwork.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2016-present, Facebook, Inc. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 
6 | 7 | template 8 | double cpu_NetworkInNetwork_updateOutput(/*float*/ at::Tensor input_features, 9 | /*float*/ at::Tensor output_features, 10 | /*float*/ at::Tensor weight, 11 | /*float*/ at::Tensor bias) { 12 | auto nActive = input_features.size(0); 13 | auto input_nPlanes = weight.size(0); 14 | auto output_nPlanes = weight.size(1); 15 | output_features.resize_({nActive, output_nPlanes}); 16 | if (bias.numel()) 17 | output_features.copy_(bias); 18 | else 19 | output_features.zero_(); 20 | if (nActive) 21 | output_features.addmm_(input_features, weight); 22 | return nActive * input_nPlanes * output_nPlanes; 23 | } 24 | template 25 | void cpu_NetworkInNetwork_updateGradInput( 26 | /*float*/ at::Tensor d_input_features, 27 | /*float*/ at::Tensor d_output_features, 28 | /*float*/ at::Tensor weight) { 29 | 30 | int nActive = d_output_features.size(0); 31 | d_input_features.resize_({nActive, weight.size(0)}); 32 | d_input_features.zero_(); 33 | if (nActive) 34 | at::mm_out(d_input_features, d_output_features, weight.t()); 35 | } 36 | template 37 | void cpu_NetworkInNetwork_accGradParameters( 38 | /*float*/ at::Tensor input_features, 39 | /*float*/ at::Tensor d_output_features, 40 | /*float*/ at::Tensor d_weight, /*float*/ at::Tensor d_bias) { 41 | auto nActive = input_features.size(0); 42 | if (nActive and d_bias.numel()) 43 | at::sum_out(d_bias, d_output_features, {0}, false); 44 | if (nActive) 45 | at::mm_out(d_weight, input_features.t(), d_output_features); 46 | } 47 | -------------------------------------------------------------------------------- /sparseconvnet/SCN/CUDA/ActivePooling.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2016-present, Facebook, Inc. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 
6 | 7 | template 8 | void ActivePooling_ForwardPass(T *input_features, T *output_features, 9 | Int batchSize, Int maxActive, Int nPlanes, 10 | Int *rules, bool average); 11 | 12 | template 13 | void ActivePooling_BackwardPass(T *d_input_features, T *d_output_features, 14 | Int batchSize, Int maxActive, Int nPlanes, 15 | Int *rules, bool average); 16 | 17 | template 18 | void cuda_ActivePooling_updateOutput( 19 | /*long*/ at::Tensor inputSize, Metadata &m, 20 | /*cuda float*/ at::Tensor input_features, 21 | /*cuda float*/ at::Tensor output_features, bool average) { 22 | 23 | Int nPlanes = input_features.size(1); 24 | auto _rules = m.getActivePoolingRuleBook(inputSize); 25 | Int batchSize = _rules[1][0]; 26 | Int maxActive = _rules[1][1]; 27 | output_features.resize_({batchSize, nPlanes}); 28 | output_features.zero_(); 29 | 30 | auto iF = input_features.data(); 31 | auto oF = output_features.data(); 32 | ActivePooling_ForwardPass(iF, oF, batchSize, maxActive, nPlanes, 33 | &_rules[0][0], average); 34 | } 35 | template 36 | void cuda_ActivePooling_updateGradInput( 37 | /*long*/ at::Tensor inputSize, Metadata &m, 38 | /*cuda float*/ at::Tensor input_features, 39 | /*cuda float*/ at::Tensor d_input_features, 40 | /*cuda float*/ at::Tensor d_output_features, bool average) { 41 | 42 | Int nPlanes = input_features.size(1); 43 | auto _rules = m.getActivePoolingRuleBook(inputSize); 44 | Int batchSize = _rules[1][0]; 45 | Int maxActive = _rules[1][1]; 46 | d_input_features.resize_as_(input_features); 47 | d_input_features.zero_(); 48 | 49 | auto diF = d_input_features.data(); 50 | auto doF = d_output_features.data(); 51 | 52 | ActivePooling_BackwardPass(diF, doF, batchSize, maxActive, nPlanes, 53 | &_rules[0][0], average); 54 | } 55 | -------------------------------------------------------------------------------- /sparseconvnet/SCN/CUDA/BatchwiseMultiplicativeDropout.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 
2016-present, Facebook, Inc. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | template 8 | void bmd_f(T *input_features, T *output_features, T *noise, Int nActive, 9 | Int nPlanes, T alpha); 10 | template 11 | void bmd_b(T *input_features, T *d_input_features, T *d_output_features, 12 | T *noise, Int nActive, Int nPlanes, T alpha); 13 | 14 | template 15 | void cuda_BatchwiseMultiplicativeDropout_updateOutput( 16 | /*cuda float*/ at::Tensor input_features, 17 | /*cuda float*/ at::Tensor output_features, /*cuda float*/ at::Tensor noise, 18 | T alpha) { 19 | output_features.resize_as_(input_features); 20 | auto nActive = input_features.size(0); 21 | auto nPlanes = input_features.size(1); 22 | bmd_f(input_features.data(), output_features.data(), noise.data(), 23 | nActive, nPlanes, alpha); 24 | } 25 | 26 | template 27 | void cuda_BatchwiseMultiplicativeDropout_updateGradInput( 28 | /*cuda float*/ at::Tensor input_features, 29 | /*cuda float*/ at::Tensor d_input_features, 30 | /*cuda float*/ at::Tensor d_output_features, 31 | /*cuda float*/ at::Tensor noise, T alpha) { 32 | d_input_features.resize_as_(d_output_features); 33 | auto nActive = input_features.size(0); 34 | auto nPlanes = input_features.size(1); 35 | bmd_b(input_features.data(), d_input_features.data(), 36 | d_output_features.data(), noise.data(), nActive, nPlanes, alpha); 37 | } 38 | -------------------------------------------------------------------------------- /sparseconvnet/SCN/CUDA/LeakyReLU.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2016-present, Facebook, Inc. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 
6 | 7 | template 8 | void LeakyReLU_fp(T *input_features, T *output_features, Int n, T alpha); 9 | template 10 | void LeakyReLU_bp(T *input_features, T *d_input_features, T *output_features, 11 | Int n, T alpha); 12 | 13 | template 14 | void cuda_LeakyReLU_updateOutput(/*cuda float*/ at::Tensor input_features, 15 | /*cuda float*/ at::Tensor output_features, 16 | T alpha) { 17 | output_features.resize_as_(input_features); 18 | auto n = input_features.numel(); 19 | LeakyReLU_fp(input_features.data(), output_features.data(), n, 20 | alpha); 21 | } 22 | 23 | template 24 | void cuda_LeakyReLU_updateGradInput( 25 | /*cuda float*/ at::Tensor input_features, 26 | /*cuda float*/ at::Tensor d_input_features, 27 | /*cuda float*/ at::Tensor d_output_features, T alpha) { 28 | d_input_features.resize_as_(d_output_features); 29 | auto n = d_input_features.numel(); 30 | LeakyReLU_bp(input_features.data(), d_input_features.data(), 31 | d_output_features.data(), n, alpha); 32 | } 33 | -------------------------------------------------------------------------------- /sparseconvnet/SCN/CUDA/LeakyReLU.cu: -------------------------------------------------------------------------------- 1 | // Copyright 2016-present, Facebook, Inc. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | template 8 | __global__ void LeakyReLU_fp_(T *input_features, T *output_features, Int n, 9 | T alpha) { 10 | for (Int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += 16 * 1024) 11 | output_features[i] = (input_features[i] > 0) ? 
// Backward kernel of LeakyReLU.
// d_input[i] = d_output[i] where input[i] > 0, otherwise d_output[i] * alpha,
// for every i in [0, n).
//
// Each thread starts at its global index and advances by the total number of
// launched threads (grid-stride loop). The stride is derived from the launch
// configuration instead of the literal 16 * 1024, so the kernel stays correct
// if the <<<blocks, threads>>> values in the launcher are ever changed; with
// the current <<<16, 1024>>> launch the stride is the same 16384 as before.
template <typename T>
__global__ void LeakyReLU_bp_(T *input_features, T *d_input_features,
                              T *d_output_features, Int n, T alpha) {
  const Int stride = (Int)gridDim.x * (Int)blockDim.x;
  for (Int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += stride)
    d_input_features[i] = (input_features[i] > 0)
                              ? d_output_features[i]
                              : (d_output_features[i] * alpha);
}
6 | 7 | #include 8 | 9 | template 10 | double cuda_NetworkInNetwork_updateOutput( 11 | /*cuda float*/ at::Tensor input_features, 12 | /*cuda float*/ at::Tensor output_features, 13 | /*cuda float*/ at::Tensor weight, /*cuda float*/ at::Tensor bias) { 14 | auto nActive = input_features.size(0); 15 | auto input_nPlanes = weight.size(0); 16 | auto output_nPlanes = weight.size(1); 17 | output_features.resize_({nActive, output_nPlanes}); 18 | if (bias.numel()) 19 | output_features.copy_(bias); 20 | else 21 | output_features.zero_(); 22 | if (nActive) 23 | output_features.addmm_(input_features, weight); 24 | return nActive * input_nPlanes * output_nPlanes; 25 | } 26 | 27 | template 28 | void cuda_NetworkInNetwork_updateGradInput( 29 | /*cuda float*/ at::Tensor d_input_features, 30 | /*cuda float*/ at::Tensor d_output_features, 31 | /*cuda float*/ at::Tensor weight) { 32 | 33 | int nActive = d_output_features.size(0); 34 | d_input_features.resize_({nActive, weight.size(0)}); 35 | d_input_features.zero_(); 36 | if (nActive) 37 | at::mm_out(d_input_features, d_output_features, weight.t()); 38 | } 39 | 40 | template 41 | void cuda_NetworkInNetwork_accGradParameters( 42 | /*cuda float*/ at::Tensor input_features, 43 | /*cuda float*/ at::Tensor d_output_features, 44 | /*cuda float*/ at::Tensor d_weight, /*cuda float*/ at::Tensor d_bias) { 45 | auto nActive = input_features.size(0); 46 | if (nActive and d_bias.numel()) 47 | at::sum_out(d_bias, d_output_features, {0}, false); 48 | if (nActive) 49 | at::mm_out(d_weight, input_features.t(), d_output_features); 50 | } 51 | -------------------------------------------------------------------------------- /sparseconvnet/SCN/CUDA/SparseToDense.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2016-present, Facebook, Inc. 2 | // All rights reserved. 
3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | template 8 | void cuda_SparseToDense_ForwardPass(T *input_features, T *output_features, 9 | Int nPlanes, Int spatialVolume, 10 | RuleBook _rules); 11 | template 12 | void cuda_SparseToDense_BackwardPass(T *d_input_features, T *d_output_features, 13 | Int nPlanes, Int spatialVolume, 14 | RuleBook _rules); 15 | 16 | template 17 | void cuda_SparseToDense_updateOutput( 18 | /*long*/ at::Tensor inputSize, Metadata &m, 19 | /*cuda float*/ at::Tensor input_features, 20 | /*cuda float*/ at::Tensor output_features, long nPlanes) { 21 | 22 | { 23 | std::array sz; 24 | sz[0] = m.grids.begin()->second.size(); // batch size 25 | sz[1] = nPlanes; 26 | long *in_sz = inputSize.data(); 27 | for (Int i = 0; i < Dimension; ++i) 28 | sz[i + 2] = in_sz[i]; 29 | output_features.resize_(sz); 30 | output_features.zero_(); 31 | } 32 | if (input_features.ndimension() == 2) { 33 | auto _rules = m.getSparseToDenseRuleBook(inputSize, true); 34 | Int _nPlanes = input_features.size(1); 35 | auto iF = input_features.data(); 36 | auto oF = output_features.data(); 37 | long spatialVolume = inputSize.prod().data()[0]; 38 | cuda_SparseToDense_ForwardPass(iF, oF, _nPlanes, spatialVolume, _rules); 39 | } 40 | } 41 | template 42 | void cuda_SparseToDense_updateGradInput( 43 | /*long*/ at::Tensor inputSize, Metadata &m, 44 | /*cuda float*/ at::Tensor input_features, 45 | /*cuda float*/ at::Tensor d_input_features, 46 | /*cuda float*/ at::Tensor d_output_features) { 47 | 48 | d_input_features.resize_as_(input_features); 49 | d_input_features.zero_(); 50 | 51 | if (input_features.ndimension() == 2) { 52 | auto _rules = m.getSparseToDenseRuleBook(inputSize, true); 53 | long spatialVolume = inputSize.prod().data()[0]; 54 | Int _nPlanes = d_input_features.size(1); 55 | auto diF = d_input_features.data(); 56 | auto doF = d_output_features.data(); 57 | 
cuda_SparseToDense_BackwardPass(diF, doF, _nPlanes, spatialVolume, 58 | _rules); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /sparseconvnet/SCN/Metadata/64bits.h: -------------------------------------------------------------------------------- 1 | // Copyright 2016-present, Facebook, Inc. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #include 8 | 9 | // Using 64 bit integers for coordinates and memory calculations. 10 | 11 | using Int = int64_t; 12 | 13 | // Point is a point in the d-dimensional integer lattice 14 | // (i.e. square-grid/cubic-grid, ...) 15 | template using Point = std::array; 16 | 17 | template 18 | Point LongTensorToPoint(/*long*/ at::Tensor &t) { 19 | Point p; 20 | long *td = t.data(); 21 | for (Int i = 0; i < dimension; i++) 22 | p[i] = td[i]; 23 | return p; 24 | } 25 | template 26 | Point<2 * dimension> TwoLongTensorsToPoint(/*long*/ at::Tensor &t0, 27 | /*long*/ at::Tensor &t1) { 28 | Point<2 * dimension> p; 29 | long *td; 30 | td = t0.data(); 31 | for (Int i = 0; i < dimension; i++) 32 | p[i] = td[i]; 33 | td = t1.data(); 34 | for (Int i = 0; i < dimension; i++) 35 | p[i + dimension] = td[i]; 36 | return p; 37 | } 38 | template 39 | Point<3 * dimension> ThreeLongTensorsToPoint(/*long*/ at::Tensor &t0, 40 | /*long*/ at::Tensor &t1, 41 | /*long*/ at::Tensor &t2) { 42 | Point<3 * dimension> p; 43 | long *td; 44 | td = t0.data(); 45 | for (Int i = 0; i < dimension; i++) 46 | p[i] = td[i]; 47 | td = t1.data(); 48 | for (Int i = 0; i < dimension; i++) 49 | p[i + dimension] = td[i]; 50 | td = t2.data(); 51 | for (Int i = 0; i < dimension; i++) 52 | p[i + 2 * dimension] = td[i]; 53 | return p; 54 | } 55 | 56 | // FNV Hash function for Point 57 | template struct IntArrayHash { 58 | std::size_t operator()(Point const &p) const { 59 | Int hash = 
-3750763034362895579; // 14695981039346656037; 60 | for (auto x : p) { 61 | hash *= 1099511628211; 62 | hash ^= x; 63 | } 64 | return hash; 65 | } 66 | }; 67 | 68 | #define at_kINT at::kLong 69 | -------------------------------------------------------------------------------- /sparseconvnet/SCN/Metadata/ActivePoolingRules.h: -------------------------------------------------------------------------------- 1 | // Copyright 2016-present, Facebook, Inc. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #ifndef ACTIVEPOOLING_H 8 | #define ACTIVEPOOLING_H 9 | 10 | // Return the maximum number of active sites in the batch 11 | // rules has size 1. 12 | // rules[0] is a batchSize x (maxActive + 1) matrix. 13 | // First column is number of active sites for that sample (<= maxActive) 14 | // Remaining maxActive columns give the active sites, zero padded. 15 | 16 | template 17 | void activePoolingRules(SparseGrids &SGs, RuleBook &rules) { 18 | rules.clear(); 19 | rules.resize(2); 20 | auto &r = rules[0]; 21 | Int maxActive = 0; 22 | for (auto &sg : SGs) 23 | maxActive = std::max(maxActive, (Int)sg.mp.size()); 24 | for (auto &sg : SGs) { 25 | r.push_back(sg.mp.size()); 26 | for (auto &iter : sg.mp) 27 | r.push_back(sg.ctr + iter.second); 28 | while (rules.size() % (maxActive + 1) != 0) 29 | r.push_back(0); // padding 30 | } 31 | rules[1].push_back(SGs.size()); 32 | rules[1].push_back(maxActive); 33 | } 34 | #endif /* ACTIVEPOOLING_H */ 35 | -------------------------------------------------------------------------------- /sparseconvnet/SCN/misc/drawCurve.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2016-present, Facebook, Inc. 2 | // All rights reserved. 
// Helper function to draw pen strokes with
// nPlanes = 3, feature vector = (1,dx,dy)
//
// Walks the polyline stored in `stroke` one segment at a time. For every
// sampled position along a segment it calls m.setInputSpatialLocation() with
// that position and the segment's feature vector
// (1, direction x / length, direction y / length).
//
// m        - 2-d metadata object that receives the sampled locations
// features - handed unchanged to m.setInputSpatialLocation()
// stroke   - polyline points, read as consecutive (x, y) float pairs
//
// NOTE(review): the first point is read before the segment loop runs, so
// `stroke` presumably has to contain at least one point - confirm at callers.
void cpu_float_DrawCurve_2(Metadata<2> &m,
                           /*float*/ at::Tensor features,
                           /*float*/ at::Tensor stroke) {
  // Scratch tensors reused for every sample: a 2-element kLong location and a
  // 3-element kFloat feature vector, written through the raw pointers below.
  at::Tensor location = at::zeros(at::CPU(at::kLong), {2});
  auto location_ = location.data();

  auto vec = at::zeros(at::CPU(at::kFloat), {3});
  auto vec_ = vec.data();

  int n = stroke.size(0) - 1;
  float *s = stroke.data(); // stroke is a [n+1,2] array
  long idx = 0;
  float x1, y1, x2, y2; // n line segments (x1,y1) to (x2,y2)
  x2 = s[idx++];
  y2 = s[idx++];
  for (int i = 0; i < n; ++i) {
    // The previous end point becomes the start of this segment.
    x1 = x2;
    y1 = y2;
    x2 = s[idx++];
    y2 = s[idx++];
    // 1 / segment length; the 1e-10 term keeps the power finite when the two
    // end points coincide.
    float inverse_length =
        powf(1e-10 + (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1), -0.5);
    vec_[0] = 1;
    vec_[1] = (x2 - x1) * inverse_length;
    vec_[2] = (y2 - y1) * inverse_length;
    // Step `a` over [0, 1) in increments of 1 / length. a = 0 yields
    // (x2, y2); growing a moves the sample toward (x1, y1).
    for (float a = 0; a < 1; a += inverse_length) {
      location_[0] = x1 * a + x2 * (1 - a);
      location_[1] = y1 * a + y2 * (1 - a);
      m.setInputSpatialLocation(features, location, vec, false);
    }
  }
}
6 | 7 | forward_pass_multiplyAdd_count = 0 8 | forward_pass_hidden_states = 0 9 | from .activations import Tanh, Sigmoid, ReLU, LeakyReLU, ELU, SELU, BatchNormELU 10 | from .averagePooling import AveragePooling 11 | from .batchNormalization import BatchNormalization, BatchNormReLU, BatchNormLeakyReLU, MeanOnlyBNLeakyReLU 12 | from .classificationTrainValidate import ClassificationTrainValidate 13 | from .convolution import Convolution 14 | from .deconvolution import Deconvolution 15 | from .denseToSparse import DenseToSparse 16 | from .dropout import Dropout, BatchwiseDropout 17 | from .fullConvolution import FullConvolution, TransposeConvolution 18 | from .identity import Identity 19 | from .inputBatch import InputBatch 20 | from .ioLayers import InputLayer, OutputLayer, BLInputLayer, BLOutputLayer, InputLayerInput 21 | from .maxPooling import MaxPooling 22 | from .metadata import Metadata 23 | from .networkArchitectures import * 24 | from .networkInNetwork import NetworkInNetwork 25 | from .permutohedralSubmanifoldConvolution import PermutohedralSubmanifoldConvolution, permutohedral_basis 26 | from .randomizedStrideConvolution import RandomizedStrideConvolution 27 | from .randomizedStrideMaxPooling import RandomizedStrideMaxPooling 28 | from .sequential import Sequential 29 | from .sparseConvNetTensor import SparseConvNetTensor 30 | from .sparseToDense import SparseToDense 31 | from .sparsify import Sparsify 32 | from .spectral_norm import spectral_norm 33 | from .submanifoldConvolution import SubmanifoldConvolution, ValidConvolution 34 | from .tables import * 35 | from .unPooling import UnPooling 36 | from .utils import append_tensors, AddCoords, add_feature_planes, concatenate_feature_planes, compare_sparse 37 | from .shapeContext import ShapeContext, MultiscaleShapeContext 38 | -------------------------------------------------------------------------------- /sparseconvnet/dropout.py: 
class Dropout(Module):
    """Element-wise dropout on the features of a sparse tensor.

    In training mode every feature entry is multiplied by an independent
    Bernoulli(1 - p) sample; the surviving entries are not rescaled. In
    evaluation mode the features are multiplied by ``1 - p`` instead.
    Metadata and spatial size are passed through unchanged.
    """

    def __init__(self, p=0.5):
        """Store ``p``, the probability of zeroing an entry."""
        Module.__init__(self)
        self.p = p

    def forward(self, input, increment=False):
        """Return a new SparseConvNetTensor with dropout applied.

        ``increment`` is accepted (and ignored) so the layer can be called as
        ``module(input, increment)``, the way the package's ``Sequential``
        container invokes its children.
        """
        output = SparseConvNetTensor()
        features = input.features
        keep_prob = 1 - self.p
        if self.training:
            # One-element tensor expanded to the feature shape and filled with
            # the keep probability; bernoulli() then draws one 0/1 per entry.
            mask_probs = features.new().resize_(1).expand_as(features).fill_(keep_prob)
            output.features = features * torch.bernoulli(mask_probs)
        else:
            output.features = features * keep_prob
        output.metadata = input.metadata
        output.spatial_size = input.spatial_size
        return output

    def input_spatial_size(self, out_size):
        """Dropout does not change the spatial size."""
        return out_size


class BatchwiseDropout(Module):
    """Dropout that zeroes whole feature columns for the entire batch.

    In training mode a single row of Bernoulli(1 - p) samples, one per feature
    column, is drawn and broadcast over all rows of the feature matrix. In
    evaluation mode the features are multiplied by ``1 - p``.
    Metadata and spatial size are passed through unchanged.
    """

    def __init__(self, p=0.5):
        """Store ``p``, the probability of zeroing a feature column."""
        Module.__init__(self)
        self.p = p

    def forward(self, input, increment=False):
        """Return a new SparseConvNetTensor with column-wise dropout applied.

        ``increment`` is accepted (and ignored) so the layer can be called as
        ``module(input, increment)``, the way the package's ``Sequential``
        container invokes its children.
        """
        output = SparseConvNetTensor()
        features = input.features
        keep_prob = 1 - self.p
        if self.training:
            # Shape (1, n_columns): one draw per column, shared by every row.
            mask_probs = features.new().resize_(1).expand(
                1, features.shape[1]).fill_(keep_prob)
            output.features = features * torch.bernoulli(mask_probs)
        else:
            output.features = features * keep_prob
        output.metadata = input.metadata
        output.spatial_size = input.spatial_size
        return output

    def input_spatial_size(self, out_size):
        """Dropout does not change the spatial size."""
        return out_size
class Identity(Module):
    """Pass-through layer: output equals input, spatial size is unchanged."""

    def forward(self, input, increment=False):
        """Return ``input`` untouched; ``increment`` is ignored."""
        return input

    def input_spatial_size(self, out_size):
        """Return ``out_size``, since the layer does not resize anything."""
        return out_size


def Metadata(dim):
    """Create a fresh metadata object for ``dim`` spatial dimensions.

    Store Metadata relating to which spatial locations are active at each
    scale. Convolutions, submanifold convolutions and 'convolution reversing'
    deconvolutions all coexist within the same MetaData object as long as each
    spatial size only occurs once.

    The class is looked up on ``sparseconvnet.SCN`` under the name
    ``Metadata_<dim>`` and instantiated with no arguments.
    """
    class_name = 'Metadata_%d' % dim
    metadata_class = getattr(sparseconvnet.SCN, class_name)
    return metadata_class()
class Sequential(torch.nn.Sequential):
    """``torch.nn.Sequential`` for modules whose ``forward`` takes ``increment``.

    On top of the base container this adds spatial-size back-propagation
    (``input_spatial_size``), a chaining ``add`` helper and two data-dependent
    initialisation passes (``reweight`` and ``rebias``).
    """

    def input_spatial_size(self, out_size):
        """Return the input size needed for the chain to produce ``out_size``.

        The size is threaded through each child's ``input_spatial_size`` from
        the last child to the first.
        """
        for module in reversed(list(self._modules.values())):
            out_size = module.input_spatial_size(out_size)
        return out_size

    def add(self, module):
        """Append ``module`` under the next integer key and return ``self``."""
        self._modules[str(len(self._modules))] = module
        return self

    def forward(self, input, increment=False):
        """Feed ``input`` through every child, passing ``increment`` to each."""
        for module in self:
            input = module(input, increment)
        return input

    def reweight(self, input):
        """Rescale and re-centre children that have ``weight`` and ``bias``.

        ``input`` is pushed through the children in order. Nested
        ``Sequential`` children are handled recursively. For a child with both
        ``weight`` and ``bias`` (while the running value has ``features``), the
        child's bias-free output is used: ``weight`` is divided by its
        per-column standard deviation and ``bias`` is set to minus the
        per-column mean of the standardised output. The child is then re-run
        with its new parameters. All other children are simply applied.

        Returns the output of the last child.
        """
        for module in self._modules.values():
            if isinstance(module, Sequential):
                input = module.reweight(input)
            elif (hasattr(input, 'features') and hasattr(module, 'weight')
                    and hasattr(module, 'bias')):
                f = module(input).features
                f = f - module.bias
                s = f.std(0)
                f = f / s
                module.weight = torch.nn.Parameter(module.weight / s)
                module.bias = torch.nn.Parameter(-f.mean(0))
                input = module(input)
            else:
                input = module(input)
        return input

    def rebias(self, input):
        """Re-centre children that have a ``bias``, leaving weights alone.

        ``input`` is pushed through the children in order. Nested
        ``Sequential`` children are handled recursively with ``rebias``. For a
        child with a ``bias`` (while the running value has ``features``),
        ``bias`` is set to minus the per-column mean of the child's bias-free
        output and the child is re-run. All other children are simply applied.

        Returns the output of the last child.
        """
        for module in self._modules.values():
            if isinstance(module, Sequential):
                # Recurse with rebias, not reweight: a nested container must
                # get the same bias-only treatment as the outer one.
                input = module.rebias(input)
            elif hasattr(input, 'features') and hasattr(module, 'bias'):
                f = module(input).features
                f = f - module.bias
                module.bias = torch.nn.Parameter(-f.mean(0))
                input = module(input)
            else:
                input = module(input)
        return input
class Sparsify(Module):
    """Drop the rows whose first feature column is not positive.

    The surviving rows are returned in a new SparseConvNetTensor with a fresh
    metadata object; the spatial size is carried over unchanged.
    """

    def __init__(self, dimension):
        """Remember ``dimension``, used to create the output metadata."""
        Module.__init__(self)
        self.dimension = dimension

    def forward(self, input, increment=False):
        """Return the sparsified tensor, or ``input`` itself if it is empty.

        ``increment`` is accepted for call compatibility and is not used.
        """
        features = input.features
        if not features.numel():
            # Nothing to filter: hand the input back as is.
            return input

        output = SparseConvNetTensor()
        output.metadata = Metadata(self.dimension)
        output.spatial_size = input.spatial_size
        # Boolean row mask: keep rows whose first feature is strictly positive.
        active = features[:, 0] > 0
        output.features = features[active]
        active = active.type('torch.LongTensor')
        # The mask (as bytes) and its running sum are handed to the input
        # metadata together with the new, empty output metadata.
        # NOTE(review): presumably this fills output.metadata with the
        # surviving sites - confirm against sparsifyMetadata.
        input.metadata.sparsifyMetadata(
            output.metadata,
            input.spatial_size,
            active.byte(),
            active.cumsum(0))
        return output

    def input_spatial_size(self, out_size):
        """Return ``out_size``: forward copies the spatial size unchanged.

        Provided so the layer can sit inside a container that asks each child
        for its input spatial size.
        """
        return out_size
class JoinTable(torch.nn.Sequential):
    """Concatenate the features of several sparse tensors along dimension 1.

    Metadata and spatial size are taken from the first tensor in the list.
    """

    def __init__(self, *args):
        super(JoinTable, self).__init__(*args)

    def forward(self, input, increment=False):
        """Return one SparseConvNetTensor holding all feature columns.

        ``input`` is a sequence of tensors; ``increment`` is ignored.
        """
        first = input[0]
        joined = SparseConvNetTensor()
        joined.metadata = first.metadata
        joined.spatial_size = first.spatial_size
        joined.features = torch.cat([tensor.features for tensor in input], 1)
        return joined

    def input_spatial_size(self, out_size):
        """Joining does not change the spatial size."""
        return out_size


class AddTable(torch.nn.Sequential):
    """Sum the features of several sparse tensors.

    Metadata and spatial size are taken from the first tensor in the list.
    """

    def __init__(self, *args):
        super(AddTable, self).__init__(*args)

    def forward(self, input, increment=False):
        """Return one SparseConvNetTensor holding the summed features.

        ``input`` is a sequence of tensors; ``increment`` is ignored.
        """
        first = input[0]
        total = SparseConvNetTensor()
        total.metadata = first.metadata
        total.spatial_size = first.spatial_size
        total.features = sum([tensor.features for tensor in input])
        return total

    def input_spatial_size(self, out_size):
        """Adding does not change the spatial size."""
        return out_size


class ConcatTable(torch.nn.Sequential):
    """Apply every child module to the same input and collect the results."""

    def __init__(self, *args):
        super(ConcatTable, self).__init__(*args)

    def forward(self, input, increment=False):
        """Return a list with one output per child, in registration order."""
        outputs = []
        for branch in self._modules.values():
            outputs.append(branch(input, increment))
        return outputs

    def add(self, module):
        """Append ``module`` under the next integer key and return ``self``."""
        key = str(len(self._modules))
        self._modules[key] = module
        return self

    def input_spatial_size(self, out_size):
        """Delegate to the first child (key ``'0'``)."""
        first_branch = self._modules['0']
        return first_branch.input_spatial_size(out_size)