├── .clang-format
├── .gersemirc
├── .git-blame-ignore-revs
├── .gitattributes
├── .github
    └── workflows
    │   ├── C++.yml
    │   ├── C++_build_win.ps1
    │   ├── build_cufinufft_wheels.yml
    │   ├── build_finufft_wheels.yml
    │   ├── cmake_ci.yml
    │   ├── generate_cmake_matrix.py
    │   ├── generate_matrix.py
    │   ├── mex.yml
    │   ├── mex_C++_win64.xml
    │   └── valgrind.yml
├── .gitignore
├── .gitmodules
├── .pre-commit-config.yaml
├── .readthedocs.yaml
├── CHANGELOG
├── CMakeLists.txt
├── Jenkinsfile
├── LICENSE
├── README.md
├── cmake
    ├── CheckAVX.cpp
    ├── setupCPM.cmake
    ├── setupDUCC.cmake
    ├── setupFFTW.cmake
    ├── setupSphinx.cmake
    ├── setupXSIMD.cmake
    ├── utils.cmake
    └── workaround.cmake
├── contributing.md
├── devel
    ├── CMakeLists.txt
    ├── ESlocalpoly.m
    ├── GuruInterfaceBranch_BrainDump
    ├── README
    ├── TODO
    ├── agenda_11-27-23.txt
    ├── analyse_upsamp.py
    ├── check_dumbinputs.cmake
    ├── compare_foldrescale_PR440_laptop5700U.txt
    ├── cuda
    │   ├── cufinufft_tasks_meeting_Jun2023.txt
    │   └── draft_interfaces_c+py_Jun2023.txt
    ├── eval_ker_expts.cpp
    ├── eval_ker_expts2.cpp
    ├── eval_ker_expts_libin_simd64.cpp
    ├── eval_ker_expts_ludvig.cpp
    ├── fig_speed_ker_ppval.m
    ├── finufft_meeting-7-5-23.txt
    ├── foldrescale.cpp
    ├── foldrescale.sh
    ├── foldrescale_finufft1d.txt
    ├── foldrescale_perf.cpp
    ├── foldrescale_perf2.cpp
    ├── foldrescale_spreadtest1d_slower.txt
    ├── gen_all_horner_C_code.m
    ├── gen_all_horner_cpp_header.m
    ├── gen_ker_horner_loop_C_code.m
    ├── gen_ker_horner_loop_cpp_code.m
    ├── get_degree_and_beta.m
    ├── i7_1thr_ker_eval_speeds.png
    ├── interp_square_nowrap.cpp
    ├── ker_horner_allw.c
    ├── ker_horner_allw_loop_notpadded.c
    ├── ker_ppval_coeff_mat.m
    ├── non-contiguous_frequency_inputs_bug2d1.py
    ├── padding.cpp
    ├── plans_fall23.txt
    ├── reverse_engineer_tol.m
    ├── test_ker_ppval.cpp
    ├── time2d2interp.cpp
    ├── v2spec.md
    └── wisdom.py
├── docs
    ├── FIlogo_200.png
    ├── Makefile
    ├── README
    ├── ackn.rst
    ├── c.rst
    ├── c1d.doc
    ├── c1d.docsrc
    ├── c2d.doc
    ├── c2d.docsrc
    ├── c3d.doc
    ├── c3d.docsrc
    ├── c_gpu.rst
    ├── cex.rst
    ├── cguru.doc
    ├── cguru.docsrc
    ├── changelog.rst
    ├── conf.py
    ├── cufinufft_migration.rst
    ├── devnotes.rst
    ├── dirs.rst
    ├── error.rst
    ├── fortran.rst
    ├── genmatlabhelp.sh
    ├── index.rst
    ├── install.rst
    ├── install_gpu.rst
    ├── julia.rst
    ├── latexindex.rst
    ├── logo-32x32.png
    ├── logo-small.png
    ├── logo.png
    ├── logo_gpu.png
    ├── makecdocs.sh
    ├── makefile.doc
    ├── math.rst
    ├── matlab.rst
    ├── matlab_gpu.rst
    ├── matlabgpuhelp.doc
    ├── matlabhelp.doc
    ├── nfft_migr.rst
    ├── opts.rst
    ├── overview.rst
    ├── overview.src
    ├── performance.rst
    ├── pics
    │   ├── 10000x1x1-type-1-upsamp1.25-precd-thread1.png
    │   ├── 10000x1x1-type-1-upsamp1.25-precf-thread1.png
    │   ├── 10000x1x1-type-1-upsamp2.00-precd-thread1.png
    │   ├── 10000x1x1-type-1-upsamp2.00-precf-thread1.png
    │   ├── 10000x1x1-type-2-upsamp1.25-precd-thread1.png
    │   ├── 10000x1x1-type-2-upsamp1.25-precf-thread1.png
    │   ├── 10000x1x1-type-2-upsamp2.00-precd-thread1.png
    │   ├── 10000x1x1-type-2-upsamp2.00-precf-thread1.png
    │   ├── 10000x1x1-type-3-upsamp1.25-precd-thread1.png
    │   ├── 10000x1x1-type-3-upsamp1.25-precf-thread1.png
    │   ├── 10000x1x1-type-3-upsamp2.00-precd-thread1.png
    │   ├── 10000x1x1-type-3-upsamp2.00-precf-thread1.png
    │   ├── 192x192x128-type-1-upsamp1.25-precd-thread16.png
    │   ├── 192x192x128-type-1-upsamp1.25-precd-thread32.png
    │   ├── 192x192x128-type-1-upsamp2.00-precd-thread16.png
    │   ├── 192x192x128-type-1-upsamp2.00-precd-thread32.png
    │   ├── 192x192x128-type-2-upsamp1.25-precd-thread16.png
    │   ├── 192x192x128-type-2-upsamp1.25-precd-thread32.png
    │   ├── 192x192x128-type-2-upsamp2.00-precd-thread16.png
    │   ├── 192x192x128-type-2-upsamp2.00-precd-thread32.png
    │   ├── 192x192x128-type-3-upsamp1.25-precd-thread16.png
    │   ├── 192x192x128-type-3-upsamp1.25-precd-thread32.png
    │   ├── 192x192x128-type-3-upsamp2.00-precd-thread16.png
    │   ├── 192x192x128-type-3-upsamp2.00-precd-thread32.png
    │   ├── 250x250x250-type-1-upsamp2.00-precd-thread1.png
    │   ├── 250x250x250-type-2-upsamp2.00-precd-thread1.png
    │   ├── 250x250x250-type-3-upsamp2.00-precd-thread1.png
    │   ├── 320x320x1-type-1-upsamp1.25-precd-thread1.png
    │   ├── 320x320x1-type-1-upsamp1.25-precf-thread1.png
    │   ├── 320x320x1-type-1-upsamp1.25-precf-thread16.png
    │   ├── 320x320x1-type-1-upsamp1.25-precf-thread32.png
    │   ├── 320x320x1-type-1-upsamp2.00-precd-thread1.png
    │   ├── 320x320x1-type-1-upsamp2.00-precf-thread1.png
    │   ├── 320x320x1-type-1-upsamp2.00-precf-thread16.png
    │   ├── 320x320x1-type-1-upsamp2.00-precf-thread32.png
    │   ├── 320x320x1-type-2-upsamp1.25-precd-thread1.png
    │   ├── 320x320x1-type-2-upsamp1.25-precf-thread1.png
    │   ├── 320x320x1-type-2-upsamp1.25-precf-thread16.png
    │   ├── 320x320x1-type-2-upsamp1.25-precf-thread32.png
    │   ├── 320x320x1-type-2-upsamp2.00-precd-thread1.png
    │   ├── 320x320x1-type-2-upsamp2.00-precf-thread1.png
    │   ├── 320x320x1-type-2-upsamp2.00-precf-thread16.png
    │   ├── 320x320x1-type-2-upsamp2.00-precf-thread32.png
    │   ├── 320x320x1-type-3-upsamp1.25-precd-thread1.png
    │   ├── 320x320x1-type-3-upsamp1.25-precf-thread1.png
    │   ├── 320x320x1-type-3-upsamp1.25-precf-thread16.png
    │   ├── 320x320x1-type-3-upsamp1.25-precf-thread32.png
    │   ├── 320x320x1-type-3-upsamp2.00-precd-thread1.png
    │   ├── 320x320x1-type-3-upsamp2.00-precf-thread1.png
    │   ├── 320x320x1-type-3-upsamp2.00-precf-thread16.png
    │   ├── 320x320x1-type-3-upsamp2.00-precf-thread32.png
    │   ├── contft1d.png
    │   ├── contft1dN.png
    │   ├── contft1dans.png
    │   ├── contft1dsing.png
    │   ├── contft2dans.png
    │   ├── contft2dnodes.png
    │   ├── cufinufft_announce.png
    │   ├── fser1d.png
    │   ├── fser2d.png
    │   ├── grf1d.png
    │   ├── inv1d2err.png
    │   ├── inv1d2err_wellcond.png
    │   ├── pois_fft.png
    │   ├── pois_fhat.png
    │   ├── pois_nufft.png
    │   └── pois_nugrid.png
    ├── python.rst
    ├── python_gpu.rst
    ├── refs.rst
    ├── related.rst
    ├── requirements.txt
    ├── spreadpic.png
    ├── trouble.rst
    ├── tut.rst
    ├── tutorial
    │   ├── contft.rst
    │   ├── grf.rst
    │   ├── inv1d2.rst
    │   ├── peripois2d.rst
    │   └── serieseval.rst
    └── users.rst
├── examples
    ├── CMakeLists.txt
    ├── README
    ├── cuda
    │   ├── CMakeLists.txt
    │   ├── README
    │   ├── example2d1many.cpp
    │   ├── example2d2many.cpp
    │   ├── example2d3many.cpp
    │   └── getting_started.cpp
    ├── guru1d1.cpp
    ├── guru1d1c.c
    ├── guru1d1f.cpp
    ├── guru2d1.cpp
    ├── gurumany1d1.cpp
    ├── many1d1.cpp
    ├── simple1d1.cpp
    ├── simple1d1c.c
    ├── simple1d1cf.c
    ├── simple1d1f.cpp
    ├── simple2d1.cpp
    ├── simulplans1d1.cpp
    ├── spreadinterponly1d.cpp
    ├── threadsafe1d1.cpp
    └── threadsafe2d2f.cpp
├── fortran
    ├── CMakeLists.txt
    ├── README
    ├── cmcl_license.txt
    ├── directft
    │   ├── README
    │   ├── dirft1d.f
    │   ├── dirft1df.f
    │   ├── dirft2d.f
    │   ├── dirft2df.f
    │   ├── dirft3d.f
    │   ├── dirft3df.f
    │   └── prini.f
    ├── examples
    │   ├── guru1d1.f
    │   ├── guru1d1f.f
    │   ├── nufft1d_demo.f
    │   ├── nufft1d_demof.f
    │   ├── nufft2d_demo.f
    │   ├── nufft2d_demof.f
    │   ├── nufft2dmany_demo.f
    │   ├── nufft2dmany_demof.f
    │   ├── nufft3d_demo.f
    │   ├── nufft3d_demof.f
    │   ├── simple1d1.f
    │   ├── simple1d1.f90
    │   └── simple1d1f.f
    └── finufftfort.cpp
├── include
    ├── cufinufft.h
    ├── cufinufft
    │   ├── common.h
    │   ├── contrib
    │   │   ├── helper_cuda.h
    │   │   ├── helper_math.h
    │   │   ├── ker_horner_allw_loop.inc
    │   │   └── ker_lowupsampfac_horner_allw_loop.inc
    │   ├── cudeconvolve.h
    │   ├── defs.h
    │   ├── impl.h
    │   ├── memtransfer.h
    │   ├── precision_independent.h
    │   ├── spreadinterp.h
    │   ├── types.h
    │   └── utils.h
    ├── cufinufft_opts.h
    ├── finufft.fh
    ├── finufft.h
    ├── finufft
    │   ├── fft.h
    │   ├── finufft_core.h
    │   ├── finufft_utils.hpp
    │   ├── heuristics.hpp
    │   ├── spreadinterp.h
    │   └── test_defs.h
    ├── finufft_eitherprec.h
    ├── finufft_errors.h
    ├── finufft_mod.f90
    ├── finufft_opts.h
    └── finufft_spread_opts.h
├── lib-static
    └── README
├── lib
    └── README
├── make-platforms
    ├── README
    ├── make.inc.GCC7
    ├── make.inc.linux_ICC
    ├── make.inc.macosx_arm64
    ├── make.inc.macosx_arm64_matlab2022b_beta
    ├── make.inc.macosx_clang
    ├── make.inc.macosx_clang_matlab
    ├── make.inc.macosx_gcc-10
    ├── make.inc.macosx_gcc-12
    ├── make.inc.macosx_gcc-8
    ├── make.inc.manylinux
    ├── make.inc.powerpc
    ├── make.inc.windows_mingw
    └── make.inc.windows_msys
├── makefile
├── matlab
    ├── @gpuArray
    │   ├── finufft1d1.m
    │   ├── finufft1d2.m
    │   ├── finufft1d3.m
    │   ├── finufft2d1.m
    │   ├── finufft2d2.m
    │   ├── finufft2d3.m
    │   ├── finufft3d1.m
    │   ├── finufft3d2.m
    │   └── finufft3d3.m
    ├── CMakeLists.txt
    ├── Contents.m
    ├── README
    ├── addmhelp.sh
    ├── cufinufft.cu
    ├── cufinufft.mw
    ├── cufinufft1d1.docsrc
    ├── cufinufft1d1.m
    ├── cufinufft1d2.docsrc
    ├── cufinufft1d2.m
    ├── cufinufft1d3.docsrc
    ├── cufinufft1d3.m
    ├── cufinufft2d1.docsrc
    ├── cufinufft2d1.m
    ├── cufinufft2d2.docsrc
    ├── cufinufft2d2.m
    ├── cufinufft2d3.docsrc
    ├── cufinufft2d3.m
    ├── cufinufft3d1.docsrc
    ├── cufinufft3d1.m
    ├── cufinufft3d2.docsrc
    ├── cufinufft3d2.m
    ├── cufinufft3d3.docsrc
    ├── cufinufft3d3.m
    ├── cufinufft_plan.docsrc
    ├── cufinufft_plan.m
    ├── errhandler.m
    ├── examples
    │   ├── README
    │   ├── cuda
    │   │   ├── README
    │   │   ├── guru1d1_gpu.m
    │   │   ├── guru1d1f_gpu.m
    │   │   ├── guru2d1_gpu.m
    │   │   ├── guru2d1f_gpu.m
    │   │   └── simple1d1f_gpu.m
    │   ├── demo_spreadinterponly2d.m
    │   ├── guru1d1.m
    │   └── guru1d1_single.m
    ├── finufft.cpp
    ├── finufft.mw
    ├── finufft1d1.docsrc
    ├── finufft1d1.m
    ├── finufft1d2.docsrc
    ├── finufft1d2.m
    ├── finufft1d3.docsrc
    ├── finufft1d3.m
    ├── finufft2d1.docsrc
    ├── finufft2d1.m
    ├── finufft2d2.docsrc
    ├── finufft2d2.m
    ├── finufft2d3.docsrc
    ├── finufft2d3.m
    ├── finufft3d1.docsrc
    ├── finufft3d1.m
    ├── finufft3d2.docsrc
    ├── finufft3d2.m
    ├── finufft3d3.docsrc
    ├── finufft3d3.m
    ├── finufft_plan.docsrc
    ├── finufft_plan.m
    ├── gnotes.docbit
    ├── gopts.docbit
    ├── gopts12.docbit
    ├── isigneps.docbit
    ├── notes.docbit
    ├── opts.docbit
    ├── opts12.docbit
    ├── test
    │   ├── big1dtest.m
    │   ├── check_finufft.m
    │   ├── check_finufft_single.m
    │   ├── check_modeords.m
    │   ├── fig_accuracy.m
    │   ├── fullmathtest.m
    │   ├── guru_setpts_issue.m
    │   └── test_strictadjoint.m
    ├── valid_ntr.m
    └── valid_setpts.m
├── perftest
    ├── CMakeLists.txt
    ├── README
    ├── bench.py
    ├── big2d2f.cpp
    ├── checkGuruTiming.sh
    ├── compare_spreads.jl
    ├── cuda
    │   ├── CMakeLists.txt
    │   ├── bench.py
    │   ├── bench.sh
    │   └── cuperftest.cu
    ├── getSpeedup.sh
    ├── guru_timing_test.cpp
    ├── highaspect3d_test.sh
    ├── manysmallprobs.cpp
    ├── multispreadtestndall.sh
    ├── mycpuinfo.sh
    ├── mymaxthreads.sh
    ├── mynumcores.sh
    ├── nuffttestnd.sh
    ├── perftest.cpp
    ├── results
    │   ├── gcc_vs_icc_xeon.txt
    │   ├── nuffttestnd_results_i7_2-2-17.txt
    │   ├── nuffttestnd_results_i7_3-16-17.txt
    │   ├── nuffttestnd_results_i7_6-17-17.txt
    │   ├── nuffttestnd_results_i7_gcc72_4-24-18.txt
    │   ├── nuffttestnd_results_i7_gcc72_4-25-18.txt
    │   ├── nuffttestnd_results_i7_gcc72_9-14-18.txt
    │   ├── nuffttestnd_results_i7_v1.1.2_gcc92.txt
    │   ├── nuffttestnd_results_v1.1.1_xeonE5-2643v3_gcc74.txt
    │   ├── nuffttestnd_results_v1.1.2_xeonE5-2643v3_gcc74.txt
    │   ├── nuffttestnd_results_v1.2_i7_gcc92.txt
    │   ├── perftest_xeon-E5-2643v3.txt
    │   ├── spreadtestnd_results_i7_2-2-17.txt
    │   ├── spreadtestnd_results_i7_3-16-17.txt
    │   ├── spreadtestnd_results_i7_6-17-17.txt
    │   ├── spreadtestnd_results_i7_gcc72_4-24-18.txt
    │   ├── spreadtestnd_results_i7_gcc72_4-25-18.txt
    │   ├── spreadtestnd_results_i7_gcc72_9-14-18.txt
    │   ├── spreadtestnd_results_i7_v1.1.2_gcc92.txt
    │   ├── spreadtestnd_results_v1.1.1_xeonE5-2643v3_gcc74.txt
    │   ├── spreadtestnd_results_v1.1.2_xeonE5-2643v3_gcc74.txt
    │   └── spreadtestnd_results_v1.2_i7_gcc92.txt
    ├── searchForTimeMetrics.py
    ├── spreadbenchmark.py
    ├── spreaderbench.py
    ├── spreadingSchemeStats.py
    ├── spreadtestall.sh
    ├── spreadtestnd.cpp
    ├── spreadtestnd.sh
    ├── spreadtestndall.cpp
    ├── timingBreakdowns.py
    └── timingResults
    │   ├── timingBreakdowns_largeProblems.out
    │   ├── timingBreakdowns_smallProblems_SequentialMulti.out
    │   ├── timingBreakdowns_smallProblems_SequentialMulti_noSwitch.out
    │   └── timingBreakdowns_smallProblems_SimultaneousSingle.out
├── python
    ├── CMakeLists.txt
    ├── cufinufft
    │   ├── README.md
    │   ├── cufinufft
    │   │   ├── __init__.py
    │   │   ├── _compat.py
    │   │   ├── _cufinufft.py
    │   │   ├── _plan.py
    │   │   └── _simple.py
    │   ├── examples
    │   │   ├── example2d1_pycuda.py
    │   │   ├── example2d2_pycuda.py
    │   │   ├── example3d2many_async_cupy.py
    │   │   ├── getting_started_cupy.py
    │   │   ├── getting_started_numba.py
    │   │   ├── getting_started_pycuda.py
    │   │   └── getting_started_torch.py
    │   ├── pyproject.toml
    │   ├── requirements.txt
    │   └── tests
    │   │   ├── conftest.py
    │   │   ├── test_array_ordering.py
    │   │   ├── test_basic.py
    │   │   ├── test_error_checks.py
    │   │   ├── test_examples.py
    │   │   ├── test_fallback.py
    │   │   ├── test_multi.py
    │   │   ├── test_simple.py
    │   │   └── utils.py
    └── finufft
    │   ├── README.md
    │   ├── examples
    │       ├── guru1d1.py
    │       ├── guru1d1f.py
    │       ├── guru2d1.py
    │       ├── guru2d1f.py
    │       ├── many2d1.py
    │       ├── simple1d1.py
    │       ├── simple2d1.py
    │       └── simpleopts1d1.py
    │   ├── finufft
    │       ├── __init__.py
    │       ├── _finufft.py
    │       └── _interfaces.py
    │   ├── pyproject.toml
    │   ├── requirements.txt
    │   └── test
    │       ├── README.md
    │       ├── accuracy_speed_tests.py
    │       ├── run_accuracy_tests.py
    │       ├── run_speed_tests.py
    │       ├── test_fallback.py
    │       ├── test_finufft_plan.py
    │       ├── test_finufft_simple.py
    │       └── utils.py
├── src
    ├── c_interface.cpp
    ├── cuda
    │   ├── 1d
    │   │   ├── README
    │   │   ├── cufinufft1d.cu
    │   │   ├── interp1d_wrapper.cu
    │   │   ├── spread1d_wrapper.cu
    │   │   └── spreadinterp1d.cuh
    │   ├── 2d
    │   │   ├── README
    │   │   ├── cufinufft2d.cu
    │   │   ├── interp2d_wrapper.cu
    │   │   ├── spread2d_wrapper.cu
    │   │   └── spreadinterp2d.cuh
    │   ├── 3d
    │   │   ├── README
    │   │   ├── cufinufft3d.cu
    │   │   ├── interp3d_wrapper.cu
    │   │   ├── spread3d_wrapper.cu
    │   │   └── spreadinterp3d.cuh
    │   ├── CMakeLists.txt
    │   ├── README
    │   ├── common.cu
    │   ├── cufinufft.cu
    │   ├── deconvolve_wrapper.cu
    │   ├── memtransfer_wrapper.cu
    │   ├── precision_independent.cu
    │   ├── spreadinterp.cpp
    │   └── utils.cpp
    ├── fft.cpp
    ├── finufft_core.cpp
    ├── finufft_utils.cpp
    ├── ker_horner_allw_loop_constexpr.h
    ├── ker_lowupsampfac_horner_allw_loop_constexpr.h
    └── spreadinterp.cpp
├── test
    ├── CMakeLists.txt
    ├── README
    ├── basicpassfail.cpp
    ├── check_finufft.sh
    ├── checkallaccs.sh
    ├── cuda
    │   ├── CMakeLists.txt
    │   ├── README
    │   ├── cufinufft1d_test.cu
    │   ├── cufinufft1dspreadinterponly_test.cu
    │   ├── cufinufft2d1nupts_test.cu
    │   ├── cufinufft2d_test.cu
    │   ├── cufinufft2dmany_test.cu
    │   ├── cufinufft3d_test.cu
    │   ├── cufinufft_math_test.cu
    │   ├── public_api_test.c
    │   ├── spreadperf.sh
    │   └── test_makeplan.c
    ├── dumbinputs.cpp
    ├── fftw_lock_test.cpp
    ├── finufft1d_test.cpp
    ├── finufft1dmany_test.cpp
    ├── finufft2d_test.cpp
    ├── finufft2dmany_test.cpp
    ├── finufft3d_test.cpp
    ├── finufft3dkernel_test.cpp
    ├── finufft3dmany_test.cpp
    ├── results
    │   └── README
    ├── spreadinterp1d_test.cpp
    ├── testutils.cpp
    └── utils
    │   ├── dirft1d.hpp
    │   ├── dirft2d.hpp
    │   ├── dirft3d.hpp
    │   └── norms.hpp
├── tools
    ├── common
    │   ├── docker
    │   │   └── Dockerfile-x86_64
    │   └── sdist-helper.sh
    ├── cufinufft
    │   ├── build-library.sh
    │   ├── build-sdist.sh
    │   ├── build-wheels.sh
    │   ├── distribution_helper.sh
    │   ├── docker
    │   │   ├── README
    │   │   ├── cuda11.2
    │   │   │   ├── Dockerfile-x86_64
    │   │   │   └── cuda.repo
    │   │   ├── cuda11.8
    │   │   │   ├── Dockerfile-x86_64
    │   │   │   └── cuda.repo
    │   │   └── cuda12.0
    │   │   │   ├── Dockerfile-x86_64
    │   │   │   └── cuda.repo
    │   └── test.sh
    └── finufft
    │   ├── build-sdist.sh
    │   ├── build-wheels.sh
    │   └── docker
    │       └── Dockerfile-x86_64
└── tutorial
    ├── README
    ├── applyAHA.m
    ├── applyToep.m
    ├── contft1d.m
    ├── contft2d.m
    ├── inv1d2.m
    ├── migrate2d1_test.c
    ├── nfft2d1_test.c
    ├── poisson2dnuquad.m
    ├── samplegrf1d.m
    ├── serieseval1d.m
    ├── serieseval2d.m
    └── utils
        └── lgwt.m


/.clang-format:
--------------------------------------------------------------------------------
 1 | ---
 2 | BasedOnStyle: LLVM
 3 | AlignAfterOpenBracket: Align
 4 | AlignConsecutiveMacros: AcrossEmptyLinesAndComments
 5 | AlignConsecutiveAssignments: Consecutive
 6 | AlignEscapedNewlines: Left
 7 | AlignOperands: true
 8 | AlignTrailingComments:
 9 |   Kind: Always
10 |   OverEmptyLines: 1
11 | AllowShortIfStatementsOnASingleLine: WithoutElse
12 | AllowShortLambdasOnASingleLine: Inline
13 | AllowShortLoopsOnASingleLine: true
14 | BreakBeforeBraces: Attach
15 | BreakBeforeBinaryOperators: None
16 | ColumnLimit: 90
17 | ExperimentalAutoDetectBinPacking: true
18 | FixNamespaceComments: true
19 | IndentWidth: 2
20 | MaxEmptyLinesToKeep: 1
21 | NamespaceIndentation: None
22 | ReflowComments: true
23 | PenaltyBreakComment: 1
24 | PenaltyBreakOpenParenthesis: 1  # modified; was 0
25 | SortIncludes: CaseSensitive
26 | SortUsingDeclarations: true
27 | SpacesBeforeTrailingComments: 1
28 | SpaceAfterCStyleCast: false
29 | SpaceAfterLogicalNot: false
30 | SpaceAfterTemplateKeyword: false
31 | TabWidth: 2
32 | UseTab: Never
33 | AttributeMacros: ['__host__', '__device__', '__global__', '__forceinline__']
34 | QualifierOrder:
35 |   - static
36 |   - inline
37 |   - constexpr
38 |   - const
39 |   - type
40 | QualifierAlignment: Custom
41 | ...
42 | 


--------------------------------------------------------------------------------
/.gersemirc:
--------------------------------------------------------------------------------
1 | definitions:
2 |   - "./cmake"
3 | line_length: 120
4 | 


--------------------------------------------------------------------------------
/.git-blame-ignore-revs:
--------------------------------------------------------------------------------
1 | # Applied clang format to the codebase
2 | 884ba427be0c60aa3399d5ea71b0e9e3a7cbf686
3 | b1e484fb294b2759d3b6b1f68ca0bf5e255b87d1
4 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Convert to LF line endings on checkout.
2 | *.sh text eol=lf
3 | 


--------------------------------------------------------------------------------
/.github/workflows/C++_build_win.ps1:
--------------------------------------------------------------------------------
 1 | $ErrorActionPreference = "Stop"  # make cmdlet errors terminating; the native make exit status is checked separately on the last line
 2 | Set-Variable -Name MSYSTEM -Value MINGW64  # PowerShell variable only; bash receives MSYSTEM explicitly through the env call below
 3 | 
 4 | # Set up make.inc: copy the MSYS platform settings to make.inc in the current directory
 5 | Copy-Item -Path make-platforms\make.inc.windows_msys -Destination make.inc
 6 | 
 7 | # Run make from the repo root (two levels above this script) in an MSYS2 MINGW64 login shell: targets spreadtestall, lib, test, stopping at the first failure
 8 | Set-Variable repo_root -Value ([IO.Path]::Combine($PSScriptRoot, '..', '..'))
 9 | c:\msys64\usr\bin\env MSYSTEM=MINGW64 c:\msys64\usr\bin\bash.exe -lc "cd '$repo_root' && make spreadtestall && make lib && make test"
10 | if (-not $?) {throw "Failed make"}  # $? is false when the preceding bash invocation failed
11 | 


--------------------------------------------------------------------------------
/.github/workflows/build_cufinufft_wheels.yml:
--------------------------------------------------------------------------------
 1 | name: Build cufinufft Python wheels
 2 | 
 3 | on: [push, pull_request]
 4 | 
 5 | jobs:
 6 |   build_wheels:
 7 |     name: Build cufinufft wheels on ${{ matrix.buildplat[1] }}
 8 |     runs-on: ${{ matrix.buildplat[0] }}
 9 |     strategy:
10 |       fail-fast: false
11 |       matrix:
12 |         buildplat:
13 |           - [ ubuntu-22.04, manylinux_x86_64 ]
14 |           - [ windows-2019, win_amd64 ]
15 |     steps:
16 |       - uses: actions/checkout@v4
17 |       - uses: ilammy/msvc-dev-cmd@v1
18 |       - name: Setup CUDA
19 |         if: ${{ matrix.buildplat[0] == 'windows-2019' }}
20 |         uses: Jimver/cuda-toolkit@v0.2.21
21 |         with:
22 |           cuda: '12.4.0'
23 |       - name: Build ${{ matrix.buildplat[1] }} wheels
24 |         uses: pypa/cibuildwheel@v2.22.0
25 |         with:
26 |           package-dir: 'python/cufinufft'
27 |         env:
28 |           CIBW_BUILD: '*-${{ matrix.buildplat[1] }}'
29 |           CIBW_TEST_COMMAND: "echo 'Wheel installed'"
30 |           CIBW_BUILD_FRONTEND: "pip; args: --no-deps"
31 |           CIBW_BEFORE_ALL_LINUX: |
32 |             if command -v yum &> /dev/null; then
33 |               yum install -y epel-release
34 |               yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo
35 |               yum install -y cuda-12-4
36 |             else
37 |               echo "Unsupported package manager. Exiting."
38 |               exit 1
39 |             fi
40 |           CIBW_ENVIRONMENT_LINUX: PATH=$PATH:/usr/local/cuda/bin LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64
41 |           CIBW_ARCHS_LINUX: x86_64
42 | 
43 |       - uses: actions/upload-artifact@v4
44 |         with:
45 |           name: cufinufft-wheels-${{ matrix.buildplat[1] }}
46 |           path: ./wheelhouse/*.whl
47 | 


--------------------------------------------------------------------------------
/.github/workflows/generate_matrix.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | 
 3 | matrix = {  # result object; one entry per build configuration is appended to "include" below
 4 |     "include": []
 5 | }
 6 | 
 7 | python_versions = ["3.8", "3.11"]  # every platform/compiler/flag combination is generated once per version
 8 | 
 9 | combinations = {  # OS label -> compilers and architecture flags to combine for that OS
10 |     "ubuntu-22.04": {
11 |         "compiler": ["llvm", "gcc"],
12 |         "arch_flags": ["-march=native", "-march=x86-64"]
13 |     },
14 |     "windows-2022": {
15 |         "compiler": ["msvc", "llvm"],
16 |         "arch_flags": ["/arch:AVX2", "/arch:SSE2"]
17 |     },
18 |     "macos-13": {
19 |         "compiler": ["llvm", "gcc-14"],
20 |         "arch_flags": ["-march=native", "-march=x86-64"]
21 |     }
22 | }
23 | 
24 | for platform in combinations.keys():  # full cross product: 3 OSes x 2 Pythons x 2 compilers x 2 flags = 24 entries
25 |     for python_version in python_versions:
26 |         for compiler in combinations[platform]["compiler"]:
27 |             for arch_flag in combinations[platform]["arch_flags"]:
28 |                 matrix["include"].append({
29 |                     "os": platform,
30 |                     "python-version": python_version,
31 |                     "compiler": compiler,
32 |                     "arch_flags": arch_flag
33 |                 })
34 | 
35 | json_str = json.dumps(matrix, ensure_ascii=False)  # compact single-line JSON; non-ASCII characters would be left unescaped
36 | print(json_str)  # emitted on stdout; presumably captured by a CI workflow as its job matrix - TODO confirm the consumer
37 | 


--------------------------------------------------------------------------------
/.github/workflows/valgrind.yml:
--------------------------------------------------------------------------------
 1 | name: Valgrind memcheck
 2 | 
 3 | on: [push, pull_request]
 4 | 
 5 | jobs:
 6 |   valgrind:
 7 |     runs-on: ubuntu-22.04
 8 |     strategy:
 9 |       fail-fast: false
10 |     steps:
11 |       - name: Checkout code
12 |         uses: actions/checkout@v4
13 |       - name: Setup Cpp
14 |         uses: aminya/setup-cpp@v1.1.1
15 |         with:
16 |           compiler: gcc
17 |           cmake: true
18 |           ninja: true
19 |           vcpkg: false
20 |           cppcheck: false
21 |           clangtidy: false
22 |       - name: Prepare
23 |         run: |
24 |           sudo apt update
25 |           sudo apt install -y libfftw3-dev jq valgrind
26 |       - name: Configure Cmake
27 |         run: |
28 |           cmake -S . -B ./build -G Ninja -DCMAKE_BUILD_TYPE:STRING=RelWithDebInfo -DFINUFFT_BUILD_TESTS=ON -DFINUFFT_ENABLE_SANITIZERS=OFF
29 |       - name: Build
30 |         run: |
31 |           cmake --build ./build --config RelWithDebInfo
32 |       - name: Test
33 |         working-directory: ./build
34 |         run: |
35 |           ctest --show-only=json-v1 > ctest_tests.json
36 |           # Loop over all test entries
37 |           exec 3>&1
38 |           jq -c '.tests[]' ctest_tests.json | while read -r test; do
39 |             name=$(echo "$test" | jq -r '.name')
40 |             command=$(echo "$test" | jq -r '.command | @sh')
41 | 
42 |             echo -e "\n▶ Running test: $name"
43 |             echo "   Command: $command"
44 | 
45 |             # Eval to reconstruct command array safely
46 |             eval "cmd=( $command )"
47 | 
48 |             valgrind --undef-value-errors=yes --errors-for-leak-kinds=definite --error-exitcode=1 --log-fd=3 "${cmd[@]}"  > /dev/null 2>&1
49 | 
50 |             # Check valgrind exit code
51 |             status=$?
52 |             if [[ $status -ne 0 ]]; then
53 |               echo "❌ Valgrind detected errors in test: $name"
54 |               exit $status
55 |             fi
56 |           done
57 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *_test
 2 | *~
 3 | *.a
 4 | *.so
 5 | *.out
 6 | *.o
 7 | examples/example1d1
 8 | examples/example1d1c
 9 | examples/example2d1
10 | examples/guru1d1
11 | examples/example1d1f
12 | examples/example1d1cf
13 | examples/example2d1f
14 | examples/guru1d1f
15 | fortran/examples/guru1d1
16 | fortran/examples/nufft1d_demo
17 | fortran/examples/nufft1d_demo_legacy
18 | fortran/examples/nufft2d_demo
19 | fortran/examples/nufft2dmany_demo
20 | fortran/examples/nufft3d_demo
21 | fortran/examples/guru1d1f
22 | fortran/examples/nufft1d_demof
23 | fortran/examples/nufft1d_demo_legacyf
24 | fortran/examples/nufft2d_demof
25 | fortran/examples/nufft2dmany_demof
26 | fortran/examples/nufft3d_demof
27 | test/dumbinputs
28 | test/finufft1d_basicpassfail
29 | test/testlib
30 | __pycache__*
31 | 
32 | docs/_build
33 | 
34 | build/
35 | .vscode/
36 | 
37 | cufinufft/python/cufinufft/docs/_build
38 | cufinufft/python/cufinufft/docs/_static
39 | cufinufft/python/cufinufft/docs/_templates
40 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/.gitmodules


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 |   - repo: https://github.com/pre-commit/mirrors-clang-format
 3 |     rev: 'v19.1.7'
 4 |     hooks:
 5 |       - id: clang-format
 6 |         types_or: [c++, c, cuda]
 7 |         exclude: '(^|/)(matlab/.*)$'
 8 |   - repo: https://github.com/pre-commit/pre-commit-hooks
 9 |     rev: v5.0.0
10 |     hooks:
11 |       - id: check-yaml
12 |       - id: end-of-file-fixer
13 |       - id: trailing-whitespace
14 |       - id: check-illegal-windows-names
15 |       - id: mixed-line-ending
16 |   - repo: https://github.com/BlankSpruce/gersemi
17 |     rev: 0.19.1
18 |     hooks:
19 |       - id: gersemi
20 |   - repo: https://github.com/abravalheri/validate-pyproject
21 |     rev: v0.23  # Use the latest stable version
22 |     hooks:
23 |       - id: validate-pyproject
24 |         # Optional: Include additional validations from SchemaStore
25 |         additional_dependencies: ["validate-pyproject-schema-store[all]"]
26 |         files: ^python/(finufft|cufinufft)/pyproject\.toml$
27 | 


--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
 1 | # .readthedocs.yaml
 2 | # "Read the Docs" doc-hosting website configuration file
 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 4 | 
 5 | # Required (for this file format)
 6 | version: 2
 7 | 
 8 | # Set the OS, Python version and other tools you might need
 9 | build:
10 |   os: ubuntu-22.04
11 |   tools:
12 |     python: "3.11"
13 | 
14 | # Build all formats
15 | formats: all
16 | 
17 | # Build documentation in the docs/ directory with Sphinx
18 | sphinx:
19 |   configuration: docs/conf.py
20 | 
21 | # Python packages required to build the docs (the Python version itself is set under build.tools above)
22 | python:
23 |   install:
24 |     - requirements: docs/requirements.txt
25 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (C) 2017-2024 The Simons Foundation, Inc. - All Rights Reserved.
 2 | 
 3 | See docs/ackn.rst for the list of code authors and contributors.
 4 | 
 5 | ------
 6 | 
 7 | FINUFFT is licensed under the Apache License, Version 2.0 (the
 8 | "License"); you may not use this file except in compliance with the
 9 | License.  You may obtain a copy of the License at
10 | 
11 |     http://www.apache.org/licenses/LICENSE-2.0
12 | 
13 | Unless required by applicable law or agreed to in writing, software
14 | distributed under the License is distributed on an "AS IS" BASIS,
15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | See the License for the specific language governing permissions and
17 | limitations under the License.
18 | 
19 | ------
20 | 
21 | Certain parts of this repository are contributed by others.
22 | For their license info (which are both BSD-like), see:
23 | 
24 | fortran/cmcl_license.txt
25 | tutorial/utils/lgwt.m
26 | 
27 | ------
28 | 
29 | If you find this library useful, or it helps you in creating software
30 | or publications, please let us know, and acknowledge that fact by citing our
31 | source repository:
32 | 
33 |   https://github.com/flatironinstitute/finufft
34 | 
35 | and the corresponding journal articles (particularly the first for the CPU
36 | and/or the last for the GPU):
37 | 
38 |   A parallel non-uniform fast Fourier transform library based on an
39 |   ``exponential of semicircle'' kernel. A. H. Barnett, J. F. Magland,
40 |   and L. af Klinteberg.  SIAM J. Sci. Comput. 41(5), C479-C504 (2019).
41 | 
42 |   Aliasing error of the $\exp (\beta \sqrt{1-z^2})$ kernel in the
43 |   nonuniform fast Fourier transform. A. H. Barnett,
44 |   Appl. Comput. Harmon. Anal. 51, 1-16 (2021).
45 | 
46 |   cuFINUFFT: a load-balanced GPU library for general-purpose nonuniform FFTs,
47 |   Yu-hsuan Shih, Garrett Wright, Joakim Andén, Johannes Blaschke, and
48 |   Alex H. Barnett. PDSEC2021 workshop of the IPDPS2021 conference.
49 |   https://arxiv.org/abs/2102.08463
50 | 


--------------------------------------------------------------------------------
/cmake/CheckAVX.cpp:
--------------------------------------------------------------------------------
 1 | #include <array>
 2 | #include <intrin.h>
 3 | #include <iostream>
 4 | 
 5 | bool is_sse2_supported() { // SSE2 feature flag: CPUID leaf 1, EDX bit 26
 6 |   std::array<int, 4> regs; // filled as {EAX, EBX, ECX, EDX}
 7 |   __cpuid(regs.data(), 1);
 8 |   return ((regs[3] >> 26) & 1) != 0;
 9 | }
10 | 
11 | bool is_avx_supported() {
12 |   // AVX is usable only if the CPU implements it AND the OS saves YMM state.
13 |   std::array<int, 4> regs;
14 |   __cpuid(regs.data(), 1);
15 |   const bool has_osxsave = ((regs[2] >> 27) & 1) != 0; // OS uses XSAVE/XRSTOR
16 |   const bool has_avx     = ((regs[2] >> 28) & 1) != 0; // CPU implements AVX
17 |   if (!(has_osxsave && has_avx)) return false; // XGETBV needs OSXSAVE
18 |   // XCR0 bits 1 (XMM state) and 2 (YMM state) must both be OS-enabled.
19 |   const unsigned long long xcr0 = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
20 |   return (xcr0 & 0x6) == 0x6;
21 | }
22 | 
23 | bool is_avx2_supported() { // AVX2: CPUID.(EAX=7,ECX=0):EBX bit 5, plus OS YMM support
24 |   std::array<int, 4> cpui;
25 |   __cpuidex(cpui.data(), 7, 0); // leaf 7 is sub-leaf indexed; request sub-leaf 0 explicitly
26 |   return is_avx_supported() && (cpui[1] & (1 << 5)) != 0; // CPU bit alone ignores OS state
27 | }
28 | 
29 | bool is_avx512_supported() { // AVX-512F: CPUID.(EAX=7,ECX=0):EBX bit 16, plus OS ZMM support
30 |   std::array<int, 4> cpui;
31 |   __cpuidex(cpui.data(), 7, 0); // XCR0 must enable XMM|YMM|opmask|ZMM_Hi256|Hi16_ZMM = 0xE6
32 |   return (cpui[1] & (1 << 16)) != 0 && is_avx_supported() && (_xgetbv(_XCR_XFEATURE_ENABLED_MASK) & 0xE6) == 0xE6;
33 | }
34 | 
35 | int main() {
36 |   // Print (without newline) the widest SIMD level found, probing widest first.
37 |   const char *isa = "NONE";
38 |   if (is_avx512_supported())
39 |     isa = "AVX512";
40 |   else if (is_avx2_supported())
41 |     isa = "AVX2";
42 |   else if (is_avx_supported())
43 |     isa = "AVX";
44 |   else if (is_sse2_supported())
45 |     isa = "SSE2";
46 |   std::cout << isa;
47 |   return 0;
48 | }
49 | 


--------------------------------------------------------------------------------
/cmake/setupCPM.cmake:
--------------------------------------------------------------------------------
 1 | # USING CPM TO HANDLE DEPENDENCIES
 2 | # Choose where the CPM.cmake bootstrap script is stored: the CMake variable
 3 | # CPM_SOURCE_CACHE takes priority, then the environment variable of the same
 4 | # name, and finally a location inside the build tree.
 5 | if(CPM_SOURCE_CACHE)
 6 |     set(CPM_DOWNLOAD_LOCATION "${CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
 7 | elseif(DEFINED ENV{CPM_SOURCE_CACHE})
 8 |     set(CPM_DOWNLOAD_LOCATION "$ENV{CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
 9 | else()
10 |     set(CPM_DOWNLOAD_LOCATION "${CMAKE_BINARY_DIR}/cmake/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
11 | endif()
12 | 
13 | if(NOT (EXISTS "${CPM_DOWNLOAD_LOCATION}"))
14 |     message(STATUS "Downloading CPM.cmake to ${CPM_DOWNLOAD_LOCATION}")
15 |     file(
16 |         DOWNLOAD
17 |             https://github.com/cpm-cmake/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake
18 |             "${CPM_DOWNLOAD_LOCATION}"
19 |         STATUS CPM_DOWNLOAD_STATUS
20 |     )
21 |     # Without a status check a failed download is silent and can leave an
22 |     # empty file behind, which the EXISTS test above would then accept on the
23 |     # next configure. STATUS is a two-element list: numeric code; message.
24 |     list(GET CPM_DOWNLOAD_STATUS 0 CPM_DOWNLOAD_CODE)
25 |     if(NOT CPM_DOWNLOAD_CODE EQUAL 0)
26 |         list(GET CPM_DOWNLOAD_STATUS 1 CPM_DOWNLOAD_MESSAGE)
27 |         file(REMOVE "${CPM_DOWNLOAD_LOCATION}")
28 |         message(FATAL_ERROR "Could not download CPM.cmake: ${CPM_DOWNLOAD_MESSAGE}")
29 |     endif()
30 | endif()
31 | 
32 | include("${CPM_DOWNLOAD_LOCATION}")
33 | 


--------------------------------------------------------------------------------
/cmake/setupDUCC.cmake:
--------------------------------------------------------------------------------
 1 | CPMAddPackage(
 2 |     NAME
 3 |     ducc0
 4 |     GIT_REPOSITORY
 5 |     https://gitlab.mpcdf.mpg.de/mtr/ducc.git
 6 |     GIT_TAG
 7 |     ${DUCC0_VERSION}
 8 |     DOWNLOAD_ONLY
 9 |     YES # sources are only fetched; the ducc0 library target is defined by hand below
10 | )
11 | 
12 | if(ducc0_ADDED)
13 |     add_library(
14 |         ducc0
15 |         STATIC
16 |         ${ducc0_SOURCE_DIR}/src/ducc0/infra/string_utils.cc
17 |         ${ducc0_SOURCE_DIR}/src/ducc0/infra/threading.cc
18 |         ${ducc0_SOURCE_DIR}/src/ducc0/infra/mav.cc
19 |         ${ducc0_SOURCE_DIR}/src/ducc0/math/gridding_kernel.cc
20 |         ${ducc0_SOURCE_DIR}/src/ducc0/math/gl_integrator.cc
21 |     )
22 |     target_include_directories(ducc0 PUBLIC ${ducc0_SOURCE_DIR}/src/)
23 |     target_compile_options(ducc0 PRIVATE $<$<CONFIG:Release,RelWithDebInfo>:${FINUFFT_ARCH_FLAGS}>)
24 |     target_compile_options(ducc0 PRIVATE $<$<CONFIG:Release>:${FINUFFT_CXX_FLAGS_RELEASE}>)
25 |     target_compile_options(ducc0 PRIVATE $<$<CONFIG:RelWithDebInfo>:${FINUFFT_CXX_FLAGS_RELWITHDEBINFO}>)
26 |     target_compile_features(ducc0 PRIVATE cxx_std_17)
27 |     # cxx_std_17 is PRIVATE because we do not want to propagate this requirement to targets that link ducc0
28 |     set_target_properties(
29 |         ducc0
30 |         PROPERTIES
31 |             MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>"
32 |             POSITION_INDEPENDENT_CODE ${FINUFFT_POSITION_INDEPENDENT_CODE}
33 |     )
34 |     check_cxx_compiler_flag(-ffast-math HAS_FAST_MATH) # GCC/Clang-style spelling of the fast-math flag
35 |     if(HAS_FAST_MATH)
36 |         target_compile_options(ducc0 PRIVATE -ffast-math)
37 |     endif()
38 |     check_cxx_compiler_flag(/fp:fast HAS_FP_FAST) # MSVC-style spelling of the fast-math flag
39 |     if(HAS_FP_FAST)
40 |         target_compile_options(ducc0 PRIVATE /fp:fast)
41 |     endif()
42 |     if(NOT OpenMP_CXX_FOUND) # no OpenMP found: link the platform thread library
43 |         find_package(Threads REQUIRED)
44 |         target_link_libraries(ducc0 PRIVATE Threads::Threads)
45 |     endif()
46 |     enable_asan(ducc0) # helper defined outside this file
47 | endif()
48 | 


--------------------------------------------------------------------------------
/cmake/setupSphinx.cmake:
--------------------------------------------------------------------------------
 1 | CPMAddPackage(
 2 |     NAME
 3 |     sphinx_cmake
 4 |     GIT_REPOSITORY
 5 |     https://github.com/k0ekk0ek/cmake-sphinx.git
 6 |     GIT_TAG
 7 |     e13c40a
 8 |     DOWNLOAD_ONLY
 9 |     YES # fetched only for its CMake modules, which are put on CMAKE_MODULE_PATH below
10 | )
11 | 
12 | list(APPEND CMAKE_MODULE_PATH ${sphinx_cmake_SOURCE_DIR}/cmake/Modules)
13 | 
14 | # requires pip install sphinx texext
15 | find_package(Sphinx) # not REQUIRED: when Sphinx is missing only a status message is printed
16 | if(SPHINX_FOUND)
17 |     message(STATUS "Sphinx found")
18 |     sphinx_add_docs(finufft_sphinx BUILDER html SOURCE_DIRECTORY
19 |                     ${FINUFFT_SOURCE_DIR}/docs
20 |     )
21 | else()
22 |     message(STATUS "Sphinx not found docs will not be generated")
23 | endif()
24 | 


--------------------------------------------------------------------------------
/cmake/setupXSIMD.cmake:
--------------------------------------------------------------------------------
 1 | CPMAddPackage( # xtl is requested before xsimd, which is configured below with XSIMD_ENABLE_XTL_COMPLEX
 2 |     NAME
 3 |     xtl
 4 |     GIT_REPOSITORY
 5 |     "https://github.com/xtensor-stack/xtl.git"
 6 |     GIT_TAG
 7 |     ${XTL_VERSION}
 8 |     EXCLUDE_FROM_ALL
 9 |     YES
10 |     GIT_SHALLOW
11 |     YES
12 |     OPTIONS
13 |     "XTL_DISABLE_EXCEPTIONS YES"
14 | )
15 | 
16 | CPMAddPackage( # xsimd at ${XSIMD_VERSION}, with its install rules skipped
17 |     NAME
18 |     xsimd
19 |     GIT_REPOSITORY
20 |     "https://github.com/xtensor-stack/xsimd.git"
21 |     GIT_TAG
22 |     ${XSIMD_VERSION}
23 |     EXCLUDE_FROM_ALL
24 |     YES
25 |     GIT_SHALLOW
26 |     YES
27 |     OPTIONS
28 |     "XSIMD_SKIP_INSTALL YES"
29 |     "XSIMD_ENABLE_XTL_COMPLEX YES"
30 | )
31 | 


--------------------------------------------------------------------------------
/cmake/workaround.cmake:
--------------------------------------------------------------------------------
1 | function(CPMAddPackage some_args)
2 |     # Intentionally empty: a stub definition that exists for gersemi formatting only.
3 | endfunction()
4 | 


--------------------------------------------------------------------------------
/contributing.md:
--------------------------------------------------------------------------------
 1 | This repository is formatted according to the .clang-format in the root directory.
 2 | Please enable the reformatting hook before committing your changes.
 3 | See [pre-commit](https://pre-commit.com/) for more information.
 4 | A quick summary:
 5 | ```
 6 | pip install pre-commit
 7 | pre-commit install
 8 | ```
 9 | 
10 | We also suggest configuring your IDE to use the same formatting settings.
11 | 
12 | Another suggestion is to ignore the formatting commits in your git configuration:
13 | ```
14 | git config blame.ignoreRevsFile .git-blame-ignore-revs
15 | ```
16 | 


--------------------------------------------------------------------------------
/devel/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | project(finufft_devel)
 2 | # Set the minimum required version of CMake
 3 | cmake_minimum_required(VERSION 3.5)
 4 | 
 5 | # fetch Google Benchmark via CPM (this file does not load CPM itself; presumably the parent project does)
 6 | CPMAddPackage(
 7 |     NAME
 8 |     benchmark
 9 |     GITHUB_REPOSITORY
10 |     google/benchmark
11 |     VERSION
12 |     1.8.3
13 |     OPTIONS
14 |     "BENCHMARK_ENABLE_TESTING OFF"
15 | )
16 | 
17 | if(benchmark_ADDED)
18 |     # patch benchmark target
19 |     set_target_properties(benchmark PROPERTIES CXX_STANDARD 17)
20 | endif()
21 | 
22 | add_executable(foldrescale foldrescale.cpp)
23 | target_link_libraries(foldrescale finufft benchmark xsimd) # finufft and xsimd targets are defined outside this file
24 | add_executable(padding padding.cpp)
25 | target_compile_features(padding PRIVATE cxx_std_17)
26 | target_link_libraries(padding finufft xsimd)
27 | target_compile_options(padding PRIVATE -march=native) # NOTE(review): GCC/Clang-style flag, added unconditionally
28 | 


--------------------------------------------------------------------------------
/devel/README:
--------------------------------------------------------------------------------
 1 | Developer and experimental codes for FINUFFT
 2 | --------------------------------------------
 3 | 
 4 | For generating kernel coefficient codes in ../src,
 5 | the developer must run from MATLAB the following:
 6 | 
 7 | gen_all_horner_C_code.m        : writes C-style Horner coeffs (pre-2024)
 8 |   * a single call writes upsampfac=2 and 1.25
 9 |   * calls gen_ker_horner_loop_C_code.m
10 | gen_all_horner_cpp_header.m    : writes C++ header Horner coeffs (July 2024 on)
11 |   * a single call writes upsampfac=2 and 1.25
12 |   * calls gen_ker_horner_loop_cpp_code.m
13 | 
14 | Both of the gen_ker_* scripts call for the solve of the coeffs for each w:
15 | ker_ppval_coeff_mat.m
16 |   (which has the kernel definition in it, which must match spreadinterp.cpp)
17 | 
18 | The universal location for kernel approximation (degree, ES beta setting) is:
19 | get_degree_and_beta.m
20 | Tweaks should be done here, and see instructions there for resulting acc test.
21 | Another code that has to match ../src/spreadinterp.cpp is:
22 | reverse_engineer_tol.m
23 | 
24 | To measure overall accuracy (e.g. to compare kernels), run "make matlab", then run:
25 | matlab/test/fig_accuracy.m
26 | 
27 | Barnett 8/20/24
28 | 


--------------------------------------------------------------------------------
/devel/TODO:
--------------------------------------------------------------------------------
 1 | Side list of items to do for FINUFFT library that are not in github Issues
 2 | ==========================================================================
 3 | 
 4 | 
 5 | * Add a real-valued spreader option which will be faster and use half the RAM: but how avoid code duplication? Extend the C macros we now have for dual-prec.
 6 | 
 7 | * Check huge arrays >2^31 working in 2d,3d in C++, and for any d in MATLAB/octave.
 8 | - matlab/MEX used to give zero answers for >=2^31 array sizes (big1dtest.m).
 9 | - test huge arrays >=2^31 2d, 3d in C++.
10 | - test huge arrays >=2^31 in octave/mex.
11 | - ditto py.
12 | 
13 | * Package as RPM and .deb for linux, brew for OSX.
14 | 
15 | * R interface?
16 | 
17 | 
18 | 
19 | LOWER PRIORITY TODO / DISCUSSIONS:
20 | 
21 | * Return FFTW's internal state to single-threaded upon exit from finufft (deals with Marina & Andras' problem where fftw was then kicked from single to multi-threaded by an intervening finufft call).  Remind ourselves why?
22 | * understand why two modeords not give bit-wise same answers in check_modeords.m (really why it's stochastic either exactly zero or around 1e-13)
23 | * Decide if non vs omp get different lib names? (like FFTW) -> not yet.
24 | * Intel MKL FFT interface option instead of FFTW?
25 | * recoup DFM's PyPI publishing of finufftpy, maybe awkward
26 | * make finufft.cpp shuffle routines dcomplex interface and native dcomplex arith (remove a bunch of 2* in indexing, and have no fftw_complex refs in them. However, need first to make sure using complex divide isn't slower than real divide used now).
27 | * rewrite fftw3 plans via 64bit guru interface to allow eg 1024^3 type 1 or 2 in 3D. Not sure needed (only for huge 1d transforms). see http://www.fftw.org/fftw3_doc/Guru-Interface.html#Guru-Interface
28 | 


--------------------------------------------------------------------------------
/devel/cuda/draft_interfaces_c+py_Jun2023.txt:
--------------------------------------------------------------------------------
 1 | int finufft_makeplan(int type, int dim, int64_t* nmodes, int iflag, int ntr, double eps, finufft_plan* plan, nufft_opts* opts)
 2 | int cufinufft_makeplan(int type, int dim, int* nmodes, int iflag, int ntransf, double tol, int maxbatchsize, cufinufft_plan *plan, cufinufft_opts *opts)
 3 | // Remove maxbatchsize (-> opts), use int64_t.   Rename ntransf to ntr, tol to eps.
 4 | 
 5 | int finufft_setpts(finufft_plan plan, int64_t m, double* x, double* y, double* z, int64_t n, double* s, double* t, double* u)
 6 | int cufinufft_setpts(int m, double* x, double* y, double* z, int n, double* s, double* t, double *u, cufinufft_plan plan)
 7 | // Move plan to the beginning, use int64_t.
 8 | 
 9 | int finufft_execute(finufft_plan plan, complex double* c, complex double* f)
10 | int cufinufft_execute(cuDoubleComplex* c, cuDoubleComplex* f, cufinufft_plan plan)
11 | // Move plan to beginning.
12 | 
13 | int finufft_destroy(finufft_plan plan)
14 | int cufinufft_destroy(cufinufft_plan plan)
15 | 
16 | void finufft_default_opts(finufft_opts* opts)
17 | int cufinufft_default_opts(int type, int dim, cufinufft_opts* opts);
18 | // Return type make void. Can we avoid specifying type and dim when calling? Allow "default" values for various parameters in opts struct? Yes, use meth=0 for auto.
19 | 
20 | 
21 | // & do same for float32 versions.
22 | 
23 | 
24 | -------------- PYTHON ----------
25 | 
26 | Plan.__init__(nufft_type, n_modes_or_dim, n_trans=1, eps=1e-06,
27 | isign=None, dtype='complex128', **kwargs)
28 | 
29 | cufinufft.__init__(nufft_type, modes, n_trans=1, eps=1e-06, isign=None,
30 | dtype=numpy.float32, **kwargs)
31 | 
32 | # avoid reliance on np?  use dtype = 'complex64' or 'complex128'
33 | (and deprecate the float dtype options, and in FINUFFT).
34 | 
35 | Plan.setpts(x=None, y=None, z=None, s=None, t=None, u=None)
36 | cufinufft.set_pts(kx, ky=None, kz=None)
37 | # Why not kx=None? What about type 3?
38 | 
39 | Plan.execute(data, out=None)
40 | cufinufft.execute(c, fk)
41 | # Allow returning output array. Specify in/out order.
42 | 


--------------------------------------------------------------------------------
/devel/fig_speed_ker_ppval.m:
--------------------------------------------------------------------------------
 1 | % script to generate and plot timing of raw kernel evals via various methods.
 2 | % Uses test_ker_ppval and its temp data file (see test_ker_ppval.cpp).
 3 | % Barnett 4/23/18
 4 | 
 5 | clear
 6 | nam = '/tmp/test_ker_ppval.dat';  % wipes any old data; make header for humans:
 7 | system(['echo "# M       w   t_plain  t_horner   relsuperr" > ' nam]);
 8 | 
 9 | Mwant=1e7;        % how many NU pts for a 1d1 or 1d2 NUFFT
10 | 
11 | ws=2:16;      % range of kernel widths, do timing tests...
12 | for j=1:numel(ws), w=ws(j)   % no semicolon, so each w is echoed as it runs
13 |   % the shell environment seems to matter: run with LD_LIBRARY_PATH unset, since otherwise the binary may link to a slower glibc(?) w/o fast simd...
14 |   % system(sprintf('./test_ker_ppval %d %d',Mwant,w));  % links to slower glibc?
15 |   system(sprintf('(unset LD_LIBRARY_PATH; ./test_ker_ppval %d %d)',Mwant,w));
16 | end
17 | 
18 | fid=fopen(nam,'r');   % read and make plot...
19 | fgets(fid);      % ignore header line
20 | [y,count] = fscanf(fid, '%f', [5,inf]);   % 5 numbers per row, in the header's column order
21 | fclose(fid);
22 | if (count~=5*numel(ws)), warning('file wrong number of lines!'); end   % expects one row per width tested
23 | y = y';              % since rows of text file come in as cols of array
24 | M = y(:,1);
25 | w = y(:,2);
26 | r = (M.*w)./y(:,3);  % rate in evals/sec, from the t_plain column
27 | r2 = (M.*w)./y(:,4); % rate in evals/sec, from the t_horner column
28 | e = y(:,5);          % rel err
29 | figure; plot(w,[r r2]/1e6,'+-'); xlabel('w'); ylabel('eval rate (Meval/s)');
30 | 
31 | ylim([0, 700])
32 | grid on
33 | 
34 | legend('exp eval','Horner'); title(sprintf('1thr, with padding, M=%d',Mwant))
35 | %print -dpng 1thr_ker_eval_speeds_withpadding.png
36 | 
37 | 
38 | % xeon gcc6.4: exp max out at 40 Meval/s; horner 170-300 Meval/s.
39 | 
40 | % cf ludvig's i7 results: 0.2 sec for 1e7*(w=12) = 600 Meval/s
41 | % (but that's special to m=12, also w/o the domain conditional?)
42 | % Wouldn't it be nice if could get that for all i7 cases.
43 | 
44 | % Concl: for xeon w/ gcc, horner is much better! (5-10x)
45 | 
46 | % Jan 2020: Ludvig padded the Horner loop too, giving a little boost for
47 | %  w = 2,3 (mod 4) in GCC7,9, and big boost for old GCC5.4.
48 | %  We're at 400-700 Meval/s on i7 for all compilers except GCC8 now,
49 | % in -O3 not -Ofast (which we can't use in FINUFFT).
50 | 


--------------------------------------------------------------------------------
/devel/foldrescale.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | echo "BINNING (small N)...."
 4 | 
 5 | g++ -O3 -march=native -funroll-loops -I../include -fopenmp foldrescale_perf2.cpp -o foldrescale_perf2 -lgomp
 6 | ./foldrescale_perf2
 7 | 
 8 | g++ -O3 -march=native -funroll-loops -I../include -fopenmp foldrescale_perf2.cpp -o foldrescale_perf2 -lgomp -ffast-math -fno-finite-math-only
 9 | ./foldrescale_perf2
10 | 
11 | echo "NOBIN...."
12 | 
13 | g++ -O3 -march=native -funroll-loops -I../include -fopenmp foldrescale_perf2.cpp -o foldrescale_perf2 -lgomp -DNOBIN
14 | ./foldrescale_perf2
15 | 
16 | g++ -O3 -march=native -funroll-loops -I../include -fopenmp foldrescale_perf2.cpp -o foldrescale_perf2 -lgomp -ffast-math -fno-finite-math-only -DNOBIN
17 | ./foldrescale_perf2
18 | 


--------------------------------------------------------------------------------
/devel/gen_all_horner_C_code.m:
--------------------------------------------------------------------------------
 1 | % Script to make all C code for looped Horner eval of kernels of all widths.
 2 | % writes to "ker" array, from a variable "z", and switches by width "w".
 3 | % Now does both upsampfacs.
 4 | % Resulting C code needs only including in a function.
 5 | 
 6 | % Barnett 4/23/18; now calling Ludvig's loop version from 4/25/18.
 7 | % version including low upsampfac, 6/17/18.
 8 | % Ludvig put in w=4n padding, 1/31/20. Mystery about why d was bigger 2/6/20.
 9 | % split out code for degree, beta, etc; loop upsampfacs Barnett 7/22/24.
10 | clear
11 | opts = struct();
12 | opts.wpad = false;    % pad kernel eval to multiple of 4
13 | ws = 2:16;            % kernel widths to generate (same list for both upsampfacs)
14 | 
15 | for upsampfac = [2.0, 1.25]    % sigma: either 2 (default) or low (eg 5/4)
16 |   fprintf('upsampfac = %g...\n',upsampfac)
17 | 
18 |   % one output file per upsampfac; paths are relative to this devel/ directory
19 |   if upsampfac==2
20 |     fnam = '../include/cufinufft/contrib/ker_horner_allw_loop.inc';
21 |   else
22 |     fnam = '../include/cufinufft/contrib/ker_lowupsampfac_horner_allw_loop.inc';
23 |   end
24 |   fid = fopen(fnam,'w');
25 |   if fid<0     % eg run from the wrong directory: stop here with a clear message
26 |     error('gen_all_horner_C_code:fopen','cannot open %s for writing',fnam);
27 |   end
28 |   fwrite(fid,sprintf('// Code generated by gen_all_horner_C_code.m in finufft/devel\n'));
29 |   fwrite(fid,sprintf('// Authors: Alex Barnett & Ludvig af Klinteberg.\n// (C) The Simons Foundation, Inc.\n'));
30 |   for j=1:numel(ws)
31 |     w = ws(j);
32 |     [d,beta] = get_degree_and_beta(w,upsampfac);
33 |     fprintf('w=%d\td=%d\tbeta=%.3g\n',w,d,beta);
34 |     str = gen_ker_horner_loop_C_code(w,d,beta,opts);
35 |     if j==1                                % open the "if constexpr" chain on w
36 |       fwrite(fid,sprintf('  if constexpr (w==%d) {\n',w));
37 |     else
38 |       fwrite(fid,sprintf('  } else if constexpr (w==%d) {\n',w));
39 |     end
40 |     for i=1:numel(str); fwrite(fid,['    ',str{i}]); end
41 |   end
42 |   fwrite(fid,sprintf('  } else\n    printf("width not implemented!\\n");\n'));
43 |   fclose(fid);
44 | 
45 | end
46 | 


--------------------------------------------------------------------------------
/devel/get_degree_and_beta.m:
--------------------------------------------------------------------------------
 1 | function [d,beta] = get_degree_and_beta(w,upsampfac)
 2 | % GET_DEGREE_AND_BETA  defines degree & beta from w & upsampfac
 3 | %
 4 | % [d,beta] = get_degree_and_beta(w,upsampfac)
 5 | %
 6 | % Universal definition for piecewise poly degree chosen for kernel
 7 | % coeff generation by matlab, and the ES kernel beta parameter.
 8 | % The map from tol to width w must match code in spreadinterp used to
 9 | % choose w.
10 | %
11 | % Inputs:  w          kernel width
12 | %          upsampfac  2 or 1.25 (0 is treated as the default 2); any other
13 | %                     value raises an error, since no (d,beta) rule is
14 | %                     defined here for it
15 | % Outputs: d          piecewise polynomial degree
16 | %          beta       ES kernel beta parameter
17 | %
18 | % Used by all other *.m codes for generating coeffs.
19 | %
20 | % To test: use KER_PPVAL_COEFF_MAT self-test
21 | %
22 | % To verify accuracy in practice, compile FINUFFT CPU then run
23 | % test/checkallaccs.sh and matlab/test/fig_accuracy.m
24 | %
25 | % Also see: REVERSE_ENGINEER_TOL, KER_PPVAL_COEFF_MAT
26 | 
27 | % Barnett 7/22/24
28 | if upsampfac==0.0, upsampfac=2.0; end
29 | 
30 | % if d set to 0 in following, means it gets auto-chosen...
31 | if upsampfac==2    % hardwire the betas for this default case
32 |   betaoverws = [2.20 2.26 2.38 2.30];   % must match setup_spreader
33 |   beta = betaoverws(min(4,w-1)) * w;    % uses last entry for w>=5
34 |   d = w + 1 + (w<=7) - (w==2);          % between 1-2 more degree than w. tweak
35 | elseif upsampfac==1.25  % use formulae, must match params in setup_spreader
36 |   gamma=0.97;                           % safety factor
37 |   betaoverws = gamma*pi*(1-1/(2*upsampfac));  % from cutoff freq formula
38 |   beta = betaoverws * w;
39 |   d = ceil(0.7*w+1.3);                  % less, since beta smaller. tweak
40 |   %d = 0;    % auto-choose override? No, too much jitter.
41 | else    % otherwise d and beta would be left undefined, failing obscurely below
42 |   error('get_degree_and_beta:upsampfac','unsupported upsampfac %g (must be 2 or 1.25, or 0 for the default 2)',upsampfac);
43 | end
44 | 
45 | if d==0
46 |   tol = reverse_engineer_tol(w,upsampfac);
47 |   opts.cutoff = 0.5 * tol;    % fudge to get more poly-approx acc than tol
48 |   C = ker_ppval_coeff_mat(w,0,beta,opts);    % do solve merely to get d
49 |   d = size(C,1)-1;            % extract the auto-chosen d
50 | end
51 | 


--------------------------------------------------------------------------------
/devel/i7_1thr_ker_eval_speeds.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/devel/i7_1thr_ker_eval_speeds.png


--------------------------------------------------------------------------------
/devel/interp_square_nowrap.cpp:
--------------------------------------------------------------------------------
 1 | // this is code I was messing with timing using time2d2interp.cpp
 2 | // around May 3, 2018, to figure how wrapping was slowing down spreading.
 3 | 
 4 | void interp_square_nowrap(FLT *out, FLT *du, FLT *ker1, FLT *ker2, BIGINT i1, BIGINT i2,
 5 |                           BIGINT N1, BIGINT N2, int ns)
 6 | // *************** don't periodic wrap, avoid ptrs. correct if no NU pts nr edge
 7 | //
 8 | // Accumulates one complex value from the interleaved (re,im) array du, using
 9 | // the ns-by-ns tensor-product weights ker1[dx]*ker2[dy].
10 | //   out    two reals written: real and imaginary part of the weighted sum
11 | //   du     interleaved complex data, indexed as row*N1 + column
12 | //   ker1   ns weights along the column (dx) index
13 | //   ker2   ns weights along the row (dy) index
14 | //   i1,i2  column and row index of the first point of the ns-by-ns square
15 | //   N1     row length, in complex entries
16 | //   N2     not used (no wrapping is done here)
17 | //   ns     number of points per dimension of the square
18 | // The former "if (0)" timing toggle was dropped: its two branches were identical.
19 | {
20 |   (void)N2; // kept in the signature for callers; silence unused-parameter warnings
21 |   out[0] = 0.0;
22 |   out[1] = 0.0;
23 |   for (int dy = 0; dy < ns; dy++) {
24 |     BIGINT j = N1 * (i2 + dy) + i1; // index of this row's first point
25 |     // #pragma omp simd
26 |     for (int dx = 0; dx < ns; dx++, ++j) {
27 |       FLT k = ker1[dx] * ker2[dy];
28 |       out[0] += du[2 * j] * k;
29 |       out[1] += du[2 * j + 1] * k;
30 |     }
31 |   }
32 | }
33 | 


--------------------------------------------------------------------------------
/devel/non-contiguous_frequency_inputs_bug2d1.py:
--------------------------------------------------------------------------------
 1 | # ahb updated for v2.0 interface, and complex c. Bug seems to have been fixed :)
 2 | 
 3 | import numpy as np
 4 | 
 5 | from finufft import nufft2d1
 6 | 
 7 | c = np.complex128(np.random.rand(2))  # two random strengths, cast to complex
 8 | 
 9 | omega = np.arange(4).reshape((2, 2)) / 3 * np.pi  # 2x2 array; its columns supply x and y
10 | 
11 | x0 = omega[:, 0]  # column slices of omega: strided (non-contiguous) views
12 | y0 = omega[:, 1]
13 | 
14 | f0 = np.zeros((4, 4), order='F', dtype=np.complex128)
15 | 
16 | nufft2d1(x0, y0, c, f0.shape, out=f0, eps=1e-14)
17 | 
18 | x1 = x0.copy()  # freshly allocated copies holding the same values
19 | y1 = y0.copy()
20 | 
21 | f1 = np.zeros((4, 4), order='F', dtype=np.complex128)
22 | 
23 | nufft2d1(x1, y1, c, f1.shape, out=f1, eps=1e-14)
24 | 
25 | print('difference: %e' % np.linalg.norm(f0 - f1))  # presumably ~0 once the bug named in the filename is fixed
26 | 


--------------------------------------------------------------------------------
/devel/plans_fall23.txt:
--------------------------------------------------------------------------------
 1 | FINUFFT (CPU+GPU) plans, Fall 2023:
 2 | 
 3 | pick meeting date
 4 | 
 5 | * CPU spreader/interp, bring in Wenda's stuff -> Marco?
 6 | 
 7 | * GPU type 3   Marco?
 8 | 
 9 | * implement Reinecke's sparse sliced FFT, w/ many-vectors too. CPU
10 | Libin help?
11 | 
12 | * binaries release with GH Releases feature (Assets?). Not crucial.
13 | 
14 | * CPU perf tests, standardized way to benchmark spread/interp, FFT, H<>D, etc.
15 | Add to docs/devnotes.rst how to run benchmarks.
16 | Robert + Joakim
17 | 
18 | * tutorial in docs: on eg inverse FFT by CG iteration.
19 | 
20 | * doc for py local install #340
21 | 
22 | * #340 docs for GPU simple interface
23 | 
24 | Go through Issues & prioritize.
25 | 
26 | PRs.
27 | 


--------------------------------------------------------------------------------
/devel/reverse_engineer_tol.m:
--------------------------------------------------------------------------------
 1 | function tol = reverse_engineer_tol(w,upsampfac)
 2 | % REVERSE_ENGINEER_TOL  reconstructs tolerance from width and upsampfac
 3 | %
 4 | % tol = reverse_engineer_tol(w,upsampfac) returns the tolerance tol that
 5 | %  corresponds to kernel width w at upsampling factor upsampfac (aka sigma).
 6 | %
 7 | %  For each fixed upsampfac this has to stay the inverse of the rule by which
 8 | %  spreadinterp.cpp:setup_spreader() picks w from tol.
 9 | 
10 | % Barnett 7/22/24
11 | 
12 | % generic formula (covers sigma=1.25); overridden below for sigma=2
13 | tol = exp(-pi*w*sqrt(1-1/upsampfac));
14 | if upsampfac==2.0
15 |   tol = 10^(1-w);
16 | end
17 | 


--------------------------------------------------------------------------------
/docs/FIlogo_200.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/FIlogo_200.png


--------------------------------------------------------------------------------
/docs/c1d.docsrc:
--------------------------------------------------------------------------------
 1 | int @F1d1(int64_t M, double* x, complex<double>* c, int iflag, double eps, int64_t N1, complex<double>* f, finufft_opts* opts)
 2 | 
 3 |   1D complex nonuniform FFT of type 1 (nonuniform to uniform).
 4 | @t
 5 |               M-1
 6 |      f[k1] =  SUM c[j] exp(+/-i k1 x[j])  for -N1/2 <= k1 <= (N1-1)/2
 7 |               j=0
 8 | 
 9 |   Inputs:
10 | @nt
11 | @mi
12 | @x
13 | @ci
14 | @f
15 | @e
16 |     N1     number of output Fourier modes to be computed
17 | @o
18 | 
19 |   Outputs:
20 |     f      Fourier mode coefficients (size N1*ntr complex array)
21 | @r
22 | @no
23 | @notes12
24 | 
25 | 
26 | int @F1d2(int64_t M, double* x, complex<double>* c, int iflag, double eps, int64_t N1, complex<double>* f, finufft_opts* opts)
27 | 
28 |   1D complex nonuniform FFT of type 2 (uniform to nonuniform).
29 | @t
30 |      c[j] = SUM   f[k1] exp(+/-i k1 x[j])      for j = 0,...,M-1
31 |              k1
32 |      where the sum is over integers -N1/2 <= k1 <= (N1-1)/2.
33 | 
34 |   Inputs:
35 | @nt
36 | @mo
37 | @x
38 | @f
39 | @e
40 |     N1     number of input Fourier modes
41 |     f      Fourier mode coefficients (size N1*ntr complex array)
42 | @o
43 | 
44 |   Outputs:
45 | @co
46 | @r
47 | @no
48 | @notes12
49 | 
50 | 
51 | int @F1d3(int64_t M, double* x, complex<double>* c, int iflag, double eps, int64_t N, double* s, complex<double>* f, finufft_opts* opts)
52 | 
53 |   1D complex nonuniform FFT of type 3 (nonuniform to nonuniform).
54 | @t
55 |               M-1
56 |      f[k]  =  SUM   c[j] exp(+/-i s[k] x[j]),     for k = 0,...,N-1
57 |               j=0
58 | 
59 |   Inputs:
60 | @nt
61 | @mi
62 | @xr
63 | @ci
64 | @f
65 | @e
66 | @n
67 | @s
68 | @o
69 | 
70 |   Outputs:
71 |     f     Fourier transform values at targets (size N*ntr complex array)
72 | @r
73 | @no
74 | 


--------------------------------------------------------------------------------
/docs/changelog.rst:
--------------------------------------------------------------------------------
1 | .. _changelog:
2 | 
3 | Changelog
4 | =========
5 | 
6 | .. literalinclude:: ../CHANGELOG
7 | 


--------------------------------------------------------------------------------
/docs/genmatlabhelp.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Extracts and concatenates the MATLAB documentation blocks from the .m files,
 3 | # writing one text file for the CPU interface and one for the GPU interface.
 4 | # Two steps per file: 1) keep the lines beginning with %, 2) strip the first
 5 | # character of each kept line.
 6 | # Barnett 11/2/17, changed output name 7/24/20. Added GPU 3/31/25.
 7 | 
 8 | # dump_help OUTFILE FILE...: empty OUTFILE, then for each FILE append a "::"
 9 | # line, a blank line, that file's de-commented % lines, and a blank line.
10 | dump_help() {
11 |     local out=$1
12 |     shift
13 |     > $out
14 |     local src
15 |     for src in "$@"
16 |     do
17 |         printf "::\n\n" >> $out
18 |         sed -n '/^%/p' $src | sed 's/^.//' >> $out
19 |         printf "\n" >> $out
20 |     done
21 | }
22 | 
23 | # CPU interface help
24 | dump_help matlabhelp.doc ../matlab/finufft?d?.m ../matlab/finufft_plan.m
25 | 
26 | # GPU interface help
27 | dump_help matlabgpuhelp.doc ../matlab/cufinufft?d?.m ../matlab/cufinufft_plan.m
28 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | .. finufft documentation master file. This also contains what appears on the
 2 |    front HTML page.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 |    Barnett 8/27/20: this is HTML only; split out separate latexindex.rst
 6 | 
 7 | .. _index:
 8 | 
 9 | Flatiron Institute Nonuniform Fast Fourier Transform
10 | ========================================================
11 | 
12 | .. include:: overview.src
13 | 
14 | 
15 | Documentation contents
16 | ========================
17 | 
18 | .. toctree::
19 |    :maxdepth: 3
20 | 
21 |    install
22 |    install_gpu
23 |    dirs
24 |    math
25 |    cex
26 |    c
27 |    c_gpu
28 |    opts
29 |    error
30 |    trouble
31 |    performance
32 |    tut
33 |    fortran
34 |    matlab
35 |    matlab_gpu
36 |    python
37 |    python_gpu
38 |    julia
39 |    changelog
40 |    nfft_migr
41 |    cufinufft_migration
42 |    devnotes
43 |    related
44 |    users
45 |    ackn
46 |    refs
47 | 


--------------------------------------------------------------------------------
/docs/julia.rst:
--------------------------------------------------------------------------------
 1 | .. _julia:
 2 | 
 3 | Julia interfaces (CPU and GPU)
 4 | ==============================
 5 | 
 6 | Principal author Ludvig af Klinteberg and others have built and maintain `FINUFFT.jl <https://github.com/ludvigak/FINUFFT.jl>`_, an interface from the `Julia <https://julialang.org/>`_ language. This official Julia package supports 32-bit and 64-bit precision, now on both CPU and GPU (via `CUDA.jl`), via a common interface.
 7 | The Julia package installation automatically downloads pre-built CPU binaries of the FINUFFT library for Linux, macOS, Windows and FreeBSD (for a full list see `finufft_jll <https://github.com/JuliaBinaryWrappers/finufft_jll.jl>`_), and the GPU binary for Linux (see `cufinufft_jll <https://github.com/JuliaBinaryWrappers/cufinufft_jll.jl>`_).
 8 | 
 9 | `FINUFFT.jl` has itself been wrapped as part of `NFFT.jl <https://juliamath.github.io/NFFT.jl/dev/performance/>`_, which contains an "abstract" interface
10 | to any NUFFT in Julia, with FINUFFT as an example. This was by Tobias Knopp and coworkers, starting around 2022.
11 | Their
12 | `performance comparison page <https://juliamath.github.io/NFFT.jl/dev/performance/>`_
13 | shows that FINUFFT matches their native Julia implementation for speed of type 1
14 | and type 2 transforms
15 | in 3D, and beats NFFT, and with less precomputation.
16 | In 1D and 2D, the native Julia implementation is 1-2 times faster
17 | than FINUFFT in their tests on uniformly-random nonuniform points.
18 | 


--------------------------------------------------------------------------------
/docs/latexindex.rst:
--------------------------------------------------------------------------------
 1 | .. _index:
 2 | 
 3 | .. finufft documentation master file - latex only.
 4 |    You can adapt this file completely to your liking, but it should at least
 5 |    contain the root `toctree` directive.
 6 |    Barnett 8/27/20: split this out as separate latexindex.rst.
 7 |    Also see latex section of conf.py for tocdepth override, etc.
 8 | 
 9 | :orphan:
10 | 
11 | 
12 | Flatiron Institute Nonuniform Fast Fourier Transform
13 | =====================================================
14 | 
15 | .. the toctree seems to have to precede any text for LaTeX chapters to be
16 |    numbered correctly. Note that ch.1 is now the overview (unlike in index.rst):
17 | 
18 | .. toctree::
19 | 
20 |    overview
21 |    install
22 |    dirs
23 |    math
24 |    cex
25 |    c
26 |    opts
27 |    error
28 |    trouble
29 |    tut
30 |    fortran
31 |    matlab
32 |    python
33 |    julia
34 |    changelog
35 |    devnotes
36 |    related
37 |    users
38 |    ackn
39 |    refs
40 | 


--------------------------------------------------------------------------------
/docs/logo-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/logo-32x32.png


--------------------------------------------------------------------------------
/docs/logo-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/logo-small.png


--------------------------------------------------------------------------------
/docs/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/logo.png


--------------------------------------------------------------------------------
/docs/logo_gpu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/logo_gpu.png


--------------------------------------------------------------------------------
/docs/makefile.doc:
--------------------------------------------------------------------------------
 1 | make[1]: Entering directory '/home/marco/repos/finufft'
 2 | Makefile for FINUFFT CPU library. Please specify your task:
 3 |  make lib - build the main library (in lib/ and lib-static/)
 4 |  make examples - compile and run all codes in examples/
 5 |  make test - compile and run quick math validation tests
 6 |  make perftest - compile and run (slower) performance tests
 7 |  make fortran - compile and run Fortran tests and examples
 8 |  make matlab - compile MATLAB interfaces (no test)
 9 |  make octave - compile and test octave interfaces
10 |  make python - compile and test python interfaces
11 |  make all - do all the above (around 1 minute; assumes you have MATLAB, etc)
12 |  make spreadtest - compile & run spreader-only tests (no FFT)
13 |  make spreadtestall - small set spreader-only tests for CI use
14 |  make objclean - remove all object files, preserving libs & MEX
15 |  make clean - also remove all lib, MEX, py, and demo executables
16 |  make setup - check (and possibly download) dependencies
17 |  make setupclean - delete downloaded dependencies
18 | For faster (multicore) compilation, append, for example, -j8
19 | 
20 | Make options:
21 |  'make [task] OMP=OFF' for single-threaded (no refs to OpenMP)
22 |  'make [task] FFT=DUCC' for DUCC0 FFT (otherwise uses FFTW3)
23 |  You must at least 'make objclean' before changing such options!
24 | 
25 | Also see docs/install.rst and docs/README
26 | make[1]: Leaving directory '/home/marco/repos/finufft'
27 | 


--------------------------------------------------------------------------------
/docs/overview.rst:
--------------------------------------------------------------------------------
1 | .. _index:
2 | 
3 | Overview
4 | =========
5 | 
6 | .. include:: overview.src
7 | 


--------------------------------------------------------------------------------
/docs/pics/10000x1x1-type-1-upsamp1.25-precd-thread1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/10000x1x1-type-1-upsamp1.25-precd-thread1.png


--------------------------------------------------------------------------------
/docs/pics/10000x1x1-type-1-upsamp1.25-precf-thread1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/10000x1x1-type-1-upsamp1.25-precf-thread1.png


--------------------------------------------------------------------------------
/docs/pics/10000x1x1-type-1-upsamp2.00-precd-thread1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/10000x1x1-type-1-upsamp2.00-precd-thread1.png


--------------------------------------------------------------------------------
/docs/pics/10000x1x1-type-1-upsamp2.00-precf-thread1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/10000x1x1-type-1-upsamp2.00-precf-thread1.png


--------------------------------------------------------------------------------
/docs/pics/10000x1x1-type-2-upsamp1.25-precd-thread1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/10000x1x1-type-2-upsamp1.25-precd-thread1.png


--------------------------------------------------------------------------------
/docs/pics/10000x1x1-type-2-upsamp1.25-precf-thread1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/10000x1x1-type-2-upsamp1.25-precf-thread1.png


--------------------------------------------------------------------------------
/docs/pics/10000x1x1-type-2-upsamp2.00-precd-thread1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/10000x1x1-type-2-upsamp2.00-precd-thread1.png


--------------------------------------------------------------------------------
/docs/pics/10000x1x1-type-2-upsamp2.00-precf-thread1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/10000x1x1-type-2-upsamp2.00-precf-thread1.png


--------------------------------------------------------------------------------
/docs/pics/10000x1x1-type-3-upsamp1.25-precd-thread1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/10000x1x1-type-3-upsamp1.25-precd-thread1.png


--------------------------------------------------------------------------------
/docs/pics/10000x1x1-type-3-upsamp1.25-precf-thread1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/10000x1x1-type-3-upsamp1.25-precf-thread1.png


--------------------------------------------------------------------------------
/docs/pics/10000x1x1-type-3-upsamp2.00-precd-thread1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/10000x1x1-type-3-upsamp2.00-precd-thread1.png


--------------------------------------------------------------------------------
/docs/pics/10000x1x1-type-3-upsamp2.00-precf-thread1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/10000x1x1-type-3-upsamp2.00-precf-thread1.png


--------------------------------------------------------------------------------
/docs/pics/192x192x128-type-1-upsamp1.25-precd-thread16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/192x192x128-type-1-upsamp1.25-precd-thread16.png


--------------------------------------------------------------------------------
/docs/pics/192x192x128-type-1-upsamp1.25-precd-thread32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/192x192x128-type-1-upsamp1.25-precd-thread32.png


--------------------------------------------------------------------------------
/docs/pics/192x192x128-type-1-upsamp2.00-precd-thread16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/192x192x128-type-1-upsamp2.00-precd-thread16.png


--------------------------------------------------------------------------------
/docs/pics/192x192x128-type-1-upsamp2.00-precd-thread32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/192x192x128-type-1-upsamp2.00-precd-thread32.png


--------------------------------------------------------------------------------
/docs/pics/192x192x128-type-2-upsamp1.25-precd-thread16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/192x192x128-type-2-upsamp1.25-precd-thread16.png


--------------------------------------------------------------------------------
/docs/pics/192x192x128-type-2-upsamp1.25-precd-thread32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/192x192x128-type-2-upsamp1.25-precd-thread32.png


--------------------------------------------------------------------------------
/docs/pics/192x192x128-type-2-upsamp2.00-precd-thread16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/192x192x128-type-2-upsamp2.00-precd-thread16.png


--------------------------------------------------------------------------------
/docs/pics/192x192x128-type-2-upsamp2.00-precd-thread32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/192x192x128-type-2-upsamp2.00-precd-thread32.png


--------------------------------------------------------------------------------
/docs/pics/192x192x128-type-3-upsamp1.25-precd-thread16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/192x192x128-type-3-upsamp1.25-precd-thread16.png


--------------------------------------------------------------------------------
/docs/pics/192x192x128-type-3-upsamp1.25-precd-thread32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/192x192x128-type-3-upsamp1.25-precd-thread32.png


--------------------------------------------------------------------------------
/docs/pics/192x192x128-type-3-upsamp2.00-precd-thread16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/192x192x128-type-3-upsamp2.00-precd-thread16.png


--------------------------------------------------------------------------------
/docs/pics/192x192x128-type-3-upsamp2.00-precd-thread32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/192x192x128-type-3-upsamp2.00-precd-thread32.png


--------------------------------------------------------------------------------
/docs/pics/250x250x250-type-1-upsamp2.00-precd-thread1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/250x250x250-type-1-upsamp2.00-precd-thread1.png


--------------------------------------------------------------------------------
/docs/pics/250x250x250-type-2-upsamp2.00-precd-thread1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/250x250x250-type-2-upsamp2.00-precd-thread1.png


--------------------------------------------------------------------------------
/docs/pics/250x250x250-type-3-upsamp2.00-precd-thread1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/250x250x250-type-3-upsamp2.00-precd-thread1.png


--------------------------------------------------------------------------------
/docs/pics/320x320x1-type-1-upsamp1.25-precd-thread1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-1-upsamp1.25-precd-thread1.png


--------------------------------------------------------------------------------
/docs/pics/320x320x1-type-1-upsamp1.25-precf-thread1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-1-upsamp1.25-precf-thread1.png


--------------------------------------------------------------------------------
/docs/pics/320x320x1-type-1-upsamp1.25-precf-thread16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-1-upsamp1.25-precf-thread16.png


--------------------------------------------------------------------------------
/docs/pics/320x320x1-type-1-upsamp1.25-precf-thread32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-1-upsamp1.25-precf-thread32.png


--------------------------------------------------------------------------------
/docs/pics/320x320x1-type-1-upsamp2.00-precd-thread1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-1-upsamp2.00-precd-thread1.png


--------------------------------------------------------------------------------
/docs/pics/320x320x1-type-1-upsamp2.00-precf-thread1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-1-upsamp2.00-precf-thread1.png


--------------------------------------------------------------------------------
/docs/pics/320x320x1-type-1-upsamp2.00-precf-thread16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-1-upsamp2.00-precf-thread16.png


--------------------------------------------------------------------------------
/docs/pics/320x320x1-type-1-upsamp2.00-precf-thread32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-1-upsamp2.00-precf-thread32.png


--------------------------------------------------------------------------------
/docs/pics/320x320x1-type-2-upsamp1.25-precd-thread1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-2-upsamp1.25-precd-thread1.png


--------------------------------------------------------------------------------
/docs/pics/320x320x1-type-2-upsamp1.25-precf-thread1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-2-upsamp1.25-precf-thread1.png


--------------------------------------------------------------------------------
/docs/pics/320x320x1-type-2-upsamp1.25-precf-thread16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-2-upsamp1.25-precf-thread16.png


--------------------------------------------------------------------------------
/docs/pics/320x320x1-type-2-upsamp1.25-precf-thread32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-2-upsamp1.25-precf-thread32.png


--------------------------------------------------------------------------------
/docs/pics/320x320x1-type-2-upsamp2.00-precd-thread1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-2-upsamp2.00-precd-thread1.png


--------------------------------------------------------------------------------
/docs/pics/320x320x1-type-2-upsamp2.00-precf-thread1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-2-upsamp2.00-precf-thread1.png


--------------------------------------------------------------------------------
/docs/pics/320x320x1-type-2-upsamp2.00-precf-thread16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-2-upsamp2.00-precf-thread16.png


--------------------------------------------------------------------------------
/docs/pics/320x320x1-type-2-upsamp2.00-precf-thread32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-2-upsamp2.00-precf-thread32.png


--------------------------------------------------------------------------------
/docs/pics/320x320x1-type-3-upsamp1.25-precd-thread1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-3-upsamp1.25-precd-thread1.png


--------------------------------------------------------------------------------
/docs/pics/320x320x1-type-3-upsamp1.25-precf-thread1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-3-upsamp1.25-precf-thread1.png


--------------------------------------------------------------------------------
/docs/pics/320x320x1-type-3-upsamp1.25-precf-thread16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-3-upsamp1.25-precf-thread16.png


--------------------------------------------------------------------------------
/docs/pics/320x320x1-type-3-upsamp1.25-precf-thread32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-3-upsamp1.25-precf-thread32.png


--------------------------------------------------------------------------------
/docs/pics/320x320x1-type-3-upsamp2.00-precd-thread1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-3-upsamp2.00-precd-thread1.png


--------------------------------------------------------------------------------
/docs/pics/320x320x1-type-3-upsamp2.00-precf-thread1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-3-upsamp2.00-precf-thread1.png


--------------------------------------------------------------------------------
/docs/pics/320x320x1-type-3-upsamp2.00-precf-thread16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-3-upsamp2.00-precf-thread16.png


--------------------------------------------------------------------------------
/docs/pics/320x320x1-type-3-upsamp2.00-precf-thread32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-3-upsamp2.00-precf-thread32.png


--------------------------------------------------------------------------------
/docs/pics/contft1d.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/contft1d.png


--------------------------------------------------------------------------------
/docs/pics/contft1dN.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/contft1dN.png


--------------------------------------------------------------------------------
/docs/pics/contft1dans.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/contft1dans.png


--------------------------------------------------------------------------------
/docs/pics/contft1dsing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/contft1dsing.png


--------------------------------------------------------------------------------
/docs/pics/contft2dans.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/contft2dans.png


--------------------------------------------------------------------------------
/docs/pics/contft2dnodes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/contft2dnodes.png


--------------------------------------------------------------------------------
/docs/pics/cufinufft_announce.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/cufinufft_announce.png


--------------------------------------------------------------------------------
/docs/pics/fser1d.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/fser1d.png


--------------------------------------------------------------------------------
/docs/pics/fser2d.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/fser2d.png


--------------------------------------------------------------------------------
/docs/pics/grf1d.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/grf1d.png


--------------------------------------------------------------------------------
/docs/pics/inv1d2err.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/inv1d2err.png


--------------------------------------------------------------------------------
/docs/pics/inv1d2err_wellcond.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/inv1d2err_wellcond.png


--------------------------------------------------------------------------------
/docs/pics/pois_fft.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/pois_fft.png


--------------------------------------------------------------------------------
/docs/pics/pois_fhat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/pois_fhat.png


--------------------------------------------------------------------------------
/docs/pics/pois_nufft.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/pois_nufft.png


--------------------------------------------------------------------------------
/docs/pics/pois_nugrid.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/pois_nugrid.png


--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | texext
2 | sphinx_rtd_theme
3 | 


--------------------------------------------------------------------------------
/docs/spreadpic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/spreadpic.png


--------------------------------------------------------------------------------
/docs/tut.rst:
--------------------------------------------------------------------------------
 1 | .. _tut:
 2 | 
 3 | Tutorials and application demos
 4 | ==================================
 5 | 
 6 | The following are instructive demos of using FINUFFT for a variety of
 7 | spectrally-related tasks arising in
 8 | scientific computing and signal/image processing. We will slowly grow the
 9 | list (contact us to add one).
10 | For conciseness of code, and ease of writing, they are currently
11 | in MATLAB (they should work on versions at least back to R2017a).
12 | 
13 | .. toctree::
14 | 
15 |    tutorial/serieseval
16 |    tutorial/contft
17 |    tutorial/peripois2d
18 |    tutorial/grf
19 |    tutorial/inv1d2
20 | 
21 | For further applications, see :ref:`references <refs>`, and:
22 | 
23 | * These software tutorial `PDF slides <http://users.flatironinstitute.org/~ahb/talks/fwam23.pdf>`_.
24 | 
25 | * These seminar `PDF slides <http://users.flatironinstitute.org/~ahb/talks/pacm20.pdf>`_.
26 | 
27 | * `Fast Fresnel diffraction <https://github.com/ahbarnett/fresnaq>`_ for optics and acoustics applications.
28 | 
29 | * `Equispaced Fourier methods for Gaussian process regression <https://github.com/flatironinstitute/gp-shootout>`_ as described in https://arxiv.org/abs/2210.10210 and https://arxiv.org/abs/2305.11065
30 | 
31 | * Tutorials for PyNUFFT with 1D and 2D reconstruction examples `here <http://jyhmiinlin.github.io/pynufft/tutor/init.html>`_.
32 | 
33 | * The numerical sampling of `random plane waves <https://users.flatironinstitute.org/~ahb/rpws/>`_.
34 | 


--------------------------------------------------------------------------------
/examples/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(EXAMPLES # C++ examples: each <name> is built from <name>.cpp in the loop below
 2 |     guru1d1
 3 |     guru1d1f
 4 |     guru2d1
 5 |     many1d1
 6 |     simple1d1
 7 |     simple1d1f
 8 |     simulplans1d1
 9 | )
10 | set(EXAMPLES_OPENMP threadsafe1d1 threadsafe2d2f) # C++ examples built only when FINUFFT_USE_OPENMP is true
11 | set(EXAMPLES_C guru1d1c simple1d1c simple1d1cf) # C examples: each <name> is built from <name>.c
12 | # Search for a library named "m"; when found, it is linked into the C examples below.
13 | find_library(MATH_LIBRARY m)
14 | # C++ examples: one executable per entry, requiring C++14 and linking against the finufft target.
15 | foreach(EXAMPLE ${EXAMPLES})
16 |     add_executable(${EXAMPLE} ${EXAMPLE}.cpp)
17 |     target_compile_features(${EXAMPLE} PRIVATE cxx_std_14)
18 |     target_link_libraries(${EXAMPLE} PRIVATE finufft)
19 |     if(CMAKE_PROJECT_NAME STREQUAL "FINUFFT") # only when the top-level project is named FINUFFT
20 |         enable_asan(${EXAMPLE}) # defined outside this file; presumably adds sanitizer flags - TODO confirm
21 |     endif()
22 | endforeach()
23 | # C examples: same pattern as above, but built from .c sources with no C++ standard requirement.
24 | foreach(EXAMPLE ${EXAMPLES_C})
25 |     add_executable(${EXAMPLE} ${EXAMPLE}.c)
26 |     target_link_libraries(${EXAMPLE} PRIVATE finufft)
27 |     if(CMAKE_PROJECT_NAME STREQUAL "FINUFFT") # only when the top-level project is named FINUFFT
28 |         enable_asan(${EXAMPLE})
29 |     endif()
30 |     if(MATH_LIBRARY) # set by find_library above; skipped when no "m" library was found
31 |         target_link_libraries(${EXAMPLE} PRIVATE ${MATH_LIBRARY})
32 |     endif()
33 | endforeach()
34 | # OpenMP examples: only configured when FINUFFT_USE_OPENMP is true.
35 | if(FINUFFT_USE_OPENMP)
36 |     foreach(EXAMPLE ${EXAMPLES_OPENMP})
37 |         add_executable(${EXAMPLE} ${EXAMPLE}.cpp)
38 |         target_link_libraries(${EXAMPLE} PRIVATE finufft OpenMP::OpenMP_CXX) # OpenMP::OpenMP_CXX target must already exist (not created in this file)
39 |         target_compile_features(${EXAMPLE} PRIVATE cxx_std_11) # NOTE(review): C++11 here vs C++14 for the other C++ examples - confirm intentional
40 |         if(CMAKE_PROJECT_NAME STREQUAL "FINUFFT") # only when the top-level project is named FINUFFT
41 |             enable_asan(${EXAMPLE})
42 |         endif()
43 |     endforeach()
44 | endif()
45 | 


--------------------------------------------------------------------------------
/examples/cuda/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | file(GLOB example_src "*.cpp")
 2 | 
 3 | foreach(srcfile ${example_src})
 4 |     string(REPLACE ".cpp" "" executable ${srcfile})
 5 |     get_filename_component(executable ${executable} NAME)
 6 |     add_executable(${executable} ${srcfile})
 7 |     target_include_directories(${executable} PUBLIC ${CUFINUFFT_INCLUDE_DIRS})
 8 |     target_link_libraries(${executable} cufinufft CUDA::cufft CUDA::cudart)
 9 |     target_compile_features(${executable} PRIVATE cxx_std_17)
10 | endforeach()
11 | 


--------------------------------------------------------------------------------
/examples/cuda/README:
--------------------------------------------------------------------------------
 1 | Examples of cuFINUFFT usage in C++ and Python
 2 | 
 3 | Here we show 2D transforms of types 1 and 2 being performed and tested
 4 | in C++ and in Python. In each case, a batch of transforms is done with
 5 | new coefficients or weights, but the same set of nonuniform points; this
 6 | explains the suffix "many" in the code names. You may set ntransf=1 to
 7 | perform a single transform. Default options are used. In each case the
 8 | four steps (plan, setpts, execute, destroy) are used. A math test is also
 9 | performed; see the FINUFFT documentation for the definitions of the
10 | transforms: https://finufft.readthedocs.io/en/latest/math.html
11 | 
12 | For more usage examples see:
13 | 
14 | ../test/cufinufft*.cu
15 | ../python/cufinufft/tests/*.py
16 | 


--------------------------------------------------------------------------------
/fortran/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | add_library(
 2 |     directft
 3 |     OBJECT
 4 |     directft/dirft1d.f
 5 |     directft/dirft1df.f
 6 |     directft/dirft2d.f
 7 |     directft/dirft2df.f
 8 |     directft/dirft3d.f
 9 |     directft/dirft3df.f
10 | )
11 | 
12 | set(FORTRAN_EXAMPLES
13 |     guru1d1
14 |     nufft1d_demo
15 |     nufft2d_demo
16 |     nufft2dmany_demo
17 |     nufft3d_demo
18 |     simple1d1
19 | )
20 | 
21 | foreach(EXAMPLE ${FORTRAN_EXAMPLES})
22 |     add_executable(fort_${EXAMPLE} examples/${EXAMPLE}.f)
23 |     add_executable(fort_${EXAMPLE}f examples/${EXAMPLE}f.f)
24 | 
25 |     target_link_libraries(fort_${EXAMPLE} PRIVATE directft finufft ${FINUFFT_FFTLIBS})
26 |     target_link_libraries(fort_${EXAMPLE}f PRIVATE directft finufft ${FINUFFT_FFTLIBS})
27 | endforeach()
28 | 


--------------------------------------------------------------------------------
/fortran/cmcl_license.txt:
--------------------------------------------------------------------------------
 1 | Below is the license applying to the original fortran drivers and direct
 2 | evaluation routines modified in this directory. This license does not
 3 | apply to the rest of FINUFFT.
 4 | 
 5 | -------------
 6 | 
 7 | Copyright (c) 2009-2014, Leslie Greengard, June-Yub Lee and Zydrunas Gimbutas
 8 | All rights reserved.
 9 | 
10 | Redistribution and use in source and binary forms, with or without
11 | modification, are permitted provided that the following conditions are met:
12 | 
13 | 1. Redistributions of source code must retain the above copyright notice, this
14 |    list of conditions and the following disclaimer.
15 | 2. Redistributions in binary form must reproduce the above copyright notice,
16 |    this list of conditions and the following disclaimer in the documentation
17 |    and/or other materials provided with the distribution.
18 | 
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
26 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | 
30 | The views and conclusions contained in the software and documentation are those
31 | of the authors and should not be interpreted as representing official policies,
32 | either expressed or implied, of the FreeBSD Project.
33 | 


--------------------------------------------------------------------------------
/fortran/directft/README:
--------------------------------------------------------------------------------
1 | This directory contains the CMCL NUFFT direct summation implementations,
2 | plus single-precision versions by Alex Barnett, 2017.
3 | It also contains the legendary prini.f, which is currently unused.
4 | 


--------------------------------------------------------------------------------
/include/cufinufft.h:
--------------------------------------------------------------------------------
 1 | // Defines the C++/C user interface to CUFINUFFT library.
 2 | #include <cufft.h>
 3 | 
 4 | #include <stdint.h>
 5 | 
 6 | #include <cufinufft_opts.h>
 7 | #include <finufft_errors.h>
 8 | 
 9 | typedef struct cufinufft_plan_s *cufinufft_plan;
10 | typedef struct cufinufft_fplan_s *cufinufftf_plan;
11 | 
12 | #ifdef __cplusplus
13 | extern "C" {
14 | #endif
15 | void cufinufft_default_opts(cufinufft_opts *opts);
16 | 
17 | int cufinufft_makeplan(int type, int dim, const int64_t *n_modes, int iflag, int ntr,
18 |                        double eps, cufinufft_plan *d_plan_ptr, cufinufft_opts *opts);
19 | int cufinufftf_makeplan(int type, int dim, const int64_t *n_modes, int iflag, int ntr,
20 |                         float eps, cufinufftf_plan *d_plan_ptr, cufinufft_opts *opts);
21 | 
22 | int cufinufft_setpts(cufinufft_plan d_plan, int64_t M, double *d_x, double *d_y,
23 |                      double *d_z, int N, double *d_s, double *d_t, double *d_u);
24 | int cufinufftf_setpts(cufinufftf_plan d_plan, int64_t M, float *d_x, float *d_y,
25 |                       float *d_z, int N, float *d_s, float *d_t, float *d_u);
26 | 
27 | int cufinufft_execute(cufinufft_plan d_plan, cuDoubleComplex *d_c, cuDoubleComplex *d_fk);
28 | int cufinufftf_execute(cufinufftf_plan d_plan, cuFloatComplex *d_c, cuFloatComplex *d_fk);
29 | 
30 | int cufinufft_destroy(cufinufft_plan d_plan);
31 | int cufinufftf_destroy(cufinufftf_plan d_plan);
32 | #ifdef __cplusplus
33 | }
34 | #endif
35 | 


--------------------------------------------------------------------------------
/include/cufinufft/cudeconvolve.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CUDECONVOLVE_H__
 2 | #define __CUDECONVOLVE_H__
 3 | 
 4 | #include <cufinufft/types.h>
 5 | 
 6 | namespace cufinufft {
 7 | namespace deconvolve {
 8 | template<typename T, int modeord>
 9 | __global__ void deconvolve_1d(int ms, int nf1, int fw_width, cuda_complex<T> *fw,
10 |                               cuda_complex<T> *fk, T *fwkerhalf1);
11 | template<typename T, int modeord>
12 | __global__ void amplify_1d(int ms, int nf1, int fw_width, cuda_complex<T> *fw,
13 |                            cuda_complex<T> *fk, T *fwkerhalf2);
14 | template<typename T, int modeord>
15 | __global__ void deconvolve_2d(int ms, int mt, int nf1, int nf2, int fw_width,
16 |                               cuda_complex<T> *fw, cuda_complex<T> *fk, T *fwkerhalf1,
17 |                               T *fwkerhalf2);
18 | template<typename T, int modeord>
19 | __global__ void amplify_2d(int ms, int mt, int nf1, int nf2, int fw_width,
20 |                            cuda_complex<T> *fw, cuda_complex<T> *fk, T *fwkerhalf1,
21 |                            T *fwkerhalf2);
22 | 
23 | template<typename T, int modeord>
24 | __global__ void deconvolve_3d(int ms, int mt, int mu, int nf1, int nf2, int nf3,
25 |                               int fw_width, cuda_complex<T> *fw, cuda_complex<T> *fk,
26 |                               T *fwkerhalf1, T *fwkerhalf2, T *fwkerhalf3);
27 | template<typename T, int modeord>
28 | __global__ void amplify_3d(int ms, int mt, int mu, int nf1, int nf2, int nf3,
29 |                            int fw_width, cuda_complex<T> *fw, cuda_complex<T> *fk,
30 |                            T *fwkerhalf1, T *fwkerhalf2, T *fwkerhalf3);
31 | 
32 | template<typename T, int modeord>
33 | int cudeconvolve1d(cufinufft_plan_t<T> *d_mem, int blksize);
34 | template<typename T, int modeord>
35 | int cudeconvolve2d(cufinufft_plan_t<T> *d_mem, int blksize);
36 | template<typename T, int modeord>
37 | int cudeconvolve3d(cufinufft_plan_t<T> *d_mem, int blksize);
38 | } // namespace deconvolve
39 | } // namespace cufinufft
40 | #endif
41 | 


--------------------------------------------------------------------------------
/include/cufinufft/defs.h:
--------------------------------------------------------------------------------
 1 | #ifndef CUFINUFFT_DEFS_H
 2 | #define CUFINUFFT_DEFS_H
 3 | 
 4 | #include <limits>
 5 | // constants needed within common
 6 | // upper bound on w, ie nspread, even when padded (see evaluate_kernel_vector); also for
 7 | // common
 8 | #define MAX_NSPREAD          16
 9 | #define MIN_NSPREAD          2
10 | 
11 | // max number of positive quadr nodes
12 | #define MAX_NQUAD            100
13 | 
14 | // Fractional growth cut-off in utils:arraywidcen; sets when to translate in type-3
15 | #define ARRAYWIDCEN_GROWFRAC 0.1
16 | 
17 | // FIXME: If cufft ever takes N > INT_MAX...
18 | constexpr int32_t MAX_NF = std::numeric_limits<int32_t>::max();
19 | 
20 | // allow compile-time switch off of openmp, so compilation without any openmp
21 | // is done (Note: _OPENMP is automatically set by -fopenmp compile flag)
22 | #ifdef _OPENMP
23 | #include <omp.h>
24 | // point to actual omp utils
25 | #define MY_OMP_GET_NUM_THREADS()  omp_get_num_threads()
26 | #define MY_OMP_GET_MAX_THREADS()  omp_get_max_threads()
27 | #define MY_OMP_GET_THREAD_NUM()   omp_get_thread_num()
28 | #define MY_OMP_SET_NUM_THREADS(x) omp_set_num_threads(x)
29 | #define MY_OMP_SET_NESTED(x)      omp_set_nested(x)
30 | #else
31 | // non-omp safe dummy versions of omp utils
32 | #define MY_OMP_GET_NUM_THREADS() 1
33 | #define MY_OMP_GET_MAX_THREADS() 1
34 | #define MY_OMP_GET_THREAD_NUM()  0
35 | #define MY_OMP_SET_NUM_THREADS(x)
36 | #define MY_OMP_SET_NESTED(x)
37 | #endif
38 | 
39 | #endif
40 | 


--------------------------------------------------------------------------------
/include/cufinufft/memtransfer.h:
--------------------------------------------------------------------------------
 1 | #ifndef __MEMTRANSFER_H__
 2 | #define __MEMTRANSFER_H__
 3 | 
 4 | #include "cufinufft/types.h"
 5 | 
 6 | namespace cufinufft {
 7 | namespace memtransfer {
 8 | 
 9 | template<typename T> int allocgpumem1d_plan(cufinufft_plan_t<T> *d_plan);
10 | template<typename T> int allocgpumem1d_nupts(cufinufft_plan_t<T> *d_plan);
11 | template<typename T> void freegpumemory(cufinufft_plan_t<T> *d_plan);
12 | template<typename T> int allocgpumem2d_plan(cufinufft_plan_t<T> *d_plan);
13 | template<typename T> int allocgpumem2d_nupts(cufinufft_plan_t<T> *d_plan);
14 | template<typename T> int allocgpumem3d_plan(cufinufft_plan_t<T> *d_plan);
15 | template<typename T> int allocgpumem3d_nupts(cufinufft_plan_t<T> *d_plan);
16 | 
17 | } // namespace memtransfer
18 | } // namespace cufinufft
19 | #endif
20 | 


--------------------------------------------------------------------------------
/include/cufinufft_opts.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CUFINUFFT_OPTS_H__
 2 | #define __CUFINUFFT_OPTS_H__
 3 | 
 4 | typedef struct cufinufft_opts { // see cufinufft_default_opts() for defaults
 5 |   double upsampfac; // upsampling ratio sigma, only 2.0 (standard) is implemented
 6 |                     /* following options are for gpu */
 7 |   int gpu_method;   // 1: nonuniform-pts driven, 2: shared mem (SM)
 8 |   int gpu_sort;     // when NU-pts driven: 0: no sort (GM), 1: sort (GM-sort)
 9 | 
10 |   int gpu_binsizex; // used for 2D, 3D subproblem method
11 |   int gpu_binsizey;
12 |   int gpu_binsizez;
13 | 
14 |   int gpu_obinsizex; // used for 3D spread block gather method
15 |   int gpu_obinsizey;
16 |   int gpu_obinsizez;
17 | 
18 |   int gpu_maxsubprobsize;
19 |   int gpu_kerevalmeth;      // 0: direct exp(sqrt()), 1: Horner ppval
20 | 
21 |   int gpu_spreadinterponly; // 0: NUFFT, 1: spread or interpolation only
22 | 
23 |   int gpu_maxbatchsize;
24 | 
25 |   /* multi-gpu support */
26 |   int gpu_device_id;
27 | 
28 |   void *gpu_stream;
29 | 
30 |   int modeord; // (type 1,2 only): 0 CMCL-style increasing mode order
31 |                //                  1 FFT-style mode order
32 | 
33 |   int debug;   // 0: no debug, 1: debug
34 | } cufinufft_opts;
35 | 
36 | #endif
37 | 


--------------------------------------------------------------------------------
/include/finufft.fh:
--------------------------------------------------------------------------------
 1 | c     Fortran header recreating finufft_opts struct in fortran (f90 style).
 2 | c     This must be kept synchronized with finufft_opts.h, matching its order.
 3 | c     Also see finufft_mod.f90 and ../fortran/finufftfort.cpp.
 4 | c     Barnett 5/29/20. One prec 7/2/20. Fix ordering bug 11/29/24.
 5 | c     erase chkbnds 1/7/25.
 6 | 
 7 |       type finufft_opts
 8 | 
 9 | c     data handling opts...
10 |       integer modeord, spreadinterponly
11 | 
12 | c     diagnostic opts...
13 |       integer debug, spread_debug, showwarn
14 | 
15 | c     alg performance opts...
16 |       integer nthreads, fftw, spread_sort, spread_kerevalmeth
17 |       integer spread_kerpad
18 |       real*8 upsampfac
19 |       integer spread_thread, maxbatchsize, spread_nthr_atomic
20 |       integer spread_max_sp_size
21 |       integer*8 fftw_lock_fun, fftw_unlock_fun, fftw_lock_data
22 | c     (last three are pointer-sized, as in finufft_mod.f90; 64-bit assumed)
23 |       end type
24 | 


--------------------------------------------------------------------------------
/include/finufft.h:
--------------------------------------------------------------------------------
 1 | // Defines the public C++ and C compatible user interface to FINUFFT library.
 2 | 
 3 | // This contains both single and double precision user-facing commands.
 4 | // "macro-safe" rewrite, including the plan object, Barnett 5/21/22-6/7/22.
 5 | // They will clobber any prior macros starting FINUFFT*.
 6 | 
 7 | /* Devnotes.
 8 |    A) Two precisions done by including the "either precision" headers twice.
 9 |    No use of the private headers for lib/test/example compilation is made.
10 | 
11 |    B) Good ways to debug this header ---
12 |    1) preprocessor output (gets the general idea the macros worked):
13 |    cpp include/finufft.h -Iinclude
14 |    cpp -dD include/finufft.h -Iinclude
15 |    then https://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
16 |    2) compile examples in both precs and C/C++, needed to catch typos:
17 |    g++ examples/simple1d1.cpp -Iinclude -c
18 |    g++ examples/simple1d1f.cpp -Iinclude -c
19 |    gcc examples/simple1d1c.c -Iinclude -c
20 |    gcc examples/simple1d1cf.c -Iinclude -c
21 | */
22 | 
23 | #ifndef FINUFFT_H
24 | #define FINUFFT_H
25 | 
26 | // prec-indep stuff. both these are thus made public-facing
27 | #include <finufft_opts.h>
28 | #include <finufft_spread_opts.h>
29 | 
30 | // Public error numbers
31 | #include <finufft_errors.h>
32 | 
33 | // octave (mkoctfile) needs this otherwise it doesn't know what int64_t is!
34 | #include <stdint.h>
35 | #define FINUFFT_BIGINT int64_t
36 | 
37 | // this macro name has to be safe since exposed to user
38 | #define FINUFFT_SINGLE
39 | #include <finufft_eitherprec.h>
40 | #undef FINUFFT_SINGLE
41 | // do it again for double-prec...
42 | #include <finufft_eitherprec.h>
43 | 
44 | // clean up any purely local defs that are not in finufft_eitherprec.h...
45 | #undef FINUFFT_BIGINT
46 | 
47 | #endif // FINUFFT_H
48 | 


--------------------------------------------------------------------------------
/include/finufft_errors.h:
--------------------------------------------------------------------------------
 1 | #ifndef FINUFFT_ERRORS_H
 2 | #define FINUFFT_ERRORS_H
 3 | 
 4 | // ---------- Global error/warning output codes for the library ---------------
 5 | // All documentation is at ../docs/errors.rst (not here):
 6 | enum {
 7 |   FINUFFT_WARN_EPS_TOO_SMALL         = 1,
 8 |   FINUFFT_ERR_MAXNALLOC              = 2,
 9 |   FINUFFT_ERR_SPREAD_BOX_SMALL       = 3,
10 |   FINUFFT_ERR_SPREAD_PTS_OUT_RANGE   = 4, // DEPRECATED
11 |   FINUFFT_ERR_SPREAD_ALLOC           = 5,
12 |   FINUFFT_ERR_SPREAD_DIR             = 6,
13 |   FINUFFT_ERR_UPSAMPFAC_TOO_SMALL    = 7,
14 |   FINUFFT_ERR_HORNER_WRONG_BETA      = 8,
15 |   FINUFFT_ERR_NTRANS_NOTVALID        = 9,
16 |   FINUFFT_ERR_TYPE_NOTVALID          = 10,
17 |   FINUFFT_ERR_ALLOC                  = 11,
18 |   FINUFFT_ERR_DIM_NOTVALID           = 12,
19 |   FINUFFT_ERR_SPREAD_THREAD_NOTVALID = 13,
20 |   FINUFFT_ERR_NDATA_NOTVALID         = 14,
21 |   FINUFFT_ERR_CUDA_FAILURE           = 15,
22 |   FINUFFT_ERR_PLAN_NOTVALID          = 16,
23 |   FINUFFT_ERR_METHOD_NOTVALID        = 17,
24 |   FINUFFT_ERR_BINSIZE_NOTVALID       = 18,
25 |   FINUFFT_ERR_INSUFFICIENT_SHMEM     = 19,
26 |   FINUFFT_ERR_NUM_NU_PTS_INVALID     = 20,
27 |   FINUFFT_ERR_INVALID_ARGUMENT       = 21,
28 |   FINUFFT_ERR_LOCK_FUNS_INVALID      = 22,
29 |   FINUFFT_ERR_NTHREADS_NOTVALID      = 23,
30 | };
31 | #endif
32 | 


--------------------------------------------------------------------------------
/include/finufft_mod.f90:
--------------------------------------------------------------------------------
 1 | module finufft_mod
 2 | ! Fortran header recreating finufft_opts struct in fortran (f90 style)
 3 | ! Module version, contributed by Reinhard Neder, 1/20/23. Order fixed 1/7/25.
 4 | ! This must be kept synchronized with finufft_opts.h, matching its order.
 5 | ! Also see ../fortran/finufftfort.cpp.
 6 | ! Relies on "use ISO_C_BINDING" in the fortran module.
 7 | use iso_c_binding
 8 | type finufft_opts
 9 | 
10 |    ! data handling opts...
11 |    integer(kind=C_INT) :: modeord, spreadinterponly
12 | 
13 |    ! diagnostic opts...
14 |    integer(kind=C_INT) :: debug, spread_debug, showwarn
15 | 
16 |    ! alg perf opts...
17 |    integer(kind=C_INT) :: nthreads,fftw,spread_sort,spread_kerevalmeth
18 |    integer(kind=C_INT) :: spread_kerpad
19 |    real(kind=C_DOUBLE) :: upsampfac
20 |    integer(kind=C_INT) :: spread_thread, maxbatchsize
21 |    integer(kind=C_INT) :: spread_nthr_atomic, spread_max_sp_size
22 |    integer(kind=C_SIZE_T) :: fftw_lock_fun, fftw_unlock_fun, fftw_lock_data
23 |    !  really, last should be type(C_PTR) :: etc, but fails to print nicely
24 | 
25 | end type finufft_opts
26 | end module finufft_mod
27 | 


--------------------------------------------------------------------------------
/include/finufft_spread_opts.h:
--------------------------------------------------------------------------------
 1 | #ifndef FINUFFT_SPREAD_OPTS_H
 2 | #define FINUFFT_SPREAD_OPTS_H
 3 | 
 4 | // C-compatible options struct for spread/interpolation within FINUFFT
 5 | 
 6 | // Notes: 1) Has to be part of public-facing
 7 | // headers since finufft_plan has an instance of this spread_opts struct.
 8 | // 2) Deliberately uses fixed types (no macro precision-switching).
 9 | 
10 | typedef struct finufft_spread_opts {
11 |   // See spreadinterp:setup_spreader for default values of the following fields.
12 |   // This is the main documentation for these options...
13 |   int nspread;             // w, the kernel width in grid pts
14 |   int spread_direction;    // 1 means spread NU->U, 2 means interpolate U->NU
15 |   int sort;                // 0: don't sort NU pts, 1: do, 2: heuristic choice
16 |   int kerevalmeth;         // 0: direct exp(sqrt()), or 1: Horner ppval, fastest
17 |   int kerpad;              // 0: no pad w to mult of 4, 1: do pad
18 |                            // (this helps SIMD for kerevalmeth=0, eg on i7).
19 |   int nthreads;            // # threads for spreadinterp (0: use max avail)
20 |   int sort_threads;        // # threads for sort (0: auto-choice up to nthreads)
21 |   int max_subproblem_size; // # pts per t1 subprob; sets extra RAM per thread
22 |   int flags;               // binary flags for timing only (may give wrong ans
23 |                            // if changed from 0!). See spreadinterp.h
24 |   int debug;               // 0: silent, 1: small text output, 2: verbose
25 |   int atomic_threshold; // num threads before switching spreadSorted to using atomic ops
26 |   double upsampfac;     // sigma, upsampling factor
27 |   // ES kernel specific consts for eval. No longer FLT, to avoid name clash...
28 |   double ES_beta;
29 |   double ES_halfwidth;
30 |   double ES_c;
31 | } finufft_spread_opts;
32 | 
33 | #endif // FINUFFT_SPREAD_OPTS_H
34 | 


--------------------------------------------------------------------------------
/lib-static/README:
--------------------------------------------------------------------------------
1 | This directory is where the static libraries will appear.
2 | 


--------------------------------------------------------------------------------
/lib/README:
--------------------------------------------------------------------------------
1 | This directory is where the shared libraries will appear.
2 | 


--------------------------------------------------------------------------------
/make-platforms/README:
--------------------------------------------------------------------------------
1 | This directory contains platform-specific variable settings for the
2 | GNU makefile. They are used by CI.
3 | 
4 | Please copy one of these up to ../make.inc and possibly modify for
5 | your needs.
6 | 
7 | Barnett 1/7/25
8 | 


--------------------------------------------------------------------------------
/make-platforms/make.inc.GCC7:
--------------------------------------------------------------------------------
 1 | # example of how to override compiler choices in makefile.
 2 | # Here we use GCC 7 in linux ubuntu 16.04 LTS (provides /usr/bin/g++-7, etc)
 3 | # You should make your own.
 4 | 
 5 | CXX=g++-7
 6 | CC=gcc-7
 7 | FC=gfortran-7
 8 | 
 9 | CXXFLAGS += -g -Wall
10 | 


--------------------------------------------------------------------------------
/make-platforms/make.inc.linux_ICC:
--------------------------------------------------------------------------------
 1 | # makefile overrides to use Intel ICC compiler & libiomp5. double prec only.
 2 | # (fixed the fatal mistake of leaving -lgomp in the compile line! 6/4/20)
 3 | 
 4 | CXX=icpc
 5 | CC=icc
 6 | FC=ifort
 7 | 
 8 | # we want to start fresh, ignore the GCC flags...
 9 | CFLAGS = -O3 -xHost
10 | # CFLAGS += -lsvml    % fails (gives nans & no faster)
11 | 
12 | CXXFLAGS = $(CFLAGS)
13 | FFLAGS   = $(CFLAGS)
14 | 
15 | # It is crucial to *replace* -fopenmp & -lgomp with Intel's equiv
16 | # (crucial in the sense that linking libiomp5 *and* libgomp gives weird
17 | # segfaults, corruption in parallel blocks, or works fine, frustratingly):
18 | OMPFLAGS = -qopenmp
19 | OMPLIBS =
20 | 


--------------------------------------------------------------------------------
/make-platforms/make.inc.macosx_arm64:
--------------------------------------------------------------------------------
 1 | # Makefile variable overrides for cross-compiling for ARM silicon via
 2 | # clang on Mac OSX.
 3 | #
 4 | # This is used for CI.
 5 | # Libin Lu 12/21/23.
 6 | 
 7 | # compile flags for use with clang: (note absence of -march, etc)
 8 | CFLAGS = -O3 -arch arm64 -target arm64-apple-macos11
 9 | 
10 | # If you're getting warning messages of the form:
11 | #    ld: warning: object file (lib-static/libfinufft.a(finufft1d.o)) was built for
12 | #    newer OSX version (10.13) than being linked (10.9)
13 | # Then you can uncomment the following line, substituting the older version number
14 | # (in this example -mmacosx-version-min=10.9)
15 | #
16 | #CFLAGS += "-mmacosx-version-min=<OLDER OSX VERSION NUMBER>"
17 | 
18 | CXX=clang++
19 | CC=clang
20 | 
21 | # assuming libomp and fftw are installed through homebrew
22 | OMP_ROOT = $(shell brew --prefix libomp)
23 | FFTW_ROOT = $(shell brew --prefix fftw)
24 | 
25 | # taken from makefile...
26 | CFLAGS   += -I include -I/usr/local/include -I$(OMP_ROOT)/include -I$(FFTW_ROOT)/include
27 | FFLAGS   = $(CFLAGS)
28 | CXXFLAGS = $(CFLAGS)
29 | LIBS += -L/usr/local/lib -L$(OMP_ROOT)/lib -L$(FFTW_ROOT)/lib
30 | LDFLAGS += -arch arm64 -target arm64-apple-macos11
31 | 
32 | # OpenMP with clang needs following...
33 | OMPFLAGS = -Xpreprocessor -fopenmp
34 | OMPLIBS = -lomp
35 | # since fftw3_omp doesn't work in OSX, we need...
36 | FFTWOMPSUFFIX=threads
37 | 
38 | # MATLAB interface: this will probably segfault. Instead we suggest you use
39 | # make.inc.macosx_clang_matlab
40 | 
41 | # Some of these will depend on your FFTW library location...
42 | MFLAGS += -I/usr/local/include -I/opt/homebrew/include -L/usr/local/lib -L/opt/homebrew/lib -lm
43 | # may need to edit for your MATLAB version location...
44 | MEX = $(shell ls -d /Applications/MATLAB_R20**.app)/bin/mex
45 | # Also see docs/install.rst for possible edits to MATLAB's MEX XML file.
46 | 


--------------------------------------------------------------------------------
/make-platforms/make.inc.macosx_clang:
--------------------------------------------------------------------------------
 1 | # Makefile variable overrides for Mac OSX compilation with CLANG.
 2 | #
 3 | # Note that we have not been able to link against gfortran, so if you need
 4 | # fortran interfaces, use make.inc.macosx_gcc-* instead.
 5 | #
 6 | # Copy this file to make.inc, and if needed edit for your setup.
 7 | # Barnett 10/27/18. Input from Yu-Hsuan Shih, Amit Moskovich.
 8 | 
 9 | # (note that /usr/bin/g++,gcc are aliased to clang/LLVM, so CXX,CC unchanged)
10 | 
11 | # compile flags for use with clang: (note absence of -march, etc)
12 | CFLAGS = -O3
13 | 
14 | # If you're getting warning messages of the form:
15 | #    ld: warning: object file (lib-static/libfinufft.a(finufft1d.o)) was built for
16 | #    newer OSX version (10.13) than being linked (10.9)
17 | # Then you can uncomment the following line, substituting the older version number
18 | # (in this example -mmacosx-version-min=10.9)
19 | #
20 | #CFLAGS += "-mmacosx-version-min=<OLDER OSX VERSION NUMBER>"
21 | 
22 | CXX=clang++
23 | CC=clang
24 | 
25 | # taken from makefile...
26 | CFLAGS   += -I include -I/usr/local/include -I/usr/local/opt/libomp/include -I/opt/homebrew/include
27 | FFLAGS   = $(CFLAGS)
28 | CXXFLAGS = $(CFLAGS)
29 | LIBS += -L/usr/local/lib -L/opt/homebrew/lib
30 | 
31 | # OpenMP with clang needs following...
32 | OMPFLAGS = -Xpreprocessor -fopenmp
33 | OMPLIBS = -L/usr/local/lib -L/usr/local/opt/libomp/lib -lomp
34 | # since fftw3_omp doesn't work in OSX, we need...
35 | FFTWOMPSUFFIX=threads
36 | 
37 | 
38 | # MATLAB interface: this will probably segfault. Instead we suggest you use
39 | # make.inc.macosx_clang_matlab
40 | 
41 | # Some of these will depend on your FFTW library location...
42 | MFLAGS += -I/usr/local/include -L/usr/local/lib -lm
43 | # may need to edit for your MATLAB version location...
44 | MEX = $(shell ls -d /Applications/MATLAB_R20**.app)/bin/mex
45 | # Also see docs/install.rst for possible edits to MATLAB's MEX XML file.
46 | 


--------------------------------------------------------------------------------
/make-platforms/make.inc.macosx_gcc-10:
--------------------------------------------------------------------------------
 1 | # Makefile variable overrides for Mac OSX compilation with GCC v.10.*
 2 | #
 3 | # Use this if you'll need to link against gfortran.
 4 | #
 5 | # Barnett 10/27/18. Input from Yu-Hsuan Shih, Amit Moskovich.
 6 | # Lu minor modification for gcc-10 12/06/2020
 7 | 
 8 | # By default we use clang/LLVM (to which /usr/bin/gcc, etc, are aliased).
 9 | # This make.inc is if you want to override this.
10 | # Get gcc from brew then use, eg:
11 | CXX=g++-10
12 | CC=gcc-10
13 | FC=gfortran
14 | 
15 | # (compile flags for use with GCC are as in linux makefile)
16 | CFLAGS +=
17 | 
18 | # If you're getting warning messages of the form:
19 | #    ld: warning: object file (lib-static/libfinufft.a(finufft1d.o)) was built for
20 | #    newer OSX version (10.13) than being linked (10.9)
21 | # Then you can uncomment the following line, substituting the older version number
22 | # (in this example -mmacosx-version-min=10.9)
23 | #
24 | #CFLAGS += "-mmacosx-version-min=<OLDER OSX VERSION NUMBER>"
25 | 
26 | # as in makefile, but with the brew /usr/local/ stuff...
27 | CFLAGS   += -I src -I/usr/local/include -I/opt/homebrew/include
28 | FFLAGS   = $(CFLAGS)
29 | CXXFLAGS = $(CFLAGS)
30 | LIBS += -L/usr/local/lib -L/opt/homebrew/lib
31 | 
32 | # OpenMP with GCC on OSX needs following...
33 | OMPFLAGS = -fopenmp
34 | OMPLIBS = -L/usr/local/lib -lgomp
35 | # since fftw3_omp doesn't work in OSX, we need...
36 | FFTWOMPSUFFIX=threads
37 | 
38 | # MATLAB interface:
39 | # some of these will depend on your FFTW library location...
40 | MFLAGS += -I/usr/local/include  -I/opt/homebrew/include -L/usr/local/lib -L/opt/homebrew/lib -lm
41 | # edit for your MATLAB version location...
42 | MEX = $(shell ls -d /Applications/MATLAB_R20**.app)/bin/mex
43 | # Also see docs/install.rst for possible edits to MATLAB's MEX XML file.
44 | 
45 | # If you have segfault of MATLAB then please try the following:
46 | #MOMPFLAGS = -D_OPENMP
47 | #OMPFLAGS = -Xpreprocessor -fopenmp
48 | #OMPLIBS = $(shell ls -d /Applications/MATLAB_R20**.app)/sys/os/maci64/libiomp5.dylib
49 | # This links to MATLAB's omp not gomp.
50 | 


--------------------------------------------------------------------------------
/make-platforms/make.inc.macosx_gcc-8:
--------------------------------------------------------------------------------
 1 | # Makefile variable overrides for Mac OSX compilation with GCC v.8.*
 2 | #
 3 | # Use this if you'll need to link against gfortran.
 4 | #
 5 | # Copy this file to make.inc, and if needed edit for your setup.
 6 | #
 7 | # Barnett 10/27/18. Input from Yu-Hsuan Shih, Amit Moskovich.
 8 | 
 9 | # By default we use clang/LLVM (which is aliased to /usr/lib/gcc, etc).
10 | # This make.inc is if you want to override this.
11 | # Get gcc from brew then use, eg:
12 | CXX=g++-8
13 | CC=gcc-8
14 | FC=gfortran
15 | 
16 | # (compile flags for use with GCC are as in linux makefile)
17 | CFLAGS +=
18 | 
19 | # If you're getting warning messages of the form:
20 | #    ld: warning: object file (lib-static/libfinufft.a(finufft1d.o)) was built for
21 | #    newer OSX version (10.13) than being linked (10.9)
22 | # Then you can uncomment the following line, filling in the older version number
23 | # (in this example -mmacosx-version-min=10.9)
24 | #
25 | #CFLAGS += "-mmacosx-version-min=<OLDER OSX VERSION NUMBER>"
26 | 
27 | # as in makefile, but with the brew include/lib dirs (/usr/local and /opt/homebrew)...
28 | CFLAGS   += -I src -I/usr/local/include -I/opt/homebrew/include
29 | FFLAGS   = $(CFLAGS)
30 | CXXFLAGS = $(CFLAGS)
31 | LIBS += -L/usr/local/lib -L/opt/homebrew/lib
32 | 
33 | # OpenMP with GCC on OSX needs following...
34 | OMPFLAGS = -fopenmp
35 | OMPLIBS = -L/usr/local/lib -lgomp
36 | # since fftw3_omp doesn't work in OSX, we need...
37 | FFTWOMPSUFFIX=threads
38 | 
39 | # MATLAB interface:
40 | # some of these will depend on your FFTW library location...
41 | MFLAGS += -I/usr/local/include -I/opt/homebrew/include -L/usr/local/lib -L/opt/homebrew/lib -lm
42 | # edit for your MATLAB version location (if several releases are installed, the last one listed by ls, ie the newest name, is used)...
43 | MEX = $(shell ls -d /Applications/MATLAB_R20*.app | tail -n 1)/bin/mex
44 | # Also see docs/install.rst for possible edits to MATLAB's MEX XML file.
45 | 
46 | # If you have segfault of MATLAB then please try the following:
47 | #MOMPFLAGS = -D_OPENMP
48 | #OMPFLAGS = -Xpreprocessor -fopenmp
49 | #OMPLIBS = $(shell ls -d /Applications/MATLAB_R20*.app | tail -n 1)/sys/os/maci64/libiomp5.dylib
50 | # This links to MATLAB's omp not gomp.
51 | 


--------------------------------------------------------------------------------
/make-platforms/make.inc.manylinux:
--------------------------------------------------------------------------------
1 | # Compile flags targeting a generic x86-64 CPU (baseline arch, generic tuning, SSE4)
2 | # rather than the build machine; presumably for portable manylinux wheels -- confirm.
3 | CFLAGS = -O3 -funroll-loops -march=x86-64 -mtune=generic -msse4 -fcx-limited-range
4 | CXXFLAGS = $(CFLAGS)
5 | 


--------------------------------------------------------------------------------
/make-platforms/make.inc.powerpc:
--------------------------------------------------------------------------------
 1 | # Custom makefile settings for PowerPC (as opposed to Intel x86) architecture.
 2 | # Copy this to make.inc to apply.
 3 | # It adds a COMP (compiler) make argument, with values COMP=ibm, otherwise GCC. It also reads OMP (OFF selects the non-_r IBM compilers) and DEBUG (TRUE gives -g -O0 instead of -g -O3).
 4 | # By Hugo Brunie, May 2020.
 5 | # This makefile is greatly inspired by AMReX (amrex/Tools/GNUMake/comps)
 6 | 
 7 | ## Compiler choice. NOTE(review): with COMP=ibm and DEBUG=TRUE, CFLAGS is not reset in this branch, so it keeps whatever value it had before this file is read -- confirm that is intended.
 8 | ifeq ($(COMP),ibm)
 9 | OMPFLAGS = -qsmp=omp
10 | ifneq ($(OMP),OFF)
11 |   CXX = xlC_r
12 |   CC  = xlc_r
13 |   FC  = xlf_r
14 | else
15 |   CXX = xlC
16 |   CC  = xlc
17 |   FC  = xlf
18 | endif
19 | ifneq ($(DEBUG),TRUE)
20 |   CXXFLAGS = -qsimd=auto -qmaxmem=-1
21 |   CFLAGS   = -qsimd=auto -qmaxmem=-1
22 | endif
23 | else                # DEFAULT is gnu compiler (you can update to g++-9, etc)
24 | CXX = g++
25 | CC  = gcc
26 | FC=gfortran
27 | OMPFLAGS = -fopenmp
28 | CFLAGS = -funroll-loops -mcpu=powerpc64 -fcx-limited-range
29 | endif
30 | 
31 | FFLAGS   = $(CFLAGS)
32 | CXXFLAGS = $(CFLAGS) -DNEED_EXTERN_C
33 | 
34 | ## position-independent code, for the shared library (FFLAGS and CXXFLAGS pick this up too, since they expand $(CFLAGS) lazily)
35 | CFLAGS += -fPIC
36 | 
37 | ifeq ($(DEBUG),TRUE)
38 |   CXXFLAGS += -g -O0
39 |   CFLAGS   += -g -O0
40 | else
41 |   CXXFLAGS += -g -O3
42 |   CFLAGS   += -g -O3
43 | endif
44 | CLINK=-lstdc++
45 | FLINK=$(CLINK)
46 | 


--------------------------------------------------------------------------------
/make-platforms/make.inc.windows_mingw:
--------------------------------------------------------------------------------
 1 | MINGW=ON
 2 | # Clear LIBS: libm is apparently not available on Windows, and has to be removed from LIBS to build the MATLAB mex file. Does not interfere with the library build.
 3 | LIBS=
 4 | # please set these paths (FFTW header dir and FFTW library dir); while left empty, the conditional blocks below add nothing
 5 | FFTW_H_DIR=
 6 | FFTW_LIB_DIR=
 7 | # directory containing the gomp library; might be needed for MATLAB
 8 | LGOMP_DIR=
 9 | 
10 | # modify FLAGS such that FFTW headers are included
11 | ifneq ($(FFTW_H_DIR),)
12 | CFLAGS+=-I$(FFTW_H_DIR)
13 | CXXFLAGS+=-I$(FFTW_H_DIR)
14 | endif
15 | # add FFTW DLL location to LIBS
16 | ifneq ($(FFTW_LIB_DIR),)
17 | LIBS+=-L$(FFTW_LIB_DIR)
18 | endif
19 | 
20 | # adjust MATLAB flags: MFLAGS is overwritten (not appended to) when FFTW_H_DIR is set; then add path of lgomp, and always link gomp
21 | ifneq ($(FFTW_H_DIR),)
22 | MFLAGS=-I$(FFTW_H_DIR) -largeArrayDims
23 | endif
24 | ifneq ($(LGOMP_DIR),)
25 | MFLAGS+=-L$(LGOMP_DIR)
26 | endif
27 | MFLAGS+=-lgomp.dll
28 | 


--------------------------------------------------------------------------------
/make-platforms/make.inc.windows_msys:
--------------------------------------------------------------------------------
1 | MSYS=ON
2 | 


--------------------------------------------------------------------------------
/matlab/@gpuArray/finufft1d1.m:
--------------------------------------------------------------------------------
1 | % FINUFFT1D1   GPU 1D complex nonuniform FFT, type 1 (nonuniform to uniform).
2 | % Thin wrapper in the @gpuArray folder: passes all arguments to CUFINUFFT1D1.
3 | % See CUFINUFFT1D1
4 | function f = finufft1d1(varargin)
5 | f = cufinufft1d1(varargin{:});   % forward every argument unchanged
6 | 


--------------------------------------------------------------------------------
/matlab/@gpuArray/finufft1d2.m:
--------------------------------------------------------------------------------
1 | % FINUFFT1D2   GPU 1D complex nonuniform FFT, type 2 (uniform to nonuniform).
2 | % Thin wrapper in the @gpuArray folder: passes all arguments to CUFINUFFT1D2.
3 | % See CUFINUFFT1D2
4 | function c = finufft1d2(varargin)
5 | c = cufinufft1d2(varargin{:});   % forward every argument unchanged
6 | 


--------------------------------------------------------------------------------
/matlab/@gpuArray/finufft1d3.m:
--------------------------------------------------------------------------------
1 | % FINUFFT1D3   GPU 1D complex nonuniform FFT, type 3 (nonuniform to nonuniform).
2 | % Thin wrapper in the @gpuArray folder: passes all arguments to CUFINUFFT1D3.
3 | % See CUFINUFFT1D3
4 | function f = finufft1d3(varargin)
5 | f = cufinufft1d3(varargin{:});   % forward every argument unchanged
6 | 


--------------------------------------------------------------------------------
/matlab/@gpuArray/finufft2d1.m:
--------------------------------------------------------------------------------
1 | % FINUFFT2D1   GPU 2D complex nonuniform FFT, type 1 (nonuniform to uniform).
2 | % Thin wrapper in the @gpuArray folder: passes all arguments to CUFINUFFT2D1.
3 | % See CUFINUFFT2D1
4 | function f = finufft2d1(varargin)
5 | f = cufinufft2d1(varargin{:});   % forward every argument unchanged
6 | 


--------------------------------------------------------------------------------
/matlab/@gpuArray/finufft2d2.m:
--------------------------------------------------------------------------------
1 | % FINUFFT2D2   GPU 2D complex nonuniform FFT, type 2 (uniform to nonuniform).
2 | % Thin wrapper in the @gpuArray folder: passes all arguments to CUFINUFFT2D2.
3 | % See CUFINUFFT2D2
4 | function c = finufft2d2(varargin)
5 | c = cufinufft2d2(varargin{:});   % forward every argument unchanged
6 | 


--------------------------------------------------------------------------------
/matlab/@gpuArray/finufft2d3.m:
--------------------------------------------------------------------------------
1 | % FINUFFT2D3   GPU 2D complex nonuniform FFT, type 3 (nonuniform to nonuniform).
2 | % Thin wrapper in the @gpuArray folder: passes all arguments to CUFINUFFT2D3.
3 | % See CUFINUFFT2D3
4 | function f = finufft2d3(varargin)
5 | f = cufinufft2d3(varargin{:});   % forward every argument unchanged
6 | 


--------------------------------------------------------------------------------
/matlab/@gpuArray/finufft3d1.m:
--------------------------------------------------------------------------------
1 | % FINUFFT3D1   GPU 3D complex nonuniform FFT, type 1 (nonuniform to uniform).
2 | % Thin wrapper in the @gpuArray folder: passes all arguments to CUFINUFFT3D1.
3 | % See CUFINUFFT3D1
4 | function f = finufft3d1(varargin)
5 | f = cufinufft3d1(varargin{:});   % forward every argument unchanged
6 | 


--------------------------------------------------------------------------------
/matlab/@gpuArray/finufft3d2.m:
--------------------------------------------------------------------------------
1 | % FINUFFT3D2   GPU 3D complex nonuniform FFT, type 2 (uniform to nonuniform).
2 | % Thin wrapper in the @gpuArray folder: passes all arguments to CUFINUFFT3D2.
3 | % See CUFINUFFT3D2
4 | function c = finufft3d2(varargin)
5 | c = cufinufft3d2(varargin{:});   % forward every argument unchanged
6 | 


--------------------------------------------------------------------------------
/matlab/@gpuArray/finufft3d3.m:
--------------------------------------------------------------------------------
1 | % FINUFFT3D3   GPU 3D complex nonuniform FFT, type 3 (nonuniform to nonuniform).
2 | % Thin wrapper in the @gpuArray folder: passes all arguments to CUFINUFFT3D3.
3 | % See CUFINUFFT3D3
4 | function f = finufft3d3(varargin)
5 | f = cufinufft3d3(varargin{:});   % forward every argument unchanged
6 | 


--------------------------------------------------------------------------------
/matlab/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # MEX gateway for the MATLAB interface: locate MATLAB, compile finufft.cpp
 2 | # linked to finufft, then stage the .m sources in the build directory.
 3 | find_package(Matlab REQUIRED)
 4 | 
 5 | matlab_add_mex(
 6 |     NAME finufft_mex
 7 |     SRC finufft.cpp
 8 |     LINK_TO finufft
 9 |     OUTPUT_NAME finufft
10 |     R2018a
11 | )
12 | # CMake strips a leading -D from each definition, so the bare name is equivalent.
13 | target_compile_definitions(finufft_mex PRIVATE R2008OO)
14 | 
15 | # Collect the MATLAB sources that sit beside this CMakeLists.txt.
16 | file(GLOB FINUFFT_MATLAB_M_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/*.m")
17 | 
18 | # Post-build step of the MEX target: copy those sources into the build directory.
19 | add_custom_command(
20 |     TARGET finufft_mex POST_BUILD
21 |     COMMAND "${CMAKE_COMMAND}" -E copy ${FINUFFT_MATLAB_M_SOURCES} "${CMAKE_CURRENT_BINARY_DIR}"
22 |     VERBATIM
23 | )
24 | 


--------------------------------------------------------------------------------
/matlab/cufinufft1d1.docsrc:
--------------------------------------------------------------------------------
 1 | % CUFINUFFT1D1   GPU 1D complex nonuniform FFT, type 1 (nonuniform to uniform).
 2 | %
 3 | % f = cufinufft1d1(x,c,isign,eps,ms)
 4 | % f = cufinufft1d1(x,c,isign,eps,ms,opts)
 5 | %
 6 | % This computes on the GPU, to relative precision eps, via a fast algorithm:
 7 | %
 8 | %               nj
 9 | %     f[k1] =  SUM c[j] exp(+/-i k1 x[j])  for -ms/2 <= k1 <= (ms-1)/2
10 | %              j=1
11 | %   Inputs:
12 | %     x     length-nj vector of real-valued locations of nonuniform sources
13 | %     c     length-nj complex vector of source strengths. If numel(c)>nj,
14 | %           expects a stack of vectors (eg, a nj*ntrans matrix) each of which is
15 | %           transformed with the same source locations.
16 | ISIGNEPS
17 | %     ms    number of Fourier modes computed, may be even or odd;
18 | %           in either case, mode range is integers lying in [-ms/2, (ms-1)/2]
19 | GOPTS
20 | GOPTS12
21 | %   Outputs:
22 | %     f     size-ms complex column vector of Fourier coefficients, or, if
23 | %           ntrans>1, a matrix of size (ms,ntrans).
24 | %
25 | GNOTES
26 | 


--------------------------------------------------------------------------------
/matlab/cufinufft1d2.docsrc:
--------------------------------------------------------------------------------
 1 | % CUFINUFFT1D2   GPU 1D complex nonuniform FFT, type 2 (uniform to nonuniform).
 2 | %
 3 | % c = cufinufft1d2(x,isign,eps,f)
 4 | % c = cufinufft1d2(x,isign,eps,f,opts)
 5 | %
 6 | % This computes on the GPU, to relative precision eps, via a fast algorithm:
 7 | %
 8 | %    c[j] = SUM   f[k1] exp(+/-i k1 x[j])      for j = 1,...,nj
 9 | %            k1
10 | %     where sum is over -ms/2 <= k1 <= (ms-1)/2.
11 | %
12 | %  Inputs:
13 | %     x     length-nj vector of real-valued locations of nonuniform targets
14 | %     f     complex Fourier coefficients. If a vector, length sets ms
15 | %           (with mode ordering given by opts.modeord). If a matrix, each
16 | %           of ntrans columns is transformed with the same nonuniform targets.
17 | ISIGNEPS
18 | GOPTS
19 | GOPTS12
20 | %  Outputs:
21 | %     c     complex column vector of nj answers at targets, or,
22 | %           if ntrans>1, matrix of size (nj,ntrans).
23 | %
24 | GNOTES
25 | 


--------------------------------------------------------------------------------
/matlab/cufinufft1d3.docsrc:
--------------------------------------------------------------------------------
 1 | % CUFINUFFT1D3   GPU 1D complex nonuniform FFT, type 3 (nonuniform to nonuniform).
 2 | %
 3 | % f = cufinufft1d3(x,c,isign,eps,s)
 4 | % f = cufinufft1d3(x,c,isign,eps,s,opts)
 5 | %
 6 | % This computes on the GPU, to relative precision eps, via a fast algorithm:
 7 | %
 8 | %              nj
 9 | %     f[k]  =  SUM   c[j] exp(+-i s[k] x[j]),      for k = 1, ..., nk
10 | %              j=1
11 | %   Inputs:
12 | %     x     length-nj vector of real-valued locations of nonuniform sources
13 | %     c     length-nj complex vector of source strengths. If numel(c)>nj,
14 | %           expects a stack of vectors (eg, a nj*ntrans matrix) each of which is
15 | %           transformed with the same source and target locations.
16 | ISIGNEPS
17 | %     s     length-nk vector of frequency locations of nonuniform targets
18 | GOPTS
19 | %   Outputs:
20 | %     f     length-nk complex vector of values at targets, or, if ntrans>1,
21 | %           a matrix of size (nk,ntrans)
22 | %
23 | GNOTES
24 | 


--------------------------------------------------------------------------------
/matlab/cufinufft2d1.docsrc:
--------------------------------------------------------------------------------
 1 | % CUFINUFFT2D1   GPU 2D complex nonuniform FFT, type 1 (nonuniform to uniform).
 2 | %
 3 | % f = cufinufft2d1(x,y,c,isign,eps,ms,mt)
 4 | % f = cufinufft2d1(x,y,c,isign,eps,ms,mt,opts)
 5 | %
 6 | % This computes on the GPU, to relative precision eps, via a fast algorithm:
 7 | %
 8 | %                   nj
 9 | %     f[k1,k2] =   SUM  c[j] exp(+-i (k1 x[j] + k2 y[j]))
10 | %                  j=1
11 | %
12 | %     for -ms/2 <= k1 <= (ms-1)/2,  -mt/2 <= k2 <= (mt-1)/2.
13 | %
14 | %   Inputs:
15 | %     x,y   real-valued coordinates of nonuniform sources in the plane,
16 | %           each a length-nj vector
17 | %     c     length-nj complex vector of source strengths. If numel(c)>nj,
18 | %           expects a stack of vectors (eg, a nj*ntrans matrix) each of which is
19 | %           transformed with the same source locations.
20 | ISIGNEPS
21 | %     ms,mt  number of Fourier modes requested in x & y; each may be even or odd.
22 | %            In either case the mode range is integers lying in [-m/2, (m-1)/2]
23 | GOPTS
24 | GOPTS12
25 | %   Outputs:
26 | %     f     size (ms,mt) complex matrix of Fourier coefficients
27 | %           (ordering given by opts.modeord in each dimension; ms fast, mt slow),
28 | %           or, if ntrans>1, a 3D array of size (ms,mt,ntrans).
29 | %
30 | GNOTES
31 | 


--------------------------------------------------------------------------------
/matlab/cufinufft2d2.docsrc:
--------------------------------------------------------------------------------
 1 | % CUFINUFFT2D2   GPU 2D complex nonuniform FFT, type 2 (uniform to nonuniform).
 2 | %
 3 | % c = cufinufft2d2(x,y,isign,eps,f)
 4 | % c = cufinufft2d2(x,y,isign,eps,f,opts)
 5 | %
 6 | % This computes on the GPU, to relative precision eps, via a fast algorithm:
 7 | %
 8 | %    c[j] =  SUM   f[k1,k2] exp(+/-i (k1 x[j] + k2 y[j]))  for j = 1,..,nj
 9 | %           k1,k2
10 | %     where sum is over -ms/2 <= k1 <= (ms-1)/2, -mt/2 <= k2 <= (mt-1)/2.
11 | %
12 | %  Inputs:
13 | %     x,y   real-valued coordinates of nonuniform targets in the plane,
14 | %           each a vector of length nj
15 | %     f     complex Fourier coefficient matrix, whose size determines (ms,mt).
16 | %           (Mode ordering given by opts.modeord, in each dimension.)
17 | %           If a 3D array, 3rd dimension sets ntrans, and each of ntrans
18 | %           matrices is transformed with the same nonuniform targets.
19 | ISIGNEPS
20 | GOPTS
21 | GOPTS12
22 | %  Outputs:
23 | %     c     complex column vector of nj answers at targets, or,
24 | %           if ntrans>1, matrix of size (nj,ntrans).
25 | %
26 | GNOTES
27 | 


--------------------------------------------------------------------------------
/matlab/cufinufft2d3.docsrc:
--------------------------------------------------------------------------------
 1 | % CUFINUFFT2D3   GPU 2D complex nonuniform FFT, type 3 (nonuniform to nonuniform).
 2 | %
 3 | % f = cufinufft2d3(x,y,c,isign,eps,s,t)
 4 | % f = cufinufft2d3(x,y,c,isign,eps,s,t,opts)
 5 | %
 6 | % This computes on the GPU, to relative precision eps, via a fast algorithm:
 7 | %
 8 | %              nj
 9 | %     f[k]  =  SUM   c[j] exp(+-i (s[k] x[j] + t[k] y[j])),  for k = 1, ..., nk
10 | %              j=1
11 | %   Inputs:
12 | %     x,y   coordinates of nonuniform sources in R^2, each a length-nj vector.
13 | %     c     length-nj complex vector of source strengths. If numel(c)>nj,
14 | %           expects a stack of vectors (eg, a nj*ntrans matrix) each of which is
15 | %           transformed with the same source and target locations.
16 | ISIGNEPS
17 | %     s,t   frequency coordinates of nonuniform targets in R^2,
18 | %           each a length-nk vector.
19 | GOPTS
20 | %   Outputs:
21 | %     f     length-nk complex vector of values at targets, or, if ntrans>1,
22 | %           a matrix of size (nk,ntrans)
23 | %
24 | GNOTES
25 | 


--------------------------------------------------------------------------------
/matlab/cufinufft3d1.docsrc:
--------------------------------------------------------------------------------
 1 | % CUFINUFFT3D1   GPU 3D complex nonuniform FFT, type 1 (nonuniform to uniform).
 2 | %
 3 | % f = cufinufft3d1(x,y,z,c,isign,eps,ms,mt,mu)
 4 | % f = cufinufft3d1(x,y,z,c,isign,eps,ms,mt,mu,opts)
 5 | %
 6 | % This computes on the GPU, to relative precision eps, via a fast algorithm:
 7 | %
 8 | %                       nj
 9 | %     f[k1,k2,k3] =    SUM  c[j] exp(+-i (k1 x[j] + k2 y[j] + k3 z[j]))
10 | %                      j=1
11 | %
12 | %     for -ms/2 <= k1 <= (ms-1)/2,  -mt/2 <= k2 <= (mt-1)/2,
13 | %         -mu/2 <= k3 <= (mu-1)/2.
14 | %
15 | %   Inputs:
16 | %     x,y,z real-valued coordinates of nonuniform sources,
17 | %           each a length-nj vector
18 | %     c     length-nj complex vector of source strengths. If numel(c)>nj,
19 | %           expects a stack of vectors (eg, a nj*ntrans matrix) each of which is
20 | %           transformed with the same source locations.
21 | ISIGNEPS
22 | %     ms,mt,mu  number of Fourier modes requested in x,y and z; each may be
23 | %           even or odd.
24 | %           In either case the mode range is integers lying in [-m/2, (m-1)/2]
25 | GOPTS
26 | GOPTS12
27 | %   Outputs:
28 | %     f     size (ms,mt,mu) complex array of Fourier coefficients
29 | %           (ordering given by opts.modeord in each dimension; ms fastest, mu
30 | %           slowest), or, if ntrans>1, a 4D array of size (ms,mt,mu,ntrans).
31 | %
32 | GNOTES
33 | 


--------------------------------------------------------------------------------
/matlab/cufinufft3d2.docsrc:
--------------------------------------------------------------------------------
 1 | % CUFINUFFT3D2   GPU 3D complex nonuniform FFT, type 2 (uniform to nonuniform).
 2 | %
 3 | % c = cufinufft3d2(x,y,z,isign,eps,f)
 4 | % c = cufinufft3d2(x,y,z,isign,eps,f,opts)
 5 | %
 6 | % This computes on the GPU, to relative precision eps, via a fast algorithm:
 7 | %
 8 | %    c[j] =   SUM   f[k1,k2,k3] exp(+/-i (k1 x[j] + k2 y[j] + k3 z[j]))
 9 | %           k1,k2,k3
10 | %                            for j = 1,..,nj
11 | %     where sum is over -ms/2 <= k1 <= (ms-1)/2, -mt/2 <= k2 <= (mt-1)/2,
12 | %                       -mu/2 <= k3 <= (mu-1)/2.
13 | %
14 | %  Inputs:
15 | %     x,y,z real-valued coordinates of nonuniform targets,
16 | %           each a vector of length nj
17 | %     f     complex Fourier coefficient array, whose size sets (ms,mt,mu).
18 | %           (Mode ordering given by opts.modeord, in each dimension.)
19 | %           If a 4D array, 4th dimension sets ntrans, and each of ntrans
20 | %           3D arrays is transformed with the same nonuniform targets.
21 | ISIGNEPS
22 | GOPTS
23 | GOPTS12
24 | %  Outputs:
25 | %     c     complex column vector of nj answers at targets, or,
26 | %           if ntrans>1, matrix of size (nj,ntrans).
27 | %
28 | GNOTES
29 | 


--------------------------------------------------------------------------------
/matlab/cufinufft3d3.docsrc:
--------------------------------------------------------------------------------
 1 | % CUFINUFFT3D3   GPU 3D complex nonuniform FFT, type 3 (nonuniform to nonuniform).
 2 | %
 3 | % f = cufinufft3d3(x,y,z,c,isign,eps,s,t,u)
 4 | % f = cufinufft3d3(x,y,z,c,isign,eps,s,t,u,opts)
 5 | %
 6 | % This computes on the GPU, to relative precision eps, via a fast algorithm:
 7 | %
 8 | %              nj
 9 | %     f[k]  =  SUM   c[j] exp(+-i (s[k] x[j] + t[k] y[j] + u[k] z[j])),
10 | %              j=1
11 | %                              for k = 1, ..., nk
12 | %   Inputs:
13 | %     x,y,z  coordinates of nonuniform sources in R^3, each a length-nj vector.
14 | %     c     length-nj complex vector of source strengths. If numel(c)>nj,
15 | %           expects a stack of vectors (eg, a nj*ntrans matrix) each of which is
16 | %           transformed with the same source and target locations.
17 | ISIGNEPS
18 | %     s,t,u  frequency coordinates of nonuniform targets in R^3,
19 | %           each a length-nk vector.
20 | GOPTS
21 | %   Outputs:
22 | %     f     length-nk complex vector of values at targets, or, if ntrans>1,
23 | %           a matrix of size (nk,ntrans)
24 | %
25 | GNOTES
26 | 


--------------------------------------------------------------------------------
/matlab/errhandler.m:
--------------------------------------------------------------------------------
 1 | function errhandler(ier)
 2 | % ERRHANDLER  translate FINUFFT's ier status into MATLAB warnings/error throws.
 3 | %
 4 | % errhandler(ier) does nothing for ier=0, issues a warning for ier=1, and
 5 | %  throws an error with a FINUFFT:* identifier for every other value of ier,
 6 | %  including status codes that are not in the table below.
 7 | 
 8 | % Barnett 6/13/20
 9 | 
10 | % Note that there are other matlab-only error types defined in valid_*.m
11 | 
12 | switch ier
13 |  case 0
14 |   % success: nothing to report
15 |  % These are the ERR_ #defines in ../include/finufft_errors.h:
16 |  case 1
17 |   warning('FINUFFT:epsTooSmall','FINUFFT eps tolerance too small to achieve');
18 |  case 2
19 |   error('FINUFFT:mallocGtMaxNf','FINUFFT malloc size requested greater than MAXNF');
20 |  case 3
21 |   error('FINUFFT:spreadinterp:fineGridSmall','FINUFFT spreader fine grid too small compared to kernel width');
22 |  case 4
23 |   error('FINUFFT:spreadinterp:NUrange','[DEPRECATED]');
24 |  case 5
25 |   error('FINUFFT:spreadinterp:malloc','FINUFFT spreader malloc error');
26 |  case 6
27 |   error('FINUFFT:spreadinterp:badDir','FINUFFT spreader illegal direction (must be 1 or 2)');
28 |  case 7
29 |   error('FINUFFT:upsampfacSmall','FINUFFT opts.upsampfac not > 1.0');
30 |  case 8
31 |   error('FINUFFT:upsampfacNotHorner','FINUFFT opts.upsampfac not a value with known Horner polynomial rule');
32 |  case 9
33 |   error('FINUFFT:badNtrans','FINUFFT number of transforms ntrans invalid');
34 |  case 10
35 |   error('FINUFFT:badType','FINUFFT transform type invalid');
36 |  case 11
37 |   error('FINUFFT:malloc','FINUFFT general malloc failure');
38 |  case 12
39 |   error('FINUFFT:badDim','FINUFFT number of dimensions dim invalid');
40 |  otherwise
41 |   % Nonzero status missing from the table above (eg, a code added to the
42 |   % library after this list was written): fail loudly, since returning
43 |   % silently would make a failed call look like a success.
44 |   error('FINUFFT:unknownError','FINUFFT returned unrecognized error code %d',ier);
45 | end
46 | 


--------------------------------------------------------------------------------
/matlab/examples/README:
--------------------------------------------------------------------------------
 1 | MATLAB/Octave example drivers for FINUFFT (CPU and GPU versions)
 2 | 
 3 | This directory contains example codes for MATLAB/Octave users of FINUFFT.
 4 | The GPU examples are in cuda/ (these are MATLAB-only).
 5 | 
 6 | Please also see:
 7 | 
 8 | ../test/check_finufft.m for examples of eleven types of transform calls in double-precision.
 9 | ../test/check_finufft_single.m for examples of eleven types of transform calls in single-precision.
10 | 
11 | ../../tutorial/*.m for the tutorial codes, many of which are in MATLAB.
12 | 


--------------------------------------------------------------------------------
/matlab/examples/cuda/README:
--------------------------------------------------------------------------------
 1 | MATLAB GPU FINUFFT example codes. All data I/O are gpuArrays.
 2 | 
 3 | Double-precision demos have filenames *_gpu.m
 4 | Single-precision are *f_gpu.m
 5 | 
 6 | Unlike the CPU codes in the parent directory, these are not compatible
 7 | with Octave.
 8 | 
 9 | Libin Lu & Alex Barnett, March 2025.
10 | 


--------------------------------------------------------------------------------
/matlab/examples/cuda/guru1d1_gpu.m:
--------------------------------------------------------------------------------
 1 | % MATLAB double-precision FINUFFT GPU demo for 1D type 1 transform.
 2 | clear
 3 | 
 4 | % set required parameters...
 5 | isign   = +1;     % sign of imaginary unit in exponential
 6 | tol     = 1e-8;   % requested accuracy
 7 | M       = 1e7;    % # pts
 8 | N       = 1e7;    % # of modes
 9 | type = 1;         % transform type (1 = nonuniform to uniform)
10 | n_modes = [N];    % n_dims inferred from length of this
11 | ntrans = 1;       % number of transforms (>1: demo many-vector interface)
12 | 
13 | xg = pi*(2*gpuArray.rand(M,1)-1);                           % NU points on GPU
14 | cg = gpuArray.randn(M,ntrans)+1i*gpuArray.randn(M,ntrans);  % strengths on GPU
15 | 
16 | opts.debug=1;                % set options then plan the transform...
17 | opts.floatprec = 'double';   % tells it to make a double-precision plan
18 | opts.gpu_method=2;           % "SM" method
19 | 
20 | dev = gpuDevice();           % needed for timing
21 | disp('starting...'), tic     % just time cuFINUFFT, not the data creation
22 | 
23 | plan = cufinufft_plan(type,n_modes,isign,ntrans,tol,opts);   % make plan
24 | 
25 | plan.setpts(xg);                                 % send in NU pts
26 | 
27 | fg = plan.execute(cg);                           % do transform (to fg on GPU)
28 | 
29 | wait(dev); tgpu = toc;	      	      	         % since GPU async
30 | fprintf('done in %.3g s: throughput (excl H<->D) is %.3g NUpt/s\n',...
31 |         tgpu, M*ntrans/tgpu)
32 | 
33 | % if you do not want to do more transforms of this size, clean up...
34 | delete(plan);
35 | 
36 | % check the error of only one output also on GPU, against the direct sum...
37 | t = ceil(0.7*ntrans);                           % pick a transform in stack
38 | if ntrans>1, ct = cg(:,t); ft = fg(:,t); else, ct = cg; ft = fg; end
39 | nt = ceil(0.37*N);                              % pick a mode index
40 | fe = sum(ct.*exp(1i*isign*nt*xg));              % exact
41 | of1 = floor(N/2)+1;                             % mode index offset (assumes output modes ascend from -floor(N/2) -- confirm vs opts.modeord)
42 | fprintf('rel err in F[%d] is %.3g\n',nt,abs(fe-ft(nt+of1))/norm(ft,Inf))
43 | 


--------------------------------------------------------------------------------
/matlab/examples/cuda/guru1d1f_gpu.m:
--------------------------------------------------------------------------------
 1 | % MATLAB single-precision FINUFFT GPU demo for 1D type 1 transform.
 2 | clear
 3 | 
 4 | % set required parameters...
 5 | isign   = +1;     % sign of imaginary unit in exponential
 6 | tol     = 1e-3;   % requested accuracy
 7 | M       = 1e8;    % # pts
 8 | N       = 1e6;    % # of modes; note N*eps('single') limits accuracy
 9 | type = 1;         % transform type (1 = nonuniform to uniform)
10 | n_modes = [N];    % n_dims inferred from length of this
11 | ntrans = 1;       % number of transforms (>1: demo many-vector interface)
12 | 
13 | xg = pi*(2*gpuArray.rand(M,1,'single')-1);             % float32 NU pts on GPU
14 | % stack of float32 strengths on GPU...
15 | cg = gpuArray.randn(M,ntrans,'single')+1i*gpuArray.randn(M,ntrans,'single');
16 | 
17 | opts.debug=1;    % set options then plan the transform...
18 | opts.floatprec = 'single';   % tells it to make a single-precision plan
19 | opts.gpu_method=2;           % same method choice as the double-precision demo guru1d1_gpu.m
20 | 
21 | dev = gpuDevice();           % needed for timing
22 | disp('starting...'), tic     % just time cuFINUFFT, not the data creation
23 | 
24 | plan = cufinufft_plan(type,n_modes,isign,ntrans,tol,opts);   % make plan
25 | 
26 | plan.setpts(xg);                                 % send in NU pts
27 | 
28 | fg = plan.execute(cg);                           % do transform (to fg on GPU)
29 | 
30 | wait(dev); tgpu = toc;                           % since GPU async
31 | fprintf('done in %.3g s: throughput (excl H<->D) is %.3g NUpt/s\n',...
32 |         tgpu, M*ntrans/tgpu)
33 | 
34 | % if you do not want to do more transforms of this size, clean up...
35 | delete(plan);
36 | 
37 | % check the error of only one output also on GPU, against the direct sum...
38 | t = ceil(0.7*ntrans);                           % pick a transform in stack
39 | if ntrans>1, ct = cg(:,t); ft = fg(:,t); else, ct = cg; ft = fg; end
40 | nt = ceil(0.37*N);                              % pick a mode index
41 | fe = sum(ct.*exp(1i*isign*nt*xg));              % exact (but itself evaluated in single precision)
42 | of1 = floor(N/2)+1;                             % mode index offset (assumes output modes ascend from -floor(N/2) -- confirm vs opts.modeord)
43 | fprintf('rel err in F[%d] is %.3g\n',nt,abs(fe-ft(nt+of1))/norm(ft,Inf))
44 | 


--------------------------------------------------------------------------------
/matlab/examples/cuda/simple1d1f_gpu.m:
--------------------------------------------------------------------------------
 1 | % MATLAB single-precision FINUFFT GPU demo for 1D type 1 transform.
 2 | clear
 3 | 
 4 | M = 1e8;                                 % # of nonuniform pts
 5 | x = 2*pi*gpuArray.rand(M,1,'single');    % random pts in [0,2pi]
 6 | % iid random complex data...
 7 | c = gpuArray.randn(M,1,'single')+1i*gpuArray.randn(M,1,'single');
 8 | 
 9 | N = 1e6;             % # of Fourier modes; note N*eps('single') limits accuracy
10 | isign = +1;          % sign of imaginary unit in exponential
11 | tol = 1e-3;          % requested accuracy
12 | 
13 | dev = gpuDevice();                       % crucial for valid timing
14 | tic
15 | f = cufinufft1d1(x,c,isign,tol,N);       % do it (all opts default)
16 | %opts.gpu_method=2; f = cufinufft1d1(x,c,isign,tol,N,opts); % do it with opts
17 | wait(dev)                                % crucial for valid timing
18 | tgpu = toc;
19 | fprintf('done in %.3g s: throughput (excl H<->D) is %.3g NUpt/s\n',tgpu,M/tgpu)
20 | 
21 | % check the error of only one output, also on GPU...
22 | nt = ceil(0.47*N);                       % pick a mode index in -N/2,..,N/2-1
23 | fe = sum(c.*exp(1i*isign*nt*x));         % exact (direct sum for mode nt)
24 | of = floor(N/2)+1;                       % mode index offset
25 | fprintf('rel err in F[%d] is %.3g\n',nt,abs(fe-f(nt+of))/norm(f,Inf))
26 | 


--------------------------------------------------------------------------------
/matlab/examples/guru1d1.m:
--------------------------------------------------------------------------------
 1 | % MATLAB/octave demo script of guru interface to FINUFFT, 1D type 1.
 2 | % Lu 5/11/2020. Barnett added timing, tweaked.
 3 | clear
 4 | 
 5 | % set required parameters...
 6 | isign   = +1;     % sign of imaginary unit in exponential
 7 | tol     = 1e-9;   % requested accuracy
 8 | M       = 1e6;    % # of NU pts
 9 | N       = 1e6;    % # of modes
10 | type = 1;         % transform type (1 = nonuniform to uniform)
11 | n_modes = N;      % n_dims inferred from length of this
12 | ntrans = 2;       % number of transforms sharing the same NU pts
13 | 
14 | x = pi*(2*rand(1,M)-1);                         % choose NU points
15 | c = randn(1,M*ntrans)+1i*randn(1,M*ntrans);     % choose stack of strengths
16 | 
17 | disp('starting...'), tic     % just time FINUFFT not the data creation
18 | opts.debug=2;    % set options then plan the transform...
19 | plan = finufft_plan(type,n_modes,isign,ntrans,tol,opts);
20 | 
21 | plan.setpts(x);                                 % send in NU pts
22 | 
23 | f = plan.execute(c);                               % do the transform
24 | disp('done.'); toc
25 | 
26 | % if you do not want to do more transforms of this size, clean up...
27 | delete(plan);
28 | 
29 | % check the error of one output, against the direct sum...
30 | nt = ceil(0.37*N);                              % pick a mode index
31 | t = ceil(0.7*ntrans);                           % pick a transform in stack
32 | fe = sum(c(M*(t-1)+(1:M)).*exp(1i*isign*nt*x));        % exact (uses the t-th length-M chunk of c)
33 | of1 = floor(N/2) + 1 + N*(t-1);                        % mode index offset (N*(t-1) skips to transform t in f)
34 | fprintf('rel err in F[%d] is %.3g\n',nt,abs(fe-f(nt+of1))/norm(f,Inf))
35 | 


--------------------------------------------------------------------------------
/matlab/examples/guru1d1_single.m:
--------------------------------------------------------------------------------
 1 | % MATLAB/octave demo script of guru interface to FINUFFT, 1D type 1.
 2 | % Single-precision case.
 3 | % Lu 5/11/2020. Barnett added timing, tweaked.
 4 | clear
 5 | 
 6 | % set required parameters...
 7 | isign   = +1;     % sign of imaginary unit in exponential
 8 | tol     = 1e-5;   % requested accuracy (cannot ask for much more in single prec)
 9 | M       = 2e5;    % # of NU pts
10 | N       = 1e5;    % # of modes
11 | type = 1;         % transform type (1 = nonuniform to uniform)
12 | n_modes = N;      % n_dims inferred from length of this
13 | ntrans = 3;       % number of transforms sharing the same NU pts
14 | 
15 | x = pi*(2*rand(1,M,'single')-1);                % choose NU points
16 | c = randn(1,M*ntrans,'single')+1i*randn(1,M*ntrans,'single');     % strengths
17 | 
18 | % set options then plan the transform...
19 | opts.debug=2;
20 | opts.floatprec = 'single';   % tells it to make a single-precision plan
21 | disp('starting...'), tic     % just time FINUFFT not the data creation
22 | plan = finufft_plan(type,n_modes,isign,ntrans,tol,opts);
23 | 
24 | plan.setpts(x);                                 % send in NU pts
25 | 
26 | f = plan.execute(c);                               % do the transform
27 | disp('done.'); toc
28 | 
29 | % if you do not want to do more transforms of this size, clean up...
30 | delete(plan);
31 | 
32 | % check the error of one output, against the direct sum...
33 | nt = ceil(0.37*N);                              % pick a mode index
34 | t = ceil(0.7*ntrans);                           % pick a transform in stack
35 | fe = sum(c(M*(t-1)+(1:M)).*exp(1i*isign*nt*x));           % exact (uses the t-th length-M chunk of c)
36 | of1 = floor(N/2) + 1 + N*(t-1);                           % mode index offset (N*(t-1) skips to transform t in f)
37 | fprintf('rel err in F[%d] is %.3g\n',nt,abs(fe-f(nt+of1))/norm(f,Inf))
38 | 


--------------------------------------------------------------------------------
/matlab/finufft1d1.docsrc:
--------------------------------------------------------------------------------
 1 | % FINUFFT1D1   1D complex nonuniform FFT of type 1 (nonuniform to uniform).
 2 | %
 3 | % f = finufft1d1(x,c,isign,eps,ms)
 4 | % f = finufft1d1(x,c,isign,eps,ms,opts)
 5 | %
 6 | % This computes, to relative precision eps, via a fast algorithm:
 7 | %
 8 | %               nj
 9 | %     f(k1) =  SUM c[j] exp(+/-i k1 x(j))  for -ms/2 <= k1 <= (ms-1)/2
10 | %              j=1
11 | %   Inputs:
12 | %     x     length-nj vector of real-valued locations of nonuniform sources
13 | %     c     length-nj complex vector of source strengths. If numel(c)>nj,
14 | %           expects a stack of vectors (eg, a nj*ntrans matrix) each of which is
15 | %           transformed with the same source locations.
16 | ISIGNEPS
17 | %     ms    number of Fourier modes computed, may be even or odd;
18 | %           in either case, mode range is integers lying in [-ms/2, (ms-1)/2]
19 | OPTS
20 | OPTS12
21 | %   Outputs:
22 | %     f     size-ms complex column vector of Fourier coefficients, or, if
23 | %           ntrans>1, a matrix of size (ms,ntrans).
24 | %
25 | NOTES
26 | 


--------------------------------------------------------------------------------
/matlab/finufft1d2.docsrc:
--------------------------------------------------------------------------------
 1 | % FINUFFT1D2   1D complex nonuniform FFT of type 2 (uniform to nonuniform).
 2 | %
 3 | % c = finufft1d2(x,isign,eps,f)
 4 | % c = finufft1d2(x,isign,eps,f,opts)
 5 | %
 6 | % This computes, to relative precision eps, via a fast algorithm:
 7 | %
 8 | %    c[j] = SUM   f[k1] exp(+/-i k1 x[j])      for j = 1,...,nj
 9 | %            k1
10 | %     where sum is over -ms/2 <= k1 <= (ms-1)/2.
11 | %
12 | %  Inputs:
13 | %     x     length-nj vector of real-valued locations of nonuniform sources
14 | %     f     complex Fourier coefficients. If a vector, length sets ms
15 | %           (with mode ordering given by opts.modeord). If a matrix, each
16 | %           of ntrans columns is transformed with the same nonuniform targets.
17 | ISIGNEPS
18 | OPTS
19 | OPTS12
20 | %  Outputs:
21 | %     c     complex column vector of nj answers at targets, or,
22 | %           if ntrans>1, matrix of size (nj,ntrans).
23 | %
24 | NOTES
25 | 


--------------------------------------------------------------------------------
/matlab/finufft1d3.docsrc:
--------------------------------------------------------------------------------
 1 | % FINUFFT1D3   1D complex nonuniform FFT of type 3 (nonuniform to nonuniform).
 2 | %
 3 | % f = finufft1d3(x,c,isign,eps,s)
 4 | % f = finufft1d3(x,c,isign,eps,s,opts)
 5 | %
 6 | % This computes, to relative precision eps, via a fast algorithm:
 7 | %
 8 | %              nj
 9 | %     f[k]  =  SUM   c[j] exp(+-i s[k] x[j]),      for k = 1, ..., nk
10 | %              j=1
11 | %   Inputs:
12 | %     x     length-nj vector of real-valued locations of nonuniform sources
13 | %     c     length-nj complex vector of source strengths. If numel(c)>nj,
14 | %           expects a stack of vectors (eg, a nj*ntrans matrix) each of which is
15 | %           transformed with the same source and target locations.
16 | ISIGNEPS
17 | %     s     length-nk vector of frequency locations of nonuniform targets
18 | OPTS
19 | %   Outputs:
20 | %     f     length-nk complex vector of values at targets, or, if ntrans>1,
21 | %           a matrix of size (nk,ntrans)
22 | %
23 | NOTES
24 | 


--------------------------------------------------------------------------------
/matlab/finufft2d1.docsrc:
--------------------------------------------------------------------------------
 1 | % FINUFFT2D1   2D complex nonuniform FFT of type 1 (nonuniform to uniform).
 2 | %
 3 | % f = finufft2d1(x,y,c,isign,eps,ms,mt)
 4 | % f = finufft2d1(x,y,c,isign,eps,ms,mt,opts)
 5 | %
 6 | % This computes, to relative precision eps, via a fast algorithm:
 7 | %
 8 | %                   nj
 9 | %     f[k1,k2] =   SUM  c[j] exp(+-i (k1 x[j] + k2 y[j]))
10 | %                  j=1
11 | %
12 | %     for -ms/2 <= k1 <= (ms-1)/2,  -mt/2 <= k2 <= (mt-1)/2.
13 | %
14 | %   Inputs:
15 | %     x,y   real-valued coordinates of nonuniform sources in the plane,
16 | %           each a length-nj vector
17 | %     c     length-nj complex vector of source strengths. If numel(c)>nj,
18 | %           expects a stack of vectors (eg, a nj*ntrans matrix) each of which is
19 | %           transformed with the same source locations.
20 | ISIGNEPS
21 | %     ms,mt  number of Fourier modes requested in x & y; each may be even or odd.
22 | %            In either case the mode range is integers lying in [-m/2, (m-1)/2]
23 | OPTS
24 | OPTS12
25 | %   Outputs:
26 | %     f     size (ms,mt) complex matrix of Fourier coefficients
27 | %           (ordering given by opts.modeord in each dimension; ms fast, mt slow),
28 | %           or, if ntrans>1, a 3D array of size (ms,mt,ntrans).
29 | %
30 | NOTES
31 | 


--------------------------------------------------------------------------------
/matlab/finufft2d2.docsrc:
--------------------------------------------------------------------------------
 1 | % FINUFFT2D2   2D complex nonuniform FFT of type 2 (uniform to nonuniform).
 2 | %
 3 | % c = finufft2d2(x,y,isign,eps,f)
 4 | % c = finufft2d2(x,y,isign,eps,f,opts)
 5 | %
 6 | % This computes, to relative precision eps, via a fast algorithm:
 7 | %
 8 | %    c[j] =  SUM   f[k1,k2] exp(+/-i (k1 x[j] + k2 y[j]))  for j = 1,..,nj
 9 | %           k1,k2
10 | %     where sum is over -ms/2 <= k1 <= (ms-1)/2, -mt/2 <= k2 <= (mt-1)/2,
11 | %
12 | %  Inputs:
13 | %     x,y   real-valued coordinates of nonuniform targets in the plane,
14 | %           each a vector of length nj
15 | %     f     complex Fourier coefficient matrix, whose size determines (ms,mt).
16 | %           (Mode ordering given by opts.modeord, in each dimension.)
17 | %           If a 3D array, 3rd dimension sets ntrans, and each of ntrans
18 | %           matrices is transformed with the same nonuniform targets.
19 | ISIGNEPS
20 | OPTS
21 | OPTS12
22 | %  Outputs:
23 | %     c     complex column vector of nj answers at targets, or,
24 | %           if ntrans>1, matrix of size (nj,ntrans).
25 | %
26 | NOTES
27 | 


--------------------------------------------------------------------------------
/matlab/finufft2d3.docsrc:
--------------------------------------------------------------------------------
 1 | % FINUFFT2D3   2D complex nonuniform FFT of type 3 (nonuniform to nonuniform).
 2 | %
 3 | % f = finufft2d3(x,y,c,isign,eps,s,t)
 4 | % f = finufft2d3(x,y,c,isign,eps,s,t,opts)
 5 | %
 6 | % This computes, to relative precision eps, via a fast algorithm:
 7 | %
 8 | %              nj
 9 | %     f[k]  =  SUM   c[j] exp(+-i (s[k] x[j] + t[k] y[j])),  for k = 1, ..., nk
10 | %              j=1
11 | %   Inputs:
12 | %     x,y   coordinates of nonuniform sources in R^2, each a length-nj vector.
13 | %     c     length-nj complex vector of source strengths. If numel(c)>nj,
14 | %           expects a stack of vectors (eg, a nj*ntrans matrix) each of which is
15 | %           transformed with the same source and target locations.
16 | ISIGNEPS
17 | %     s,t   frequency coordinates of nonuniform targets in R^2,
18 | %           each a length-nk vector.
19 | OPTS
20 | %   Outputs:
21 | %     f     length-nk complex vector of values at targets, or, if ntrans>1,
22 | %           a matrix of size (nk,ntrans)
23 | %
24 | NOTES
25 | 


--------------------------------------------------------------------------------
/matlab/finufft3d1.docsrc:
--------------------------------------------------------------------------------
 1 | % FINUFFT3D1   3D complex nonuniform FFT of type 1 (nonuniform to uniform).
 2 | %
 3 | % f = finufft3d1(x,y,z,c,isign,eps,ms,mt,mu)
 4 | % f = finufft3d1(x,y,z,c,isign,eps,ms,mt,mu,opts)
 5 | %
 6 | % This computes, to relative precision eps, via a fast algorithm:
 7 | %
 8 | %                       nj
 9 | %     f[k1,k2,k3] =    SUM  c[j] exp(+-i (k1 x[j] + k2 y[j] + k3 z[j]))
10 | %                      j=1
11 | %
12 | %     for -ms/2 <= k1 <= (ms-1)/2,  -mt/2 <= k2 <= (mt-1)/2,
13 | %         -mu/2 <= k3 <= (mu-1)/2.
14 | %
15 | %   Inputs:
16 | %     x,y,z real-valued coordinates of nonuniform sources,
17 | %           each a length-nj vector
18 | %     c     length-nj complex vector of source strengths. If numel(c)>nj,
19 | %           expects a stack of vectors (eg, a nj*ntrans matrix) each of which is
20 | %           transformed with the same source locations.
21 | ISIGNEPS
22 | %     ms,mt,mu  number of Fourier modes requested in x,y and z; each may be
23 | %           even or odd.
24 | %           In either case the mode range is integers lying in [-m/2, (m-1)/2]
25 | OPTS
26 | OPTS12
27 | %   Outputs:
28 | %     f     size (ms,mt,mu) complex array of Fourier coefficients
29 | %           (ordering given by opts.modeord in each dimension; ms fastest, mu
30 | %           slowest), or, if ntrans>1, a 4D array of size (ms,mt,mu,ntrans).
31 | %
32 | NOTES
33 | 


--------------------------------------------------------------------------------
/matlab/finufft3d2.docsrc:
--------------------------------------------------------------------------------
 1 | % FINUFFT3D2   3D complex nonuniform FFT of type 2 (uniform to nonuniform).
 2 | %
 3 | % c = finufft3d2(x,y,z,isign,eps,f)
 4 | % c = finufft3d2(x,y,z,isign,eps,f,opts)
 5 | %
 6 | % This computes, to relative precision eps, via a fast algorithm:
 7 | %
 8 | %    c[j] =   SUM   f[k1,k2,k3] exp(+/-i (k1 x[j] + k2 y[j] + k3 z[j]))
 9 | %           k1,k2,k3
10 | %                            for j = 1,..,nj
11 | %     where sum is over -ms/2 <= k1 <= (ms-1)/2, -mt/2 <= k2 <= (mt-1)/2,
12 | %                       -mu/2 <= k3 <= (mu-1)/2.
13 | %
14 | %  Inputs:
15 | %     x,y,z real-valued coordinates of nonuniform targets,
16 | %           each a vector of length nj
17 | %     f     complex Fourier coefficient array, whose size sets (ms,mt,mu).
18 | %           (Mode ordering given by opts.modeord, in each dimension.)
19 | %           If a 4D array, 4th dimension sets ntrans, and each of ntrans
20 | %           3D arrays is transformed with the same nonuniform targets.
21 | ISIGNEPS
22 | OPTS
23 | OPTS12
24 | %  Outputs:
25 | %     c     complex column vector of nj answers at targets, or,
26 | %           if ntrans>1, matrix of size (nj,ntrans).
27 | %
28 | NOTES
29 | 


--------------------------------------------------------------------------------
/matlab/finufft3d3.docsrc:
--------------------------------------------------------------------------------
 1 | % FINUFFT3D3   3D complex nonuniform FFT of type 3 (nonuniform to nonuniform).
 2 | %
 3 | % f = finufft3d3(x,y,z,c,isign,eps,s,t,u)
 4 | % f = finufft3d3(x,y,z,c,isign,eps,s,t,u,opts)
 5 | %
 6 | % This computes, to relative precision eps, via a fast algorithm:
 7 | %
 8 | %              nj
 9 | %     f[k]  =  SUM   c[j] exp(+-i (s[k] x[j] + t[k] y[j] + u[k] z[j])),
10 | %              j=1
11 | %                              for k = 1, ..., nk
12 | %   Inputs:
13 | %     x,y,z  coordinates of nonuniform sources in R^3, each a length-nj vector.
14 | %     c     length-nj complex vector of source strengths. If numel(c)>nj,
15 | %           expects a stack of vectors (eg, a nj*ntrans matrix) each of which is
16 | %           transformed with the same source and target locations.
17 | ISIGNEPS
18 | %     s,t,u  frequency coordinates of nonuniform targets in R^3,
19 | %           each a length-nk vector.
20 | OPTS
21 | %   Outputs:
22 | %     f     length-nk complex vector of values at targets, or, if ntrans>1,
23 | %           a matrix of size (nk,ntrans)
24 | %
25 | NOTES
26 | 


--------------------------------------------------------------------------------
/matlab/gnotes.docbit:
--------------------------------------------------------------------------------
 1 | % Notes:
 2 | %  * For CUFINUFFT all array I/O is in the form of gpuArrays (on-device).
 3 | %  * The precision of gpuArray input x controls whether the double or
 4 | %    single precision GPU library is called; all array inputs must match in
 5 | %    location (ie, be gpuArrays), and in precision.
 6 | %  * The vectorized (many vector) interface, ie ntrans>1, can be faster
 7 | %    than repeated calls with the same nonuniform points. Note that here the
 8 | %    I/O data ordering is stacked not interleaved. See ../docs/matlab_gpu.rst
 9 | %  * For more details about the opts fields, see ../docs/c_gpu.rst
10 | %  * See ERRHANDLER, VALID_* and CUFINUFFT_PLAN for possible warning/error IDs.
11 | %  * Full documentation is online at http://finufft.readthedocs.io
12 | %
13 | % See also CUFINUFFT_PLAN.
14 | 


--------------------------------------------------------------------------------
/matlab/gopts.docbit:
--------------------------------------------------------------------------------
 1 | %     opts   optional struct with optional fields controlling the following:
 2 | %     opts.debug:   0 (silent, default), 1 (timing breakdown), 2 (debug info).
 3 | %     opts.upsampfac:   sigma.  2.0 (default), or 1.25 (low RAM, smaller FFT).
 4 | %     opts.gpu_method:  0 (auto, default), 1 (GM or GM-sort), 2 (SM).
 5 | %     opts.gpu_sort:  0 (do not sort NU pts), 1 (sort when GM method, default).
 6 | %     opts.gpu_kerevalmeth:  0 (slow reference). 1 (Horner ppoly, default).
 7 | %     opts.gpu_maxsubprobsize:  max # NU pts per subprob (gpu_method=2 only).
 8 | %     opts.gpu_binsize{x,y,z}:  various binsizes in GM-sort/SM (for experts).
 9 | %     opts.gpu_maxbatchsize:   0 (auto, default), or many-vector batch size.
10 | %     opts.gpu_device_id:  sets the GPU device ID (experts only).
11 | 


--------------------------------------------------------------------------------
/matlab/gopts12.docbit:
--------------------------------------------------------------------------------
1 | %     opts.modeord: 0 (CMCL increasing mode ordering, default), 1 (FFT ordering)
2 | %     opts.gpu_spreadinterponly: 0 (do NUFFT, default), 1 (only spread/interp)
3 | 


--------------------------------------------------------------------------------
/matlab/isigneps.docbit:
--------------------------------------------------------------------------------
1 | %     isign if >=0, uses + sign in exponential, otherwise - sign.
2 | %     eps   relative precision requested (generally between 1e-15 and 1e-1)
3 | 


--------------------------------------------------------------------------------
/matlab/notes.docbit:
--------------------------------------------------------------------------------
 1 | % Notes:
 2 | %  * The vectorized (many vector) interface, ie ntrans>1, can be much faster
 3 | %    than repeated calls with the same nonuniform points. Note that here the I/O
 4 | %    data ordering is stacked rather than interleaved. See ../docs/matlab.rst
 5 | %  * The class of input x (double vs single) controls whether the double or
 6 | %    single precision library is called; precisions of all data should match.
 7 | %  * For more details about the opts fields, see ../docs/opts.rst
 8 | %  * See ERRHANDLER, VALID_* and FINUFFT_PLAN for possible warning/error IDs.
 9 | %  * Full documentation is online at http://finufft.readthedocs.io
10 | %
11 | % See also FINUFFT_PLAN.
12 | 


--------------------------------------------------------------------------------
/matlab/opts.docbit:
--------------------------------------------------------------------------------
 1 | %     opts   optional struct with optional fields controlling the following:
 2 | %     opts.debug:   0 (silent, default), 1 (timing breakdown), 2 (debug info).
 3 | %     opts.spread_debug: spreader: 0 (no text, default), 1 (some), or 2 (lots)
 4 | %     opts.spread_sort:  0 (don't sort NU pts), 1 (do), 2 (auto, default)
 5 | %     opts.spread_kerevalmeth:  0: exp(sqrt()), 1: Horner ppval (faster)
 6 | %     opts.spread_kerpad: (iff kerevalmeth=0)  0: don't pad to mult of 4, 1: do
 7 | %     opts.fftw: FFTW plan mode, 64=FFTW_ESTIMATE (default), 0=FFTW_MEASURE, etc
 8 | %     opts.upsampfac:   sigma.  2.0 (default), or 1.25 (low RAM, smaller FFT)
 9 | %     opts.spread_thread:   for ntrans>1 only. 0:auto, 1:seq multi, 2:par, etc
10 | %     opts.maxbatchsize:  for ntrans>1 only. max blocking size, or 0 for auto.
11 | %     opts.nthreads:   number of threads, or 0: use all available (default)
12 | 


--------------------------------------------------------------------------------
/matlab/opts12.docbit:
--------------------------------------------------------------------------------
1 | %     opts.modeord: 0 (CMCL increasing mode ordering, default), 1 (FFT ordering)
2 | %     opts.spreadinterponly: 0 (perform NUFFT, default), 1 (only spread/interp)
3 | 


--------------------------------------------------------------------------------
/matlab/test/big1dtest.m:
--------------------------------------------------------------------------------
 1 | % Explore large problems: timing, RAM usage, matlab interface.
 2 | % Barnett 3/28/17
 3 | clear
 4 | isign   = +1;     % sign of imaginary unit in exponential
 5 | eps     = 1e-3;   % requested accuracy
 6 | o.debug = 1;      % choose 1 for timing breakdown text output
 7 | o.spread_sort=0;
 8 | M       = 2.2e9;    % # of NU pts - when >=2^31, answer is wrong, zero ***
 9 | N       = 1e6;    % # of modes (approx total, used in all dims)
10 | 
11 | j = ceil(0.93*M);                               % target pt index to test
12 | 
13 | if 0
14 | fprintf('generating x & c data (single-threaded and slow)...\n')
15 | x = pi*(2*rand(M,1)-1);
16 | c = randn(M,1)+1i*randn(M,1);
17 | fprintf('1D type 1: using %d modes...\n',N)
18 | tic;
19 | f = finufft1d1(x,c,isign,eps,N,o);
20 | fprintf('done in %.3g s\n',toc)
21 | % (no ier status is returned by the call above, so the check is unconditional)
22 | nt = ceil(0.37*N);                              % pick a mode index
23 | fe = sum(c.*exp(1i*isign*nt*x));                % exact
24 | of1 = floor(N/2)+1;                             % mode index offset
25 | fprintf('rel err in F[%d] is %.3g\n',nt,abs((fe-f(nt+of1))/fe))
26 | end
27 | 
28 | if 1
29 | fprintf('generating x data (single-threaded and slow)...\n')
30 | x = pi*(2*rand(M,1)-1);
31 | f = randn(N,1)+1i*randn(N,1);
32 | fprintf('1D type 2: using %d modes...\n',N)
33 | tic
34 | c = finufft1d2(x,isign,eps,f,o);        % Out of memory iff >=2^31
35 | fprintf('done in %.3g s\n',toc)
36 | ms=numel(f); mm = (ceil(-ms/2):floor((ms-1)/2))';  % mode index list
37 | ce = sum(f.*exp(1i*isign*mm*x(j)));             % crucial f, mm same shape
38 | fprintf('1D type-2: rel err in c[%d] is %.3g\n',j,abs((ce-c(j))/ce))
39 | end
40 | 
41 | % conclusion: we get zeros output if >=2^31. Fix this issue w/ mex interface.
42 | 


--------------------------------------------------------------------------------
/matlab/test/fig_accuracy.m:
--------------------------------------------------------------------------------
 1 | % finufft accuracy test figs, deciding err norm to report. Barnett 6/6/17
 2 | % Changed to rel 2-norm, 7/22/24.
 3 | clear
 4 | %M=1e5; N=1e2;         % M = # NU pts, N = # modes.  Note: keep MN<1e8 for now
 5 | M=1e4; N=1e2;         % keep N small to see close to epsmach; cond # = O(N)
 6 | %M=1e2; N=1e5; % confusion about N vs M controlling err prefac (it's N)
 7 | isign   = +1;     % sign of imaginary unit in exponential
 8 | o.debug = 0;      % choose 1 for timing breakdown text output
 9 | 
10 | % use one of these two...
11 | tols = 10.^(-1:-0.02:-15); o.upsampfac = 2.0;
12 | %tols = 10.^(-1:-0.02:-10); o.upsampfac=1.25;    % for lowupsampfac
13 | 
14 | % other expts...
15 | %tols = 1e-6;
16 | %tols = 10.^(-1:-1:-10); o.upsampfac=1.25;    % for lowupsampfac
17 | 
18 | errs = nan*tols;
19 | for t=1:numel(tols)
20 |   x = pi*(2*rand(1,M)-1);
21 |   c = randn(1,M)+1i*randn(1,M);
22 |   ns = (ceil(-N/2):floor((N-1)/2))';         % mode indices, col vec
23 |   f = finufft1d1(x,c,isign,tols(t),N,o);
24 |   fe = exp(1i*isign*ns*x) * c.';             % exact (note mat fill, matvec)
25 |   %errs(t) = max(abs(f(:)-fe(:)))/norm(c,1);  % eps as in err analysis...
26 |   %p=2; errs(t) = norm(f(:)-fe(:),p)/norm(c,p);  % ... or p-norm rel to input
27 |   p=2; errs(t) = norm(f(:)-fe(:),p)/norm(fe(:),p);  % ... or rel p-norm
28 | end
29 | figure; loglog(tols,errs,'+'); hold on; plot(tols,tols,'-');
30 | axis tight; xlabel('tol'); ylabel('err');
31 | %title(sprintf('1d1: (maxerr)/||c||_1, M=%d, N=%d\n',M,N));
32 | % note the doubled backslash: sprintf would turn a bare backslash-t into a tab
33 | title(sprintf('1d1: ||\\tilde f - f||_2/||f||_2, M=%d, N=%d\n',M,N));
34 | 


--------------------------------------------------------------------------------
/matlab/test/guru_setpts_issue.m:
--------------------------------------------------------------------------------
 1 | % Tests fix of setpts temporary array loss by MWrap (issue 185).
 2 | % The issue occurred when expressions such as -x were passed into setpts,
 3 | % resulting in crash or incorrect answers (due to pointing to destroyed temp
 4 | % arrays).
 5 | % It is fixed as of 5/6/2021.
 6 | % code by Dan Fortunato.
 7 | 
 8 | % Random points
 9 | M = 10000;
10 | x = pi*(2*rand(M,1)-1);
11 | y = pi*(2*rand(M,1)-1);
12 | 
13 | % Random Fourier coefficients
14 | N = 64;
15 | coeffs = randn(N) + 1i*randn(N);
16 | 
17 | % FINUFFT options
18 | tol = 1e-12;
19 | opts = struct();
20 | 
21 | for k = 1:100
22 | 
23 |     disp(k)
24 | 
25 |     % Without planning
26 |     vals = finufft2d2(-x, -y, -1, tol, coeffs, opts);
27 | 
28 |     % With planning (was buggy, at seemingly random times)
29 |     plan = finufft_plan(2, [N N], -1, 1, tol, opts);
30 |     plan.setpts(-x, -y);
31 |     vals2 = plan.execute(coeffs);
32 | 
33 |     % With planning (was the workaround, now not needed)
34 |     plan = finufft_plan(2, [N N], -1, 1, tol, opts);
35 |     xx = -x;
36 |     yy = -y;
37 |     plan.setpts(xx, yy);
38 |     vals3 = plan.execute(coeffs);
39 | 
40 |     if ( any(isnan(vals2)) || norm(vals - vals2) > tol )
41 |         warning('Something went wrong during run #%i', k);
42 |         fprintf('norm(vals - vals2) = %g\n', norm(vals - vals2));
43 |         fprintf('norm(vals - vals3) = %g\n', norm(vals - vals3));
44 |         break
45 |     end
46 | end
47 | 


--------------------------------------------------------------------------------
/matlab/test/test_strictadjoint.m:
--------------------------------------------------------------------------------
 1 | % check t1 and t2 are adjoints to rounding error, not merely to requested tol.
 2 | % 1d only for now. Barnett 8/27/18
 3 | %clear; addpath ~/numerics/finufft/matlab
 4 | 
 5 | M=1e5;  % pts
 6 | N=1e4;  % modes
 7 | tol = 1e-6;
 8 | x = pi*(2*rand(M,1)-1);
 9 | % pick two vectors to check (u,F1 v) = (F2 u,v) with...
10 | v = randn(M,1)+1i*randn(M,1);
11 | u = randn(N,1)+1i*randn(N,1);
12 | ip1 = dot(u,finufft1d1(x,v,+1,tol,N));
13 | ip2 = dot(finufft1d2(x,-1,tol,u),v);    % note sign flips to be complex adjoint
14 | fprintf('M=%d,N=%d,tol=%.1g: rel err (u,F1 v) vs (F2 u,v): %.3g\n',M,N,tol,abs(ip1-ip2)/abs(ip1))
15 | clear eps
16 | fprintf('cf estimated rounding err for this prob size; %.3g\n',0.2*eps*N)
17 | 


--------------------------------------------------------------------------------
/matlab/valid_ntr.m:
--------------------------------------------------------------------------------
 1 | function n_transf = valid_ntr(x,c)
 2 | % VALID_NTR   deduce n_transforms and validate the size of c, for types 1 and 3.
 3 | %             also check for array device consistency.
 4 | 
 5 | if isa(x, 'gpuArray') ~= isa(c, 'gpuArray')
 6 |   error('FINUFFT:mixedDevice','FINUFFT: x and c must be both on GPU or CPU');
 7 | end
 8 | 
 9 | n_transf = round(numel(c)/numel(x));    % this allows general row/col vec, matrix, input shapes
10 | if n_transf*numel(x)~=numel(c)
11 |   error('FINUFFT:badCsize','FINUFFT numel(c) must be divisible by numel(x)');
12 | end
13 | 


--------------------------------------------------------------------------------
/perftest/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Each source test file is instantiated in single and double precision
 2 | set(PERFTESTS guru_timing_test manysmallprobs spreadtestnd spreadtestndall)
 3 | 
 4 | foreach(TEST ${PERFTESTS})
 5 |     add_executable(${TEST} ${TEST}.cpp)
 6 |     if(FINUFFT_USE_DUCC0)
 7 |         target_compile_definitions(${TEST} PRIVATE -DFINUFFT_USE_DUCC0)
 8 |     endif()
 9 |     finufft_link_test(${TEST})
10 | 
11 |     add_executable(${TEST}f ${TEST}.cpp)
12 |     target_compile_definitions(${TEST}f PRIVATE -DSINGLE)
13 |     if(FINUFFT_USE_DUCC0)
14 |         target_compile_definitions(${TEST}f PRIVATE -DFINUFFT_USE_DUCC0)
15 |     endif()
16 |     finufft_link_test(${TEST}f)
17 | endforeach()
18 | 
19 | include(CheckIncludeFile)
20 | check_include_file("getopt.h" HAVE_GETOPT_H)
21 | if(HAVE_GETOPT_H)
22 |     add_executable(perftest perftest.cpp)
23 |     if(FINUFFT_USE_DUCC0)
24 |         target_compile_definitions(perftest PRIVATE -DFINUFFT_USE_DUCC0)
25 |     endif()
26 |     finufft_link_test(perftest)
27 | endif()
28 | 


--------------------------------------------------------------------------------
/perftest/README:
--------------------------------------------------------------------------------
 1 | Performance and development test directory for FINUFFT.
 2 | 
 3 | spreadtestnd : time spread & interp for given dim, tol, etc.
 4 | spreadtestndall : time spread or interp sweeping over all tols (w), given dim.
 5 |   [note the above two differ in 4th cmd-line arg being "tol" vs "dir"]
 6 | big2d2f : tests int64_t (8byte int) indexing, ie data size > 2^31.
 7 | 
 8 | Scripts:
 9 | spreadtestall.sh : rapid test of spreadtestnd in all cases.
10 | spreadtestnd.sh : performance test of spreader only, in dims 1,2, or 3.
11 | nuffttestnd.sh : performance test of NUFFT library, in dims 1,2, or 3.
12 | mycpuinfo.sh : prints info about the CPU
13 | multispreadtestndall.sh : runs Marco's w-sweeping scripts all dims, precs.
14 | 
15 | Possibly obsolete scripts (for developers):
16 | highaspect3d_test.sh : comparing various pizza-box orientations for speed
17 | 
18 | 
19 | Also see:
20 | ../test for validation tests
21 | 


--------------------------------------------------------------------------------
/perftest/big2d2f.cpp:
--------------------------------------------------------------------------------
 1 | /* This is a 2D type-2 demo calling FINUFFT for a big number of transforms, such
 2 |    that the total data size exceeds the max signed 32-bit int value of 2^31-1.
 3 |    This verifies correct handling via int64_t (8byte) indexing.
 4 |    It takes about 30 s to run on 8 threads, and demands about 30 GB of RAM.
 5 | 
 6 |    See makefile for compilation. Libin Lu 6/7/22; edits Alex Barnett.
 7 | */
 8 | 
 9 | // this is all you must include for the finufft lib...
10 | #include <finufft.h>
11 | 
12 | // also used in this example...
13 | #include <complex>
14 | #include <iostream>
15 | #include <omp.h>
16 | #include <vector>
17 | using namespace std;
18 | 
19 | int test_finufft(finufft_opts *opts) {
20 |   size_t nj = 129 * 129 * 2;
21 |   size_t ms = 129, mt = 129;
22 |   size_t ntrans = 75000;    // the point is: 129*129*2*75000 > 2^31 ~ 2.15e9
23 |   std::vector<float> x(nj); // bunch of zero data
24 |   std::vector<float> y(nj);
25 |   std::vector<std::complex<float>> cj(ntrans * nj);
26 |   std::vector<std::complex<float>> fk(ntrans * ms * mt);
27 | 
28 |   int ier = finufftf2d2many(ntrans, nj, x.data(), y.data(), cj.data(), -1, 1e-3, ms, mt,
29 |                             fk.data(), opts);
30 | 
31 |   std::cout << "\tbig2d2f finufft status: " << ier << std::endl;
32 |   return ier;
33 | }
34 | 
35 | int main(int argc, char *argv[]) {
36 |   finufft_opts opts;
37 |   finufftf_default_opts(&opts);
38 |   return test_finufft(&opts);
39 | }
40 | 


--------------------------------------------------------------------------------
/perftest/checkGuruTiming.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # by Andrea Malleo, summer 2019.
 3 | 
 4 | srcpts=1e7
 5 | tolerance=1e-6
 6 | debug=1
 7 | modes[0]=1e6
 8 | modes[1]=1
 9 | modes[2]=1
10 | modes[3]=1e3
11 | modes[4]=1e3
12 | modes[5]=1
13 | modes[6]=1e2
14 | modes[7]=1e2
15 | modes[8]=1e2
16 | 
17 | for dimension in 1 2 3
18 | do
19 |     for type in 1 2 3
20 |     do
21 | 	for n_trials in 1 20 41
22 | 	do
23 | 	    declare -i row
24 | 	    row=${dimension}-1
25 | 
26 | 	    declare -i index
27 | 	    index=row*3
28 | 
29 | 	    declare -i modeNum
30 | 	    modeNum1=${modes[index]}
31 | 	    modeNum2=${modes[index+1]}
32 | 	    modeNum3=${modes[index+2]}
33 | 
34 | 	    echo "./guru_timing_test ${n_trials} ${type} ${dimension} ${modeNum1} ${modeNum2} ${modeNum3} ${srcpts} ${tolerance} ${debug}"
35 | 	    ./guru_timing_test ${n_trials} ${type} ${dimension} ${modeNum1} ${modeNum2} ${modeNum3} ${srcpts} ${tolerance} ${debug}
36 | 	done
37 |     done
38 | done
39 | 


--------------------------------------------------------------------------------
/perftest/cuda/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | add_executable(cuperftest cuperftest.cu)
 2 | target_include_directories(cuperftest PUBLIC ${CUFINUFFT_INCLUDE_DIRS})
 3 | target_link_libraries(cuperftest cufinufft CUDA::cufft CUDA::cudart)
 4 | target_compile_features(cuperftest PRIVATE cxx_std_17)
 5 | target_compile_options(cuperftest PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)
 6 | set_target_properties(
 7 |     cuperftest
 8 |     PROPERTIES
 9 |         LINKER_LANGUAGE CUDA
10 |         CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}"
11 |         CUDA_STANDARD 17
12 |         CUDA_STANDARD_REQUIRED ON
13 | )
14 | 


--------------------------------------------------------------------------------
/perftest/cuda/bench.sh:
--------------------------------------------------------------------------------
 1 | ./cuperftest --prec d --n_runs 5 --N1 1e2 --N2 1e2 --M 2e6 --method 0 --tol 1e-4
 2 | ./cuperftest --prec d --n_runs 5 --N1 1e1 --N2 1e1 --N3 1e1 --M 2e6 --method 0 --tol 1e-4
 3 | ./cuperftest --prec d --n_runs 5 --N1 1e2 --N2 1e2 --N3 1e1 --M 2e6 --method 0 --tol 1e-4
 4 | ./cuperftest --prec d --n_runs 5 --N1 1e1 --N2 1e2 --N3 1e3 --M 2e6 --method 0 --tol 1e-4
 5 | ./cuperftest --prec d --n_runs 5 --N1 1e2 --N2 1e2 --N3 1e3 --M 2e6 --method 0 --tol 1e-4
 6 | #./cuperftest --prec d --n_runs 5 --N1 1e5 --N2 1e5 --N3 1e5 --M 2e6 --method 0 --tol 1e-10
 7 | #./cuperftest --prec d --n_runs 5 --N1 1e4 --N2 1e4 --N3 1e4 --M 2e6 --method 0 --tol 1e-10
 8 | #./cuperftest --prec d --n_runs 5 --N1 1e5 --N2 1e5 --N3 1e5 --M 2e6 --method 0 --tol 1e-10
 9 | #./cuperftest --prec d --n_runs 5 --N1 1e6 --N2 1e6 --M 2e6 --method 0 --tol 1e-10
10 | #./cuperftest --prec d --n_runs 5 --N1 1e8 --N2 1e6 --M 2e6 --method 0 --tol 1e-10
11 | #./cuperftest --prec d --n_runs 5 --N1 1e6 --N2 1e6 --M 2e6 --method 0 --tol 1e-10
12 | #./cuperftest --prec d --n_runs 5 --N1 1e7 --N2 1e7 --M 2e6 --method 0 --tol 1e-10
13 | #./cuperftest --prec d --n_runs 5 --N1 1e8 --N2 1e8 --M 2e6 --method 0 --tol 1e-10
14 | 


--------------------------------------------------------------------------------
/perftest/getSpeedup.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # by Andrea Malleo, summer 2019.
3 | 
4 | ./checkGuruTiming.sh | grep -E 'guru_timing_test|speedup'
5 | 


--------------------------------------------------------------------------------
/perftest/highaspect3d_test.sh:
--------------------------------------------------------------------------------
 1 | # test various poor aspect ratios in 3d
 2 | # Barnett 2/6/17
 3 | 
 4 | # fastest
 5 | time ./finufft3d_test 10 400 400 1e6 1e-12 0
 6 | 
 7 | # weird thing is this one is slowest even though z split is easy - RAM access?
 8 | time ./finufft3d_test 400 10 400 1e6 1e-12 0
 9 | 
10 | # expect poor when split only along z:
11 | time ./finufft3d_test 400 400 10 1e6 1e-12 0
12 | 


--------------------------------------------------------------------------------
/perftest/multispreadtestndall.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # simple driver for Marco's sweeping-w spreadtest variant, all precs & dims.
 3 | # used by the makefile.
 4 | # all avail threads for now.
 5 | # human has to check the output for now.
 6 | # Barnett 6/4/24
 7 | 
 8 | M=1e7       # problem size (# NU pts); it's a string
 9 | N=1e6       # num U grid pts
10 | 
11 | ./spreadtestndall 1 $M $N 1 1
12 | ./spreadtestndall 1 $M $N 2 1
13 | ./spreadtestndall 2 $M $N 1 1
14 | ./spreadtestndall 2 $M $N 2 1
15 | ./spreadtestndall 3 $M $N 1 1
16 | ./spreadtestndall 3 $M $N 2 1
17 | ./spreadtestndallf 1 $M $N 1 1
18 | ./spreadtestndallf 1 $M $N 2 1
19 | ./spreadtestndallf 2 $M $N 1 1
20 | ./spreadtestndallf 2 $M $N 2 1
21 | ./spreadtestndallf 3 $M $N 1 1
22 | ./spreadtestndallf 3 $M $N 2 1
23 | 


--------------------------------------------------------------------------------
/perftest/mycpuinfo.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Barnett 2/2/17
 3 | # Linux and OSX both, 11/1/18
 4 | 
 5 | echo "what CPUs do I have?..."
 6 | unameOut="$(uname -s)"
 7 | case "${unameOut}" in
 8 |     Linux*)
 9 | 	echo "(I'm in a linux OS)"
10 | 	grep "model name" /proc/cpuinfo | uniq
11 | 	if hash lscpu 2> /dev/null; then   # only do it if cmd exists...
12 | 	    lscpu
13 | 	fi
14 | 	;;
15 |     Darwin*)
16 | 	echo "(I'm in Mac OSX)"
17 | 	sysctl -n machdep.cpu.brand_string
18 | 	sysctl -a | grep machdep.cpu
19 | 	;;
20 |     *)
21 | 	echo "I'm in an unknown or unsupported operating system";;
22 | esac
23 | 
24 | # help from:
25 | 
26 | #lscpu | egrep 'Thread|Core|Socket|^CPU\(|MHz'
27 | # thanks to http://unix.stackexchange.com/questions/218074/how-to-know-number-of-cores-of-a-system-in-linux
28 | 
29 | # https://www.cyberciti.biz/faq/lscpu-command-find-out-cpu-architecture-information/
30 | 
31 | # https://stackoverflow.com/questions/3466166/how-to-check-if-running-in-cygwin-mac-or-linux/27776822
32 | 


--------------------------------------------------------------------------------
/perftest/mymaxthreads.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Output number of logical cores as a string, OS-indep.  Barnett 7/5/20.
 3 | 
 4 | # Linux and OSX for now. this doesn't handle non-linux unices.
 5 | 
 6 | unameOut="$(uname -s)"
 7 | case "${unameOut}" in
 8 |     Linux*)
 9 |         lscpu -p | egrep -v '^#' | wc -l
10 | 	;;
11 |     Darwin*)
12 |         sysctl -n hw.logicalcpu_max
13 | 	;;
14 |     MINGW*)
15 |         # not sure this is correct...
16 |         echo "$NUMBER_OF_PROCESSORS"
17 |         ;;
18 |     *)
19 | 	echo "I'm in an unknown or unsupported operating system: ${unameOut}" >&2
20 |         ;;
21 | esac
22 | 


--------------------------------------------------------------------------------
/perftest/mynumcores.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Output number of physical cores as a string, OS-indep. Barnett 7/5/20.
 3 | 
 4 | # see:
 5 | # https://stackoverflow.com/questions/6481005/how-to-obtain-the-number-of-cpus-cores-in-linux-from-the-command-line
 6 | # https://en.wikipedia.org/wiki/Uname
 7 | 
 8 | # Linux and Mac only. this doesn't handle non-linux unices.
 9 | 
10 | unameOut="$(uname -s)"
11 | case "${unameOut}" in
12 |     Linux*)
13 |         lscpu -p | egrep -v '^#' | sort -u -t, -k 2,4 | wc -l
14 | 	;;
15 |     Darwin*)
16 | #	sysctl -n machdep.cpu.core_count
17 |         sysctl -n hw.physicalcpu_max
18 | 	;;
19 |     MINGW*)
20 |         # not sure this is correct...
21 |         echo "$NUMBER_OF_PROCESSORS"
22 |         ;;
23 |     *)
24 | 	echo "I'm in an unknown or unsupported operating system: ${unameOut}" >&2
25 |         ;;
26 | esac
27 | 


--------------------------------------------------------------------------------
/perftest/nuffttestnd.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # A basic set of performance tests for Flatiron Institute NUFFT.
 3 | # Relies on test executables in ../test
 4 | 
 5 | # Barnett 2/2/17, tidied 3/13/17. no sort 6/13/20. prec switch, threads 7/2/20.
 6 | 
 7 | M=1e6       # problem size (sets both # NU pts and # U modes); it's a string
 8 | TOL=1e-6    # overall requested accuracy, also a string
 9 | DEBUG=0     # whether to see timing breakdowns
10 | 
11 | #TESTTHREADS=$(./mymaxthreads.sh)      # max threads (hyperthreading)
12 | TESTTHREADS=$(./mynumcores.sh)        # one thread per core (no hyperthreading)
13 | 
14 | echo "nuffttestnd output:"
15 | ./mycpuinfo.sh
16 | 
17 | if [[ $1 == "SINGLE" ]]; then
18 |     PREC=single
19 |     PRECSUF=f
20 | else
21 |     PREC=double
22 |     PRECSUF=
23 | fi
24 | 
25 | echo
26 | export OMP_NUM_THREADS=$TESTTHREADS
27 | echo "$PREC-precision $OMP_NUM_THREADS-thread tests: size = $M, tol = $TOL..."
28 | # currently we run 1e6 modes in each case, in non-equal dims (more generic):
29 | ../test/finufft1d_test$PRECSUF 1e6 $M $TOL $DEBUG
30 | ../test/finufft2d_test$PRECSUF 500 2000 $M $TOL $DEBUG
31 | ../test/finufft3d_test$PRECSUF 100 200 50 $M $TOL $DEBUG
32 | 
33 | echo
34 | export OMP_NUM_THREADS=1
35 | echo "$PREC-precision $OMP_NUM_THREADS-thread tests: size = $M, tol = $TOL..."
36 | ../test/finufft1d_test$PRECSUF 1e6 $M $TOL $DEBUG
37 | ../test/finufft2d_test$PRECSUF 500 2000 $M $TOL $DEBUG
38 | ../test/finufft3d_test$PRECSUF 100 200 50 $M $TOL $DEBUG
39 | 


--------------------------------------------------------------------------------
/perftest/results/gcc_vs_icc_xeon.txt:
--------------------------------------------------------------------------------
 1 | Comparing GCC 6.4.0 vs ICC 17.0.4, on xeon E5-2643 v3 @ 3.40GHz
 2 | 3/28/18
 3 | 
 4 | spreadtestnd d 1e7 1e7 1e-6 2
 5 | 
 6 | Timings in sec:
 7 | 
 8 | single-thread:
 9 | 
10 | 	t1		t2
11 | 
12 | 	GCC	ICC	GCC	ICC
13 | 
14 | 1d	2.7	1.7	3.3	2.7
15 | 2d	4.5	2.7	5.0	4.5
16 | 3d	8.6	5.7	10.9	8.4
17 | 
18 | 24-thread:
19 | 
20 | 1d	.49	.38	.48	.36
21 | 2d	.5	.34	.48	.37
22 | 3d	.82	.62	.76	.58
23 | 
24 | Note: sorting a bit slower under ICC, but spreading is faster.
25 | 


--------------------------------------------------------------------------------
/perftest/searchForTimeMetrics.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | 
 3 | 
 4 | decimalMatchString = "\d+\.?\d+" #regular expression to match a decimal number
 5 | sciNotString = "(\d*.?\d*e-\d* s)" #regular expression to match a number in scientific notation
 6 | wholeNumberMatchString = "\d+"
 7 | 
 8 | 
 9 | #search string needs to have two groupings! (one for everything besides) (time s)
10 | def extractTime(searchString, strOut):
11 |     time = 0
12 |     lineMatch = re.search(searchString,strOut)
13 |     if(lineMatch):
14 |         val = re.search(sciNotString,lineMatch.group(2))
15 |         if(not val):
16 |             val = re.search(decimalMatchString, lineMatch.group(2))
17 |         if(not val):
18 |             val = re.search(wholeNumberMatchString, lineMatch.group(2))
19 |         time = round(float(val.group(0).split('s')[0].strip()),5)
20 |     return time
21 | 
22 | 
23 | def sumAllTime(searchString, strOut):
24 |     newVal = 0
25 |     lineMatch = re.findall(searchString,strOut)
26 |     for match in lineMatch:
27 |         val = re.search(sciNotString, match[1])
28 |         if(not val): #search failed, try decimal format
29 |             val = re.search(decimalMatchString, match[1])
30 |         if(not val):
31 |             val = re.search(wholeNumberMatchString, match[1])
32 |         newVal = newVal + float(val.group(0).split('s')[0].strip()) #trim off " s"
33 |     newVal = round(newVal,5)
34 |     return newVal
35 | 


--------------------------------------------------------------------------------
/perftest/spreaderbench.py:
--------------------------------------------------------------------------------
 1 | fast = 'new.txt'
 2 | slow = 'old.txt'
 3 | 
 4 | 
 5 | def read_data(filename):
 6 |     data = [0] * 17
 7 |     with open(filename) as f1:
 8 |         nspread = 0
 9 |         speed = 0
10 |         for line in f1:
11 |             if 'nspread' in line:
12 |                 nspread = int(line.split('=')[-1])
13 |             if 'pts/s' in line:
14 |                 speed = float(line.split(' ')[12])
15 |             data[nspread] = speed
16 |     return data
17 | 
18 | # compute relative increment in percentage between two numbers
19 | 
20 | 
21 | vec = read_data(fast)[2:]
22 | old = read_data(slow)[2:]
23 | 
24 | # 1 : slow = x : fast
25 | # x = (1 - slow/fast) * 100
26 | i = 2
27 | for vec, old in zip(vec, old):
28 |     diff = (1 - old/vec)*100
29 |     print(f'nspread={i:02d} delta={diff:.3f}%')
30 |     i+=1
31 | 


--------------------------------------------------------------------------------
/perftest/spreadtestall.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # tester for spreadinterp component that hits as many options & code lines
 3 | # as possible. Takes around 2-3 seconds wall-clock time.
 4 | # No math test is done (human could check "rel err" outputs small),
 5 | # since this is based on a speed-testing executable (spreadtestnd).
 6 | # For pass-fail math tests instead see ../test/finufft*test
 7 | # Barnett 10/23/20
 8 | 
 9 | M=1e6       # problem size (# NU pts)
10 | N=1e6       # num U grid pts
11 | 
12 | # one thread per core (ie no hyperthreading)...
13 | export OMP_NUM_THREADS=$(./mynumcores.sh)
14 | 
15 | echo "spreadtestall.sh :"
16 | echo ""
17 | echo "Double-prec spread/interp tests --------------------------------------"
18 | echo "=========== default kernel choice ============"
19 | TOL=1e-6    # req precision
20 | ./spreadtestnd 1 $M $N $TOL
21 | ./spreadtestnd 2 $M $N $TOL
22 | ./spreadtestnd 3 $M $N $TOL
23 | echo "=========== kerevalmeth=0 nonstandard upsampfac + debug ============"
24 | # nonstandard upsampfac to test with the direct kernel eval (slower)...
25 | UP=1.5
26 | # debug output
27 | DEB=1
28 | ./spreadtestnd 1 $M $N $TOL 2 0 $DEB 0 0 $UP
29 | ./spreadtestnd 2 $M $N $TOL 2 0 $DEB 0 0 $UP
30 | ./spreadtestnd 3 $M $N $TOL 2 0 $DEB 0 0 $UP
31 | 
32 | echo ""
33 | echo "Single-prec spread/interp tests --------------------------------------"
34 | echo "=========== default kernel choice ============"
35 | TOL=1e-3    # req precision
36 | ./spreadtestndf 1 $M $N $TOL
37 | ./spreadtestndf 2 $M $N $TOL
38 | ./spreadtestndf 3 $M $N $TOL
39 | echo "=========== kerevalmeth=0 nonstandard upsampfac + debug ============"
40 | ./spreadtestndf 1 $M $N $TOL 2 0 $DEB 0 0 $UP
41 | ./spreadtestndf 2 $M $N $TOL 2 0 $DEB 0 0 $UP
42 | ./spreadtestndf 3 $M $N $TOL 2 0 $DEB 0 0 $UP
43 | 


--------------------------------------------------------------------------------
/perftest/spreadtestnd.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # a basic quick set of quasi-uniform multidimensional spreader speed tests.
 3 | # Usage:
 4 | # double-prec:  ./spreadtestnd.sh
 5 | # single-prec:  ./spreadtestnd.sh SINGLE
 6 | 
 7 | # Barnett started 2/2/17. both-precision handling, choose # threads 7/3/20.
 8 | 
 9 | M=1e6       # problem size (# NU pts)
10 | N=1e6       # num U grid pts
11 | TOL=1e-6    # overall requested accuracy
12 | 
13 | #TESTTHREADS=$(./mymaxthreads.sh)      # max threads (hyperthreading)
14 | TESTTHREADS=$(./mynumcores.sh)        # one thread per core (no hyperthreading)
15 | 
16 | echo "spreadtestnd output:"
17 | ./mycpuinfo.sh
18 | 
19 | if [[ $1 == "SINGLE" ]]; then
20 |     PREC=single
21 |     ST=./spreadtestndf
22 | else
23 |     PREC=double
24 |     ST=./spreadtestnd
25 | fi
26 | 
27 | echo
28 | export OMP_NUM_THREADS=$TESTTHREADS
29 | echo "$PREC-precision $OMP_NUM_THREADS-thread tests: #NU = $M, #U = $N, tol = $TOL..."
30 | $ST 1 $M $N $TOL
31 | $ST 2 $M $N $TOL
32 | $ST 3 $M $N $TOL
33 | 
34 | echo
35 | export OMP_NUM_THREADS=1
36 | echo "$PREC-precision $OMP_NUM_THREADS-thread tests: #NU = $M, #U = $N, tol = $TOL..."
37 | $ST 1 $M $N $TOL
38 | $ST 2 $M $N $TOL
39 | $ST 3 $M $N $TOL
40 | 


--------------------------------------------------------------------------------
/python/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | if(FINUFFT_USE_CPU)
 2 |     if(WIN32)
 3 |         install(TARGETS finufft LIBRARY DESTINATION finufft RUNTIME DESTINATION finufft)
 4 |     else()
 5 |         install(TARGETS finufft LIBRARY DESTINATION finufft)
 6 |     endif()
 7 | endif()
 8 | 
 9 | if(FINUFFT_USE_CUDA)
10 |     install(TARGETS cufinufft LIBRARY DESTINATION cufinufft)
11 | endif()
12 | 
13 | # Warn if the user invokes CMake directly
14 | if(NOT SKBUILD)
15 |     message(
16 |         WARNING
17 |         "\
18 |   This CMake file is meant to be executed using 'scikit-build-core'.
19 |   Running it directly will almost certainly not produce the desired
20 |   result. If you are a user trying to install this package, use the
21 |   command below, which will install all necessary build dependencies,
22 |   compile the package in an isolated environment, and then install it.
23 |   =====================================================================
24 |    $ pip install python/finufft
25 |    or
26 |    $ pip install python/cufinufft
27 |   =====================================================================
28 |   If you are a software developer, and this is your own package, then
29 |   it is usually much more efficient to install the build dependencies
30 |   in your environment once and use the following command that avoids
31 |   a costly creation of a new virtual environment at every compilation:
32 |   =====================================================================
33 |    $ pip install finufft scikit-build-core[pyproject]
34 |    $ pip install --no-build-isolation -ve .
35 |   =====================================================================
36 |   You may optionally add -Ceditable.rebuild=true to auto-rebuild when
37 |   the package is imported. Otherwise, you need to rerun the above
38 |   after editing C++ files."
39 |     )
40 | endif()
41 | 


--------------------------------------------------------------------------------
/python/cufinufft/README.md:
--------------------------------------------------------------------------------
 1 | # FINUFFT GPU library Python wrappers
 2 | 
 3 | This is a Python interface to the efficient GPU CUDA implementation of the 1-, 2- and
 4 | 3-dimensional nonuniform fast Fourier transform (NUFFT), provided
 5 | in the FINUFFT library. It performs type
 6 | 1 (nonuniform to uniform) or type 2 (uniform to nonuniform) transforms.
 7 | For a mathematical description of the NUFFT and applications to signal
 8 | processing, imaging, and scientific computing, see [the FINUFFT
 9 | documentation](https://finufft.readthedocs.io).
10 | The Python GPU interface is [here](https://finufft.readthedocs.io/en/latest/python_gpu.html).
11 | Usage examples can be found in the examples folder in the same directory as
12 | the file you are reading.
13 | 
14 | If you use this GPU feature of our package, please cite our GPU paper:
15 | 
16 | Y. Shih, G. Wright, J. Andén, J. Blaschke, A. H. Barnett (2021).
17 | cuFINUFFT: a load-balanced GPU library for general-purpose nonuniform FFTs.
18 | arXiv preprint arXiv:2102.08463.
19 | [(paper)](https://arxiv.org/abs/2102.08463)
20 | [(bibtex)](https://arxiv.org/bibtex/2102.08463)
21 | 
22 | **Note**: With version 2.2 we have changed the GPU interfaces slightly to better align with FINUFFT. For an outline of the changes, please see [the migration guide](https://finufft.readthedocs.io/en/latest/cufinufft_migration.html).
23 | 


--------------------------------------------------------------------------------
/python/cufinufft/cufinufft/__init__.py:
--------------------------------------------------------------------------------
 1 | from cufinufft._plan import Plan
 2 | 
 3 | from cufinufft._simple import (nufft1d1, nufft1d2, nufft2d1, nufft2d2,
 4 |                                nufft3d1, nufft3d2)
 5 | 
 6 | __all__ = ["nufft1d1", "nufft1d2",
 7 |            "nufft2d1", "nufft2d2",
 8 |            "nufft3d1", "nufft3d2",
 9 |            "Plan"]
10 | 
11 | __version__ = '2.4.0'
12 | 


--------------------------------------------------------------------------------
/python/cufinufft/examples/example2d2_pycuda.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Demonstrate the type 2 NUFFT using cuFINUFFT
 3 | """
 4 | 
 5 | import numpy as np
 6 | 
 7 | import pycuda.autoinit
 8 | from pycuda.gpuarray import to_gpu
 9 | 
10 | import cufinufft
11 | 
12 | # Set up parameters for problem.
13 | N1, N2 = 37, 41                 # Size of uniform grid
14 | M = 17                          # Number of nonuniform points
15 | n_transf = 2                    # Number of input arrays
16 | eps = 1e-6                      # Requested tolerance
17 | dtype = np.float32              # Datatype (real)
18 | complex_dtype = np.complex64    # Datatype (complex)
19 | 
20 | # Generate coordinates of non-uniform points.
21 | x = np.random.uniform(-np.pi, np.pi, size=M)
22 | y = np.random.uniform(-np.pi, np.pi, size=M)
23 | 
24 | # Generate grid values.
25 | fk = (np.random.standard_normal((n_transf, N1, N2))
26 |       + 1j * np.random.standard_normal((n_transf, N1, N2)))
27 | 
28 | # Cast to desired datatype.
29 | x = x.astype(dtype)
30 | y = y.astype(dtype)
31 | fk = fk.astype(complex_dtype)
32 | 
33 | # Initialize the plan and set the points.
34 | plan = cufinufft.Plan(2, (N1, N2), n_transf, eps=eps, dtype=complex_dtype)
35 | plan.setpts(to_gpu(x), to_gpu(y))
36 | 
37 | # Execute the plan, reading from the uniform grid fk and storing the result
38 | # in c_gpu.
39 | c_gpu = plan.execute(to_gpu(fk))
40 | 
41 | # Retrieve the result from the GPU.
42 | c = c_gpu.get()
43 | 
44 | # Check accuracy of the transform at index jt.
45 | jt = M // 2
46 | 
47 | for i in range(n_transf):
48 |     # Calculate the true value of the type 2 transform at the index jt.
49 |     m, n = np.mgrid[-(N1 // 2):(N1 + 1) // 2, -(N2 // 2):(N2 + 1) // 2]
50 |     c_true = np.sum(fk[i] * np.exp(-1j * (m * x[jt] + n * y[jt])))
51 | 
52 |     # Calculate the absolute and relative error.
53 |     err = np.abs(c[i, jt] - c_true)
54 |     rel_err = err / np.max(np.abs(c[i]))
55 | 
56 |     print(f"[{i}] Absolute error on point [{jt}] is {err:.3g}")
57 |     print(f"[{i}] Relative error on point [{jt}] is {rel_err:.3g}")
58 | 
59 |     assert(rel_err < 15 * eps)
60 | 


--------------------------------------------------------------------------------
/python/cufinufft/examples/getting_started_cupy.py:
--------------------------------------------------------------------------------
 1 | import cupy as cp
 2 | 
 3 | import cufinufft
 4 | 
 5 | # number of nonuniform points
 6 | M = 100000
 7 | 
 8 | # grid size
 9 | N = 200000
10 | 
11 | # generate positions for the nonuniform points and the coefficients
12 | x_gpu = 2 * cp.pi * cp.random.uniform(size=M)
13 | c_gpu = (cp.random.standard_normal(size=M)
14 |          + 1J * cp.random.standard_normal(size=M))
15 | 
16 | # compute the transform
17 | f_gpu = cufinufft.nufft1d1(x_gpu, c_gpu, (N,))
18 | 
19 | # move results off the GPU
20 | f = f_gpu.get()
21 | 


--------------------------------------------------------------------------------
/python/cufinufft/examples/getting_started_numba.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | import numba.cuda
 4 | 
 5 | import cufinufft
 6 | 
 7 | # number of nonuniform points
 8 | M = 100000
 9 | 
10 | # grid size
11 | N = 200000
12 | 
13 | # generate positions for the nonuniform points and the coefficients
14 | x = 2 * np.pi * np.random.uniform(size=M)
15 | c = (np.random.standard_normal(size=M) + 1J * np.random.standard_normal(size=M))
16 | 
17 | # transfer to GPU
18 | x_gpu = numba.cuda.to_device(x)
19 | c_gpu = numba.cuda.to_device(c)
20 | 
21 | # compute the transform
22 | f_gpu = cufinufft.nufft1d1(x_gpu, c_gpu, (N,))
23 | 
24 | # move results off the GPU
25 | f = f_gpu.copy_to_host()
26 | 


--------------------------------------------------------------------------------
/python/cufinufft/examples/getting_started_pycuda.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | import pycuda.autoinit
 4 | from pycuda.gpuarray import to_gpu
 5 | 
 6 | import cufinufft
 7 | 
 8 | # number of nonuniform points
 9 | M = 100000
10 | 
11 | # grid size
12 | N = 200000
13 | 
14 | # generate positions for the nonuniform points and the coefficients
15 | x = 2 * np.pi * np.random.uniform(size=M)
16 | c = (np.random.standard_normal(size=M)
17 |      + 1J * np.random.standard_normal(size=M))
18 | 
19 | # move the data to GPU
20 | x_gpu = to_gpu(x)
21 | c_gpu = to_gpu(c)
22 | 
23 | # compute the transform
24 | f_gpu = cufinufft.nufft1d1(x_gpu, c_gpu, (N,))
25 | 
26 | # move results off the GPU
27 | f = f_gpu.get()
28 | 


--------------------------------------------------------------------------------
/python/cufinufft/examples/getting_started_torch.py:
--------------------------------------------------------------------------------
 1 | import cufinufft
 2 | 
 3 | import torch
 4 | 
 5 | # number of nonuniform points
 6 | M = 100000
 7 | 
 8 | # grid size
 9 | N = 200000
10 | 
11 | # generate positions for the nonuniform points and the coefficients
12 | x_gpu = 2 * torch.pi * torch.rand(size=(M,)).cuda()
13 | c_gpu = (torch.randn(size=(M,)) + 1J * torch.randn(size=(M,))).cuda()
14 | 
15 | # compute the transform
16 | f_gpu = cufinufft.nufft1d1(x_gpu, c_gpu, (N,))
17 | 
18 | # move results off the GPU
19 | f = f_gpu.cpu()
20 | 


--------------------------------------------------------------------------------
/python/cufinufft/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | six
3 | packaging
4 | 


--------------------------------------------------------------------------------
/python/cufinufft/tests/conftest.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | import utils
 4 | 
 5 | 
 6 | def pytest_addoption(parser):
 7 |     parser.addoption("--framework", action="append", default=[], help="List of frameworks")
 8 | 
 9 | def pytest_generate_tests(metafunc):
10 |     if "framework" in metafunc.fixturenames:
11 |         metafunc.parametrize("framework", metafunc.config.getoption("framework"))
12 | 
13 | @pytest.fixture
14 | def to_gpu(framework):
15 |     to_gpu, _ = utils.transfer_funcs(framework)
16 | 
17 |     return to_gpu
18 | 
19 | 
20 | @pytest.fixture
21 | def to_cpu(framework):
22 |     _, to_cpu = utils.transfer_funcs(framework)
23 | 
24 |     return to_cpu
25 | 


--------------------------------------------------------------------------------
/python/cufinufft/tests/test_array_ordering.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | import numpy as np
 4 | 
 5 | from cufinufft import Plan, _compat
 6 | 
 7 | import utils
 8 | 
 9 | 
10 | def test_type1_ordering(to_gpu, to_cpu, dtype=np.float32, shape=(16, 16, 16), M=4096, tol=1e-3):
11 |     complex_dtype = utils._complex_dtype(dtype)
12 | 
13 |     k, c = utils.type1_problem(dtype, shape, M)
14 | 
15 |     k_gpu = to_gpu(k)
16 |     c_gpu = to_gpu(c)
17 | 
18 |     plan = Plan(1, shape, eps=tol, dtype=complex_dtype)
19 | 
20 |     plan.setpts(*k_gpu)
21 | 
22 |     out = np.empty(shape, dtype=complex_dtype, order="F")
23 | 
24 |     out_gpu = to_gpu(out)
25 | 
26 |     with pytest.raises(TypeError, match="following requirement: C") as err:
27 |         plan.execute(c_gpu, out=out_gpu)
28 | 


--------------------------------------------------------------------------------
/python/cufinufft/tests/test_examples.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Discover and run Python example scripts as unit tests.
 3 | """
 4 | 
 5 | import os
 6 | import subprocess
 7 | import sys
 8 | from pathlib import Path
 9 | 
10 | import pytest
11 | 
12 | examples_dir = os.path.join(Path(__file__).resolve().parents[1], "examples")
13 | 
14 | scripts = []
15 | for filename in os.listdir(examples_dir):
16 |     if filename.endswith(".py"):
17 |         scripts.append(os.path.join(examples_dir, filename))
18 | 
19 | @pytest.mark.parametrize("filename", scripts)
20 | def test_example(filename, request):
21 |     # Extract framework from format `example_framework.py`.
22 |     framework = Path(filename).stem.split("_")[-1]
23 | 
24 |     if framework in request.config.getoption("framework"):
25 |         subprocess.check_call([sys.executable, filename])
26 |     else:
27 |         pytest.skip("Example not in list of frameworks")
28 | 


--------------------------------------------------------------------------------
/python/cufinufft/tests/test_fallback.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | import numpy as np
 4 | from ctypes.util import find_library
 5 | 
 6 | 
 7 | # Check to make sure the fallback mechanism works if there is no bundled
 8 | # dynamic library.
 9 | @pytest.mark.skip(reason="Patching seems to fail in CI")
10 | def test_fallback(mocker):
11 |     def fake_load_library(lib_name, path):
12 |         if lib_name in ["libcufinufft", "cufinufft"]:
13 |             raise OSError()
14 |         else:
15 |             return np.ctypeslib.load_library(lib_name, path)
16 | 
17 |     # Block out the bundled library.
18 |     mocker.patch("numpy.ctypeslib.load_library", fake_load_library)
19 | 
20 |     # Make sure an error is raised if no system library is found.
21 |     if find_library("cufinufft") is None:
22 |         with pytest.raises(ImportError, match="suitable cufinufft"):
23 |             import cufinufft
24 |     else:
25 |         import cufinufft
26 | 


--------------------------------------------------------------------------------
/python/cufinufft/tests/test_multi.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | import numpy as np
 4 | from cufinufft import Plan
 5 | 
 6 | import utils
 7 | 
 8 | 
 9 | def test_multi_type1(framework, dtype=np.float32, shape=(16, 16, 16), M=4096, tol=1e-3):
10 |     if framework == "pycuda":
11 |         import pycuda.driver as drv
12 |         import pycuda.gpuarray as gpuarray
13 |     else:
14 |         pytest.skip("Multi-GPU support only tested for pycuda")
15 | 
16 |     complex_dtype = utils._complex_dtype(dtype)
17 | 
18 |     drv.init()
19 | 
20 |     dev_count = drv.Device.count()
21 | 
22 |     if dev_count == 1:
23 |         pytest.skip()
24 | 
25 |     devs = [drv.Device(dev_id) for dev_id in range(dev_count)]
26 | 
27 |     dim = len(shape)
28 | 
29 |     errs = []
30 | 
31 |     for dev_id, dev in enumerate(devs):
32 |         ctx = dev.make_context()
33 | 
34 |         k = utils.gen_nu_pts(M, dim=dim).astype(dtype)
35 |         c = utils.gen_nonuniform_data(M).astype(complex_dtype)
36 | 
37 |         k_gpu = gpuarray.to_gpu(k)
38 |         c_gpu = gpuarray.to_gpu(c)
39 |         fk_gpu = gpuarray.GPUArray(shape, dtype=complex_dtype)
40 | 
41 |         plan = Plan(1, shape, eps=tol, dtype=complex_dtype,
42 |                          gpu_device_id=dev_id)
43 | 
44 |         plan.setpts(k_gpu[0], k_gpu[1], k_gpu[2])
45 | 
46 |         plan.execute(c_gpu, fk_gpu)
47 | 
48 |         fk = fk_gpu.get()
49 | 
50 |         ind = int(0.1789 * np.prod(shape))
51 | 
52 |         fk_est = fk.ravel()[ind]
53 |         fk_target = utils.direct_type1(c, k, shape, ind)
54 | 
55 |         type1_rel_err = np.abs(fk_target - fk_est) / np.abs(fk_target)
56 | 
57 |         print(f'Type 1 relative error (GPU {dev_id}):', type1_rel_err)
58 | 
59 |         ctx.pop()
60 | 
61 |         errs.append(type1_rel_err)
62 | 
63 |     assert all(err < 0.01 for err in errs)
64 | 


--------------------------------------------------------------------------------
/python/finufft/README.md:
--------------------------------------------------------------------------------
 1 | # Flatiron Institute Nonuniform Fast Fourier Transform library: FINUFFT
 2 | 
 3 | Principal author **Alex H. Barnett**, main co-developers Jeremy F. Magland, Ludvig af Klinteberg, Yu-hsuan "Melody" Shih, Andrea Malleo, Libin Lu, Joakim Andén, Marco Barbone, and Robert Blackwell.
 4 | 
 5 | This package provides a Python interface to the CPU library, enabling fast computation of nonuniform discrete Fourier transforms to specified precision in one, two, and three dimensions.
 6 | It supports transforms of type 1 (nonuniform to uniform), type 2 (uniform to nonuniform) and type 3 (nonuniform to nonuniform).
 7 | For more information, see the [online documentation](https://finufft.readthedocs.io/en/latest/python.html).
 8 | 
 9 | If you find FINUFFT useful in your work, please cite this package and our paper:
10 | 
11 |     A parallel non-uniform fast Fourier transform library based on an ``exponential of semicircle'' kernel.
12 |     A. H. Barnett, J. F. Magland, and L. af Klinteberg.
13 |     SIAM J. Sci. Comput. 41(5), C479–C504 (2019).
14 | 


--------------------------------------------------------------------------------
/python/finufft/examples/guru1d1.py:
--------------------------------------------------------------------------------
 1 | # Simple 1d1 python interface call
 2 | # Lu 02/07/20.
 3 | 
 4 | import time
 5 | import finufft as fp
 6 | import numpy as np
 7 | 
 8 | np.random.seed(42)
 9 | 
10 | N = int(1e6)
11 | M = int(1e5)
12 | x = np.random.uniform(-np.pi, np.pi, M)
13 | c = np.random.randn(M) + 1.j * np.random.randn(M)
14 | F = np.zeros([N], dtype=np.complex128)       # allocate F (modes out)
15 | n_modes = np.ones([1], dtype=np.int64)
16 | n_modes[0] = N
17 | 
18 | strt = time.time()
19 | 
20 | #plan
21 | plan = fp.Plan(1,(N,))
22 | 
23 | #set pts
24 | plan.setpts(x)
25 | 
26 | #exec
27 | plan.execute(c,F)
28 | 
29 | #timing
30 | print("Finished nufft in {0:.2g} seconds. Checking..."
31 |       .format(time.time()-strt))
32 | 
33 | #check error
34 | n = 142519      # mode to check
35 | Ftest = 0.0
36 | # this is so slow...
37 | for j in range(M):
38 |     Ftest += c[j] * np.exp(n * x[j] * 1.j)
39 | Fmax = np.max(np.abs(F))
40 | err = np.abs((F[n + N // 2] - Ftest) / Fmax)
41 | print("Error relative to max of F: {0:.2e}".format(err))
42 | 


--------------------------------------------------------------------------------
/python/finufft/examples/guru1d1f.py:
--------------------------------------------------------------------------------
 1 | # Simple 1d1 python interface call
 2 | # Lu 02/07/20.
 3 | 
 4 | import time
 5 | import finufft as fp
 6 | import numpy as np
 7 | 
 8 | np.random.seed(42)
 9 | 
10 | N = int(1e6)
11 | M = int(1e5)
12 | x = np.random.uniform(-np.pi, np.pi, M)
13 | x = x.astype('float32')
14 | c = np.random.randn(M) + 1.j * np.random.randn(M)
15 | c = c.astype('complex64')
16 | F = np.zeros([N], dtype=np.complex64)       # allocate F (modes out)
17 | n_modes = np.ones([1], dtype=np.int64)
18 | n_modes[0] = N
19 | 
20 | strt = time.time()
21 | 
22 | # plan, using proper specifier for single-precision transform
23 | plan = fp.Plan(1,(N,),dtype='complex64')
24 | 
25 | # set pts
26 | plan.setpts(x)
27 | 
28 | # exec
29 | plan.execute(c,F)
30 | 
31 | # timing
32 | print("Finished nufft in {0:.2g} seconds. Checking..."
33 |       .format(time.time()-strt))
34 | 
35 | # check error
36 | n = 143      # mode to check
37 | Ftest = 0.0
38 | # this is so slow...
39 | for j in range(M):
40 |     Ftest += c[j] * np.exp(n * x[j] * 1.j)
41 | Fmax = np.max(np.abs(F))
42 | err = np.abs((F[n + N // 2] - Ftest) / Fmax)
43 | print("Error relative to max of F: {0:.2e}".format(err))
44 | 


--------------------------------------------------------------------------------
/python/finufft/examples/guru2d1.py:
--------------------------------------------------------------------------------
 1 | # demo of vectorized 2D type 1 FINUFFT in python via guru interface. Should stay close to docs/python.rst
 2 | # Lu 8/20/20
 3 | 
 4 | import numpy as np
 5 | import finufft
 6 | import time
 7 | np.random.seed(42)
 8 | 
 9 | # number of nonuniform points
10 | M = 100000
11 | 
12 | # the nonuniform points in the square [0,2pi)^2
13 | x = 2 * np.pi * np.random.uniform(size=M)
14 | y = 2 * np.pi * np.random.uniform(size=M)
15 | 
16 | # number of transforms
17 | K = 4
18 | 
19 | # generate K stacked strength arrays
20 | c = (np.random.standard_normal(size=(K, M))
21 |      + 1J * np.random.standard_normal(size=(K, M)))
22 | 
23 | # desired number of Fourier modes (in x,y directions respectively)
24 | N1 = 1000
25 | N2 = 2000
26 | 
27 | # specify type 1 transform
28 | nufft_type = 1
29 | 
30 | # instantiate the plan (note n_trans must be set here), also setting tolerance:
31 | t0 = time.time()
32 | plan = finufft.Plan(nufft_type, (N1, N2), eps=1e-9, n_trans=K)
33 | 
34 | # set the nonuniform points
35 | plan.setpts(x, y)
36 | 
37 | # execute the plan (K transforms together, note c.shape must match)
38 | f = plan.execute(c)
39 | print("vectorized guru finufft2d1 done in {0:.2g} s.".format(time.time()-t0))
40 | 
41 | print(f.dtype)
42 | print(f.shape)
43 | 
44 | k1 = 376     # do a math check, for a single output mode index (k1,k2)
45 | k2 = -1000
46 | t = K-2      # from the t'th transform
47 | assert((k1>=-N1/2.) & (k1<N1/2.))   # float division easier here
48 | assert((k2>=-N2/2.) & (k2<N2/2.))
49 | assert((t>=0) & (t<K))
50 | ftest = sum(c[t,:] * np.exp(1.j*(k1*x + k2*y)))
51 | err = np.abs(f[t, k1+N1//2, k2+N2//2] - ftest) / np.max(np.abs(f))
52 | print("Error relative to max: {0:.2e}".format(err))
53 | 


--------------------------------------------------------------------------------
/python/finufft/examples/guru2d1f.py:
--------------------------------------------------------------------------------
 1 | # demo of vectorized 2D type 1 FINUFFT in python via guru interface. Should stay close to docs/python.rst
 2 | # Lu 8/20/20
 3 | 
 4 | import numpy as np
 5 | import finufft
 6 | import time
 7 | np.random.seed(42)
 8 | 
 9 | # number of nonuniform points
10 | M = 100000
11 | 
12 | # the nonuniform points in the square [0,2pi)^2
13 | x = 2 * np.pi * np.random.uniform(size=M)
14 | y = 2 * np.pi * np.random.uniform(size=M)
15 | 
16 | # number of transforms
17 | K = 4
18 | 
19 | # generate K stacked strength arrays
20 | c = (np.random.standard_normal(size=(K, M))
21 |      + 1J * np.random.standard_normal(size=(K, M)))
22 | 
23 | # convert input data to single precision
24 | x = x.astype('float32')
25 | y = y.astype('float32')
26 | c = c.astype('complex64')
27 | 
28 | # desired number of Fourier modes (in x,y directions respectively)
29 | N1 = 1000
30 | N2 = 2000
31 | 
32 | # specify type 1 transform
33 | nufft_type = 1
34 | 
35 | # instantiate the plan (note n_trans must be set here), also setting tolerance:
36 | t0 = time.time()
37 | plan = finufft.Plan(nufft_type, (N1, N2), eps=1e-4, n_trans=K, dtype='complex64')
38 | 
39 | # set the nonuniform points
40 | plan.setpts(x, y)
41 | 
42 | # execute the plan (K transforms together, note c.shape must match)
43 | f = plan.execute(c)
44 | print("vectorized guru single-prec finufft2d1 done in {0:.2g} s.".format(time.time()-t0))
45 | 
46 | print(f.dtype)
47 | print(f.shape)
48 | 
49 | k1 = 37     # do a math check, for a single output mode index (k1,k2)
50 | k2 = -100
51 | t = K-2      # from the t'th transform
52 | assert((k1>=-N1/2.) & (k1<N1/2.))   # float division easier here
53 | assert((k2>=-N2/2.) & (k2<N2/2.))
54 | assert((t>=0) & (t<K))
55 | ftest = sum(c[t,:] * np.exp(1.j*(k1*x + k2*y)))
56 | err = np.abs(f[t, k1+N1//2, k2+N2//2] - ftest) / np.max(np.abs(f))
57 | print("Error relative to max: {0:.2e}".format(err))
58 | 


--------------------------------------------------------------------------------
/python/finufft/examples/many2d1.py:
--------------------------------------------------------------------------------
 1 | # demo of vectorized 2D type 1 FINUFFT in python. Should stay close to docs/python.rst
 2 | # Barnett 8/19/20
 3 | 
 4 | import numpy as np
 5 | import finufft
 6 | import time
 7 | np.random.seed(42)
 8 | 
 9 | # number of nonuniform points
10 | M = 100000
11 | 
12 | # the nonuniform points in the square [0,2pi)^2
13 | x = 2 * np.pi * np.random.uniform(size=M)
14 | y = 2 * np.pi * np.random.uniform(size=M)
15 | 
16 | # number of transforms
17 | K = 4
18 | 
19 | # generate K stacked strength arrays
20 | c = (np.random.standard_normal(size=(K, M))
21 |      + 1J * np.random.standard_normal(size=(K, M)))
22 | 
23 | # desired number of Fourier modes (in x,y directions respectively)
24 | N1 = 1000
25 | N2 = 2000
26 | 
27 | # calculate the K transforms simultaneously (K is inferred from c.shape)
28 | t0 = time.time()
29 | f = finufft.nufft2d1(x, y, c, (N1,N2), eps=1e-9)
30 | print("vectorized finufft2d1 done in {0:.2g} s.".format(time.time()-t0))
31 | print(f.shape)
32 | 
33 | k1 = 376     # do a math check, for a single output mode index (k1,k2)
34 | k2 = -1000
35 | t = K-1      # from the t'th transform
36 | assert((k1>=-N1/2.) & (k1<N1/2.))   # float division easier here
37 | assert((k2>=-N2/2.) & (k2<N2/2.))
38 | assert((t>=0) & (t<K))
39 | ftest = sum(c[t,:] * np.exp(1.j*(k1*x + k2*y)))
40 | err = np.abs(f[t, k1+N1//2, k2+N2//2] - ftest) / np.max(np.abs(f))
41 | print("Error relative to max: {0:.2e}".format(err))
42 | 


--------------------------------------------------------------------------------
/python/finufft/examples/simple1d1.py:
--------------------------------------------------------------------------------
 1 | # demo of 1D type 1 FINUFFT in python. Should stay close to docs/python.rst
 2 | # Barnett 8/19/20
 3 | 
 4 | import numpy as np
 5 | import finufft
 6 | import time
 7 | np.random.seed(42)
 8 | 
 9 | # number of nonuniform points
10 | M = 100000
11 | 
12 | # input nonuniform points
13 | x = 2 * np.pi * np.random.uniform(size=M)
14 | 
15 | # their complex strengths
16 | c = (np.random.standard_normal(size=M)
17 |      + 1J * np.random.standard_normal(size=M))
18 | 
19 | # desired number of output Fourier modes
20 | N = 1000000
21 | 
22 | # calculate the transform
23 | t0 = time.time()
24 | f = finufft.nufft1d1(x, c, N, eps=1e-9)
25 | print("finufft1d1 done in {0:.2g} s.".format(time.time()-t0))
26 | 
27 | n = 142519   # do a math check, for a single output mode index n
28 | assert((n>=-N/2.) & (n<N/2.))
29 | ftest = sum(c * np.exp(1.j*n*x))
30 | err = np.abs(f[n + N // 2] - ftest) / np.max(np.abs(f))
31 | print("Error relative to max: {0:.2e}".format(err))
32 | 


--------------------------------------------------------------------------------
/python/finufft/examples/simple2d1.py:
--------------------------------------------------------------------------------
 1 | # demo of 2D type 1 FINUFFT in python. Should stay close to docs/python.rst
 2 | # Barnett 8/19/20
 3 | 
 4 | import numpy as np
 5 | import finufft
 6 | import time
 7 | np.random.seed(42)
 8 | 
 9 | # number of nonuniform points
10 | M = 100000
11 | 
12 | # the nonuniform points in the square [0,2pi)^2
13 | x = 2 * np.pi * np.random.uniform(size=M)
14 | y = 2 * np.pi * np.random.uniform(size=M)
15 | 
16 | # their complex strengths
17 | c = (np.random.standard_normal(size=M)
18 |      + 1J * np.random.standard_normal(size=M))
19 | 
20 | # desired number of Fourier modes (in x,y directions respectively)
21 | N1 = 1000
22 | N2 = 2000
23 | 
24 | # calculate the transform
25 | t0 = time.time()
26 | f = finufft.nufft2d1(x, y, c, (N1,N2), eps=1e-9)
27 | print("finufft2d1 done in {0:.2g} s.".format(time.time()-t0))
28 | 
29 | k1 = 376   # do a math check, for a single output mode index (k1,k2)
30 | k2 = -1000
31 | assert((k1>=-N1/2.) & (k1<N1/2.))   # float division easier here
32 | assert((k2>=-N2/2.) & (k2<N2/2.))
33 | ftest = sum(c * np.exp(1.j*(k1*x + k2*y)))
34 | err = np.abs(f[k1+N1//2, k2+N2//2] - ftest) / np.max(np.abs(f))
35 | print("Error relative to max: {0:.2e}".format(err))
36 | 


--------------------------------------------------------------------------------
/python/finufft/examples/simpleopts1d1.py:
--------------------------------------------------------------------------------
 1 | # convert DFM's simple demo to JFM interface, include modeord test.
 2 | # Barnett 10/25/17. Added upsampfac, 6/18/18
 3 | 
 4 | import time
 5 | import finufft
 6 | import numpy as np
 7 | 
 8 | # print finufft.nufft1d1.__doc__
 9 | 
10 | np.random.seed(42)
11 | 
12 | acc = 1.e-9
13 | iflag = 1
14 | N = int(1e6)
15 | M = int(1e5)
16 | x = np.random.uniform(-np.pi, np.pi, M)
17 | c = np.random.randn(M) + 1.j * np.random.randn(M)
18 | F = np.zeros([N], dtype=np.complex128)       # allocate F (modes out)
19 | 
20 | strt = time.time()
21 | F = finufft.nufft1d1(x, c, N, eps=acc, isign=iflag, debug=1, spread_debug=1)
22 | print("Finished nufft in {0:.2g} seconds. Checking..."
23 |       .format(time.time()-strt))
24 | 
25 | n = 142519      # mode to check
26 | Ftest = 0.0
27 | # this is so slow...
28 | for j in range(M):
29 |     Ftest += c[j] * np.exp(n * x[j] * 1.j)
30 | Fmax = np.max(np.abs(F))
31 | err = np.abs((F[n + N // 2] - Ftest) / Fmax)
32 | print("Error relative to max of F: {0:.2e}".format(err))
33 | 
34 | # now test FFT mode output version, overwriting F...
35 | strt = time.time()
36 | finufft.nufft1d1(x, c, out=F, eps=acc, isign=iflag, modeord=1)
37 | print("Finished nufft in {0:.2g} seconds (modeord=1)"
38 |       .format(time.time()-strt))
39 | err = np.abs((F[n] - Ftest) / Fmax)   # now zero offset in F array
40 | print("Error relative to max of F: {0:.2e}".format(err))
41 | 
42 | # now test low-upsampfac (sigma) version...
43 | strt = time.time()
44 | Ftest2 = finufft.nufft1d1(x, c, N, F, acc, iflag, upsampfac=1.25)
45 | print(Ftest2 is F)
46 | print("Finished nufft in {0:.2g} seconds (upsampfac=1.25)"
47 |       .format(time.time()-strt))
48 | err = np.abs((Ftest2[n + N // 2] - Ftest) / Fmax)   # default mode ordering again, so offset N//2 in F array
49 | print("Error relative to max of F: {0:.2e}".format(err))
50 | 


--------------------------------------------------------------------------------
/python/finufft/finufft/__init__.py:
--------------------------------------------------------------------------------
 1 | """The Python interface to FINUFFT is divided into two parts: the simple
 2 | interface (through the ``nufft*`` functions) and the more advanced plan
 3 | interface (through the ``Plan`` class). The former allows the user to perform
 4 | an NUFFT in a single call while the latter allows for more efficient reuse of
 5 | resources when the same NUFFT is applied several times to different data by
 6 | saving FFTW plans, sorting the nonuniform points, and so on.
 7 | """
 8 | 
 9 | # that was the docstring for the package finufft.
10 | 
11 | __all__ = ["nufft1d1","nufft1d2","nufft1d3","nufft2d1","nufft2d2","nufft2d3","nufft3d1","nufft3d2","nufft3d3","Plan"]
12 | # etc..
13 | 
14 | # let's just get guru and nufft1d1 working first...
15 | from finufft._interfaces import Plan
16 | from finufft._interfaces import nufft1d1,nufft1d2,nufft1d3
17 | from finufft._interfaces import nufft2d1,nufft2d2,nufft2d3
18 | from finufft._interfaces import nufft3d1,nufft3d2,nufft3d3
19 | 
20 | __version__ = '2.4.0'
21 | 


--------------------------------------------------------------------------------
/python/finufft/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy >= 1.12.0
2 | packaging
3 | 


--------------------------------------------------------------------------------
/python/finufft/test/README.md:
--------------------------------------------------------------------------------
 1 | ## Basic accuracy and speed tests using the python wrappers
 2 | 
 3 | To install the python wrappers for finufft
 4 | see ../../../docs/install.rst
 5 | 
 6 | Then you may run the tests as follows
 7 | 
 8 | ```
 9 | python3 run_accuracy_tests.py
10 | python3 run_speed_tests.py
11 | ```
12 | 
13 | The codes `accuracy_speed_tests.py` and `../examples/*` illustrate how to call
14 | FINUFFT from python.
15 | 


--------------------------------------------------------------------------------
/python/finufft/test/run_accuracy_tests.py:
--------------------------------------------------------------------------------
1 | from accuracy_speed_tests import accuracy_speed_tests
2 | 
3 | accuracy_speed_tests(100000,100000,1e-6)
4 | 


--------------------------------------------------------------------------------
/python/finufft/test/run_speed_tests.py:
--------------------------------------------------------------------------------
1 | from accuracy_speed_tests import accuracy_speed_tests
2 | 
3 | accuracy_speed_tests(1e6,1e6,1e-6)   # ahb lowered from 8e6
4 | 


--------------------------------------------------------------------------------
/python/finufft/test/test_fallback.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | import numpy as np
 4 | from ctypes.util import find_library
 5 | 
 6 | @pytest.mark.skip(reason="Patching seems to fail in CI")
 7 | def test_fallback(mocker):
 8 |     def fake_load_library(lib_name, path):
 9 |         if lib_name in ["libfinufft", "finufft"]:
10 |             raise OSError()
11 |         else:
12 |             return np.ctypeslib.load_library(lib_name, path)
13 | 
14 |     mocker.patch("numpy.ctypeslib.load_library", fake_load_library)
15 | 
16 |     if find_library("finufft") is None:
17 |         with pytest.raises(ImportError, match="suitable finufft"):
18 |             import finufft
19 |     else:
20 |         import finufft
21 | 


--------------------------------------------------------------------------------
/src/cuda/1d/README:
--------------------------------------------------------------------------------
 1 | - cufinufft1d.cu
 2 |   This file contains the execution functions for 1d type 1,2 that are called in ../cufinufft.cu
 3 | 
 4 | - spreadinterp1d.cu
 5 |   This file contains all the GPU kernels for 1d spreading, interpolation.
 6 | 
 7 | - interp1d_wrapper.cu
 8 |   Wrappers for 1d interpolations. One method is implemented:
 9 |     (1) nonuniform driven,
10 | 
11 | - spread1d_wrapper.cu
12 |   Wrappers for 1d spreading. Two methods are implemented:
13 |     (1) nonuniform driven,
14 |     (2) subproblem
15 | 


--------------------------------------------------------------------------------
/src/cuda/2d/README:
--------------------------------------------------------------------------------
 1 | - cufinufft2d.cu
 2 |   This file contains the execution functions for 2d type 1,2 that are called in ../cufinufft.cu
 3 | 
 4 | - spreadinterp2d.cu
 5 |   This file contains all the GPU kernels for 2d spreading, interpolation.
 6 | 
 7 | - interp2d_wrapper.cu
 8 |   Wrappers for 2d interpolations. Two methods are implemented:
 9 |     (1) nonuniform driven,
10 |     (2) subproblem
11 | 
12 | - spread2d_wrapper.cu
13 |   Wrappers for 2d spreading. Two methods are implemented:
14 |     (1) nonuniform driven,
15 |     (2) subproblem
16 | 


--------------------------------------------------------------------------------
/src/cuda/3d/README:
--------------------------------------------------------------------------------
 1 | - cufinufft3d.cu
 2 |   This file contains the execution functions for 3d type1,2 that are called in ../cufinufft.cu
 3 | 
 4 | - spreadinterp3d.cu
 5 |   This file contains all the GPU kernels for 3d spreading, interpolation.
 6 | 
 7 | - interp3d_wrapper.cu
 8 |   Wrappers for 3d interpolations. Two methods are implemented:
 9 |     (1) nonuniform driven,
10 |     (2) subproblem
11 | 
12 | - spread3d_wrapper.cu
13 |   Wrappers for 3d spreading. Three methods are implemented:
14 |     (1) nonuniform points driven,
15 |     (2) subproblem,
16 |     (4) block gather
17 | 


--------------------------------------------------------------------------------
/src/cuda/README:
--------------------------------------------------------------------------------
 1 | Lead developer: Yu-Hsuan Melody Shih (New York University, now at nVidia)
 2 | 
 3 | Other developers: (see github site for full list)
 4 | 
 5 | Garrett Wright (Princeton)
 6 | Joakim Anden (KTH)
 7 | Johannes Blaschke (LBNL)
 8 | Alex Barnett (CCM, Flatiron Institute)
 9 | Robert Blackwell (SCC, Flatiron Institute)
10 | 
11 | This project came out of Melody's 2018 and 2019 summer internships at
12 | the Flatiron Institute, advised by Alex Barnett.
13 | 
14 | 
15 | 
16 | --------------
17 | 
18 | This folder contains the main source files of the GPU implementations.
19 | 
20 | - cufinufft.cu
21 |   Four main stages of cufinufft API.
22 |   (1) cufinufft_makeplan, (2) cufinufft_setpts, (3) cufinufft_execute, (4) cufinufft_destroy.
23 |   Also, cufinufft_default_opts may precede stage 1.
24 | 
25 | - memtransfer_wrapper.cu
26 |   Wrappers for allocating and freeing GPU memory for different dimensions and methods.
27 | 
28 | - deconvolve_wrapper.cu
29 |   GPU kernels and wrappers that deconvolve and amplify the input/output coefficients by the correction factor. (Step 3 in Type 1; Step 1 in Type 2)
30 | 


--------------------------------------------------------------------------------
/test/checkallaccs.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # test range of requested accuracies, for both spreader and nufft, for a given
 3 | # single dimension.
 4 | # Usage:  ./checkallaccs.sh [dim]
 5 | # where dim = 1, 2, or 3.
 6 | # Barnett 2/17/17. Default dim=1 4/5/17
 7 | 
 8 | DEFAULTDIM=1
 9 | DIM=${1:-$DEFAULTDIM}
10 | echo checkallaccs for dim=$DIM :
11 | 
12 | # finufft test size params (prod{N}.N < TEST_BIGPROB so compares direct transf)
13 | TEST1="1e3 1e3"
14 | TEST2="1e2 2e1 1e3"
15 | TEST3="1e1 2e1 3e1 1e3"
16 | # bash hack to make DIM switch between one of the above 3 choices
17 | TESTD=TEST$DIM
18 | TEST=${!TESTD}
19 | 
20 | # other test args
21 | SORT=2
22 | UPSAMPFAC=2.0
23 | 
24 | for acc in `seq 1 15`;
25 | do
26 |     TOL=1e-$acc
27 |     echo ----------requesting $TOL :
28 | #    ./spreadtestnd $DIM 1e6 1e6 $TOL $SORT
29 |     ./finufft${DIM}d_test $TEST $TOL 0 $SORT $UPSAMPFAC | grep dirft
30 | done
31 | 


--------------------------------------------------------------------------------
/test/cuda/README:
--------------------------------------------------------------------------------
 1 | Testing (validation and performance) directory for GPU FINUFFT.
 2 | 
 3 | All codes test either precision (source is templated to allow switching
 4 | via the final command line argument "f" or "d").
 5 | 
 6 | cufinufft{1,2,3}d_test: accuracy/speed tests for single transform
 7 |                    in a given dimension, either type 1 or 2.
 8 |                    (exit code 0 is a pass).
 9 |                    Call with no arguments for argument documentation.
10 | cufinufft2dmany_test: accuracy/speed tests for vectorized transforms
11 |                    in 2D only, either type 1 or 2.
12 |                    (exit code 0 is a pass).
13 |                    Call with no arguments for argument documentation.
14 | public_api_test:   tests guru plan C++ interface for GPU on tiny problem.
15 |                    (exit code 0 is a pass).
16 | test_makeplan:     test edge cases, illegal inputs in API, and error codes
17 |                    (exit code 0 is a pass).
18 | 
19 | CMakeLists.txt : shows the complete set of tests (with args) done by CTest.
20 | 


--------------------------------------------------------------------------------
/test/cuda/spreadperf.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # basic perf test of spread/interp for 2/3d, single/double
 3 | # Barnett 1/29/21, some 1D added 12/2/21.
 4 | 
 5 | BINDIR="./"
 6 | 
 7 | n=1000000
 8 | M=1000000
 9 | dist=0         # 0= random unif, 1 = clustered
10 | Msub=10000   # claimed default is 65536
11 | tols=1e-5
12 | told=1e-12
13 | 
14 | echo "spread 1D.............................................."
15 | $BINDIR/spread1d_test    1 $dist $n $Msub $M $told
16 | $BINDIR/spread1d_test    2 $dist $n $Msub $M $told
17 | $BINDIR/spread1d_testf 1 $dist $n $Msub $M $tols
18 | $BINDIR/spread1d_testf 2 $dist $n $Msub $M $tols
19 | 
20 | echo "interp 1D.............................................."
21 | $BINDIR/interp1d_test    1 $dist $n $M $told
22 | $BINDIR/interp1d_testf 1 $dist $n $M $tols
23 | # note there is no meth=2 in 1D interp
24 | 
25 | # 2D params... (n is grid size per dim)
26 | n=1000
27 | M=1000000
28 | 
29 | echo "spread 2D.............................................."
30 | $BINDIR/spread2d_test    1 $dist $n $n $Msub $M $told
31 | $BINDIR/spread2d_test    2 $dist $n $n $Msub $M $told
32 | $BINDIR/spread2d_testf 1 $dist $n $n $Msub $M $tols
33 | $BINDIR/spread2d_testf 2 $dist $n $n $Msub $M $tols
34 | 
35 | echo "interp 2D.............................................."
36 | $BINDIR/interp2d_test    1 $dist $n $n $M $told
37 | $BINDIR/interp2d_test    2 $dist $n $n $M $told
38 | $BINDIR/interp2d_testf 1 $dist $n $n $M $tols
39 | $BINDIR/interp2d_testf 2 $dist $n $n $M $tols
40 | 
41 | 
42 | # 3D params...
43 | n=100
44 | M=1000000
45 | 
46 | echo "spread 3D.............................................."
47 | $BINDIR/spread3d_test    1 $dist $n $n $n $Msub $M $told
48 | # note absence of meth=2 for 3D double
49 | $BINDIR/spread3d_testf 1 $dist $n $n $n $Msub $M $tols
50 | $BINDIR/spread3d_testf 2 $dist $n $n $n $Msub $M $tols
51 | 
52 | echo "interp 3D.............................................."
53 | $BINDIR/interp3d_test    1 $dist $n $n $n $M $told
54 | # note absence of meth=2 for 3D double
55 | $BINDIR/interp3d_testf 1 $dist $n $n $n $M $tols
56 | $BINDIR/interp3d_testf 2 $dist $n $n $n $M $tols
57 | 


--------------------------------------------------------------------------------
/test/results/README:
--------------------------------------------------------------------------------
1 | Results directory for test outputs, just to tidy them away.
2 | 
3 | Barnett 3/13/17; 8/18/20
4 | 


--------------------------------------------------------------------------------
/tools/common/docker/Dockerfile-x86_64:
--------------------------------------------------------------------------------
 1 | FROM quay.io/pypa/manylinux2014_x86_64:2024-09-09-f386546
 2 | LABEL maintainer "Joakim Andén"
 3 | 
 4 | ENV PATH /opt/python/cp312-cp312/bin:${PATH}
 5 | 
 6 | RUN pip install --root-user-action ignore --no-cache-dir --upgrade pip
 7 | RUN pip install --root-user-action ignore --no-cache-dir --upgrade build toml-cli
 8 | 
 9 | COPY . /io/finufft
10 | 
11 | WORKDIR /io
12 | 
13 | CMD ["/bin/bash"]
14 | 


--------------------------------------------------------------------------------
/tools/common/sdist-helper.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -e -x
 4 | 
 5 | dockerhub=janden
 6 | image=finufft-sdist
 7 | 
 8 | docker build --file tools/common/docker/Dockerfile-x86_64 \
 9 |              --tag ${dockerhub}/${image} \
10 |              .
11 | 
12 | docker run --volume $(pwd)/wheelhouse:/io/wheelhouse \
13 |            ${dockerhub}/${image} \
14 |            /io/finufft/tools/finufft/build-sdist.sh
15 | 
16 | docker run --volume $(pwd)/wheelhouse:/io/wheelhouse \
17 |            ${dockerhub}/${image} \
18 |            /io/finufft/tools/cufinufft/build-sdist.sh
19 | 


--------------------------------------------------------------------------------
/tools/cufinufft/build-library.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | set -e -u -x
 3 | 
 4 | rm -rf /io/build
 5 | mkdir /io/build
 6 | cd /io/build
 7 | 
 8 | cmake -D FINUFFT_USE_CUDA=ON \
 9 |       -D FINUFFT_USE_CPU=OFF \
10 |       -D FINUFFT_BUILD_TESTS=ON \
11 |       -D CMAKE_CUDA_ARCHITECTURES="50;60;70;80" \
12 |       -D CMAKE_CUDA_FLAGS="-Wno-deprecated-gpu-targets" \
13 |       -D BUILD_TESTING=ON \
14 |       ..
15 | 
16 | make -j4
17 | 


--------------------------------------------------------------------------------
/tools/cufinufft/build-sdist.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -e -x
 4 | 
 5 | # Move pyproject.toml to root (otherwise no way to include C++ sources in sdist).
 6 | cp python/cufinufft/pyproject.toml .
 7 | 
 8 | # Fix paths in pyproject.toml to reflect the new directory structure.
 9 | toml set --toml-path pyproject.toml \
10 |          tool.scikit-build.cmake.source-dir "."
11 | toml set --toml-path pyproject.toml \
12 |          tool.scikit-build.wheel.packages --to-array "[\"python/cufinufft/cufinufft\"]"
13 | toml set --toml-path pyproject.toml \
14 |          tool.scikit-build.metadata.version.input "python/cufinufft/cufinufft/__init__.py"
15 | 
16 | # Package the sdist.
17 | python3 -m build --verbose --sdist --outdir wheelhouse .
18 | 


--------------------------------------------------------------------------------
/tools/cufinufft/build-wheels.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -e -u -x
 3 | 
 4 | function get_python_binary {
 5 |     version="$1"
 6 |     echo "/opt/python/$version/bin"
 7 | }
 8 | 
 9 | function repair_wheel {
10 |     py_version="$1"
11 |     wheel="$2"
12 | 
13 |     py_binary=$(get_python_binary "${py_version}")
14 | 
15 |     if ! "${py_binary}/pip" show auditwheel > /dev/null 2>&1; then
16 |         "${py_binary}/pip" install auditwheel
17 |     fi
18 | 
19 |     if ! "${py_binary}/auditwheel" show "$wheel"; then
20 |         echo "Skipping non-platform wheel $wheel"
21 |     else
22 |         "${py_binary}/auditwheel" repair "$wheel" --plat "$PLAT" -w /io/wheelhouse/
23 |     fi
24 | }
25 | 
26 | # Explicitly list Python versions to build for
27 | py_versions=(cp36-cp36m \
28 |             cp37-cp37m \
29 |             cp38-cp38 \
30 |             cp39-cp39 \
31 |             cp310-cp310 \
32 |             cp311-cp311 \
33 |             cp312-cp312)
34 | 
35 | # NOTE: For CUDA 12, cp36-cp36m and cp37-cp37m are broken since these force an
36 | # older version of pycuda (2022.1), which does not build under CUDA 12.
37 | 
38 | # Compile wheels
39 | for py_version in ${py_versions[@]}; do
40 |     py_binary=$(get_python_binary ${py_version})
41 | 
42 |     "${py_binary}/pip" install --upgrade pip
43 |     "${py_binary}/pip" wheel /io/python/cufinufft --no-deps -w wheelhouse/
44 | done
45 | 
46 | 
47 | # Bundle external shared libraries into the wheels
48 | audit_py_version="cp310-cp310"
49 | for whl in wheelhouse/*.whl; do
50 |     repair_wheel "$audit_py_version" "$whl"
51 | done
52 | 
53 | # Install packages and test
54 | for py_version in ${py_versions[@]}; do
55 |     py_binary=$(get_python_binary ${py_version})
56 | 
57 |     "${py_binary}/pip" install --pre cufinufft -f /io/wheelhouse
58 |     "${py_binary}/pip" install pytest
59 |     "${py_binary}/pytest" /io/python/cufinufft/tests
60 | done
61 | 


--------------------------------------------------------------------------------
/tools/cufinufft/distribution_helper.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -xe
 2 | 
 3 | # Helper Script For Building Wheels
 4 | 
 5 | manylinux_version=manylinux2014
 6 | cuda_version=11.2
 7 | dockerhub=janden
 8 | 
 9 | image_name=cufinufft-cuda${cuda_version}
10 | 
11 | echo "# Build the docker image"
12 | docker build \
13 |     --file tools/cufinufft/docker/cuda${cuda_version}/Dockerfile-x86_64 \
14 |     --tag ${dockerhub}/cufinufft-cuda${cuda_version} \
15 |     .
16 | 
17 | echo "# Create the container and start it"
18 | docker create \
19 |     --gpus all \
20 |     --interactive \
21 |     --tty \
22 |     --volume $(pwd)/wheelhouse:/io/wheelhouse \
23 |     --name ${image_name} \
24 |     ${dockerhub}/${image_name}
25 | 
26 | docker start ${image_name}
27 | 
28 | echo "# Copy the code"
29 | docker cp . ${image_name}:/io
30 | 
31 | echo "# Build the wheels"
32 | docker exec ${image_name} \
33 |     python3 -m pip wheel \
34 |     --verbose \
35 |     /io/python/cufinufft \
36 |     --config-settings=cmake.define.FINUFFT_CUDA_ARCHITECTURES="50;60;70;80" \
37 |     --config-settings=cmake.define.CMAKE_CUDA_FLAGS="-Wno-deprecated-gpu-targets" \
38 |     --config-settings=cmake.define.FINUFFT_ARCH_FLAGS="" \
39 |     --config-settings=cmake.define.CMAKE_VERBOSE_MAKEFILE=ON \
40 |     --no-deps \
41 |     --wheel-dir /io/wheelhouse
42 | 
43 | wheel_name=$(docker exec ${image_name} bash -c 'ls /io/wheelhouse/cufinufft-*-linux_x86_64.whl')
44 | 
45 | echo "# Repair the wheels"
46 | docker exec ${image_name} \
47 |     python3 -m auditwheel repair \
48 |     ${wheel_name} \
49 |     --plat manylinux2014_x86_64 \
50 |     --wheel-dir /io/wheelhouse/
51 | 
52 | echo "# Shut down the container and remove it"
53 | docker stop ${image_name}
54 | docker rm ${image_name}
55 | 
56 | echo "# Copy the wheels we care about to the dist folder"
57 | mkdir -p dist
58 | cp -v wheelhouse/cufinufft-*${manylinux_version}* dist
59 | 
60 | # TODO: Test installing the wheels and running pytest.
61 | 


--------------------------------------------------------------------------------
/tools/cufinufft/docker/README:
--------------------------------------------------------------------------------
1 | These configurations are based off of manylinux2014, which is itself based off
2 | of centos8.
3 | 
4 | These images extend manylinux with a compatible CUDA toolkit and runtime
5 | environment suitable for both building and running code inside docker.
6 | 


--------------------------------------------------------------------------------
/tools/cufinufft/docker/cuda11.2/cuda.repo:
--------------------------------------------------------------------------------
1 | [cuda]
2 | name=cuda
3 | baseurl=https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64
4 | enabled=1
5 | gpgcheck=1
6 | gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA
7 | 


--------------------------------------------------------------------------------
/tools/cufinufft/docker/cuda11.8/cuda.repo:
--------------------------------------------------------------------------------
1 | [cuda]
2 | name=cuda
3 | baseurl=https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64
4 | enabled=1
5 | gpgcheck=1
6 | gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA
7 | 


--------------------------------------------------------------------------------
/tools/cufinufft/docker/cuda12.0/cuda.repo:
--------------------------------------------------------------------------------
1 | [cuda]
2 | name=cuda
3 | baseurl=https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64
4 | enabled=1
5 | gpgcheck=1
6 | gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA
7 | 


--------------------------------------------------------------------------------
/tools/cufinufft/test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # For every Python build listed below (looked up under /opt/python), install
 3 | # cufinufft from /io/python/cufinufft and run its pytest suite.
 4 | set -eux
 5 | 
 6 | # Tags naming the interpreter directories to test with.
 7 | abi_tags=(
 8 |     cp36-cp36m
 9 |     cp37-cp37m
10 |     cp38-cp38
11 |     cp39-cp39
12 |     cp310-cp310
13 |     cp311-cp311
14 | )
15 | 
16 | for tag in "${abi_tags[@]}"; do
17 |     bin_dir="/opt/python/${tag}/bin"
18 |     pip="${bin_dir}/pip"
19 | 
20 |     # Upgrade pip first, then install the package under test and pytest.
21 |     "${pip}" install --upgrade pip
22 |     "${pip}" install /io/python/cufinufft
23 |     "${pip}" install pytest
24 | 
25 |     "${bin_dir}/pytest" /io/python/cufinufft/tests
26 | done
27 | 


--------------------------------------------------------------------------------
/tools/finufft/build-sdist.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -e -x
 4 | 
 5 | # Move pyproject.toml to root (otherwise no way to include C++ sources in sdist).
 6 | cp python/finufft/pyproject.toml .
 7 | 
 8 | # Fix paths in pyproject.toml to reflect the new directory structure.
 9 | toml set --toml-path pyproject.toml \
10 |          tool.scikit-build.cmake.source-dir "."
11 | toml set --toml-path pyproject.toml \
12 |          tool.scikit-build.wheel.packages --to-array "[\"python/finufft/finufft\"]"
13 | toml set --toml-path pyproject.toml \
14 |          tool.scikit-build.metadata.version.input "python/finufft/finufft/__init__.py"
15 | 
16 | # Package the sdist.
17 | python3 -m build --verbose --sdist --outdir wheelhouse .
18 | 


--------------------------------------------------------------------------------
/tools/finufft/build-wheels.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Build the FINUFFT library in /io with generic compiler flags, build a wheel
 4 | # with each interpreter under /opt/python (old versions excluded), then run
 5 | # auditwheel repair on the wheels whose file name starts with $package_name.
 6 | 
 7 | set -e -x
 8 | 
 9 | # Name prefix of the wheels to repair. This was never set before, so the glob
10 | # in the repair loop expanded to "python/wheelhouse/-*.whl" and matched
11 | # nothing. A value already present in the environment is kept.
12 | package_name=${package_name:-finufft}
13 | 
14 | cd /io/
15 | 
16 | # Replace native compilation flags with more generic ones.
17 | cp make-platforms/make.inc.manylinux make.inc
18 | 
19 | # Clean up the build and make the library.
20 | make clean
21 | make lib
22 | 
23 | # Test to make sure everything is ok.
24 | make test
25 | 
26 | # Remove make.inc now that we're done.
27 | rm make.inc
28 | 
29 | # Needed for pip install to work
30 | export FINUFFT_DIR=$(pwd)
31 | # Needed for auditwheel to find the dynamic libraries
32 | export LD_LIBRARY_PATH=${FINUFFT_DIR}/lib:${LD_LIBRARY_PATH}
33 | 
34 | # One entry per interpreter bin directory.
35 | pys=(/opt/python/*/bin)
36 | 
37 | # Filter out old Python versions
38 | pys=(${pys[@]//*27*/})
39 | pys=(${pys[@]//*34*/})
40 | pys=(${pys[@]//*35*/})
41 | 
42 | # Build wheels into python/wheelhouse (relative to /io).
43 | for PYBIN in "${pys[@]}"; do
44 |     "${PYBIN}/pip" install auditwheel wheel twine numpy
45 |     "${PYBIN}/pip" wheel /io/python -w python/wheelhouse
46 | done
47 | 
48 | # Repair only this package's wheels; output goes to the same wheelhouse.
49 | for whl in python/wheelhouse/"${package_name}"-*.whl; do
50 |     auditwheel repair "$whl" -w /io/python/wheelhouse/
51 | done
52 | 


--------------------------------------------------------------------------------
/tools/finufft/docker/Dockerfile-x86_64:
--------------------------------------------------------------------------------
 1 | # We currently use manylinux2010 based on CentOS6, which has very old
 2 | # fftw 3.2.1, too old for FINUFFT. We thus here compile FFTW from source (slow).
 3 | #
 4 | # Soon (11/30/2020) we'll want to update to manylinux2014 which has fftw
 5 | # 3.3.3 (still old, but functions with FINUFFT), and switch to:
 6 | #   yum install fftw-devel
 7 | # instead of building from source.
 8 | 
 9 | 
10 | FROM quay.io/pypa/manylinux2010_x86_64:2024-09-09-f386546
11 | LABEL maintainer="Libin Lu"
12 | 
13 | # Build and install FFTW 3.3.8 twice: first with the default configuration,
14 | # then again with --enable-float. `set -e -x` has to live inside this RUN:
15 | # every RUN starts a fresh shell, so a separate `RUN set -e -x` has no effect,
16 | # and with `;`-separated commands a failing step would otherwise be ignored.
17 | RUN set -e -x; \
18 | cd ~; \
19 | curl http://www.fftw.org/fftw-3.3.8.tar.gz --output fftw-3.3.8.tar.gz; \
20 | tar -xvzf fftw-3.3.8.tar.gz; \
21 | cd fftw-3.3.8; \
22 | export CFLAGS=-fPIC; \
23 | ./configure --enable-threads --enable-openmp; \
24 | make; \
25 | make install; \
26 | make clean; \
27 | ./configure --enable-threads --enable-openmp --enable-float; \
28 | make; \
29 | make install
30 | 
31 | CMD ["/bin/bash"]
32 | 


--------------------------------------------------------------------------------
/tutorial/README:
--------------------------------------------------------------------------------
1 | Tutorials directory for FINUFFT
2 | 
3 | So far, this is a collection of MATLAB/Octave codes,
4 | which make use of utilities in utils/
5 | 
6 | See the "Tutorials and application demos" section of the documentation.
7 | 


--------------------------------------------------------------------------------
/tutorial/applyAHA.m:
--------------------------------------------------------------------------------
1 | function AHAf = applyAHA(f,x,tol)
2 | % APPLYAHA   apply A^* A to f with one finufft1d2 / finufft1d1 pair.
3 | %  Both calls use the same points x and tolerance tol; length(f) is passed
4 | %  as the last argument of finufft1d1.
5 |   nf = length(f);
6 |   vals = finufft1d2(x, +1, tol, f);             % A applied to f
7 |   AHAf = finufft1d1(x, vals, -1, tol, nf);      % A^* applied to the result
8 | end
9 | 


--------------------------------------------------------------------------------
/tutorial/applyToep.m:
--------------------------------------------------------------------------------
 1 | function Tx = applyToep(x,vhat)
 2 | % APPLYTOEP   multiply a vector by a square Toeplitz matrix using FFTs
 3 | %
 4 | % Tx = applyToep(x,vhat) returns T*x, where T is the N*N Toeplitz matrix (not
 5 | %  necessarily symmetric) encoded by a vector v of length 2N-1. The caller
 6 | %  supplies vhat = fft([v;0]), the DFT of v zero-padded to length 2N.
 7 | %  v lists the 1st row of T reversed, followed by the 2nd through last entries
 8 | %  of the 1st column in natural order (the convention of Raymond Chan's book);
 9 | %  in the literature its index runs over -N+1:N-1.
10 | %  T*x is a discrete nonperiodic convolution, computed here with one FFT and
11 | %  one inverse FFT, both of length 2N. Length 2N is used rather than 2N-1
12 | %  because 2N-1 has much larger factors (it is often prime), which slows the
13 | %  FFT down dramatically.
14 | %
15 | % Inputs: x    : vector of length N (treated as a column)
16 | %         vhat : DFT of v after padding to length 2N (eg, by a single zero)
17 | % Output: Tx   : T*x, column vector of length N
18 | %
19 | % Called with no arguments, runs a self-test that doubles as a usage demo.
20 | 
21 | % Barnett 11/7/22. Realized 2N-1 slow for FFT (can be prime!) -> 2N.  12/10/23
22 | if nargin < 1
23 |   test_applyToep;
24 |   return
25 | end
26 | 
27 | n = numel(x);
28 | assert(numel(vhat)==2*n)
29 | xhat = fft(x(:),2*n);                % fft zero-pads x out to length 2n
30 | cyc = ifft(xhat .* vhat(:));         % length-2n cyclic convolution
31 | Tx = cyc(n:2*n-1);                   % keep the chunk holding T*x
32 | 
33 | %%%%%%%
34 | function test_applyToep
35 | % Compare the FFT-based product with a dense matvec on a small random case.
36 | N = 10;
37 | x = randn(N,1);
38 | t = randn(2*N-1,1);                  % Toeplitz vector: reversed 1st row, then rest of 1st col
39 | col = t(N:end);                      % 1st column of T
40 | row = t(N:-1:1);                     % 1st row of T
41 | T = toeplitz(col,row);
42 | that = fft([t;0]);                   % pad with a single zero to length 2N, then DFT
43 | Tx = applyToep(x,that);
44 | fprintf('test_applyToep: Frob norm of diff btw fast and direct: %.3g\n',norm(T*x - Tx,'fro'))
45 | 


--------------------------------------------------------------------------------
/tutorial/serieseval2d.m:
--------------------------------------------------------------------------------
 1 | % Demo evaluating a 2D Fourier series at arbitrary points in quasi-optimal
 2 | % time via FINUFFT, in MATLAB. Barnett 6/3/20
 3 | clear; close all;             % start from an empty workspace with no open figures
 4 | 
 5 | % we work in [0,2pi)^2. Set up a 2D Fourier series
 6 | kmax = 500;                   % bandlimit per dim
 7 | k = -kmax:kmax-1;             % freq indices in each dim
 8 | N1 = 2*kmax; N2 = N1;         % # modes in each dim
 9 | [k1 k2] = ndgrid(k,k);        % N1-by-N2 grids of the freq indices in dims 1 and 2
10 | rng(0);                       % fix the random seed
11 | fk =  randn(N1,N2)+1i*randn(N1,N2);  % iid random complex mode data
12 | % let's scale the amplitudes vs (k1,k2) to give a Gaussian random field with
13 | % isotropic (periodized) Matern kernel (ie, covariance is Yukawa for alpha=1)...
14 | k0 = 30;                     % freq scale parameter
15 | alpha = 3.7;                 % power; alpha>2 to converge in L^2
16 | fk = fk .* ((k1.^2+k2.^2)/k0^2 + 1).^(-alpha/2);     % sqrt of spectral density
17 | 
18 | M = 1e6; x = 2*pi*rand(1,M); y = 2*pi*rand(1,M);     % M random target points in [0,2pi)^2
19 | tol = 1e-9;                   % tolerance passed to finufft2d2
20 | tic; c = finufft2d2(x,y,+1,tol,fk); toc   % evaluate Fourier series at (x,y)'s
21 | % Elapsed time is 0.130059 seconds.
22 | 
23 | j = 1;                        % do math check on 1st target...
24 | c1 = sum(sum(fk.*exp(1i*(k1*x(j)+k2*y(j)))));   % direct sum of the series at target j
25 | abs(c1-c(j)) / norm(c,inf)    % error at target j relative to max |c|; no semicolon, so it is displayed
26 | 
27 | figure(1); clf;
28 | jplot = 1:1e5;          % indices to plot (the first 1e5 targets)
29 | scatter(x(jplot),y(jplot),1.0,real(c(jplot)),'filled'); axis tight equal
30 | xlabel('x'); ylabel('y'); colorbar; title('Re f(x,y)');
31 | set(gcf,'paperposition',[0 0 8 7]); print -dpng ../docs/pics/fser2d.png
32 | 


--------------------------------------------------------------------------------