├── .clang-format ├── .gersemirc ├── .git-blame-ignore-revs ├── .gitattributes ├── .github └── workflows │ ├── C++.yml │ ├── C++_build_win.ps1 │ ├── build_cufinufft_wheels.yml │ ├── build_finufft_wheels.yml │ ├── cmake_ci.yml │ ├── generate_cmake_matrix.py │ ├── generate_matrix.py │ ├── mex.yml │ ├── mex_C++_win64.xml │ └── valgrind.yml ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── CHANGELOG ├── CMakeLists.txt ├── Jenkinsfile ├── LICENSE ├── README.md ├── cmake ├── CheckAVX.cpp ├── setupCPM.cmake ├── setupDUCC.cmake ├── setupFFTW.cmake ├── setupSphinx.cmake ├── setupXSIMD.cmake ├── utils.cmake └── workaround.cmake ├── contributing.md ├── devel ├── CMakeLists.txt ├── ESlocalpoly.m ├── GuruInterfaceBranch_BrainDump ├── README ├── TODO ├── agenda_11-27-23.txt ├── analyse_upsamp.py ├── check_dumbinputs.cmake ├── compare_foldrescale_PR440_laptop5700U.txt ├── cuda │ ├── cufinufft_tasks_meeting_Jun2023.txt │ └── draft_interfaces_c+py_Jun2023.txt ├── eval_ker_expts.cpp ├── eval_ker_expts2.cpp ├── eval_ker_expts_libin_simd64.cpp ├── eval_ker_expts_ludvig.cpp ├── fig_speed_ker_ppval.m ├── finufft_meeting-7-5-23.txt ├── foldrescale.cpp ├── foldrescale.sh ├── foldrescale_finufft1d.txt ├── foldrescale_perf.cpp ├── foldrescale_perf2.cpp ├── foldrescale_spreadtest1d_slower.txt ├── gen_all_horner_C_code.m ├── gen_all_horner_cpp_header.m ├── gen_ker_horner_loop_C_code.m ├── gen_ker_horner_loop_cpp_code.m ├── get_degree_and_beta.m ├── i7_1thr_ker_eval_speeds.png ├── interp_square_nowrap.cpp ├── ker_horner_allw.c ├── ker_horner_allw_loop_notpadded.c ├── ker_ppval_coeff_mat.m ├── non-contiguous_frequency_inputs_bug2d1.py ├── padding.cpp ├── plans_fall23.txt ├── reverse_engineer_tol.m ├── test_ker_ppval.cpp ├── time2d2interp.cpp ├── v2spec.md └── wisdom.py ├── docs ├── FIlogo_200.png ├── Makefile ├── README ├── ackn.rst ├── c.rst ├── c1d.doc ├── c1d.docsrc ├── c2d.doc ├── c2d.docsrc ├── c3d.doc ├── c3d.docsrc ├── c_gpu.rst ├── cex.rst ├── 
cguru.doc ├── cguru.docsrc ├── changelog.rst ├── conf.py ├── cufinufft_migration.rst ├── devnotes.rst ├── dirs.rst ├── error.rst ├── fortran.rst ├── genmatlabhelp.sh ├── index.rst ├── install.rst ├── install_gpu.rst ├── julia.rst ├── latexindex.rst ├── logo-32x32.png ├── logo-small.png ├── logo.png ├── logo_gpu.png ├── makecdocs.sh ├── makefile.doc ├── math.rst ├── matlab.rst ├── matlab_gpu.rst ├── matlabgpuhelp.doc ├── matlabhelp.doc ├── nfft_migr.rst ├── opts.rst ├── overview.rst ├── overview.src ├── performance.rst ├── pics │ ├── 10000x1x1-type-1-upsamp1.25-precd-thread1.png │ ├── 10000x1x1-type-1-upsamp1.25-precf-thread1.png │ ├── 10000x1x1-type-1-upsamp2.00-precd-thread1.png │ ├── 10000x1x1-type-1-upsamp2.00-precf-thread1.png │ ├── 10000x1x1-type-2-upsamp1.25-precd-thread1.png │ ├── 10000x1x1-type-2-upsamp1.25-precf-thread1.png │ ├── 10000x1x1-type-2-upsamp2.00-precd-thread1.png │ ├── 10000x1x1-type-2-upsamp2.00-precf-thread1.png │ ├── 10000x1x1-type-3-upsamp1.25-precd-thread1.png │ ├── 10000x1x1-type-3-upsamp1.25-precf-thread1.png │ ├── 10000x1x1-type-3-upsamp2.00-precd-thread1.png │ ├── 10000x1x1-type-3-upsamp2.00-precf-thread1.png │ ├── 192x192x128-type-1-upsamp1.25-precd-thread16.png │ ├── 192x192x128-type-1-upsamp1.25-precd-thread32.png │ ├── 192x192x128-type-1-upsamp2.00-precd-thread16.png │ ├── 192x192x128-type-1-upsamp2.00-precd-thread32.png │ ├── 192x192x128-type-2-upsamp1.25-precd-thread16.png │ ├── 192x192x128-type-2-upsamp1.25-precd-thread32.png │ ├── 192x192x128-type-2-upsamp2.00-precd-thread16.png │ ├── 192x192x128-type-2-upsamp2.00-precd-thread32.png │ ├── 192x192x128-type-3-upsamp1.25-precd-thread16.png │ ├── 192x192x128-type-3-upsamp1.25-precd-thread32.png │ ├── 192x192x128-type-3-upsamp2.00-precd-thread16.png │ ├── 192x192x128-type-3-upsamp2.00-precd-thread32.png │ ├── 250x250x250-type-1-upsamp2.00-precd-thread1.png │ ├── 250x250x250-type-2-upsamp2.00-precd-thread1.png │ ├── 250x250x250-type-3-upsamp2.00-precd-thread1.png │ ├── 
320x320x1-type-1-upsamp1.25-precd-thread1.png │ ├── 320x320x1-type-1-upsamp1.25-precf-thread1.png │ ├── 320x320x1-type-1-upsamp1.25-precf-thread16.png │ ├── 320x320x1-type-1-upsamp1.25-precf-thread32.png │ ├── 320x320x1-type-1-upsamp2.00-precd-thread1.png │ ├── 320x320x1-type-1-upsamp2.00-precf-thread1.png │ ├── 320x320x1-type-1-upsamp2.00-precf-thread16.png │ ├── 320x320x1-type-1-upsamp2.00-precf-thread32.png │ ├── 320x320x1-type-2-upsamp1.25-precd-thread1.png │ ├── 320x320x1-type-2-upsamp1.25-precf-thread1.png │ ├── 320x320x1-type-2-upsamp1.25-precf-thread16.png │ ├── 320x320x1-type-2-upsamp1.25-precf-thread32.png │ ├── 320x320x1-type-2-upsamp2.00-precd-thread1.png │ ├── 320x320x1-type-2-upsamp2.00-precf-thread1.png │ ├── 320x320x1-type-2-upsamp2.00-precf-thread16.png │ ├── 320x320x1-type-2-upsamp2.00-precf-thread32.png │ ├── 320x320x1-type-3-upsamp1.25-precd-thread1.png │ ├── 320x320x1-type-3-upsamp1.25-precf-thread1.png │ ├── 320x320x1-type-3-upsamp1.25-precf-thread16.png │ ├── 320x320x1-type-3-upsamp1.25-precf-thread32.png │ ├── 320x320x1-type-3-upsamp2.00-precd-thread1.png │ ├── 320x320x1-type-3-upsamp2.00-precf-thread1.png │ ├── 320x320x1-type-3-upsamp2.00-precf-thread16.png │ ├── 320x320x1-type-3-upsamp2.00-precf-thread32.png │ ├── contft1d.png │ ├── contft1dN.png │ ├── contft1dans.png │ ├── contft1dsing.png │ ├── contft2dans.png │ ├── contft2dnodes.png │ ├── cufinufft_announce.png │ ├── fser1d.png │ ├── fser2d.png │ ├── grf1d.png │ ├── inv1d2err.png │ ├── inv1d2err_wellcond.png │ ├── pois_fft.png │ ├── pois_fhat.png │ ├── pois_nufft.png │ └── pois_nugrid.png ├── python.rst ├── python_gpu.rst ├── refs.rst ├── related.rst ├── requirements.txt ├── spreadpic.png ├── trouble.rst ├── tut.rst ├── tutorial │ ├── contft.rst │ ├── grf.rst │ ├── inv1d2.rst │ ├── peripois2d.rst │ └── serieseval.rst └── users.rst ├── examples ├── CMakeLists.txt ├── README ├── cuda │ ├── CMakeLists.txt │ ├── README │ ├── example2d1many.cpp │ ├── example2d2many.cpp │ ├── 
example2d3many.cpp │ └── getting_started.cpp ├── guru1d1.cpp ├── guru1d1c.c ├── guru1d1f.cpp ├── guru2d1.cpp ├── gurumany1d1.cpp ├── many1d1.cpp ├── simple1d1.cpp ├── simple1d1c.c ├── simple1d1cf.c ├── simple1d1f.cpp ├── simple2d1.cpp ├── simulplans1d1.cpp ├── spreadinterponly1d.cpp ├── threadsafe1d1.cpp └── threadsafe2d2f.cpp ├── fortran ├── CMakeLists.txt ├── README ├── cmcl_license.txt ├── directft │ ├── README │ ├── dirft1d.f │ ├── dirft1df.f │ ├── dirft2d.f │ ├── dirft2df.f │ ├── dirft3d.f │ ├── dirft3df.f │ └── prini.f ├── examples │ ├── guru1d1.f │ ├── guru1d1f.f │ ├── nufft1d_demo.f │ ├── nufft1d_demof.f │ ├── nufft2d_demo.f │ ├── nufft2d_demof.f │ ├── nufft2dmany_demo.f │ ├── nufft2dmany_demof.f │ ├── nufft3d_demo.f │ ├── nufft3d_demof.f │ ├── simple1d1.f │ ├── simple1d1.f90 │ └── simple1d1f.f └── finufftfort.cpp ├── include ├── cufinufft.h ├── cufinufft │ ├── common.h │ ├── contrib │ │ ├── helper_cuda.h │ │ ├── helper_math.h │ │ ├── ker_horner_allw_loop.inc │ │ └── ker_lowupsampfac_horner_allw_loop.inc │ ├── cudeconvolve.h │ ├── defs.h │ ├── impl.h │ ├── memtransfer.h │ ├── precision_independent.h │ ├── spreadinterp.h │ ├── types.h │ └── utils.h ├── cufinufft_opts.h ├── finufft.fh ├── finufft.h ├── finufft │ ├── fft.h │ ├── finufft_core.h │ ├── finufft_utils.hpp │ ├── heuristics.hpp │ ├── spreadinterp.h │ └── test_defs.h ├── finufft_eitherprec.h ├── finufft_errors.h ├── finufft_mod.f90 ├── finufft_opts.h └── finufft_spread_opts.h ├── lib-static └── README ├── lib └── README ├── make-platforms ├── README ├── make.inc.GCC7 ├── make.inc.linux_ICC ├── make.inc.macosx_arm64 ├── make.inc.macosx_arm64_matlab2022b_beta ├── make.inc.macosx_clang ├── make.inc.macosx_clang_matlab ├── make.inc.macosx_gcc-10 ├── make.inc.macosx_gcc-12 ├── make.inc.macosx_gcc-8 ├── make.inc.manylinux ├── make.inc.powerpc ├── make.inc.windows_mingw └── make.inc.windows_msys ├── makefile ├── matlab ├── @gpuArray │ ├── finufft1d1.m │ ├── finufft1d2.m │ ├── finufft1d3.m │ ├── finufft2d1.m 
│ ├── finufft2d2.m │ ├── finufft2d3.m │ ├── finufft3d1.m │ ├── finufft3d2.m │ └── finufft3d3.m ├── CMakeLists.txt ├── Contents.m ├── README ├── addmhelp.sh ├── cufinufft.cu ├── cufinufft.mw ├── cufinufft1d1.docsrc ├── cufinufft1d1.m ├── cufinufft1d2.docsrc ├── cufinufft1d2.m ├── cufinufft1d3.docsrc ├── cufinufft1d3.m ├── cufinufft2d1.docsrc ├── cufinufft2d1.m ├── cufinufft2d2.docsrc ├── cufinufft2d2.m ├── cufinufft2d3.docsrc ├── cufinufft2d3.m ├── cufinufft3d1.docsrc ├── cufinufft3d1.m ├── cufinufft3d2.docsrc ├── cufinufft3d2.m ├── cufinufft3d3.docsrc ├── cufinufft3d3.m ├── cufinufft_plan.docsrc ├── cufinufft_plan.m ├── errhandler.m ├── examples │ ├── README │ ├── cuda │ │ ├── README │ │ ├── guru1d1_gpu.m │ │ ├── guru1d1f_gpu.m │ │ ├── guru2d1_gpu.m │ │ ├── guru2d1f_gpu.m │ │ └── simple1d1f_gpu.m │ ├── demo_spreadinterponly2d.m │ ├── guru1d1.m │ └── guru1d1_single.m ├── finufft.cpp ├── finufft.mw ├── finufft1d1.docsrc ├── finufft1d1.m ├── finufft1d2.docsrc ├── finufft1d2.m ├── finufft1d3.docsrc ├── finufft1d3.m ├── finufft2d1.docsrc ├── finufft2d1.m ├── finufft2d2.docsrc ├── finufft2d2.m ├── finufft2d3.docsrc ├── finufft2d3.m ├── finufft3d1.docsrc ├── finufft3d1.m ├── finufft3d2.docsrc ├── finufft3d2.m ├── finufft3d3.docsrc ├── finufft3d3.m ├── finufft_plan.docsrc ├── finufft_plan.m ├── gnotes.docbit ├── gopts.docbit ├── gopts12.docbit ├── isigneps.docbit ├── notes.docbit ├── opts.docbit ├── opts12.docbit ├── test │ ├── big1dtest.m │ ├── check_finufft.m │ ├── check_finufft_single.m │ ├── check_modeords.m │ ├── fig_accuracy.m │ ├── fullmathtest.m │ ├── guru_setpts_issue.m │ └── test_strictadjoint.m ├── valid_ntr.m └── valid_setpts.m ├── perftest ├── CMakeLists.txt ├── README ├── bench.py ├── big2d2f.cpp ├── checkGuruTiming.sh ├── compare_spreads.jl ├── cuda │ ├── CMakeLists.txt │ ├── bench.py │ ├── bench.sh │ └── cuperftest.cu ├── getSpeedup.sh ├── guru_timing_test.cpp ├── highaspect3d_test.sh ├── manysmallprobs.cpp ├── multispreadtestndall.sh ├── mycpuinfo.sh ├── 
mymaxthreads.sh ├── mynumcores.sh ├── nuffttestnd.sh ├── perftest.cpp ├── results │ ├── gcc_vs_icc_xeon.txt │ ├── nuffttestnd_results_i7_2-2-17.txt │ ├── nuffttestnd_results_i7_3-16-17.txt │ ├── nuffttestnd_results_i7_6-17-17.txt │ ├── nuffttestnd_results_i7_gcc72_4-24-18.txt │ ├── nuffttestnd_results_i7_gcc72_4-25-18.txt │ ├── nuffttestnd_results_i7_gcc72_9-14-18.txt │ ├── nuffttestnd_results_i7_v1.1.2_gcc92.txt │ ├── nuffttestnd_results_v1.1.1_xeonE5-2643v3_gcc74.txt │ ├── nuffttestnd_results_v1.1.2_xeonE5-2643v3_gcc74.txt │ ├── nuffttestnd_results_v1.2_i7_gcc92.txt │ ├── perftest_xeon-E5-2643v3.txt │ ├── spreadtestnd_results_i7_2-2-17.txt │ ├── spreadtestnd_results_i7_3-16-17.txt │ ├── spreadtestnd_results_i7_6-17-17.txt │ ├── spreadtestnd_results_i7_gcc72_4-24-18.txt │ ├── spreadtestnd_results_i7_gcc72_4-25-18.txt │ ├── spreadtestnd_results_i7_gcc72_9-14-18.txt │ ├── spreadtestnd_results_i7_v1.1.2_gcc92.txt │ ├── spreadtestnd_results_v1.1.1_xeonE5-2643v3_gcc74.txt │ ├── spreadtestnd_results_v1.1.2_xeonE5-2643v3_gcc74.txt │ └── spreadtestnd_results_v1.2_i7_gcc92.txt ├── searchForTimeMetrics.py ├── spreadbenchmark.py ├── spreaderbench.py ├── spreadingSchemeStats.py ├── spreadtestall.sh ├── spreadtestnd.cpp ├── spreadtestnd.sh ├── spreadtestndall.cpp ├── timingBreakdowns.py └── timingResults │ ├── timingBreakdowns_largeProblems.out │ ├── timingBreakdowns_smallProblems_SequentialMulti.out │ ├── timingBreakdowns_smallProblems_SequentialMulti_noSwitch.out │ └── timingBreakdowns_smallProblems_SimultaneousSingle.out ├── python ├── CMakeLists.txt ├── cufinufft │ ├── README.md │ ├── cufinufft │ │ ├── __init__.py │ │ ├── _compat.py │ │ ├── _cufinufft.py │ │ ├── _plan.py │ │ └── _simple.py │ ├── examples │ │ ├── example2d1_pycuda.py │ │ ├── example2d2_pycuda.py │ │ ├── example3d2many_async_cupy.py │ │ ├── getting_started_cupy.py │ │ ├── getting_started_numba.py │ │ ├── getting_started_pycuda.py │ │ └── getting_started_torch.py │ ├── pyproject.toml │ ├── requirements.txt │ 
└── tests │ │ ├── conftest.py │ │ ├── test_array_ordering.py │ │ ├── test_basic.py │ │ ├── test_error_checks.py │ │ ├── test_examples.py │ │ ├── test_fallback.py │ │ ├── test_multi.py │ │ ├── test_simple.py │ │ └── utils.py └── finufft │ ├── README.md │ ├── examples │ ├── guru1d1.py │ ├── guru1d1f.py │ ├── guru2d1.py │ ├── guru2d1f.py │ ├── many2d1.py │ ├── simple1d1.py │ ├── simple2d1.py │ └── simpleopts1d1.py │ ├── finufft │ ├── __init__.py │ ├── _finufft.py │ └── _interfaces.py │ ├── pyproject.toml │ ├── requirements.txt │ └── test │ ├── README.md │ ├── accuracy_speed_tests.py │ ├── run_accuracy_tests.py │ ├── run_speed_tests.py │ ├── test_fallback.py │ ├── test_finufft_plan.py │ ├── test_finufft_simple.py │ └── utils.py ├── src ├── c_interface.cpp ├── cuda │ ├── 1d │ │ ├── README │ │ ├── cufinufft1d.cu │ │ ├── interp1d_wrapper.cu │ │ ├── spread1d_wrapper.cu │ │ └── spreadinterp1d.cuh │ ├── 2d │ │ ├── README │ │ ├── cufinufft2d.cu │ │ ├── interp2d_wrapper.cu │ │ ├── spread2d_wrapper.cu │ │ └── spreadinterp2d.cuh │ ├── 3d │ │ ├── README │ │ ├── cufinufft3d.cu │ │ ├── interp3d_wrapper.cu │ │ ├── spread3d_wrapper.cu │ │ └── spreadinterp3d.cuh │ ├── CMakeLists.txt │ ├── README │ ├── common.cu │ ├── cufinufft.cu │ ├── deconvolve_wrapper.cu │ ├── memtransfer_wrapper.cu │ ├── precision_independent.cu │ ├── spreadinterp.cpp │ └── utils.cpp ├── fft.cpp ├── finufft_core.cpp ├── finufft_utils.cpp ├── ker_horner_allw_loop_constexpr.h ├── ker_lowupsampfac_horner_allw_loop_constexpr.h └── spreadinterp.cpp ├── test ├── CMakeLists.txt ├── README ├── basicpassfail.cpp ├── check_finufft.sh ├── checkallaccs.sh ├── cuda │ ├── CMakeLists.txt │ ├── README │ ├── cufinufft1d_test.cu │ ├── cufinufft1dspreadinterponly_test.cu │ ├── cufinufft2d1nupts_test.cu │ ├── cufinufft2d_test.cu │ ├── cufinufft2dmany_test.cu │ ├── cufinufft3d_test.cu │ ├── cufinufft_math_test.cu │ ├── public_api_test.c │ ├── spreadperf.sh │ └── test_makeplan.c ├── dumbinputs.cpp ├── fftw_lock_test.cpp ├── 
finufft1d_test.cpp ├── finufft1dmany_test.cpp ├── finufft2d_test.cpp ├── finufft2dmany_test.cpp ├── finufft3d_test.cpp ├── finufft3dkernel_test.cpp ├── finufft3dmany_test.cpp ├── results │ └── README ├── spreadinterp1d_test.cpp ├── testutils.cpp └── utils │ ├── dirft1d.hpp │ ├── dirft2d.hpp │ ├── dirft3d.hpp │ └── norms.hpp ├── tools ├── common │ ├── docker │ │ └── Dockerfile-x86_64 │ └── sdist-helper.sh ├── cufinufft │ ├── build-library.sh │ ├── build-sdist.sh │ ├── build-wheels.sh │ ├── distribution_helper.sh │ ├── docker │ │ ├── README │ │ ├── cuda11.2 │ │ │ ├── Dockerfile-x86_64 │ │ │ └── cuda.repo │ │ ├── cuda11.8 │ │ │ ├── Dockerfile-x86_64 │ │ │ └── cuda.repo │ │ └── cuda12.0 │ │ │ ├── Dockerfile-x86_64 │ │ │ └── cuda.repo │ └── test.sh └── finufft │ ├── build-sdist.sh │ ├── build-wheels.sh │ └── docker │ └── Dockerfile-x86_64 └── tutorial ├── README ├── applyAHA.m ├── applyToep.m ├── contft1d.m ├── contft2d.m ├── inv1d2.m ├── migrate2d1_test.c ├── nfft2d1_test.c ├── poisson2dnuquad.m ├── samplegrf1d.m ├── serieseval1d.m ├── serieseval2d.m └── utils └── lgwt.m /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | BasedOnStyle: LLVM 3 | AlignAfterOpenBracket: Align 4 | AlignConsecutiveMacros: AcrossEmptyLinesAndComments 5 | AlignConsecutiveAssignments: Consecutive 6 | AlignEscapedNewlines: Left 7 | AlignOperands: true 8 | AlignTrailingComments: 9 | Kind: Always 10 | OverEmptyLines: 1 11 | AllowShortIfStatementsOnASingleLine: WithoutElse 12 | AllowShortLambdasOnASingleLine: Inline 13 | AllowShortLoopsOnASingleLine: true 14 | BreakBeforeBraces: Attach 15 | BreakBeforeBinaryOperators: None 16 | ColumnLimit: 90 17 | ExperimentalAutoDetectBinPacking: true 18 | FixNamespaceComments: true 19 | IndentWidth: 2 20 | MaxEmptyLinesToKeep: 1 21 | NamespaceIndentation: None 22 | ReflowComments: true 23 | PenaltyBreakComment: 1 24 | PenaltyBreakOpenParenthesis: 1 # modified; was 0 25 | SortIncludes: CaseSensitive 26 | 
SortUsingDeclarations: true 27 | SpacesBeforeTrailingComments: 1 28 | SpaceAfterCStyleCast: false 29 | SpaceAfterLogicalNot: false 30 | SpaceAfterTemplateKeyword: false 31 | TabWidth: 2 32 | UseTab: Never 33 | AttributeMacros: ['__host__', '__device__', '__global__', '__forceinline__'] 34 | QualifierOrder: 35 | - static 36 | - inline 37 | - constexpr 38 | - const 39 | - type 40 | QualifierAlignment: Custom 41 | ... 42 | -------------------------------------------------------------------------------- /.gersemirc: -------------------------------------------------------------------------------- 1 | definitions: 2 | - "./cmake" 3 | line_length: 120 4 | -------------------------------------------------------------------------------- /.git-blame-ignore-revs: -------------------------------------------------------------------------------- 1 | # Applied clang format to the codebase 2 | 884ba427be0c60aa3399d5ea71b0e9e3a7cbf686 3 | b1e484fb294b2759d3b6b1f68ca0bf5e255b87d1 4 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Convert to LF line endings on checkout. 2 | *.sh text eol=lf 3 | -------------------------------------------------------------------------------- /.github/workflows/C++_build_win.ps1: -------------------------------------------------------------------------------- 1 | $ErrorActionPreference = "Stop" 2 | Set-Variable -Name MSYSTEM -Value MINGW64 3 | 4 | # Setup the make.inc file 5 | Copy-Item -Path make-platforms\make.inc.windows_msys -Destination make.inc 6 | 7 | # call make 8 | Set-Variable repo_root -Value ([IO.Path]::Combine($PSScriptRoot, '..', '..')) 9 | c:\msys64\usr\bin\env MSYSTEM=MINGW64 c:\msys64\usr\bin\bash.exe -lc "cd '$repo_root' && make spreadtestall && make lib && make test" 10 | if (-not $?) 
{throw "Failed make"} 11 | -------------------------------------------------------------------------------- /.github/workflows/build_cufinufft_wheels.yml: -------------------------------------------------------------------------------- 1 | name: Build cufinufft Python wheels 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build_wheels: 7 | name: Build cufinufft wheels on ${{ matrix.buildplat[1] }} 8 | runs-on: ${{ matrix.buildplat[0] }} 9 | strategy: 10 | fail-fast: false 11 | matrix: 12 | buildplat: 13 | - [ ubuntu-22.04, manylinux_x86_64 ] 14 | - [ windows-2019, win_amd64 ] 15 | steps: 16 | - uses: actions/checkout@v4 17 | - uses: ilammy/msvc-dev-cmd@v1 18 | - name: Setup CUDA 19 | if: ${{ matrix.buildplat[0] == 'windows-2019' }} 20 | uses: Jimver/cuda-toolkit@v0.2.21 21 | with: 22 | cuda: '12.4.0' 23 | - name: Build ${{ matrix.buildplat[1] }} wheels 24 | uses: pypa/cibuildwheel@v2.22.0 25 | with: 26 | package-dir: 'python/cufinufft' 27 | env: 28 | CIBW_BUILD: '*-${{ matrix.buildplat[1] }}' 29 | CIBW_TEST_COMMAND: "echo 'Wheel installed'" 30 | CIBW_BUILD_FRONTEND: "pip; args: --no-deps" 31 | CIBW_BEFORE_ALL_LINUX: | 32 | if command -v yum &> /dev/null; then 33 | yum install -y epel-release 34 | yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo 35 | yum install -y cuda-12-4 36 | else 37 | echo "Unsupported package manager. Exiting." 
38 | exit 1 39 | fi 40 | CIBW_ENVIRONMENT_LINUX: PATH=$PATH:/usr/local/cuda/bin LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64 41 | CIBW_ARCHS_LINUX: x86_64 42 | 43 | - uses: actions/upload-artifact@v4 44 | with: 45 | name: cufinufft-wheels-${{ matrix.buildplat[1] }} 46 | path: ./wheelhouse/*.whl 47 | -------------------------------------------------------------------------------- /.github/workflows/generate_matrix.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | matrix = { 4 | "include": [] 5 | } 6 | 7 | python_versions = ["3.8", "3.11"] 8 | 9 | combinations = { 10 | "ubuntu-22.04": { 11 | "compiler": ["llvm", "gcc"], 12 | "arch_flags": ["-march=native", "-march=x86-64"] 13 | }, 14 | "windows-2022": { 15 | "compiler": ["msvc", "llvm"], 16 | "arch_flags": ["/arch:AVX2", "/arch:SSE2"] 17 | }, 18 | "macos-13": { 19 | "compiler": ["llvm", "gcc-14"], 20 | "arch_flags": ["-march=native", "-march=x86-64"] 21 | } 22 | } 23 | 24 | for platform in combinations.keys(): 25 | for python_version in python_versions: 26 | for compiler in combinations[platform]["compiler"]: 27 | for arch_flag in combinations[platform]["arch_flags"]: 28 | matrix["include"].append({ 29 | "os": platform, 30 | "python-version": python_version, 31 | "compiler": compiler, 32 | "arch_flags": arch_flag 33 | }) 34 | 35 | json_str = json.dumps(matrix, ensure_ascii=False) 36 | print(json_str) 37 | -------------------------------------------------------------------------------- /.github/workflows/valgrind.yml: -------------------------------------------------------------------------------- 1 | name: Valgrind memcheck 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | valgrind: 7 | runs-on: ubuntu-22.04 8 | strategy: 9 | fail-fast: false 10 | steps: 11 | - name: Checkout code 12 | uses: actions/checkout@v4 13 | - name: Setup Cpp 14 | uses: aminya/setup-cpp@v1.1.1 15 | with: 16 | compiler: gcc 17 | cmake: true 18 | ninja: true 19 | vcpkg: false 
20 | cppcheck: false 21 | clangtidy: false 22 | - name: Prepare 23 | run: | 24 | sudo apt update 25 | sudo apt install -y libfftw3-dev jq valgrind 26 | - name: Configure Cmake 27 | run: | 28 | cmake -S . -B ./build -G Ninja -DCMAKE_BUILD_TYPE:STRING=RelWithDebInfo -DFINUFFT_BUILD_TESTS=ON -DFINUFFT_ENABLE_SANITIZERS=OFF 29 | - name: Build 30 | run: | 31 | cmake --build ./build --config RelWithDebInfo 32 | - name: Test 33 | working-directory: ./build 34 | run: | 35 | ctest --show-only=json-v1 > ctest_tests.json 36 | # Loop over all test entries 37 | exec 3>&1 38 | jq -c '.tests[]' ctest_tests.json | while read -r test; do 39 | name=$(echo "$test" | jq -r '.name') 40 | command=$(echo "$test" | jq -r '.command | @sh') 41 | 42 | echo -e "\n▶ Running test: $name" 43 | echo " Command: $command" 44 | 45 | # Eval to reconstruct command array safely 46 | eval "cmd=( $command )" 47 | 48 | valgrind --undef-value-errors=yes --errors-for-leak-kinds=definite --error-exitcode=1 --log-fd=3 "${cmd[@]}" > /dev/null 2>&1 49 | 50 | # Check valgrind exit code 51 | status=$? 
52 | if [[ $status -ne 0 ]]; then 53 | echo "❌ Valgrind detected errors in test: $name" 54 | exit $status 55 | fi 56 | done 57 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *_test 2 | *~ 3 | *.a 4 | *.so 5 | *.out 6 | *.o 7 | examples/example1d1 8 | examples/example1d1c 9 | examples/example2d1 10 | examples/guru1d1 11 | examples/example1d1f 12 | examples/example1d1cf 13 | examples/example2d1f 14 | examples/guru1d1f 15 | fortran/examples/guru1d1 16 | fortran/examples/nufft1d_demo 17 | fortran/examples/nufft1d_demo_legacy 18 | fortran/examples/nufft2d_demo 19 | fortran/examples/nufft2dmany_demo 20 | fortran/examples/nufft3d_demo 21 | fortran/examples/guru1d1f 22 | fortran/examples/nufft1d_demof 23 | fortran/examples/nufft1d_demo_legacyf 24 | fortran/examples/nufft2d_demof 25 | fortran/examples/nufft2dmany_demof 26 | fortran/examples/nufft3d_demof 27 | test/dumbinputs 28 | test/finufft1d_basicpassfail 29 | test/testlib 30 | __pycache__* 31 | 32 | docs/_build 33 | 34 | build/ 35 | .vscode/ 36 | 37 | cufinufft/python/cufinufft/docs/_build 38 | cufinufft/python/cufinufft/docs/_static 39 | cufinufft/python/cufinufft/docs/_templates 40 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/.gitmodules -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/mirrors-clang-format 3 | rev: 'v19.1.7' 4 | hooks: 5 | - id: clang-format 6 | types_or: [c++, c, cuda] 7 | exclude: '(^|/)(matlab/.*)$' 8 | - repo: 
https://github.com/pre-commit/pre-commit-hooks 9 | rev: v5.0.0 10 | hooks: 11 | - id: check-yaml 12 | - id: end-of-file-fixer 13 | - id: trailing-whitespace 14 | - id: check-illegal-windows-names 15 | - id: mixed-line-ending 16 | - repo: https://github.com/BlankSpruce/gersemi 17 | rev: 0.19.1 18 | hooks: 19 | - id: gersemi 20 | - repo: https://github.com/abravalheri/validate-pyproject 21 | rev: v0.23 # Use the latest stable version 22 | hooks: 23 | - id: validate-pyproject 24 | # Optional: Include additional validations from SchemaStore 25 | additional_dependencies: ["validate-pyproject-schema-store[all]"] 26 | files: ^python/(finufft|cufinufft)/pyproject\.toml$ 27 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | # "Read the Docs" doc-hosting website configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required (for this file format) 6 | version: 2 7 | 8 | # Set the OS, Python version and other tools you might need 9 | build: 10 | os: ubuntu-22.04 11 | tools: 12 | python: "3.11" 13 | 14 | # Build all formats 15 | formats: all 16 | 17 | # Build documentation in the docs/ directory with Sphinx 18 | sphinx: 19 | configuration: docs/conf.py 20 | 21 | # Optionally set the version of Python and requirements required to build your docs 22 | python: 23 | install: 24 | - requirements: docs/requirements.txt 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) 2017-2024 The Simons Foundation, Inc. - All Rights Reserved. 2 | 3 | See docs/ackn.rst for the list of code authors and contributors. 
4 | 5 | ------ 6 | 7 | FINUFFT is licensed under the Apache License, Version 2.0 (the 8 | "License"); you may not use this file except in compliance with the 9 | License. You may obtain a copy of the License at 10 | 11 | http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | Unless required by applicable law or agreed to in writing, software 14 | distributed under the License is distributed on an "AS IS" BASIS, 15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | See the License for the specific language governing permissions and 17 | limitations under the License. 18 | 19 | ------ 20 | 21 | Certain parts of this repository are contributed by others. 22 | For their license info (which are both BSD-like), see: 23 | 24 | fortran/cmcl_license.txt 25 | tutorial/utils/lgwt.m 26 | 27 | ------ 28 | 29 | If you find this library useful, or it helps you in creating software 30 | or publications, please let us know, and acknowledge that fact by citing our 31 | source repository: 32 | 33 | https://github.com/flatironinstitute/finufft 34 | 35 | and the corresponding journal articles (particularly the first for the CPU 36 | and/or the last for the GPU): 37 | 38 | A parallel non-uniform fast Fourier transform library based on an 39 | ``exponential of semicircle'' kernel. A. H. Barnett, J. F. Magland, 40 | and L. af Klinteberg. SIAM J. Sci. Comput. 41(5), C479-C504 (2019). 41 | 42 | Aliasing error of the $\exp (\beta \sqrt{1-z^2})$ kernel in the 43 | nonuniform fast Fourier transform. A. H. Barnett, 44 | Appl. Comput. Harmon. Anal. 51, 1-16 (2021). 45 | 46 | cuFINUFFT: a load-balanced GPU library for general-purpose nonuniform FFTs, 47 | Yu-hsuan Shih, Garrett Wright, Joakim Andén, Johannes Blaschke, and 48 | Alex H. Barnett. PDSEC2021 workshop of the IPDPS2021 conference. 
49 | https://arxiv.org/abs/2102.08463 50 | -------------------------------------------------------------------------------- /cmake/CheckAVX.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | bool is_sse2_supported() { 6 | std::array cpui; 7 | __cpuid(cpui.data(), 1); 8 | return (cpui[3] & (1 << 26)) != 0; 9 | } 10 | 11 | bool is_avx_supported() { 12 | std::array cpui; 13 | __cpuid(cpui.data(), 1); 14 | bool osUsesXSAVE_XRSTORE = (cpui[2] & (1 << 27)) != 0; 15 | bool cpuAVXSupport = (cpui[2] & (1 << 28)) != 0; 16 | if (osUsesXSAVE_XRSTORE && cpuAVXSupport) { 17 | unsigned long long xcrFeatureMask = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); 18 | return (xcrFeatureMask & 0x6) == 0x6; 19 | } 20 | return false; 21 | } 22 | 23 | bool is_avx2_supported() { 24 | std::array cpui; 25 | __cpuid(cpui.data(), 7); 26 | return (cpui[1] & (1 << 5)) != 0; 27 | } 28 | 29 | bool is_avx512_supported() { 30 | std::array cpui; 31 | __cpuidex(cpui.data(), 7, 0); 32 | return (cpui[1] & (1 << 16)) != 0; 33 | } 34 | 35 | int main() { 36 | if (is_avx512_supported()) { 37 | std::cout << "AVX512"; 38 | } else if (is_avx2_supported()) { 39 | std::cout << "AVX2"; 40 | } else if (is_avx_supported()) { 41 | std::cout << "AVX"; 42 | } else if (is_sse2_supported()) { 43 | std::cout << "SSE2"; 44 | } else { 45 | std::cout << "NONE"; 46 | } 47 | return 0; 48 | } 49 | -------------------------------------------------------------------------------- /cmake/setupCPM.cmake: -------------------------------------------------------------------------------- 1 | # USING CPM TO HANDLE DEPENDENCIES 2 | if(CPM_SOURCE_CACHE) 3 | set(CPM_DOWNLOAD_LOCATION "${CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake") 4 | elseif(DEFINED ENV{CPM_SOURCE_CACHE}) 5 | set(CPM_DOWNLOAD_LOCATION "$ENV{CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake") 6 | else() 7 | set(CPM_DOWNLOAD_LOCATION 
"${CMAKE_BINARY_DIR}/cmake/CPM_${CPM_DOWNLOAD_VERSION}.cmake") 8 | endif() 9 | 10 | if(NOT (EXISTS ${CPM_DOWNLOAD_LOCATION})) 11 | message(STATUS "Downloading CPM.cmake to ${CPM_DOWNLOAD_LOCATION}") 12 | file( 13 | DOWNLOAD 14 | https://github.com/cpm-cmake/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake 15 | ${CPM_DOWNLOAD_LOCATION} 16 | ) 17 | endif() 18 | 19 | include(${CPM_DOWNLOAD_LOCATION}) 20 | -------------------------------------------------------------------------------- /cmake/setupDUCC.cmake: -------------------------------------------------------------------------------- 1 | CPMAddPackage( 2 | NAME 3 | ducc0 4 | GIT_REPOSITORY 5 | https://gitlab.mpcdf.mpg.de/mtr/ducc.git 6 | GIT_TAG 7 | ${DUCC0_VERSION} 8 | DOWNLOAD_ONLY 9 | YES 10 | ) 11 | 12 | if(ducc0_ADDED) 13 | add_library( 14 | ducc0 15 | STATIC 16 | ${ducc0_SOURCE_DIR}/src/ducc0/infra/string_utils.cc 17 | ${ducc0_SOURCE_DIR}/src/ducc0/infra/threading.cc 18 | ${ducc0_SOURCE_DIR}/src/ducc0/infra/mav.cc 19 | ${ducc0_SOURCE_DIR}/src/ducc0/math/gridding_kernel.cc 20 | ${ducc0_SOURCE_DIR}/src/ducc0/math/gl_integrator.cc 21 | ) 22 | target_include_directories(ducc0 PUBLIC ${ducc0_SOURCE_DIR}/src/) 23 | target_compile_options(ducc0 PRIVATE $<$:${FINUFFT_ARCH_FLAGS}>) 24 | target_compile_options(ducc0 PRIVATE $<$:${FINUFFT_CXX_FLAGS_RELEASE}>) 25 | target_compile_options(ducc0 PRIVATE $<$:${FINUFFT_CXX_FLAGS_RELWITHDEBINFO}>) 26 | target_compile_features(ducc0 PRIVATE cxx_std_17) 27 | # private because we do not want to propagate this requirement 28 | set_target_properties( 29 | ducc0 30 | PROPERTIES 31 | MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>" 32 | POSITION_INDEPENDENT_CODE ${FINUFFT_POSITION_INDEPENDENT_CODE} 33 | ) 34 | check_cxx_compiler_flag(-ffast-math HAS_FAST_MATH) 35 | if(HAS_FAST_MATH) 36 | target_compile_options(ducc0 PRIVATE -ffast-math) 37 | endif() 38 | check_cxx_compiler_flag(/fp:fast HAS_FP_FAST) 39 | if(HAS_FP_FAST) 40 | target_compile_options(ducc0 PRIVATE 
/fp:fast) 41 | endif() 42 | if(NOT OpenMP_CXX_FOUND) 43 | find_package(Threads REQUIRED) 44 | target_link_libraries(ducc0 PRIVATE Threads::Threads) 45 | endif() 46 | enable_asan(ducc0) 47 | endif() 48 | -------------------------------------------------------------------------------- /cmake/setupSphinx.cmake: -------------------------------------------------------------------------------- 1 | CPMAddPackage( 2 | NAME 3 | sphinx_cmake 4 | GIT_REPOSITORY 5 | https://github.com/k0ekk0ek/cmake-sphinx.git 6 | GIT_TAG 7 | e13c40a 8 | DOWNLOAD_ONLY 9 | YES 10 | ) 11 | 12 | list(APPEND CMAKE_MODULE_PATH ${sphinx_cmake_SOURCE_DIR}/cmake/Modules) 13 | 14 | # requires pip install sphinx texext 15 | find_package(Sphinx) 16 | if(SPHINX_FOUND) 17 | message(STATUS "Sphinx found") 18 | sphinx_add_docs(finufft_sphinx BUILDER html SOURCE_DIRECTORY 19 | ${FINUFFT_SOURCE_DIR}/docs 20 | ) 21 | else() 22 | message(STATUS "Sphinx not found docs will not be generated") 23 | endif() 24 | -------------------------------------------------------------------------------- /cmake/setupXSIMD.cmake: -------------------------------------------------------------------------------- 1 | CPMAddPackage( 2 | NAME 3 | xtl 4 | GIT_REPOSITORY 5 | "https://github.com/xtensor-stack/xtl.git" 6 | GIT_TAG 7 | ${XTL_VERSION} 8 | EXCLUDE_FROM_ALL 9 | YES 10 | GIT_SHALLOW 11 | YES 12 | OPTIONS 13 | "XTL_DISABLE_EXCEPTIONS YES" 14 | ) 15 | 16 | CPMAddPackage( 17 | NAME 18 | xsimd 19 | GIT_REPOSITORY 20 | "https://github.com/xtensor-stack/xsimd.git" 21 | GIT_TAG 22 | ${XSIMD_VERSION} 23 | EXCLUDE_FROM_ALL 24 | YES 25 | GIT_SHALLOW 26 | YES 27 | OPTIONS 28 | "XSIMD_SKIP_INSTALL YES" 29 | "XSIMD_ENABLE_XTL_COMPLEX YES" 30 | ) 31 | -------------------------------------------------------------------------------- /cmake/workaround.cmake: -------------------------------------------------------------------------------- 1 | function(CPMAddPackage some_args) 2 | # This is a stub definition for gersemi formatting only. 
3 | endfunction() 4 | -------------------------------------------------------------------------------- /contributing.md: -------------------------------------------------------------------------------- 1 | This repository is formatted according to the .clang-format in the root directory. 2 | Please enable the reformatting hook before committing your changes. 3 | See [pre-commit](https://pre-commit.com/) for more information. 4 | A quick summary: 5 | ``` 6 | pip install pre-commit 7 | pre-commit install 8 | ``` 9 | 10 | We also suggest configuring your IDE to use the same formatting settings. 11 | 12 | Another suggestion is to ignore the formatting commits in your git configuration: 13 | ``` 14 | git config blame.ignoreRevsFile .git-blame-ignore-revs 15 | ``` 16 | -------------------------------------------------------------------------------- /devel/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(finufft_devel) 2 | # Set the minimum required version of CMake 3 | cmake_minimum_required(VERSION 3.5) 4 | 5 | # include cpm cmake, downloading it 6 | CPMAddPackage( 7 | NAME 8 | benchmark 9 | GITHUB_REPOSITORY 10 | google/benchmark 11 | VERSION 12 | 1.8.3 13 | OPTIONS 14 | "BENCHMARK_ENABLE_TESTING OFF" 15 | ) 16 | 17 | if(benchmark_ADDED) 18 | # patch benchmark target 19 | set_target_properties(benchmark PROPERTIES CXX_STANDARD 17) 20 | endif() 21 | 22 | add_executable(foldrescale foldrescale.cpp) 23 | target_link_libraries(foldrescale finufft benchmark xsimd) 24 | add_executable(padding padding.cpp) 25 | target_compile_features(padding PRIVATE cxx_std_17) 26 | target_link_libraries(padding finufft xsimd) 27 | target_compile_options(padding PRIVATE -march=native) 28 | -------------------------------------------------------------------------------- /devel/README: -------------------------------------------------------------------------------- 1 | Developer and experimental codes for FINUFFT 2 |
-------------------------------------------- 3 | 4 | For generating kernel coefficient codes in ../src, 5 | the developer must run from MATLAB the following: 6 | 7 | gen_all_horner_C_code.m : writes C-style Horner coeffs (pre-2024) 8 | * a single call writes upsampfac=2 and 1.25 9 | * calls gen_ker_horner_loop_C_code.m 10 | gen_all_horner_cpp_header.m : writes C++ header Horner coeffs (July 2024 on) 11 | * a single call writes upsampfac=2 and 1.25 12 | * calls gen_ker_horner_loop_cpp_code.m 13 | 14 | Both of the gen_ker_* scripts call for the solve of the coeffs for each w: 15 | ker_ppval_coeff_mat.m 16 | (which has the kernel definition in it, which must match spreadinterp.cpp) 17 | 18 | The universal location for kernel approximation (degree, ES beta setting) is: 19 | get_degree_and_beta.m 20 | Tweaks should be done here, and see instructions there for resulting acc test. 21 | Another code that has to match ../src/spreadinterp.cpp is: 22 | reverse_engineer_tol.m 23 | 24 | Re measuring overall accuracy, to compare kernels, make matlab, and run: 25 | matlab/test/fig_accuracy.m 26 | 27 | Barnett 8/20/24 28 | -------------------------------------------------------------------------------- /devel/TODO: -------------------------------------------------------------------------------- 1 | Side list of items to do for FINUFFT library that are not in github Issues 2 | ========================================================================== 3 | 4 | 5 | * Add a real-valued spreader option which will be faster and use half the RAM: but how avoid code duplication? Extend the C macros we now have for dual-prec. 6 | 7 | * Check huge arrays >2^31 working in 2d,3d in C++, and for any d in MATLAB/octave. 8 | - matlab/MEX used to give zero answers for >=2^31 array sizes (big1dtest.m). 9 | - test huge arrays >=2^31 2d, 3d in C++. 10 | - test huge arrays >=2^31 in octave/mex. 11 | - ditto py. 12 | 13 | * Package as RPM and .deb for linux, brew for OSX. 14 | 15 | * R interface?
16 | 17 | 18 | 19 | LOWER PRIORITY TODO / DISCUSSIONS: 20 | 21 | * Return FFTW's internal state to single-threaded upon exit from finufft (deals with Marina & Andras' problem where fftw was then kicked from single to multi-threaded by an intervening finufft call). Remind ourselves why? 22 | * understand why two modeords not give bit-wise same answers in check_modeords.m (really why it's stochastic either exactly zero or around 1e-13) 23 | * Decide if non vs omp get different lib names? (like FFTW) -> not yet. 24 | * Intel MKL FFT interface option instead of FFTW? 25 | * recoup DFM's PyPI publishing of finufftpy, maybe awkward 26 | * make finufft.cpp shuffle routines dcomplex interface and native dcomplex arith (remove a bunch of 2* in indexing, and have no fftw_complex refs in them. However, need first to make sure using complex divide isn't slower than real divide used now). 27 | * rewrite fftw3 plans via 64bit guru interface to allow eg 1024^3 type 1 or 2 in 3D. Not sure needed (only for huge 1d transforms). see http://www.fftw.org/fftw3_doc/Guru-Interface.html#Guru-Interface 28 | -------------------------------------------------------------------------------- /devel/cuda/draft_interfaces_c+py_Jun2023.txt: -------------------------------------------------------------------------------- 1 | int finufft_makeplan(int type, int dim, int64_t* nmodes, int iflag, int ntr, double eps, finufft_plan* plan, nufft_opts* opts) 2 | int cufinufft_makeplan(int type, int dim, int* nmodes, int iflag, int ntransf, double tol, int maxbatchsize, cufinufft_plan *plan, cufinufft_opts *opts) 3 | // Remove maxbatchsize (-> opts), use int64_t. Rename ntransf to ntr, tol to eps. 4 | 5 | int finufft_setpts(finufft_plan plan, int64_t m, double* x, double* y, double* z, int64_t n, double* s, double* t, double* u) 6 | int cufinufft_setpts(int m, double* x, double* y, double* z, int n, double* s, double* t, double *u, cufinufft_plan plan) 7 | // Move plan to the beginning, use int64_t.
8 | 9 | int finufft_execute(finufft_plan plan, complex double* c, complex double* f) 10 | int cufinufft_execute(cuDoubleComplex* c, cuDoubleComplex* f, cufinufft_plan plan) 11 | // Move plan to beginning. 12 | 13 | int finufft_destroy(finufft_plan plan) 14 | int cufinufft_destroy(cufinufft_plan plan) 15 | 16 | void finufft_default_opts(finufft_opts* opts) 17 | int cufinufft_default_opts(int type, int dim, cufinufft_opts* opts); 18 | // Return type make void. Can we avoid specifying type and dim when calling? Allow "default" values for various parameters in opts struct? Yes, use meth=0 for auto. 19 | 20 | 21 | // & do same for float32 versions. 22 | 23 | 24 | -------------- PYTHON ---------- 25 | 26 | Plan.__init__(nufft_type, n_modes_or_dim, n_trans=1, eps=1e-06, 27 | isign=None, dtype='complex128', **kwargs) 28 | 29 | cufinufft.__init__(nufft_type, modes, n_trans=1, eps=1e-06, isign=None, 30 | dtype=numpy.float32, **kwargs) 31 | 32 | # avoid reliance on np? use dtype = 'complex64' or 'complex128' 33 | (and deprecate the float dtype options, and in FINUFFT). 34 | 35 | Plan.setpts(x=None, y=None, z=None, s=None, t=None, u=None) 36 | cufinufft.set_pts(kx, ky=None, kz=None) 37 | # Why not kx=None? What about type 3? 38 | 39 | Plan.execute(data, out=None) 40 | cufinufft.execute(c, fk) 41 | # Allow returning output array. Specify in/out order. 42 | -------------------------------------------------------------------------------- /devel/fig_speed_ker_ppval.m: -------------------------------------------------------------------------------- 1 | % script to generate and plot timing of raw kernel evals via various methods. 2 | % Uses test_ker_ppval and its temp data file (see test_ker_ppval.cpp). 
3 | % Barnett 4/23/18 4 | 5 | clear 6 | nam = '/tmp/test_ker_ppval.dat'; % wipes any old data; make header for humans: 7 | system(['echo "# M w t_plain t_horner relsuperr" > ' nam]); 8 | 9 | Mwant=1e7; % how many NU pts for a 1d1 or 1d2 NUFFT 10 | 11 | ws=2:16; % range of kernel widths, do timing tests... 12 | for j=1:numel(ws), w=ws(j) 13 | % glib via shell must matter here, since links to glibc(?) w/o fast simd... 14 | % system(sprintf('./test_ker_ppval %d %d',Mwant,w)); % links to slower glibc? 15 | system(sprintf('(unset LD_LIBRARY_PATH; ./test_ker_ppval %d %d)',Mwant,w)); 16 | end 17 | 18 | fid=fopen(nam,'r'); % read and make plot... 19 | fgets(fid); % ignore header line 20 | [y,count] = fscanf(fid, '%f', [5,inf]); 21 | fclose(fid); 22 | if (count~=5*numel(ws)), warning('file wrong number of lines!'); end 23 | y = y'; % since rows of text file come in as cols of array 24 | M = y(:,1); 25 | w = y(:,2); 26 | r = (M.*w)./y(:,3); % rate in evals/sec 27 | r2 = (M.*w)./y(:,4); % " 28 | e = y(:,5); % rel err 29 | figure; plot(w,[r r2]/1e6,'+-'); xlabel('w'); ylabel('eval rate (Meval/s)'); 30 | 31 | ylim([0, 700]) 32 | grid on 33 | 34 | legend('exp eval','Horner'); title(sprintf('1thr, with padding, M=%d',Mwant)) 35 | %print -dpng 1thr_ker_eval_speeds_withpadding.png 36 | 37 | 38 | % xeon gcc6.4: exp max out at 40 Meval/s; horner 170-300 Meval/s. 39 | 40 | % cf ludvig's i7 results: 0.2 sec for 1e7*(w=12) = 600 Meval/s 41 | % (but that's special to m=12, also w/o the domain conditional?) 42 | % Wouldn't it be nice if could get that for all i7 cases. 43 | 44 | % Concl: for xeon w/ gcc, horner is much better! (5-10x) 45 | 46 | % Jan 2020: Ludvig padded the Horner loop too, giving a little boost for 47 | % w = 2,3 (mod 4) in GCC7,9, and big boost for old GCC5.4. 48 | % We're at 400-700 Meval/s on i7 for all compilers except GCC8 now, 49 | % in -O3 not -Ofast (which we can't use in FINUFFT). 
50 | -------------------------------------------------------------------------------- /devel/foldrescale.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "BINNING (small N)...." 4 | 5 | g++ -O3 -march=native -funroll-loops -I../include -fopenmp foldrescale_perf2.cpp -o foldrescale_perf2 -lgomp 6 | ./foldrescale_perf2 7 | 8 | g++ -O3 -march=native -funroll-loops -I../include -fopenmp foldrescale_perf2.cpp -o foldrescale_perf2 -lgomp -ffast-math -fno-finite-math-only 9 | ./foldrescale_perf2 10 | 11 | echo "NOBIN...." 12 | 13 | g++ -O3 -march=native -funroll-loops -I../include -fopenmp foldrescale_perf2.cpp -o foldrescale_perf2 -lgomp -DNOBIN 14 | ./foldrescale_perf2 15 | 16 | g++ -O3 -march=native -funroll-loops -I../include -fopenmp foldrescale_perf2.cpp -o foldrescale_perf2 -lgomp -ffast-math -fno-finite-math-only -DNOBIN 17 | ./foldrescale_perf2 18 | -------------------------------------------------------------------------------- /devel/gen_all_horner_C_code.m: -------------------------------------------------------------------------------- 1 | % Script to make all C code for looped Horner eval of kernels of all widths. 2 | % writes to "ker" array, from a variable "z", and switches by width "w". 3 | % Now does both upsampfacs. 4 | % Resulting C code needs only including in a function. 5 | 6 | % Barnett 4/23/18; now calling Ludvig's loop version from 4/25/18. 7 | % version including low upsampfac, 6/17/18. 8 | % Ludvig put in w=4n padding, 1/31/20. Mystery about why d was bigger 2/6/20. 9 | % split out code for degree, beta, etc; loop upsampfacs Barnett 7/22/24. 
10 | clear 11 | opts = struct(); 12 | 13 | for upsampfac = [2.0, 1.25]; % sigma: either 2 (default) or low (eg 5/4) 14 | fprintf('upsampfac = %g...\n',upsampfac) 15 | 16 | ws = 2:16; 17 | opts.wpad = false; % pad kernel eval to multiple of 4 18 | 19 | if upsampfac==2, fid = fopen('../include/cufinufft/contrib/ker_horner_allw_loop.inc','w'); 20 | else, fid = fopen('../include/cufinufft/contrib/ker_lowupsampfac_horner_allw_loop.inc','w'); 21 | end 22 | fwrite(fid,sprintf('// Code generated by gen_all_horner_C_code.m in finufft/devel\n')); 23 | fwrite(fid,sprintf('// Authors: Alex Barnett & Ludvig af Klinteberg.\n// (C) The Simons Foundation, Inc.\n')); 24 | for j=1:numel(ws) 25 | w = ws(j); 26 | [d,beta] = get_degree_and_beta(w,upsampfac); 27 | fprintf('w=%d\td=%d\tbeta=%.3g\n',w,d,beta); 28 | str = gen_ker_horner_loop_C_code(w,d,beta,opts); 29 | if j==1 % write switch statement 30 | fwrite(fid,sprintf(' if constexpr (w==%d) {\n',w)); 31 | else 32 | fwrite(fid,sprintf(' } else if constexpr (w==%d) {\n',w)); 33 | end 34 | for i=1:numel(str); fwrite(fid,[' ',str{i}]); end 35 | end 36 | fwrite(fid,sprintf(' } else\n printf("width not implemented!\\n");\n')); 37 | fclose(fid); 38 | 39 | end 40 | -------------------------------------------------------------------------------- /devel/get_degree_and_beta.m: -------------------------------------------------------------------------------- 1 | function [d,beta] = get_degree_and_beta(w,upsampfac) 2 | % GET_DEGREE_AND_BETA defines degree & beta from w & upsampfac 3 | % 4 | % [d,beta] = get_degree_and_beta(w,upsampfac) 5 | % 6 | % Universal definition for piecewise poly degree chosen for kernel 7 | % coeff generation by matlab, and the ES kernel beta parameter. 8 | % The map from tol to width w must match code in spreadinterp used to 9 | % choose w. 10 | % 11 | % Used by all other *.m codes for generating coeffs. 
12 | % 13 | % To test: use KER_PPVAL_COEFF_MAT self-test 14 | % 15 | % To verify accuracy in practice, compile FINUFFT CPU then run 16 | % test/checkallaccs.sh and matlab/test/fig_accuracy.m 17 | % 18 | % Also see: REVERSE_ENGINEER_TOL, KER_PPVAL_COEFF_MAT 19 | 20 | % Barnett 7/22/24 21 | if upsampfac==0.0, upsampfac=2.0; end 22 | 23 | % if d set to 0 in following, means it gets auto-chosen... 24 | if upsampfac==2 % hardwire the betas for this default case 25 | betaoverws = [2.20 2.26 2.38 2.30]; % must match setup_spreader 26 | beta = betaoverws(min(4,w-1)) * w; % uses last entry for w>=5 27 | d = w + 1 + (w<=7) - (w==2); % between 1-2 more degree than w. tweak 28 | elseif upsampfac==1.25 % use formulae, must match params in setup_spreader 29 | gamma=0.97; % safety factor 30 | betaoverws = gamma*pi*(1-1/(2*upsampfac)); % from cutoff freq formula 31 | beta = betaoverws * w; 32 | d = ceil(0.7*w+1.3); % less, since beta smaller. tweak 33 | %d = 0; % auto-choose override? No, too much jitter. 34 | end 35 | 36 | if d==0 37 | tol = reverse_engineer_tol(w,upsampfac); 38 | opts.cutoff = 0.5 * tol; % fudge to get more poly-approx acc than tol 39 | C = ker_ppval_coeff_mat(w,0,beta,opts); % do solve merely to get d 40 | d = size(C,1)-1; % extract the auto-chosen d 41 | end 42 | -------------------------------------------------------------------------------- /devel/i7_1thr_ker_eval_speeds.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/devel/i7_1thr_ker_eval_speeds.png -------------------------------------------------------------------------------- /devel/interp_square_nowrap.cpp: -------------------------------------------------------------------------------- 1 | // this is code I was messing with timing using time2d2interp.cpp 2 | // around May 3, 2018, to figure how wrapping was slowing down spreading. 
3 | 4 | void interp_square_nowrap(FLT *out, FLT *du, FLT *ker1, FLT *ker2, BIGINT i1, BIGINT i2, 5 | BIGINT N1, BIGINT N2, int ns) 6 | // *************** don't periodic wrap, avoid ptrs. correct if no NU pts nr edge 7 | { 8 | out[0] = 0.0; 9 | out[1] = 0.0; 10 | if (0) { // plain 11 | for (int dy = 0; dy < ns; dy++) { 12 | BIGINT j = N1 * (i2 + dy) + i1; 13 | for (int dx = 0; dx < ns; dx++) { 14 | FLT k = ker1[dx] * ker2[dy]; 15 | out[0] += du[2 * j] * k; 16 | out[1] += du[2 * j + 1] * k; 17 | ++j; 18 | } 19 | } 20 | } else { 21 | for (int dy = 0; dy < ns; dy++) { 22 | BIGINT j = N1 * (i2 + dy) + i1; 23 | // #pragma omp simd 24 | for (int dx = 0; dx < ns; dx++) { 25 | FLT k = ker1[dx] * ker2[dy]; 26 | out[0] += du[2 * j] * k; 27 | out[1] += du[2 * j + 1] * k; 28 | ++j; 29 | } 30 | } 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /devel/non-contiguous_frequency_inputs_bug2d1.py: -------------------------------------------------------------------------------- 1 | # ahb updated for v2.0 interface, and complex c. Bug seems to have been fixed :) 2 | 3 | import numpy as np 4 | 5 | from finufft import nufft2d1 6 | 7 | c = np.complex128(np.random.rand(2)) 8 | 9 | omega = np.arange(4).reshape((2, 2)) / 3 * np.pi 10 | 11 | x0 = omega[:, 0] 12 | y0 = omega[:, 1] 13 | 14 | f0 = np.zeros((4, 4), order='F', dtype=np.complex128) 15 | 16 | nufft2d1(x0, y0, c, f0.shape, out=f0, eps=1e-14) 17 | 18 | x1 = x0.copy() 19 | y1 = y0.copy() 20 | 21 | f1 = np.zeros((4, 4), order='F', dtype=np.complex128) 22 | 23 | nufft2d1(x1, y1, c, f1.shape, out=f1, eps=1e-14) 24 | 25 | print('difference: %e' % np.linalg.norm(f0 - f1)) 26 | -------------------------------------------------------------------------------- /devel/plans_fall23.txt: -------------------------------------------------------------------------------- 1 | FINUFFT (CPU+GPU) plans, Fall 2023: 2 | 3 | pick meeting date 4 | 5 | * CPU spreader/interp, bring in Wenda's stuff -> Marco? 
6 | 7 | * GPU type 3 Marco? 8 | 9 | * implement Reinecke's sparse sliced FFT, w/ many-vectors too. CPU 10 | Libin help? 11 | 12 | * binaries release with GH Releases feature (Assets?). Not crucial. 13 | 14 | * CPU perf tests, standardized way to benchmark spread/interp, FFT, H<>D, etc. 15 | Add to docs/devnotes.rst how to run benchmarks. 16 | Robert + Joakim 17 | 18 | * tutorial in docs: on eg inverse FFT by CG iteration. 19 | 20 | * doc for py local install #340 21 | 22 | * #340 docs for GPU simple interface 23 | 24 | Go through Issues & prioritize. 25 | 26 | PRs. 27 | -------------------------------------------------------------------------------- /devel/reverse_engineer_tol.m: -------------------------------------------------------------------------------- 1 | function tol = reverse_engineer_tol(w,upsampfac) 2 | % REVERSE_ENGINEER_TOL reconstructs tolerance from width and upsampfac 3 | % 4 | % tol = reverse_engineer_tol(w,upsampfac) 5 | % 6 | % For fixed upsampfac (aka sigma), must be the inverse function for 7 | % how w is chosen from tol in spreadinterp.cpp:setup_spreader() 8 | 9 | % Barnett 7/22/24 10 | 11 | if upsampfac==2.0 12 | tol = 10^(1-w); 13 | else 14 | tol = exp(-pi*w*sqrt(1-1/upsampfac)); % generic case, covers sigma=1.25 15 | end 16 | -------------------------------------------------------------------------------- /docs/FIlogo_200.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/FIlogo_200.png -------------------------------------------------------------------------------- /docs/c1d.docsrc: -------------------------------------------------------------------------------- 1 | int @F1d1(int64_t M, double* x, complex* c, int iflag, double eps, int64_t N1, complex* f, finufft_opts* opts) 2 | 3 | 1D complex nonuniform FFT of type 1 (nonuniform to uniform).
4 | @t 5 | M-1 6 | f[k1] = SUM c[j] exp(+/-i k1 x[j]) for -N1/2 <= k1 <= (N1-1)/2 7 | j=0 8 | 9 | Inputs: 10 | @nt 11 | @mi 12 | @x 13 | @ci 14 | @f 15 | @e 16 | N1 number of output Fourier modes to be computed 17 | @o 18 | 19 | Outputs: 20 | f Fourier mode coefficients (size N1*ntr complex array) 21 | @r 22 | @no 23 | @notes12 24 | 25 | 26 | int @F1d2(int64_t M, double* x, complex* c, int iflag, double eps, int64_t N1, complex* f, finufft_opts* opts) 27 | 28 | 1D complex nonuniform FFT of type 2 (uniform to nonuniform). 29 | @t 30 | c[j] = SUM f[k1] exp(+/-i k1 x[j]) for j = 0,...,M-1 31 | k1 32 | where the sum is over integers -N1/2 <= k1 <= (N1-1)/2. 33 | 34 | Inputs: 35 | @nt 36 | @mo 37 | @x 38 | @f 39 | @e 40 | N1 number of input Fourier modes 41 | f Fourier mode coefficients (size N1*ntr complex array) 42 | @o 43 | 44 | Outputs: 45 | @co 46 | @r 47 | @no 48 | @notes12 49 | 50 | 51 | int @F1d3(int64_t M, double* x, complex* c, int iflag, double eps, int64_t N, double* s, complex* f, finufft_opts* opts) 52 | 53 | 1D complex nonuniform FFT of type 3 (nonuniform to nonuniform). 54 | @t 55 | M-1 56 | f[k] = SUM c[j] exp(+/-i s[k] x[j]), for k = 0,...,N-1 57 | j=0 58 | 59 | Inputs: 60 | @nt 61 | @mi 62 | @xr 63 | @ci 64 | @f 65 | @e 66 | @n 67 | @s 68 | @o 69 | 70 | Outputs: 71 | f Fourier transform values at targets (size N*ntr complex array) 72 | @r 73 | @no 74 | -------------------------------------------------------------------------------- /docs/changelog.rst: -------------------------------------------------------------------------------- 1 | .. _changelog: 2 | 3 | Changelog 4 | ========= 5 | 6 | .. literalinclude:: ../CHANGELOG 7 | -------------------------------------------------------------------------------- /docs/genmatlabhelp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This extracts and concatenates MATLAB documentation blocks from the .m files.
3 | # Two steps: 1) keep comment lines beginning with %, 2) remove the first 4 | # char of each line. 5 | # Barnett 11/2/17, changed output name 7/24/20. Added GPU 3/31/25. 6 | 7 | # CPU: The output is a text file... 8 | OUT=matlabhelp.doc 9 | 10 | # zero the size... 11 | > $OUT 12 | 13 | # dump the matlab comment blocks 14 | for i in ../matlab/finufft?d?.m ../matlab/finufft_plan.m 15 | do 16 | printf "::\n\n" >> $OUT 17 | sed -n '/^%/p' $i | sed 's/^.//' >> $OUT 18 | printf "\n" >> $OUT 19 | done 20 | 21 | 22 | # --------------------------------- 23 | # now GPU: output is a text file... 24 | OUT=matlabgpuhelp.doc 25 | 26 | # zero the size... 27 | > $OUT 28 | 29 | # dump the matlab comment blocks 30 | for i in ../matlab/cufinufft?d?.m ../matlab/cufinufft_plan.m 31 | do 32 | printf "::\n\n" >> $OUT 33 | sed -n '/^%/p' $i | sed 's/^.//' >> $OUT 34 | printf "\n" >> $OUT 35 | done 36 | 37 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. finufft documentation master file. This also contains what appears on the 2 | front HTML page. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | Barnett 8/27/20: this is HTML only; split out separate latexindex.rst 6 | 7 | .. _index: 8 | 9 | Flatiron Institute Nonuniform Fast Fourier Transform 10 | ======================================================== 11 | 12 | .. include:: overview.src 13 | 14 | 15 | Documentation contents 16 | ======================== 17 | 18 | ..
toctree:: 19 | :maxdepth: 3 20 | 21 | install 22 | install_gpu 23 | dirs 24 | math 25 | cex 26 | c 27 | c_gpu 28 | opts 29 | error 30 | trouble 31 | performance 32 | tut 33 | fortran 34 | matlab 35 | matlab_gpu 36 | python 37 | python_gpu 38 | julia 39 | changelog 40 | nfft_migr 41 | cufinufft_migration 42 | devnotes 43 | related 44 | users 45 | ackn 46 | refs 47 | -------------------------------------------------------------------------------- /docs/julia.rst: -------------------------------------------------------------------------------- 1 | .. _julia: 2 | 3 | Julia interfaces (CPU and GPU) 4 | ============================== 5 | 6 | Principal author Ludvig af Klinteberg and others have built and maintain `FINUFFT.jl `_, an interface from the `Julia `_ language. This official Julia package supports 32-bit and 64-bit precision, now on both CPU and GPU (via `CUDA.jl`), via a common interface. 7 | The Julia package installation automatically downloads pre-built CPU binaries of the FINUFFT library for Linux, macOS, Windows and FreeBSD (for a full list see `finufft_jll `_), and the GPU binary for Linux (see `cufinufft_jll `_). 8 | 9 | `FINUFFT.jl` has itself been wrapped as part of `NFFT.jl `_, which contains an "abstract" interface 10 | to any NUFFT in Julia, with FINUFFT as an example. This was by Tobias Knopp and coworkers, starting around 2022. 11 | Their 12 | `performance comparison page `_ 13 | shows that FINUFFT matches their native Julia implementation for speed of type 1 14 | and type 2 transforms 15 | in 3D, and beats NFFT, and with less precomputation. 16 | In 1D and 2D, the native Julia implementation is 1-2 times faster 17 | than FINUFFT in their tests on uniformly-random nonuniform points. 18 | -------------------------------------------------------------------------------- /docs/latexindex.rst: -------------------------------------------------------------------------------- 1 | .. _index: 2 | 3 | .. finufft documentation master file - latex only.
4 | You can adapt this file completely to your liking, but it should at least 5 | contain the root `toctree` directive. 6 | Barnett 8/27/20: split this out as separate latexindex.rst. 7 | Also see latex section of conf.py for tocdepth override, etc. 8 | 9 | :orphan: 10 | 11 | 12 | Flatiron Institute Nonuniform Fast Fourier Transform 13 | ===================================================== 14 | 15 | .. the toctree seems to have to precede any text for latex chapters correct 16 | numbering. Note that ch.1 is now the overview (unlike in index.rst): 17 | 18 | .. toctree:: 19 | 20 | overview 21 | install 22 | dirs 23 | math 24 | cex 25 | c 26 | opts 27 | error 28 | trouble 29 | tut 30 | fortran 31 | matlab 32 | python 33 | julia 34 | changelog 35 | devnotes 36 | related 37 | users 38 | ackn 39 | refs 40 | -------------------------------------------------------------------------------- /docs/logo-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/logo-32x32.png -------------------------------------------------------------------------------- /docs/logo-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/logo-small.png -------------------------------------------------------------------------------- /docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/logo.png -------------------------------------------------------------------------------- /docs/logo_gpu.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/logo_gpu.png -------------------------------------------------------------------------------- /docs/makefile.doc: -------------------------------------------------------------------------------- 1 | make[1]: Entering directory '/home/marco/repos/finufft' 2 | Makefile for FINUFFT CPU library. Please specify your task: 3 | make lib - build the main library (in lib/ and lib-static/) 4 | make examples - compile and run all codes in examples/ 5 | make test - compile and run quick math validation tests 6 | make perftest - compile and run (slower) performance tests 7 | make fortran - compile and run Fortran tests and examples 8 | make matlab - compile MATLAB interfaces (no test) 9 | make octave - compile and test octave interfaces 10 | make python - compile and test python interfaces 11 | make all - do all the above (around 1 minute; assumes you have MATLAB, etc) 12 | make spreadtest - compile & run spreader-only tests (no FFT) 13 | make spreadtestall - small set spreader-only tests for CI use 14 | make objclean - remove all object files, preserving libs & MEX 15 | make clean - also remove all lib, MEX, py, and demo executables 16 | make setup - check (and possibly download) dependencies 17 | make setupclean - delete downloaded dependencies 18 | For faster (multicore) compilation, append, for example, -j8 19 | 20 | Make options: 21 | 'make [task] OMP=OFF' for single-threaded (no refs to OpenMP) 22 | 'make [task] FFT=DUCC' for DUCC0 FFT (otherwise uses FFTW3) 23 | You must at least 'make objclean' before changing such options! 24 | 25 | Also see docs/install.rst and docs/README 26 | make[1]: Leaving directory '/home/marco/repos/finufft' 27 | -------------------------------------------------------------------------------- /docs/overview.rst: -------------------------------------------------------------------------------- 1 | .. 
_index: 2 | 3 | Overview 4 | ========= 5 | 6 | .. include:: overview.src 7 | -------------------------------------------------------------------------------- /docs/pics/10000x1x1-type-1-upsamp1.25-precd-thread1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/10000x1x1-type-1-upsamp1.25-precd-thread1.png -------------------------------------------------------------------------------- /docs/pics/10000x1x1-type-1-upsamp1.25-precf-thread1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/10000x1x1-type-1-upsamp1.25-precf-thread1.png -------------------------------------------------------------------------------- /docs/pics/10000x1x1-type-1-upsamp2.00-precd-thread1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/10000x1x1-type-1-upsamp2.00-precd-thread1.png -------------------------------------------------------------------------------- /docs/pics/10000x1x1-type-1-upsamp2.00-precf-thread1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/10000x1x1-type-1-upsamp2.00-precf-thread1.png -------------------------------------------------------------------------------- /docs/pics/10000x1x1-type-2-upsamp1.25-precd-thread1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/10000x1x1-type-2-upsamp1.25-precd-thread1.png 
-------------------------------------------------------------------------------- /docs/pics/10000x1x1-type-2-upsamp1.25-precf-thread1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/10000x1x1-type-2-upsamp1.25-precf-thread1.png -------------------------------------------------------------------------------- /docs/pics/10000x1x1-type-2-upsamp2.00-precd-thread1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/10000x1x1-type-2-upsamp2.00-precd-thread1.png -------------------------------------------------------------------------------- /docs/pics/10000x1x1-type-2-upsamp2.00-precf-thread1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/10000x1x1-type-2-upsamp2.00-precf-thread1.png -------------------------------------------------------------------------------- /docs/pics/10000x1x1-type-3-upsamp1.25-precd-thread1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/10000x1x1-type-3-upsamp1.25-precd-thread1.png -------------------------------------------------------------------------------- /docs/pics/10000x1x1-type-3-upsamp1.25-precf-thread1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/10000x1x1-type-3-upsamp1.25-precf-thread1.png -------------------------------------------------------------------------------- 
/docs/pics/10000x1x1-type-3-upsamp2.00-precd-thread1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/10000x1x1-type-3-upsamp2.00-precd-thread1.png -------------------------------------------------------------------------------- /docs/pics/10000x1x1-type-3-upsamp2.00-precf-thread1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/10000x1x1-type-3-upsamp2.00-precf-thread1.png -------------------------------------------------------------------------------- /docs/pics/192x192x128-type-1-upsamp1.25-precd-thread16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/192x192x128-type-1-upsamp1.25-precd-thread16.png -------------------------------------------------------------------------------- /docs/pics/192x192x128-type-1-upsamp1.25-precd-thread32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/192x192x128-type-1-upsamp1.25-precd-thread32.png -------------------------------------------------------------------------------- /docs/pics/192x192x128-type-1-upsamp2.00-precd-thread16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/192x192x128-type-1-upsamp2.00-precd-thread16.png -------------------------------------------------------------------------------- /docs/pics/192x192x128-type-1-upsamp2.00-precd-thread32.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/192x192x128-type-1-upsamp2.00-precd-thread32.png -------------------------------------------------------------------------------- /docs/pics/192x192x128-type-2-upsamp1.25-precd-thread16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/192x192x128-type-2-upsamp1.25-precd-thread16.png -------------------------------------------------------------------------------- /docs/pics/192x192x128-type-2-upsamp1.25-precd-thread32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/192x192x128-type-2-upsamp1.25-precd-thread32.png -------------------------------------------------------------------------------- /docs/pics/192x192x128-type-2-upsamp2.00-precd-thread16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/192x192x128-type-2-upsamp2.00-precd-thread16.png -------------------------------------------------------------------------------- /docs/pics/192x192x128-type-2-upsamp2.00-precd-thread32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/192x192x128-type-2-upsamp2.00-precd-thread32.png -------------------------------------------------------------------------------- /docs/pics/192x192x128-type-3-upsamp1.25-precd-thread16.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/192x192x128-type-3-upsamp1.25-precd-thread16.png -------------------------------------------------------------------------------- /docs/pics/192x192x128-type-3-upsamp1.25-precd-thread32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/192x192x128-type-3-upsamp1.25-precd-thread32.png -------------------------------------------------------------------------------- /docs/pics/192x192x128-type-3-upsamp2.00-precd-thread16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/192x192x128-type-3-upsamp2.00-precd-thread16.png -------------------------------------------------------------------------------- /docs/pics/192x192x128-type-3-upsamp2.00-precd-thread32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/192x192x128-type-3-upsamp2.00-precd-thread32.png -------------------------------------------------------------------------------- /docs/pics/250x250x250-type-1-upsamp2.00-precd-thread1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/250x250x250-type-1-upsamp2.00-precd-thread1.png -------------------------------------------------------------------------------- /docs/pics/250x250x250-type-2-upsamp2.00-precd-thread1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/250x250x250-type-2-upsamp2.00-precd-thread1.png -------------------------------------------------------------------------------- /docs/pics/250x250x250-type-3-upsamp2.00-precd-thread1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/250x250x250-type-3-upsamp2.00-precd-thread1.png -------------------------------------------------------------------------------- /docs/pics/320x320x1-type-1-upsamp1.25-precd-thread1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-1-upsamp1.25-precd-thread1.png -------------------------------------------------------------------------------- /docs/pics/320x320x1-type-1-upsamp1.25-precf-thread1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-1-upsamp1.25-precf-thread1.png -------------------------------------------------------------------------------- /docs/pics/320x320x1-type-1-upsamp1.25-precf-thread16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-1-upsamp1.25-precf-thread16.png -------------------------------------------------------------------------------- /docs/pics/320x320x1-type-1-upsamp1.25-precf-thread32.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-1-upsamp1.25-precf-thread32.png -------------------------------------------------------------------------------- /docs/pics/320x320x1-type-1-upsamp2.00-precd-thread1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-1-upsamp2.00-precd-thread1.png -------------------------------------------------------------------------------- /docs/pics/320x320x1-type-1-upsamp2.00-precf-thread1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-1-upsamp2.00-precf-thread1.png -------------------------------------------------------------------------------- /docs/pics/320x320x1-type-1-upsamp2.00-precf-thread16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-1-upsamp2.00-precf-thread16.png -------------------------------------------------------------------------------- /docs/pics/320x320x1-type-1-upsamp2.00-precf-thread32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-1-upsamp2.00-precf-thread32.png -------------------------------------------------------------------------------- /docs/pics/320x320x1-type-2-upsamp1.25-precd-thread1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-2-upsamp1.25-precd-thread1.png -------------------------------------------------------------------------------- /docs/pics/320x320x1-type-2-upsamp1.25-precf-thread1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-2-upsamp1.25-precf-thread1.png -------------------------------------------------------------------------------- /docs/pics/320x320x1-type-2-upsamp1.25-precf-thread16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-2-upsamp1.25-precf-thread16.png -------------------------------------------------------------------------------- /docs/pics/320x320x1-type-2-upsamp1.25-precf-thread32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-2-upsamp1.25-precf-thread32.png -------------------------------------------------------------------------------- /docs/pics/320x320x1-type-2-upsamp2.00-precd-thread1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-2-upsamp2.00-precd-thread1.png -------------------------------------------------------------------------------- /docs/pics/320x320x1-type-2-upsamp2.00-precf-thread1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-2-upsamp2.00-precf-thread1.png -------------------------------------------------------------------------------- /docs/pics/320x320x1-type-2-upsamp2.00-precf-thread16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-2-upsamp2.00-precf-thread16.png -------------------------------------------------------------------------------- /docs/pics/320x320x1-type-2-upsamp2.00-precf-thread32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-2-upsamp2.00-precf-thread32.png -------------------------------------------------------------------------------- /docs/pics/320x320x1-type-3-upsamp1.25-precd-thread1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-3-upsamp1.25-precd-thread1.png -------------------------------------------------------------------------------- /docs/pics/320x320x1-type-3-upsamp1.25-precf-thread1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-3-upsamp1.25-precf-thread1.png -------------------------------------------------------------------------------- /docs/pics/320x320x1-type-3-upsamp1.25-precf-thread16.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-3-upsamp1.25-precf-thread16.png -------------------------------------------------------------------------------- /docs/pics/320x320x1-type-3-upsamp1.25-precf-thread32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-3-upsamp1.25-precf-thread32.png -------------------------------------------------------------------------------- /docs/pics/320x320x1-type-3-upsamp2.00-precd-thread1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-3-upsamp2.00-precd-thread1.png -------------------------------------------------------------------------------- /docs/pics/320x320x1-type-3-upsamp2.00-precf-thread1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-3-upsamp2.00-precf-thread1.png -------------------------------------------------------------------------------- /docs/pics/320x320x1-type-3-upsamp2.00-precf-thread16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-3-upsamp2.00-precf-thread16.png -------------------------------------------------------------------------------- /docs/pics/320x320x1-type-3-upsamp2.00-precf-thread32.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/320x320x1-type-3-upsamp2.00-precf-thread32.png -------------------------------------------------------------------------------- /docs/pics/contft1d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/contft1d.png -------------------------------------------------------------------------------- /docs/pics/contft1dN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/contft1dN.png -------------------------------------------------------------------------------- /docs/pics/contft1dans.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/contft1dans.png -------------------------------------------------------------------------------- /docs/pics/contft1dsing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/contft1dsing.png -------------------------------------------------------------------------------- /docs/pics/contft2dans.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/contft2dans.png -------------------------------------------------------------------------------- /docs/pics/contft2dnodes.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/contft2dnodes.png -------------------------------------------------------------------------------- /docs/pics/cufinufft_announce.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/cufinufft_announce.png -------------------------------------------------------------------------------- /docs/pics/fser1d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/fser1d.png -------------------------------------------------------------------------------- /docs/pics/fser2d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/fser2d.png -------------------------------------------------------------------------------- /docs/pics/grf1d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/grf1d.png -------------------------------------------------------------------------------- /docs/pics/inv1d2err.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/inv1d2err.png -------------------------------------------------------------------------------- /docs/pics/inv1d2err_wellcond.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/inv1d2err_wellcond.png -------------------------------------------------------------------------------- /docs/pics/pois_fft.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/pois_fft.png -------------------------------------------------------------------------------- /docs/pics/pois_fhat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/pois_fhat.png -------------------------------------------------------------------------------- /docs/pics/pois_nufft.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/pois_nufft.png -------------------------------------------------------------------------------- /docs/pics/pois_nugrid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/pics/pois_nugrid.png -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | texext 2 | sphinx_rtd_theme 3 | -------------------------------------------------------------------------------- /docs/spreadpic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flatironinstitute/finufft/d5b36b10d65c11d18079573a5ab8d95f7fe94a20/docs/spreadpic.png -------------------------------------------------------------------------------- 
/docs/tut.rst: -------------------------------------------------------------------------------- 1 | .. _tut: 2 | 3 | Tutorials and application demos 4 | ================================== 5 | 6 | The following are instructive demos of using FINUFFT for a variety of 7 | spectrally-related tasks arising in 8 | scientific computing and signal/image processing. We will slowly grow the 9 | list (contact us to add one). 10 | For conciseness of code, and ease of writing, they are currently 11 | in MATLAB (they should work on versions at least back to R2017a). 12 | 13 | .. toctree:: 14 | 15 | tutorial/serieseval 16 | tutorial/contft 17 | tutorial/peripois2d 18 | tutorial/grf 19 | tutorial/inv1d2 20 | 21 | For further applications, see :ref:`references `, and: 22 | 23 | * These software tutorial `PDF slides `_. 24 | 25 | * These seminar `PDF slides `_. 26 | 27 | * `Fast Fresnel diffraction `_ for optics and acoustics applications. 28 | 29 | * `Equispaced Fourier methods for Gaussian process regression `_ as described in https://arxiv.org/abs/2210.10210 and https://arxiv.org/abs/2305.11065 30 | 31 | * Tutorials for PyNUFFT with 1D and 2D reconstruction examples `here `_. 32 | 33 | * The numerical sampling of `random plane waves `_. 
34 | -------------------------------------------------------------------------------- /examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(EXAMPLES 2 | guru1d1 3 | guru1d1f 4 | guru2d1 5 | many1d1 6 | simple1d1 7 | simple1d1f 8 | simulplans1d1 9 | ) 10 | set(EXAMPLES_OPENMP threadsafe1d1 threadsafe2d2f) 11 | set(EXAMPLES_C guru1d1c simple1d1c simple1d1cf) 12 | 13 | find_library(MATH_LIBRARY m) 14 | 15 | foreach(EXAMPLE ${EXAMPLES}) 16 | add_executable(${EXAMPLE} ${EXAMPLE}.cpp) 17 | target_compile_features(${EXAMPLE} PRIVATE cxx_std_14) 18 | target_link_libraries(${EXAMPLE} PRIVATE finufft) 19 | if(CMAKE_PROJECT_NAME STREQUAL "FINUFFT") 20 | enable_asan(${EXAMPLE}) 21 | endif() 22 | endforeach() 23 | 24 | foreach(EXAMPLE ${EXAMPLES_C}) 25 | add_executable(${EXAMPLE} ${EXAMPLE}.c) 26 | target_link_libraries(${EXAMPLE} PRIVATE finufft) 27 | if(CMAKE_PROJECT_NAME STREQUAL "FINUFFT") 28 | enable_asan(${EXAMPLE}) 29 | endif() 30 | if(MATH_LIBRARY) 31 | target_link_libraries(${EXAMPLE} PRIVATE ${MATH_LIBRARY}) 32 | endif() 33 | endforeach() 34 | 35 | if(FINUFFT_USE_OPENMP) 36 | foreach(EXAMPLE ${EXAMPLES_OPENMP}) 37 | add_executable(${EXAMPLE} ${EXAMPLE}.cpp) 38 | target_link_libraries(${EXAMPLE} PRIVATE finufft OpenMP::OpenMP_CXX) 39 | target_compile_features(${EXAMPLE} PRIVATE cxx_std_11) 40 | if(CMAKE_PROJECT_NAME STREQUAL "FINUFFT") 41 | enable_asan(${EXAMPLE}) 42 | endif() 43 | endforeach() 44 | endif() 45 | -------------------------------------------------------------------------------- /examples/cuda/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | file(GLOB example_src "*.cpp") 2 | 3 | foreach(srcfile ${example_src}) 4 | string(REPLACE ".cpp" "" executable ${srcfile}) 5 | get_filename_component(executable ${executable} NAME) 6 | add_executable(${executable} ${srcfile}) 7 | target_include_directories(${executable} PUBLIC ${CUFINUFFT_INCLUDE_DIRS}) 8 | 
target_link_libraries(${executable} cufinufft CUDA::cufft CUDA::cudart) 9 | target_compile_features(${executable} PRIVATE cxx_std_17) 10 | endforeach() 11 | -------------------------------------------------------------------------------- /examples/cuda/README: -------------------------------------------------------------------------------- 1 | Examples of cuFINUFFT usage in C++ and Python 2 | 3 | Here we show 2D transforms of type 1 and 2, being performed, and tested, 4 | in C++, and in Python. In each case, a batch of transforms is done with 5 | new coefficients or weights, but the same set of nonuniform points; this 6 | explains the suffix "many" in the code names. You may set ntransf=1 to 7 | perform a single transform. Default options are used. In each case the 8 | four steps (plan, setpts, execute, destroy) are used. A math test is also 9 | performed; see the FINUFFT documentation for the definitions of the 10 | transforms: https://finufft.readthedocs.io/en/latest/math.html 11 | 12 | For more usage examples see: 13 | 14 | ../../test/cuda/cufinufft*.cu 15 | ../../python/cufinufft/tests/*.py 16 | -------------------------------------------------------------------------------- /fortran/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library( 2 | directft 3 | OBJECT 4 | directft/dirft1d.f 5 | directft/dirft1df.f 6 | directft/dirft2d.f 7 | directft/dirft2df.f 8 | directft/dirft3d.f 9 | directft/dirft3df.f 10 | ) 11 | 12 | set(FORTRAN_EXAMPLES 13 | guru1d1 14 | nufft1d_demo 15 | nufft2d_demo 16 | nufft2dmany_demo 17 | nufft3d_demo 18 | simple1d1 19 | ) 20 | 21 | foreach(EXAMPLE ${FORTRAN_EXAMPLES}) 22 | add_executable(fort_${EXAMPLE} examples/${EXAMPLE}.f) 23 | add_executable(fort_${EXAMPLE}f examples/${EXAMPLE}f.f) 24 | 25 | target_link_libraries(fort_${EXAMPLE} PRIVATE directft finufft ${FINUFFT_FFTLIBS}) 26 | target_link_libraries(fort_${EXAMPLE}f PRIVATE directft finufft ${FINUFFT_FFTLIBS}) 27 | endforeach() 28 | 
-------------------------------------------------------------------------------- /fortran/cmcl_license.txt: -------------------------------------------------------------------------------- 1 | Below is the license applying to the original fortran drivers and direct 2 | evaluation routines modified in this directory. This license does not 3 | apply to the rest of FINUFFT. 4 | 5 | ------------- 6 | 7 | Copyright (c) 2009-2014, Leslie Greengard, June-Yub Lee and Zydrunas Gimbutas 8 | All rights reserved. 9 | 10 | Redistribution and use in source and binary forms, with or without 11 | modification, are permitted provided that the following conditions are met: 12 | 13 | 1. Redistributions of source code must retain the above copyright notice, this 14 | list of conditions and the following disclaimer. 15 | 2. Redistributions in binary form must reproduce the above copyright notice, 16 | this list of conditions and the following disclaimer in the documentation 17 | and/or other materials provided with the distribution. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 23 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 24 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 26 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 28 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 | 30 | The views and conclusions contained in the software and documentation are those 31 | of the authors and should not be interpreted as representing official policies, 32 | either expressed or implied, of the FreeBSD Project. 33 | -------------------------------------------------------------------------------- /fortran/directft/README: -------------------------------------------------------------------------------- 1 | This directory contains the CMCL NUFFT direct summation implementations, 2 | plus single-precision versions by Alex Barnett, 2017. 3 | It also contains the legendary prini.f, which is currently unused. 4 | -------------------------------------------------------------------------------- /include/cufinufft.h: -------------------------------------------------------------------------------- 1 | // Defines the C++/C user interface to CUFINUFFT library. 2 | #include 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | typedef struct cufinufft_plan_s *cufinufft_plan; 10 | typedef struct cufinufft_fplan_s *cufinufftf_plan; 11 | 12 | #ifdef __cplusplus 13 | extern "C" { 14 | #endif 15 | void cufinufft_default_opts(cufinufft_opts *opts); 16 | 17 | int cufinufft_makeplan(int type, int dim, const int64_t *n_modes, int iflag, int ntr, 18 | double eps, cufinufft_plan *d_plan_ptr, cufinufft_opts *opts); 19 | int cufinufftf_makeplan(int type, int dim, const int64_t *n_modes, int iflag, int ntr, 20 | float eps, cufinufftf_plan *d_plan_ptr, cufinufft_opts *opts); 21 | 22 | int cufinufft_setpts(cufinufft_plan d_plan, int64_t M, double *d_x, double *d_y, 23 | double *d_z, int N, double *d_s, double *d_t, double *d_u); 24 | int cufinufftf_setpts(cufinufftf_plan d_plan, int64_t M, float *d_x, float *d_y, 25 | float *d_z, int N, float *d_s, float *d_t, float *d_u); 26 | 27 | int cufinufft_execute(cufinufft_plan d_plan, cuDoubleComplex *d_c, cuDoubleComplex *d_fk); 28 | int cufinufftf_execute(cufinufftf_plan d_plan, cuFloatComplex *d_c, cuFloatComplex *d_fk); 29 | 
30 | int cufinufft_destroy(cufinufft_plan d_plan); 31 | int cufinufftf_destroy(cufinufftf_plan d_plan); 32 | #ifdef __cplusplus 33 | } 34 | #endif 35 | -------------------------------------------------------------------------------- /include/cufinufft/cudeconvolve.h: -------------------------------------------------------------------------------- 1 | #ifndef __CUDECONVOLVE_H__ 2 | #define __CUDECONVOLVE_H__ 3 | 4 | #include 5 | 6 | namespace cufinufft { 7 | namespace deconvolve { 8 | template 9 | __global__ void deconvolve_1d(int ms, int nf1, int fw_width, cuda_complex *fw, 10 | cuda_complex *fk, T *fwkerhalf1); 11 | template 12 | __global__ void amplify_1d(int ms, int nf1, int fw_width, cuda_complex *fw, 13 | cuda_complex *fk, T *fwkerhalf2); 14 | template 15 | __global__ void deconvolve_2d(int ms, int mt, int nf1, int nf2, int fw_width, 16 | cuda_complex *fw, cuda_complex *fk, T *fwkerhalf1, 17 | T *fwkerhalf2); 18 | template 19 | __global__ void amplify_2d(int ms, int mt, int nf1, int nf2, int fw_width, 20 | cuda_complex *fw, cuda_complex *fk, T *fwkerhalf1, 21 | T *fwkerhalf2); 22 | 23 | template 24 | __global__ void deconvolve_3d(int ms, int mt, int mu, int nf1, int nf2, int nf3, 25 | int fw_width, cuda_complex *fw, cuda_complex *fk, 26 | T *fwkerhalf1, T *fwkerhalf2, T *fwkerhalf3); 27 | template 28 | __global__ void amplify_3d(int ms, int mt, int mu, int nf1, int nf2, int nf3, 29 | int fw_width, cuda_complex *fw, cuda_complex *fk, 30 | T *fwkerhalf1, T *fwkerhalf2, T *fwkerhalf3); 31 | 32 | template 33 | int cudeconvolve1d(cufinufft_plan_t *d_mem, int blksize); 34 | template 35 | int cudeconvolve2d(cufinufft_plan_t *d_mem, int blksize); 36 | template 37 | int cudeconvolve3d(cufinufft_plan_t *d_mem, int blksize); 38 | } // namespace deconvolve 39 | } // namespace cufinufft 40 | #endif 41 | -------------------------------------------------------------------------------- /include/cufinufft/defs.h: 
-------------------------------------------------------------------------------- 1 | #ifndef CUFINUFFT_DEFS_H 2 | #define CUFINUFFT_DEFS_H 3 | 4 | #include 5 | // constants needed within common 6 | // upper bound on w, ie nspread, even when padded (see evaluate_kernel_vector); also for 7 | // common 8 | #define MAX_NSPREAD 16 9 | #define MIN_NSPREAD 2 10 | 11 | // max number of positive quadr nodes 12 | #define MAX_NQUAD 100 13 | 14 | // Fraction growth cut-off in utils:arraywidcen, sets when translate in type-3 15 | #define ARRAYWIDCEN_GROWFRAC 0.1 16 | 17 | // FIXME: If cufft ever takes N > INT_MAX... 18 | constexpr int32_t MAX_NF = std::numeric_limits::max(); 19 | 20 | // allow compile-time switch off of openmp, so compilation without any openmp 21 | // is done (Note: _OPENMP is automatically set by -fopenmp compile flag) 22 | #ifdef _OPENMP 23 | #include 24 | // point to actual omp utils 25 | #define MY_OMP_GET_NUM_THREADS() omp_get_num_threads() 26 | #define MY_OMP_GET_MAX_THREADS() omp_get_max_threads() 27 | #define MY_OMP_GET_THREAD_NUM() omp_get_thread_num() 28 | #define MY_OMP_SET_NUM_THREADS(x) omp_set_num_threads(x) 29 | #define MY_OMP_SET_NESTED(x) omp_set_nested(x) 30 | #else 31 | // non-omp safe dummy versions of omp utils 32 | #define MY_OMP_GET_NUM_THREADS() 1 33 | #define MY_OMP_GET_MAX_THREADS() 1 34 | #define MY_OMP_GET_THREAD_NUM() 0 35 | #define MY_OMP_SET_NUM_THREADS(x) 36 | #define MY_OMP_SET_NESTED(x) 37 | #endif 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /include/cufinufft/memtransfer.h: -------------------------------------------------------------------------------- 1 | #ifndef __MEMTRANSFER_H__ 2 | #define __MEMTRANSFER_H__ 3 | 4 | #include "cufinufft/types.h" 5 | 6 | namespace cufinufft { 7 | namespace memtransfer { 8 | 9 | template int allocgpumem1d_plan(cufinufft_plan_t *d_plan); 10 | template int allocgpumem1d_nupts(cufinufft_plan_t *d_plan); 11 | template void 
freegpumemory(cufinufft_plan_t *d_plan); 12 | template int allocgpumem2d_plan(cufinufft_plan_t *d_plan); 13 | template int allocgpumem2d_nupts(cufinufft_plan_t *d_plan); 14 | template int allocgpumem3d_plan(cufinufft_plan_t *d_plan); 15 | template int allocgpumem3d_nupts(cufinufft_plan_t *d_plan); 16 | 17 | } // namespace memtransfer 18 | } // namespace cufinufft 19 | #endif 20 | -------------------------------------------------------------------------------- /include/cufinufft_opts.h: -------------------------------------------------------------------------------- 1 | #ifndef __CUFINUFFT_OPTS_H__ 2 | #define __CUFINUFFT_OPTS_H__ 3 | 4 | typedef struct cufinufft_opts { // see cufinufft_default_opts() for defaults 5 | double upsampfac; // upsampling ratio sigma, only 2.0 (standard) is implemented 6 | /* following options are for gpu */ 7 | int gpu_method; // 1: nonuniform-pts driven, 2: shared mem (SM) 8 | int gpu_sort; // when NU-pts driven: 0: no sort (GM), 1: sort (GM-sort) 9 | 10 | int gpu_binsizex; // used for 2D, 3D subproblem method 11 | int gpu_binsizey; 12 | int gpu_binsizez; 13 | 14 | int gpu_obinsizex; // used for 3D spread block gather method 15 | int gpu_obinsizey; 16 | int gpu_obinsizez; 17 | 18 | int gpu_maxsubprobsize; 19 | int gpu_kerevalmeth; // 0: direct exp(sqrt()), 1: Horner ppval 20 | 21 | int gpu_spreadinterponly; // 0: NUFFT, 1: spread or interpolation only 22 | 23 | int gpu_maxbatchsize; 24 | 25 | /* multi-gpu support */ 26 | int gpu_device_id; 27 | 28 | void *gpu_stream; 29 | 30 | int modeord; // (type 1,2 only): 0 CMCL-style increasing mode order 31 | // 1 FFT-style mode order 32 | 33 | int debug; // 0: no debug, 1: debug 34 | } cufinufft_opts; 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /include/finufft.fh: -------------------------------------------------------------------------------- 1 | c Fortran header recreating finufft_opts struct in fortran (f90 style). 
2 | c This must be kept synchronized with finufft_opts.h, matching its order. 3 | c Also see finufft_mod.f90 and ../fortran/finufftfort.cpp. 4 | c Barnett 5/29/20. One prec 7/2/20. Fix ordering bug 11/29/24. 5 | c erase chkbnds 1/7/25. 6 | 7 | type finufft_opts 8 | 9 | c data handling opts... 10 | integer modeord, spreadinterponly 11 | 12 | c diagnostic opts... 13 | integer debug, spread_debug, showwarn 14 | 15 | c alg performance opts... 16 | integer nthreads, fftw, spread_sort, spread_kerevalmeth 17 | integer spread_kerpad 18 | real*8 upsampfac 19 | integer spread_thread, maxbatchsize, spread_nthr_atomic 20 | integer spread_max_sp_size 21 | integer fftw_lock_fun, fftw_unlock_fun, fftw_lock_data 22 | 23 | end type 24 | -------------------------------------------------------------------------------- /include/finufft.h: -------------------------------------------------------------------------------- 1 | // Defines the public C++ and C compatible user interface to FINUFFT library. 2 | 3 | // This contains both single and double precision user-facing commands. 4 | // "macro-safe" rewrite, including the plan object, Barnett 5/21/22-6/7/22. 5 | // They will clobber any prior macros starting FINUFFT*. 6 | 7 | /* Devnotes. 8 | A) Two precisions done by including the "either precision" headers twice. 9 | No use of the private headers for lib/test/example compilation is made. 10 | 11 | B) Good ways to debug this header --- 12 | 1) preprocessor output (gets the general idea the macros worked): 13 | cpp include/finufft.h -Iinclude 14 | cpp -dD include/finufft.h -Iinclude 15 | then https://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html 16 | 2) compile examples in both precs and C/C++, needed to catch typos: 17 | g++ examples/simple1d1.cpp -Iinclude -c 18 | g++ examples/simple1d1f.cpp -Iinclude -c 19 | gcc examples/simple1d1c.c -Iinclude -c 20 | gcc examples/simple1d1cf.c -Iinclude -c 21 | */ 22 | 23 | #ifndef FINUFFT_H 24 | #define FINUFFT_H 25 | 26 | // prec-indep stuff. 
both these are thus made public-facing 27 | #include 28 | #include 29 | 30 | // Public error numbers 31 | #include 32 | 33 | // octave (mkoctfile) needs this otherwise it doesn't know what int64_t is! 34 | #include 35 | #define FINUFFT_BIGINT int64_t 36 | 37 | // this macro name has to be safe since exposed to user 38 | #define FINUFFT_SINGLE 39 | #include 40 | #undef FINUFFT_SINGLE 41 | // do it again for double-prec... 42 | #include 43 | 44 | // clean up any purely local defs that are not in finufft_eitherprec.h... 45 | #undef FINUFFT_BIGINT 46 | 47 | #endif // FINUFFT_H 48 | -------------------------------------------------------------------------------- /include/finufft_errors.h: -------------------------------------------------------------------------------- 1 | #ifndef FINUFFT_ERRORS_H 2 | #define FINUFFT_ERRORS_H 3 | 4 | // ---------- Global error/warning output codes for the library --------------- 5 | // All documentation is at ../docs/error.rst (not here): 6 | enum { 7 | FINUFFT_WARN_EPS_TOO_SMALL = 1, 8 | FINUFFT_ERR_MAXNALLOC = 2, 9 | FINUFFT_ERR_SPREAD_BOX_SMALL = 3, 10 | FINUFFT_ERR_SPREAD_PTS_OUT_RANGE = 4, // DEPRECATED 11 | FINUFFT_ERR_SPREAD_ALLOC = 5, 12 | FINUFFT_ERR_SPREAD_DIR = 6, 13 | FINUFFT_ERR_UPSAMPFAC_TOO_SMALL = 7, 14 | FINUFFT_ERR_HORNER_WRONG_BETA = 8, 15 | FINUFFT_ERR_NTRANS_NOTVALID = 9, 16 | FINUFFT_ERR_TYPE_NOTVALID = 10, 17 | FINUFFT_ERR_ALLOC = 11, 18 | FINUFFT_ERR_DIM_NOTVALID = 12, 19 | FINUFFT_ERR_SPREAD_THREAD_NOTVALID = 13, 20 | FINUFFT_ERR_NDATA_NOTVALID = 14, 21 | FINUFFT_ERR_CUDA_FAILURE = 15, 22 | FINUFFT_ERR_PLAN_NOTVALID = 16, 23 | FINUFFT_ERR_METHOD_NOTVALID = 17, 24 | FINUFFT_ERR_BINSIZE_NOTVALID = 18, 25 | FINUFFT_ERR_INSUFFICIENT_SHMEM = 19, 26 | FINUFFT_ERR_NUM_NU_PTS_INVALID = 20, 27 | FINUFFT_ERR_INVALID_ARGUMENT = 21, 28 | FINUFFT_ERR_LOCK_FUNS_INVALID = 22, 29 | FINUFFT_ERR_NTHREADS_NOTVALID = 23, 30 | }; 31 | #endif 32 | -------------------------------------------------------------------------------- 
/include/finufft_mod.f90: -------------------------------------------------------------------------------- 1 | module finufft_mod 2 | ! Fortran header recreating finufft_opts struct in fortran (f90 style) 3 | ! Module version, contributed by Reinhard Neder, 1/20/23. Order fixed 1/7/25. 4 | ! This must be kept synchronized with finufft_opts.h, matching its order. 5 | ! Also see ../fortran/finufftfort.cpp. 6 | ! Relies on "use ISO_C_BINDING" in the fortran module. 7 | use iso_c_binding 8 | type finufft_opts 9 | 10 | ! data handling opts... 11 | integer(kind=C_INT) :: modeord, spreadinterponly 12 | 13 | ! diagnostic opts... 14 | integer(kind=C_INT) :: debug, spread_debug, showwarn 15 | 16 | ! alg perf opts... 17 | integer(kind=C_INT) :: nthreads,fftw,spread_sort,spread_kerevalmeth 18 | integer(kind=C_INT) :: spread_kerpad 19 | real(kind=C_DOUBLE) :: upsampfac 20 | integer(kind=C_INT) :: spread_thread, maxbatchsize 21 | integer(kind=C_INT) :: spread_nthr_atomic, spread_max_sp_size 22 | integer(kind=C_SIZE_T) :: fftw_lock_fun, fftw_unlock_fun, fftw_lock_data 23 | ! really, last should be type(C_PTR) :: etc, but fails to print nicely 24 | 25 | end type finufft_opts 26 | end module finufft_mod 27 | -------------------------------------------------------------------------------- /include/finufft_spread_opts.h: -------------------------------------------------------------------------------- 1 | #ifndef FINUFFT_SPREAD_OPTS_H 2 | #define FINUFFT_SPREAD_OPTS_H 3 | 4 | // C-compatible options struct for spread/interpolation within FINUFFT 5 | 6 | // Notes: 1) Has to be part of public-facing 7 | // headers since finufft_plan has an instance of this spread_opts struct. 8 | // 2) Deliberately uses fixed types (no macro precision-switching). 9 | 10 | typedef struct finufft_spread_opts { 11 | // See spreadinterp:setup_spreader for default values of the following fields. 12 | // This is the main documentation for these options... 
13 | int nspread; // w, the kernel width in grid pts 14 | int spread_direction; // 1 means spread NU->U, 2 means interpolate U->NU 15 | int sort; // 0: don't sort NU pts, 1: do, 2: heuristic choice 16 | int kerevalmeth; // 0: direct exp(sqrt()), or 1: Horner ppval, fastest 17 | int kerpad; // 0: no pad w to mult of 4, 1: do pad 18 | // (this helps SIMD for kerevalmeth=0, eg on i7). 19 | int nthreads; // # threads for spreadinterp (0: use max avail) 20 | int sort_threads; // # threads for sort (0: auto-choice up to nthreads) 21 | int max_subproblem_size; // # pts per t1 subprob; sets extra RAM per thread 22 | int flags; // binary flags for timing only (may give wrong ans 23 | // if changed from 0!). See spreadinterp.h 24 | int debug; // 0: silent, 1: small text output, 2: verbose 25 | int atomic_threshold; // num threads before switching spreadSorted to using atomic ops 26 | double upsampfac; // sigma, upsampling factor 27 | // ES kernel specific consts for eval. No longer FLT, to avoid name clash... 28 | double ES_beta; 29 | double ES_halfwidth; 30 | double ES_c; 31 | } finufft_spread_opts; 32 | 33 | #endif // FINUFFT_SPREAD_OPTS_H 34 | -------------------------------------------------------------------------------- /lib-static/README: -------------------------------------------------------------------------------- 1 | This directory is where the static libraries will appear. 2 | -------------------------------------------------------------------------------- /lib/README: -------------------------------------------------------------------------------- 1 | This directory is where the shared libraries will appear. 2 | -------------------------------------------------------------------------------- /make-platforms/README: -------------------------------------------------------------------------------- 1 | This directory contains platform-specific variable settings for the 2 | GNU makefile. They are used by CI. 
3 | 4 | Please copy one of these up to ../make.inc and possibly modify for 5 | your needs. 6 | 7 | Barnett 1/7/25 8 | -------------------------------------------------------------------------------- /make-platforms/make.inc.GCC7: -------------------------------------------------------------------------------- 1 | # example of how to override compiler choices in makefile. 2 | # Here we use GCC 7 in linux ubuntu 16.04 LTS (provides /usr/bin/g++-7, etc) 3 | # You should make your own. 4 | 5 | CXX=g++-7 6 | CC=gcc-7 7 | FC=gfortran-7 8 | 9 | CXXFLAGS += -g -Wall 10 | -------------------------------------------------------------------------------- /make-platforms/make.inc.linux_ICC: -------------------------------------------------------------------------------- 1 | # makefile overrides to use Intel ICC compiler & libiomp5. double prec only. 2 | # (fixed the fatal mistake of leaving -lgomp in the compile line! 6/4/20) 3 | 4 | CXX=icpc 5 | CC=icc 6 | FC=ifort 7 | 8 | # we want to start fresh, ignore the GCC flags... 9 | CFLAGS = -O3 -xHost 10 | # CFLAGS += -lsvml % fails (gives nans & no faster) 11 | 12 | CXXFLAGS = $(CFLAGS) 13 | FFLAGS = $(CFLAGS) 14 | 15 | # It is crucial to *replace* -fopenmp & -lgomp with Intel's equiv 16 | # (crucial in the sense that linking libiomp5 *and* libgomp gives weird 17 | # segfaults, corruption in parallel blocks, or works fine, frustratingly): 18 | OMPFLAGS = -qopenmp 19 | OMPLIBS = 20 | -------------------------------------------------------------------------------- /make-platforms/make.inc.macosx_arm64: -------------------------------------------------------------------------------- 1 | # Makefile variable overrides for cross-compiling for ARM silicon via 2 | # clang on Mac OSX. 3 | # 4 | # This is used for CI. 5 | # Libin Lu 12/21/23. 
6 | 7 | # compile flags for use with clang: (note absence of -march, etc) 8 | CFLAGS = -O3 -arch arm64 -target arm64-apple-macos11 9 | 10 | # If you're getting warning messages of the form: 11 | # ld: warning: object file (lib-static/libfinufft.a(finufft1d.o)) was built for 12 | # newer OSX version (10.13) than being linked (10.9) 13 | # Then you can uncomment the following two lines with the older version number 14 | # (in this example -mmacosx-version-min=10.9) 15 | # 16 | #CFLAGS += "-mmacosx-version-min=" 17 | 18 | CXX=clang++ 19 | CC=clang 20 | 21 | # assuming libomp and fftw are installed through homebrew 22 | OMP_ROOT = $(shell brew --prefix libomp) 23 | FFTW_ROOT = $(shell brew --prefix fftw) 24 | 25 | # taken from makefile... 26 | CFLAGS += -I include -I/usr/local/include -I$(OMP_ROOT)/include -I$(FFTW_ROOT)/include 27 | FFLAGS = $(CFLAGS) 28 | CXXFLAGS = $(CFLAGS) 29 | LIBS += -L/usr/local/lib -L$(OMP_ROOT)/lib -L$(FFTW_ROOT)/lib 30 | LDFLAGS += -arch arm64 -target arm64-apple-macos11 31 | 32 | # OpenMP with clang needs following... 33 | OMPFLAGS = -Xpreprocessor -fopenmp 34 | OMPLIBS = -lomp 35 | # since fftw3_omp doesn't work in OSX, we need... 36 | FFTWOMPSUFFIX=threads 37 | 38 | # MATLAB interface: this will probably segfault. Instead we suggest you use 39 | # make.inc.macosx_clang_matlab 40 | 41 | # Some of these will depend on your FFTW library location... 42 | MFLAGS += -I/usr/local/include -I/opt/homebrew/include -L/usr/local/lib -L/opt/homebrew/lib -lm 43 | # may need to edit for your MATLAB version location... 44 | MEX = $(shell ls -d /Applications/MATLAB_R20**.app)/bin/mex 45 | # Also see docs/install.rst for possible edits to MATLAB's MEX XML file. 46 | -------------------------------------------------------------------------------- /make-platforms/make.inc.macosx_clang: -------------------------------------------------------------------------------- 1 | # Makefile variable overrides for Mac OSX compilation with CLANG. 
2 | # 3 | # Note that we have not been able to link against gfortran, so if you need 4 | # fortran interfaces, use make.inc.macosx_gcc-* instead. 5 | # 6 | # Copy this file to make.inc, and if needed edit for your setup. 7 | # Barnett 10/27/18. Input from Yu-Hsuan Shih, Amit Moskovich. 8 | 9 | # (note that /usr/bin/g++,gcc are aliased to clang/LLVM, so CXX,CC unchanged) 10 | 11 | # compile flags for use with clang: (note absence of -march, etc) 12 | CFLAGS = -O3 13 | 14 | # If you're getting warning messages of the form: 15 | # ld: warning: object file (lib-static/libfinufft.a(finufft1d.o)) was built for 16 | # newer OSX version (10.13) than being linked (10.9) 17 | # Then you can uncomment the following two lines with the older version number 18 | # (in this example -mmacosx-version-min=10.9) 19 | # 20 | #CFLAGS += "-mmacosx-version-min=" 21 | 22 | CXX=clang++ 23 | CC=clang 24 | 25 | # taken from makefile... 26 | CFLAGS += -I include -I/usr/local/include -I/usr/local/opt/libomp/include -I/opt/homebrew/include 27 | FFLAGS = $(CFLAGS) 28 | CXXFLAGS = $(CFLAGS) 29 | LIBS += -L/usr/local/lib -L/opt/homebrew/lib 30 | 31 | # OpenMP with clang needs following... 32 | OMPFLAGS = -Xpreprocessor -fopenmp 33 | OMPLIBS = -L/usr/local/lib -L/usr/local/opt/libomp/lib -lomp 34 | # since fftw3_omp doesn't work in OSX, we need... 35 | FFTWOMPSUFFIX=threads 36 | 37 | 38 | # MATLAB interface: this will probably segfault. Instead we suggest you use 39 | # make.inc.macosx_clang_matlab 40 | 41 | # Some of these will depend on your FFTW library location... 42 | MFLAGS += -I/usr/local/include -L/usr/local/lib -lm 43 | # may need to edit for your MATLAB version location... 44 | MEX = $(shell ls -d /Applications/MATLAB_R20**.app)/bin/mex 45 | # Also see docs/install.rst for possible edits to MATLAB's MEX XML file. 
46 | -------------------------------------------------------------------------------- /make-platforms/make.inc.macosx_gcc-10: -------------------------------------------------------------------------------- 1 | # Makefile variable overrides for Mac OSX compilation with GCC v.10.* 2 | # 3 | # Use this if you'll need to link against gfortran. 4 | # 5 | # Barnett 10/27/18. Input from Yu-Hsuan Shih, Amit Moskovich. 6 | # Lu minor modification for gcc-10 12/06/2020 7 | 8 | # By default we use clang/LLVM (which is aliased to /usr/lib/gcc, etc). 9 | # This make.inc is if you want to override this. 10 | # Get gcc from brew then use, eg: 11 | CXX=g++-10 12 | CC=gcc-10 13 | FC=gfortran 14 | 15 | # (compile flags for use with GCC are as in linux makefile) 16 | CFLAGS += 17 | 18 | # If you're getting warning messages of the form: 19 | # ld: warning: object file (lib-static/libfinufft.a(finufft1d.o)) was built for 20 | # newer OSX version (10.13) than being linked (10.9) 21 | # Then you can uncomment the following two lines with the older version number 22 | # (in this example -mmacosx-version-min=10.9) 23 | # 24 | #CFLAGS += "-mmacosx-version-min=" 25 | 26 | # as in makefile, but with the brew /usr/local/ stuff... 27 | CFLAGS += -I src -I/usr/local/include -I/opt/homebrew/include 28 | FFLAGS = $(CFLAGS) 29 | CXXFLAGS = $(CFLAGS) 30 | LIBS += -L/usr/local/lib -L/opt/homebrew/lib 31 | 32 | # OpenMP with GCC on OSX needs following... 33 | OMPFLAGS = -fopenmp 34 | OMPLIBS = -L/usr/local/lib -lgomp 35 | # since fftw3_omp doesn't work in OSX, we need... 36 | FFTWOMPSUFFIX=threads 37 | 38 | # MATLAB interface: 39 | # some of these will depend on your FFTW library location... 40 | MFLAGS += -I/usr/local/include -I/opt/homebrew/include -L/usr/local/lib -L/opt/homebrew/lib -lm 41 | # edit for your MATLAB version location... 42 | MEX = $(shell ls -d /Applications/MATLAB_R20**.app)/bin/mex 43 | # Also see docs/install.rst for possible edits to MATLAB's MEX XML file. 
44 | 45 | # If you have segfault of MATLAB then please try the following: 46 | #MOMPFLAGS = -D_OPENMP 47 | #OMPFLAGS = -Xpreprocessor -fopenmp 48 | #OMPLIBS = $(shell ls -d /Applications/MATLAB_R20**.app)/sys/os/maci64/libiomp5.dylib 49 | # This links to MATLAB's omp not gomp. 50 | -------------------------------------------------------------------------------- /make-platforms/make.inc.macosx_gcc-8: -------------------------------------------------------------------------------- 1 | # Makefile variable overrides for Mac OSX compilation with GCC v.8.* 2 | # 3 | # Use this if you'll need to link against gfortran. 4 | # 5 | # Copy this file to make.inc, and if needed edit for your setup. 6 | # 7 | # Barnett 10/27/18. Input from Yu-Hsuan Shih, Amit Moskovich. 8 | 9 | # By default we use clang/LLVM (which is aliased to /usr/lib/gcc, etc). 10 | # This make.inc is if you want to override this. 11 | # Get gcc from brew then use, eg: 12 | CXX=g++-8 13 | CC=gcc-8 14 | FC=gfortran 15 | 16 | # (compile flags for use with GCC are as in linux makefile) 17 | CFLAGS += 18 | 19 | # If you're getting warning messages of the form: 20 | # ld: warning: object file (lib-static/libfinufft.a(finufft1d.o)) was built for 21 | # newer OSX version (10.13) than being linked (10.9) 22 | # Then you can uncomment the following two lines with the older version number 23 | # (in this example -mmacosx-version-min=10.9) 24 | # 25 | #CFLAGS += "-mmacosx-version-min=" 26 | 27 | # as in makefile, but with the brew /usr/local/ stuff... 28 | CFLAGS += -I src -I/usr/local/include -I/opt/homebrew/include 29 | FFLAGS = $(CFLAGS) 30 | CXXFLAGS = $(CFLAGS) 31 | LIBS += -L/usr/local/lib -L/opt/homebrew/lib 32 | 33 | # OpenMP with GCC on OSX needs following... 34 | OMPFLAGS = -fopenmp 35 | OMPLIBS = -L/usr/local/lib -lgomp 36 | # since fftw3_omp doesn't work in OSX, we need... 37 | FFTWOMPSUFFIX=threads 38 | 39 | # MATLAB interface: 40 | # some of these will depend on your FFTW library location... 
41 | MFLAGS += -I/usr/local/include -I/opt/homebrew/include -L/usr/local/lib -L/opt/homebrew/lib -lm 42 | # edit for your MATLAB version location... 43 | MEX = $(shell ls -d /Applications/MATLAB_R20**.app)/bin/mex 44 | # Also see docs/install.rst for possible edits to MATLAB's MEX XML file. 45 | 46 | # If you have segfault of MATLAB then please try the following: 47 | #MOMPFLAGS = -D_OPENMP 48 | #OMPFLAGS = -Xpreprocessor -fopenmp 49 | #OMPLIBS = $(shell ls -d /Applications/MATLAB_R20**.app)/sys/os/maci64/libiomp5.dylib 50 | # This links to MATLAB's omp not gomp. 51 | -------------------------------------------------------------------------------- /make-platforms/make.inc.manylinux: -------------------------------------------------------------------------------- 1 | CFLAGS = -O3 -funroll-loops -march=x86-64 -mtune=generic -msse4 -fcx-limited-range 2 | CXXFLAGS = $(CFLAGS) 3 | -------------------------------------------------------------------------------- /make-platforms/make.inc.powerpc: -------------------------------------------------------------------------------- 1 | # Custom makefile settings for PowerPC (as opposed to Intel x86) architecture. 2 | # Copy this to make.inc to apply. 3 | # It adds a COMP (compiler) make argument, with values COMP=ibm, otherwise GCC. 4 | # By Hugo Brunie, May 2020. 
5 | # This makefile is greatly inspired by AMReX (amrex/Tools/GNUMake/comps) 6 | 7 | ## Compiler choice 8 | ifeq ($(COMP),ibm) 9 | OMPFLAGS = -qsmp=omp 10 | ifneq ($(OMP),OFF) 11 | CXX = xlC_r 12 | CC = xlc_r 13 | FC = xlf_r 14 | else 15 | CXX = xlC 16 | CC = xlc 17 | FC = xlf 18 | endif 19 | ifneq ($(DEBUG),TRUE) 20 | CXXFLAGS = -qsimd=auto -qmaxmem=-1 21 | CFLAGS = -qsimd=auto -qmaxmem=-1 22 | endif 23 | else # DEFAULT is gnu compiler (you can update to g++-9, etc) 24 | CXX = g++ 25 | CC = gcc 26 | FC=gfortran 27 | OMPFLAGS = -fopenmp 28 | CFLAGS = -funroll-loops -mcpu=powerpc64 -fcx-limited-range 29 | endif 30 | 31 | FFLAGS = $(CFLAGS) 32 | CXXFLAGS = $(CFLAGS) -DNEED_EXTERN_C 33 | 34 | ## shared library 35 | CFLAGS += -fPIC 36 | 37 | ifeq ($(DEBUG),TRUE) 38 | CXXFLAGS += -g -O0 39 | CFLAGS += -g -O0 40 | else 41 | CXXFLAGS += -g -O3 42 | CFLAGS += -g -O3 43 | endif 44 | CLINK=-lstdc++ 45 | FLINK=$(CLINK) 46 | -------------------------------------------------------------------------------- /make-platforms/make.inc.windows_mingw: -------------------------------------------------------------------------------- 1 | MINGW=ON 2 | # libm not available on Windows? Has to be removed from LIBS to build MATLAB mex file. 
Does not interfere with library build 3 | LIBS= 4 | # please set these paths 5 | FFTW_H_DIR= 6 | FFTW_LIB_DIR= 7 | # might be needed for MATLAB 8 | LGOMP_DIR= 9 | 10 | # modify FLAGS such that FFTW headers are included 11 | ifneq ($(FFTW_H_DIR),) 12 | CFLAGS+=-I$(FFTW_H_DIR) 13 | CXXFLAGS+=-I$(FFTW_H_DIR) 14 | endif 15 | # add FFTW DLL location to LIBS 16 | ifneq ($(FFTW_LIB_DIR),) 17 | LIBS+=-L$(FFTW_LIB_DIR) 18 | endif 19 | 20 | # adjust MATLAB flags, add path of lgomp 21 | ifneq ($(FFTW_H_DIR),) 22 | MFLAGS=-I$(FFTW_H_DIR) -largeArrayDims 23 | endif 24 | ifneq ($(LGOMP_DIR),) 25 | MFLAGS+=-L$(LGOMP_DIR) 26 | endif 27 | MFLAGS+=-lgomp.dll 28 | -------------------------------------------------------------------------------- /make-platforms/make.inc.windows_msys: -------------------------------------------------------------------------------- 1 | MSYS=ON 2 | -------------------------------------------------------------------------------- /matlab/@gpuArray/finufft1d1.m: -------------------------------------------------------------------------------- 1 | % FINUFFT1D1 GPU 1D complex nonuniform FFT, type 1 (nonuniform to uniform). 2 | % 3 | % See CUFINUFFT1D1 4 | function f = finufft1d1(varargin) 5 | f = cufinufft1d1(varargin{:}); 6 | -------------------------------------------------------------------------------- /matlab/@gpuArray/finufft1d2.m: -------------------------------------------------------------------------------- 1 | % FINUFFT1D2 GPU 1D complex nonuniform FFT, type 2 (uniform to nonuniform). 2 | % 3 | % See CUFINUFFT1D2 4 | function c = finufft1d2(varargin) 5 | c = cufinufft1d2(varargin{:}); 6 | -------------------------------------------------------------------------------- /matlab/@gpuArray/finufft1d3.m: -------------------------------------------------------------------------------- 1 | % FINUFFT1D3 GPU 1D complex nonuniform FFT, type 3 (nonuniform to nonuniform). 
2 | % 3 | % See CUFINUFFT1D3 4 | function f = finufft1d3(varargin) 5 | f = cufinufft1d3(varargin{:}); 6 | -------------------------------------------------------------------------------- /matlab/@gpuArray/finufft2d1.m: -------------------------------------------------------------------------------- 1 | % FINUFFT2D1 GPU 2D complex nonuniform FFT, type 1 (nonuniform to uniform). 2 | % 3 | % See CUFINUFFT2D1 4 | function f = finufft2d1(varargin) 5 | f = cufinufft2d1(varargin{:}); 6 | -------------------------------------------------------------------------------- /matlab/@gpuArray/finufft2d2.m: -------------------------------------------------------------------------------- 1 | % FINUFFT2D2 GPU 2D complex nonuniform FFT, type 2 (uniform to nonuniform). 2 | % 3 | % See CUFINUFFT2D2 4 | function c = finufft2d2(varargin) 5 | c = cufinufft2d2(varargin{:}); 6 | -------------------------------------------------------------------------------- /matlab/@gpuArray/finufft2d3.m: -------------------------------------------------------------------------------- 1 | % FINUFFT2D3 GPU 2D complex nonuniform FFT, type 3 (nonuniform to nonuniform). 2 | % 3 | % See CUFINUFFT2D3 4 | function f = finufft2d3(varargin) 5 | f = cufinufft2d3(varargin{:}); 6 | -------------------------------------------------------------------------------- /matlab/@gpuArray/finufft3d1.m: -------------------------------------------------------------------------------- 1 | % FINUFFT3D1 GPU 3D complex nonuniform FFT, type 1 (nonuniform to uniform). 2 | % 3 | % See CUFINUFFT3D1 4 | function f = finufft3d1(varargin) 5 | f = cufinufft3d1(varargin{:}); 6 | -------------------------------------------------------------------------------- /matlab/@gpuArray/finufft3d2.m: -------------------------------------------------------------------------------- 1 | % FINUFFT3D2 GPU 3D complex nonuniform FFT, type 2 (uniform to nonuniform). 
2 | % 3 | % See CUFINUFFT3D2 4 | function c = finufft3d2(varargin) 5 | c = cufinufft3d2(varargin{:}); 6 | -------------------------------------------------------------------------------- /matlab/@gpuArray/finufft3d3.m: -------------------------------------------------------------------------------- 1 | % FINUFFT3D3 GPU 3D complex nonuniform FFT, type 3 (nonuniform to nonuniform). 2 | % 3 | % See CUFINUFFT3D3 4 | function f = finufft3d3(varargin) 5 | f = cufinufft3d3(varargin{:}); 6 | -------------------------------------------------------------------------------- /matlab/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | find_package(Matlab REQUIRED) 2 | matlab_add_mex(NAME finufft_mex SRC finufft.cpp LINK_TO finufft OUTPUT_NAME finufft R2018a) 3 | target_compile_definitions(finufft_mex PRIVATE -DR2008OO) 4 | 5 | file(GLOB FINUFFT_MATLAB_M_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.m) 6 | 7 | add_custom_command( 8 | TARGET finufft_mex 9 | POST_BUILD 10 | COMMAND ${CMAKE_COMMAND} -E copy ${FINUFFT_MATLAB_M_SOURCES} ${CMAKE_CURRENT_BINARY_DIR} 11 | VERBATIM 12 | ) 13 | -------------------------------------------------------------------------------- /matlab/cufinufft1d1.docsrc: -------------------------------------------------------------------------------- 1 | % CUFINUFFT1D1 GPU 1D complex nonuniform FFT, type 1 (nonuniform to uniform). 2 | % 3 | % f = cufinufft1d1(x,c,isign,eps,ms) 4 | % f = cufinufft1d1(x,c,isign,eps,ms,opts) 5 | % 6 | % This computes on the GPU, to relative precision eps, via a fast algorithm: 7 | % 8 | % nj 9 | % f(k1) = SUM c[j] exp(+/-i k1 x(j)) for -ms/2 <= k1 <= (ms-1)/2 10 | % j=1 11 | % Inputs: 12 | % x length-nj vector of real-valued locations of nonuniform sources 13 | % c length-nj complex vector of source strengths. If numel(c)>nj, 14 | % expects a stack of vectors (eg, a nj*ntrans matrix) each of which is 15 | % transformed with the same source locations. 
16 | ISIGNEPS 17 | % ms number of Fourier modes computed, may be even or odd; 18 | % in either case, mode range is integers lying in [-ms/2, (ms-1)/2] 19 | GOPTS 20 | GOPTS12 21 | % Outputs: 22 | % f size-ms complex column vector of Fourier coefficients, or, if 23 | % ntrans>1, a matrix of size (ms,ntrans). 24 | % 25 | GNOTES 26 | -------------------------------------------------------------------------------- /matlab/cufinufft1d2.docsrc: -------------------------------------------------------------------------------- 1 | % CUFINUFFT1D2 GPU 1D complex nonuniform FFT, type 2 (uniform to nonuniform). 2 | % 3 | % c = cufinufft1d2(x,isign,eps,f) 4 | % c = cufinufft1d2(x,isign,eps,f,opts) 5 | % 6 | % This computes on the GPU, to relative precision eps, via a fast algorithm: 7 | % 8 | % c[j] = SUM f[k1] exp(+/-i k1 x[j]) for j = 1,...,nj 9 | % k1 10 | % where sum is over -ms/2 <= k1 <= (ms-1)/2. 11 | % 12 | % Inputs: 13 | % x length-nj vector of real-valued locations of nonuniform targets 14 | % f complex Fourier coefficients. If a vector, length sets ms 15 | % (with mode ordering given by opts.modeord). If a matrix, each 16 | % of ntrans columns is transformed with the same nonuniform targets. 17 | ISIGNEPS 18 | GOPTS 19 | GOPTS12 20 | % Outputs: 21 | % c complex column vector of nj answers at targets, or, 22 | % if ntrans>1, matrix of size (nj,ntrans). 23 | % 24 | GNOTES 25 | -------------------------------------------------------------------------------- /matlab/cufinufft1d3.docsrc: -------------------------------------------------------------------------------- 1 | % CUFINUFFT1D3 GPU 1D complex nonuniform FFT, type 3 (nonuniform to nonuniform). 
2 | % 3 | % f = cufinufft1d3(x,c,isign,eps,s) 4 | % f = cufinufft1d3(x,c,isign,eps,s,opts) 5 | % 6 | % This computes on the GPU, to relative precision eps, via a fast algorithm: 7 | % 8 | % nj 9 | % f[k] = SUM c[j] exp(+-i s[k] x[j]), for k = 1, ..., nk 10 | % j=1 11 | % Inputs: 12 | % x length-nj vector of real-valued locations of nonuniform sources 13 | % c length-nj complex vector of source strengths. If numel(c)>nj, 14 | % expects a stack of vectors (eg, a nj*ntrans matrix) each of which is 15 | % transformed with the same source and target locations. 16 | ISIGNEPS 17 | % s length-nk vector of frequency locations of nonuniform targets 18 | GOPTS 19 | % Outputs: 20 | % f length-nk complex vector of values at targets, or, if ntrans>1, 21 | % a matrix of size (nk,ntrans) 22 | % 23 | GNOTES 24 | -------------------------------------------------------------------------------- /matlab/cufinufft2d1.docsrc: -------------------------------------------------------------------------------- 1 | % CUFINUFFT2D1 GPU 2D complex nonuniform FFT, type 1 (nonuniform to uniform). 2 | % 3 | % f = cufinufft2d1(x,y,c,isign,eps,ms,mt) 4 | % f = cufinufft2d1(x,y,c,isign,eps,ms,mt,opts) 5 | % 6 | % This computes on the GPU, to relative precision eps, via a fast algorithm: 7 | % 8 | % nj 9 | % f[k1,k2] = SUM c[j] exp(+-i (k1 x[j] + k2 y[j])) 10 | % j=1 11 | % 12 | % for -ms/2 <= k1 <= (ms-1)/2, -mt/2 <= k2 <= (mt-1)/2. 13 | % 14 | % Inputs: 15 | % x,y real-valued coordinates of nonuniform sources in the plane, 16 | % each a length-nj vector 17 | % c length-nj complex vector of source strengths. If numel(c)>nj, 18 | % expects a stack of vectors (eg, a nj*ntrans matrix) each of which is 19 | % transformed with the same source locations. 20 | ISIGNEPS 21 | % ms,mt number of Fourier modes requested in x & y; each may be even or odd. 
22 | % In either case the mode range is integers lying in [-m/2, (m-1)/2] 23 | GOPTS 24 | GOPTS12 25 | % Outputs: 26 | % f size (ms,mt) complex matrix of Fourier coefficients 27 | % (ordering given by opts.modeord in each dimension; ms fast, mt slow), 28 | % or, if ntrans>1, a 3D array of size (ms,mt,ntrans). 29 | % 30 | GNOTES 31 | -------------------------------------------------------------------------------- /matlab/cufinufft2d2.docsrc: -------------------------------------------------------------------------------- 1 | % CUFINUFFT2D2 GPU 2D complex nonuniform FFT, type 2 (uniform to nonuniform). 2 | % 3 | % c = cufinufft2d2(x,y,isign,eps,f) 4 | % c = cufinufft2d2(x,y,isign,eps,f,opts) 5 | % 6 | % This computes on the GPU, to relative precision eps, via a fast algorithm: 7 | % 8 | % c[j] = SUM f[k1,k2] exp(+/-i (k1 x[j] + k2 y[j])) for j = 1,..,nj 9 | % k1,k2 10 | % where sum is over -ms/2 <= k1 <= (ms-1)/2, -mt/2 <= k2 <= (mt-1)/2, 11 | % 12 | % Inputs: 13 | % x,y real-valued coordinates of nonuniform targets in the plane, 14 | % each a vector of length nj 15 | % f complex Fourier coefficient matrix, whose size determines (ms,mt). 16 | % (Mode ordering given by opts.modeord, in each dimension.) 17 | % If a 3D array, 3rd dimension sets ntrans, and each of ntrans 18 | % matrices is transformed with the same nonuniform targets. 19 | ISIGNEPS 20 | GOPTS 21 | GOPTS12 22 | % Outputs: 23 | % c complex column vector of nj answers at targets, or, 24 | % if ntrans>1, matrix of size (nj,ntrans). 25 | % 26 | GNOTES 27 | -------------------------------------------------------------------------------- /matlab/cufinufft2d3.docsrc: -------------------------------------------------------------------------------- 1 | % CUFINUFFT2D3 GPU 2D complex nonuniform FFT, type 3 (nonuniform to nonuniform). 
2 | % 3 | % f = cufinufft2d3(x,y,c,isign,eps,s,t) 4 | % f = cufinufft2d3(x,y,c,isign,eps,s,t,opts) 5 | % 6 | % This computes on the GPU, to relative precision eps, via a fast algorithm: 7 | % 8 | % nj 9 | % f[k] = SUM c[j] exp(+-i (s[k] x[j] + t[k] y[j])), for k = 1, ..., nk 10 | % j=1 11 | % Inputs: 12 | % x,y coordinates of nonuniform sources in R^2, each a length-nj vector. 13 | % c length-nj complex vector of source strengths. If numel(c)>nj, 14 | % expects a stack of vectors (eg, a nj*ntrans matrix) each of which is 15 | % transformed with the same source and target locations. 16 | ISIGNEPS 17 | % s,t frequency coordinates of nonuniform targets in R^2, 18 | % each a length-nk vector. 19 | GOPTS 20 | % Outputs: 21 | % f length-nk complex vector of values at targets, or, if ntrans>1, 22 | % a matrix of size (nk,ntrans) 23 | % 24 | GNOTES 25 | -------------------------------------------------------------------------------- /matlab/cufinufft3d1.docsrc: -------------------------------------------------------------------------------- 1 | % CUFINUFFT3D1 GPU 3D complex nonuniform FFT, type 1 (nonuniform to uniform). 2 | % 3 | % f = cufinufft3d1(x,y,z,c,isign,eps,ms,mt,mu) 4 | % f = cufinufft3d1(x,y,z,c,isign,eps,ms,mt,mu,opts) 5 | % 6 | % This computes on the GPU, to relative precision eps, via a fast algorithm: 7 | % 8 | % nj 9 | % f[k1,k2,k3] = SUM c[j] exp(+-i (k1 x[j] + k2 y[j] + k3 z[j])) 10 | % j=1 11 | % 12 | % for -ms/2 <= k1 <= (ms-1)/2, -mt/2 <= k2 <= (mt-1)/2, 13 | % -mu/2 <= k3 <= (mu-1)/2. 14 | % 15 | % Inputs: 16 | % x,y,z real-valued coordinates of nonuniform sources, 17 | % each a length-nj vector 18 | % c length-nj complex vector of source strengths. If numel(c)>nj, 19 | % expects a stack of vectors (eg, a nj*ntrans matrix) each of which is 20 | % transformed with the same source locations. 21 | ISIGNEPS 22 | % ms,mt,mu number of Fourier modes requested in x,y and z; each may be 23 | % even or odd. 
24 | % In either case the mode range is integers lying in [-m/2, (m-1)/2] 25 | GOPTS 26 | GOPTS12 27 | % Outputs: 28 | % f size (ms,mt,mu) complex array of Fourier coefficients 29 | % (ordering given by opts.modeord in each dimension; ms fastest, mu 30 | % slowest), or, if ntrans>1, a 4D array of size (ms,mt,mu,ntrans). 31 | % 32 | GNOTES 33 | -------------------------------------------------------------------------------- /matlab/cufinufft3d2.docsrc: -------------------------------------------------------------------------------- 1 | % CUFINUFFT3D2 GPU 3D complex nonuniform FFT, type 2 (uniform to nonuniform). 2 | % 3 | % c = cufinufft3d2(x,y,z,isign,eps,f) 4 | % c = cufinufft3d2(x,y,z,isign,eps,f,opts) 5 | % 6 | % This computes on the GPU, to relative precision eps, via a fast algorithm: 7 | % 8 | % c[j] = SUM f[k1,k2,k3] exp(+/-i (k1 x[j] + k2 y[j] + k3 z[j])) 9 | % k1,k2,k3 10 | % for j = 1,..,nj 11 | % where sum is over -ms/2 <= k1 <= (ms-1)/2, -mt/2 <= k2 <= (mt-1)/2, 12 | % -mu/2 <= k3 <= (mu-1)/2. 13 | % 14 | % Inputs: 15 | % x,y,z real-valued coordinates of nonuniform targets, 16 | % each a vector of length nj 17 | % f complex Fourier coefficient array, whose size sets (ms,mt,mu). 18 | % (Mode ordering given by opts.modeord, in each dimension.) 19 | % If a 4D array, 4th dimension sets ntrans, and each of ntrans 20 | % 3D arrays is transformed with the same nonuniform targets. 21 | ISIGNEPS 22 | GOPTS 23 | GOPTS12 24 | % Outputs: 25 | % c complex column vector of nj answers at targets, or, 26 | % if ntrans>1, matrix of size (nj,ntrans). 27 | % 28 | GNOTES 29 | -------------------------------------------------------------------------------- /matlab/cufinufft3d3.docsrc: -------------------------------------------------------------------------------- 1 | % CUFINUFFT3D3 GPU 3D complex nonuniform FFT, type 3 (nonuniform to nonuniform). 
2 | % 3 | % f = cufinufft3d3(x,y,z,c,isign,eps,s,t,u) 4 | % f = cufinufft3d3(x,y,z,c,isign,eps,s,t,u,opts) 5 | % 6 | % This computes on the GPU, to relative precision eps, via a fast algorithm: 7 | % 8 | % nj 9 | % f[k] = SUM c[j] exp(+-i (s[k] x[j] + t[k] y[j] + u[k] z[j])), 10 | % j=1 11 | % for k = 1, ..., nk 12 | % Inputs: 13 | % x,y,z coordinates of nonuniform sources in R^3, each a length-nj vector. 14 | % c length-nj complex vector of source strengths. If numel(c)>nj, 15 | % expects a stack of vectors (eg, a nj*ntrans matrix) each of which is 16 | % transformed with the same source and target locations. 17 | ISIGNEPS 18 | % s,t,u frequency coordinates of nonuniform targets in R^3, 19 | % each a length-nk vector. 20 | GOPTS 21 | % Outputs: 22 | % f length-nk complex vector of values at targets, or, if ntrans>1, 23 | % a matrix of size (nk,ntrans) 24 | % 25 | GNOTES 26 | -------------------------------------------------------------------------------- /matlab/errhandler.m: -------------------------------------------------------------------------------- 1 | function errhandler(ier) 2 | % ERRHANDLER translate FINUFFT's ier status into MATLAB warnings/error throws. 
3 | 4 | % Barnett 6/13/20 5 | 6 | % Note that there are other matlab-only error types defined in valid_*.m 7 | 8 | switch ier 9 | % These are the ERR_ #defines in ../include/finufft_errors.h: 10 | case 1 11 | warning('FINUFFT:epsTooSmall','FINUFFT eps tolerance too small to achieve'); 12 | case 2 13 | error('FINUFFT:mallocGtMaxNf','FINUFFT malloc size requested greater than MAXNF'); 14 | case 3 15 | error('FINUFFT:spreadinterp:fineGridSmall','FINUFFT spreader fine grid too small compared to kernel width'); 16 | case 4 17 | error('FINUFFT:spreadinterp:NUrange','[DEPRECATED]'); 18 | case 5 19 | error('FINUFFT:spreadinterp:malloc','FINUFFT spreader malloc error'); 20 | case 6 21 | error('FINUFFT:spreadinterp:badDir','FINUFFT spreader illegal direction (must be 1 or 2)'); 22 | case 7 23 | error('FINUFFT:upsampfacSmall','FINUFFT opts.upsampfac not > 1.0'); 24 | case 8 25 | error('FINUFFT:upsampfacNotHorner','FINUFFT opts.upsampfac not a value with known Horner polynomial rule'); 26 | case 9 27 | error('FINUFFT:badNtrans','FINUFFT number of transforms ntrans invalid'); 28 | case 10 29 | error('FINUFFT:badType','FINUFFT transform type invalid'); 30 | case 11 31 | error('FINUFFT:malloc','FINUFFT general malloc failure'); 32 | case 12 33 | error('FINUFFT:badDim','FINUFFT number of dimensions dim invalid'); 34 | case 0 35 | % zero status: nothing to report 36 | otherwise 37 | % a nonzero status not listed above used to be dropped silently; surface it (as a warning, so existing callers keep running) 38 | warning('FINUFFT:unknownIer','FINUFFT returned unrecognized status code ier=%d',ier); 39 | end 40 | -------------------------------------------------------------------------------- /matlab/examples/README: -------------------------------------------------------------------------------- 1 | MATLAB/octave example drivers for FINUFFT (CPU and GPU versions) 2 | 3 | This directory contains example codes for MATLAB/Octave users of FINUFFT. 4 | The GPU examples are in cuda/ (these are MATLAB-only). 5 | 6 | Please also see: 7 | 8 | ../test/check_finufft.m for examples of eleven types of transform calls in double-precision. 9 | ../test/check_finufft_single.m for examples of eleven types of transform calls in single-precision.
10 | 11 | ../../tutorial/*.m many of which are in MATLAB. 12 | -------------------------------------------------------------------------------- /matlab/examples/cuda/README: -------------------------------------------------------------------------------- 1 | MATLAB GPU FINUFFT example codes. All data I/O are gpuArrays. 2 | 3 | Double-precision demos have filenames *_gpu.m 4 | Single-precision are *f_gpu.m 5 | 6 | Unlike the CPU codes in the above directory, these are not compatible 7 | with Octave. 8 | 9 | Libin Lu & Alex Barnett, March 2025. 10 | -------------------------------------------------------------------------------- /matlab/examples/cuda/guru1d1_gpu.m: -------------------------------------------------------------------------------- 1 | % MATLAB double-precision FINUFFT GPU demo for 1D type 1 transform. 2 | clear 3 | 4 | % set required parameters... 5 | isign = +1; % sign of imaginary unit in exponential 6 | tol = 1e-8; % requested accuracy 7 | M = 1e7; % # pts 8 | N = 1e7; % # of modes 9 | type = 1; 10 | n_modes = [N]; % n_dims inferred from length of this 11 | ntrans = 1; % number of transforms (>1: demo many-vector interface) 12 | 13 | xg = pi*(2*gpuArray.rand(M,1)-1); % NU points on GPU 14 | cg = gpuArray.randn(M,ntrans)+1i*gpuArray.randn(M,ntrans); % strengths on GPU 15 | 16 | opts.debug=1; % set options then plan the transform... 17 | opts.floatprec = 'double'; % tells it to make a double-precision plan 18 | opts.gpu_method=2; % "SM" method 19 | 20 | dev = gpuDevice(); % needed for timing 21 | disp('starting...'), tic % just time cuFINUFFT, not the data creation 22 | 23 | plan = cufinufft_plan(type,n_modes,isign,ntrans,tol,opts); % make plan 24 | 25 | plan.setpts(xg); % send in NU pts 26 | 27 | fg = plan.execute(cg); % do transform (to fg on GPU) 28 | 29 | wait(dev); tgpu = toc; % since GPU async 30 | fprintf('done in %.3g s: throughput (excl H<->D) is %.3g NUpt/s\n',... 
31 | tgpu, M*ntrans/tgpu) 32 | 33 | % if you do not want to do more transforms of this size, clean up... 34 | delete(plan); 35 | 36 | % check the error of only one output also on GPU... 37 | t = ceil(0.7*ntrans); % pick a transform in stack 38 | if ntrans>1, ct = cg(:,t); ft = fg(:,t); else, ct = cg; ft = fg; end 39 | nt = ceil(0.37*N); % pick a mode index 40 | fe = sum(ct.*exp(1i*isign*nt*xg)); % exact 41 | of1 = floor(N/2)+1; % mode index offset 42 | fprintf('rel err in F[%d] is %.3g\n',nt,abs(fe-ft(nt+of1))/norm(ft,Inf)) 43 | -------------------------------------------------------------------------------- /matlab/examples/cuda/guru1d1f_gpu.m: -------------------------------------------------------------------------------- 1 | % MATLAB single-precision FINUFFT GPU demo for 1D type 1 transform. 2 | clear 3 | 4 | % set required parameters... 5 | isign = +1; % sign of imaginary unit in exponential 6 | tol = 1e-3; % requested accuracy 7 | M = 1e8; % # pts 8 | N = 1e6; % # of modes; note N*eps('single') limits accuracy 9 | type = 1; 10 | n_modes = [N]; % n_dims inferred from length of this 11 | ntrans = 1; % number of transforms (>1: demo many-vector interface) 12 | 13 | xg = pi*(2*gpuArray.rand(M,1,'single')-1); % float32 NU pts on GPU 14 | % stack of float32 strengths on GPU... 15 | cg = gpuArray.randn(M,ntrans,'single')+1i*gpuArray.randn(M,ntrans,'single'); 16 | 17 | opts.debug=1; % set options then plan the transform... 18 | opts.floatprec = 'single'; % tells it to make a single-precision plan 19 | opts.gpu_method=2; 20 | 21 | dev = gpuDevice(); % needed for timing 22 | disp('starting...'), tic % just time cuFINUFFT, not the data creation 23 | 24 | plan = cufinufft_plan(type,n_modes,isign,ntrans,tol,opts); % make plan 25 | 26 | plan.setpts(xg); % send in NU pts 27 | 28 | fg = plan.execute(cg); % do transform (to fg on GPU) 29 | 30 | wait(dev); tgpu = toc; % since GPU async 31 | fprintf('done in %.3g s: throughput (excl H<->D) is %.3g NUpt/s\n',... 
32 | tgpu, M*ntrans/tgpu) 33 | 34 | % if you do not want to do more transforms of this size, clean up... 35 | delete(plan); 36 | 37 | % check the error of only one output also on GPU... 38 | t = ceil(0.7*ntrans); % pick a transform in stack 39 | if ntrans>1, ct = cg(:,t); ft = fg(:,t); else, ct = cg; ft = fg; end 40 | nt = ceil(0.37*N); % pick a mode index 41 | fe = sum(ct.*exp(1i*isign*nt*xg)); % exact 42 | of1 = floor(N/2)+1; % mode index offset 43 | fprintf('rel err in F[%d] is %.3g\n',nt,abs(fe-ft(nt+of1))/norm(ft,Inf)) 44 | -------------------------------------------------------------------------------- /matlab/examples/cuda/simple1d1f_gpu.m: -------------------------------------------------------------------------------- 1 | % MATLAB single-precision FINUFFT GPU demo for 2D type 1 transform (despite the 1d1 filename). 2 | clear 3 | 4 | M = 1e8; 5 | x = 2*pi*gpuArray.rand(M,1,'single'); % random pts in [0,2pi]^2 6 | y = 2*pi*gpuArray.rand(M,1,'single'); 7 | % iid random complex data... 8 | c = gpuArray.randn(M,1,'single')+1i*gpuArray.randn(M,1,'single'); 9 | 10 | N1 = 10000; N2 = 5000; % desired Fourier mode array sizes 11 | tol = 1e-3; 12 | 13 | dev = gpuDevice(); % crucial for valid timing 14 | tic 15 | f = cufinufft2d1(x,y,c,+1,tol,N1,N2); % do it (all opts default) 16 | %opts.gpu_method=2; f = cufinufft2d1(x,y,c,+1,tol,N1,N2,opts); % do it with opts 17 | wait(dev) % crucial for valid timing 18 | tgpu = toc; 19 | fprintf('done in %.3g s: throughput (excl H<->D) is %.3g NUpt/s\n',tgpu,M/tgpu) 20 | 21 | % check the error of only one output, also on GPU...
22 | nt1 = ceil(0.47*N1); nt2 = ceil(0.47*N2); % pick one mode (k1,k2) inside the computed ranges 23 | fe = sum(c.*exp(1i*(nt1*x+nt2*y))); % exact, with the + sign that was passed to cufinufft2d1 24 | of1 = floor(N1/2)+1; of2 = floor(N2/2)+1; % mode index offsets in each dimension 25 | fprintf('rel err in F[%d,%d] is %.3g\n',nt1,nt2,abs(fe-f(nt1+of1,nt2+of2))/norm(f(:),Inf)) 26 | -------------------------------------------------------------------------------- /matlab/examples/guru1d1.m: -------------------------------------------------------------------------------- 1 | % MATLAB/octave demo script of guru interface to FINUFFT, 1D type 1. 2 | % Lu 5/11/2020. Barnett added timing, tweaked. 3 | clear 4 | 5 | % set required parameters... 6 | isign = +1; % sign of imaginary unit in exponential 7 | tol = 1e-9; % requested accuracy 8 | M = 1e6; 9 | N = 1e6; % # of modes (approx total, used in all dims) 10 | type = 1; 11 | n_modes = N; % n_dims inferred from length of this 12 | ntrans = 2; 13 | 14 | x = pi*(2*rand(1,M)-1); % choose NU points 15 | c = randn(1,M*ntrans)+1i*randn(1,M*ntrans); % choose stack of strengths 16 | 17 | disp('starting...'), tic % just time FINUFFT not the data creation 18 | opts.debug=2; % set options then plan the transform... 19 | plan = finufft_plan(type,n_modes,isign,ntrans,tol,opts); 20 | 21 | plan.setpts(x); % send in NU pts 22 | 23 | f = plan.execute(c); % do the transform 24 | disp('done.'); toc 25 | 26 | % if you do not want to do more transforms of this size, clean up... 27 | delete(plan); 28 | 29 | % check the error of one output...
30 | nt = ceil(0.37*N); % pick a mode index 31 | t = ceil(0.7*ntrans); % pick a transform in stack 32 | fe = sum(c(M*(t-1)+(1:M)).*exp(1i*isign*nt*x)); % exact 33 | of1 = floor(N/2) + 1 + N*(t-1); % mode index offset 34 | fprintf('rel err in F[%d] is %.3g\n',nt,abs(fe-f(nt+of1))/norm(f,Inf)) 35 | -------------------------------------------------------------------------------- /matlab/examples/guru1d1_single.m: -------------------------------------------------------------------------------- 1 | % MATLAB/octave demo script of guru interface to FINUFFT, 1D type 1. 2 | % Single-precision case. 3 | % Lu 5/11/2020. Barnett added timing, tweaked. 4 | clear 5 | 6 | % set required parameters... 7 | isign = +1; % sign of imaginary unit in exponential 8 | tol = 1e-5; % requested accuracy (cannot ask for much more in single prec) 9 | M = 2e5; 10 | N = 1e5; % # of modes (approx total, used in all dims) 11 | type = 1; 12 | n_modes = N; % n_dims inferred from length of this 13 | ntrans = 3; 14 | 15 | x = pi*(2*rand(1,M,'single')-1); % choose NU points 16 | c = randn(1,M*ntrans,'single')+1i*randn(1,M*ntrans,'single'); % strengths 17 | 18 | % set options then plan the transform... 19 | opts.debug=2; 20 | opts.floatprec = 'single'; % tells it to make a single-precision plan 21 | disp('starting...'), tic 22 | plan = finufft_plan(type,n_modes,isign,ntrans,tol,opts); 23 | 24 | plan.setpts(x); % send in NU pts 25 | 26 | f = plan.execute(c); % do the transform 27 | disp('done.'); toc 28 | 29 | % if you do not want to do more transforms of this size, clean up... 30 | delete(plan); 31 | 32 | % check the error of one output... 
33 | nt = ceil(0.37*N); % pick a mode index 34 | t = ceil(0.7*ntrans); % pick a transform in stack 35 | fe = sum(c(M*(t-1)+(1:M)).*exp(1i*isign*nt*x)); % exact 36 | of1 = floor(N/2) + 1 + N*(t-1); % mode index offset 37 | fprintf('rel err in F[%d] is %.3g\n',nt,abs(fe-f(nt+of1))/norm(f,Inf)) 38 | -------------------------------------------------------------------------------- /matlab/finufft1d1.docsrc: -------------------------------------------------------------------------------- 1 | % FINUFFT1D1 1D complex nonuniform FFT of type 1 (nonuniform to uniform). 2 | % 3 | % f = finufft1d1(x,c,isign,eps,ms) 4 | % f = finufft1d1(x,c,isign,eps,ms,opts) 5 | % 6 | % This computes, to relative precision eps, via a fast algorithm: 7 | % 8 | % nj 9 | % f(k1) = SUM c[j] exp(+/-i k1 x(j)) for -ms/2 <= k1 <= (ms-1)/2 10 | % j=1 11 | % Inputs: 12 | % x length-nj vector of real-valued locations of nonuniform sources 13 | % c length-nj complex vector of source strengths. If numel(c)>nj, 14 | % expects a stack of vectors (eg, a nj*ntrans matrix) each of which is 15 | % transformed with the same source locations. 16 | ISIGNEPS 17 | % ms number of Fourier modes computed, may be even or odd; 18 | % in either case, mode range is integers lying in [-ms/2, (ms-1)/2] 19 | OPTS 20 | OPTS12 21 | % Outputs: 22 | % f size-ms complex column vector of Fourier coefficients, or, if 23 | % ntrans>1, a matrix of size (ms,ntrans). 24 | % 25 | NOTES 26 | -------------------------------------------------------------------------------- /matlab/finufft1d2.docsrc: -------------------------------------------------------------------------------- 1 | % FINUFFT1D2 1D complex nonuniform FFT of type 2 (uniform to nonuniform). 2 | % 3 | % c = finufft1d2(x,isign,eps,f) 4 | % c = finufft1d2(x,isign,eps,f,opts) 5 | % 6 | % This computes, to relative precision eps, via a fast algorithm: 7 | % 8 | % c[j] = SUM f[k1] exp(+/-i k1 x[j]) for j = 1,...,nj 9 | % k1 10 | % where sum is over -ms/2 <= k1 <= (ms-1)/2. 
11 | % 12 | % Inputs: 13 | % x length-nj vector of real-valued locations of nonuniform targets 14 | % f complex Fourier coefficients. If a vector, length sets ms 15 | % (with mode ordering given by opts.modeord). If a matrix, each 16 | % of ntrans columns is transformed with the same nonuniform targets. 17 | ISIGNEPS 18 | OPTS 19 | OPTS12 20 | % Outputs: 21 | % c complex column vector of nj answers at targets, or, 22 | % if ntrans>1, matrix of size (nj,ntrans). 23 | % 24 | NOTES 25 | -------------------------------------------------------------------------------- /matlab/finufft1d3.docsrc: -------------------------------------------------------------------------------- 1 | % FINUFFT1D3 1D complex nonuniform FFT of type 3 (nonuniform to nonuniform). 2 | % 3 | % f = finufft1d3(x,c,isign,eps,s) 4 | % f = finufft1d3(x,c,isign,eps,s,opts) 5 | % 6 | % This computes, to relative precision eps, via a fast algorithm: 7 | % 8 | % nj 9 | % f[k] = SUM c[j] exp(+-i s[k] x[j]), for k = 1, ..., nk 10 | % j=1 11 | % Inputs: 12 | % x length-nj vector of real-valued locations of nonuniform sources 13 | % c length-nj complex vector of source strengths. If numel(c)>nj, 14 | % expects a stack of vectors (eg, a nj*ntrans matrix) each of which is 15 | % transformed with the same source and target locations. 16 | ISIGNEPS 17 | % s length-nk vector of frequency locations of nonuniform targets 18 | OPTS 19 | % Outputs: 20 | % f length-nk complex vector of values at targets, or, if ntrans>1, 21 | % a matrix of size (nk,ntrans) 22 | % 23 | NOTES 24 | -------------------------------------------------------------------------------- /matlab/finufft2d1.docsrc: -------------------------------------------------------------------------------- 1 | % FINUFFT2D1 2D complex nonuniform FFT of type 1 (nonuniform to uniform).
2 | % 3 | % f = finufft2d1(x,y,c,isign,eps,ms,mt) 4 | % f = finufft2d1(x,y,c,isign,eps,ms,mt,opts) 5 | % 6 | % This computes, to relative precision eps, via a fast algorithm: 7 | % 8 | % nj 9 | % f[k1,k2] = SUM c[j] exp(+-i (k1 x[j] + k2 y[j])) 10 | % j=1 11 | % 12 | % for -ms/2 <= k1 <= (ms-1)/2, -mt/2 <= k2 <= (mt-1)/2. 13 | % 14 | % Inputs: 15 | % x,y real-valued coordinates of nonuniform sources in the plane, 16 | % each a length-nj vector 17 | % c length-nj complex vector of source strengths. If numel(c)>nj, 18 | % expects a stack of vectors (eg, a nj*ntrans matrix) each of which is 19 | % transformed with the same source locations. 20 | ISIGNEPS 21 | % ms,mt number of Fourier modes requested in x & y; each may be even or odd. 22 | % In either case the mode range is integers lying in [-m/2, (m-1)/2] 23 | OPTS 24 | OPTS12 25 | % Outputs: 26 | % f size (ms,mt) complex matrix of Fourier coefficients 27 | % (ordering given by opts.modeord in each dimension; ms fast, mt slow), 28 | % or, if ntrans>1, a 3D array of size (ms,mt,ntrans). 29 | % 30 | NOTES 31 | -------------------------------------------------------------------------------- /matlab/finufft2d2.docsrc: -------------------------------------------------------------------------------- 1 | % FINUFFT2D2 2D complex nonuniform FFT of type 2 (uniform to nonuniform). 2 | % 3 | % c = finufft2d2(x,y,isign,eps,f) 4 | % c = finufft2d2(x,y,isign,eps,f,opts) 5 | % 6 | % This computes, to relative precision eps, via a fast algorithm: 7 | % 8 | % c[j] = SUM f[k1,k2] exp(+/-i (k1 x[j] + k2 y[j])) for j = 1,..,nj 9 | % k1,k2 10 | % where sum is over -ms/2 <= k1 <= (ms-1)/2, -mt/2 <= k2 <= (mt-1)/2, 11 | % 12 | % Inputs: 13 | % x,y real-valued coordinates of nonuniform targets in the plane, 14 | % each a vector of length nj 15 | % f complex Fourier coefficient matrix, whose size determines (ms,mt). 16 | % (Mode ordering given by opts.modeord, in each dimension.) 
17 | % If a 3D array, 3rd dimension sets ntrans, and each of ntrans 18 | % matrices is transformed with the same nonuniform targets. 19 | ISIGNEPS 20 | OPTS 21 | OPTS12 22 | % Outputs: 23 | % c complex column vector of nj answers at targets, or, 24 | % if ntrans>1, matrix of size (nj,ntrans). 25 | % 26 | NOTES 27 | -------------------------------------------------------------------------------- /matlab/finufft2d3.docsrc: -------------------------------------------------------------------------------- 1 | % FINUFFT2D3 2D complex nonuniform FFT of type 3 (nonuniform to nonuniform). 2 | % 3 | % f = finufft2d3(x,y,c,isign,eps,s,t) 4 | % f = finufft2d3(x,y,c,isign,eps,s,t,opts) 5 | % 6 | % This computes, to relative precision eps, via a fast algorithm: 7 | % 8 | % nj 9 | % f[k] = SUM c[j] exp(+-i (s[k] x[j] + t[k] y[j])), for k = 1, ..., nk 10 | % j=1 11 | % Inputs: 12 | % x,y coordinates of nonuniform sources in R^2, each a length-nj vector. 13 | % c length-nj complex vector of source strengths. If numel(c)>nj, 14 | % expects a stack of vectors (eg, a nj*ntrans matrix) each of which is 15 | % transformed with the same source and target locations. 16 | ISIGNEPS 17 | % s,t frequency coordinates of nonuniform targets in R^2, 18 | % each a length-nk vector. 19 | OPTS 20 | % Outputs: 21 | % f length-nk complex vector of values at targets, or, if ntrans>1, 22 | % a matrix of size (nk,ntrans) 23 | % 24 | NOTES 25 | -------------------------------------------------------------------------------- /matlab/finufft3d1.docsrc: -------------------------------------------------------------------------------- 1 | % FINUFFT3D1 3D complex nonuniform FFT of type 1 (nonuniform to uniform). 
2 | % 3 | % f = finufft3d1(x,y,z,c,isign,eps,ms,mt,mu) 4 | % f = finufft3d1(x,y,z,c,isign,eps,ms,mt,mu,opts) 5 | % 6 | % This computes, to relative precision eps, via a fast algorithm: 7 | % 8 | % nj 9 | % f[k1,k2,k3] = SUM c[j] exp(+-i (k1 x[j] + k2 y[j] + k3 z[j])) 10 | % j=1 11 | % 12 | % for -ms/2 <= k1 <= (ms-1)/2, -mt/2 <= k2 <= (mt-1)/2, 13 | % -mu/2 <= k3 <= (mu-1)/2. 14 | % 15 | % Inputs: 16 | % x,y,z real-valued coordinates of nonuniform sources, 17 | % each a length-nj vector 18 | % c length-nj complex vector of source strengths. If numel(c)>nj, 19 | % expects a stack of vectors (eg, a nj*ntrans matrix) each of which is 20 | % transformed with the same source locations. 21 | ISIGNEPS 22 | % ms,mt,mu number of Fourier modes requested in x,y and z; each may be 23 | % even or odd. 24 | % In either case the mode range is integers lying in [-m/2, (m-1)/2] 25 | OPTS 26 | OPTS12 27 | % Outputs: 28 | % f size (ms,mt,mu) complex array of Fourier coefficients 29 | % (ordering given by opts.modeord in each dimension; ms fastest, mu 30 | % slowest), or, if ntrans>1, a 4D array of size (ms,mt,mu,ntrans). 31 | % 32 | NOTES 33 | -------------------------------------------------------------------------------- /matlab/finufft3d2.docsrc: -------------------------------------------------------------------------------- 1 | % FINUFFT3D2 3D complex nonuniform FFT of type 2 (uniform to nonuniform). 2 | % 3 | % c = finufft3d2(x,y,z,isign,eps,f) 4 | % c = finufft3d2(x,y,z,isign,eps,f,opts) 5 | % 6 | % This computes, to relative precision eps, via a fast algorithm: 7 | % 8 | % c[j] = SUM f[k1,k2,k3] exp(+/-i (k1 x[j] + k2 y[j] + k3 z[j])) 9 | % k1,k2,k3 10 | % for j = 1,..,nj 11 | % where sum is over -ms/2 <= k1 <= (ms-1)/2, -mt/2 <= k2 <= (mt-1)/2, 12 | % -mu/2 <= k3 <= (mu-1)/2. 13 | % 14 | % Inputs: 15 | % x,y,z real-valued coordinates of nonuniform targets, 16 | % each a vector of length nj 17 | % f complex Fourier coefficient array, whose size sets (ms,mt,mu). 
18 | % (Mode ordering given by opts.modeord, in each dimension.) 19 | % If a 4D array, 4th dimension sets ntrans, and each of ntrans 20 | % 3D arrays is transformed with the same nonuniform targets. 21 | ISIGNEPS 22 | OPTS 23 | OPTS12 24 | % Outputs: 25 | % c complex column vector of nj answers at targets, or, 26 | % if ntrans>1, matrix of size (nj,ntrans). 27 | % 28 | NOTES 29 | -------------------------------------------------------------------------------- /matlab/finufft3d3.docsrc: -------------------------------------------------------------------------------- 1 | % FINUFFT3D3 3D complex nonuniform FFT of type 3 (nonuniform to nonuniform). 2 | % 3 | % f = finufft3d3(x,y,z,c,isign,eps,s,t,u) 4 | % f = finufft3d3(x,y,z,c,isign,eps,s,t,u,opts) 5 | % 6 | % This computes, to relative precision eps, via a fast algorithm: 7 | % 8 | % nj 9 | % f[k] = SUM c[j] exp(+-i (s[k] x[j] + t[k] y[j] + u[k] z[j])), 10 | % j=1 11 | % for k = 1, ..., nk 12 | % Inputs: 13 | % x,y,z coordinates of nonuniform sources in R^3, each a length-nj vector. 14 | % c length-nj complex vector of source strengths. If numel(c)>nj, 15 | % expects a stack of vectors (eg, a nj*ntrans matrix) each of which is 16 | % transformed with the same source and target locations. 17 | ISIGNEPS 18 | % s,t,u frequency coordinates of nonuniform targets in R^3, 19 | % each a length-nk vector. 20 | OPTS 21 | % Outputs: 22 | % f length-nk complex vector of values at targets, or, if ntrans>1, 23 | % a matrix of size (nk,ntrans) 24 | % 25 | NOTES 26 | -------------------------------------------------------------------------------- /matlab/gnotes.docbit: -------------------------------------------------------------------------------- 1 | % Notes: 2 | % * For CUFINUFFT all array I/O is in the form of gpuArrays (on-device). 
3 | % * The precision of gpuArray input x controls whether the double or 4 | % single precision GPU library is called; all array inputs must match in 5 | % location (ie, be gpuArrays), and in precision. 6 | % * The vectorized (many vector) interface, ie ntrans>1, can be faster 7 | % than repeated calls with the same nonuniform points. Note that here the 8 | % I/O data ordering is stacked not interleaved. See ../docs/matlab_gpu.rst 9 | % * For more details about the opts fields, see ../docs/c_gpu.rst 10 | % * See ERRHANDLER, VALID_* and CUFINUFFT_PLAN for possible warning/error IDs. 11 | % * Full documentation is online at http://finufft.readthedocs.io 12 | % 13 | % See also CUFINUFFT_PLAN. 14 | -------------------------------------------------------------------------------- /matlab/gopts.docbit: -------------------------------------------------------------------------------- 1 | % opts optional struct with optional fields controlling the following: 2 | % opts.debug: 0 (silent, default), 1 (timing breakdown), 2 (debug info). 3 | % opts.upsampfac: sigma. 2.0 (default), or 1.25 (low RAM, smaller FFT). 4 | % opts.gpu_method: 0 (auto, default), 1 (GM or GM-sort), 2 (SM). 5 | % opts.gpu_sort: 0 (do not sort NU pts), 1 (sort when GM method, default). 6 | % opts.gpu_kerevalmeth: 0 (slow reference). 1 (Horner ppoly, default). 7 | % opts.gpu_maxsubprobsize: max # NU pts per subprob (gpu_method=2 only). 8 | % opts.gpu_binsize{x,y,z}: various binsizes in GM-sort/SM (for experts). 9 | % opts.gpu_maxbatchsize: 0 (auto, default), or many-vector batch size. 10 | % opts.gpu_device_id: sets the GPU device ID (experts only). 
11 | -------------------------------------------------------------------------------- /matlab/gopts12.docbit: -------------------------------------------------------------------------------- 1 | % opts.modeord: 0 (CMCL increasing mode ordering, default), 1 (FFT ordering) 2 | % opts.gpu_spreadinterponly: 0 (do NUFFT, default), 1 (only spread/interp) 3 | -------------------------------------------------------------------------------- /matlab/isigneps.docbit: -------------------------------------------------------------------------------- 1 | % isign if >=0, uses + sign in exponential, otherwise - sign. 2 | % eps relative precision requested (generally between 1e-15 and 1e-1) 3 | -------------------------------------------------------------------------------- /matlab/notes.docbit: -------------------------------------------------------------------------------- 1 | % Notes: 2 | % * The vectorized (many vector) interface, ie ntrans>1, can be much faster 3 | % than repeated calls with the same nonuniform points. Note that here the I/O 4 | % data ordering is stacked rather than interleaved. See ../docs/matlab.rst 5 | % * The class of input x (double vs single) controls whether the double or 6 | % single precision library is called; precisions of all data should match. 7 | % * For more details about the opts fields, see ../docs/opts.rst 8 | % * See ERRHANDLER, VALID_* and FINUFFT_PLAN for possible warning/error IDs. 9 | % * Full documentation is online at http://finufft.readthedocs.io 10 | % 11 | % See also FINUFFT_PLAN. 12 | -------------------------------------------------------------------------------- /matlab/opts.docbit: -------------------------------------------------------------------------------- 1 | % opts optional struct with optional fields controlling the following: 2 | % opts.debug: 0 (silent, default), 1 (timing breakdown), 2 (debug info).
3 | % opts.spread_debug: spreader: 0 (no text, default), 1 (some), or 2 (lots) 4 | % opts.spread_sort: 0 (don't sort NU pts), 1 (do), 2 (auto, default) 5 | % opts.spread_kerevalmeth: 0: exp(sqrt()), 1: Horner ppval (faster) 6 | % opts.spread_kerpad: (iff kerevalmeth=0) 0: don't pad to mult of 4, 1: do 7 | % opts.fftw: FFTW plan mode, 64=FFTW_ESTIMATE (default), 0=FFTW_MEASURE, etc 8 | % opts.upsampfac: sigma. 2.0 (default), or 1.25 (low RAM, smaller FFT) 9 | % opts.spread_thread: for ntrans>1 only. 0:auto, 1:seq multi, 2:par, etc 10 | % opts.maxbatchsize: for ntrans>1 only. max blocking size, or 0 for auto. 11 | % opts.nthreads: number of threads, or 0: use all available (default) 12 | -------------------------------------------------------------------------------- /matlab/opts12.docbit: -------------------------------------------------------------------------------- 1 | % opts.modeord: 0 (CMCL increasing mode ordering, default), 1 (FFT ordering) 2 | % opts.spreadinterponly: 0 (perform NUFFT, default), 1 (only spread/interp) 3 | -------------------------------------------------------------------------------- /matlab/test/big1dtest.m: -------------------------------------------------------------------------------- 1 | % Explore large problems: timing, RAM usage, matlab interface. 
2 | % Barnett 3/28/17 3 | clear 4 | isign = +1; % sign of imaginary unit in exponential 5 | eps = 1e-3; % requested accuracy 6 | o.debug = 1; % choose 1 for timing breakdown text output 7 | o.spread_sort=0; 8 | M = 2.2e9; % # of NU pts - when >=2^31, answer is wrong, zero *** 9 | N = 1e6; % # of modes (approx total, used in all dims) 10 | 11 | j = ceil(0.93*M); % target pt index to test 12 | 13 | if 0 14 | fprintf('generating x & c data (single-threaded and slow)...\n') 15 | x = pi*(2*rand(M,1)-1); 16 | c = randn(M,1)+1i*randn(M,1); 17 | fprintf('1D type 1: using %d modes...\n',N) 18 | tic; 19 | f = finufft1d1(x,c,isign,eps,N,o); 20 | fprintf('done in %.3g s\n',toc) 21 | if ~ier % NOTE(review): ier is never assigned in this script (finufft1d1 is called with one output) - confirm before enabling this branch 22 | nt = ceil(0.37*N); % pick a mode index 23 | fe = sum(c.*exp(1i*isign*nt*x)); % exact 24 | of1 = floor(N/2)+1; % mode index offset 25 | fprintf('rel err in F[%d] is %.3g\n',nt,abs((fe-f(nt+of1))/fe)) 26 | end 27 | end 28 | 29 | if 1 30 | fprintf('generating x data (single-threaded and slow)...\n') 31 | x = pi*(2*rand(M,1)-1); 32 | f = randn(N,1)+1i*randn(N,1); 33 | fprintf('1D type 2: using %d modes...\n',N) 34 | tic 35 | c = finufft1d2(x,isign,eps,f,o); % Out of memory iff >=2^31 36 | fprintf('done in %.3g s\n',toc) 37 | ms=numel(f); mm = (ceil(-ms/2):floor((ms-1)/2))'; % mode index list 38 | ce = sum(f.*exp(1i*isign*mm*x(j))); % crucial f, mm same shape 39 | fprintf('1D type-2: rel err in c[%d] is %.3g\n',j,abs((ce-c(j))/ce)) 40 | end 41 | 42 | % conclusion: we get zeros output if >=2^31. Fix this issue w/ mex interface. 43 | -------------------------------------------------------------------------------- /matlab/test/fig_accuracy.m: -------------------------------------------------------------------------------- 1 | % finufft accuracy test figs, deciding err norm to report. Barnett 6/6/17 2 | % Changed to rel 2-norm, 7/22/24. 3 | clear 4 | %M=1e5; N=1e2; % M = # NU pts, N = # modes.
Note: keep MN<1e8 for now 5 | M=1e4; N=1e2; % keep N small to see close to epsmach; cond # = O(N) 6 | %M=1e2; N=1e5; % confusion about N vs M controlling err prefac (it's N) 7 | isign = +1; % sign of imaginary unit in exponential 8 | o.debug = 0; % choose 1 for timing breakdown text output 9 | 10 | % use one of these two... 11 | tols = 10.^(-1:-0.02:-15); o.upsampfac = 2.0; 12 | %tols = 10.^(-1:-0.02:-10); o.upsampfac=1.25; % for lowupsampfac 13 | 14 | % other expts... 15 | %tols = 1e-6; 16 | %tols = 10.^(-1:-1:-10); o.upsampfac=1.25; % for lowupsampfac 17 | 18 | errs = nan*tols; 19 | for t=1:numel(tols) 20 | x = pi*(2*rand(1,M)-1); 21 | c = randn(1,M)+1i*randn(1,M); 22 | ns = (ceil(-N/2):floor((N-1)/2))'; % mode indices, col vec 23 | f = finufft1d1(x,c,isign,tols(t),N,o); 24 | fe = exp(1i*isign*ns*x) * c.'; % exact (note mat fill, matvec) 25 | %errs(t) = max(abs(f(:)-fe(:)))/norm(c,1); % eps as in err analysis... 26 | %p=2; errs(t) = norm(f(:)-fe(:),p)/norm(c,p); % ... or p-norm rel to input 27 | p=2; errs(t) = norm(f(:)-fe(:),p)/norm(fe(:),p); % ... or rel p-norm 28 | end 29 | figure; loglog(tols,errs,'+'); hold on; plot(tols,tols,'-'); 30 | axis tight; xlabel('tol'); ylabel('err'); 31 | %title(sprintf('1d1: (maxerr)/||c||_1, M=%d, N=%d\n',M,N)); 32 | title(sprintf('1d1: ||\\tilde f - f||_2/||f||_2, M=%d, N=%d\n',M,N)); % \\ emits a literal backslash; a bare \t is a tab escape in sprintf 33 | -------------------------------------------------------------------------------- /matlab/test/guru_setpts_issue.m: -------------------------------------------------------------------------------- 1 | % Tests fix of setpts temporary array loss by MWrap (issue 185). 2 | % The issue occurred when expressions such as -x were passed into setpts, 3 | % resulting in crash or incorrect answers (due to pointing to destroyed temp 4 | % arrays). 5 | % It is fixed as of 5/6/2021. 6 | % code by Dan Fortunato.
7 | 8 | % Random points 9 | M = 10000; 10 | x = pi*(2*rand(M,1)-1); 11 | y = pi*(2*rand(M,1)-1); 12 | 13 | % Random Fourier coefficients 14 | N = 64; 15 | coeffs = randn(N) + 1i*randn(N); 16 | 17 | % FINUFFT options 18 | tol = 1e-12; 19 | opts = struct(); 20 | 21 | for k = 1:100 22 | 23 | disp(k) 24 | 25 | % Without planning 26 | vals = finufft2d2(-x, -y, -1, tol, coeffs, opts); 27 | 28 | % With planning (was buggy, at seemingly random times) 29 | plan = finufft_plan(2, [N N], -1, 1, tol, opts); 30 | plan.setpts(-x, -y); 31 | vals2 = plan.execute(coeffs); 32 | 33 | % With planning (was the workaround, now not needed) 34 | plan = finufft_plan(2, [N N], -1, 1, tol, opts); 35 | xx = -x; 36 | yy = -y; 37 | plan.setpts(xx, yy); 38 | vals3 = plan.execute(coeffs); 39 | 40 | if ( any(isnan(vals2)) || norm(vals - vals2) > tol ) 41 | warning('Something went wrong during run #%i', k); 42 | fprintf('norm(vals - vals2) = %g\n', norm(vals - vals2)); 43 | fprintf('norm(vals - vals3) = %g\n', norm(vals - vals3)); 44 | break 45 | end 46 | end 47 | -------------------------------------------------------------------------------- /matlab/test/test_strictadjoint.m: -------------------------------------------------------------------------------- 1 | % check t1 and t2 are adjoints to rounding error, not merely to requested tol. 2 | % 1d only for now. Barnett 8/27/18 3 | %clear; addpath ~/numerics/finufft/matlab 4 | 5 | M=1e5; % pts 6 | N=1e4; % modes 7 | tol = 1e-6; 8 | x = pi*(2*rand(M,1)-1); 9 | % pick two vectors to check (u,F1 v) = (F2 u,v) with... 
10 | v = randn(M,1)+1i*randn(M,1); 11 | u = randn(N,1)+1i*randn(N,1); 12 | ip1 = dot(u,finufft1d1(x,v,+1,tol,N)); 13 | ip2 = dot(finufft1d2(x,-1,tol,u),v); % note sign flips to be complex adjoint 14 | fprintf('M=%d,N=%d,tol=%.1g: rel err (u,F1 v) vs (F2 u,v): %.3g\n',M,N,tol,abs(ip1-ip2)/abs(ip1)) 15 | clear eps 16 | fprintf('cf estimated rounding err for this prob size; %.3g\n',0.2*eps*N) 17 | -------------------------------------------------------------------------------- /matlab/valid_ntr.m: -------------------------------------------------------------------------------- 1 | function n_transf = valid_ntr(x,c) 2 | % VALID_NTR deduce n_transf = numel(c)/numel(x) and validate the size of c, for types 1 and 3. 3 | % Raises FINUFFT:mixedDevice unless x and c are both gpuArrays or both not, and FINUFFT:badCsize unless numel(c) is a whole multiple of numel(x). 4 | 5 | if isa(x, 'gpuArray') ~= isa(c, 'gpuArray') 6 | error('FINUFFT:mixedDevice','FINUFFT: x and c must be both on GPU or CPU'); 7 | end 8 | 9 | n_transf = round(numel(c)/numel(x)); % uses element counts only, so this allows general row/col vec, matrix, input shapes 10 | if n_transf*numel(x)~=numel(c) % the rounded ratio must reproduce numel(c) exactly 11 | error('FINUFFT:badCsize','FINUFFT numel(c) must be divisible by numel(x)'); 12 | end 13 | -------------------------------------------------------------------------------- /perftest/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Each source test file is instantiated in single and double precision 2 | set(PERFTESTS guru_timing_test manysmallprobs spreadtestnd spreadtestndall) 3 | 4 | foreach(TEST ${PERFTESTS}) 5 | add_executable(${TEST} ${TEST}.cpp) 6 | if(FINUFFT_USE_DUCC0) 7 | target_compile_definitions(${TEST} PRIVATE -DFINUFFT_USE_DUCC0) 8 | endif() 9 | finufft_link_test(${TEST}) 10 | 11 | add_executable(${TEST}f ${TEST}.cpp) 12 | target_compile_definitions(${TEST}f PRIVATE -DSINGLE) 13 | if(FINUFFT_USE_DUCC0) 14 | target_compile_definitions(${TEST}f PRIVATE -DFINUFFT_USE_DUCC0) 15 | endif() 16 | finufft_link_test(${TEST}f) 17 | endforeach() 18 | 19 | include(CheckIncludeFile) 20 |
check_include_file("getopt.h" HAVE_GETOPT_H) 21 | if(HAVE_GETOPT_H) 22 | add_executable(perftest perftest.cpp) 23 | if(FINUFFT_USE_DUCC0) 24 | target_compile_definitions(perftest PRIVATE -DFINUFFT_USE_DUCC0) 25 | endif() 26 | finufft_link_test(perftest) 27 | endif() 28 | -------------------------------------------------------------------------------- /perftest/README: -------------------------------------------------------------------------------- 1 | Performance and development test directory for FINUFFT. 2 | 3 | spreadtestnd : time spread & interp for given dim, tol, etc. 4 | spreadtestndall : time spread or interp sweeping over all tols (w), given dim. 5 | [note the above two differ in 4th cmd-line arg being "tol" vs "dir"] 6 | big2d2f : tests int64_t (8byte int) indexing, ie data size > 2^31. 7 | 8 | Scripts: 9 | spreadtestall.sh : rapid test of spreadtestnd in all cases. 10 | spreadtestnd.sh : performance test of spreader only, in dims 1,2, or 3. 11 | nuffttestnd.sh : performance test of NUFFT library, in dims 1,2, or 3. 12 | mycpuinfo.sh : prints info about the CPU 13 | multispreadtestndall.sh : runs Marco's w-sweeping scripts all dims, precs. 14 | 15 | Possibly obsolete scripts (for developers): 16 | highaspect3d_test.sh : comparing various pizza-box orientations for speed 17 | 18 | 19 | Also see: 20 | ../test for validation tests 21 | -------------------------------------------------------------------------------- /perftest/big2d2f.cpp: -------------------------------------------------------------------------------- 1 | /* This is a 2D type-2 demo calling FINUFFT for big number of transforms, that 2 | results in a number of data exceeding the max signed int value of 2^31. 3 | This verifies correct handling via int64_t (8byte) indexing. 4 | It takes about 30 s to run on 8 threads, and demands about 30 GB of RAM. 5 | 6 | See makefile for compilation. Libin Lu 6/7/22; edits Alex Barnett. 7 | */ 8 | 9 | // this is all you must include for the finufft lib... 
10 | #include 11 | // NOTE(review): the header names after each #include and the std::vector element types appear to be missing from this copy - confirm against the original file 12 | // also used in this example... 13 | #include 14 | #include 15 | #include 16 | #include 17 | using namespace std; 18 | // Allocates the point, strength and mode arrays, calls finufftf2d2many once, prints its status and returns it. 19 | int test_finufft(finufft_opts *opts) { 20 | size_t nj = 129 * 129 * 2; 21 | size_t ms = 129, mt = 129; 22 | size_t ntrans = 75000; // the point is: 129*129*2*75000 > 2^31 ~ 2.15e9 23 | std::vector x(nj); // bunch of zero data 24 | std::vector y(nj); 25 | std::vector> cj(ntrans * nj); 26 | std::vector> fk(ntrans * ms * mt); 27 | 28 | int ier = finufftf2d2many(ntrans, nj, x.data(), y.data(), cj.data(), -1, 1e-3, ms, mt, 29 | fk.data(), opts); 30 | 31 | std::cout << "\tbig2d2f finufft status: " << ier << std::endl; 32 | return ier; 33 | } 34 | // Entry point: fills opts via finufftf_default_opts and uses the status from test_finufft as the exit code. 35 | int main(int argc, char *argv[]) { 36 | finufft_opts opts; 37 | finufftf_default_opts(&opts); 38 | return test_finufft(&opts); 39 | } 40 | -------------------------------------------------------------------------------- /perftest/checkGuruTiming.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # by Andrea Malleo, summer 2019.
3 | 4 | srcpts=1e7 5 | tolerance=1e-6 6 | debug=1 7 | modes[0]=1e6 8 | modes[1]=1 9 | modes[2]=1 10 | modes[3]=1e3 11 | modes[4]=1e3 12 | modes[5]=1 13 | modes[6]=1e2 14 | modes[7]=1e2 15 | modes[8]=1e2 16 | 17 | for dimension in 1 2 3 18 | do 19 | for type in 1 2 3 20 | do 21 | for n_trials in 1 20 41 22 | do 23 | declare -i row 24 | row=${dimension}-1 25 | 26 | declare -i index 27 | index=row*3 28 | 29 | declare -i modeNum 30 | modeNum1=${modes[index]} 31 | modeNum2=${modes[index+1]} 32 | modeNum3=${modes[index+2]} 33 | 34 | echo "./guru_timing_test ${n_trials} ${type} ${dimension} ${modeNum1} ${modeNum2} ${modeNum3} ${srcpts} ${tolerance} ${debug}" 35 | ./guru_timing_test ${n_trials} ${type} ${dimension} ${modeNum1} ${modeNum2} ${modeNum3} ${srcpts} ${tolerance} ${debug} 36 | done 37 | done 38 | done 39 | -------------------------------------------------------------------------------- /perftest/cuda/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(cuperftest cuperftest.cu) 2 | target_include_directories(cuperftest PUBLIC ${CUFINUFFT_INCLUDE_DIRS}) 3 | target_link_libraries(cuperftest cufinufft CUDA::cufft CUDA::cudart) 4 | target_compile_features(cuperftest PRIVATE cxx_std_17) 5 | target_compile_options(cuperftest PRIVATE $<$:--extended-lambda>) 6 | set_target_properties( 7 | cuperftest 8 | PROPERTIES 9 | LINKER_LANGUAGE CUDA 10 | CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}" 11 | CUDA_STANDARD 17 12 | CUDA_STANDARD_REQUIRED ON 13 | ) 14 | -------------------------------------------------------------------------------- /perftest/cuda/bench.sh: -------------------------------------------------------------------------------- 1 | ./cuperftest --prec d --n_runs 5 --N1 1e2 --N2 1e2 --M 2e6 --method 0 --tol 1e-4 2 | ./cuperftest --prec d --n_runs 5 --N1 1e1 --N2 1e1 --N3 1e1 --M 2e6 --method 0 --tol 1e-4 3 | ./cuperftest --prec d --n_runs 5 --N1 1e2 --N2 1e2 --N3 1e1 --M 2e6 --method 0 --tol 1e-4 4 
| ./cuperftest --prec d --n_runs 5 --N1 1e1 --N2 1e2 --N3 1e3 --M 2e6 --method 0 --tol 1e-4 5 | ./cuperftest --prec d --n_runs 5 --N1 1e2 --N2 1e2 --N3 1e3 --M 2e6 --method 0 --tol 1e-4 6 | #./cuperftest --prec d --n_runs 5 --N1 1e5 --N2 1e5 --N3 1e5 --M 2e6 --method 0 --tol 1e-10 7 | #./cuperftest --prec d --n_runs 5 --N1 1e4 --N2 1e4 --N3 1e4 --M 2e6 --method 0 --tol 1e-10 8 | #./cuperftest --prec d --n_runs 5 --N1 1e5 --N2 1e5 --N3 1e5 --M 2e6 --method 0 --tol 1e-10 9 | #./cuperftest --prec d --n_runs 5 --N1 1e6 --N2 1e6 --M 2e6 --method 0 --tol 1e-10 10 | #./cuperftest --prec d --n_runs 5 --N1 1e8 --N2 1e6 --M 2e6 --method 0 --tol 1e-10 11 | #./cuperftest --prec d --n_runs 5 --N1 1e6 --N2 1e6 --M 2e6 --method 0 --tol 1e-10 12 | #./cuperftest --prec d --n_runs 5 --N1 1e7 --N2 1e7 --M 2e6 --method 0 --tol 1e-10 13 | #./cuperftest --prec d --n_runs 5 --N1 1e8 --N2 1e8 --M 2e6 --method 0 --tol 1e-10 14 | -------------------------------------------------------------------------------- /perftest/getSpeedup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # by Andrea Malleo, summer 2019. 3 | 4 | ./checkGuruTiming.sh | grep -E 'guru_timing_test|speedup' 5 | -------------------------------------------------------------------------------- /perftest/highaspect3d_test.sh: -------------------------------------------------------------------------------- 1 | # test various poor aspect ratios in 3d 2 | # Barnett 2/6/17 3 | 4 | # fastest 5 | time ./finufft3d_test 10 400 400 1e6 1e-12 0 6 | 7 | # weird thing is this one is slowest even though z split is easy - RAM access? 
8 | time ./finufft3d_test 400 10 400 1e6 1e-12 0 9 | 10 | # expect poor when split only along z: 11 | time ./finufft3d_test 400 400 10 1e6 1e-12 0 12 | -------------------------------------------------------------------------------- /perftest/multispreadtestndall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # simple driver for Marco's sweeping-w spreadtest variant, all precs & dims. 3 | # used by the makefile. 4 | # all avail threads for now. 5 | # human has to check the output for now. 6 | # Barnett 6/4/24 7 | 8 | M=1e7 # problem size (# NU pts); it's a string 9 | N=1e6 # num U grid pts 10 | 11 | ./spreadtestndall 1 $M $N 1 1 12 | ./spreadtestndall 1 $M $N 2 1 13 | ./spreadtestndall 2 $M $N 1 1 14 | ./spreadtestndall 2 $M $N 2 1 15 | ./spreadtestndall 3 $M $N 1 1 16 | ./spreadtestndall 3 $M $N 2 1 17 | ./spreadtestndallf 1 $M $N 1 1 18 | ./spreadtestndallf 1 $M $N 2 1 19 | ./spreadtestndallf 2 $M $N 1 1 20 | ./spreadtestndallf 2 $M $N 2 1 21 | ./spreadtestndallf 3 $M $N 1 1 22 | ./spreadtestndallf 3 $M $N 2 1 23 | -------------------------------------------------------------------------------- /perftest/mycpuinfo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Barnett 2/2/17 3 | # Linux and OSX both, 11/1/18 4 | 5 | echo "what CPUs do I have?..." 6 | unameOut="$(uname -s)" 7 | case "${unameOut}" in 8 | Linux*) 9 | echo "(I'm in a linux OS)" 10 | grep "model name" /proc/cpuinfo | uniq 11 | if hash lscpu 2> /dev/null; then # only do it if cmd exists...
12 | lscpu 13 | fi 14 | ;; 15 | Darwin*) 16 | echo "(I'm in Mac OSX)" 17 | sysctl -n machdep.cpu.brand_string 18 | sysctl -a | grep machdep.cpu 19 | ;; 20 | *) 21 | echo "I'm in an unknown or unsupported operating system";; 22 | esac 23 | 24 | # help from: 25 | 26 | #lscpu | egrep 'Thread|Core|Socket|^CPU\(|MHz' 27 | # thanks to http://unix.stackexchange.com/questions/218074/how-to-know-number-of-cores-of-a-system-in-linux 28 | 29 | # https://www.cyberciti.biz/faq/lscpu-command-find-out-cpu-architecture-information/ 30 | 31 | # https://stackoverflow.com/questions/3466166/how-to-check-if-running-in-cygwin-mac-or-linux/27776822 32 | -------------------------------------------------------------------------------- /perftest/mymaxthreads.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Output number of logical cores as a string, OS-indep. Barnett 7/5/20. 3 | 4 | # Linux and OSX for now. this doesn't handle non-linux unices. 5 | 6 | unameOut="$(uname -s)" 7 | case "${unameOut}" in 8 | Linux*) 9 | lscpu -p | egrep -v '^#' | wc -l 10 | ;; 11 | Darwin*) 12 | sysctl -n hw.logicalcpu_max 13 | ;; 14 | MINGW*) 15 | # not sure this is correct... 16 | echo "$NUMBER_OF_PROCESSORS" 17 | ;; 18 | *) 19 | echo "I'm in an unknown or unsupported operating system: ${unameOut}" >&2 20 | ;; 21 | esac 22 | -------------------------------------------------------------------------------- /perftest/mynumcores.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Output number of physical cores as a string, OS-indep. Barnett 7/5/20. 3 | 4 | # see: 5 | # https://stackoverflow.com/questions/6481005/how-to-obtain-the-number-of-cpus-cores-in-linux-from-the-command-line 6 | # https://en.wikipedia.org/wiki/Uname 7 | 8 | # Linux and Mac only. this doesn't handle non-linux unices.
9 | 10 | unameOut="$(uname -s)" 11 | case "${unameOut}" in 12 | Linux*) 13 | lscpu -p | egrep -v '^#' | sort -u -t, -k 2,4 | wc -l 14 | ;; 15 | Darwin*) 16 | # sysctl -n machdep.cpu.core_count 17 | sysctl -n hw.physicalcpu_max 18 | ;; 19 | MINGW*) 20 | # not sure this is correct... 21 | echo "$NUMBER_OF_PROCESSORS" 22 | ;; 23 | *) 24 | echo "I'm in an unknown or unsupported operating system: ${unameOut}" >&2 25 | ;; 26 | esac 27 | -------------------------------------------------------------------------------- /perftest/nuffttestnd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # A basic set of performance tests for Flatiron Institute NUFFT. 3 | # Relies on test executables in ../test 4 | 5 | # Barnett 2/2/17, tidied 3/13/17. no sort 6/13/20. prec switch, threads 7/2/20. 6 | 7 | M=1e6 # problem size (sets both # NU pts and # U modes); it's a string 8 | TOL=1e-6 # overall requested accuracy, also a string 9 | DEBUG=0 # whether to see timing breakdowns 10 | 11 | #TESTTHREADS=$(./mymaxthreads.sh) # max threads (hyperthreading) 12 | TESTTHREADS=$(./mynumcores.sh) # one thread per core (no hyperthreading) 13 | 14 | echo "nuffttestnd output:" 15 | ./mycpuinfo.sh 16 | 17 | if [[ $1 == "SINGLE" ]]; then 18 | PREC=single 19 | PRECSUF=f 20 | else 21 | PREC=double 22 | PRECSUF= 23 | fi 24 | 25 | echo 26 | export OMP_NUM_THREADS=$TESTTHREADS 27 | echo "$PREC-precision $OMP_NUM_THREADS-thread tests: size = $M, tol = $TOL..." 28 | # currently we run 1e6 modes in each case, in non-equal dims (more generic): 29 | ../test/finufft1d_test$PRECSUF 1e6 $M $TOL $DEBUG 30 | ../test/finufft2d_test$PRECSUF 500 2000 $M $TOL $DEBUG 31 | ../test/finufft3d_test$PRECSUF 100 200 50 $M $TOL $DEBUG 32 | 33 | echo 34 | export OMP_NUM_THREADS=1 35 | echo "$PREC-precision $OMP_NUM_THREADS-thread tests: size = $M, tol = $TOL..." 
36 | ../test/finufft1d_test$PRECSUF 1e6 $M $TOL $DEBUG 37 | ../test/finufft2d_test$PRECSUF 500 2000 $M $TOL $DEBUG 38 | ../test/finufft3d_test$PRECSUF 100 200 50 $M $TOL $DEBUG 39 | -------------------------------------------------------------------------------- /perftest/results/gcc_vs_icc_xeon.txt: -------------------------------------------------------------------------------- 1 | Comparing GCC 6.4.0 vs ICC 17.0.4, on xeon E5-2643 v3 @ 3.40GHz 2 | 3/28/18 3 | 4 | spreadtestnd d 1e7 1e7 1e-6 2 5 | 6 | Timings in sec: 7 | 8 | single-thread: 9 | 10 | t1 t2 11 | 12 | GCC ICC GCC ICC 13 | 14 | 1d 2.7 1.7 3.3 2.7 15 | 2d 4.5 2.7 5.0 4.5 16 | 3d 8.6 5.7 10.9 8.4 17 | 18 | 24-thread: 19 | 20 | 1d .49 .38 .48 .36 21 | 2d .5 .34 .48 .37 22 | 3d .82 .62 .76 .58 23 | 24 | Note: sorting a bit slower under ICC, but spreading is faster. 25 | -------------------------------------------------------------------------------- /perftest/searchForTimeMetrics.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | 4 | decimalMatchString = "\d+\.?\d+" #regular expression to match a decimal number 5 | sciNotString = "(\d*.?\d*e-\d* s)" #regular expression to match a number in scientific notation 6 | wholeNumberMatchString = "\d+" 7 | 8 | 9 | #search string needs to have two groupings! 
(one for everything besides) (time s) 10 | def extractTime(searchString, strOut): # parse the number held in group 2 of the first searchString match in strOut; rounded to 5 places, starts at 0 11 | time = 0 12 | lineMatch = re.search(searchString,strOut) 13 | if(lineMatch): 14 | val = re.search(sciNotString,lineMatch.group(2)) 15 | if(not val): 16 | val = re.search(decimalMatchString, lineMatch.group(2)) 17 | if(not val): 18 | val = re.search(wholeNumberMatchString, lineMatch.group(2)) 19 | time = round(float(val.group(0).split('s')[0].strip()),5) # NOTE(review): val is None if group 2 holds no digits at all, which would raise AttributeError - confirm 20 | return time 21 | 22 | 23 | def sumAllTime(searchString, strOut): # add up the numbers held in the second group of every searchString match in strOut; total rounded to 5 places 24 | newVal = 0 25 | lineMatch = re.findall(searchString,strOut) 26 | for match in lineMatch: 27 | val = re.search(sciNotString, match[1]) 28 | if(not val): #search failed, try decimal format 29 | val = re.search(decimalMatchString, match[1]) 30 | if(not val): 31 | val = re.search(wholeNumberMatchString, match[1]) 32 | newVal = newVal + float(val.group(0).split('s')[0].strip()) #trim off " s" 33 | newVal = round(newVal,5) 34 | return newVal 35 | -------------------------------------------------------------------------------- /perftest/spreaderbench.py: -------------------------------------------------------------------------------- 1 | fast = 'new.txt' 2 | slow = 'old.txt' 3 | 4 | 5 | def read_data(filename): # read a benchmark log and return a 17-slot list of speeds indexed by nspread 6 | data = [0] * 17 7 | with open(filename) as f1: 8 | nspread = 0 9 | speed = 0 10 | for line in f1: 11 | if 'nspread' in line: 12 | nspread = int(line.split('=')[-1]) 13 | if 'pts/s' in line: 14 | speed = float(line.split(' ')[12]) # 13th space-separated field of the line 15 | data[nspread] = speed 16 | return data 17 | 18 | # compute relative increment in percentage between two numbers 19 | 20 | 21 | vec = read_data(fast)[2:] 22 | old = read_data(slow)[2:] 23 | 24 | # 1 : slow = x : fast 25 | # x = (1 - slow/fast) * 100 26 | i = 2 27 | for vec, old in zip(vec, old): 28 | diff = (1 - old/vec)*100 29 | print(f'nspread={i:02d} delta={diff:.3f}%') 30 | i+=1 31 | -------------------------------------------------------------------------------- /perftest/spreadtestall.sh:
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # tester for spreadinterp component that hits as many options & code lines 3 | # as possible. Takes around 2-3 seconds wall-clock time. 4 | # No math test is done (human could check "rel err" outputs small), 5 | # since this is based on a speed-testing executable (spreadtestnd). 6 | # For pass-fail math tests instead see ../test/finufft*test 7 | # Barnett 10/23/20 8 | 9 | M=1e6 # problem size (# NU pts) 10 | N=1e6 # num U grid pts 11 | 12 | # one thread per core (ie no hyperthreading)... 13 | export OMP_NUM_THREADS=$(./mynumcores.sh) 14 | 15 | echo "spreadtestall.sh :" 16 | echo "" 17 | echo "Double-prec spread/interp tests --------------------------------------" 18 | echo "=========== default kernel choice ============" 19 | TOL=1e-6 # req precision 20 | ./spreadtestnd 1 $M $N $TOL 21 | ./spreadtestnd 2 $M $N $TOL 22 | ./spreadtestnd 3 $M $N $TOL 23 | echo "=========== kerevalmeth=0 nonstandard upsampfac + debug ============" 24 | # nonstandard upsampfac to test with the direct kernel eval (slower)... 
25 | UP=1.5 26 | # debug output 27 | DEB=1 28 | ./spreadtestnd 1 $M $N $TOL 2 0 $DEB 0 0 $UP 29 | ./spreadtestnd 2 $M $N $TOL 2 0 $DEB 0 0 $UP 30 | ./spreadtestnd 3 $M $N $TOL 2 0 $DEB 0 0 $UP 31 | 32 | echo "" 33 | echo "Single-prec spread/interp tests --------------------------------------" 34 | echo "=========== default kernel choice ============" 35 | TOL=1e-3 # req precision 36 | ./spreadtestndf 1 $M $N $TOL 37 | ./spreadtestndf 2 $M $N $TOL 38 | ./spreadtestndf 3 $M $N $TOL 39 | echo "=========== kerevalmeth=0 nonstandard upsampfac + debug ============" 40 | ./spreadtestndf 1 $M $N $TOL 2 0 $DEB 0 0 $UP 41 | ./spreadtestndf 2 $M $N $TOL 2 0 $DEB 0 0 $UP 42 | ./spreadtestndf 3 $M $N $TOL 2 0 $DEB 0 0 $UP 43 | -------------------------------------------------------------------------------- /perftest/spreadtestnd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # a basic quick set of quasi-uniform multidimensional spreader speed tests. 3 | # Usage: 4 | # double-prec: ./spreadtestnd.sh 5 | # single-prec: ./spreadtestnd.sh SINGLE 6 | 7 | # Barnett started 2/2/17. both-precision handling, choose # threads 7/3/20. 8 | 9 | M=1e6 # problem size (# NU pts) 10 | N=1e6 # num U grid pts 11 | TOL=1e-6 # overall requested accuracy 12 | 13 | #TESTTHREADS=$(./mymaxthreads.sh) # max threads (hyperthreading) 14 | TESTTHREADS=$(./mynumcores.sh) # one thread per core (no hyperthreading) 15 | 16 | echo "spreadtestnd output:" 17 | ./mycpuinfo.sh 18 | 19 | if [[ $1 == "SINGLE" ]]; then 20 | PREC=single 21 | ST=./spreadtestndf 22 | else 23 | PREC=double 24 | ST=./spreadtestnd 25 | fi 26 | 27 | echo 28 | export OMP_NUM_THREADS=$TESTTHREADS 29 | echo "$PREC-precision $OMP_NUM_THREADS-thread tests: #NU = $M, #U = $N, tol = $TOL..." 30 | $ST 1 $M $N $TOL 31 | $ST 2 $M $N $TOL 32 | $ST 3 $M $N $TOL 33 | 34 | echo 35 | export OMP_NUM_THREADS=1 36 | echo "$PREC-precision $OMP_NUM_THREADS-thread tests: #NU = $M, #U = $N, tol = $TOL..." 
37 | $ST 1 $M $N $TOL 38 | $ST 2 $M $N $TOL 39 | $ST 3 $M $N $TOL 40 | -------------------------------------------------------------------------------- /python/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if(FINUFFT_USE_CPU) 2 | if(WIN32) 3 | install(TARGETS finufft LIBRARY DESTINATION finufft RUNTIME DESTINATION finufft) 4 | else() 5 | install(TARGETS finufft LIBRARY DESTINATION finufft) 6 | endif() 7 | endif() 8 | 9 | if(FINUFFT_USE_CUDA) 10 | install(TARGETS cufinufft LIBRARY DESTINATION cufinufft) 11 | endif() 12 | 13 | # Warn if the user invokes CMake directly 14 | if(NOT SKBUILD) 15 | message( 16 | WARNING 17 | "\ 18 | This CMake file is meant to be executed using 'scikit-build-core'. 19 | Running it directly will almost certainly not produce the desired 20 | result. If you are a user trying to install this package, use the 21 | command below, which will install all necessary build dependencies, 22 | compile the package in an isolated environment, and then install it. 23 | ===================================================================== 24 | $ pip install python/finufft 25 | or 26 | $ pip install python/cufinufft 27 | ===================================================================== 28 | If you are a software developer, and this is your own package, then 29 | it is usually much more efficient to install the build dependencies 30 | in your environment once and use the following command that avoids 31 | a costly creation of a new virtual environment at every compilation: 32 | ===================================================================== 33 | $ pip install finufft scikit-build-core[pyproject] 34 | $ pip install --no-build-isolation -ve . 35 | ===================================================================== 36 | You may optionally add -Ceditable.rebuild=true to auto-rebuild when 37 | the package is imported. Otherwise, you need to rerun the above 38 | after editing C++ files." 
39 | ) 40 | endif() 41 | -------------------------------------------------------------------------------- /python/cufinufft/README.md: -------------------------------------------------------------------------------- 1 | # FINUFFT GPU library Python wrappers 2 | 3 | This is a Python interface to the efficient GPU CUDA implementation of the 1-, 2- and 4 | 3-dimensional nonuniform fast Fourier transform (NUFFT), provided 5 | in the FINUFFT library. It performs type 6 | 1 (nonuniform to uniform) or type 2 (uniform to nonuniform) transforms. 7 | For a mathematical description of the NUFFT and applications to signal 8 | processing, imaging, and scientific computing, see [the FINUFFT 9 | documentation](https://finufft.readthedocs.io). 10 | The Python GPU interface is [here](https://finufft.readthedocs.io/en/latest/python_gpu.html). 11 | Usage examples can be found in the examples folder in the same directory as 12 | the file you are reading. 13 | 14 | If you use this GPU feature of our package, please cite our GPU paper: 15 | 16 | Y. Shih, G. Wright, J. Andén, J. Blaschke, A. H. Barnett (2021). 17 | cuFINUFFT: a load-balanced GPU library for general-purpose nonuniform FFTs. 18 | arXiv preprint arXiv:2102.08463. 19 | [(paper)](https://arxiv.org/abs/2102.08463) 20 | [(bibtex)](https://arxiv.org/bibtex/2102.08463) 21 | 22 | **Note**: With version 2.2 we have changed the GPU interfaces slightly to better align with FINUFFT. For an outline of the changes, please see [the migration guide](https://finufft.readthedocs.io/en/latest/cufinufft_migration.html). 
23 | -------------------------------------------------------------------------------- /python/cufinufft/cufinufft/__init__.py: -------------------------------------------------------------------------------- 1 | from cufinufft._plan import Plan 2 | 3 | from cufinufft._simple import (nufft1d1, nufft1d2, nufft2d1, nufft2d2, 4 | nufft3d1, nufft3d2) 5 | 6 | __all__ = ["nufft1d1", "nufft1d2", 7 | "nufft2d1", "nufft2d2", 8 | "nufft3d1", "nufft3d2", 9 | "Plan"] 10 | 11 | __version__ = '2.4.0' 12 | -------------------------------------------------------------------------------- /python/cufinufft/examples/example2d2_pycuda.py: -------------------------------------------------------------------------------- 1 | """ 2 | Demonstrate the type 2 NUFFT using cuFINUFFT 3 | """ 4 | 5 | import numpy as np 6 | 7 | import pycuda.autoinit 8 | from pycuda.gpuarray import to_gpu 9 | 10 | import cufinufft 11 | 12 | # Set up parameters for problem. 13 | N1, N2 = 37, 41 # Size of uniform grid 14 | M = 17 # Number of nonuniform points 15 | n_transf = 2 # Number of input arrays 16 | eps = 1e-6 # Requested tolerance 17 | dtype = np.float32 # Datatype (real) 18 | complex_dtype = np.complex64 # Datatype (complex) 19 | 20 | # Generate coordinates of non-uniform points. 21 | x = np.random.uniform(-np.pi, np.pi, size=M) 22 | y = np.random.uniform(-np.pi, np.pi, size=M) 23 | 24 | # Generate grid values. 25 | fk = (np.random.standard_normal((n_transf, N1, N2)) 26 | + 1j * np.random.standard_normal((n_transf, N1, N2))) 27 | 28 | # Cast to desired datatype. 29 | x = x.astype(dtype) 30 | y = y.astype(dtype) 31 | fk = fk.astype(complex_dtype) 32 | 33 | # Initialize the plan and set the points. 34 | plan = cufinufft.Plan(2, (N1, N2), n_transf, eps=eps, dtype=complex_dtype) 35 | plan.setpts(to_gpu(x), to_gpu(y)) 36 | 37 | # Execute the plan, reading from the uniform grid fk and storing the result 38 | # in c_gpu. 39 | c_gpu = plan.execute(to_gpu(fk)) 40 | 41 | # Retrieve the result from the GPU.
42 | c = c_gpu.get() 43 | 44 | # Check accuracy of the transform at index jt. 45 | jt = M // 2 46 | 47 | for i in range(n_transf): 48 | # Calculate the true value of the type 2 transform at the index jt. 49 | m, n = np.mgrid[-(N1 // 2):(N1 + 1) // 2, -(N2 // 2):(N2 + 1) // 2] 50 | c_true = np.sum(fk[i] * np.exp(-1j * (m * x[jt] + n * y[jt]))) 51 | 52 | # Calculate the absolute and relative error. 53 | err = np.abs(c[i, jt] - c_true) 54 | rel_err = err / np.max(np.abs(c[i])) 55 | 56 | print(f"[{i}] Absolute error on point [{jt}] is {err:.3g}") 57 | print(f"[{i}] Relative error on point [{jt}] is {rel_err:.3g}") 58 | 59 | assert(rel_err < 15 * eps) 60 | -------------------------------------------------------------------------------- /python/cufinufft/examples/getting_started_cupy.py: -------------------------------------------------------------------------------- 1 | import cupy as cp 2 | 3 | import cufinufft 4 | 5 | # number of nonuniform points 6 | M = 100000 7 | 8 | # grid size 9 | N = 200000 10 | 11 | # generate positions for the nonuniform points and the coefficients 12 | x_gpu = 2 * cp.pi * cp.random.uniform(size=M) 13 | c_gpu = (cp.random.standard_normal(size=M) 14 | + 1J * cp.random.standard_normal(size=M)) 15 | 16 | # compute the transform 17 | f_gpu = cufinufft.nufft1d1(x_gpu, c_gpu, (N,)) 18 | 19 | # move results off the GPU 20 | f = f_gpu.get() 21 | -------------------------------------------------------------------------------- /python/cufinufft/examples/getting_started_numba.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import numba.cuda 4 | 5 | import cufinufft 6 | 7 | # number of nonuniform points 8 | M = 100000 9 | 10 | # grid size 11 | N = 200000 12 | 13 | # generate positions for the nonuniform points and the coefficients 14 | x = 2 * np.pi * np.random.uniform(size=M) 15 | c = (np.random.standard_normal(size=M) + 1J * np.random.standard_normal(size=M)) 16 | 17 | # transfer to GPU 
18 | x_gpu = numba.cuda.to_device(x) 19 | c_gpu = numba.cuda.to_device(c) 20 | 21 | # compute the transform 22 | f_gpu = cufinufft.nufft1d1(x_gpu, c_gpu, (N,)) 23 | 24 | # move results off the GPU 25 | f = f_gpu.copy_to_host() 26 | -------------------------------------------------------------------------------- /python/cufinufft/examples/getting_started_pycuda.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import pycuda.autoinit 4 | from pycuda.gpuarray import to_gpu 5 | 6 | import cufinufft 7 | 8 | # number of nonuniform points 9 | M = 100000 10 | 11 | # grid size 12 | N = 200000 13 | 14 | # generate positions for the nonuniform points and the coefficients 15 | x = 2 * np.pi * np.random.uniform(size=M) 16 | c = (np.random.standard_normal(size=M) 17 | + 1J * np.random.standard_normal(size=M)) 18 | 19 | # move the data to GPU 20 | x_gpu = to_gpu(x) 21 | c_gpu = to_gpu(c) 22 | 23 | # compute the transform 24 | f_gpu = cufinufft.nufft1d1(x_gpu, c_gpu, (N,)) 25 | 26 | # move results off the GPU 27 | f = f_gpu.get() 28 | -------------------------------------------------------------------------------- /python/cufinufft/examples/getting_started_torch.py: -------------------------------------------------------------------------------- 1 | import cufinufft 2 | 3 | import torch 4 | 5 | # number of nonuniform points 6 | M = 100000 7 | 8 | # grid size 9 | N = 200000 10 | 11 | # generate positions for the nonuniform points and the coefficients 12 | x_gpu = 2 * torch.pi * torch.rand(size=(M,)).cuda() 13 | c_gpu = (torch.randn(size=(M,)) + 1J * torch.randn(size=(M,))).cuda() 14 | 15 | # compute the transform 16 | f_gpu = cufinufft.nufft1d1(x_gpu, c_gpu, (N,)) 17 | 18 | # move results off the GPU 19 | f = f_gpu.cpu() 20 | -------------------------------------------------------------------------------- /python/cufinufft/requirements.txt: 
-------------------------------------------------------------------------------- 1 | numpy 2 | six 3 | packaging 4 | -------------------------------------------------------------------------------- /python/cufinufft/tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import utils 4 | 5 | 6 | def pytest_addoption(parser): 7 | parser.addoption("--framework", action="append", default=[], help="List of frameworks") 8 | 9 | def pytest_generate_tests(metafunc): 10 | if "framework" in metafunc.fixturenames: 11 | metafunc.parametrize("framework", metafunc.config.getoption("framework")) 12 | 13 | @pytest.fixture 14 | def to_gpu(framework): 15 | to_gpu, _ = utils.transfer_funcs(framework) 16 | 17 | return to_gpu 18 | 19 | 20 | @pytest.fixture 21 | def to_cpu(framework): 22 | _, to_cpu = utils.transfer_funcs(framework) 23 | 24 | return to_cpu 25 | -------------------------------------------------------------------------------- /python/cufinufft/tests/test_array_ordering.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | 5 | from cufinufft import Plan, _compat 6 | 7 | import utils 8 | 9 | 10 | def test_type1_ordering(to_gpu, to_cpu, dtype=np.float32, shape=(16, 16, 16), M=4096, tol=1e-3): 11 | complex_dtype = utils._complex_dtype(dtype) 12 | 13 | k, c = utils.type1_problem(dtype, shape, M) 14 | 15 | k_gpu = to_gpu(k) 16 | c_gpu = to_gpu(c) 17 | 18 | plan = Plan(1, shape, eps=tol, dtype=complex_dtype) 19 | 20 | plan.setpts(*k_gpu) 21 | 22 | out = np.empty(shape, dtype=complex_dtype, order="F") 23 | 24 | out_gpu = to_gpu(out) 25 | 26 | with pytest.raises(TypeError, match="following requirement: C") as err: 27 | plan.execute(c_gpu, out=out_gpu) 28 | -------------------------------------------------------------------------------- /python/cufinufft/tests/test_examples.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | Discover and run Python example scripts as unit tests. 3 | """ 4 | 5 | import os 6 | import subprocess 7 | import sys 8 | from pathlib import Path 9 | 10 | import pytest 11 | 12 | examples_dir = os.path.join(Path(__file__).resolve().parents[1], "examples") 13 | 14 | scripts = [] 15 | for filename in os.listdir(examples_dir): 16 | if filename.endswith(".py"): 17 | scripts.append(os.path.join(examples_dir, filename)) 18 | 19 | @pytest.mark.parametrize("filename", scripts) 20 | def test_example(filename, request): 21 | # Extract framework from format `example_framework.py`. 22 | framework = Path(filename).stem.split("_")[-1] 23 | 24 | if framework in request.config.getoption("framework"): 25 | subprocess.check_call([sys.executable, filename]) 26 | else: 27 | pytest.skip("Example not in list of frameworks") 28 | -------------------------------------------------------------------------------- /python/cufinufft/tests/test_fallback.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | from ctypes.util import find_library 5 | 6 | 7 | # Check to make sure the fallback mechanism works if there is no bundled 8 | # dynamic library. 9 | @pytest.mark.skip(reason="Patching seems to fail in CI") 10 | def test_fallback(mocker): 11 | def fake_load_library(lib_name, path): 12 | if lib_name in ["libcufinufft", "cufinufft"]: 13 | raise OSError() 14 | else: 15 | return np.ctypeslib.load_library(lib_name, path) 16 | 17 | # Block out the bundled library. 18 | mocker.patch("numpy.ctypeslib.load_library", fake_load_library) 19 | 20 | # Make sure an error is raised if no system library is found. 
21 | if find_library("cufinufft") is None: 22 | with pytest.raises(ImportError, match="suitable cufinufft"): 23 | import cufinufft 24 | else: 25 | import cufinufft 26 | -------------------------------------------------------------------------------- /python/cufinufft/tests/test_multi.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | from cufinufft import Plan 5 | 6 | import utils 7 | 8 | 9 | def test_multi_type1(framework, dtype=np.float32, shape=(16, 16, 16), M=4096, tol=1e-3): 10 | if framework == "pycuda": 11 | import pycuda.driver as drv 12 | import pycuda.gpuarray as gpuarray 13 | else: 14 | pytest.skip("Multi-GPU support only tested for pycuda") 15 | 16 | complex_dtype = utils._complex_dtype(dtype) 17 | 18 | drv.init() 19 | 20 | dev_count = drv.Device.count() 21 | 22 | if dev_count == 1: 23 | pytest.skip() 24 | 25 | devs = [drv.Device(dev_id) for dev_id in range(dev_count)] 26 | 27 | dim = len(shape) 28 | 29 | errs = [] 30 | 31 | for dev_id, dev in enumerate(devs): 32 | ctx = dev.make_context() 33 | 34 | k = utils.gen_nu_pts(M, dim=dim).astype(dtype) 35 | c = utils.gen_nonuniform_data(M).astype(complex_dtype) 36 | 37 | k_gpu = gpuarray.to_gpu(k) 38 | c_gpu = gpuarray.to_gpu(c) 39 | fk_gpu = gpuarray.GPUArray(shape, dtype=complex_dtype) 40 | 41 | plan = Plan(1, shape, eps=tol, dtype=complex_dtype, 42 | gpu_device_id=dev_id) 43 | 44 | plan.setpts(k_gpu[0], k_gpu[1], k_gpu[2]) 45 | 46 | plan.execute(c_gpu, fk_gpu) 47 | 48 | fk = fk_gpu.get() 49 | 50 | ind = int(0.1789 * np.prod(shape)) 51 | 52 | fk_est = fk.ravel()[ind] 53 | fk_target = utils.direct_type1(c, k, shape, ind) 54 | 55 | type1_rel_err = np.abs(fk_target - fk_est) / np.abs(fk_target) 56 | 57 | print(f'Type 1 relative error (GPU {dev_id}):', type1_rel_err) 58 | 59 | ctx.pop() 60 | 61 | errs.append(type1_rel_err) 62 | 63 | assert all(err < 0.01 for err in errs) 64 | 
-------------------------------------------------------------------------------- /python/finufft/README.md: -------------------------------------------------------------------------------- 1 | # Flatiron Institute Nonuniform Fast Fourier Transform library: FINUFFT 2 | 3 | Principal author **Alex H. Barnett**, main co-developers Jeremy F. Magland, Ludvig af Klinteberg, Yu-hsuan "Melody" Shih, Andrea Malleo, Libin Lu, Joakim Andén, Marco Barbone, and Robert Blackwell. 4 | 5 | This package provides a Python interface to the CPU library, enabling fast computation of nonuniform discrete Fourier transforms to specified precision in one, two, and three dimensions. 6 | It supports transforms of type 1 (nonuniform to uniform), type 2 (uniform to nonuniform) and type 3 (nonuniform to nonuniform). 7 | For more information, see the [online documentation](https://finufft.readthedocs.io/en/latest/python.html). 8 | 9 | If you find FINUFFT useful in your work, please cite this package and our paper: 10 | 11 | A parallel non-uniform fast Fourier transform library based on an ``exponential of semicircle'' kernel. 12 | A. H. Barnett, J. F. Magland, and L. af Klinteberg. 13 | SIAM J. Sci. Comput. 41(5), C479–C504 (2019). 14 | -------------------------------------------------------------------------------- /python/finufft/examples/guru1d1.py: -------------------------------------------------------------------------------- 1 | # Simple 1d1 python interface call 2 | # Lu 02/07/20. 
3 | 4 | import time 5 | import finufft as fp 6 | import numpy as np 7 | 8 | np.random.seed(42) 9 | 10 | N = int(1e6) 11 | M = int(1e5) 12 | x = np.random.uniform(-np.pi, np.pi, M) 13 | c = np.random.randn(M) + 1.j * np.random.randn(M) 14 | F = np.zeros([N], dtype=np.complex128) # allocate F (modes out) 15 | n_modes = np.ones([1], dtype=np.int64) 16 | n_modes[0] = N 17 | 18 | strt = time.time() 19 | 20 | #plan 21 | plan = fp.Plan(1,(N,)) 22 | 23 | #set pts 24 | plan.setpts(x) 25 | 26 | #exec 27 | plan.execute(c,F) 28 | 29 | #timing 30 | print("Finished nufft in {0:.2g} seconds. Checking..." 31 | .format(time.time()-strt)) 32 | 33 | #check error 34 | n = 142519 # mode to check 35 | Ftest = 0.0 36 | # this is so slow... 37 | for j in range(M): 38 | Ftest += c[j] * np.exp(n * x[j] * 1.j) 39 | Fmax = np.max(np.abs(F)) 40 | err = np.abs((F[n + N // 2] - Ftest) / Fmax) 41 | print("Error relative to max of F: {0:.2e}".format(err)) 42 | -------------------------------------------------------------------------------- /python/finufft/examples/guru1d1f.py: -------------------------------------------------------------------------------- 1 | # Simple 1d1 python interface call 2 | # Lu 02/07/20. 3 | 4 | import time 5 | import finufft as fp 6 | import numpy as np 7 | 8 | np.random.seed(42) 9 | 10 | N = int(1e6) 11 | M = int(1e5) 12 | x = np.random.uniform(-np.pi, np.pi, M) 13 | x = x.astype('float32') 14 | c = np.random.randn(M) + 1.j * np.random.randn(M) 15 | c = c.astype('complex64') 16 | F = np.zeros([N], dtype=np.complex64) # allocate F (modes out) 17 | n_modes = np.ones([1], dtype=np.int64) 18 | n_modes[0] = N 19 | 20 | strt = time.time() 21 | 22 | # plan, using proper specifier for single-precision transform 23 | plan = fp.Plan(1,(N,),dtype='complex64') 24 | 25 | # set pts 26 | plan.setpts(x) 27 | 28 | # exec 29 | plan.execute(c,F) 30 | 31 | # timing 32 | print("Finished nufft in {0:.2g} seconds. Checking..." 
33 | .format(time.time()-strt)) 34 | 35 | # check error 36 | n = 143 # mode to check 37 | Ftest = 0.0 38 | # this is so slow... 39 | for j in range(M): 40 | Ftest += c[j] * np.exp(n * x[j] * 1.j) 41 | Fmax = np.max(np.abs(F)) 42 | err = np.abs((F[n + N // 2] - Ftest) / Fmax) 43 | print("Error relative to max of F: {0:.2e}".format(err)) 44 | -------------------------------------------------------------------------------- /python/finufft/examples/guru2d1.py: -------------------------------------------------------------------------------- 1 | # demo of vectorized 2D type 1 FINUFFT in python via guru interface. Should stay close to docs/python.rst 2 | # Lu 8/20/20 3 | 4 | import numpy as np 5 | import finufft 6 | import time 7 | np.random.seed(42) 8 | 9 | # number of nonuniform points 10 | M = 100000 11 | 12 | # the nonuniform points in the square [0,2pi)^2 13 | x = 2 * np.pi * np.random.uniform(size=M) 14 | y = 2 * np.pi * np.random.uniform(size=M) 15 | 16 | # number of transforms 17 | K = 4 18 | 19 | # generate K stacked strength arrays 20 | c = (np.random.standard_normal(size=(K, M)) 21 | + 1J * np.random.standard_normal(size=(K, M))) 22 | 23 | # desired number of Fourier modes (in x,y directions respectively) 24 | N1 = 1000 25 | N2 = 2000 26 | 27 | # specify type 1 transform 28 | nufft_type = 1 29 | 30 | # instantiate the plan (note n_trans must be set here), also setting tolerance: 31 | t0 = time.time() 32 | plan = finufft.Plan(nufft_type, (N1, N2), eps=1e-9, n_trans=K) 33 | 34 | # set the nonuniform points 35 | plan.setpts(x, y) 36 | 37 | # execute the plan (K transforms together, note c.shape must match) 38 | f = plan.execute(c) 39 | print("vectorized guru finufft2d1 done in {0:.2g} s.".format(time.time()-t0)) 40 | 41 | print(f.dtype) 42 | print(f.shape) 43 | 44 | k1 = 376 # do a math check, for a single output mode index (k1,k2) 45 | k2 = -1000 46 | t = K-2 # from the t'th transform 47 | assert((k1>=-N1/2.) & (k1=-N2/2.) & (k2=0) & (t=-N1/2.) & (k1=-N2/2.) 
& (k2=0) & (t=-N1/2.) & (k1=-N2/2.) & (k2=0) & (t=-N/2.) & (n=-N1/2.) & (k1=-N2/2.) & (k2= 1.12.0 2 | packaging 3 | -------------------------------------------------------------------------------- /python/finufft/test/README.md: -------------------------------------------------------------------------------- 1 | ## Basic accuracy and speed tests using the python wrappers 2 | 3 | To install the python wrappers for finufft 4 | see ../../../docs/install.rst 5 | 6 | Then you may run the tests as follows 7 | 8 | ``` 9 | python3 run_accuracy_tests.py 10 | python3 run_speed_tests.py 11 | ``` 12 | 13 | The codes `accuracy_speed_tests.py` and `../examples/*` illustrate how to call 14 | FINUFFT from python. 15 | -------------------------------------------------------------------------------- /python/finufft/test/run_accuracy_tests.py: -------------------------------------------------------------------------------- 1 | from accuracy_speed_tests import accuracy_speed_tests 2 | 3 | accuracy_speed_tests(100000,100000,1e-6) 4 | -------------------------------------------------------------------------------- /python/finufft/test/run_speed_tests.py: -------------------------------------------------------------------------------- 1 | from accuracy_speed_tests import accuracy_speed_tests 2 | 3 | accuracy_speed_tests(1e6,1e6,1e-6) # ahb lowered from 8e6 4 | -------------------------------------------------------------------------------- /python/finufft/test/test_fallback.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | from ctypes.util import find_library 5 | 6 | @pytest.mark.skip(reason="Patching seems to fail in CI") 7 | def test_fallback(mocker): 8 | def fake_load_library(lib_name, path): 9 | if lib_name in ["libfinufft", "finufft"]: 10 | raise OSError() 11 | else: 12 | return np.ctypeslib.load_library(lib_name, path) 13 | 14 | mocker.patch("numpy.ctypeslib.load_library", fake_load_library) 15 | 16 
| if find_library("finufft") is None: 17 | with pytest.raises(ImportError, match="suitable finufft"): 18 | import finufft 19 | else: 20 | import finufft 21 | -------------------------------------------------------------------------------- /src/cuda/1d/README: -------------------------------------------------------------------------------- 1 | - cufinufft1d.cu 2 | This file contains the execution functions 1d type 1,2 that are called in ../cufinufft.cu 3 | 4 | - spreadinterp1d.cu 5 | This file contains all the GPU kernels for 1d spreading, interpolation. 6 | 7 | - interp1d_wrapper.cu 8 | Wrappers for 1d interpolations. One method is implemented: 9 | (1) nonuniform driven, 10 | 11 | - spread1d_wrapper.cu 12 | Wrappers for 1d spreading. Two methods are implemented: 13 | (1) nonuniform driven, 14 | (2) subproblem 15 | -------------------------------------------------------------------------------- /src/cuda/2d/README: -------------------------------------------------------------------------------- 1 | - cufinufft2d.cu 2 | This file contains the execution functions 2d type 1,2 that are called in ../cufinufft.cu 3 | 4 | - spreadinterp2d.cu 5 | This file contains all the GPU kernels for 2d spreading, interpolation. 6 | 7 | - interp2d_wrapper.cu 8 | Wrappers for 2d interpolations. Two methods are implemented: 9 | (1) nonuniform driven, 10 | (2) subproblem 11 | 12 | - spread2d_wrapper.cu 13 | Wrappers for 2d spreading. Three methods are implemented: 14 | (1) nonuniform driven, 15 | (2) subproblem, 16 | -------------------------------------------------------------------------------- /src/cuda/3d/README: -------------------------------------------------------------------------------- 1 | - cufinufft3d.cu 2 | This file contains the execution functions for 3d type1,2 that are called in ../cufinufft.cu 3 | 4 | - spreadinterp3d.cu 5 | This file contains all the GPU kernels for 3d spreading, interpolation. 6 | 7 | - interp3d_wrapper.cu 8 | Wrappers for 3d interpolations. 
Two methods are implemented: 9 | (1) nonuniform driven, 10 | (2) subproblem 11 | 12 | - spread3d_wrapper.cu 13 | Wrappers for 3d spreading. Three methods are implemented: 14 | (1) nonuniform points driven, 15 | (2) subproblem, 16 | (4) block gather 17 | -------------------------------------------------------------------------------- /src/cuda/README: -------------------------------------------------------------------------------- 1 | Lead developer: Yu-Hsuan Melody Shih (New York University, now at nVidia) 2 | 3 | Other developers: (see github site for full list) 4 | 5 | Garrett Wright (Princeton) 6 | Joakim Anden (KTH) 7 | Johannes Blaschke (LBNL) 8 | Alex Barnett (CCM, Flatiron Institute) 9 | Robert Blackwell (SCC, Flatiron Institute) 10 | 11 | This project came out of Melody's 2018 and 2019 summer internships at 12 | the Flatiron Institute, advised by Alex Barnett. 13 | 14 | 15 | 16 | -------------- 17 | 18 | This folder contains the main source files of the GPU implementations. 19 | 20 | - cufinufft.cu 21 | Four main stages of cufinufft API. 22 | (1) cufinufft_makeplan, (2) cufinufft_setpts, (3) cufinufft_execute, (4) cufinufft_destroy. 23 | Also, cufinufft_default_opts may precede stage 1. 24 | 25 | - memtransfer_wrapper.cu 26 | Wrapper of allocation and free GPU memories for different dimensions and methods. 27 | 28 | - deconvolve_wrapper.cu 29 | GPU kernels and wrappers of deconvolve and amplify the input/output coefficients by correction factor. (Step 3 in Type 1; Step 1 in Type 2) 30 | -------------------------------------------------------------------------------- /test/checkallaccs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # test range of requested accuracies, for both spreader and nufft, for a given 3 | # single dimension. 4 | # Usage: ./checkallaccs.sh [dim] 5 | # where dim = 1, 2, or 3. 6 | # Barnett 2/17/17. 
Default dim=1 4/5/17 7 | 8 | DEFAULTDIM=1 9 | DIM=${1:-$DEFAULTDIM} 10 | echo checkallaccs for dim=$DIM : 11 | 12 | # finufft test size params (prod{N}.N < TEST_BIGPROB so compares direct transf) 13 | TEST1="1e3 1e3" 14 | TEST2="1e2 2e1 1e3" 15 | TEST3="1e1 2e1 3e1 1e3" 16 | # bash hack to make DIM switch between one of the above 3 choices 17 | TESTD=TEST$DIM 18 | TEST=${!TESTD} 19 | 20 | # other test args 21 | SORT=2 22 | UPSAMPFAC=2.0 23 | 24 | for acc in `seq 1 15`; 25 | do 26 | TOL=1e-$acc 27 | echo ----------requesting $TOL : 28 | # ./spreadtestnd $DIM 1e6 1e6 $TOL $SORT 29 | ./finufft${DIM}d_test $TEST $TOL 0 $SORT $UPSAMPFAC | grep dirft 30 | done 31 | -------------------------------------------------------------------------------- /test/cuda/README: -------------------------------------------------------------------------------- 1 | Testing (validation and performance) directory for GPU FINUFFT. 2 | 3 | All codes test either precision (source is templated to allow switching 4 | via the final command line argument "f" or "d"). 5 | 6 | cufinufft{1,2,3}d_test: accuracy/speed tests for single transform 7 | in a given dimension, either type 1 or 2. 8 | (exit code 0 is a pass). 9 | Call with no arguments for argument documentation. 10 | cufinufft2dmany_test: accuracy/speed tests for vectorized transforms 11 | in 2D only, either type 1 or 2. 12 | (exit code 0 is a pass). 13 | Call with no arguments for argument documentation. 14 | public_api_test: tests guru plan C++ interface for GPU on tiny problem. 15 | (exit code 0 is a pass). 16 | test_makeplan: test edge cases, illegal inputs in API, and error codes 17 | (exit code 0 is a pass). 18 | 19 | CMakeLists.txt : shows the complete set of tests (with args) done by CTest.
20 | -------------------------------------------------------------------------------- /test/cuda/spreadperf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # basic perf test of spread/interp for 2/3d, single/double 3 | # Barnett 1/29/21, some 1D added 12/2/21. 4 | 5 | BINDIR="./" 6 | 7 | n=1000000 8 | M=1000000 9 | dist=0 # 0= random unif, 1 = clustered 10 | Msub=10000 # claimed default is 65536 11 | tols=1e-5 12 | told=1e-12 13 | 14 | echo "spread 1D.............................................." 15 | $BINDIR/spread1d_test 1 $dist $n $Msub $M $told 16 | $BINDIR/spread1d_test 2 $dist $n $Msub $M $told 17 | $BINDIR/spread1d_testf 1 $dist $n $Msub $M $tols 18 | $BINDIR/spread1d_testf 2 $dist $n $Msub $M $tols 19 | 20 | echo "interp 1D.............................................." 21 | $BINDIR/interp1d_test 1 $dist $n $M $told 22 | $BINDIR/interp1d_testf 1 $dist $n $M $tols 23 | # note there is no meth=2 in 1D interp 24 | 25 | # 2D params... (n is grid size per dim) 26 | n=1000 27 | M=1000000 28 | 29 | echo "spread 2D.............................................." 30 | $BINDIR/spread2d_test 1 $dist $n $n $Msub $M $told 31 | $BINDIR/spread2d_test 2 $dist $n $n $Msub $M $told 32 | $BINDIR/spread2d_testf 1 $dist $n $n $Msub $M $tols 33 | $BINDIR/spread2d_testf 2 $dist $n $n $Msub $M $tols 34 | 35 | echo "interp 2D.............................................." 36 | $BINDIR/interp2d_test 1 $dist $n $n $M $told 37 | $BINDIR/interp2d_test 2 $dist $n $n $M $told 38 | $BINDIR/interp2d_testf 1 $dist $n $n $M $tols 39 | $BINDIR/interp2d_testf 2 $dist $n $n $M $tols 40 | 41 | 42 | # 3D params... 43 | n=100 44 | M=1000000 45 | 46 | echo "spread 3D.............................................." 
47 | $BINDIR/spread3d_test 1 $dist $n $n $n $Msub $M $told 48 | # note absence of meth=2 for 3D double 49 | $BINDIR/spread3d_testf 1 $dist $n $n $n $Msub $M $tols 50 | $BINDIR/spread3d_testf 2 $dist $n $n $n $Msub $M $tols 51 | 52 | echo "interp 3D.............................................." 53 | $BINDIR/interp3d_test 1 $dist $n $n $n $M $told 54 | # note absence of meth=2 for 3D double 55 | $BINDIR/interp3d_testf 1 $dist $n $n $n $M $tols 56 | $BINDIR/interp3d_testf 2 $dist $n $n $n $M $tols 57 | -------------------------------------------------------------------------------- /test/results/README: -------------------------------------------------------------------------------- 1 | Results directory for test outputs, just to tidy them away. 2 | 3 | Barnett 3/13/17; 8/18/20 4 | -------------------------------------------------------------------------------- /tools/common/docker/Dockerfile-x86_64: -------------------------------------------------------------------------------- 1 | FROM quay.io/pypa/manylinux2014_x86_64:2024-09-09-f386546 2 | LABEL maintainer "Joakim Andén" 3 | 4 | ENV PATH /opt/python/cp312-cp312/bin:${PATH} 5 | 6 | RUN pip install --root-user-action ignore --no-cache-dir --upgrade pip 7 | RUN pip install --root-user-action ignore --no-cache-dir --upgrade build toml-cli 8 | 9 | COPY . /io/finufft 10 | 11 | WORKDIR /io 12 | 13 | CMD ["/bin/bash"] 14 | -------------------------------------------------------------------------------- /tools/common/sdist-helper.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e -x 4 | 5 | dockerhub=janden 6 | image=finufft-sdist 7 | 8 | docker build --file tools/common/docker/Dockerfile-x86_64 \ 9 | --tag ${dockerhub}/${image} \ 10 | . 
11 | 12 | docker run --volume $(pwd)/wheelhouse:/io/wheelhouse \ 13 | ${dockerhub}/${image} \ 14 | /io/finufft/tools/finufft/build-sdist.sh 15 | 16 | docker run --volume $(pwd)/wheelhouse:/io/wheelhouse \ 17 | ${dockerhub}/${image} \ 18 | /io/finufft/tools/cufinufft/build-sdist.sh 19 | -------------------------------------------------------------------------------- /tools/cufinufft/build-library.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e -u -x 3 | 4 | rm -rf /io/build 5 | mkdir /io/build 6 | cd /io/build 7 | 8 | cmake -D FINUFFT_USE_CUDA=ON \ 9 | -D FINUFFT_USE_CPU=OFF \ 10 | -D FINUFFT_BUILD_TESTS=ON \ 11 | -D CMAKE_CUDA_ARCHITECTURES="50;60;70;80" \ 12 | -D CMAKE_CUDA_FLAGS="-Wno-deprecated-gpu-targets" \ 13 | -D BUILD_TESTING=ON \ 14 | .. 15 | 16 | make -j4 17 | -------------------------------------------------------------------------------- /tools/cufinufft/build-sdist.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e -x 4 | 5 | # Move pyproject.toml to root (otherwise no way to include C++ sources in sdist). 6 | cp python/cufinufft/pyproject.toml . 7 | 8 | # Fix paths in pyproject.toml to reflect the new directory structure. 9 | toml set --toml-path pyproject.toml \ 10 | tool.scikit-build.cmake.source-dir "." 11 | toml set --toml-path pyproject.toml \ 12 | tool.scikit-build.wheel.packages --to-array "[\"python/cufinufft/cufinufft\"]" 13 | toml set --toml-path pyproject.toml \ 14 | tool.scikit-build.metadata.version.input "python/cufinufft/cufinufft/__init__.py" 15 | 16 | # Package the sdist. 17 | python3 -m build --verbose --sdist --outdir wheelhouse . 
18 | -------------------------------------------------------------------------------- /tools/cufinufft/build-wheels.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e -u -x 3 | 4 | function get_python_binary { 5 | version="$1" 6 | echo "/opt/python/$version/bin" 7 | } 8 | 9 | function repair_wheel { 10 | py_version="$1" 11 | wheel="$2" 12 | 13 | py_binary=$(get_python_binary "${py_version}") 14 | 15 | if ! "${py_binary}/pip" show auditwheel > /dev/null 2>&1; then 16 | "${py_binary}/pip" install auditwheel 17 | fi 18 | 19 | if ! "${py_binary}/auditwheel" show "$wheel"; then 20 | echo "Skipping non-platform wheel $wheel" 21 | else 22 | "${py_binary}/auditwheel" repair "$wheel" --plat "$PLAT" -w /io/wheelhouse/ 23 | fi 24 | } 25 | 26 | # Explicitly list Python versions to build for 27 | py_versions=(cp36-cp36m \ 28 | cp37-cp37m \ 29 | cp38-cp38 \ 30 | cp39-cp39 \ 31 | cp310-cp310 \ 32 | cp311-cp311 \ 33 | cp312-cp312) 34 | 35 | # NOTE: For CUDA 12, cp36-cp36m and cp37-cp37m are broken since these force an 36 | # older version of pycuda (2022.1), which does not build under CUDA 12. 
37 | 38 | # Compile wheels 39 | for py_version in ${py_versions[@]}; do 40 | py_binary=$(get_python_binary ${py_version}) 41 | 42 | "${py_binary}/pip" install --upgrade pip 43 | "${py_binary}/pip" wheel /io/python/cufinufft --no-deps -w wheelhouse/ 44 | done 45 | 46 | 47 | # Bundle external shared libraries into the wheels 48 | audit_py_version="cp310-cp310" 49 | for whl in wheelhouse/*.whl; do 50 | repair_wheel "$audit_py_version" "$whl" 51 | done 52 | 53 | # Install packages and test 54 | for py_version in ${py_versions[@]}; do 55 | py_binary=$(get_python_binary ${py_version}) 56 | 57 | "${py_binary}/pip" install --pre cufinufft -f /io/wheelhouse 58 | "${py_binary}/pip" install pytest 59 | "${py_binary}/pytest" /io/python/cufinufft/tests 60 | done 61 | -------------------------------------------------------------------------------- /tools/cufinufft/distribution_helper.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -xe 2 | 3 | # Helper Script For Building Wheels 4 | 5 | manylinux_version=manylinux2014 6 | cuda_version=11.2 7 | dockerhub=janden 8 | 9 | image_name=cufinufft-cuda${cuda_version} 10 | 11 | echo "# Build the docker image" 12 | docker build \ 13 | --file tools/cufinufft/docker/cuda${cuda_version}/Dockerfile-x86_64 \ 14 | --tag ${dockerhub}/cufinufft-cuda${cuda_version} \ 15 | . 16 | 17 | echo "# Create the container and start it" 18 | docker create \ 19 | --gpus all \ 20 | --interactive \ 21 | --tty \ 22 | --volume $(pwd)/wheelhouse:/io/wheelhouse \ 23 | --name ${image_name} \ 24 | ${dockerhub}/${image_name} 25 | 26 | docker start ${image_name} 27 | 28 | echo "# Copy the code" 29 | docker cp . 
${image_name}:/io 30 | 31 | echo "# Build the wheels" 32 | docker exec ${image_name} \ 33 | python3 -m pip wheel \ 34 | --verbose \ 35 | /io/python/cufinufft \ 36 | --config-settings=cmake.define.FINUFFT_CUDA_ARCHITECTURES="50;60;70;80" \ 37 | --config-settings=cmake.define.CMAKE_CUDA_FLAGS="-Wno-deprecated-gpu-targets" \ 38 | --config-settings=cmake.define.FINUFFT_ARCH_FLAGS="" \ 39 | --config-settings=cmake.define.CMAKE_VERBOSE_MAKEFILE=ON \ 40 | --no-deps \ 41 | --wheel-dir /io/wheelhouse 42 | 43 | wheel_name=$(docker exec ${image_name} bash -c 'ls /io/wheelhouse/cufinufft-*-linux_x86_64.whl') 44 | 45 | echo "# Repair the wheels" 46 | docker exec ${image_name} \ 47 | python3 -m auditwheel repair \ 48 | ${wheel_name} \ 49 | --plat manylinux2014_x86_64 \ 50 | --wheel-dir /io/wheelhouse/ 51 | 52 | echo "# Shut down the container and remove it" 53 | docker stop ${image_name} 54 | docker rm ${image_name} 55 | 56 | echo "# Copy the wheels we care about to the dist folder" 57 | mkdir -p dist 58 | cp -v wheelhouse/cufinufft-*${manylinux_version}* dist 59 | 60 | # TODO: Test installing the wheels and running pytest. 61 | -------------------------------------------------------------------------------- /tools/cufinufft/docker/README: -------------------------------------------------------------------------------- 1 | These configurations are based off of manylinux2014, which is itself based off 2 | of centos8. 3 | 4 | These images extend manylinux with a compatible CUDA toolkit and runtime 5 | environment suitable for both building and running code inside docker. 
6 | -------------------------------------------------------------------------------- /tools/cufinufft/docker/cuda11.2/cuda.repo: -------------------------------------------------------------------------------- 1 | [cuda] 2 | name=cuda 3 | baseurl=https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64 4 | enabled=1 5 | gpgcheck=1 6 | gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA 7 | -------------------------------------------------------------------------------- /tools/cufinufft/docker/cuda11.8/cuda.repo: -------------------------------------------------------------------------------- 1 | [cuda] 2 | name=cuda 3 | baseurl=https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64 4 | enabled=1 5 | gpgcheck=1 6 | gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA 7 | -------------------------------------------------------------------------------- /tools/cufinufft/docker/cuda12.0/cuda.repo: -------------------------------------------------------------------------------- 1 | [cuda] 2 | name=cuda 3 | baseurl=https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64 4 | enabled=1 5 | gpgcheck=1 6 | gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA 7 | -------------------------------------------------------------------------------- /tools/cufinufft/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e -u -x 3 | 4 | py_versions=(cp36-cp36m \ 5 | cp37-cp37m \ 6 | cp38-cp38 \ 7 | cp39-cp39 \ 8 | cp310-cp310 \ 9 | cp311-cp311) 10 | 11 | for py_version in ${py_versions[@]}; do 12 | py_binary="/opt/python/$py_version/bin" 13 | 14 | "${py_binary}/pip" install --upgrade pip 15 | 16 | "${py_binary}/pip" install /io/python/cufinufft 17 | 18 | "${py_binary}/pip" install pytest 19 | "${py_binary}/pytest" /io/python/cufinufft/tests 20 | done 21 | -------------------------------------------------------------------------------- /tools/finufft/build-sdist.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e -x 4 | 5 | # Move pyproject.toml to root (otherwise no way to include C++ sources in sdist). 6 | cp python/finufft/pyproject.toml . 7 | 8 | # Fix paths in pyproject.toml to reflect the new directory structure. 9 | toml set --toml-path pyproject.toml \ 10 | tool.scikit-build.cmake.source-dir "." 11 | toml set --toml-path pyproject.toml \ 12 | tool.scikit-build.wheel.packages --to-array "[\"python/finufft/finufft\"]" 13 | toml set --toml-path pyproject.toml \ 14 | tool.scikit-build.metadata.version.input "python/finufft/finufft/__init__.py" 15 | 16 | # Package the sdist. 17 | python3 -m build --verbose --sdist --outdir wheelhouse . 18 | -------------------------------------------------------------------------------- /tools/finufft/build-wheels.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e -x 4 | 5 | cd /io/ 6 | 7 | # Replace native compilation flags with more generic ones. 8 | cp make-platforms/make.inc.manylinux make.inc 9 | 10 | # Clean up the build and make the library. 11 | make clean 12 | make lib 13 | 14 | # Test to make sure everything is ok. 15 | make test 16 | 17 | # Remove make.inc now that we're done. 
18 | rm make.inc 19 | 20 | # Needed for pip install to work 21 | export FINUFFT_DIR=$(pwd) 22 | # Needed for auditwheel to find the dynamic libraries 23 | export LD_LIBRARY_PATH=${FINUFFT_DIR}/lib:${LD_LIBRARY_PATH} 24 | 25 | pys=(/opt/python/*/bin) 26 | 27 | # Filter out old Python versions 28 | pys=(${pys[@]//*27*/}) 29 | pys=(${pys[@]//*34*/}) 30 | pys=(${pys[@]//*35*/}) 31 | 32 | for PYBIN in "${pys[@]}"; do 33 | "${PYBIN}/pip" install auditwheel wheel twine numpy 34 | "${PYBIN}/pip" wheel /io/python -w python/wheelhouse 35 | done 36 | # Repair only this package's wheels (not dependency wheels); package_name defaults to finufft unless set in the environment. 37 | for whl in python/wheelhouse/${package_name:-finufft}-*.whl; do 38 | auditwheel repair "$whl" -w /io/python/wheelhouse/ 39 | done 40 | -------------------------------------------------------------------------------- /tools/finufft/docker/Dockerfile-x86_64: -------------------------------------------------------------------------------- 1 | # We currently use manylinux2010 based on CentOS6, which has very old 2 | # fftw 3.2.1, too old for FINUFFT. We thus here compile FFTW from source (slow). 3 | # 4 | # Soon (11/30/2020) we'll want to update to manylinux2014 which has fftw 5 | # 3.3.3 (still old, 6 | # but functions with FINUFFT), and switch to: yum install fft3-devel 7 | # instead of building from source. 
8 | 9 | 10 | FROM quay.io/pypa/manylinux2010_x86_64:2024-09-09-f386546 11 | LABEL maintainer "Libin Lu" 12 | # 'set -e -x' must be in the same RUN as the commands it guards: shell options do not carry over to later RUN layers. 13 | RUN set -e -x; \ 14 | cd ~; \ 15 | curl http://www.fftw.org/fftw-3.3.8.tar.gz --output fftw-3.3.8.tar.gz; \ 16 | tar -xvzf fftw-3.3.8.tar.gz; \ 17 | cd fftw-3.3.8; \ 18 | export CFLAGS=-fPIC; \ 19 | ./configure --enable-threads --enable-openmp; \ 20 | make; \ 21 | make install; \ 22 | make clean; \ 23 | export CFLAGS=-fPIC; \ 24 | ./configure --enable-threads --enable-openmp --enable-float; \ 25 | make; \ 26 | make install; 27 | 28 | CMD ["/bin/bash"] 29 | -------------------------------------------------------------------------------- /tutorial/README: -------------------------------------------------------------------------------- 1 | Tutorials directory for FINUFFT 2 | 3 | So far, this is a collection of MATLAB/Octave codes, 4 | which make use of utilities in utils/ 5 | 6 | See the "Tutorials and application demos" section of the documentation. 7 | -------------------------------------------------------------------------------- /tutorial/applyAHA.m: -------------------------------------------------------------------------------- 1 | function AHAf = applyAHA(f,x,tol) % use pair of NUFFTs to apply A^* A 2 | Af = finufft1d2(x,+1,tol,f); % apply A 3 | AHAf = finufft1d1(x,Af,-1,tol,length(f)); % apply A^* 4 | end 5 | -------------------------------------------------------------------------------- /tutorial/applyToep.m: -------------------------------------------------------------------------------- 1 | function Tx = applyToep(x,vhat) 2 | % APPLYTOEP fast matrix-vector multiply with square Toeplitz matrix 3 | % 4 | % Tx = applyToep(x,vhat) multiplies vector x by the square N*N (generally 5 | % non-symmetric) Toeplitz matrix T defined by a vector v of length 2N-1 6 | % whose 2N-padded DFT vhat = fft([v;0]) the user must supply. 
7 | % The convention for v (as in Raymond 8 | % Chan's book) is the 1st row of T in reverse order followed by the 2nd through 9 | % last elements of the 1st column in usual order. In the literature v is 10 | % indexed -N+1:N-1. T*x is a discrete nonperiodic 11 | % convolution, and performed here by a FFT and iFFT pair. 12 | % This version uses FFTs of size 2N instead of 2N-1, since the latter has much 13 | % larger factors (it is often prime) which slow down the FFT dramatically. 14 | % 15 | % Inputs: x : input column vector length N 16 | % vhat : DFT of v after padding to length 2N (eg, by a single zero) 17 | % Output: Tx : T*x, col vec length N 18 | % 19 | % Without arguments does self-test; see this code for a demo of use 20 | 21 | % Barnett 11/7/22. Realized 2N-1 slow for FFT (can be prime!) -> 2N. 12/10/23 22 | if nargin==0, test_applyToep; return; end 23 | 24 | N = numel(x); 25 | assert(numel(vhat)==2*N) 26 | xpadhat = fft(x(:),2*N); % zero-pads out to size of vhat 27 | Tx = ifft(xpadhat .* vhat(:)); 28 | Tx = Tx(N:end-1); % extract correct chunk of padded output 29 | 30 | %%%%%%% 31 | function test_applyToep 32 | N = 10; % size to compare against direct matvec 33 | x = randn(N,1); 34 | t = randn(2*N-1,1); % define nonsymm Toep: back 1st row then down 1st col 35 | T = toeplitz(t(N:end),t(N:-1:1)); % munge single toep vec into (C,R) format 36 | tpad = [t;0]; that = fft(tpad); % shows user how to pad 37 | Tx = applyToep(x,that); 38 | fprintf('test_applyToep: Frob norm of diff btw fast and direct: %.3g\n',norm(T*x - Tx,'fro')) 39 | -------------------------------------------------------------------------------- /tutorial/serieseval2d.m: -------------------------------------------------------------------------------- 1 | % Demo evaluating a 2D Fourier series at arbitrary points in quasi-optimal 2 | % time via FINUFFT, in MATLAB. Barnett 6/3/20 3 | clear; close all; 4 | 5 | % we work in [0,2pi)^2. 
Set up a 2D Fourier series 6 | kmax = 500; % bandlimit per dim 7 | k = -kmax:kmax-1; % freq indices in each dim 8 | N1 = 2*kmax; N2 = N1; % # modes in each dim 9 | [k1 k2] = ndgrid(k,k); % grid of freq indices 10 | rng(0); 11 | fk = randn(N1,N2)+1i*randn(N1,N2); % iid random complex mode data 12 | % let's scale the amplitudes vs (k1,k2) to give a Gaussian random field with 13 | % isotropic (periodized) Matern kernel (ie, covariance is Yukawa for alpha=1)... 14 | k0 = 30; % freq scale parameter 15 | alpha = 3.7; % power; alpha>2 to converge in L^2 16 | fk = fk .* ((k1.^2+k2.^2)/k0^2 + 1).^(-alpha/2); % sqrt of spectral density 17 | 18 | M = 1e6; x = 2*pi*rand(1,M); y = 2*pi*rand(1,M); % random target points 19 | tol = 1e-9; 20 | tic; c = finufft2d2(x,y,+1,tol,fk); toc % evaluate Fourier series at (x,y)'s 21 | % Elapsed time is 0.130059 seconds. 22 | 23 | j = 1; % do math check on 1st target... 24 | c1 = sum(sum(fk.*exp(1i*(k1*x(j)+k2*y(j))))); 25 | abs(c1-c(j)) / norm(c,inf) 26 | 27 | figure(1); clf; 28 | jplot = 1:1e5; % indices to plot 29 | scatter(x(jplot),y(jplot),1.0,real(c(jplot)),'filled'); axis tight equal 30 | xlabel('x'); ylabel('y'); colorbar; title('Re f(x,y)'); 31 | set(gcf,'paperposition',[0 0 8 7]); print -dpng ../docs/pics/fser2d.png 32 | --------------------------------------------------------------------------------