├── .clang-format
├── .gitattributes
├── .gitignore
├── .gitmodules
├── .travis.yml
├── CMakeLists.txt
├── Doxyfile
├── Doxyfile.in
├── LICENSE.txt
├── Makefile
├── README.md
├── README_cmake.md
├── README_rungen.md
├── TACO_Benchmarks
    ├── Makefile.inc
    ├── autoscheduler
    │   ├── AutoSchedule.cpp
    │   ├── AutoSchedule.h
    │   ├── DerivativeUtils.cpp
    │   ├── DerivativeUtils.h
    │   ├── Errors.h
    │   ├── SimpleAutoSchedule.cpp
    │   └── SimpleAutoSchedule.h
    ├── bilateral_grid
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   ├── auto.csv
    │   ├── bilateral_grid_generator.cpp
    │   ├── filter.cpp
    │   ├── gather_data.py
    │   ├── grad.csv
    │   ├── man.csv
    │   └── manual.csv
    ├── camera_pipe
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   ├── auto.csv
    │   ├── camera_pipe_generator.cpp
    │   ├── gather_data.py
    │   ├── man.csv
    │   └── process.cpp
    ├── clean_tests.sh
    ├── conv_layer
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   ├── auto.csv
    │   ├── conv_layer_generator.cpp
    │   ├── gather_data.py
    │   ├── man.csv
    │   └── process.cpp
    ├── harris
    │   ├── Makefile
    │   ├── auto.csv
    │   ├── auto_runtime.csv
    │   ├── filter.cpp
    │   ├── gather_data.py
    │   ├── grad.csv
    │   ├── grad_runtime.csv
    │   ├── harris_generator.cpp
    │   ├── man_runtime.csv
    │   ├── manual.csv
    │   └── naive.csv
    ├── hist
    │   ├── Makefile
    │   ├── filter.cpp
    │   └── hist_generator.cpp
    ├── iir_blur_generator
    │   ├── Makefile
    │   ├── iir_blur_generator.cpp
    │   └── process.cpp
    ├── images
    │   ├── bayer_raw.png
    │   ├── gray.png
    │   ├── gray_small.png
    │   ├── rgb.png
    │   ├── rgb_small.png
    │   ├── rgb_small16.png
    │   └── rgba.png
    ├── interpolate_generator
    │   ├── Makefile
    │   ├── auto.csv
    │   ├── auto_runtime.csv
    │   ├── filter.cpp
    │   ├── gather_data.py
    │   ├── grad.csv
    │   ├── grad_runtime.csv
    │   ├── interpolate_generator.cpp
    │   ├── man_runtime.csv
    │   └── manual.csv
    ├── lens_blur
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   ├── auto.csv
    │   ├── gather_data.py
    │   ├── lens_blur_generator.cpp
    │   ├── man.csv
    │   └── process.cpp
    ├── local_laplacian
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   ├── auto_laplacian.csv
    │   ├── auto_runtime.csv
    │   ├── gather_data.py
    │   ├── grad_laplacian.csv
    │   ├── grad_runtime.csv
    │   ├── local_laplacian_generator.cpp
    │   ├── man_laplacian.csv
    │   ├── man_runtime.csv
    │   └── process.cpp
    ├── mat_mul
    │   ├── Makefile
    │   ├── filter.cpp
    │   └── mat_mul_generator.cpp
    ├── max_filter
    │   ├── Makefile
    │   ├── filter.cpp
    │   ├── gather_data.py
    │   ├── man.csv
    │   └── max_filter_generator.cpp
    ├── nl_means
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   ├── auto.csv
    │   ├── gather_data.py
    │   ├── grad.csv
    │   ├── man.csv
    │   ├── nested.csv
    │   ├── nl_means_generator.cpp
    │   └── process.cpp
    ├── plot_results_ti.py
    ├── plot_results_total.py
    ├── plot_results_xavier.py
    ├── run_tests.sh
    ├── run_tests_xavier.sh
    ├── setup_env.sh
    ├── stencil_chain
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   ├── auto.csv
    │   ├── gather_data.py
    │   ├── grad.csv
    │   ├── manual.csv
    │   ├── process.cpp
    │   ├── results_42070.txt
    │   └── stencil_chain_generator.cpp
    ├── support
    │   ├── Makefile.inc
    │   ├── autoscheduler.inc
    │   ├── benchmark_util.h
    │   ├── benchmark_util2.h
    │   └── viz_auto.sh
    └── unsharp
    │   ├── Makefile
    │   ├── auto.csv
    │   ├── filter.cpp
    │   ├── gather_data.py
    │   ├── grad.csv
    │   ├── manual.csv
    │   ├── ov.csv
    │   └── unsharp_generator.cpp
├── apps
    ├── CMakeLists.txt
    ├── HelloAndroid
    │   ├── .gitignore
    │   ├── AndroidManifest.xml
    │   ├── README.md
    │   ├── ant.properties
    │   ├── build-gradle.sh
    │   ├── build.gradle
    │   ├── build.sh
    │   ├── build.xml
    │   ├── gradle
    │   │   └── wrapper
    │   │   │   ├── gradle-wrapper.jar
    │   │   │   └── gradle-wrapper.properties
    │   ├── gradlew
    │   ├── gradlew.bat
    │   ├── jni
    │   │   ├── Android.mk
    │   │   ├── Application.mk
    │   │   ├── hello_generator.cpp
    │   │   └── hello_wrapper.cpp
    │   ├── res
    │   │   ├── drawable-hdpi
    │   │   │   └── ic_launcher.png
    │   │   ├── drawable-ldpi
    │   │   │   └── ic_launcher.png
    │   │   ├── drawable-mdpi
    │   │   │   └── ic_launcher.png
    │   │   ├── drawable-xhdpi
    │   │   │   └── ic_launcher.png
    │   │   ├── layout
    │   │   │   └── main.xml
    │   │   └── values
    │   │   │   └── strings.xml
    │   └── src
    │   │   └── com
    │   │       └── example
    │   │           └── hellohalide
    │   │               ├── CameraActivity.java
    │   │               ├── CameraPreview.java
    │   │               └── FrameHandler.java
    ├── HelloAndroidCamera2
    │   ├── .gitignore
    │   ├── AndroidManifest.xml
    │   ├── README.md
    │   ├── ant.properties
    │   ├── build-gradle.sh
    │   ├── build.gradle
    │   ├── build.sh
    │   ├── build.xml
    │   ├── gradle
    │   │   └── wrapper
    │   │   │   ├── gradle-wrapper.jar
    │   │   │   └── gradle-wrapper.properties
    │   ├── gradlew
    │   ├── gradlew.bat
    │   ├── jni
    │   │   ├── Android.mk
    │   │   ├── AndroidBufferUtilities.cpp
    │   │   ├── AndroidBufferUtilities.h
    │   │   ├── Application.mk
    │   │   ├── HalideFilters.cpp
    │   │   ├── LockedSurface.cpp
    │   │   ├── LockedSurface.h
    │   │   ├── YuvBufferT.cpp
    │   │   ├── YuvBufferT.h
    │   │   ├── deinterleave_generator.cpp
    │   │   └── edge_detect_generator.cpp
    │   ├── res
    │   │   ├── drawable-hdpi
    │   │   │   └── ic_launcher.png
    │   │   ├── drawable-ldpi
    │   │   │   └── ic_launcher.png
    │   │   ├── drawable-mdpi
    │   │   │   └── ic_launcher.png
    │   │   ├── drawable-xhdpi
    │   │   │   └── ic_launcher.png
    │   │   ├── layout
    │   │   │   ├── activity_camera.xml
    │   │   │   ├── fragment_camera2_basic.xml
    │   │   │   └── main.xml
    │   │   └── values
    │   │   │   ├── strings.xml
    │   │   │   └── styles.xml
    │   └── src
    │   │   └── com
    │   │       ├── android
    │   │           └── ex
    │   │           │   └── camera2
    │   │           │       ├── blocking
    │   │           │           ├── BlockingCameraManager.java
    │   │           │           ├── BlockingCaptureCallback.java
    │   │           │           ├── BlockingSessionCallback.java
    │   │           │           └── BlockingStateCallback.java
    │   │           │       ├── exceptions
    │   │           │           └── TimeoutRuntimeException.java
    │   │           │       ├── pos
    │   │           │           └── AutoFocusStateMachine.java
    │   │           │       └── utils
    │   │           │           ├── StateChangeListener.java
    │   │           │           ├── StateWaiter.java
    │   │           │           └── SysTrace.java
    │   │       └── example
    │   │           └── helloandroidcamera2
    │   │               ├── AndroidBufferUtilities.java
    │   │               ├── AutoFitSurfaceView.java
    │   │               ├── Camera2BasicFragment.java
    │   │               ├── CameraActivity.java
    │   │               ├── HalideFilters.java
    │   │               ├── HalideYuvBufferT.java
    │   │               └── NativeSurfaceHandle.java
    ├── HelloAndroidGL
    │   ├── AndroidManifest.xml
    │   ├── ant.properties
    │   ├── build.sh
    │   ├── build.xml
    │   ├── jni
    │   │   ├── Android.mk
    │   │   ├── Application.mk
    │   │   ├── android_halide_gl_native.cpp
    │   │   └── halide_gl_filter.cpp
    │   ├── project.properties
    │   ├── res
    │   │   ├── drawable-hdpi
    │   │   │   └── ic_launcher.png
    │   │   ├── drawable-ldpi
    │   │   │   └── ic_launcher.png
    │   │   ├── drawable-mdpi
    │   │   │   └── ic_launcher.png
    │   │   ├── drawable-xhdpi
    │   │   │   └── ic_launcher.png
    │   │   ├── layout
    │   │   │   └── main.xml
    │   │   └── values
    │   │   │   └── strings.xml
    │   └── src
    │   │   └── org
    │   │       └── halide_lang
    │   │           └── hellohalidegl
    │   │               └── HelloHalideGL.java
    ├── HelloMatlab
    │   ├── Makefile
    │   ├── iir_blur.cpp
    │   ├── run_blur.m
    │   └── run_blur.sh
    ├── HelloiOS
    │   ├── HelloiOS.xcodeproj
    │   │   └── project.pbxproj
    │   └── HelloiOS
    │   │   ├── AppDelegate.h
    │   │   ├── AppDelegate.mm
    │   │   ├── HalideView.h
    │   │   ├── HalideView.mm
    │   │   ├── HalideViewController.h
    │   │   ├── HalideViewController.mm
    │   │   ├── HelloiOS-Info.plist
    │   │   ├── HelloiOS-Prefix.pch
    │   │   ├── Images.xcassets
    │   │       ├── AppIcon.appiconset
    │   │       │   └── Contents.json
    │   │       └── LaunchImage.launchimage
    │   │       │   └── Contents.json
    │   │   ├── en.lproj
    │   │       └── InfoPlist.strings
    │   │   ├── main.mm
    │   │   └── reaction_diffusion_2_generator.cpp
    ├── auto_viz
    │   ├── Makefile
    │   ├── auto_viz_demo.cpp
    │   └── auto_viz_demo_generator.cpp
    ├── bilateral_grid
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   ├── bilateral_grid_generator.cpp
    │   ├── filter.cpp
    │   └── viz.sh
    ├── blur
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   ├── adb_run_on_device.sh
    │   ├── halide_blur_generator.cpp
    │   └── test.cpp
    ├── c_backend
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   ├── pipeline_cpp_generator.cpp
    │   ├── pipeline_generator.cpp
    │   ├── run.cpp
    │   └── run_cpp.cpp
    ├── camera_pipe
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   ├── camera_pipe_generator.cpp
    │   ├── process.cpp
    │   └── viz.sh
    ├── conv_layer
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   ├── conv_layer_generator.cpp
    │   └── process.cpp
    ├── cuda_mat_mul
    │   ├── Makefile
    │   ├── mat_mul_generator.cpp
    │   └── runner.cpp
    ├── fft
    │   ├── Makefile
    │   ├── complex.h
    │   ├── fft.cpp
    │   ├── fft.h
    │   ├── fft_aot_test.cpp
    │   ├── fft_generator.cpp
    │   ├── funct.h
    │   └── main.cpp
    ├── glsl
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   ├── halide_blur_glsl_generator.cpp
    │   ├── halide_ycc_glsl_generator.cpp
    │   └── opengl_test.cpp
    ├── hexagon_benchmarks
    │   ├── Makefile
    │   ├── adb_run_on_device.sh
    │   ├── conv3x3_generator.cpp
    │   ├── dilate3x3_generator.cpp
    │   ├── gaussian5x5_generator.cpp
    │   ├── median3x3_generator.cpp
    │   ├── process.cpp
    │   ├── process.h
    │   └── sobel_generator.cpp
    ├── hexagon_dma
    │   ├── Makefile
    │   ├── mock_dma_implementation.cpp
    │   ├── pipeline_raw_linear_interleaved_basic.cpp
    │   ├── pipeline_yuv_linear_basic.cpp
    │   ├── process_raw_linear_interleaved_basic.cpp
    │   └── process_yuv_linear_basic.cpp
    ├── images
    │   ├── bayer_raw.png
    │   ├── bayer_small.png
    │   ├── gray.png
    │   ├── gray_small.png
    │   ├── rgb.png
    │   ├── rgb_small.png
    │   ├── rgb_small16.png
    │   └── rgba.png
    ├── interpolate
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   └── interpolate.cpp
    ├── lens_blur
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   ├── lens_blur_generator.cpp
    │   └── process.cpp
    ├── linear_algebra
    │   ├── .gitignore
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   ├── benchmarks
    │   │   ├── CMakeLists.txt
    │   │   ├── cblas_benchmarks.cpp
    │   │   ├── clock.h
    │   │   ├── eigen_benchmarks.cpp
    │   │   ├── halide_benchmarks.cpp
    │   │   └── macros.h
    │   ├── src
    │   │   ├── CMakeLists.txt
    │   │   ├── blas_l1_generators.cpp
    │   │   ├── blas_l2_generators.cpp
    │   │   ├── blas_l3_generators.cpp
    │   │   ├── halide_blas.cpp
    │   │   └── halide_blas.h
    │   └── tests
    │   │   ├── CMakeLists.txt
    │   │   └── test_halide_blas.cpp
    ├── linear_blur
    │   ├── CMakeLists.txt
    │   ├── linear_blur_generator.cpp
    │   ├── linear_to_srgb_generator.cpp
    │   ├── run_linear_blur.cpp
    │   ├── simple_blur_generator.cpp
    │   └── srgb_to_linear_generator.cpp
    ├── local_laplacian
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   ├── local_laplacian_generator.cpp
    │   ├── process.cpp
    │   └── viz.sh
    ├── nl_means
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   ├── nl_means_generator.cpp
    │   └── process.cpp
    ├── nn_ops
    │   ├── AveragePool.cpp
    │   ├── AveragePool.sh
    │   ├── AveragePool_generator.cpp
    │   ├── Convolution.cpp
    │   ├── Convolution.sh
    │   ├── Convolution_generator.cpp
    │   ├── DepthwiseConvolution.cpp
    │   ├── DepthwiseConvolution.sh
    │   ├── DepthwiseConvolution_generator.cpp
    │   ├── Im2col.cpp
    │   ├── Im2col.sh
    │   ├── Im2col_generator.cpp
    │   ├── Makefile
    │   ├── MatrixMultiply.cpp
    │   ├── MatrixMultiply.sh
    │   ├── MatrixMultiply_generator.cpp
    │   ├── MaxPool.cpp
    │   ├── MaxPool.sh
    │   ├── MaxPool_generator.cpp
    │   ├── README.md
    │   ├── adb_run_on_device.sh
    │   ├── common.cpp
    │   ├── common.h
    │   ├── common_reference.cpp
    │   └── common_reference.h
    ├── opengl_demo
    │   ├── Makefile
    │   ├── README.md
    │   ├── glfw_helpers.cpp
    │   ├── glfw_helpers.h
    │   ├── image.png
    │   ├── layout.cpp
    │   ├── layout.h
    │   ├── main.cpp
    │   ├── opengl_helpers.cpp
    │   ├── opengl_helpers.h
    │   ├── png_helpers.cpp
    │   ├── png_helpers.h
    │   ├── sample_filter_generator.cpp
    │   ├── timer.cpp
    │   └── timer.h
    ├── openglcompute
    │   ├── AndroidManifest.xml
    │   ├── Makefile
    │   ├── build.sh
    │   ├── build.xml
    │   ├── jni
    │   │   ├── Android.mk
    │   │   ├── Application.mk
    │   │   ├── oglc_run.cpp
    │   │   └── oglc_two_kernels_run.cpp
    │   ├── res
    │   │   ├── drawable-hdpi
    │   │   │   └── ic_launcher.png
    │   │   ├── drawable-ldpi
    │   │   │   └── ic_launcher.png
    │   │   ├── drawable-mdpi
    │   │   │   └── ic_launcher.png
    │   │   ├── drawable-xhdpi
    │   │   │   └── ic_launcher.png
    │   │   ├── layout
    │   │   │   └── main.xml
    │   │   └── values
    │   │   │   └── strings.xml
    │   ├── src
    │   │   └── com
    │   │   │   └── example
    │   │   │       └── hellohalideopenglcompute
    │   │   │           └── HalideOpenGLComputeActivity.java
    │   ├── test_oglc_avg.cpp
    │   └── test_two_kernels.cpp
    ├── resize
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   ├── resize.cpp
    │   └── resize_generator.cpp
    ├── resnet_50
    │   ├── Makefile
    │   ├── Resnet50Generator.cpp
    │   ├── load_weights.py
    │   ├── process.cpp
    │   └── validate_resnet50_output.py
    ├── simd_op_check
    │   ├── Makefile
    │   └── driver.cpp
    ├── stencil_chain
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   ├── process.cpp
    │   └── stencil_chain_generator.cpp
    ├── support
    │   ├── Makefile.inc
    │   └── viz_auto.sh
    └── wavelet
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   ├── README.md
    │   ├── daubechies_constants.h
    │   ├── daubechies_x_generator.cpp
    │   ├── haar_x_generator.cpp
    │   ├── inverse_daubechies_x_generator.cpp
    │   ├── inverse_haar_x_generator.cpp
    │   └── wavelet.cpp
├── halide.cmake
├── python_bindings
    ├── Makefile
    ├── apps
    │   ├── bilateral_grid.py
    │   ├── blur.py
    │   ├── erode.py
    │   ├── interpolate.py
    │   └── local_laplacian.py
    ├── correctness
    │   ├── addconstant_generator.cpp
    │   ├── addconstant_test.py
    │   ├── basics.py
    │   ├── bit_generator.cpp
    │   ├── bit_test.py
    │   ├── boundary_conditions.py
    │   ├── buffer.py
    │   ├── compile_to.py
    │   ├── complexstub_generator.cpp
    │   ├── division.py
    │   ├── extern.py
    │   ├── iroperator.py
    │   ├── multipass_constraints.py
    │   ├── nobuildmethod_generator.cpp
    │   ├── partialbuildmethod_generator.cpp
    │   ├── pystub.py
    │   ├── rdom.py
    │   ├── simplestub_generator.cpp
    │   ├── target.py
    │   ├── the_sort_function.c
    │   ├── tuple_select.py
    │   ├── type.py
    │   ├── user_context_generator.cpp
    │   ├── user_context_test.py
    │   └── var.py
    ├── readme.md
    ├── requirements.txt
    ├── src
    │   ├── PyArgument.cpp
    │   ├── PyArgument.h
    │   ├── PyBinaryOperators.h
    │   ├── PyBoundaryConditions.cpp
    │   ├── PyBoundaryConditions.h
    │   ├── PyBuffer.cpp
    │   ├── PyBuffer.h
    │   ├── PyConciseCasts.cpp
    │   ├── PyConciseCasts.h
    │   ├── PyEnums.cpp
    │   ├── PyEnums.h
    │   ├── PyError.cpp
    │   ├── PyError.h
    │   ├── PyExpr.cpp
    │   ├── PyExpr.h
    │   ├── PyExternFuncArgument.cpp
    │   ├── PyExternFuncArgument.h
    │   ├── PyFunc.cpp
    │   ├── PyFunc.h
    │   ├── PyFuncRef.cpp
    │   ├── PyFuncRef.h
    │   ├── PyHalide.cpp
    │   ├── PyHalide.h
    │   ├── PyIROperator.cpp
    │   ├── PyIROperator.h
    │   ├── PyImageParam.cpp
    │   ├── PyImageParam.h
    │   ├── PyInlineReductions.cpp
    │   ├── PyInlineReductions.h
    │   ├── PyLambda.cpp
    │   ├── PyLambda.h
    │   ├── PyLoopLevel.cpp
    │   ├── PyLoopLevel.h
    │   ├── PyMachineParams.cpp
    │   ├── PyMachineParams.h
    │   ├── PyModule.cpp
    │   ├── PyModule.h
    │   ├── PyOutputs.cpp
    │   ├── PyOutputs.h
    │   ├── PyParam.cpp
    │   ├── PyParam.h
    │   ├── PyPipeline.cpp
    │   ├── PyPipeline.h
    │   ├── PyRDom.cpp
    │   ├── PyRDom.h
    │   ├── PyScheduleMethods.h
    │   ├── PyStage.cpp
    │   ├── PyStage.h
    │   ├── PyTarget.cpp
    │   ├── PyTarget.h
    │   ├── PyTuple.cpp
    │   ├── PyTuple.h
    │   ├── PyType.cpp
    │   ├── PyType.h
    │   ├── PyVar.cpp
    │   ├── PyVar.h
    │   ├── PyVarOrRVar.cpp
    │   └── PyVarOrRVar.h
    ├── stub
    │   ├── PyStub.cpp
    │   └── PyStubImpl.cpp
    ├── todo.txt
    └── tutorial
    │   ├── lesson_01_basics.py
    │   ├── lesson_02_input_image.py
    │   ├── lesson_03_debugging_1.py
    │   ├── lesson_04_debugging_2.py
    │   ├── lesson_05_scheduling_1.py
    │   ├── lesson_06_realizing_over_shifted_domains.py
    │   ├── lesson_07_multi_stage_pipelines.py
    │   ├── lesson_08_scheduling_2.py
    │   ├── lesson_09_update_definitions.py
    │   ├── lesson_10_aot_compilation_generate.py
    │   ├── lesson_10_aot_compilation_run.py
    │   ├── lesson_11_cross_compilation.py
    │   ├── lesson_12_using_the_gpu.py
    │   ├── lesson_13_tuples.py
    │   └── lesson_14_types.py
├── src
    ├── AddImageChecks.cpp
    ├── AddImageChecks.h
    ├── AddParameterChecks.cpp
    ├── AddParameterChecks.h
    ├── AlignLoads.cpp
    ├── AlignLoads.h
    ├── AllocationBoundsInference.cpp
    ├── AllocationBoundsInference.h
    ├── ApplySplit.cpp
    ├── ApplySplit.h
    ├── Argument.cpp
    ├── Argument.h
    ├── AssociativeOpsTable.cpp
    ├── AssociativeOpsTable.h
    ├── Associativity.cpp
    ├── Associativity.h
    ├── AsyncProducers.cpp
    ├── AsyncProducers.h
    ├── AutoSchedule.cpp
    ├── AutoSchedule.h
    ├── AutoScheduleUtils.cpp
    ├── AutoScheduleUtils.h
    ├── BoundSmallAllocations.cpp
    ├── BoundSmallAllocations.h
    ├── BoundaryConditions.cpp
    ├── BoundaryConditions.h
    ├── Bounds.cpp
    ├── Bounds.h
    ├── BoundsInference.cpp
    ├── BoundsInference.h
    ├── Buffer.cpp
    ├── Buffer.h
    ├── CMakeLists.txt
    ├── CPlusPlusMangle.cpp
    ├── CPlusPlusMangle.h
    ├── CSE.cpp
    ├── CSE.h
    ├── CanonicalizeGPUVars.cpp
    ├── CanonicalizeGPUVars.h
    ├── Closure.cpp
    ├── Closure.h
    ├── CodeGen_ARM.cpp
    ├── CodeGen_ARM.h
    ├── CodeGen_C.cpp
    ├── CodeGen_C.h
    ├── CodeGen_D3D12Compute_Dev.cpp
    ├── CodeGen_D3D12Compute_Dev.h
    ├── CodeGen_GPU_Dev.cpp
    ├── CodeGen_GPU_Dev.h
    ├── CodeGen_GPU_Host.cpp
    ├── CodeGen_GPU_Host.h
    ├── CodeGen_Hexagon.cpp
    ├── CodeGen_Hexagon.h
    ├── CodeGen_Internal.cpp
    ├── CodeGen_Internal.h
    ├── CodeGen_LLVM.cpp
    ├── CodeGen_LLVM.h
    ├── CodeGen_MIPS.cpp
    ├── CodeGen_MIPS.h
    ├── CodeGen_Metal_Dev.cpp
    ├── CodeGen_Metal_Dev.h
    ├── CodeGen_OpenCL_Dev.cpp
    ├── CodeGen_OpenCL_Dev.h
    ├── CodeGen_OpenGLCompute_Dev.cpp
    ├── CodeGen_OpenGLCompute_Dev.h
    ├── CodeGen_OpenGL_Dev.cpp
    ├── CodeGen_OpenGL_Dev.h
    ├── CodeGen_PTX_Dev.cpp
    ├── CodeGen_PTX_Dev.h
    ├── CodeGen_Posix.cpp
    ├── CodeGen_Posix.h
    ├── CodeGen_PowerPC.cpp
    ├── CodeGen_PowerPC.h
    ├── CodeGen_X86.cpp
    ├── CodeGen_X86.h
    ├── ConciseCasts.h
    ├── Debug.cpp
    ├── Debug.h
    ├── DebugArguments.cpp
    ├── DebugArguments.h
    ├── DebugToFile.cpp
    ├── DebugToFile.h
    ├── Definition.cpp
    ├── Definition.h
    ├── Deinterleave.cpp
    ├── Deinterleave.h
    ├── Derivative.cpp
    ├── Derivative.h
    ├── DerivativeUtils.cpp
    ├── DerivativeUtils.h
    ├── DeviceArgument.cpp
    ├── DeviceArgument.h
    ├── DeviceInterface.cpp
    ├── DeviceInterface.h
    ├── Dimension.cpp
    ├── Dimension.h
    ├── EarlyFree.cpp
    ├── EarlyFree.h
    ├── Elf.cpp
    ├── Elf.h
    ├── EliminateBoolVectors.cpp
    ├── EliminateBoolVectors.h
    ├── Error.cpp
    ├── Error.h
    ├── Expr.h
    ├── ExprUsesVar.h
    ├── Extern.h
    ├── ExternalCode.h
    ├── FastIntegerDivide.cpp
    ├── FastIntegerDivide.h
    ├── FindCalls.cpp
    ├── FindCalls.h
    ├── Float16.cpp
    ├── Float16.h
    ├── Func.cpp
    ├── Func.h
    ├── Function.cpp
    ├── Function.h
    ├── FunctionPtr.h
    ├── FuseGPUThreadLoops.cpp
    ├── FuseGPUThreadLoops.h
    ├── FuzzFloatStores.cpp
    ├── FuzzFloatStores.h
    ├── Generator.cpp
    ├── Generator.h
    ├── HexagonAlignment.h
    ├── HexagonOffload.cpp
    ├── HexagonOffload.h
    ├── HexagonOptimize.cpp
    ├── HexagonOptimize.h
    ├── IR.cpp
    ├── IR.h
    ├── IREquality.cpp
    ├── IREquality.h
    ├── IRMatch.cpp
    ├── IRMatch.h
    ├── IRMutator.cpp
    ├── IRMutator.h
    ├── IROperator.cpp
    ├── IROperator.h
    ├── IRPrinter.cpp
    ├── IRPrinter.h
    ├── IRVisitor.cpp
    ├── IRVisitor.h
    ├── ImageParam.cpp
    ├── ImageParam.h
    ├── InferArguments.cpp
    ├── InferArguments.h
    ├── InjectHostDevBufferCopies.cpp
    ├── InjectHostDevBufferCopies.h
    ├── InjectOpenGLIntrinsics.cpp
    ├── InjectOpenGLIntrinsics.h
    ├── Inline.cpp
    ├── Inline.h
    ├── InlineReductions.cpp
    ├── InlineReductions.h
    ├── IntegerDivisionTable.cpp
    ├── IntegerDivisionTable.h
    ├── Interval.cpp
    ├── Interval.h
    ├── Introspection.cpp
    ├── Introspection.h
    ├── IntrusivePtr.h
    ├── JITModule.cpp
    ├── JITModule.h
    ├── LICM.cpp
    ├── LICM.h
    ├── LLVM_Headers.h
    ├── LLVM_Output.cpp
    ├── LLVM_Output.h
    ├── LLVM_Runtime_Linker.cpp
    ├── LLVM_Runtime_Linker.h
    ├── Lambda.h
    ├── Lerp.cpp
    ├── Lerp.h
    ├── LoopCarry.cpp
    ├── LoopCarry.h
    ├── Lower.cpp
    ├── Lower.h
    ├── LowerWarpShuffles.cpp
    ├── LowerWarpShuffles.h
    ├── MainPage.h
    ├── MatlabWrapper.cpp
    ├── MatlabWrapper.h
    ├── Memoization.cpp
    ├── Memoization.h
    ├── Module.cpp
    ├── Module.h
    ├── ModulusRemainder.cpp
    ├── ModulusRemainder.h
    ├── Monotonic.cpp
    ├── Monotonic.h
    ├── ObjectInstanceRegistry.cpp
    ├── ObjectInstanceRegistry.h
    ├── OutputImageParam.cpp
    ├── OutputImageParam.h
    ├── Outputs.h
    ├── ParallelRVar.cpp
    ├── ParallelRVar.h
    ├── Param.h
    ├── ParamMap.cpp
    ├── ParamMap.h
    ├── Parameter.cpp
    ├── Parameter.h
    ├── PartitionLoops.cpp
    ├── PartitionLoops.h
    ├── Pipeline.cpp
    ├── Pipeline.h
    ├── Prefetch.cpp
    ├── Prefetch.h
    ├── PrintLoopNest.cpp
    ├── PrintLoopNest.h
    ├── Profiling.cpp
    ├── Profiling.h
    ├── PurifyIndexMath.cpp
    ├── PurifyIndexMath.h
    ├── PythonExtensionGen.cpp
    ├── PythonExtensionGen.h
    ├── Qualify.cpp
    ├── Qualify.h
    ├── RDom.cpp
    ├── RDom.h
    ├── Random.cpp
    ├── Random.h
    ├── RealizationOrder.cpp
    ├── RealizationOrder.h
    ├── Reduction.cpp
    ├── Reduction.h
    ├── RegionCosts.cpp
    ├── RegionCosts.h
    ├── RemoveDeadAllocations.cpp
    ├── RemoveDeadAllocations.h
    ├── RemoveExternLoops.cpp
    ├── RemoveExternLoops.h
    ├── RemoveUndef.cpp
    ├── RemoveUndef.h
    ├── RoundingMode.h
    ├── Schedule.cpp
    ├── Schedule.h
    ├── ScheduleFunctions.cpp
    ├── ScheduleFunctions.h
    ├── Scope.h
    ├── SelectGPUAPI.cpp
    ├── SelectGPUAPI.h
    ├── Simplify.cpp
    ├── Simplify.h
    ├── SimplifyCorrelatedDifferences.cpp
    ├── SimplifyCorrelatedDifferences.h
    ├── SimplifySpecializations.cpp
    ├── SimplifySpecializations.h
    ├── Simplify_Add.cpp
    ├── Simplify_And.cpp
    ├── Simplify_Call.cpp
    ├── Simplify_Cast.cpp
    ├── Simplify_Div.cpp
    ├── Simplify_EQ.cpp
    ├── Simplify_Exprs.cpp
    ├── Simplify_Internal.h
    ├── Simplify_LT.cpp
    ├── Simplify_Let.cpp
    ├── Simplify_Max.cpp
    ├── Simplify_Min.cpp
    ├── Simplify_Mod.cpp
    ├── Simplify_Mul.cpp
    ├── Simplify_Not.cpp
    ├── Simplify_Or.cpp
    ├── Simplify_Select.cpp
    ├── Simplify_Shuffle.cpp
    ├── Simplify_Stmts.cpp
    ├── Simplify_Sub.cpp
    ├── SkipStages.cpp
    ├── SkipStages.h
    ├── SlidingWindow.cpp
    ├── SlidingWindow.h
    ├── Solve.cpp
    ├── Solve.h
    ├── SplitTuples.cpp
    ├── SplitTuples.h
    ├── StmtToHtml.cpp
    ├── StmtToHtml.h
    ├── StorageFlattening.cpp
    ├── StorageFlattening.h
    ├── StorageFolding.cpp
    ├── StorageFolding.h
    ├── StrictifyFloat.cpp
    ├── StrictifyFloat.h
    ├── Substitute.cpp
    ├── Substitute.h
    ├── Target.cpp
    ├── Target.h
    ├── ThreadPool.h
    ├── Tracing.cpp
    ├── Tracing.h
    ├── TrimNoOps.cpp
    ├── TrimNoOps.h
    ├── Tuple.cpp
    ├── Tuple.h
    ├── Type.cpp
    ├── Type.h
    ├── UnifyDuplicateLets.cpp
    ├── UnifyDuplicateLets.h
    ├── UniquifyVariableNames.cpp
    ├── UniquifyVariableNames.h
    ├── UnpackBuffers.cpp
    ├── UnpackBuffers.h
    ├── UnrollLoops.cpp
    ├── UnrollLoops.h
    ├── UnsafePromises.cpp
    ├── UnsafePromises.h
    ├── Util.cpp
    ├── Util.h
    ├── Var.cpp
    ├── Var.h
    ├── VaryingAttributes.cpp
    ├── VaryingAttributes.h
    ├── VectorizeLoops.cpp
    ├── VectorizeLoops.h
    ├── WrapCalls.cpp
    ├── WrapCalls.h
    ├── WrapExternStages.cpp
    ├── WrapExternStages.h
    └── runtime
    │   ├── HalideBuffer.h
    │   ├── HalideRuntime.h
    │   ├── HalideRuntimeCuda.h
    │   ├── HalideRuntimeD3D12Compute.h
    │   ├── HalideRuntimeHexagonDma.h
    │   ├── HalideRuntimeHexagonHost.h
    │   ├── HalideRuntimeMetal.h
    │   ├── HalideRuntimeOpenCL.h
    │   ├── HalideRuntimeOpenGL.h
    │   ├── HalideRuntimeOpenGLCompute.h
    │   ├── HalideRuntimeQurt.h
    │   ├── aarch64.ll
    │   ├── aarch64_cpu_features.cpp
    │   ├── alignment_128.cpp
    │   ├── alignment_32.cpp
    │   ├── alignment_64.cpp
    │   ├── allocation_cache.cpp
    │   ├── android_clock.cpp
    │   ├── android_host_cpu_count.cpp
    │   ├── android_io.cpp
    │   ├── android_ioctl.h
    │   ├── android_opengl_context.cpp
    │   ├── arm.ll
    │   ├── arm_cpu_features.cpp
    │   ├── arm_no_neon.ll
    │   ├── buffer_t.cpp
    │   ├── cache.cpp
    │   ├── can_use_target.cpp
    │   ├── cl_functions.h
    │   ├── cpu_features.h
    │   ├── cuda.cpp
    │   ├── cuda_functions.h
    │   ├── d3d12_abi_patch_64.h
    │   ├── d3d12_abi_patch_64.ll
    │   ├── d3d12compute.cpp
    │   ├── destructors.cpp
    │   ├── device_buffer_utils.h
    │   ├── device_interface.cpp
    │   ├── device_interface.h
    │   ├── errors.cpp
    │   ├── fake_get_symbol.cpp
    │   ├── fake_thread_pool.cpp
    │   ├── float16_t.cpp
    │   ├── fuchsia_clock.cpp
    │   ├── fuchsia_host_cpu_count.cpp
    │   ├── fuchsia_yield.cpp
    │   ├── gpu_device_selection.cpp
    │   ├── hashmap.h
    │   ├── hexagon_cache_allocator.cpp
    │   ├── hexagon_cpu_features.cpp
    │   ├── hexagon_dma.cpp
    │   ├── hexagon_dma_pool.cpp
    │   ├── hexagon_dma_pool.h
    │   ├── hexagon_host.cpp
    │   ├── hexagon_remote
    │       ├── .gitignore
    │       ├── Makefile
    │       ├── bin
    │       │   ├── arm-32-android
    │       │   │   └── libhalide_hexagon_host.so
    │       │   ├── arm-64-android
    │       │   │   └── libhalide_hexagon_host.so
    │       │   ├── host
    │       │   │   └── libhalide_hexagon_host.so
    │       │   ├── src
    │       │   │   ├── halide_hexagon_remote.h
    │       │   │   ├── halide_hexagon_remote_skel.c
    │       │   │   └── halide_hexagon_remote_stub.c
    │       │   └── v60
    │       │   │   ├── hexagon_sim_remote
    │       │   │   ├── libhalide_hexagon_remote_skel.so
    │       │   │   ├── libsim_qurt.a
    │       │   │   ├── libsim_qurt_vtcm.a
    │       │   │   └── signed_by_debug
    │       │   │       └── libhalide_hexagon_remote_skel.so
    │       ├── c11_stubs.cpp
    │       ├── dlib.cpp
    │       ├── dlib.h
    │       ├── halide_hexagon_remote.idl
    │       ├── halide_remote.cpp
    │       ├── host_malloc.cpp
    │       ├── host_shim.cpp
    │       ├── instruction_encodings.txt
    │       ├── known_symbols.cpp
    │       ├── known_symbols.h
    │       ├── libadsprpc_shim.cpp
    │       ├── log.cpp
    │       ├── log.h
    │       ├── nearbyint.cpp
    │       ├── pipeline_context.h
    │       ├── sim_host.cpp
    │       ├── sim_protocol.h
    │       ├── sim_qurt.cpp
    │       ├── sim_qurt_vtcm.cpp
    │       └── sim_remote.cpp
    │   ├── hvx_128.ll
    │   ├── hvx_64.ll
    │   ├── ios_io.cpp
    │   ├── linux_clock.cpp
    │   ├── linux_host_cpu_count.cpp
    │   ├── linux_opengl_context.cpp
    │   ├── linux_yield.cpp
    │   ├── matlab.cpp
    │   ├── metadata.cpp
    │   ├── metal.cpp
    │   ├── metal_objc_arm.cpp
    │   ├── metal_objc_platform_dependent.cpp
    │   ├── metal_objc_platform_dependent.h
    │   ├── metal_objc_x86.cpp
    │   ├── mex_functions.h
    │   ├── mingw_math.cpp
    │   ├── mini_cl.h
    │   ├── mini_cuda.h
    │   ├── mini_d3d12.h
    │   ├── mini_hexagon_dma.h
    │   ├── mini_opengl.h
    │   ├── mini_qurt.h
    │   ├── mini_qurt_vtcm.h
    │   ├── mips.ll
    │   ├── mips_cpu_features.cpp
    │   ├── module_aot_ref_count.cpp
    │   ├── module_jit_ref_count.cpp
    │   ├── msan.cpp
    │   ├── msan_stubs.cpp
    │   ├── nvidia_libdevice_bitcode
    │       ├── libdevice.compute_20.10.bc
    │       ├── libdevice.compute_30.10.bc
    │       └── libdevice.compute_35.10.bc
    │   ├── objc_support.h
    │   ├── old_buffer_t.cpp
    │   ├── opencl.cpp
    │   ├── opengl.cpp
    │   ├── openglcompute.cpp
    │   ├── osx_clock.cpp
    │   ├── osx_get_symbol.cpp
    │   ├── osx_host_cpu_count.cpp
    │   ├── osx_opengl_context.cpp
    │   ├── osx_yield.cpp
    │   ├── posix_abort.cpp
    │   ├── posix_allocator.cpp
    │   ├── posix_clock.cpp
    │   ├── posix_error_handler.cpp
    │   ├── posix_get_symbol.cpp
    │   ├── posix_io.cpp
    │   ├── posix_math.ll
    │   ├── posix_print.cpp
    │   ├── posix_threads.cpp
    │   ├── posix_threads_tsan.cpp
    │   ├── powerpc.ll
    │   ├── powerpc_cpu_features.cpp
    │   ├── prefetch.cpp
    │   ├── printer.h
    │   ├── profiler.cpp
    │   ├── profiler_inlined.cpp
    │   ├── pseudostack.cpp
    │   ├── ptx_dev.ll
    │   ├── qurt_allocator.cpp
    │   ├── qurt_hvx.cpp
    │   ├── qurt_hvx_vtcm.cpp
    │   ├── qurt_init_fini.cpp
    │   ├── qurt_threads.cpp
    │   ├── qurt_threads_tsan.cpp
    │   ├── qurt_yield.cpp
    │   ├── runtime_api.cpp
    │   ├── runtime_internal.h
    │   ├── scoped_mutex_lock.h
    │   ├── scoped_spin_lock.h
    │   ├── ssp.cpp
    │   ├── synchronization_common.h
    │   ├── thread_pool_common.h
    │   ├── to_string.cpp
    │   ├── trace_helper.cpp
    │   ├── tracing.cpp
    │   ├── win32_math.ll
    │   ├── windows_abort.cpp
    │   ├── windows_clock.cpp
    │   ├── windows_cuda.cpp
    │   ├── windows_get_symbol.cpp
    │   ├── windows_io.cpp
    │   ├── windows_opencl.cpp
    │   ├── windows_profiler.cpp
    │   ├── windows_threads.cpp
    │   ├── windows_threads_tsan.cpp
    │   ├── windows_yield.cpp
    │   ├── write_debug_image.cpp
    │   ├── x86.ll
    │   ├── x86_avx.ll
    │   ├── x86_avx2.ll
    │   ├── x86_cpu_features.cpp
    │   └── x86_sse41.ll
├── test
    ├── CMakeLists.txt
    ├── auto_schedule
    │   ├── cost_function.cpp
    │   ├── data_dependent.cpp
    │   ├── extern.cpp
    │   ├── fibonacci.cpp
    │   ├── harris.cpp
    │   ├── histogram.cpp
    │   ├── iir.cpp
    │   ├── interpolate.cpp
    │   ├── large_window.cpp
    │   ├── mat_mul.cpp
    │   ├── max_filter.cpp
    │   ├── multi_output.cpp
    │   ├── overlap.cpp
    │   ├── param.cpp
    │   ├── reorder.cpp
    │   ├── tile_vs_inline.cpp
    │   ├── unbounded_nonpure.cpp
    │   ├── unsharp.cpp
    │   ├── unused_func.cpp
    │   └── vectorize_var_in_update.cpp
    ├── common
    │   ├── check_call_graphs.h
    │   ├── expect_failure.sh
    │   ├── gpu_object_lifetime_tracker.h
    │   └── halide_test_dirs.h
    ├── correctness
    │   ├── align_bounds.cpp
    │   ├── argmax.cpp
    │   ├── assertion_failure_in_parallel_for.cpp
    │   ├── async.cpp
    │   ├── async_copy_chain.cpp
    │   ├── async_device_copy.cpp
    │   ├── autodiff.cpp
    │   ├── autoschedule_small_pure_update.cpp
    │   ├── autotune_bug.cpp
    │   ├── autotune_bug_2.cpp
    │   ├── autotune_bug_3.cpp
    │   ├── autotune_bug_4.cpp
    │   ├── autotune_bug_5.cpp
    │   ├── bad_likely.cpp
    │   ├── bit_counting.cpp
    │   ├── bitwise_ops.cpp
    │   ├── bool_compute_root_vectorize.cpp
    │   ├── bound.cpp
    │   ├── bound_small_allocations.cpp
    │   ├── boundary_conditions.cpp
    │   ├── bounds.cpp
    │   ├── bounds_inference.cpp
    │   ├── bounds_inference_chunk.cpp
    │   ├── bounds_inference_complex.cpp
    │   ├── bounds_inference_outer_split.cpp
    │   ├── bounds_of_abs.cpp
    │   ├── bounds_of_cast.cpp
    │   ├── bounds_of_func.cpp
    │   ├── bounds_of_monotonic_math.cpp
    │   ├── bounds_of_multiply.cpp
    │   ├── bounds_query.cpp
    │   ├── buffer_t.cpp
    │   ├── c_function.cpp
    │   ├── cascaded_filters.cpp
    │   ├── cast.cpp
    │   ├── cast_handle.cpp
    │   ├── chunk.cpp
    │   ├── chunk_sharing.cpp
    │   ├── circular_reference_leak.cpp
    │   ├── code_explosion.cpp
    │   ├── compare_vars.cpp
    │   ├── compile_to.cpp
    │   ├── compile_to_bitcode.cpp
    │   ├── compile_to_lowered_stmt.cpp
    │   ├── compile_to_multitarget.cpp
    │   ├── compute_at_reordered_update_stage.cpp
    │   ├── compute_at_split_rvar.cpp
    │   ├── compute_outermost.cpp
    │   ├── compute_with.cpp
    │   ├── compute_with_in.cpp
    │   ├── compute_with_inlined.cpp
    │   ├── computed_index.cpp
    │   ├── constant_expr.cpp
    │   ├── constant_type.cpp
    │   ├── constraints.cpp
    │   ├── convolution.cpp
    │   ├── convolution_multiple_kernels.cpp
    │   ├── cross_compilation.cpp
    │   ├── custom_allocator.cpp
    │   ├── custom_auto_scheduler.cpp
    │   ├── custom_error_reporter.cpp
    │   ├── custom_lowering_pass.cpp
    │   ├── debug_to_file.cpp
    │   ├── debug_to_file_multiple_outputs.cpp
    │   ├── debug_to_file_reorder.cpp
    │   ├── deferred_loop_level.cpp
    │   ├── deinterleave4.cpp
    │   ├── device_buffer_copy.cpp
    │   ├── device_crop.cpp
    │   ├── device_slice.cpp
    │   ├── dilate3x3.cpp
    │   ├── dynamic_reduction_bounds.cpp
    │   ├── embed_bitcode.cpp
    │   ├── erf.cpp
    │   ├── exception.cpp
    │   ├── explicit_inline_reductions.cpp
    │   ├── extern_bounds_inference.cpp
    │   ├── extern_consumer.cpp
    │   ├── extern_consumer_tiled.cpp
    │   ├── extern_error.cpp
    │   ├── extern_output_expansion.cpp
    │   ├── extern_partial.cpp
    │   ├── extern_producer.cpp
    │   ├── extern_reorder_storage.cpp
    │   ├── extern_sort.cpp
    │   ├── extern_stage.cpp
    │   ├── extern_stage_on_device.cpp
    │   ├── external_code.cpp
    │   ├── failed_unroll.cpp
    │   ├── fibonacci.cpp
    │   ├── fit_function.cpp
    │   ├── float16_t_comparison.cpp
    │   ├── float16_t_constants.cpp
    │   ├── float16_t_image_type.cpp
    │   ├── for_each_element.cpp
    │   ├── force_onto_stack.cpp
    │   ├── func_clone.cpp
    │   ├── func_lifetime.cpp
    │   ├── func_lifetime_2.cpp
    │   ├── func_wrapper.cpp
    │   ├── fuse.cpp
    │   ├── fuse_gpu_threads.cpp
    │   ├── fused_where_inner_extent_is_zero.cpp
    │   ├── fuzz_float_stores.cpp
    │   ├── fuzz_simplify.cpp
    │   ├── gameoflife.cpp
    │   ├── gather.cpp
    │   ├── gpu_allocation_cache.cpp
    │   ├── gpu_assertion_in_kernel.cpp
    │   ├── gpu_bounds_inference_failure.cpp
    │   ├── gpu_cpu_simultaneous_read.cpp
    │   ├── gpu_data_flows.cpp
    │   ├── gpu_dynamic_shared.cpp
    │   ├── gpu_free_sync.cpp
    │   ├── gpu_give_input_buffers_device_allocations.cpp
    │   ├── gpu_jit_explicit_copy_to_device.cpp
    │   ├── gpu_large_alloc.cpp
    │   ├── gpu_mixed_dimensionality.cpp
    │   ├── gpu_mixed_shared_mem_types.cpp
    │   ├── gpu_multi_device.cpp
    │   ├── gpu_multi_kernel.cpp
    │   ├── gpu_non_contiguous_copy.cpp
    │   ├── gpu_object_lifetime_1.cpp
    │   ├── gpu_object_lifetime_2.cpp
    │   ├── gpu_object_lifetime_3.cpp
    │   ├── gpu_param_allocation.cpp
    │   ├── gpu_reuse_shared_memory.cpp
    │   ├── gpu_specialize.cpp
    │   ├── gpu_sum_scan.cpp
    │   ├── gpu_thread_barrier.cpp
    │   ├── gpu_transpose.cpp
    │   ├── gpu_vectorized_shared_memory.cpp
    │   ├── halide_buffer.cpp
    │   ├── handle.cpp
    │   ├── heap_cleanup.cpp
    │   ├── hello_gpu.cpp
    │   ├── hexagon_scatter.cpp
    │   ├── histogram.cpp
    │   ├── histogram_equalize.cpp
    │   ├── host_alignment.cpp
    │   ├── image_io.cpp
    │   ├── image_of_lists.cpp
    │   ├── image_wrapper.cpp
    │   ├── implicit_args.cpp
    │   ├── implicit_args_tests.cpp
    │   ├── in_place.cpp
    │   ├── infer_arguments.cpp
    │   ├── inline_reduction.cpp
    │   ├── inlined_generator.cpp
    │   ├── input_image_bounds_check.cpp
    │   ├── input_larger_than_two_gigs.cpp
    │   ├── integer_powers.cpp
    │   ├── interleave.cpp
    │   ├── interleave_rgb.cpp
    │   ├── interleave_x.cpp
    │   ├── introspection.cpp
    │   ├── inverse.cpp
    │   ├── isnan.cpp
    │   ├── iterate_over_circle.cpp
    │   ├── lambda.cpp
    │   ├── lazy_convolution.cpp
    │   ├── leak_device_memory.cpp
    │   ├── left_shift_negative.cpp
    │   ├── legal_race_condition.cpp
    │   ├── lerp.cpp
    │   ├── let_in_rdom_bound.cpp
    │   ├── likely.cpp
    │   ├── load_library.cpp
    │   ├── logical.cpp
    │   ├── loop_invariant_extern_calls.cpp
    │   ├── loop_level_generator_param.cpp
    │   ├── lots_of_dimensions.cpp
    │   ├── make_struct.cpp
    │   ├── many_dimensions.cpp
    │   ├── many_small_extern_stages.cpp
    │   ├── many_updates.cpp
    │   ├── math.cpp
    │   ├── median3x3.cpp
    │   ├── memoize.cpp
    │   ├── memoize_cloned.cpp
    │   ├── min_extent.cpp
    │   ├── mod.cpp
    │   ├── mul_div_mod.cpp
    │   ├── multi_output_pipeline_with_bad_sizes.cpp
    │   ├── multi_pass_reduction.cpp
    │   ├── multi_splits_with_diff_tail_strategies.cpp
    │   ├── multi_way_select.cpp
    │   ├── multipass_constraints.cpp
    │   ├── multiple_outputs.cpp
    │   ├── multiple_outputs_extern.cpp
    │   ├── named_updates.cpp
    │   ├── nested_shiftinwards.cpp
    │   ├── newtons_method.cpp
    │   ├── non_vector_aligned_embeded_buffer.cpp
    │   ├── obscure_image_references.cpp
    │   ├── oddly_sized_output.cpp
    │   ├── out_constraint.cpp
    │   ├── out_of_memory.cpp
    │   ├── output_larger_than_two_gigs.cpp
    │   ├── parallel.cpp
    │   ├── parallel_alloc.cpp
    │   ├── parallel_fork.cpp
    │   ├── parallel_gpu_nested.cpp
    │   ├── parallel_nested.cpp
    │   ├── parallel_nested_1.cpp
    │   ├── parallel_reductions.cpp
    │   ├── parallel_rvar.cpp
    │   ├── param.cpp
    │   ├── param_map.cpp
    │   ├── parameter_constraints.cpp
    │   ├── partial_application.cpp
    │   ├── partition_loops.cpp
    │   ├── partition_loops_bug.cpp
    │   ├── pipeline_set_jit_externs_func.cpp
    │   ├── plain_c_includes.c
    │   ├── popc_clz_ctz_bounds.cpp
    │   ├── predicated_store_load.cpp
    │   ├── prefetch.cpp
    │   ├── print.cpp
    │   ├── process_some_tiles.cpp
    │   ├── pseudostack_shares_slots.cpp
    │   ├── python_extension_gen.cpp
    │   ├── random.cpp
    │   ├── realize_larger_than_two_gigs.cpp
    │   ├── realize_over_shifted_domain.cpp
    │   ├── reduction_chain.cpp
    │   ├── reduction_non_rectangular.cpp
    │   ├── reduction_schedule.cpp
    │   ├── reduction_subregion.cpp
    │   ├── register_shuffle.cpp
    │   ├── reorder_rvars.cpp
    │   ├── reorder_storage.cpp
    │   ├── require.cpp
    │   ├── reschedule.cpp
    │   ├── reuse_stack_alloc.cpp
    │   ├── rfactor.cpp
    │   ├── round.cpp
    │   ├── saturating_casts.cpp
    │   ├── scatter.cpp
    │   ├── set_custom_trace.cpp
    │   ├── shared_self_references.cpp
    │   ├── shifted_image.cpp
    │   ├── side_effects.cpp
    │   ├── simd_op_check.cpp
    │   ├── simplified_away_embedded_image.cpp
    │   ├── simplify.cpp
    │   ├── skip_stages.cpp
    │   ├── skip_stages_external_array_functions.cpp
    │   ├── skip_stages_memoize.cpp
    │   ├── sliding_backwards.cpp
    │   ├── sliding_reduction.cpp
    │   ├── sliding_window.cpp
    │   ├── sort_exprs.cpp
    │   ├── specialize.cpp
    │   ├── specialize_to_gpu.cpp
    │   ├── split_by_non_factor.cpp
    │   ├── split_fuse_rvar.cpp
    │   ├── split_reuse_inner_name_bug.cpp
    │   ├── split_store_compute.cpp
    │   ├── stack_allocations.cpp
    │   ├── stencil_chain_in_update_definitions.cpp
    │   ├── stmt_to_html.cpp
    │   ├── storage_folding.cpp
    │   ├── store_in.cpp
    │   ├── stream_compaction.cpp
    │   ├── strict_float.cpp
    │   ├── strict_float_bounds.cpp
    │   ├── strided_load.cpp
    │   ├── target.cpp
    │   ├── thread_safety.cpp
    │   ├── tracing.cpp
    │   ├── tracing_bounds.cpp
    │   ├── tracing_broadcast.cpp
    │   ├── tracing_stack.cpp
    │   ├── transitive_bounds.cpp
    │   ├── trim_no_ops.cpp
    │   ├── truncated_pyramid.cpp
    │   ├── tuple_partial_update.cpp
    │   ├── tuple_reduction.cpp
    │   ├── tuple_select.cpp
    │   ├── tuple_undef.cpp
    │   ├── tuple_update_ops.cpp
    │   ├── two_vector_args.cpp
    │   ├── undef.cpp
    │   ├── uninitialized_read.cpp
    │   ├── unique_func_image.cpp
    │   ├── unroll_dynamic_loop.cpp
    │   ├── unrolled_reduction.cpp
    │   ├── unsafe_dedup_lets.cpp
    │   ├── unsafe_promises.cpp
    │   ├── update_chunk.cpp
    │   ├── vector_bounds_inference.cpp
    │   ├── vector_cast.cpp
    │   ├── vector_extern.cpp
    │   ├── vector_math.cpp
    │   ├── vector_print_bug.cpp
    │   ├── vectorize_guard_with_if.cpp
    │   ├── vectorize_mixed_widths.cpp
    │   ├── vectorize_varying_allocation_size.cpp
    │   ├── vectorized_gpu_allocation.cpp
    │   ├── vectorized_initialization.cpp
    │   ├── vectorized_load_from_vectorized_allocation.cpp
    │   ├── vectorized_reduction_bug.cpp
    │   └── widening_reduction.cpp
    ├── error
    │   ├── ambiguous_inline_reductions.cpp
    │   ├── async_require_fail.cpp
    │   ├── auto_schedule_no_bounds.cpp
    │   ├── auto_schedule_no_parallel.cpp
    │   ├── auto_schedule_no_reorder.cpp
    │   ├── bad_bound.cpp
    │   ├── bad_compute_at.cpp
    │   ├── bad_compute_with.cpp
    │   ├── bad_compute_with_invalid_specialization.cpp
    │   ├── bad_compute_with_parent_func_not_used.cpp
    │   ├── bad_const_cast.cpp
    │   ├── bad_device_api.cpp
    │   ├── bad_dimensions.cpp
    │   ├── bad_extern_split.cpp
    │   ├── bad_fold.cpp
    │   ├── bad_host_alignment.cpp
    │   ├── bad_rvar_order.cpp
    │   ├── bad_schedule.cpp
    │   ├── bad_store_at.cpp
    │   ├── broken_promise.cpp
    │   ├── buffer_larger_than_two_gigs.cpp
    │   ├── clamp_out_of_range.cpp
    │   ├── constrain_wrong_output_buffer.cpp
    │   ├── constraint_uses_non_param.cpp
    │   ├── define_after_realize.cpp
    │   ├── define_after_use.cpp
    │   ├── device_target_mismatch.cpp
    │   ├── expanding_reduction.cpp
    │   ├── extern_func_self_argument.cpp
    │   ├── five_d_gpu_buffer.cpp
    │   ├── float_arg.cpp
    │   ├── forward_on_undefined_buffer.cpp
    │   ├── implicit_args.cpp
    │   ├── impossible_constraints.cpp
    │   ├── init_def_should_be_all_vars.cpp
    │   ├── inspect_loop_level.cpp
    │   ├── lerp_float_weight_out_of_range.cpp
    │   ├── lerp_mismatch.cpp
    │   ├── lerp_signed_weight.cpp
    │   ├── memoize_different_compute_store.cpp
    │   ├── metal_vector_too_large.cpp
    │   ├── missing_args.cpp
    │   ├── modulo_constant_zero.cpp
    │   ├── no_default_device.cpp
    │   ├── nonexistent_update_stage.cpp
    │   ├── null_host_field.cpp
    │   ├── overflow_during_constant_folding.cpp
    │   ├── pointer_arithmetic.cpp
    │   ├── race_condition.cpp
    │   ├── rdom_undefined.cpp
    │   ├── realize_constantly_larger_than_two_gigs.cpp
    │   ├── reduction_bounds.cpp
    │   ├── reduction_type_mismatch.cpp
    │   ├── require_fail.cpp
    │   ├── reuse_var_in_schedule.cpp
    │   ├── reused_args.cpp
    │   ├── rfactor_inner_dim_non_commutative.cpp
    │   ├── specialize_fail.cpp
    │   ├── split_inner_wrong_tail_strategy.cpp
    │   ├── thread_id_outside_block_id.cpp
    │   ├── too_many_args.cpp
    │   ├── tuple_arg_select_undef.cpp
    │   ├── tuple_val_select_undef.cpp
    │   ├── unbounded_input.cpp
    │   ├── unbounded_output.cpp
    │   ├── undefined_func_compile.cpp
    │   ├── undefined_func_realize.cpp
    │   ├── undefined_loop_level.cpp
    │   ├── undefined_pipeline_compile.cpp
    │   ├── undefined_pipeline_realize.cpp
    │   ├── undefined_rdom_dimension.cpp
    │   ├── unknown_target.cpp
    │   ├── vectorize_dynamic.cpp
    │   ├── vectorize_too_little.cpp
    │   ├── vectorize_too_much.cpp
    │   ├── vectorized_extern.cpp
    │   ├── wrap_custom_after_shared.cpp
    │   ├── wrap_frozen.cpp
    │   ├── wrapper_never_used.cpp
    │   ├── wrong_dimensionality_extern_stage.cpp
    │   └── wrong_type.cpp
    ├── failing_with_issue
    │   ├── 3292_async_specialize.cpp
    │   ├── 3293_storage_folding_async.cpp
    │   └── 3357_vectorize_pred.cpp
    ├── generator
    │   ├── acquire_release_aottest.cpp
    │   ├── acquire_release_generator.cpp
    │   ├── alias_aottest.cpp
    │   ├── alias_generator.cpp
    │   ├── argvcall_aottest.cpp
    │   ├── argvcall_generator.cpp
    │   ├── async_parallel_aottest.cpp
    │   ├── async_parallel_generator.cpp
    │   ├── bit_operations_aottest.cpp
    │   ├── bit_operations_generator.cpp
    │   ├── blur2x2_aottest.cpp
    │   ├── blur2x2_generator.cpp
    │   ├── buffer_copy_aottest.cpp
    │   ├── buffer_copy_generator.cpp
    │   ├── buildmethod_aottest.cpp
    │   ├── buildmethod_generator.cpp
    │   ├── can_use_target_aottest.cpp
    │   ├── can_use_target_generator.cpp
    │   ├── cleanup_on_error_aottest.cpp
    │   ├── cleanup_on_error_generator.cpp
    │   ├── configure_aottest.cpp
    │   ├── configure_generator.cpp
    │   ├── configure_jittest.cpp
    │   ├── cxx_mangling_aottest.cpp
    │   ├── cxx_mangling_define_extern_aottest.cpp
    │   ├── cxx_mangling_define_extern_externs.cpp
    │   ├── cxx_mangling_define_extern_generator.cpp
    │   ├── cxx_mangling_externs.cpp
    │   ├── cxx_mangling_generator.cpp
    │   ├── define_extern_opencl_aottest.cpp
    │   ├── define_extern_opencl_generator.cpp
    │   ├── embed_image_aottest.cpp
    │   ├── embed_image_generator.cpp
    │   ├── error_codes_aottest.cpp
    │   ├── error_codes_generator.cpp
    │   ├── example_aottest.cpp
    │   ├── example_generator.cpp
    │   ├── example_jittest.cpp
    │   ├── extern_output_aottest.cpp
    │   ├── extern_output_generator.cpp
    │   ├── external_code_aottest.cpp
    │   ├── external_code_extern.cpp
    │   ├── external_code_generator.cpp
    │   ├── float16_t_aottest.cpp
    │   ├── float16_t_generator.cpp
    │   ├── gpu_object_lifetime_aottest.cpp
    │   ├── gpu_object_lifetime_generator.cpp
    │   ├── gpu_only_aottest.cpp
    │   ├── gpu_only_generator.cpp
    │   ├── image_from_array_aottest.cpp
    │   ├── image_from_array_generator.cpp
    │   ├── mandelbrot_aottest.cpp
    │   ├── mandelbrot_generator.cpp
    │   ├── matlab_aottest.cpp
    │   ├── matlab_generator.cpp
    │   ├── memory_profiler_mandelbrot_aottest.cpp
    │   ├── memory_profiler_mandelbrot_generator.cpp
    │   ├── metadata_tester_aottest.cpp
    │   ├── metadata_tester_generator.cpp
    │   ├── msan_aottest.cpp
    │   ├── msan_generator.cpp
    │   ├── multitarget_aottest.cpp
    │   ├── multitarget_generator.cpp
    │   ├── nested_externs_aottest.cpp
    │   ├── nested_externs_generator.cpp
    │   ├── old_buffer_t_aottest.cpp
    │   ├── old_buffer_t_generator.cpp
    │   ├── output_assign_aottest.cpp
    │   ├── output_assign_generator.cpp
    │   ├── pyramid_aottest.cpp
    │   ├── pyramid_generator.cpp
    │   ├── rdom_input_aottest.cpp
    │   ├── rdom_input_generator.cpp
    │   ├── registration_test.cpp
    │   ├── rungen_test.cpp
    │   ├── stubtest_aottest.cpp
    │   ├── stubtest_generator.cpp
    │   ├── stubtest_jittest.cpp
    │   ├── stubuser_aottest.cpp
    │   ├── stubuser_generator.cpp
    │   ├── tiled_blur_aottest.cpp
    │   ├── tiled_blur_generator.cpp
    │   ├── user_context_aottest.cpp
    │   ├── user_context_generator.cpp
    │   ├── user_context_insanity_aottest.cpp
    │   ├── user_context_insanity_generator.cpp
    │   ├── variable_num_threads_aottest.cpp
    │   └── variable_num_threads_generator.cpp
    ├── internal.cpp
    ├── opengl
    │   ├── conv_select.cpp
    │   ├── copy_pixels.cpp
    │   ├── copy_to_device.cpp
    │   ├── copy_to_host.cpp
    │   ├── float_texture.cpp
    │   ├── inline_reduction.cpp
    │   ├── internal.cpp
    │   ├── lut.cpp
    │   ├── multiple_stages.cpp
    │   ├── produce.cpp
    │   ├── rewrap_texture.cpp
    │   ├── save_state.cpp
    │   ├── select.cpp
    │   ├── set_pixels.cpp
    │   ├── shifted_domains.cpp
    │   ├── special_funcs.cpp
    │   ├── sum_reduction.cpp
    │   ├── sumcolor_reduction.cpp
    │   ├── testing.h
    │   ├── tuples.cpp
    │   ├── vagrant
    │   │   ├── .gitignore
    │   │   ├── README.md
    │   │   ├── Vagrantfile
    │   │   ├── build_tests.sh
    │   │   └── provision
    │   │   │   ├── etc
    │   │   │       ├── environment
    │   │   │       ├── init
    │   │   │       │   └── xdummy.conf
    │   │   │       └── systemd
    │   │   │       │   └── system
    │   │   │       │       └── xdummy.service
    │   │   │   └── usr
    │   │   │       └── share
    │   │   │           └── X11
    │   │   │               └── xorg.conf.d
    │   │   │                   └── xdummy.conf
    │   └── varying.cpp
    ├── performance
    │   ├── async_gpu.cpp
    │   ├── block_transpose.cpp
    │   ├── boundary_conditions.cpp
    │   ├── clamped_vector_load.cpp
    │   ├── const_division.cpp
    │   ├── fan_in.cpp
    │   ├── fast_inverse.cpp
    │   ├── fast_pow.cpp
    │   ├── inner_loop_parallel.cpp
    │   ├── jit_stress.cpp
    │   ├── lots_of_inputs.cpp
    │   ├── lots_of_small_allocations.cpp
    │   ├── matrix_multiplication.cpp
    │   ├── memcpy.cpp
    │   ├── memory_profiler.cpp
    │   ├── packed_planar_fusion.cpp
    │   ├── parallel_performance.cpp
    │   ├── profiler.cpp
    │   ├── realize_overhead.cpp
    │   ├── rfactor.cpp
    │   ├── rgb_interleaved.cpp
    │   ├── sort.cpp
    │   ├── thread_safe_jit.cpp
    │   ├── vectorize.cpp
    │   └── wrap.cpp
    ├── scripts
    │   └── build_travis.sh
    └── warning
    │   ├── double_vectorize.cpp
    │   ├── hidden_pure_definition.cpp
    │   ├── parallel_size_one.cpp
    │   ├── require_const_false.cpp
    │   └── vectorize_size_one.cpp
├── tools
    ├── GenGen.cpp
    ├── RunGen.h
    ├── RunGenMain.cpp
    ├── binary2cpp.cpp
    ├── build_halide_h.cpp
    ├── find_inverse.cpp
    ├── halide_benchmark.h
    ├── halide_config.cmake.tpl
    ├── halide_config.make.tpl
    ├── halide_image.h
    ├── halide_image_info.h
    ├── halide_image_io.h
    ├── halide_malloc_trace.h
    ├── halide_trace_config.h
    ├── makelib.sh
    └── mex_halide.m
├── tutorial
    ├── .gitignore
    ├── CMakeLists.txt
    ├── clock.h
    ├── figures
    │   ├── generate_figures_17.sh
    │   ├── generate_figures_18.sh
    │   ├── generate_figures_19.sh
    │   ├── generate_figures_5.sh
    │   ├── generate_figures_8.sh
    │   ├── generate_figures_9.sh
    │   ├── generate_output_snippets.sh
    │   ├── lesson_02_input.jpg
    │   ├── lesson_02_output.jpg
    │   ├── lesson_05_col_major.gif
    │   ├── lesson_05_fast.mp4
    │   ├── lesson_05_parallel_tiles.gif
    │   ├── lesson_05_row_major.gif
    │   ├── lesson_05_split_7_by_3.gif
    │   ├── lesson_05_tiled.gif
    │   ├── lesson_05_vectors.gif
    │   ├── lesson_08_compute_root.gif
    │   ├── lesson_08_compute_y.gif
    │   ├── lesson_08_mixed.mp4
    │   ├── lesson_08_store_root_compute_x.gif
    │   ├── lesson_08_store_root_compute_y.gif
    │   ├── lesson_08_tile.gif
    │   ├── lesson_09_compute_at_multiple_updates.mp4
    │   ├── lesson_09_compute_at_pure.gif
    │   ├── lesson_09_compute_at_pure_and_update.gif
    │   ├── lesson_09_compute_at_rvar.gif
    │   ├── lesson_09_compute_at_update.gif
    │   ├── lesson_09_inline_reduction.gif
    │   ├── lesson_09_update.gif
    │   ├── lesson_09_update_rdom.mp4
    │   ├── lesson_09_update_schedule.mp4
    │   ├── lesson_17_rdom_calls_in_predicate.mp4
    │   ├── lesson_17_rdom_circular.mp4
    │   ├── lesson_17_rdom_triangular.mp4
    │   ├── lesson_18_hist_manual_par.mp4
    │   ├── lesson_18_hist_rfactor_par.mp4
    │   ├── lesson_18_hist_rfactor_tile.mp4
    │   ├── lesson_18_hist_rfactor_vec.mp4
    │   ├── lesson_18_hist_serial.mp4
    │   ├── lesson_19_group_updates.mp4
    │   ├── lesson_19_transpose.mp4
    │   ├── lesson_19_wrapper_global.mp4
    │   ├── lesson_19_wrapper_local.mp4
    │   ├── lesson_19_wrapper_unique.mp4
    │   └── lesson_19_wrapper_vary_schedule.mp4
    ├── images
    │   ├── gray.png
    │   └── rgb.png
    ├── lesson_01_basics.cpp
    ├── lesson_02_input_image.cpp
    ├── lesson_03_debugging_1.cpp
    ├── lesson_04_debugging_2.cpp
    ├── lesson_05_scheduling_1.cpp
    ├── lesson_06_realizing_over_shifted_domains.cpp
    ├── lesson_07_multi_stage_pipelines.cpp
    ├── lesson_08_scheduling_2.cpp
    ├── lesson_09_update_definitions.cpp
    ├── lesson_10_aot_compilation_generate.cpp
    ├── lesson_10_aot_compilation_run.cpp
    ├── lesson_11_cross_compilation.cpp
    ├── lesson_12_using_the_gpu.cpp
    ├── lesson_13_tuples.cpp
    ├── lesson_14_types.cpp
    ├── lesson_15_generators.cpp
    ├── lesson_15_generators_usage.sh
    ├── lesson_16_rgb_generate.cpp
    ├── lesson_16_rgb_run.cpp
    ├── lesson_17_predicated_rdom.cpp
    ├── lesson_18_parallel_associative_reductions.cpp
    ├── lesson_19_wrapper_funcs.cpp
    ├── lesson_20_cloning_funcs.cpp
    ├── lesson_21_auto_scheduler_generate.cpp
    ├── lesson_21_auto_scheduler_run.cpp
    └── todo.txt
└── util
    ├── CMakeLists.txt
    ├── Halide-VS2017.natvis
    ├── Halide.natvis
    ├── HalideTraceDump.cpp
    ├── HalideTraceUtils.cpp
    ├── HalideTraceUtils.h
    ├── HalideTraceViz.cpp
    └── inconsolata.h
/.gitattributes:
--------------------------------------------------------------------------------
 1 | # Set the default behavior, in case people don't have core.autocrlf set.
 2 | * text=auto
 3 | 
 4 | # Explicitly declare text files you want to always be normalized and converted
 5 | # to native line endings on checkout.
 6 | *.cpp text
 7 | *.c text
 8 | *.h text
 9 | 
10 | # Denote all files that are truly binary and should not be modified.
11 | *.png binary
12 | *.jpg binary
13 | *.tiff binary
14 | 
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUE-EE-ES/HalideAutoGPU/3115b7650b0c3b37a71259aeb3db32fc598bdfdd/.gitmodules
--------------------------------------------------------------------------------
/TACO_Benchmarks/bilateral_grid/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # bilateral_grid_process is linked against two libraries produced by the same
 2 | # generator: one built with auto_schedule=false, one with auto_schedule=true.
 3 | add_executable(bilateral_grid_process filter.cpp)
 4 | halide_use_image_io(bilateral_grid_process)
 5 | 
 6 | halide_generator(bilateral_grid.generator SRCS bilateral_grid_generator.cpp)
 7 | 
 8 | # One pass per value of the auto_schedule generator argument. Only the
 9 | # auto-scheduled library gets the "_auto_schedule" name suffix.
10 | foreach(AUTO_SCHEDULE false true)
11 |     set(LIB bilateral_grid)
12 |     if(AUTO_SCHEDULE)
13 |         set(LIB ${LIB}_auto_schedule)
14 |     endif()
15 |     # Besides the default outputs, also request the stmt and schedule outputs.
16 |     halide_library_from_generator(
17 |         ${LIB}
18 |         GENERATOR bilateral_grid.generator
19 |         GENERATOR_ARGS auto_schedule=${AUTO_SCHEDULE}
20 |         EXTRA_OUTPUTS stmt schedule)
21 |     target_link_libraries(bilateral_grid_process PRIVATE ${LIB})
22 | endforeach()
23 | 
--------------------------------------------------------------------------------
/TACO_Benchmarks/conv_layer/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # conv_layer_process is linked against two libraries produced by the same
 2 | # generator: one built with auto_schedule=false, one with auto_schedule=true.
 3 | add_executable(conv_layer_process process.cpp)
 4 | halide_use_image_io(conv_layer_process)
 5 | 
 6 | halide_generator(conv_layer.generator SRCS conv_layer_generator.cpp)
 7 | 
 8 | # One pass per value of the auto_schedule generator argument. Only the
 9 | # auto-scheduled library gets the "_auto_schedule" name suffix.
10 | foreach(AUTO_SCHEDULE false true)
11 |     set(LIB conv_layer)
12 |     if(AUTO_SCHEDULE)
13 |         set(LIB ${LIB}_auto_schedule)
14 |     endif()
15 |     halide_library_from_generator(
16 |         ${LIB}
17 |         GENERATOR conv_layer.generator
18 |         GENERATOR_ARGS auto_schedule=${AUTO_SCHEDULE})
19 |     target_link_libraries(conv_layer_process PRIVATE ${LIB})
20 | endforeach()
21 | 
--------------------------------------------------------------------------------
/TACO_Benchmarks/images/bayer_raw.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUE-EE-ES/HalideAutoGPU/3115b7650b0c3b37a71259aeb3db32fc598bdfdd/TACO_Benchmarks/images/bayer_raw.png
--------------------------------------------------------------------------------
/TACO_Benchmarks/images/gray.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUE-EE-ES/HalideAutoGPU/3115b7650b0c3b37a71259aeb3db32fc598bdfdd/TACO_Benchmarks/images/gray.png
--------------------------------------------------------------------------------
/TACO_Benchmarks/images/gray_small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUE-EE-ES/HalideAutoGPU/3115b7650b0c3b37a71259aeb3db32fc598bdfdd/TACO_Benchmarks/images/gray_small.png
--------------------------------------------------------------------------------
/TACO_Benchmarks/images/rgb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUE-EE-ES/HalideAutoGPU/3115b7650b0c3b37a71259aeb3db32fc598bdfdd/TACO_Benchmarks/images/rgb.png
--------------------------------------------------------------------------------
/TACO_Benchmarks/images/rgb_small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUE-EE-ES/HalideAutoGPU/3115b7650b0c3b37a71259aeb3db32fc598bdfdd/TACO_Benchmarks/images/rgb_small.png
--------------------------------------------------------------------------------
/TACO_Benchmarks/images/rgb_small16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUE-EE-ES/HalideAutoGPU/3115b7650b0c3b37a71259aeb3db32fc598bdfdd/TACO_Benchmarks/images/rgb_small16.png
--------------------------------------------------------------------------------
/TACO_Benchmarks/images/rgba.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUE-EE-ES/HalideAutoGPU/3115b7650b0c3b37a71259aeb3db32fc598bdfdd/TACO_Benchmarks/images/rgba.png
--------------------------------------------------------------------------------
/TACO_Benchmarks/lens_blur/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # lens_blur_process is linked against two libraries produced by the same
 2 | # generator: one built with auto_schedule=false, one with auto_schedule=true.
 3 | add_executable(lens_blur_process process.cpp)
 4 | halide_use_image_io(lens_blur_process)
 5 | 
 6 | halide_generator(lens_blur.generator SRCS lens_blur_generator.cpp)
 7 | 
 8 | # One pass per value of the auto_schedule generator argument. Only the
 9 | # auto-scheduled library gets the "_auto_schedule" name suffix.
10 | foreach(AUTO_SCHEDULE false true)
11 |     set(LIB lens_blur)
12 |     if(AUTO_SCHEDULE)
13 |         set(LIB ${LIB}_auto_schedule)
14 |     endif()
15 |     halide_library_from_generator(
16 |         ${LIB}
17 |         GENERATOR lens_blur.generator
18 |         GENERATOR_ARGS auto_schedule=${AUTO_SCHEDULE})
19 |     target_link_libraries(lens_blur_process PRIVATE ${LIB})
20 | endforeach()
21 | 
--------------------------------------------------------------------------------
/TACO_Benchmarks/local_laplacian/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # local_laplacian_process is linked against two libraries produced by the same
 2 | # generator: one built with auto_schedule=false, one with auto_schedule=true.
 3 | add_executable(local_laplacian_process process.cpp)
 4 | halide_use_image_io(local_laplacian_process)
 5 | 
 6 | halide_generator(local_laplacian.generator SRCS local_laplacian_generator.cpp)
 7 | 
 8 | # One pass per value of the auto_schedule generator argument. Only the
 9 | # auto-scheduled library gets the "_auto_schedule" name suffix.
10 | foreach(AUTO_SCHEDULE false true)
11 |     set(LIB local_laplacian)
12 |     if(AUTO_SCHEDULE)
13 |         set(LIB ${LIB}_auto_schedule)
14 |     endif()
15 |     halide_library_from_generator(
16 |         ${LIB}
17 |         GENERATOR local_laplacian.generator
18 |         GENERATOR_ARGS auto_schedule=${AUTO_SCHEDULE})
19 |     target_link_libraries(local_laplacian_process PRIVATE ${LIB})
20 | endforeach()
21 | 
--------------------------------------------------------------------------------
/TACO_Benchmarks/nl_means/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # nl_means_process is linked against two libraries produced by the same
 2 | # generator: one built with auto_schedule=false, one with auto_schedule=true.
 3 | add_executable(nl_means_process process.cpp)
 4 | halide_use_image_io(nl_means_process)
 5 | 
 6 | halide_generator(nl_means.generator SRCS nl_means_generator.cpp)
 7 | 
 8 | # One pass per value of the auto_schedule generator argument. Only the
 9 | # auto-scheduled library gets the "_auto_schedule" name suffix.
10 | foreach(AUTO_SCHEDULE false true)
11 |     set(LIB nl_means)
12 |     if(AUTO_SCHEDULE)
13 |         set(LIB ${LIB}_auto_schedule)
14 |     endif()
15 |     halide_library_from_generator(
16 |         ${LIB}
17 |         GENERATOR nl_means.generator
18 |         GENERATOR_ARGS auto_schedule=${AUTO_SCHEDULE})
19 |     target_link_libraries(nl_means_process PRIVATE ${LIB})
20 | endforeach()
21 | 
--------------------------------------------------------------------------------
/TACO_Benchmarks/setup_env.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Exports the environment used by the TACO benchmark builds and runs.
 4 | # The script only exports variables, so presumably it is meant to be
 5 | # sourced rather than executed -- TODO confirm against the Makefiles.
 6 | 
 7 | # Compiler flags: any CXXFLAGS inherited from the caller is discarded and
 8 | # replaced by the single cuda_alloc define.
 9 | export CXXFLAGS=-Dcuda_alloc
10 | 
11 | # Halide target string and GPU/JIT settings.
12 | # NOTE(review): the exact meaning of each HL_* value is defined by whatever
13 | # reads it, not by this script -- verify before changing.
14 | export HL_TARGET=host-cuda-cuda_capability_61
15 | export HL_GPU_DEVICE=0
16 | export HL_CUDA_JIT_MAX_REGISTERS=256
17 | export HL_PERMIT_FAILED_UNROLL=1
18 | 
19 | # Per-memory-level cost values (presumably consumed by the GPU
20 | # auto-scheduler's cost model -- TODO confirm).
21 | export HL_GPU_L2_COST=200
22 | export HL_GPU_SHARED_COST=1
23 | export HL_GPU_GLOBAL_COST=1
24 | 
--------------------------------------------------------------------------------
/TACO_Benchmarks/stencil_chain/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | add_executable(stencil_chain_process process.cpp)
 2 | halide_use_image_io(stencil_chain_process)
 3 | 
 4 | halide_generator(stencil_chain.generator SRCS stencil_chain_generator.cpp)
 5 | # Two passes: auto_schedule=false and auto_schedule=true, each its own library.
 6 | foreach(AUTO_SCHEDULE false true)
 7 |     set(LIB stencil_chain)
 8 |     if(${AUTO_SCHEDULE})
 9 |         set(LIB stencil_chain_auto_schedule)
10 |     endif()
11 |     halide_library_from_generator(${LIB}
12 |                                   GENERATOR stencil_chain.generator
13 |                                   GENERATOR_ARGS auto_schedule=${AUTO_SCHEDULE})
14 |     target_link_libraries(stencil_chain_process PRIVATE ${LIB})
15 | endforeach()
16 | 
--------------------------------------------------------------------------------
/TACO_Benchmarks/stencil_chain/results_42070.txt:
--------------------------------------------------------------------------------
1 | bilateral_grid:
2 | 
--------------------------------------------------------------------------------
/TACO_Benchmarks/support/viz_auto.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Runs a trace-enabled filter and pipes its trace through HalideTraceViz
 4 | # into the encoder named by HL_AVCONV to produce a video.
 5 | #
 6 | # $1 = filter cmd to run, including args
 7 | # $2 = HalideTraceViz executable
 8 | # $3 = path to output mp4
 9 | 
10 | # Fail early with a clear message instead of trying to execute "-y".
11 | : "${HL_AVCONV:?HL_AVCONV must name the video encoder executable}"
12 | 
13 | rm -rf "$3"
14 | 
15 | # Use a named pipe for the $1 -> HTV pipe, just in case
16 | # the exe in $1 writes any random output to stdout.
17 | # The pipe is created inside a private mktemp directory so that concurrent
18 | # runs do not share (or delete) each other's pipe, and is removed on exit.
19 | PIPE_DIR=$(mktemp -d "${TMPDIR:-/tmp}/halide_viz_auto.XXXXXX") || exit 1
20 | trap 'rm -rf "${PIPE_DIR}"' EXIT
21 | PIPE="${PIPE_DIR}/pipe"
22 | mkfifo "${PIPE}" || exit 1
23 | 
24 | # $1 is intentionally unquoted: it holds the command together with its args.
25 | HL_TRACE_FILE="${PIPE}" HL_NUMTHREADS=8 $1 &
26 | 
27 | $2 --auto_layout --ignore_tags 0<"${PIPE}" | \
28 | ${HL_AVCONV} -y -f rawvideo -pix_fmt bgr32 -s 1920x1080 -i /dev/stdin -c:v h264 "$3"
29 | 
--------------------------------------------------------------------------------
/apps/HelloAndroid/.gitignore:
--------------------------------------------------------------------------------
1 | .gradle/**
2 | gen/**
3 | gradle_build/**
4 | HelloAndroid.iml
5 | local.properties
6 | obj/**
7 | proguard-project.txt
8 | project.properties
9 | 
--------------------------------------------------------------------------------
/apps/HelloAndroid/ant.properties:
--------------------------------------------------------------------------------
 1 | # This file is used to override default values used by the Ant build system.
 2 | #
 3 | # This file must be checked into Version Control Systems, as it is
 4 | # integral to the build system of your project.
 5 | 
 6 | # This file is only used by the Ant script.
 7 | 
 8 | # You can use this to override default values such as
 9 | #  'source.dir' for the location of your java source folder and
10 | #  'out.dir' for the location of your output folder.
11 | 
12 | # You can also use it to define how the release builds are signed by declaring
13 | # the following properties:
14 | #  'key.store' for the location of your keystore and
15 | #  'key.alias' for the name of the key to use.
16 | # The password will be asked during the build when you use the 'release' target.
17 | 
18 | 
--------------------------------------------------------------------------------
/apps/HelloAndroid/build-gradle.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Gradle needs to know where the NDK is.
 4 | # The easiest way is to set the ANDROID_NDK_HOME environment variable.
 5 | # Otherwise, set ndk.dir in local.properties (even though the file itself says
 6 | # that it's only used by ant).
 7 | # However, if you run "android update" (say, via build.sh), this variable will
 8 | # be clobbered.
 9 | ./gradlew build && adb install -r gradle_build/outputs/apk/HelloAndroid-debug.apk && adb shell am start com.example.hellohalide/com.example.hellohalide.CameraActivity
10 | 
--------------------------------------------------------------------------------
/apps/HelloAndroid/gradle/wrapper/gradle-wrapper.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUE-EE-ES/HalideAutoGPU/3115b7650b0c3b37a71259aeb3db32fc598bdfdd/apps/HelloAndroid/gradle/wrapper/gradle-wrapper.jar
--------------------------------------------------------------------------------
/apps/HelloAndroid/gradle/wrapper/gradle-wrapper.properties:
--------------------------------------------------------------------------------
1 | #Mon Jan 05 14:23:44 PST 2015
2 | distributionBase=GRADLE_USER_HOME
3 | distributionPath=wrapper/dists
4 | zipStoreBase=GRADLE_USER_HOME
5 | zipStorePath=wrapper/dists
6 | distributionUrl=https\://services.gradle.org/distributions/gradle-2.2-bin.zip
7 | 
--------------------------------------------------------------------------------
/apps/HelloAndroid/jni/Android.mk:
--------------------------------------------------------------------------------
 1 | LOCAL_PATH := $(call my-dir)
 2 | 
 3 | include $(CLEAR_VARS)
 4 | 
 5 | LOCAL_MODULE    := HelloAndroid
 6 | LOCAL_ARM_MODE  := arm
 7 | LOCAL_SRC_FILES := hello_wrapper.cpp
 8 | LOCAL_LDFLAGS   := -L$(LOCAL_PATH)/../jni
 9 | LOCAL_LDLIBS    := -lm -llog -landroid $(LOCAL_PATH)/../bin/$(TARGET_ARCH_ABI)/hello.a
10 | LOCAL_STATIC_LIBRARIES := android_native_app_glue
11 | LOCAL_C_INCLUDES := $(LOCAL_PATH)/../../../include $(LOCAL_PATH)/../../../build/include $(LOCAL_PATH)/../bin/$(TARGET_ARCH_ABI)/
12 | 
13 | include $(BUILD_SHARED_LIBRARY)
14 | 
15 | $(call import-module,android/native_app_glue)
16 | 
--------------------------------------------------------------------------------
/apps/HelloAndroid/jni/Application.mk:
--------------------------------------------------------------------------------
1 | # Can't use "APP_ABI = all" as 64-bit MIPS currently does not build since
2 | # llvm will not compile for the R6 version of the ISA without Nan2008
3 | # and the gcc toolchain used by the Android build setup requires those
4 | # two options together.
5 | APP_ABI := armeabi armeabi-v7a arm64-v8a mips x86_64 x86
6 | APP_PLATFORM := android-17
7 | APP_STL := gnustl_static
8 | APP_CPPFLAGS := -std=c++11
9 | 
--------------------------------------------------------------------------------
/apps/HelloAndroid/res/drawable-hdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUE-EE-ES/HalideAutoGPU/3115b7650b0c3b37a71259aeb3db32fc598bdfdd/apps/HelloAndroid/res/drawable-hdpi/ic_launcher.png
--------------------------------------------------------------------------------
/apps/HelloAndroid/res/drawable-ldpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUE-EE-ES/HalideAutoGPU/3115b7650b0c3b37a71259aeb3db32fc598bdfdd/apps/HelloAndroid/res/drawable-ldpi/ic_launcher.png
--------------------------------------------------------------------------------
/apps/HelloAndroid/res/drawable-mdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUE-EE-ES/HalideAutoGPU/3115b7650b0c3b37a71259aeb3db32fc598bdfdd/apps/HelloAndroid/res/drawable-mdpi/ic_launcher.png
--------------------------------------------------------------------------------
/apps/HelloAndroid/res/drawable-xhdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUE-EE-ES/HalideAutoGPU/3115b7650b0c3b37a71259aeb3db32fc598bdfdd/apps/HelloAndroid/res/drawable-xhdpi/ic_launcher.png
--------------------------------------------------------------------------------
/apps/HelloAndroid/res/layout/main.xml:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 7 | 
 8 |   
14 | 
15 | 
--------------------------------------------------------------------------------
/apps/HelloAndroid/res/values/strings.xml:
--------------------------------------------------------------------------------
1 | 
2 | 
3 |     HelloHalide
4 | 
5 | 
--------------------------------------------------------------------------------
/apps/HelloAndroid/src/com/example/hellohalide/FrameHandler.java:
--------------------------------------------------------------------------------
 1 | package com.example.hellohalide;
 2 | 
 3 | import android.hardware.Camera;
 4 | import android.util.Log;
 5 | /** Camera preview callback that only writes a debug log line for each frame it is handed. */
 6 | public class FrameHandler implements Camera.PreviewCallback {
 7 |     private static final String TAG = "FrameHandler";
 8 |     /** Called with each preview frame; neither the frame bytes nor the camera are used. */
 9 |     public void onPreviewFrame(byte[] data, Camera camera) {
10 |         Log.d(TAG, "Got a frame!");
11 |     }
12 | }
--------------------------------------------------------------------------------
/apps/HelloAndroidCamera2/.gitignore:
--------------------------------------------------------------------------------
1 | .gradle/**
2 | gen/**
3 | gradle_build/**
4 | *.iml
5 | local.properties
6 | obj/**
7 | proguard-project.txt
8 | project.properties
9 | 
--------------------------------------------------------------------------------
/apps/HelloAndroidCamera2/build-gradle.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Gradle needs to know where the NDK is.
 4 | # The easiest way is to set the ANDROID_NDK_HOME environment variable.
 5 | # Otherwise, set ndk.dir in local.properties (even though the file itself says
 6 | # that it's only used by ant).
 7 | # However, if you run "android update" (say, via build.sh), this variable will
 8 | # be clobbered.
 9 | ./gradlew build && adb install -r gradle_build/outputs/apk/HelloAndroidCamera2-debug.apk && adb shell am start com.example.helloandroidcamera2/com.example.helloandroidcamera2.CameraActivity
10 | 
--------------------------------------------------------------------------------
/apps/HelloAndroidCamera2/gradle/wrapper/gradle-wrapper.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUE-EE-ES/HalideAutoGPU/3115b7650b0c3b37a71259aeb3db32fc598bdfdd/apps/HelloAndroidCamera2/gradle/wrapper/gradle-wrapper.jar
--------------------------------------------------------------------------------
/apps/HelloAndroidCamera2/gradle/wrapper/gradle-wrapper.properties:
--------------------------------------------------------------------------------
1 | #Wed Jul 15 16:34:43 PDT 2015
2 | distributionBase=GRADLE_USER_HOME
3 | distributionPath=wrapper/dists
4 | zipStoreBase=GRADLE_USER_HOME
5 | zipStorePath=wrapper/dists
6 | distributionUrl=https\://services.gradle.org/distributions/gradle-2.2-all.zip
7 | 
--------------------------------------------------------------------------------
/apps/HelloAndroidCamera2/jni/Application.mk:
--------------------------------------------------------------------------------
1 | # Can't use "APP_ABI = all" as 64-bit MIPS currently does not build since
2 | # llvm will not compile for the R6 version of the ISA without Nan2008
3 | # and the gcc toolchain used by the Android build setup requires those
4 | # two options together.
5 | APP_ABI := armeabi armeabi-v7a arm64-v8a mips x86_64 x86
6 | APP_PLATFORM := android-21
7 | APP_STL := c++_static
8 | APP_CPPFLAGS := -std=c++11 -fno-rtti -fexceptions
9 | 
--------------------------------------------------------------------------------
/apps/HelloAndroidCamera2/res/drawable-hdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUE-EE-ES/HalideAutoGPU/3115b7650b0c3b37a71259aeb3db32fc598bdfdd/apps/HelloAndroidCamera2/res/drawable-hdpi/ic_launcher.png
--------------------------------------------------------------------------------
/apps/HelloAndroidCamera2/res/drawable-ldpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUE-EE-ES/HalideAutoGPU/3115b7650b0c3b37a71259aeb3db32fc598bdfdd/apps/HelloAndroidCamera2/res/drawable-ldpi/ic_launcher.png
--------------------------------------------------------------------------------
/apps/HelloAndroidCamera2/res/drawable-mdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUE-EE-ES/HalideAutoGPU/3115b7650b0c3b37a71259aeb3db32fc598bdfdd/apps/HelloAndroidCamera2/res/drawable-mdpi/ic_launcher.png
--------------------------------------------------------------------------------
/apps/HelloAndroidCamera2/res/drawable-xhdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUE-EE-ES/HalideAutoGPU/3115b7650b0c3b37a71259aeb3db32fc598bdfdd/apps/HelloAndroidCamera2/res/drawable-xhdpi/ic_launcher.png
--------------------------------------------------------------------------------
/apps/HelloAndroidCamera2/res/layout/main.xml:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 7 | 
 8 |   
14 | 
15 | 
--------------------------------------------------------------------------------
/apps/HelloAndroidCamera2/res/values/strings.xml:
--------------------------------------------------------------------------------
1 | 
2 | 
3 |     HelloHalideCamera2
4 |     Toggle Edge Detector
5 | 
6 | 
--------------------------------------------------------------------------------
/apps/HelloAndroidGL/ant.properties:
--------------------------------------------------------------------------------
 1 | # This file is used to override default values used by the Ant build system.
 2 | #
 3 | # This file must be checked into Version Control Systems, as it is
 4 | # integral to the build system of your project.
 5 | 
 6 | # This file is only used by the Ant script.
 7 | 
 8 | # You can use this to override default values such as
 9 | #  'source.dir' for the location of your java source folder and
10 | #  'out.dir' for the location of your output folder.
11 | 
12 | # You can also use it to define how the release builds are signed by declaring
13 | # the following properties:
14 | #  'key.store' for the location of your keystore and
15 | #  'key.alias' for the name of the key to use.
16 | # The password will be asked during the build when you use the 'release' target.
17 | 
18 | 
--------------------------------------------------------------------------------
/apps/HelloAndroidGL/build.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -e
 3 | android update project -p . --target android-17
 4 | cd jni
 5 | c++ -std=c++11 halide_gl_filter.cpp -L ../../../bin -lHalide -I ../../../include -ldl -lpthread -lz
 6 | HL_TARGET=arm-32-android-opengl-debug DYLD_LIBRARY_PATH=../../../bin LD_LIBRARY_PATH=../../../bin ./a.out
 7 | cd ..
 8 | pwd
 9 | ndk-build
10 | ant debug
11 | adb install -r bin/HelloAndroidGL-debug.apk
12 | adb logcat
13 | 
--------------------------------------------------------------------------------
/apps/HelloAndroidGL/jni/Android.mk:
--------------------------------------------------------------------------------
 1 | LOCAL_PATH := $(call my-dir)
 2 | 
 3 | include $(CLEAR_VARS)
 4 | 
 5 | LOCAL_MODULE    := android_halide_gl_native
 6 | LOCAL_ARM_MODE  := arm
 7 | LOCAL_SRC_FILES := android_halide_gl_native.cpp
 8 | LOCAL_LDFLAGS   := -Ljni
 9 | LOCAL_LDLIBS    := -lm -llog -landroid -lEGL -lGLESv2 jni/halide_gl_filter.o
10 | LOCAL_STATIC_LIBRARIES := android_native_app_glue
11 | LOCAL_C_INCLUDES := $(LOCAL_PATH)/../../../include
12 | 
13 | include $(BUILD_SHARED_LIBRARY)
14 | 
15 | $(call import-module,android/native_app_glue)
16 | 
--------------------------------------------------------------------------------
/apps/HelloAndroidGL/jni/Application.mk:
--------------------------------------------------------------------------------
1 | # The ARMv7 is significantly faster due to the use of the hardware FPU
2 | APP_ABI := armeabi-v7a
3 | APP_PLATFORM := android-17
4 | 
--------------------------------------------------------------------------------
/apps/HelloAndroidGL/project.properties:
--------------------------------------------------------------------------------
 1 | # This file is automatically generated by Android Tools.
 2 | # Do not modify this file -- YOUR CHANGES WILL BE ERASED!
 3 | #
 4 | # This file must be checked in Version Control Systems.
 5 | #
 6 | # To customize properties used by the Ant build system edit
 7 | # "ant.properties", and override values to adapt the script to your
 8 | # project structure.
 9 | #
10 | # To enable ProGuard to shrink and obfuscate your code, uncomment this (available properties: sdk.dir, user.home):
11 | #proguard.config=${sdk.dir}/tools/proguard/proguard-android.txt:proguard-project.txt
12 | 
13 | # Project target.
14 | target=android-17
15 | 
--------------------------------------------------------------------------------
/apps/HelloAndroidGL/res/drawable-hdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUE-EE-ES/HalideAutoGPU/3115b7650b0c3b37a71259aeb3db32fc598bdfdd/apps/HelloAndroidGL/res/drawable-hdpi/ic_launcher.png
--------------------------------------------------------------------------------
/apps/HelloAndroidGL/res/drawable-ldpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUE-EE-ES/HalideAutoGPU/3115b7650b0c3b37a71259aeb3db32fc598bdfdd/apps/HelloAndroidGL/res/drawable-ldpi/ic_launcher.png
--------------------------------------------------------------------------------
/apps/HelloAndroidGL/res/drawable-mdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUE-EE-ES/HalideAutoGPU/3115b7650b0c3b37a71259aeb3db32fc598bdfdd/apps/HelloAndroidGL/res/drawable-mdpi/ic_launcher.png
--------------------------------------------------------------------------------
/apps/HelloAndroidGL/res/drawable-xhdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUE-EE-ES/HalideAutoGPU/3115b7650b0c3b37a71259aeb3db32fc598bdfdd/apps/HelloAndroidGL/res/drawable-xhdpi/ic_launcher.png
--------------------------------------------------------------------------------
/apps/HelloAndroidGL/res/layout/main.xml:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 7 | 
 8 |   
14 | 
15 | 
--------------------------------------------------------------------------------
/apps/HelloAndroidGL/res/values/strings.xml:
--------------------------------------------------------------------------------
1 | 
2 | 
3 |     Halide GL Demo
4 | 
5 | 
--------------------------------------------------------------------------------
/apps/HelloMatlab/Makefile:
--------------------------------------------------------------------------------
1 | include ../support/Makefile.inc
2 | # "test" runs a script and produces no file named "test", so declare it phony.
3 | BIN ?= bin
4 | .PHONY: test
5 | test:
6 | 	./run_blur.sh
7 | 
8 | 
--------------------------------------------------------------------------------
/apps/HelloMatlab/run_blur.m:
--------------------------------------------------------------------------------
 1 | % Add the path to mex_halide.m.
 2 | addpath(fullfile(getenv('HALIDE_DISTRIB_PATH'), 'tools'));
 3 | 
 4 | % Build the mex library from the blur generator.
 5 | mex_halide('iir_blur.cpp', '-g', 'IirBlur');
 6 | 
 7 | % Load the input, create an output buffer of equal size.
 8 | input = cast(imread('../images/rgb.png'), 'single') / 255;
 9 | output = zeros(size(input), 'single');
10 | 
11 | % The blur filter coefficient.
12 | alpha = 0.1;
13 | 
14 | % Call the Halide pipeline.
15 | for i = 1:10
16 |     tic;
17 |     iir_blur(input, alpha, output);
18 |     toc;
19 | end
20 | 
21 | % Write the blurred image.
22 | imwrite(cast(output * 255, 'uint8'), 'blurred.png');
23 | 
--------------------------------------------------------------------------------
/apps/HelloMatlab/run_blur.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Nightly-test driver: confirms that mex_halide works by running run_blur.m
 4 | # under Octave. Exits 0 without testing when Octave is missing or when CXX
 5 | # carries -m32.
 6 | 
 7 | if ! command -v octave >/dev/null 2>&1; then
 8 |     echo >&2 "Octave not found.  Aborting."
 9 |     exit 0
10 | fi
11 | 
12 | case "$CXX" in
13 |     *-m32*)
14 |         echo "Not proceeding because Halide is compiled in 32-bit mode but octave is (likely) 64-bit"
15 |         exit 0
16 |         ;;
17 | esac
18 | 
19 | # Clear stale artifacts so the existence check below reflects this run only.
20 | rm -f blurred.png iir_blur.mex
21 | octave run_blur.m
22 | 
23 | if [ ! -f blurred.png ]; then
24 |     echo "Failed to produce blurred.png!"
25 |     exit 1
26 | fi
27 | 
28 | echo "Success!"
29 | exit 0
30 | 
--------------------------------------------------------------------------------
/apps/HelloiOS/HelloiOS/AppDelegate.h:
--------------------------------------------------------------------------------
1 | #import <UIKit/UIKit.h>
2 | // Application delegate class; main.mm passes its class name to UIApplicationMain.
3 | @interface AppDelegate : UIResponder <UIApplicationDelegate>
4 | // Window owned (strongly held) by the delegate.
5 | @property (strong, nonatomic) UIWindow *window;
6 | 
7 | @end
8 | 
--------------------------------------------------------------------------------
/apps/HelloiOS/HelloiOS/HalideViewController.h:
--------------------------------------------------------------------------------
 1 | #ifndef HelloiOS_HalideViewController_h
 2 | #define HelloiOS_HalideViewController_h
 3 | 
 4 | #import "HalideView.h"
 5 | #import <UIKit/UIKit.h>
 6 | 
 7 | // View controller that exposes a HalideView through the halide_view property.
 8 | @interface HalideViewController : UIViewController
 9 | 
10 | @property HalideView *halide_view;
11 | 
12 | - (void)viewWillAppear:(BOOL)animated;
13 | 
14 | @end
15 | 
16 | #endif
--------------------------------------------------------------------------------
/apps/HelloiOS/HelloiOS/HelloiOS-Prefix.pch:
--------------------------------------------------------------------------------
 1 | //
 2 | //  Prefix header
 3 | //
 4 | //  The contents of this file are implicitly included at the beginning of every source file.
 5 | //
 6 | // Availability.h supplies the __IPHONE_* version macros tested below.
 7 | #import <Availability.h>
 8 | 
 9 | #ifndef __IPHONE_3_0
10 | #warning "This project uses features only available in iOS SDK 3.0 and later."
11 | #endif
12 | // Framework headers are pulled in only when compiling Objective-C sources.
13 | #ifdef __OBJC__
14 |     #import <UIKit/UIKit.h>
15 |     #import <Foundation/Foundation.h>
16 | #endif
17 | 
--------------------------------------------------------------------------------
/apps/HelloiOS/HelloiOS/en.lproj/InfoPlist.strings:
--------------------------------------------------------------------------------
1 | /* Localized versions of Info.plist keys */
2 | 
3 | 
--------------------------------------------------------------------------------
/apps/HelloiOS/HelloiOS/main.mm:
--------------------------------------------------------------------------------
 1 | #import <UIKit/UIKit.h>
 2 | 
 3 | #import "AppDelegate.h"
 4 | // Entry point: hands control to UIKit with AppDelegate as the delegate class.
 5 | int main(int argc, char * argv[])
 6 | {
 7 |     @autoreleasepool {
 8 |         return UIApplicationMain(argc, argv, nil, NSStringFromClass([AppDelegate class]));
 9 |     }
10 | }
11 | 
--------------------------------------------------------------------------------
/apps/bilateral_grid/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | add_executable(bilateral_grid_process filter.cpp)
 2 | halide_use_image_io(bilateral_grid_process)
 3 | 
 4 | halide_generator(bilateral_grid.generator SRCS bilateral_grid_generator.cpp)
 5 | # One library per auto_schedule value; each also requests stmt and schedule outputs.
 6 | foreach(AUTO_SCHEDULE false true)
 7 |     set(LIB bilateral_grid)
 8 |     if(${AUTO_SCHEDULE})
 9 |         set(LIB bilateral_grid_auto_schedule)
10 |     endif()
11 |     halide_library_from_generator(${LIB}
12 |                                   GENERATOR bilateral_grid.generator
13 |                                   GENERATOR_ARGS auto_schedule=${AUTO_SCHEDULE}
14 |                                   EXTRA_OUTPUTS stmt schedule)
15 |     target_link_libraries(bilateral_grid_process PRIVATE ${LIB})
16 | endforeach()
17 | 
--------------------------------------------------------------------------------
/apps/bilateral_grid/viz.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | echo HL_AVCONV is ${HL_AVCONV}
 3 | export HL_TRACE_FILE=/dev/stdout
 4 | export HL_NUMTHREADS=4
 5 | rm -f $1/bilateral_grid.mp4
 6 | make $1/filter_viz && \
 7 | $1/filter_viz ../images/gray_small.png $1/out_small.png 0.2 0 | \
 8 | ../../bin/HalideTraceViz --size 1920 1080 | \
 9 | ${HL_AVCONV} -f rawvideo -pix_fmt bgr32 -s 1920x1080 -i /dev/stdin -c:v h264 $1/bilateral_grid.mp4
10 | #mplayer -demuxer rawvideo -rawvideo w=1920:h=1080:format=rgba:fps=30 -idle -fixed-vo -
11 | 
--------------------------------------------------------------------------------
/apps/blur/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Generator
 2 | halide_library(halide_blur SRCS halide_blur_generator.cpp)
 3 | 
 4 | # Final executable
 5 | add_executable(blur_test test.cpp)
 6 | target_link_libraries(blur_test PUBLIC halide_blur)
 7 | 
 8 | if (NOT MSVC)
 9 |   target_compile_options(blur_test PRIVATE "-O2")
10 |   if (OPENMP_FOUND)
11 |     target_compile_options(blur_test PRIVATE ${OpenMP_CXX_FLAGS})
12 |     target_link_libraries(blur_test PRIVATE ${OpenMP_CXX_FLAGS})
13 |   else()
14 |     target_compile_options(blur_test PRIVATE "-Wno-unknown-pragmas")
15 |   endif()
16 | endif()
17 | 
--------------------------------------------------------------------------------
/apps/camera_pipe/viz.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | export HL_TRACE_FILE=/dev/stdout
 3 | export HL_NUMTHREADS=4
 4 | rm -f $1/camera_pipe.mp4
 5 | # Do trivial partial-overrides of trace settings via flags
 6 | # (--zoom and --rlabel) just to demonstrate that it works.
 7 | $1/viz/process ../images/bayer_small.png 3700 1.8 50 1 1 $1/out.png |
 8 | ../../bin/HalideTraceViz --timestep 1000 --size 1920 1080 \
 9 | --zoom 4 --func sharpen_strength_x32 \
10 | --rlabel curve "tone curve LUT" 0 0 10 \
11 | |\
12 | ${HL_AVCONV} -f rawvideo -pix_fmt bgr32 -s 1920x1080 -i /dev/stdin -c:v h264 $1/camera_pipe.mp4
13 | #mplayer -demuxer rawvideo -rawvideo w=1920:h=1080:format=rgba:fps=30 -idle -fixed-vo -
14 | 
--------------------------------------------------------------------------------
/apps/conv_layer/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | add_executable(conv_layer_process process.cpp)
 2 | halide_use_image_io(conv_layer_process)
 3 | 
 4 | halide_generator(conv_layer.generator SRCS conv_layer_generator.cpp)
 5 | # Generate the manually scheduled and the auto-scheduled library; link both.
 6 | foreach(AUTO_SCHEDULE false true)
 7 |     set(LIB conv_layer)
 8 |     if(${AUTO_SCHEDULE})
 9 |         set(LIB conv_layer_auto_schedule)
10 |     endif()
11 |     halide_library_from_generator(${LIB}
12 |                                   GENERATOR conv_layer.generator
13 |                                   GENERATOR_ARGS auto_schedule=${AUTO_SCHEDULE})
14 |     target_link_libraries(conv_layer_process PRIVATE ${LIB})
15 | endforeach()
16 | 
--------------------------------------------------------------------------------
/apps/images/bayer_raw.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUE-EE-ES/HalideAutoGPU/3115b7650b0c3b37a71259aeb3db32fc598bdfdd/apps/images/bayer_raw.png
--------------------------------------------------------------------------------
/apps/images/bayer_small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUE-EE-ES/HalideAutoGPU/3115b7650b0c3b37a71259aeb3db32fc598bdfdd/apps/images/bayer_small.png
--------------------------------------------------------------------------------
/apps/images/gray.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUE-EE-ES/HalideAutoGPU/3115b7650b0c3b37a71259aeb3db32fc598bdfdd/apps/images/gray.png
--------------------------------------------------------------------------------
/apps/images/gray_small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUE-EE-ES/HalideAutoGPU/3115b7650b0c3b37a71259aeb3db32fc598bdfdd/apps/images/gray_small.png
--------------------------------------------------------------------------------
/apps/images/rgb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUE-EE-ES/HalideAutoGPU/3115b7650b0c3b37a71259aeb3db32fc598bdfdd/apps/images/rgb.png
--------------------------------------------------------------------------------
/apps/images/rgb_small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUE-EE-ES/HalideAutoGPU/3115b7650b0c3b37a71259aeb3db32fc598bdfdd/apps/images/rgb_small.png
--------------------------------------------------------------------------------
/apps/images/rgb_small16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUE-EE-ES/HalideAutoGPU/3115b7650b0c3b37a71259aeb3db32fc598bdfdd/apps/images/rgb_small16.png
--------------------------------------------------------------------------------
/apps/images/rgba.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUE-EE-ES/HalideAutoGPU/3115b7650b0c3b37a71259aeb3db32fc598bdfdd/apps/images/rgba.png
--------------------------------------------------------------------------------
/apps/interpolate/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | halide_project(interpolate "apps" interpolate.cpp)
2 | halide_use_image_io(interpolate)
3 | set_target_properties(interpolate PROPERTIES RUNTIME_OUTPUT_DIRECTORY
4 |                       "${CMAKE_CURRENT_BINARY_DIR}")
5 | 
--------------------------------------------------------------------------------
/apps/interpolate/Makefile:
--------------------------------------------------------------------------------
 1 | include ../support/Makefile.inc
 2 | 
 3 | CXXFLAGS += -g -Wall
 4 | # clean and test name commands, not files; phony keeps a stray file from masking them.
 5 | .PHONY: clean test
 6 | # Build the interpolate executable from interpolate.cpp, linking $(LIB_HALIDE).
 7 | $(BIN)/interpolate: interpolate.cpp
 8 | 	@mkdir -p $(@D)
 9 | 	$(CXX) $(CXXFLAGS) interpolate.cpp $(LIB_HALIDE) -o $@ $(IMAGE_IO_FLAGS) $(LDFLAGS) $(HALIDE_SYSTEM_LIBS)
10 | # Run the built binary on rgba.png to produce the output image.
11 | $(BIN)/out.png: $(BIN)/interpolate
12 | 	@mkdir -p $(@D)
13 | 	$^ $(IMAGES)/rgba.png $@
14 | 
15 | clean:
16 | 	rm -rf $(BIN)
17 | 
18 | test: $(BIN)/out.png
19 | 
--------------------------------------------------------------------------------
/apps/lens_blur/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | add_executable(lens_blur_process process.cpp)
 2 | halide_use_image_io(lens_blur_process)
 3 | 
 4 | halide_generator(lens_blur.generator SRCS lens_blur_generator.cpp)
 5 | # Loop over both auto_schedule values, producing and linking a library for each.
 6 | foreach(AUTO_SCHEDULE false true)
 7 |     set(LIB lens_blur)
 8 |     if(${AUTO_SCHEDULE})
 9 |         set(LIB lens_blur_auto_schedule)
10 |     endif()
11 |     halide_library_from_generator(${LIB}
12 |                                   GENERATOR lens_blur.generator
13 |                                   GENERATOR_ARGS auto_schedule=${AUTO_SCHEDULE})
14 |     target_link_libraries(lens_blur_process PRIVATE ${LIB})
15 | endforeach()
16 | 
--------------------------------------------------------------------------------
/apps/linear_algebra/.gitignore:
--------------------------------------------------------------------------------
1 | src/kernels/*
2 | 
--------------------------------------------------------------------------------
/apps/linear_algebra/tests/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | if (NOT CBLAS_FOUND)
 2 |   message(STATUS "linear_algebra: No CBLAS header, skipping CBLAS tests")
 3 |   return()
 4 | endif()
 5 | 
 6 | add_executable(test_halide_blas
 7 |   test_halide_blas.cpp
 8 | )
 9 | target_include_directories(test_halide_blas SYSTEM
10 |   PRIVATE
11 |    ${CBLAS_INCLUDE_DIR}
12 | )
13 | target_include_directories(test_halide_blas BEFORE
14 |   PRIVATE
15 |     ${halide_blas_INCLUDE_DIRS}
16 | )
17 | target_compile_options(test_halide_blas PRIVATE -Wno-unused-variable)
18 | 
19 | target_link_libraries(test_halide_blas
20 |   PRIVATE
21 |    halide_blas
22 |    cblas # XXX fragile
23 |    Halide
24 | )
25 | 
26 | 
--------------------------------------------------------------------------------
/apps/local_laplacian/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | add_executable(local_laplacian_process process.cpp)
 2 | halide_use_image_io(local_laplacian_process)
 3 | 
 4 | halide_generator(local_laplacian.generator SRCS local_laplacian_generator.cpp)
 5 | foreach(AUTO_SCHEDULE false true)
 6 |     if(${AUTO_SCHEDULE})
 7 |         set(LIB local_laplacian_auto_schedule)
 8 |     else()
 9 |         set(LIB local_laplacian)
10 |     endif()
11 |     halide_library_from_generator(${LIB}
12 |                                   GENERATOR local_laplacian.generator
13 |                                   GENERATOR_ARGS auto_schedule=${AUTO_SCHEDULE})
14 |     target_link_libraries(local_laplacian_process PRIVATE ${LIB})
15 | endforeach()
16 | 
--------------------------------------------------------------------------------
/apps/local_laplacian/viz.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | export HL_TRACE_FILE=/dev/stdout
 3 | export HL_NUM_THREADS=4
 4 | rm -f bin/local_laplacian.mp4
 5 | make bin/process_viz && \
 6 | ./bin/process_viz ../images/rgb_small.png 4 1 1 0 ./bin/out_small.png | \
 7 | ../../bin/HalideTraceViz \
 8 | --size 1920 1080 --timestep 3000 | \
 9 | ${HL_AVCONV} -f rawvideo -pix_fmt bgr32 -s 1920x1080 -i /dev/stdin -c:v h264 ./bin/local_laplacian.mp4
10 | #mplayer -demuxer rawvideo -rawvideo w=1920:h=1080:format=rgba:fps=30 -idle -fixed-vo -
11 | 
--------------------------------------------------------------------------------
/apps/nl_means/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | add_executable(nl_means_process process.cpp)
 2 | halide_use_image_io(nl_means_process)
 3 | 
 4 | halide_generator(nl_means.generator SRCS nl_means_generator.cpp)
 5 | foreach(AUTO_SCHEDULE false true)
 6 |     if(${AUTO_SCHEDULE})
 7 |         set(LIB nl_means_auto_schedule)
 8 |     else()
 9 |         set(LIB nl_means)
10 |     endif()
11 |     halide_library_from_generator(${LIB}
12 |                                   GENERATOR nl_means.generator
13 |                                   GENERATOR_ARGS auto_schedule=${AUTO_SCHEDULE})
14 |     target_link_libraries(nl_means_process PRIVATE ${LIB})
15 | endforeach()
16 | 
--------------------------------------------------------------------------------
/apps/nn_ops/AveragePool.sh:
--------------------------------------------------------------------------------
 1 | AVERAGE_POOL=$1
 2 | # Columns are: schedule C W H N stride pad_width pad_height filter_width filter_height output_min output_max
 3 | $AVERAGE_POOL 8 16 16 1 1 0 0 1 1 0 255
 4 | $AVERAGE_POOL 8 16 16 1 1 1 1 3 3 0 255
 5 | $AVERAGE_POOL 8 16 16 1 2 1 1 3 3 0 255
 6 | $AVERAGE_POOL 8 16 16 1 2 2 2 5 5 0 255
 7 | 
 8 | $AVERAGE_POOL 32 7 7 1 1 0 0 1 1 0 255
 9 | $AVERAGE_POOL 32 7 7 1 1 1 1 3 3 0 255
10 | $AVERAGE_POOL 32 7 7 1 2 1 1 3 3 0 255
11 | $AVERAGE_POOL 32 7 7 4 2 2 2 5 5 0 255
12 | 
13 | $AVERAGE_POOL 8 16 16 1 1 0 0 1 1 64 128
14 | $AVERAGE_POOL 8 16 16 1 1 1 1 3 3 64 128
15 | $AVERAGE_POOL 8 16 16 1 2 1 1 3 3 64 128
16 | 
--------------------------------------------------------------------------------
/apps/nn_ops/Convolution.sh:
--------------------------------------------------------------------------------
 1 | CONVOLUTION=$1
 2 | # Columns are: schedule C W H N filter_width, filter_height, output_depth,
 3 | # input_offset, filter_offset, input_depth, stride, pad_width, pad_height,
 4 | # byte_zero, output_multiplier, output_shift, output_offset, output_min,
 5 | # output_max
 6 | 
 7 | $CONVOLUTION 8 17 17 1 1 1 8 -128 -128 8 1 0 0 0
 8 | $CONVOLUTION 8 17 17 1 3 3 8 -128 -128 8 1 1 1 0
 9 | $CONVOLUTION 8 17 17 1 3 3 8 -128 -128 8 2 1 1 0
10 | $CONVOLUTION 8 17 17 1 3 3 16 -128 -128 8 1 1 1 0
11 | $CONVOLUTION 8 17 17 1 3 3 16 -128 -140 8 1 1 1 0
12 | $CONVOLUTION 12 17 17 1 3 3 16 -128 -140 12 1 1 1 0
13 | 
--------------------------------------------------------------------------------
/apps/nn_ops/Im2col.sh:
--------------------------------------------------------------------------------
 1 | IM2COL=$1
 2 | # Columns are: schedule C W H N stride pad_width pad_height filter_width filter_height byte_zero
 3 | $IM2COL 8 16 16 1 1 0 0 1 1 0
 4 | $IM2COL 8 16 16 1 1 1 1 3 3 0
 5 | $IM2COL 8 16 16 1 2 1 1 3 3 0
 6 | $IM2COL 8 16 16 1 2 2 2 5 5 0
 7 | 
 8 | $IM2COL 32 7 7 1 1 0 0 1 1 0
 9 | $IM2COL 32 7 7 1 1 1 1 3 3 0
10 | $IM2COL 32 7 7 1 2 1 1 3 3 0
11 | $IM2COL 32 7 7 4 2 2 2 5 5 0
12 | 
13 | $IM2COL 8 16 16 1 1 0 0 1 1 5
14 | $IM2COL 8 16 16 1 1 1 1 3 3 5
15 | $IM2COL 8 16 16 1 2 1 1 3 3 5
16 | 
--------------------------------------------------------------------------------
/apps/nn_ops/MaxPool.sh:
--------------------------------------------------------------------------------
 1 | MAXPOOL=$1
 2 | # Columns are: schedule C W H N stride pad_width pad_height filter_width filter_height output_min output_max
 3 | $MAXPOOL 8 16 16 1 1 0 0 1 1 0 255
 4 | $MAXPOOL 8 16 16 1 1 1 1 3 3 0 255
 5 | $MAXPOOL 8 16 16 1 2 1 1 3 3 0 255
 6 | $MAXPOOL 8 16 16 1 2 2 2 5 5 0 255
 7 | 
 8 | $MAXPOOL 32 7 7 1 1 0 0 1 1 0 255
 9 | $MAXPOOL 32 7 7 1 1 1 1 3 3 0 255
10 | $MAXPOOL 32 7 7 1 2 1 1 3 3 0 255
11 | $MAXPOOL 32 7 7 4 2 2 2 5 5 0 255
12 | 
13 | $MAXPOOL 8 16 16 1 1 0 0 1 1 64 128
14 | $MAXPOOL 8 16 16 1 1 1 1 3 3 64 128
15 | $MAXPOOL 8 16 16 1 2 1 1 3 3 64 128
16 | 
--------------------------------------------------------------------------------
/apps/nn_ops/common.h:
--------------------------------------------------------------------------------
 1 | // A collection of utility functions shared by the halide generators.
 2 | 
 3 | #ifndef COMMON_HALIDE_H_
 4 | #define COMMON_HALIDE_H_
 5 | 
 6 | #include <Halide.h>
 7 | 
 8 | // This function implements the same computation as the ARMv7 NEON VQRDMULH
 9 | // instruction.
10 | Halide::Expr saturating_rounding_doubling_high_multiply(Halide::Expr a, Halide::Expr b);
11 | 
12 | // Correctly-rounded-to-nearest division by a power-of-two. Also known as
13 | // rounding arithmetic right shift.
14 | Halide::Expr rounding_shift_right(Halide::Expr x, Halide::Expr shift);
15 | 
16 | // Performs right shift and multiply by a multiplier.
17 | Halide::Expr multiply_quantized_multiplier(
18 |     Halide::Expr x, Halide::Expr quantized_multiplier, Halide::Expr shift);
19 | #endif  // COMMON_HALIDE_H_
20 | 
--------------------------------------------------------------------------------
/apps/nn_ops/common_reference.h:
--------------------------------------------------------------------------------
 1 | // A collection of utility functions shared by test apps.
 2 | 
 3 | #ifndef COMMON_REFERENCE_H_
 4 | #define COMMON_REFERENCE_H_
 5 | 
 6 | #include <cstdint>
 7 | 
 8 | // This function implements the same computation as the ARMv7 NEON VQRDMULH
 9 | // instruction.
10 | int32_t saturating_rounding_doubling_high_multiply_reference(int32_t a, int32_t b);
11 | 
12 | // Correctly-rounded-to-nearest division by a power-of-two. Also known as
13 | // rounding arithmetic right shift.
14 | int32_t rounding_shift_right_reference(int32_t x, int32_t shift);
15 | 
16 | // Performs right shift and multiply by a multiplier.
17 | int32_t multiply_quantized_multiplier_reference(int32_t x, int32_t q, int32_t shift);
18 | 
19 | #endif  // COMMON_REFERENCE_H_
20 | 
--------------------------------------------------------------------------------
/apps/opengl_demo/glfw_helpers.h:
--------------------------------------------------------------------------------
 1 | #ifndef _GLFW_HELPERS_H_
 2 | #define _GLFW_HELPERS_H_
 3 | 
 4 | namespace GlfwHelpers {
 5 | 
 6 |     struct info {
 7 |         float dpi_scale;
 8 |     };
 9 | 
10 |     struct info setup(int width, int height);
11 |     void set_opengl_context();
12 |     void terminate();
13 | }
14 | 
15 | #endif
16 | 
--------------------------------------------------------------------------------
/apps/opengl_demo/image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUE-EE-ES/HalideAutoGPU/3115b7650b0c3b37a71259aeb3db32fc598bdfdd/apps/opengl_demo/image.png
--------------------------------------------------------------------------------
/apps/opengl_demo/layout.h:
--------------------------------------------------------------------------------
 1 | #ifndef _LAYOUT_HELPERS_H_
 2 | #define _LAYOUT_HELPERS_H_
 3 | 
 4 | // uint8_t and std::string appear in the declarations below, so pull in
 5 | // their headers here instead of relying on the includer's include order.
 6 | #include <cstdint>
 7 | #include <string>
 8 | 
 9 | // GLuint comes from the platform's OpenGL header.
10 | #if defined(__APPLE__)
11 | #include <OpenGL/gl.h>
12 | #else
13 | #include <GL/gl.h>
14 | #endif
15 | 
16 | namespace Layout {
17 | 
18 |     // NOTE(review): presumably upper-left, upper-right, lower-left, lower-right — confirm.
19 |     enum location { UL, UR, LL, LR };
20 | 
21 |     struct info {
22 |         int window_width;
23 |         int window_height;
24 |     };
25 | 
26 |     const struct info &setup(int image_width, int image_height);
27 | 
28 |     void draw_image(enum location location, const uint8_t *data, int width, int height, const std::string &label);
29 |     void draw_texture(enum location location, GLuint texture_id, int width, int height, const std::string &label);
30 | }
31 | 
32 | #endif
33 | 
--------------------------------------------------------------------------------
/apps/opengl_demo/opengl_helpers.h:
--------------------------------------------------------------------------------
 1 | #ifndef _OPENGL_HELPERS_H_
 2 | #define _OPENGL_HELPERS_H_
 3 | 
 4 | // uint8_t and std::string appear in the declarations below.
 5 | #include <cstdint>
 6 | #include <string>
 7 | 
 8 | // GLuint comes from the platform's OpenGL header.
 9 | #if defined(__APPLE__)
10 | #include <OpenGL/gl.h>
11 | #else
12 | #include <GL/gl.h>
13 | #endif
14 | 
15 | namespace OpenGLHelpers {
16 |     void setup(float dpi_scale);
17 |     GLuint create_texture(int width, int height, const uint8_t *data);
18 |     void delete_texture(GLuint texture_id);
19 |     void display_texture(GLuint texture_id, float x0, float x1, float y0, float y1);
20 |     void draw_text(const std::string &text, float x, float y);
21 | }
22 | 
23 | #endif
24 | 
--------------------------------------------------------------------------------
/apps/opengl_demo/png_helpers.h:
--------------------------------------------------------------------------------
 1 | #ifndef _PNG_HELPERS_
 2 | #define _PNG_HELPERS_
 3 | 
 4 | // uint8_t and std::string are used below; include their headers so this
 5 | // header compiles regardless of what the includer pulled in first.
 6 | #include <cstdint>
 7 | #include <string>
 8 | 
 9 | namespace PNGHelpers {
10 | 
11 |     // Result of load(): image dimensions plus a pointer to the pixel bytes.
12 |     // NOTE(review): pixel layout and ownership of `data` are not visible here.
13 |     struct image_info {
14 |         unsigned int width;
15 |         unsigned int height;
16 |         const uint8_t *data;
17 |     };
18 | 
19 |     struct image_info load(const std::string &filepath);
20 | }
21 | 
22 | #endif
23 | 
--------------------------------------------------------------------------------
/apps/opengl_demo/timer.h:
--------------------------------------------------------------------------------
 1 | #ifndef _TIMER_H_
 2 | #define _TIMER_H_
 3 | 
 4 | #include <chrono>
 5 | #include <string>
 6 | 
 7 | namespace Timer
 8 | {
 9 |     // A label plus a time stamp; produced by start() and consumed by report().
10 |     struct info {
11 |         const std::string what;
12 |         // std::chrono::time_point needs an explicit clock argument.
13 |         // NOTE(review): high_resolution_clock is assumed — confirm against the implementation file.
14 |         std::chrono::time_point<std::chrono::high_resolution_clock> time;
15 |     };
16 | 
17 |     struct info start(const std::string &what);
18 |     std::string report(const struct info &);
19 | }
20 | 
21 | #endif
22 | 
--------------------------------------------------------------------------------
/apps/openglcompute/build.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -e
 3 | android update project -p . --target android-21
 4 | make jni-libs
 5 | ant debug
 6 | adb install -r bin/HelloHalideOpenGLCompute-debug.apk
 7 | adb logcat -c
 8 | adb shell am start -n com.example.hellohalideopenglcompute/.HalideOpenGLComputeActivity
 9 | adb logcat | grep "^I/oglc"
10 | 
--------------------------------------------------------------------------------
/apps/openglcompute/jni/Application.mk:
--------------------------------------------------------------------------------
1 | # TODO(aam): Confirm that application builds and runs for all supported targets:
2 | # APP_ABI := armeabi armeabi-v7a arm64-v8a mips x86_64 x86
3 | APP_ABI := armeabi-v7a
4 | APP_PLATFORM := android-17
5 | 
6 | APP_STL := c++_static
7 | LOCAL_C_INCLUDES += ${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/4.8/include
8 | 
--------------------------------------------------------------------------------
/apps/openglcompute/res/drawable-hdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUE-EE-ES/HalideAutoGPU/3115b7650b0c3b37a71259aeb3db32fc598bdfdd/apps/openglcompute/res/drawable-hdpi/ic_launcher.png
--------------------------------------------------------------------------------
/apps/openglcompute/res/drawable-ldpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUE-EE-ES/HalideAutoGPU/3115b7650b0c3b37a71259aeb3db32fc598bdfdd/apps/openglcompute/res/drawable-ldpi/ic_launcher.png
--------------------------------------------------------------------------------
/apps/openglcompute/res/drawable-mdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUE-EE-ES/HalideAutoGPU/3115b7650b0c3b37a71259aeb3db32fc598bdfdd/apps/openglcompute/res/drawable-mdpi/ic_launcher.png
--------------------------------------------------------------------------------
/apps/openglcompute/res/drawable-xhdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUE-EE-ES/HalideAutoGPU/3115b7650b0c3b37a71259aeb3db32fc598bdfdd/apps/openglcompute/res/drawable-xhdpi/ic_launcher.png
--------------------------------------------------------------------------------
/apps/openglcompute/res/layout/main.xml:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 7 | 
 8 |   
14 | 
15 | 
--------------------------------------------------------------------------------
/apps/openglcompute/res/values/strings.xml:
--------------------------------------------------------------------------------
1 | 
2 | 
3 |     HelloHalideAndroidOpenGLCompute
4 | 
5 | 
--------------------------------------------------------------------------------
/apps/stencil_chain/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | add_executable(stencil_chain_process process.cpp)
 2 | halide_use_image_io(stencil_chain_process)
 3 | 
 4 | halide_generator(stencil_chain.generator SRCS stencil_chain_generator.cpp)
 5 | foreach(AUTO_SCHEDULE false true)
 6 |     if(${AUTO_SCHEDULE})
 7 |         set(LIB stencil_chain_auto_schedule)
 8 |     else()
 9 |         set(LIB stencil_chain)
10 |     endif()
11 |     halide_library_from_generator(${LIB}
12 |                                   GENERATOR stencil_chain.generator
13 |                                   GENERATOR_ARGS auto_schedule=${AUTO_SCHEDULE})
14 |     target_link_libraries(stencil_chain_process PRIVATE ${LIB})
15 | endforeach()
16 | 
--------------------------------------------------------------------------------
/apps/support/viz_auto.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # $1 = filter cmd to run, including args
 4 | # $2 = HalideTraceViz executable
 5 | # $3 = path to output mp4
 6 | 
 7 | rm -rf "$3"
 8 | 
 9 | # Use a named pipe for the $1 -> HTV pipe, just in case
10 | # the exe in $1 writes any random output to stdout.
11 | PIPE=/tmp/halide_viz_auto_pipe
12 | rm -rf "$PIPE"
13 | mkfifo "$PIPE"
14 | # $1 is deliberately left unquoted so it word-splits into the command and its args.
15 | HL_TRACE_FILE="${PIPE}" HL_NUM_THREADS=8 $1 &
16 | 
17 | "$2" --auto_layout --ignore_tags 0<"${PIPE}" | \
18 | ${HL_AVCONV} -y -f rawvideo -pix_fmt bgr32 -s 1920x1080 -i /dev/stdin -c:v h264 "$3"
19 | 
--------------------------------------------------------------------------------
/apps/wavelet/README.md:
--------------------------------------------------------------------------------
1 | wavelet is a trivial app designed to show ahead-of-time Generator usage (with both Make and CMake), as opposed to using direct calls to (e.g.) Func::compile_to_file().
2 | 
--------------------------------------------------------------------------------
/apps/wavelet/daubechies_constants.h:
--------------------------------------------------------------------------------
 1 | #ifndef DAUBECHIES_CONSTANTS_H_
 2 | #define DAUBECHIES_CONSTANTS_H_
 3 | 
 4 | const float D0 = 0.4829629131445341f;
 5 | const float D1 = 0.83651630373780772f;
 6 | const float D2 = 0.22414386804201339f;
 7 | const float D3 = -0.12940952255126034f;
 8 | 
 9 | #endif  // DAUBECHIES_CONSTANTS_H_
10 | 
--------------------------------------------------------------------------------
/apps/wavelet/haar_x_generator.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | #include "daubechies_constants.h"
 4 | 
 5 | namespace {
 6 | 
 7 | Halide::Var x("x"), y("y"), c("c");
 8 | 
 9 | class haar_x : public Halide::Generator<haar_x> {
10 | public:
11 |     Input<Buffer<float>> in_{"in" , 2};    // NOTE(review): element type assumed float — confirm
12 |     Output<Buffer<float>> out_{"out" , 3};
13 |     // c == 0 receives (a + b) / 2 and every other c receives (a - b) / 2 for the pair a = in(2x, y), b = in(2x+1, y).
14 |     void generate() {
15 |         Func in = Halide::BoundaryConditions::repeat_edge(in_);
16 | 
17 |         out_(x, y, c) = select(c == 0,
18 |                               (in(2*x, y) + in(2*x+1, y)),
19 |                               (in(2*x, y) - in(2*x+1, y)))/2;
20 |         out_.unroll(c, 2);
21 |     }
22 | };
23 | 
24 | }  // namespace
25 | 
26 | HALIDE_REGISTER_GENERATOR(haar_x, haar_x)
27 | 
--------------------------------------------------------------------------------
/apps/wavelet/inverse_haar_x_generator.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | #include "daubechies_constants.h"
 4 | 
 5 | namespace {
 6 | 
 7 | Halide::Var x("x"), y("y"), c("c");
 8 | 
 9 | class inverse_haar_x : public Halide::Generator<inverse_haar_x> {
10 | public:
11 |     Input<Buffer<float>> in_{"in" , 3};    // NOTE(review): element type assumed float — confirm
12 |     Output<Buffer<float>> out_{"out" , 2};
13 |     // Even x receives in(x/2, y, 0) + in(x/2, y, 1); odd x receives their difference.
14 |     void generate() {
15 |         Func in = Halide::BoundaryConditions::repeat_edge(in_);
16 | 
17 |         out_(x, y) = select(x%2 == 0,
18 |                            in(x/2, y, 0) + in(x/2, y, 1),
19 |                            in(x/2, y, 0) - in(x/2, y, 1));
20 |         out_.unroll(x, 2);
21 |     }
22 | };
23 | 
24 | }  // namespace
25 | 
26 | HALIDE_REGISTER_GENERATOR(inverse_haar_x, inverse_haar_x)
27 | 
--------------------------------------------------------------------------------
/python_bindings/correctness/bit_generator.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | using namespace Halide;
 4 | 
 5 | class BitGenerator : public Halide::Generator<BitGenerator> {
 6 | public:
 7 |     Input<Buffer<bool>> bit_input{"input_uint1", 1};    // NOTE(review): bool assumed from the "uint1" names — confirm
 8 |     Input<bool> bit_constant{"constant_uint1"};
 9 | 
10 |     Output<Buffer<bool>> bit_output{"output_uint1", 1};
11 | 
12 |     Var x, y, z;
13 |     // Adds the scalar input to every element of the 1-D input buffer.
14 |     void generate() {
15 |         bit_output(x) = bit_input(x) + bit_constant;
16 |     }
17 |     // No explicit schedule is applied.
18 |     void schedule() {
19 |     }
20 | };
21 | 
22 | HALIDE_REGISTER_GENERATOR(BitGenerator, bit)
23 | 
--------------------------------------------------------------------------------
/python_bindings/correctness/rdom.py:
--------------------------------------------------------------------------------
 1 | import halide as hl
 2 | 
 3 | def test_rdom():
 4 |     x = hl.Var("x")
 5 |     y = hl.Var("y")
 6 | 
 7 |     diagonal = hl.Func("diagonal")
 8 |     diagonal[x, y] = 1
 9 | 
10 |     domain_width = 10
11 |     domain_height = 10
12 | 
13 |     r = hl.RDom([(0, domain_width), (0, domain_height)])
14 |     r.where(r.x <= r.y)
15 | 
16 |     diagonal[r.x, r.y] += 2
17 |     output = diagonal.realize(domain_width, domain_height)
18 |     
19 |     for iy in range(domain_height):
20 |         for ix in range(domain_width):
21 |             if ix <= iy:
22 |                 assert output[ix, iy] == 3
23 |             else:
24 |                 assert output[ix, iy] == 1
25 | 
26 |     return 0
27 | 
28 | if __name__ == "__main__":
29 |     test_rdom()
30 | 
--------------------------------------------------------------------------------
/python_bindings/correctness/user_context_generator.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | using namespace Halide;
 4 | 
 5 | class UserContextGenerator : public Halide::Generator<UserContextGenerator> {
 6 | public:
 7 |     Input<uint8_t> constant{"constant"};    // NOTE(review): uint8_t assumed (the Python test passes ord('q') and a bytearray) — confirm
 8 |     Output<Buffer<uint8_t>> output{"output", 1};
 9 | 
10 |     Var x;
11 |     // Broadcasts the scalar input to every element of the 1-D output.
12 |     void generate() {
13 |         output(x) = constant;
14 |     }
15 |     // No explicit schedule is applied.
16 |     void schedule() {
17 |     }
18 | };
19 | 
20 | HALIDE_REGISTER_GENERATOR(UserContextGenerator, user_context)
21 | 
--------------------------------------------------------------------------------
/python_bindings/correctness/user_context_test.py:
--------------------------------------------------------------------------------
 1 | import array
 2 | import user_context
 3 | 
 4 | 
 5 | def test():
 6 |     output = bytearray("\0\0\0\0", "ascii")
 7 |     user_context.user_context(None, ord('q'), output)
 8 |     assert output == bytearray("qqqq", "ascii")
 9 | 
10 | 
11 | if __name__ == "__main__":
12 |     test()
13 | 
--------------------------------------------------------------------------------
/python_bindings/requirements.txt:
--------------------------------------------------------------------------------
 1 | # This file lists the python dependencies, 
 2 | # it is meant to be used with pip (and/or possibly virtualenv, pbundler, etc)
 3 | # See http://pip.readthedocs.org/en/latest/user_guide.html#requirements-files
 4 | # You will probably want to run 
 5 | # something similar to `pip3 install --user -r requirements.txt`
 6 | 
 7 | # science packages
 8 | numpy
 9 | scipy
10 | pillow
11 | 
--------------------------------------------------------------------------------
/python_bindings/src/PyArgument.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYARGUMENT_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYARGUMENT_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_argument(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYARGUMENT_H
15 | 
--------------------------------------------------------------------------------
/python_bindings/src/PyBoundaryConditions.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYBOUNDARYCONDITIONS_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYBOUNDARYCONDITIONS_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_boundary_conditions(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYBOUNDARYCONDITIONS_H
15 | 
--------------------------------------------------------------------------------
/python_bindings/src/PyBuffer.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYBUFFER_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYBUFFER_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_buffer(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYBUFFER_H
15 | 
--------------------------------------------------------------------------------
/python_bindings/src/PyConciseCasts.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYCONCISECASTS_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYCONCISECASTS_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_concise_casts(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYCONCISECASTS_H
15 | 
--------------------------------------------------------------------------------
/python_bindings/src/PyEnums.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYENUMS_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYENUMS_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_enums(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYENUMS_H
15 | 
--------------------------------------------------------------------------------
/python_bindings/src/PyError.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYERROR_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYERROR_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_error(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYERROR_H
15 | 
--------------------------------------------------------------------------------
/python_bindings/src/PyExpr.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYEXPR_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYEXPR_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_expr(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYEXPR_H
15 | 
--------------------------------------------------------------------------------
/python_bindings/src/PyExternFuncArgument.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYEXTERNFUNCARGUMENT_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYEXTERNFUNCARGUMENT_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_extern_func_argument(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYEXTERNFUNCARGUMENT_H
15 | 
--------------------------------------------------------------------------------
/python_bindings/src/PyFunc.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYFUNC_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYFUNC_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_func(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYFUNC_H
15 | 
--------------------------------------------------------------------------------
/python_bindings/src/PyFuncRef.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYFUNC_REF_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYFUNC_REF_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_func_ref(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYFUNC_REF_H
15 | 
--------------------------------------------------------------------------------
/python_bindings/src/PyIROperator.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYIROPERATOR_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYIROPERATOR_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_operators(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYIROPERATOR_H
15 | 
--------------------------------------------------------------------------------
/python_bindings/src/PyImageParam.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYIMAGEPARAM_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYIMAGEPARAM_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_image_param(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYIMAGEPARAM_H
15 | 
--------------------------------------------------------------------------------
/python_bindings/src/PyInlineReductions.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYINLINEREDUCTIONS_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYINLINEREDUCTIONS_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_inline_reductions(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYINLINEREDUCTIONS_H
15 | 
--------------------------------------------------------------------------------
/python_bindings/src/PyLambda.cpp:
--------------------------------------------------------------------------------
 1 | #include "PyLambda.h"
 2 | 
 3 | namespace Halide {
 4 | namespace PythonBindings {
 5 | 
 6 | void define_lambda(py::module &m) {
 7 |     // TODO: 'lambda' is a reserved word in Python, so we
 8 |     // can't use it for a function. Using 'lambda_func' for now.
 9 |     m.def("lambda_func", [](py::args args) -> Func {
10 |         auto vars = args_to_vector<Var>(args, 0, 1);  // presumably all args but the last — confirm the offset semantics
11 |         Expr e = args[args.size() - 1].cast<Expr>();  // the last positional arg is the expression
12 |         Func f("lambda" + Internal::unique_name('_'));
13 |         f(vars) = e;
14 |         return f;
15 |     });
16 | }
17 | 
18 | }  // namespace PythonBindings
19 | }  // namespace Halide
20 | 
--------------------------------------------------------------------------------
/python_bindings/src/PyLambda.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYLAMBDA_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYLAMBDA_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_lambda(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYLAMBDA_H
15 | 
--------------------------------------------------------------------------------
/python_bindings/src/PyLoopLevel.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYLOOPLEVEL_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYLOOPLEVEL_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_loop_level(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYLOOPLEVEL_H
15 | 
--------------------------------------------------------------------------------
/python_bindings/src/PyMachineParams.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYMACHINEPARAMS_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYMACHINEPARAMS_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_machine_params(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYMACHINEPARAMS_H
15 | 
--------------------------------------------------------------------------------
/python_bindings/src/PyModule.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYMODULE_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYMODULE_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_module(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYMODULE_H
15 | 
--------------------------------------------------------------------------------
/python_bindings/src/PyOutputs.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYOUTPUTS_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYOUTPUTS_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_outputs(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYOUTPUTS_H
15 | 
--------------------------------------------------------------------------------
/python_bindings/src/PyParam.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYPARAM_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYPARAM_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_param(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYPARAM_H
15 | 
--------------------------------------------------------------------------------
/python_bindings/src/PyPipeline.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYPIPELINE_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYPIPELINE_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_pipeline(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYPIPELINE_H
15 | 
--------------------------------------------------------------------------------
/python_bindings/src/PyRDom.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYRDOM_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYRDOM_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_rdom(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYRDOM_H
15 | 
--------------------------------------------------------------------------------
/python_bindings/src/PyStage.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYSTAGE_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYSTAGE_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_stage(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYSTAGE_H
15 | 
--------------------------------------------------------------------------------
/python_bindings/src/PyTarget.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYTARGET_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYTARGET_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_target(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYTARGET_H
15 | 
--------------------------------------------------------------------------------
/python_bindings/src/PyTuple.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYTUPLE_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYTUPLE_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_tuple(py::module &m);
10 | 
11 | // Copies each element of ht into a new py::tuple. Templated so it works with Realization as well as Tuple.
12 | template<typename T>
13 | inline py::tuple to_python_tuple(const T &ht) {
14 |     py::tuple pt(ht.size());  // pre-sized; each slot is filled in the loop below
15 |     for (size_t i = 0; i < ht.size(); i++) {
16 |         pt[i] = py::cast(ht[i]);
17 |     }
18 |     return pt;
19 | }
20 | 
21 | }  // namespace PythonBindings
22 | }  // namespace Halide
23 | 
24 | #endif  // HALIDE_PYTHON_BINDINGS_PYTUPLE_H
25 | 
--------------------------------------------------------------------------------
/python_bindings/src/PyType.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYTYPE_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYTYPE_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_type(py::module &m);
10 | 
11 | std::string halide_type_to_string(const Type &type);
12 | 
13 | }  // namespace PythonBindings
14 | }  // namespace Halide
15 | 
16 | #endif  // HALIDE_PYTHON_BINDINGS_PYTYPE_H
17 | 
--------------------------------------------------------------------------------
/python_bindings/src/PyVar.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYVAR_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYVAR_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_var(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYVAR_H
15 | 
--------------------------------------------------------------------------------
/python_bindings/src/PyVarOrRVar.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYFUNC_VARORRVAR_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYFUNC_VARORRVAR_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_var_or_rvar(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYFUNC_VARORRVAR_H
15 | 
--------------------------------------------------------------------------------
/src/AddParameterChecks.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_INTERNAL_ADD_PARAMETER_CHECKS_H
 2 | #define HALIDE_INTERNAL_ADD_PARAMETER_CHECKS_H
 3 | 
 4 | /** \file
 5 |  *
 6 |  * Defines the lowering pass that adds the assertions that validate
 7 |  * scalar parameters.
 8 |  */
 9 | 
10 | #include "IR.h"
11 | 
12 | namespace Halide {
13 | 
14 | struct Target;
15 | 
16 | namespace Internal {
17 | 
18 | /** Insert checks to make sure that all referenced parameters meet
19 |  * their constraints. Also injects any custom requirements provided
20 |  * by the user. */
21 | Stmt add_parameter_checks(const std::vector &requirements, Stmt s, const Target &t);
22 | 
23 | }  // namespace Internal
24 | }  // namespace Halide
25 | 
26 | #endif
27 | 
--------------------------------------------------------------------------------
/src/AlignLoads.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_ALIGN_LOADS_H
 2 | #define HALIDE_ALIGN_LOADS_H
 3 | 
 4 | /** \file
 5 |  * Defines a lowering pass that rewrites unaligned loads into
 6 |  * sequences of aligned loads.
 7 |  */
 8 | #include "IR.h"
 9 | #include "ModulusRemainder.h"
10 | #include "Scope.h"
11 | #include "Target.h"
12 | 
13 | namespace Halide {
14 | namespace Internal {
15 | 
16 | /** Attempt to rewrite unaligned loads from buffers which are known to
17 |  * be aligned to instead load aligned vectors that cover the original
18 |  * load, and then slice the original load out of the aligned
19 |  * vectors. */
20 | Stmt align_loads(Stmt s, int alignment);
21 | 
22 | }  // namespace Internal
23 | }  // namespace Halide
24 | 
25 | #endif
26 | 
--------------------------------------------------------------------------------
/src/AllocationBoundsInference.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_ALLOCATION_BOUNDS_INFERENCE_H
 2 | #define HALIDE_ALLOCATION_BOUNDS_INFERENCE_H
 3 | 
 4 | /** \file
 5 |  * Defines the lowering pass that determines how large internal allocations should be.
 6 |  */
 7 | 
 8 | #include "Bounds.h"
 9 | #include "IR.h"
10 | 
11 | namespace Halide {
12 | namespace Internal {
13 | 
14 | /** Take a partially lowered statement with Realize nodes in terms of
15 |  * variables, and define values for those variables. */
16 | Stmt allocation_bounds_inference(Stmt s,
17 |                                  const std::map &env,
18 |                                  const std::map, Interval> &func_bounds);
19 | }  // namespace Internal
20 | }  // namespace Halide
21 | 
22 | #endif
23 | 
--------------------------------------------------------------------------------
/src/AsyncProducers.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_ASYNC_PRODUCERS_H
 2 | #define HALIDE_ASYNC_PRODUCERS_H
 3 | 
 4 | /** \file
 5 |  * Defines the lowering pass that injects task parallelism for producers that are scheduled as async.
 6 |  */
 7 | 
 8 | #include "IR.h"
 9 | 
10 | namespace Halide {
11 | namespace Internal {
12 | 
13 | Stmt fork_async_producers(Stmt s, const std::map &env);
14 | 
15 | }
16 | }
17 | 
18 | #endif
19 | 
--------------------------------------------------------------------------------
/src/BoundSmallAllocations.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_BOUND_SMALL_ALLOCATIONS
 2 | #define HALIDE_BOUND_SMALL_ALLOCATIONS
 3 | 
 4 | #include "IR.h"
 5 | 
 6 | /** \file
 7 |  * Defines the lowering pass that attempts to rewrite small
 8 |  * allocations to have constant size.
 9 |  */
10 | 
11 | namespace Halide {
12 | namespace Internal {
13 | 
14 | /**
15 |  *
16 |  * Use bounds analysis to attempt to bound the sizes of small
17 |  * allocations. Inside GPU kernels this is necessary in order to
18 |  * compile. On the CPU this is also useful, because it prevents malloc
19 |  * calls for (provably) tiny allocations. */
20 | Stmt bound_small_allocations(const Stmt &s);
21 | 
22 | }  // namespace Internal
23 | }  // namespace Halide
24 | 
25 | #endif
26 | 
--------------------------------------------------------------------------------
/src/CanonicalizeGPUVars.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_CANONICALIZE_GPU_VARS_H
 2 | #define HALIDE_CANONICALIZE_GPU_VARS_H
 3 | 
 4 | /** \file
 5 |  * Defines the lowering pass that canonicalizes the GPU var names.
 6 |  */
 7 | 
 8 | #include "IR.h"
 9 | 
10 | namespace Halide {
11 | namespace Internal {
12 | 
13 | /** Canonicalize GPU var names into some pre-determined block/thread names
14 |  * (i.e. __block_id_x, __thread_id_x, etc.). The x/y/z/w order is determined
15 |  * by the nesting order: innermost is assigned to x and so on. */
16 | Stmt canonicalize_gpu_vars(Stmt s);
17 | 
18 | }  // namespace Internal
19 | }  // namespace Halide
20 | 
21 | #endif
22 | 
--------------------------------------------------------------------------------
/src/Debug.cpp:
--------------------------------------------------------------------------------
 1 | #include "Debug.h"
 2 | 
 3 | namespace Halide {
 4 | namespace Internal {
 5 | 
 6 | int debug::debug_level() {  // level parsed from HL_DEBUG_CODEGEN; 0 when the value is empty
 7 |     static const int level = [] {  // function-local static: the lambda runs only on the first call
 8 |         const std::string env = get_env_variable("HL_DEBUG_CODEGEN");
 9 |         return env.empty() ? 0 : atoi(env.c_str());
10 |     }();
11 |     return level;
12 | }
13 | 
14 | }  // namespace Internal
15 | }  // namespace Halide
16 | 
--------------------------------------------------------------------------------
/src/DebugArguments.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_INTERNAL_DEBUG_ARGUMENTS_H
 2 | #define HALIDE_INTERNAL_DEBUG_ARGUMENTS_H
 3 | 
 4 | /** \file
 5 |  *
 6 |  * Defines a lowering pass that injects debug statements inside a
 7 |  * LoweredFunc. Intended to be used when Target::Debug is on.
 8 |  */
 9 | 
10 | namespace Halide {
11 | namespace Internal {
12 | 
13 | struct LoweredFunc;
14 | 
15 | /** Injects debug prints in a LoweredFunc that describe the arguments. Mutates the given func. */
16 | void debug_arguments(LoweredFunc *func);
17 | 
18 | }  // namespace Internal
19 | }  // namespace Halide
20 | 
21 | #endif
22 | 
--------------------------------------------------------------------------------
/src/EarlyFree.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_EARLY_FREE_H
 2 | #define HALIDE_EARLY_FREE_H
 3 | 
 4 | /** \file
 5 |  * Defines the lowering pass that injects markers just after
 6 |  * the last use of each buffer so that they can potentially be freed
 7 |  * earlier.
 8 |  */
 9 | 
10 | #include "IR.h"
11 | 
12 | namespace Halide {
13 | namespace Internal {
14 | 
15 | /** Take a statement with allocations and inject markers (of the form
16 |  * of calls to "mark buffer dead") after the last use of each
17 |  * allocation. Targets may use this to free buffers earlier than the
18 |  * close of their Allocate node. */
19 | Stmt inject_early_frees(Stmt s);
20 | 
21 | }  // namespace Internal
22 | }  // namespace Halide
23 | 
24 | #endif
25 | 
--------------------------------------------------------------------------------
/src/HexagonOffload.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_HEXAGON_OFFLOAD_H
 2 | #define HALIDE_HEXAGON_OFFLOAD_H
 3 | 
 4 | /** \file
 5 |  * Defines a lowering pass to pull loops marked with the
 6 |  * Hexagon device API to a separate module, and call them through the
 7 |  * Hexagon host runtime module.
 8 |  */
 9 | 
10 | #include "Module.h"
11 | 
12 | namespace Halide {
13 | namespace Internal {
14 | 
15 | /** Pull loops marked with the Hexagon device API to a separate
16 |  * module, and call them through the Hexagon host runtime module. */
17 | Stmt inject_hexagon_rpc(Stmt s, const Target &host_target, Module &module);
18 | 
19 | Buffer compile_module_to_hexagon_shared_object(const Module &device_code);
20 | 
21 | }  // namespace Internal
22 | }  // namespace Halide
23 | 
24 | #endif
25 | 
--------------------------------------------------------------------------------
/src/InjectHostDevBufferCopies.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_HOST_GPU_BUFFER_COPIES_H
 2 | #define HALIDE_HOST_GPU_BUFFER_COPIES_H
 3 | 
 4 | /** \file
 5 |  * Defines the lowering passes that deal with host and device buffer flow.
 6 |  */
 7 | 
 8 | #include "IR.h"
 9 | #include "Target.h"
10 | 
11 | namespace Halide {
12 | namespace Internal {
13 | 
14 | /** A helper function to call an extern function, and assert that it
15 |  * returns 0. */
16 | Stmt call_extern_and_assert(const std::string &name, const std::vector &args);
17 | 
18 | /** Inject calls to halide_device_malloc, halide_copy_to_device, and
19 |  * halide_copy_to_host as needed. */
20 | Stmt inject_host_dev_buffer_copies(Stmt s, const Target &t);
21 | 
22 | }  // namespace Internal
23 | }  // namespace Halide
24 | 
25 | #endif
26 | 
--------------------------------------------------------------------------------
/src/InjectOpenGLIntrinsics.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_INJECT_OPENGL_INTRINSICS_H
 2 | #define HALIDE_INJECT_OPENGL_INTRINSICS_H
 3 | 
 4 | /** \file
 5 |  * Defines the lowering pass that injects texture loads and texture
 6 |  * stores for opengl.
 7 |  */
 8 | 
 9 | #include "IR.h"
10 | 
11 | namespace Halide {
12 | namespace Internal {
13 | 
14 | /** Take a statement with kernel for loops and turn loads and
15 |  * stores inside the loops into OpenGL texture load and store
16 |  * intrinsics. Should only be run when the OpenGL target is active. */
17 | Stmt inject_opengl_intrinsics(Stmt s);
18 | 
19 | }  // namespace Internal
20 | }  // namespace Halide
21 | 
22 | #endif
23 | 
--------------------------------------------------------------------------------
/src/LICM.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_LICM_H
 2 | #define HALIDE_LICM_H
 3 | 
 4 | /** \file
 5 |  * Methods for lifting loop invariants out of inner loops.
 6 |  */
 7 | 
 8 | #include "IR.h"
 9 | 
10 | namespace Halide {
11 | namespace Internal {
12 | 
13 | /** Hoist loop-invariants out of inner loops. This is especially
14 |  * important in cases where LLVM would not do it for us
15 |  * automatically. For example, it hoists loop invariants out of cuda
16 |  * kernels. */
17 | Stmt loop_invariant_code_motion(Stmt);
18 | 
19 | }  // namespace Internal
20 | }  // namespace Halide
21 | 
22 | #endif
23 | 
--------------------------------------------------------------------------------
/src/Lerp.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_LERP_H
 2 | #define HALIDE_LERP_H
 3 | 
 4 | /** \file
 5 |  * Defines methods for converting a lerp intrinsic into Halide IR.
 6 |  */
 7 | 
 8 | #include "IR.h"
 9 | 
10 | namespace Halide {
11 | namespace Internal {
12 | 
13 | /** Build Halide IR that computes a lerp. Used by codegen targets that
14 |  * don't have a native lerp. */
15 | Expr lower_lerp(Expr zero_val, Expr one_val, Expr weight);
16 | 
17 | }  // namespace Internal
18 | }  // namespace Halide
19 | 
20 | #endif
21 | 
--------------------------------------------------------------------------------
/src/LoopCarry.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_LOOP_CARRY_H
 2 | #define HALIDE_LOOP_CARRY_H
 3 | 
 4 | #include "Expr.h"
 5 | 
 6 | namespace Halide {
 7 | namespace Internal {
 8 | 
 9 | /** Reuse loads done on previous loop iterations by stashing them in
10 |  * induction variables instead of redoing the load. If the loads are
11 |  * predicated, the predicates need to match. Can be an optimization or
12 |  * pessimization depending on how good the L1 cache is on the architecture
13 |  * and how many memory issue slots there are. Currently only intended
14 |  * for Hexagon. */
15 | Stmt loop_carry(Stmt, int max_carried_values = 8);
16 | 
17 | }  // namespace Internal
18 | }  // namespace Halide
19 | 
20 | #endif
21 | 
--------------------------------------------------------------------------------
/src/LowerWarpShuffles.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_LOWER_WARP_SHUFFLES_H
 2 | #define HALIDE_LOWER_WARP_SHUFFLES_H
 3 | 
 4 | /** \file
 5 |  * Defines the lowering pass that injects CUDA warp shuffle
 6 |  * instructions to access storage outside of a GPULane loop.
 7 |  */
 8 | 
 9 | #include "IR.h"
10 | 
11 | namespace Halide {
12 | namespace Internal {
13 | 
14 | /** Rewrite access to things stored outside the loop over GPU lanes to
15 |  * use nvidia's warp shuffle instructions. */
16 | Stmt lower_warp_shuffles(Stmt s);
17 | 
18 | }  // namespace Internal
19 | }  // namespace Halide
20 | 
21 | #endif
22 | 
--------------------------------------------------------------------------------
/src/Monotonic.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_MONOTONIC_H
 2 | #define HALIDE_MONOTONIC_H
 3 | 
 4 | /** \file
 5 |  *
 6 |  * Methods for computing whether expressions are monotonic
 7 |  */
 8 | 
 9 | #include "IR.h"
10 | #include "Scope.h"
11 | 
12 | namespace Halide {
13 | namespace Internal {
14 | 
15 | /**
16 |  * Detect whether an expression is monotonic increasing in a variable,
17 |  * decreasing, or unknown.
18 |  */
19 | enum class Monotonic {Constant, Increasing, Decreasing, Unknown};
20 | Monotonic is_monotonic(Expr e, const std::string &var,
21 |                        const Scope &scope = Scope::empty_scope());
22 | 
23 | void is_monotonic_test();
24 | 
25 | }  // namespace Internal
26 | }  // namespace Halide
27 | 
28 | #endif
29 | 
--------------------------------------------------------------------------------
/src/PrintLoopNest.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_INTERNAL_PRINT_LOOP_NEST_H
 2 | #define HALIDE_INTERNAL_PRINT_LOOP_NEST_H
 3 | 
 4 | /** \file
 5 |  *
 6 |  * Defines methods to print out the loop nest corresponding to a schedule.
 7 |  */
 8 | 
 9 | #include 
10 | #include 
11 | 
12 | namespace Halide {
13 | namespace Internal {
14 | 
15 | class Function;
16 | 
17 | /** Emit some simple pseudocode that shows the structure of the loop
18 |  * nest specified by this pipeline's schedule, and the schedules of
19 |  * the functions it uses. */
20 | std::string print_loop_nest(const std::vector &output_funcs);
21 | 
22 | }  // namespace Internal
23 | }  // namespace Halide
24 | 
25 | #endif
26 | 
--------------------------------------------------------------------------------
/src/PythonExtensionGen.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_EXTENSION_GEN_H_
 2 | #define HALIDE_PYTHON_EXTENSION_GEN_H_
 3 | 
 4 | #include 
 5 | #include "Module.h"
 6 | #include "Target.h"
 7 | 
 8 | namespace Halide {
 9 | 
10 | class Module;
11 | struct Target;
12 | 
13 | namespace Internal {
14 | 
15 | class PythonExtensionGen {  // NOTE(review): presumably writes Python-extension glue code to `dest` — confirm in the .cpp
16 | public:
17 |     PythonExtensionGen(std::ostream &dest, const std::string &header_name, Target target);
18 | 
19 |     void compile(const Module &module);  // overload taking a whole Module
20 |     void compile(const LoweredFunc &f);  // overload taking a single LoweredFunc
21 | private:
22 |     void convert_buffer(std::string name, const LoweredArgument* arg);
23 |     std::ostream &dest;  // reference member: the stream must outlive this object
24 |     std::string header_name;
25 |     Target target;  // held by value
26 | };
27 | 
28 | }
29 | }
30 | 
31 | #endif // HALIDE_PYTHON_EXTENSION_GEN_H_
32 | 
--------------------------------------------------------------------------------
/src/Qualify.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_QUALIFY_H
 2 | #define HALIDE_QUALIFY_H
 3 | 
 4 | /** \file
 5 |  *
 6 |  * Defines methods for prefixing names in an expression with a prefix string.
 7 |  */
 8 | 
 9 | #include "IR.h"
10 | 
11 | namespace Halide {
12 | namespace Internal {
13 | 
14 | /** Prefix all variable names in the given expression with the prefix string. */
15 | Expr qualify(const std::string &prefix, Expr value);
16 | 
17 | }  // namespace Internal
18 | }  // namespace Halide
19 | 
20 | #endif
21 | 
--------------------------------------------------------------------------------
/src/RemoveDeadAllocations.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_REMOVE_DEAD_ALLOCATIONS_H
 2 | #define HALIDE_REMOVE_DEAD_ALLOCATIONS_H
 3 | 
 4 | /** \file
 5 |  * Defines the lowering pass that removes allocate and free nodes that
 6 |  * are not used.
 7 |  */
 8 | 
 9 | #include "IR.h"
10 | 
11 | namespace Halide {
12 | namespace Internal {
13 | 
14 | /** Find Allocate/Free pairs that are never loaded from or stored to,
15 |  *  and remove them from the Stmt. This doesn't touch Realize/Call
16 |  *  nodes and so must be called after storage_flattening.
17 |  */
18 | Stmt remove_dead_allocations(Stmt s);
19 | 
20 | }  // namespace Internal
21 | }  // namespace Halide
22 | 
23 | #endif
24 | 
--------------------------------------------------------------------------------
/src/RemoveExternLoops.cpp:
--------------------------------------------------------------------------------
 1 | #include "RemoveExternLoops.h"
 2 | #include "IRMutator.h"
 3 | 
 4 | namespace Halide {
 5 | namespace Internal {
 6 | 
 7 | // Mutator that rewrites each ForType::Extern loop as its first iteration.
 8 | class RemoveExternLoops : public IRMutator {
 9 |     using IRMutator::visit;
10 | 
11 |     Stmt visit(const For *op) override {
12 |         if (op->for_type == ForType::Extern) {
13 |             // Bind the loop variable to the loop min around the mutated body.
14 |             return LetStmt::make(op->name, op->min, mutate(op->body));
15 |         }
16 |         return IRMutator::visit(op);
17 |     }
18 | };
19 | 
20 | // Entry point of the pass: run the RemoveExternLoops mutator over s and
21 | // return the rewritten statement.
22 | Stmt remove_extern_loops(Stmt s) { return RemoveExternLoops().mutate(s); }
23 | 
24 | }  // namespace Internal
25 | }  // namespace Halide
26 | 
--------------------------------------------------------------------------------
/src/RemoveExternLoops.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_REMOVE_EXTERN_LOOPS
 2 | #define HALIDE_REMOVE_EXTERN_LOOPS
 3 | 
 4 | #include "IR.h"
 5 | 
 6 | /** \file
 7 |  * Defines a lowering pass that removes placeholder loops for extern stages.
 8 |  */
 9 | 
10 | namespace Halide {
11 | namespace Internal {
12 | 
13 |   /** Removes placeholder loops for extern stages. */
14 | Stmt remove_extern_loops(Stmt s);
15 | 
16 | }  // namespace Internal
17 | }  // namespace Halide
18 | 
19 | #endif
20 | 
--------------------------------------------------------------------------------
/src/RemoveUndef.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_REMOVE_UNDEF
 2 | #define HALIDE_REMOVE_UNDEF
 3 | 
 4 | #include "IR.h"
 5 | 
 6 | /** \file
 7 |  * Defines a lowering pass that elides stores that depend on uninitialized values.
 8 |  */
 9 | 
10 | namespace Halide {
11 | namespace Internal {
12 | 
13 | /** Removes stores that depend on undef values, and statements that
14 |  * only contain such stores. */
15 | Stmt remove_undef(Stmt s);
16 | 
17 | }  // namespace Internal
18 | }  // namespace Halide
19 | 
20 | #endif
21 | 
--------------------------------------------------------------------------------
/src/RoundingMode.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_ROUNDING_MODE_H
 2 | #define HALIDE_ROUNDING_MODE_H
 3 | namespace Halide {
 4 | 
 5 | /** Rounding modes (IEEE754 2008 4.3 Rounding-direction attributes) */
 6 | enum class RoundingMode {
 7 |     TowardZero, ///< Round towards zero (IEEE754 2008 4.3.2)
 8 |     ToNearestTiesToEven, ///< Round to nearest, when there is a tie pick even integral significand (IEEE754 2008 4.3.1)
 9 |     ToNearestTiesToAway, ///< Round to nearest, when there is a tie pick value furthest away from zero (IEEE754 2008 4.3.1)
10 |     TowardPositiveInfinity, ///< Round towards positive infinity (IEEE754 2008 4.3.2)
11 |     TowardNegativeInfinity ///< Round towards negative infinity (IEEE754 2008 4.3.2)
12 | };
13 | 
14 | }  // namespace Halide
15 | #endif
16 | 
--------------------------------------------------------------------------------
/src/SelectGPUAPI.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_INTERNAL_SELECT_GPU_API_H
 2 | #define HALIDE_INTERNAL_SELECT_GPU_API_H
 3 | 
 4 | #include "IR.h"
 5 | #include "Target.h"
 6 | 
 7 | /** \file
 8 |  * Defines a lowering pass that selects which GPU api to use for each
 9 |  * gpu for loop
10 |  */
11 | 
12 | namespace Halide {
13 | namespace Internal {
14 | 
15 | /** Replace for loops with GPU_Default device_api with an actual
16 |  * device API depending on what's enabled in the target. Choose the
17 |  * first of the following: opencl, cuda, openglcompute, opengl */
18 | Stmt select_gpu_api(Stmt s, Target t);
19 | 
20 | }  // namespace Internal
21 | }  // namespace Halide
22 | 
23 | #endif
24 | 
--------------------------------------------------------------------------------
/src/SimplifySpecializations.h:
--------------------------------------------------------------------------------
 1 | #ifndef SIMPLIFY_SPECIALIZATIONS_H
 2 | #define SIMPLIFY_SPECIALIZATIONS_H
 3 | 
 4 | /** \file
 5 |  *
 6 |  * Defines a pass that tries to simplify the RHS/LHS of a function's definition
 7 |  * based on its specializations.
 8 |  */
 9 | 
10 | #include