├── .clang-format
├── .gitattributes
├── .gitignore
├── .gitmodules
├── .travis.yml
├── CMakeLists.txt
├── Doxyfile
├── Doxyfile.in
├── LICENSE.txt
├── Makefile
├── README.md
├── README_cmake.md
├── README_rungen.md
├── README_webassembly.md
├── ai_scripts
    ├── conv.prx
    ├── data
    │   ├── data.txt
    │   ├── kernelAndInImage_256x16_k3_gaussblur.txt
    │   ├── old_data.txt
    │   └── refImage_256x16_k3_gaussblur.txt
    ├── data_points.txt
    ├── execute.py
    ├── explore.bash
    ├── global.h
    ├── main.cc
    ├── naive.cc
    ├── run.bash
    ├── run1.bash
    ├── run2.bash
    ├── run3.bash
    ├── run4.bash
    ├── run_single.bash
    └── xhalide_generated.cc
├── apps
    ├── CMakeLists.txt
    ├── HelloAndroid
    │   ├── .gitignore
    │   ├── AndroidManifest.xml
    │   ├── README.md
    │   ├── ant.properties
    │   ├── build-gradle.sh
    │   ├── build.gradle
    │   ├── build.sh
    │   ├── build.xml
    │   ├── gradle
    │   │   └── wrapper
    │   │   │   ├── gradle-wrapper.jar
    │   │   │   └── gradle-wrapper.properties
    │   ├── gradlew
    │   ├── gradlew.bat
    │   ├── jni
    │   │   ├── Android.mk
    │   │   ├── Application.mk
    │   │   ├── hello_generator.cpp
    │   │   └── hello_wrapper.cpp
    │   ├── res
    │   │   ├── drawable-hdpi
    │   │   │   └── ic_launcher.png
    │   │   ├── drawable-ldpi
    │   │   │   └── ic_launcher.png
    │   │   ├── drawable-mdpi
    │   │   │   └── ic_launcher.png
    │   │   ├── drawable-xhdpi
    │   │   │   └── ic_launcher.png
    │   │   ├── layout
    │   │   │   └── main.xml
    │   │   └── values
    │   │   │   └── strings.xml
    │   └── src
    │   │   └── com
    │   │       └── example
    │   │           └── hellohalide
    │   │               ├── CameraActivity.java
    │   │               ├── CameraPreview.java
    │   │               └── FrameHandler.java
    ├── HelloAndroidCamera2
    │   ├── .gitignore
    │   ├── AndroidManifest.xml
    │   ├── README.md
    │   ├── ant.properties
    │   ├── build-gradle.sh
    │   ├── build.gradle
    │   ├── build.sh
    │   ├── build.xml
    │   ├── gradle
    │   │   └── wrapper
    │   │   │   ├── gradle-wrapper.jar
    │   │   │   └── gradle-wrapper.properties
    │   ├── gradlew
    │   ├── gradlew.bat
    │   ├── jni
    │   │   ├── Android.mk
    │   │   ├── AndroidBufferUtilities.cpp
    │   │   ├── AndroidBufferUtilities.h
    │   │   ├── Application.mk
    │   │   ├── HalideFilters.cpp
    │   │   ├── LockedSurface.cpp
    │   │   ├── LockedSurface.h
    │   │   ├── YuvBufferT.cpp
    │   │   ├── YuvBufferT.h
    │   │   ├── deinterleave_generator.cpp
    │   │   └── edge_detect_generator.cpp
    │   ├── res
    │   │   ├── drawable-hdpi
    │   │   │   └── ic_launcher.png
    │   │   ├── drawable-ldpi
    │   │   │   └── ic_launcher.png
    │   │   ├── drawable-mdpi
    │   │   │   └── ic_launcher.png
    │   │   ├── drawable-xhdpi
    │   │   │   └── ic_launcher.png
    │   │   ├── layout
    │   │   │   ├── activity_camera.xml
    │   │   │   ├── fragment_camera2_basic.xml
    │   │   │   └── main.xml
    │   │   └── values
    │   │   │   ├── strings.xml
    │   │   │   └── styles.xml
    │   └── src
    │   │   └── com
    │   │       ├── android
    │   │           └── ex
    │   │           │   └── camera2
    │   │           │       ├── blocking
    │   │           │           ├── BlockingCameraManager.java
    │   │           │           ├── BlockingCaptureCallback.java
    │   │           │           ├── BlockingSessionCallback.java
    │   │           │           └── BlockingStateCallback.java
    │   │           │       ├── exceptions
    │   │           │           └── TimeoutRuntimeException.java
    │   │           │       ├── pos
    │   │           │           └── AutoFocusStateMachine.java
    │   │           │       └── utils
    │   │           │           ├── StateChangeListener.java
    │   │           │           ├── StateWaiter.java
    │   │           │           └── SysTrace.java
    │   │       └── example
    │   │           └── helloandroidcamera2
    │   │               ├── AndroidBufferUtilities.java
    │   │               ├── AutoFitSurfaceView.java
    │   │               ├── Camera2BasicFragment.java
    │   │               ├── CameraActivity.java
    │   │               ├── HalideFilters.java
    │   │               ├── HalideYuvBufferT.java
    │   │               └── NativeSurfaceHandle.java
    ├── HelloAndroidGL
    │   ├── AndroidManifest.xml
    │   ├── ant.properties
    │   ├── build.sh
    │   ├── build.xml
    │   ├── jni
    │   │   ├── Android.mk
    │   │   ├── Application.mk
    │   │   ├── android_halide_gl_native.cpp
    │   │   └── halide_gl_filter.cpp
    │   ├── project.properties
    │   ├── res
    │   │   ├── drawable-hdpi
    │   │   │   └── ic_launcher.png
    │   │   ├── drawable-ldpi
    │   │   │   └── ic_launcher.png
    │   │   ├── drawable-mdpi
    │   │   │   └── ic_launcher.png
    │   │   ├── drawable-xhdpi
    │   │   │   └── ic_launcher.png
    │   │   ├── layout
    │   │   │   └── main.xml
    │   │   └── values
    │   │   │   └── strings.xml
    │   └── src
    │   │   └── org
    │   │       └── halide_lang
    │   │           └── hellohalidegl
    │   │               └── HelloHalideGL.java
    ├── HelloMatlab
    │   ├── Makefile
    │   ├── iir_blur.cpp
    │   ├── run_blur.m
    │   └── run_blur.sh
    ├── HelloiOS
    │   ├── HelloiOS.xcodeproj
    │   │   └── project.pbxproj
    │   └── HelloiOS
    │   │   ├── AppDelegate.h
    │   │   ├── AppDelegate.mm
    │   │   ├── HalideView.h
    │   │   ├── HalideView.mm
    │   │   ├── HalideViewController.h
    │   │   ├── HalideViewController.mm
    │   │   ├── HelloiOS-Info.plist
    │   │   ├── HelloiOS-Prefix.pch
    │   │   ├── Images.xcassets
    │   │       ├── AppIcon.appiconset
    │   │       │   └── Contents.json
    │   │       └── LaunchImage.launchimage
    │   │       │   └── Contents.json
    │   │   ├── en.lproj
    │   │       └── InfoPlist.strings
    │   │   ├── main.mm
    │   │   └── reaction_diffusion_2_generator.cpp
    ├── auto_viz
    │   ├── Makefile
    │   ├── auto_viz_demo.cpp
    │   └── auto_viz_demo_generator.cpp
    ├── autoscheduler
    │   ├── ASLog.cpp
    │   ├── ASLog.h
    │   ├── AutoSchedule.cpp
    │   ├── AutoSchedule.h
    │   ├── CostModel.h
    │   ├── DefaultCostModel.cpp
    │   ├── Errors.h
    │   ├── Featurization.h
    │   ├── FunctionDAG.h
    │   ├── Makefile
    │   ├── NetworkSize.h
    │   ├── PerfectHashMap.h
    │   ├── autotune_loop.sh
    │   ├── cost_model_generator.cpp
    │   ├── cost_model_schedule.h
    │   ├── demo_generator.cpp
    │   ├── featurization_to_sample.cpp
    │   ├── test.cpp
    │   ├── test_perfect_hash_map.cpp
    │   ├── train_cost_model.cpp
    │   └── weights
    │   │   ├── head1_conv1_bias.data
    │   │   ├── head1_conv1_weight.data
    │   │   ├── head2_conv1_bias.data
    │   │   ├── head2_conv1_weight.data
    │   │   ├── trunk_conv1_bias.data
    │   │   └── trunk_conv1_weight.data
    ├── bilateral_grid
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   ├── bilateral_grid_generator.cpp
    │   ├── filter.cpp
    │   └── viz.sh
    ├── blur
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   ├── adb_run_on_device.sh
    │   ├── halide_blur_generator.cpp
    │   └── test.cpp
    ├── c_backend
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   ├── pipeline_cpp_generator.cpp
    │   ├── pipeline_generator.cpp
    │   ├── run.cpp
    │   └── run_cpp.cpp
    ├── camera_pipe
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   ├── camera_pipe_generator.cpp
    │   ├── process.cpp
    │   └── viz.sh
    ├── conv_layer
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   ├── conv_layer_generator.cpp
    │   └── process.cpp
    ├── cuda_mat_mul
    │   ├── Makefile
    │   ├── mat_mul_generator.cpp
    │   └── runner.cpp
    ├── fft
    │   ├── Makefile
    │   ├── complex.h
    │   ├── fft.cpp
    │   ├── fft.h
    │   ├── fft_aot_test.cpp
    │   ├── fft_generator.cpp
    │   ├── funct.h
    │   └── main.cpp
    ├── glsl
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   ├── halide_blur_glsl_generator.cpp
    │   ├── halide_ycc_glsl_generator.cpp
    │   └── opengl_test.cpp
    ├── hexagon_benchmarks
    │   ├── Makefile
    │   ├── adb_run_on_device.sh
    │   ├── conv3x3_generator.cpp
    │   ├── dilate3x3_generator.cpp
    │   ├── gaussian5x5_generator.cpp
    │   ├── median3x3_generator.cpp
    │   ├── process.cpp
    │   ├── process.h
    │   └── sobel_generator.cpp
    ├── hexagon_dma
    │   ├── Makefile
    │   ├── mock_dma_implementation.cpp
    │   ├── pipeline_raw_linear_interleaved_basic.cpp
    │   ├── pipeline_yuv_linear_basic.cpp
    │   ├── process_raw_linear_interleaved_basic.cpp
    │   └── process_yuv_linear_basic.cpp
    ├── images
    │   ├── bayer_raw.png
    │   ├── bayer_small.png
    │   ├── gray.png
    │   ├── gray_small.png
    │   ├── rgb.png
    │   ├── rgb_small.png
    │   ├── rgb_small16.png
    │   └── rgba.png
    ├── interpolate
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   └── interpolate.cpp
    ├── lens_blur
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   ├── lens_blur_generator.cpp
    │   └── process.cpp
    ├── linear_algebra
    │   ├── .gitignore
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   ├── benchmarks
    │   │   ├── CMakeLists.txt
    │   │   ├── cblas_benchmarks.cpp
    │   │   ├── clock.h
    │   │   ├── eigen_benchmarks.cpp
    │   │   ├── halide_benchmarks.cpp
    │   │   └── macros.h
    │   ├── src
    │   │   ├── CMakeLists.txt
    │   │   ├── blas_l1_generators.cpp
    │   │   ├── blas_l2_generators.cpp
    │   │   ├── blas_l3_generators.cpp
    │   │   ├── halide_blas.cpp
    │   │   └── halide_blas.h
    │   └── tests
    │   │   ├── CMakeLists.txt
    │   │   └── test_halide_blas.cpp
    ├── linear_blur
    │   ├── CMakeLists.txt
    │   ├── linear_blur_generator.cpp
    │   ├── linear_to_srgb_generator.cpp
    │   ├── run_linear_blur.cpp
    │   ├── simple_blur_generator.cpp
    │   └── srgb_to_linear_generator.cpp
    ├── local_laplacian
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   ├── local_laplacian_generator.cpp
    │   ├── process.cpp
    │   └── viz.sh
    ├── nl_means
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   ├── nl_means_generator.cpp
    │   └── process.cpp
    ├── nn_ops
    │   ├── AveragePool.cpp
    │   ├── AveragePool.sh
    │   ├── AveragePool_generator.cpp
    │   ├── Convolution.cpp
    │   ├── Convolution.sh
    │   ├── Convolution_generator.cpp
    │   ├── DepthwiseConvolution.cpp
    │   ├── DepthwiseConvolution.sh
    │   ├── DepthwiseConvolution_generator.cpp
    │   ├── Im2col.cpp
    │   ├── Im2col.sh
    │   ├── Im2col_generator.cpp
    │   ├── Makefile
    │   ├── MatrixMultiply.cpp
    │   ├── MatrixMultiply.sh
    │   ├── MatrixMultiply_generator.cpp
    │   ├── MaxPool.cpp
    │   ├── MaxPool.sh
    │   ├── MaxPool_generator.cpp
    │   ├── README.md
    │   ├── adb_run_on_device.sh
    │   ├── common.cpp
    │   ├── common.h
    │   ├── common_reference.cpp
    │   └── common_reference.h
    ├── onnx
    │   ├── Makefile
    │   ├── common_types.h
    │   ├── halide_as_onnx_backend.py
    │   ├── halide_as_onnx_backend_test.py
    │   ├── model.cpp
    │   ├── model.py
    │   ├── model_test.py
    │   ├── onnx_converter.cc
    │   ├── onnx_converter.h
    │   ├── onnx_converter_generator.cc
    │   ├── onnx_converter_generator_test.cc
    │   ├── onnx_converter_test.cc
    │   └── test_model_proto.txt
    ├── opengl_demo
    │   ├── Makefile
    │   ├── README.md
    │   ├── glfw_helpers.cpp
    │   ├── glfw_helpers.h
    │   ├── image.png
    │   ├── layout.cpp
    │   ├── layout.h
    │   ├── main.cpp
    │   ├── opengl_helpers.cpp
    │   ├── opengl_helpers.h
    │   ├── png_helpers.cpp
    │   ├── png_helpers.h
    │   ├── sample_filter_generator.cpp
    │   ├── timer.cpp
    │   └── timer.h
    ├── openglcompute
    │   ├── AndroidManifest.xml
    │   ├── Makefile
    │   ├── build.sh
    │   ├── build.xml
    │   ├── jni
    │   │   ├── Android.mk
    │   │   ├── Application.mk
    │   │   ├── oglc_run.cpp
    │   │   └── oglc_two_kernels_run.cpp
    │   ├── res
    │   │   ├── drawable-hdpi
    │   │   │   └── ic_launcher.png
    │   │   ├── drawable-ldpi
    │   │   │   └── ic_launcher.png
    │   │   ├── drawable-mdpi
    │   │   │   └── ic_launcher.png
    │   │   ├── drawable-xhdpi
    │   │   │   └── ic_launcher.png
    │   │   ├── layout
    │   │   │   └── main.xml
    │   │   └── values
    │   │   │   └── strings.xml
    │   ├── src
    │   │   └── com
    │   │   │   └── example
    │   │   │       └── hellohalideopenglcompute
    │   │   │           └── HalideOpenGLComputeActivity.java
    │   ├── test_oglc_avg.cpp
    │   └── test_two_kernels.cpp
    ├── resize
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   ├── resize.cpp
    │   └── resize_generator.cpp
    ├── resnet_50
    │   ├── Makefile
    │   ├── Resnet50Generator.cpp
    │   ├── load_weights.py
    │   ├── process.cpp
    │   └── validate_resnet50_output.py
    ├── simd_op_check
    │   ├── Makefile
    │   └── driver.cpp
    ├── stencil_chain
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   ├── process.cpp
    │   └── stencil_chain_generator.cpp
    ├── support
    │   ├── Makefile.inc
    │   ├── autoscheduler.inc
    │   └── viz_auto.sh
    └── wavelet
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   ├── README.md
    │   ├── daubechies_constants.h
    │   ├── daubechies_x_generator.cpp
    │   ├── haar_x_generator.cpp
    │   ├── inverse_daubechies_x_generator.cpp
    │   ├── inverse_haar_x_generator.cpp
    │   └── wavelet.cpp
├── halide.cmake
├── python_bindings
    ├── Makefile
    ├── apps
    │   ├── bilateral_grid.py
    │   ├── blur.py
    │   ├── erode.py
    │   ├── interpolate.py
    │   └── local_laplacian.py
    ├── correctness
    │   ├── addconstant_generator.cpp
    │   ├── addconstant_test.py
    │   ├── autodiff.py
    │   ├── basics.py
    │   ├── bit_generator.cpp
    │   ├── bit_test.py
    │   ├── boundary_conditions.py
    │   ├── buffer.py
    │   ├── compile_to.py
    │   ├── complexstub_generator.cpp
    │   ├── division.py
    │   ├── extern.py
    │   ├── iroperator.py
    │   ├── multipass_constraints.py
    │   ├── nobuildmethod_generator.cpp
    │   ├── partialbuildmethod_generator.cpp
    │   ├── pystub.py
    │   ├── rdom.py
    │   ├── simplestub_generator.cpp
    │   ├── target.py
    │   ├── the_sort_function.c
    │   ├── tuple_select.py
    │   ├── type.py
    │   ├── user_context_generator.cpp
    │   ├── user_context_test.py
    │   └── var.py
    ├── readme.md
    ├── requirements.txt
    ├── src
    │   ├── PyArgument.cpp
    │   ├── PyArgument.h
    │   ├── PyBinaryOperators.h
    │   ├── PyBoundaryConditions.cpp
    │   ├── PyBoundaryConditions.h
    │   ├── PyBuffer.cpp
    │   ├── PyBuffer.h
    │   ├── PyConciseCasts.cpp
    │   ├── PyConciseCasts.h
    │   ├── PyDerivative.cpp
    │   ├── PyDerivative.h
    │   ├── PyEnums.cpp
    │   ├── PyEnums.h
    │   ├── PyError.cpp
    │   ├── PyError.h
    │   ├── PyExpr.cpp
    │   ├── PyExpr.h
    │   ├── PyExternFuncArgument.cpp
    │   ├── PyExternFuncArgument.h
    │   ├── PyFunc.cpp
    │   ├── PyFunc.h
    │   ├── PyFuncRef.cpp
    │   ├── PyFuncRef.h
    │   ├── PyHalide.cpp
    │   ├── PyHalide.h
    │   ├── PyIROperator.cpp
    │   ├── PyIROperator.h
    │   ├── PyImageParam.cpp
    │   ├── PyImageParam.h
    │   ├── PyInlineReductions.cpp
    │   ├── PyInlineReductions.h
    │   ├── PyLambda.cpp
    │   ├── PyLambda.h
    │   ├── PyLoopLevel.cpp
    │   ├── PyLoopLevel.h
    │   ├── PyMachineParams.cpp
    │   ├── PyMachineParams.h
    │   ├── PyModule.cpp
    │   ├── PyModule.h
    │   ├── PyOutputs.cpp
    │   ├── PyOutputs.h
    │   ├── PyParam.cpp
    │   ├── PyParam.h
    │   ├── PyPipeline.cpp
    │   ├── PyPipeline.h
    │   ├── PyRDom.cpp
    │   ├── PyRDom.h
    │   ├── PyScheduleMethods.h
    │   ├── PyStage.cpp
    │   ├── PyStage.h
    │   ├── PyTarget.cpp
    │   ├── PyTarget.h
    │   ├── PyTuple.cpp
    │   ├── PyTuple.h
    │   ├── PyType.cpp
    │   ├── PyType.h
    │   ├── PyVar.cpp
    │   ├── PyVar.h
    │   ├── PyVarOrRVar.cpp
    │   └── PyVarOrRVar.h
    ├── stub
    │   ├── PyStub.cpp
    │   └── PyStubImpl.cpp
    ├── todo.txt
    └── tutorial
    │   ├── lesson_01_basics.py
    │   ├── lesson_02_input_image.py
    │   ├── lesson_03_debugging_1.py
    │   ├── lesson_04_debugging_2.py
    │   ├── lesson_05_scheduling_1.py
    │   ├── lesson_06_realizing_over_shifted_domains.py
    │   ├── lesson_07_multi_stage_pipelines.py
    │   ├── lesson_08_scheduling_2.py
    │   ├── lesson_09_update_definitions.py
    │   ├── lesson_10_aot_compilation_generate.py
    │   ├── lesson_10_aot_compilation_run.py
    │   ├── lesson_11_cross_compilation.py
    │   ├── lesson_12_using_the_gpu.py
    │   ├── lesson_13_tuples.py
    │   └── lesson_14_types.py
├── src
    ├── AddImageChecks.cpp
    ├── AddImageChecks.h
    ├── AddParameterChecks.cpp
    ├── AddParameterChecks.h
    ├── AlignLoads.cpp
    ├── AlignLoads.h
    ├── AllocationBoundsInference.cpp
    ├── AllocationBoundsInference.h
    ├── ApplySplit.cpp
    ├── ApplySplit.h
    ├── Argument.cpp
    ├── Argument.h
    ├── AssociativeOpsTable.cpp
    ├── AssociativeOpsTable.h
    ├── Associativity.cpp
    ├── Associativity.h
    ├── AsyncProducers.cpp
    ├── AsyncProducers.h
    ├── AutoSchedule.cpp
    ├── AutoSchedule.h
    ├── AutoScheduleUtils.cpp
    ├── AutoScheduleUtils.h
    ├── BoundSmallAllocations.cpp
    ├── BoundSmallAllocations.h
    ├── BoundaryConditions.cpp
    ├── BoundaryConditions.h
    ├── Bounds.cpp
    ├── Bounds.h
    ├── BoundsInference.cpp
    ├── BoundsInference.h
    ├── Buffer.cpp
    ├── Buffer.h
    ├── CMakeLists.txt
    ├── CPlusPlusMangle.cpp
    ├── CPlusPlusMangle.h
    ├── CSE.cpp
    ├── CSE.h
    ├── CanonicalizeGPUVars.cpp
    ├── CanonicalizeGPUVars.h
    ├── Closure.cpp
    ├── Closure.h
    ├── CodeGen_ARM.cpp
    ├── CodeGen_ARM.h
    ├── CodeGen_C.cpp
    ├── CodeGen_C.h
    ├── CodeGen_D3D12Compute_Dev.cpp
    ├── CodeGen_D3D12Compute_Dev.h
    ├── CodeGen_GPU_Dev.cpp
    ├── CodeGen_GPU_Dev.h
    ├── CodeGen_GPU_Host.cpp
    ├── CodeGen_GPU_Host.h
    ├── CodeGen_Hexagon.cpp
    ├── CodeGen_Hexagon.h
    ├── CodeGen_Internal.cpp
    ├── CodeGen_Internal.h
    ├── CodeGen_LLVM.cpp
    ├── CodeGen_LLVM.h
    ├── CodeGen_MIPS.cpp
    ├── CodeGen_MIPS.h
    ├── CodeGen_Metal_Dev.cpp
    ├── CodeGen_Metal_Dev.h
    ├── CodeGen_OpenCL_Dev.cpp
    ├── CodeGen_OpenCL_Dev.h
    ├── CodeGen_OpenGLCompute_Dev.cpp
    ├── CodeGen_OpenGLCompute_Dev.h
    ├── CodeGen_OpenGL_Dev.cpp
    ├── CodeGen_OpenGL_Dev.h
    ├── CodeGen_PTX_Dev.cpp
    ├── CodeGen_PTX_Dev.h
    ├── CodeGen_Posix.cpp
    ├── CodeGen_Posix.h
    ├── CodeGen_PowerPC.cpp
    ├── CodeGen_PowerPC.h
    ├── CodeGen_RISCV.cpp
    ├── CodeGen_RISCV.h
    ├── CodeGen_WebAssembly.cpp
    ├── CodeGen_WebAssembly.h
    ├── CodeGen_X86.cpp
    ├── CodeGen_X86.h
    ├── ConciseCasts.h
    ├── ConvolutionsCompilerForAICore.cpp
    ├── ConvolutionsCompilerForAICore.h
    ├── Debug.cpp
    ├── Debug.h
    ├── DebugArguments.cpp
    ├── DebugArguments.h
    ├── DebugToFile.cpp
    ├── DebugToFile.h
    ├── Definition.cpp
    ├── Definition.h
    ├── Deinterleave.cpp
    ├── Deinterleave.h
    ├── Derivative.cpp
    ├── Derivative.h
    ├── DerivativeUtils.cpp
    ├── DerivativeUtils.h
    ├── DeviceArgument.cpp
    ├── DeviceArgument.h
    ├── DeviceInterface.cpp
    ├── DeviceInterface.h
    ├── Dimension.cpp
    ├── Dimension.h
    ├── EarlyFree.cpp
    ├── EarlyFree.h
    ├── Elf.cpp
    ├── Elf.h
    ├── EliminateBoolVectors.cpp
    ├── EliminateBoolVectors.h
    ├── Error.cpp
    ├── Error.h
    ├── Expr.h
    ├── ExprUsesVar.h
    ├── Extern.h
    ├── ExternalCode.h
    ├── FastIntegerDivide.cpp
    ├── FastIntegerDivide.h
    ├── FindCalls.cpp
    ├── FindCalls.h
    ├── Float16.cpp
    ├── Float16.h
    ├── Func.cpp
    ├── Func.h
    ├── Function.cpp
    ├── Function.h
    ├── FunctionPtr.h
    ├── FuseGPUThreadLoops.cpp
    ├── FuseGPUThreadLoops.h
    ├── FuzzFloatStores.cpp
    ├── FuzzFloatStores.h
    ├── Generator.cpp
    ├── Generator.h
    ├── HexagonAlignment.h
    ├── HexagonOffload.cpp
    ├── HexagonOffload.h
    ├── HexagonOptimize.cpp
    ├── HexagonOptimize.h
    ├── IR.cpp
    ├── IR.h
    ├── IREquality.cpp
    ├── IREquality.h
    ├── IRMatch.cpp
    ├── IRMatch.h
    ├── IRMutator.cpp
    ├── IRMutator.h
    ├── IROperator.cpp
    ├── IROperator.h
    ├── IRPrinter.cpp
    ├── IRPrinter.h
    ├── IRVisitor.cpp
    ├── IRVisitor.h
    ├── ImageParam.cpp
    ├── ImageParam.h
    ├── InferArguments.cpp
    ├── InferArguments.h
    ├── InjectHostDevBufferCopies.cpp
    ├── InjectHostDevBufferCopies.h
    ├── InjectOpenGLIntrinsics.cpp
    ├── InjectOpenGLIntrinsics.h
    ├── Inline.cpp
    ├── Inline.h
    ├── InlineReductions.cpp
    ├── InlineReductions.h
    ├── IntegerDivisionTable.cpp
    ├── IntegerDivisionTable.h
    ├── Interval.cpp
    ├── Interval.h
    ├── Introspection.cpp
    ├── Introspection.h
    ├── IntrusivePtr.h
    ├── JITModule.cpp
    ├── JITModule.h
    ├── LICM.cpp
    ├── LICM.h
    ├── LLVM_Headers.h
    ├── LLVM_Output.cpp
    ├── LLVM_Output.h
    ├── LLVM_Runtime_Linker.cpp
    ├── LLVM_Runtime_Linker.h
    ├── Lambda.h
    ├── Lerp.cpp
    ├── Lerp.h
    ├── LoopCarry.cpp
    ├── LoopCarry.h
    ├── Lower.cpp
    ├── Lower.h
    ├── LowerWarpShuffles.cpp
    ├── LowerWarpShuffles.h
    ├── MainPage.h
    ├── MatlabWrapper.cpp
    ├── MatlabWrapper.h
    ├── Memoization.cpp
    ├── Memoization.h
    ├── Module.cpp
    ├── Module.h
    ├── ModulusRemainder.cpp
    ├── ModulusRemainder.h
    ├── Monotonic.cpp
    ├── Monotonic.h
    ├── ObjectInstanceRegistry.cpp
    ├── ObjectInstanceRegistry.h
    ├── OutputImageParam.cpp
    ├── OutputImageParam.h
    ├── Outputs.h
    ├── ParallelRVar.cpp
    ├── ParallelRVar.h
    ├── Param.h
    ├── ParamMap.cpp
    ├── ParamMap.h
    ├── Parameter.cpp
    ├── Parameter.h
    ├── PartitionLoops.cpp
    ├── PartitionLoops.h
    ├── Pipeline.cpp
    ├── Pipeline.h
    ├── Prefetch.cpp
    ├── Prefetch.h
    ├── PrintLoopNest.cpp
    ├── PrintLoopNest.h
    ├── Profiling.cpp
    ├── Profiling.h
    ├── PurifyIndexMath.cpp
    ├── PurifyIndexMath.h
    ├── PythonExtensionGen.cpp
    ├── PythonExtensionGen.h
    ├── Qualify.cpp
    ├── Qualify.h
    ├── RDom.cpp
    ├── RDom.h
    ├── Random.cpp
    ├── Random.h
    ├── RealizationOrder.cpp
    ├── RealizationOrder.h
    ├── Reduction.cpp
    ├── Reduction.h
    ├── RegionCosts.cpp
    ├── RegionCosts.h
    ├── RemoveDeadAllocations.cpp
    ├── RemoveDeadAllocations.h
    ├── RemoveExternLoops.cpp
    ├── RemoveExternLoops.h
    ├── RemoveUndef.cpp
    ├── RemoveUndef.h
    ├── RoundingMode.h
    ├── Schedule.cpp
    ├── Schedule.h
    ├── ScheduleFunctions.cpp
    ├── ScheduleFunctions.h
    ├── Scope.h
    ├── SelectGPUAPI.cpp
    ├── SelectGPUAPI.h
    ├── Simplify.cpp
    ├── Simplify.h
    ├── SimplifyCorrelatedDifferences.cpp
    ├── SimplifyCorrelatedDifferences.h
    ├── SimplifySpecializations.cpp
    ├── SimplifySpecializations.h
    ├── Simplify_Add.cpp
    ├── Simplify_And.cpp
    ├── Simplify_Call.cpp
    ├── Simplify_Cast.cpp
    ├── Simplify_Div.cpp
    ├── Simplify_EQ.cpp
    ├── Simplify_Exprs.cpp
    ├── Simplify_Internal.h
    ├── Simplify_LT.cpp
    ├── Simplify_Let.cpp
    ├── Simplify_Max.cpp
    ├── Simplify_Min.cpp
    ├── Simplify_Mod.cpp
    ├── Simplify_Mul.cpp
    ├── Simplify_Not.cpp
    ├── Simplify_Or.cpp
    ├── Simplify_Select.cpp
    ├── Simplify_Shuffle.cpp
    ├── Simplify_Stmts.cpp
    ├── Simplify_Sub.cpp
    ├── SkipStages.cpp
    ├── SkipStages.h
    ├── SlidingWindow.cpp
    ├── SlidingWindow.h
    ├── Solve.cpp
    ├── Solve.h
    ├── SplitTuples.cpp
    ├── SplitTuples.h
    ├── StmtToHtml.cpp
    ├── StmtToHtml.h
    ├── StorageFlattening.cpp
    ├── StorageFlattening.h
    ├── StorageFolding.cpp
    ├── StorageFolding.h
    ├── StrictifyFloat.cpp
    ├── StrictifyFloat.h
    ├── Substitute.cpp
    ├── Substitute.h
    ├── Target.cpp
    ├── Target.h
    ├── ThreadPool.h
    ├── Tracing.cpp
    ├── Tracing.h
    ├── TrimNoOps.cpp
    ├── TrimNoOps.h
    ├── Tuple.cpp
    ├── Tuple.h
    ├── Type.cpp
    ├── Type.h
    ├── UnifyDuplicateLets.cpp
    ├── UnifyDuplicateLets.h
    ├── UniquifyVariableNames.cpp
    ├── UniquifyVariableNames.h
    ├── UnpackBuffers.cpp
    ├── UnpackBuffers.h
    ├── UnrollLoops.cpp
    ├── UnrollLoops.h
    ├── UnsafePromises.cpp
    ├── UnsafePromises.h
    ├── Util.cpp
    ├── Util.h
    ├── Var.cpp
    ├── Var.h
    ├── VaryingAttributes.cpp
    ├── VaryingAttributes.h
    ├── VectorizeLoops.cpp
    ├── VectorizeLoops.h
    ├── WasmExecutor.cpp
    ├── WasmExecutor.h
    ├── WrapCalls.cpp
    ├── WrapCalls.h
    ├── WrapExternStages.cpp
    ├── WrapExternStages.h
    └── runtime
    │   ├── HalideBuffer.h
    │   ├── HalideRuntime.h
    │   ├── HalideRuntimeCuda.h
    │   ├── HalideRuntimeD3D12Compute.h
    │   ├── HalideRuntimeHexagonDma.h
    │   ├── HalideRuntimeHexagonHost.h
    │   ├── HalideRuntimeMetal.h
    │   ├── HalideRuntimeOpenCL.h
    │   ├── HalideRuntimeOpenGL.h
    │   ├── HalideRuntimeOpenGLCompute.h
    │   ├── HalideRuntimeQurt.h
    │   ├── aarch64.ll
    │   ├── aarch64_cpu_features.cpp
    │   ├── alignment_128.cpp
    │   ├── alignment_32.cpp
    │   ├── alignment_64.cpp
    │   ├── android_clock.cpp
    │   ├── android_host_cpu_count.cpp
    │   ├── android_io.cpp
    │   ├── android_ioctl.h
    │   ├── arm.ll
    │   ├── arm_cpu_features.cpp
    │   ├── arm_no_neon.ll
    │   ├── buffer_t.cpp
    │   ├── cache.cpp
    │   ├── can_use_target.cpp
    │   ├── cl_functions.h
    │   ├── cpu_features.h
    │   ├── cuda.cpp
    │   ├── cuda_functions.h
    │   ├── d3d12_abi_patch_64.h
    │   ├── d3d12_abi_patch_64.ll
    │   ├── d3d12compute.cpp
    │   ├── destructors.cpp
    │   ├── device_buffer_utils.h
    │   ├── device_interface.cpp
    │   ├── device_interface.h
    │   ├── errors.cpp
    │   ├── fake_get_symbol.cpp
    │   ├── fake_thread_pool.cpp
    │   ├── float16_t.cpp
    │   ├── fuchsia_clock.cpp
    │   ├── fuchsia_host_cpu_count.cpp
    │   ├── fuchsia_yield.cpp
    │   ├── gpu_device_selection.cpp
    │   ├── hashmap.h
    │   ├── hexagon_cache_allocator.cpp
    │   ├── hexagon_cpu_features.cpp
    │   ├── hexagon_dma.cpp
    │   ├── hexagon_dma_pool.cpp
    │   ├── hexagon_dma_pool.h
    │   ├── hexagon_host.cpp
    │   ├── hexagon_remote
    │       ├── .gitignore
    │       ├── Makefile
    │       ├── bin
    │       │   ├── arm-32-android
    │       │   │   └── libhalide_hexagon_host.so
    │       │   ├── arm-64-android
    │       │   │   └── libhalide_hexagon_host.so
    │       │   ├── host
    │       │   │   └── libhalide_hexagon_host.so
    │       │   ├── src
    │       │   │   ├── halide_hexagon_remote.h
    │       │   │   ├── halide_hexagon_remote_skel.c
    │       │   │   └── halide_hexagon_remote_stub.c
    │       │   └── v60
    │       │   │   ├── hexagon_sim_remote
    │       │   │   ├── libhalide_hexagon_remote_skel.so
    │       │   │   ├── libsim_qurt.a
    │       │   │   ├── libsim_qurt_vtcm.a
    │       │   │   └── signed_by_debug
    │       │   │       └── libhalide_hexagon_remote_skel.so
    │       ├── c11_stubs.cpp
    │       ├── dlib.cpp
    │       ├── dlib.h
    │       ├── halide_hexagon_remote.idl
    │       ├── halide_remote.cpp
    │       ├── host_malloc.cpp
    │       ├── host_shim.cpp
    │       ├── instruction_encodings.txt
    │       ├── known_symbols.cpp
    │       ├── known_symbols.h
    │       ├── libadsprpc_shim.cpp
    │       ├── log.cpp
    │       ├── log.h
    │       ├── nearbyint.cpp
    │       ├── pipeline_context.h
    │       ├── sim_host.cpp
    │       ├── sim_protocol.h
    │       ├── sim_qurt.cpp
    │       ├── sim_qurt_vtcm.cpp
    │       └── sim_remote.cpp
    │   ├── hvx_128.ll
    │   ├── hvx_64.ll
    │   ├── ios_io.cpp
    │   ├── linux_clock.cpp
    │   ├── linux_host_cpu_count.cpp
    │   ├── linux_yield.cpp
    │   ├── matlab.cpp
    │   ├── metadata.cpp
    │   ├── metal.cpp
    │   ├── metal_objc_arm.cpp
    │   ├── metal_objc_platform_dependent.cpp
    │   ├── metal_objc_platform_dependent.h
    │   ├── metal_objc_x86.cpp
    │   ├── mex_functions.h
    │   ├── mingw_math.cpp
    │   ├── mini_cl.h
    │   ├── mini_cuda.h
    │   ├── mini_d3d12.h
    │   ├── mini_hexagon_dma.h
    │   ├── mini_opengl.h
    │   ├── mini_qurt.h
    │   ├── mini_qurt_vtcm.h
    │   ├── mips.ll
    │   ├── mips_cpu_features.cpp
    │   ├── module_aot_ref_count.cpp
    │   ├── module_jit_ref_count.cpp
    │   ├── msan.cpp
    │   ├── msan_stubs.cpp
    │   ├── nvidia_libdevice_bitcode
    │       ├── libdevice.compute_20.10.bc
    │       ├── libdevice.compute_30.10.bc
    │       └── libdevice.compute_35.10.bc
    │   ├── objc_support.h
    │   ├── old_buffer_t.cpp
    │   ├── opencl.cpp
    │   ├── opengl.cpp
    │   ├── opengl_egl_context.cpp
    │   ├── opengl_glx_context.cpp
    │   ├── openglcompute.cpp
    │   ├── osx_clock.cpp
    │   ├── osx_get_symbol.cpp
    │   ├── osx_host_cpu_count.cpp
    │   ├── osx_opengl_context.cpp
    │   ├── osx_yield.cpp
    │   ├── posix_abort.cpp
    │   ├── posix_allocator.cpp
    │   ├── posix_clock.cpp
    │   ├── posix_error_handler.cpp
    │   ├── posix_get_symbol.cpp
    │   ├── posix_io.cpp
    │   ├── posix_math.ll
    │   ├── posix_print.cpp
    │   ├── posix_threads.cpp
    │   ├── posix_threads_tsan.cpp
    │   ├── powerpc.ll
    │   ├── powerpc_cpu_features.cpp
    │   ├── prefetch.cpp
    │   ├── printer.h
    │   ├── profiler.cpp
    │   ├── profiler_inlined.cpp
    │   ├── pseudostack.cpp
    │   ├── ptx_dev.ll
    │   ├── qurt_allocator.cpp
    │   ├── qurt_hvx.cpp
    │   ├── qurt_hvx_vtcm.cpp
    │   ├── qurt_init_fini.cpp
    │   ├── qurt_threads.cpp
    │   ├── qurt_threads_tsan.cpp
    │   ├── qurt_yield.cpp
    │   ├── riscv_cpu_features.cpp
    │   ├── runtime_api.cpp
    │   ├── runtime_internal.h
    │   ├── scoped_mutex_lock.h
    │   ├── scoped_spin_lock.h
    │   ├── ssp.cpp
    │   ├── synchronization_common.h
    │   ├── thread_pool_common.h
    │   ├── to_string.cpp
    │   ├── trace_helper.cpp
    │   ├── tracing.cpp
    │   ├── wasm_cpu_features.cpp
    │   ├── wasm_math.ll
    │   ├── win32_math.ll
    │   ├── windows_abort.cpp
    │   ├── windows_clock.cpp
    │   ├── windows_cuda.cpp
    │   ├── windows_get_symbol.cpp
    │   ├── windows_io.cpp
    │   ├── windows_opencl.cpp
    │   ├── windows_profiler.cpp
    │   ├── windows_threads.cpp
    │   ├── windows_threads_tsan.cpp
    │   ├── windows_yield.cpp
    │   ├── write_debug_image.cpp
    │   ├── x86.ll
    │   ├── x86_avx.ll
    │   ├── x86_avx2.ll
    │   ├── x86_cpu_features.cpp
    │   └── x86_sse41.ll
├── test
    ├── CMakeLists.txt
    ├── auto_schedule
    │   ├── cost_function.cpp
    │   ├── data_dependent.cpp
    │   ├── extern.cpp
    │   ├── fibonacci.cpp
    │   ├── harris.cpp
    │   ├── histogram.cpp
    │   ├── iir.cpp
    │   ├── interpolate.cpp
    │   ├── large_window.cpp
    │   ├── mat_mul.cpp
    │   ├── max_filter.cpp
    │   ├── multi_output.cpp
    │   ├── overlap.cpp
    │   ├── param.cpp
    │   ├── reorder.cpp
    │   ├── tile_vs_inline.cpp
    │   ├── unbounded_nonpure.cpp
    │   ├── unsharp.cpp
    │   ├── unused_func.cpp
    │   └── vectorize_var_in_update.cpp
    ├── common
    │   ├── check_call_graphs.h
    │   ├── expect_failure.sh
    │   ├── gpu_object_lifetime_tracker.h
    │   └── halide_test_dirs.h
    ├── correctness
    │   ├── align_bounds.cpp
    │   ├── argmax.cpp
    │   ├── assertion_failure_in_parallel_for.cpp
    │   ├── async.cpp
    │   ├── async_copy_chain.cpp
    │   ├── async_device_copy.cpp
    │   ├── autodiff.cpp
    │   ├── autoschedule_small_pure_update.cpp
    │   ├── autotune_bug.cpp
    │   ├── autotune_bug_2.cpp
    │   ├── autotune_bug_3.cpp
    │   ├── autotune_bug_4.cpp
    │   ├── autotune_bug_5.cpp
    │   ├── bad_likely.cpp
    │   ├── bit_counting.cpp
    │   ├── bitwise_ops.cpp
    │   ├── bool_compute_root_vectorize.cpp
    │   ├── bound.cpp
    │   ├── bound_small_allocations.cpp
    │   ├── boundary_conditions.cpp
    │   ├── bounds.cpp
    │   ├── bounds_inference.cpp
    │   ├── bounds_inference_chunk.cpp
    │   ├── bounds_inference_complex.cpp
    │   ├── bounds_inference_outer_split.cpp
    │   ├── bounds_of_abs.cpp
    │   ├── bounds_of_cast.cpp
    │   ├── bounds_of_func.cpp
    │   ├── bounds_of_monotonic_math.cpp
    │   ├── bounds_of_multiply.cpp
    │   ├── bounds_query.cpp
    │   ├── buffer_t.cpp
    │   ├── c_function.cpp
    │   ├── cascaded_filters.cpp
    │   ├── cast.cpp
    │   ├── cast_handle.cpp
    │   ├── chunk.cpp
    │   ├── chunk_sharing.cpp
    │   ├── circular_reference_leak.cpp
    │   ├── code_explosion.cpp
    │   ├── compare_vars.cpp
    │   ├── compile_to.cpp
    │   ├── compile_to_bitcode.cpp
    │   ├── compile_to_lowered_stmt.cpp
    │   ├── compile_to_multitarget.cpp
    │   ├── compute_at_reordered_update_stage.cpp
    │   ├── compute_at_split_rvar.cpp
    │   ├── compute_outermost.cpp
    │   ├── compute_with.cpp
    │   ├── compute_with_in.cpp
    │   ├── compute_with_inlined.cpp
    │   ├── computed_index.cpp
    │   ├── concat.cpp
    │   ├── constant_expr.cpp
    │   ├── constant_type.cpp
    │   ├── constraints.cpp
    │   ├── convolution.cpp
    │   ├── convolution_multiple_kernels.cpp
    │   ├── cross_compilation.cpp
    │   ├── custom_allocator.cpp
    │   ├── custom_auto_scheduler.cpp
    │   ├── custom_error_reporter.cpp
    │   ├── custom_lowering_pass.cpp
    │   ├── debug_to_file.cpp
    │   ├── debug_to_file_multiple_outputs.cpp
    │   ├── debug_to_file_reorder.cpp
    │   ├── deferred_loop_level.cpp
    │   ├── deinterleave4.cpp
    │   ├── device_buffer_copy.cpp
    │   ├── device_crop.cpp
    │   ├── device_slice.cpp
    │   ├── dilate3x3.cpp
    │   ├── dynamic_reduction_bounds.cpp
    │   ├── embed_bitcode.cpp
    │   ├── erf.cpp
    │   ├── exception.cpp
    │   ├── explicit_inline_reductions.cpp
    │   ├── extern_bounds_inference.cpp
    │   ├── extern_consumer.cpp
    │   ├── extern_consumer_tiled.cpp
    │   ├── extern_error.cpp
    │   ├── extern_output_expansion.cpp
    │   ├── extern_partial.cpp
    │   ├── extern_producer.cpp
    │   ├── extern_reorder_storage.cpp
    │   ├── extern_sort.cpp
    │   ├── extern_stage.cpp
    │   ├── extern_stage_on_device.cpp
    │   ├── external_code.cpp
    │   ├── failed_unroll.cpp
    │   ├── fast_trigonometric.cpp
    │   ├── fibonacci.cpp
    │   ├── fit_function.cpp
    │   ├── float16_t_comparison.cpp
    │   ├── float16_t_constants.cpp
    │   ├── float16_t_image_type.cpp
    │   ├── for_each_element.cpp
    │   ├── force_onto_stack.cpp
    │   ├── func_clone.cpp
    │   ├── func_lifetime.cpp
    │   ├── func_lifetime_2.cpp
    │   ├── func_wrapper.cpp
    │   ├── fuse.cpp
    │   ├── fuse_gpu_threads.cpp
    │   ├── fused_where_inner_extent_is_zero.cpp
    │   ├── fuzz_cse.cpp
    │   ├── fuzz_float_stores.cpp
    │   ├── fuzz_simplify.cpp
    │   ├── gameoflife.cpp
    │   ├── gather.cpp
    │   ├── gpu_assertion_in_kernel.cpp
    │   ├── gpu_bounds_inference_failure.cpp
    │   ├── gpu_cpu_simultaneous_read.cpp
    │   ├── gpu_data_flows.cpp
    │   ├── gpu_dynamic_shared.cpp
    │   ├── gpu_free_sync.cpp
    │   ├── gpu_give_input_buffers_device_allocations.cpp
    │   ├── gpu_jit_explicit_copy_to_device.cpp
    │   ├── gpu_large_alloc.cpp
    │   ├── gpu_mixed_dimensionality.cpp
    │   ├── gpu_mixed_shared_mem_types.cpp
    │   ├── gpu_multi_device.cpp
    │   ├── gpu_multi_kernel.cpp
    │   ├── gpu_non_contiguous_copy.cpp
    │   ├── gpu_object_lifetime_1.cpp
    │   ├── gpu_object_lifetime_2.cpp
    │   ├── gpu_object_lifetime_3.cpp
    │   ├── gpu_param_allocation.cpp
    │   ├── gpu_reuse_shared_memory.cpp
    │   ├── gpu_specialize.cpp
    │   ├── gpu_sum_scan.cpp
    │   ├── gpu_thread_barrier.cpp
    │   ├── gpu_transpose.cpp
    │   ├── gpu_vectorized_shared_memory.cpp
    │   ├── halide_buffer.cpp
    │   ├── handle.cpp
    │   ├── heap_cleanup.cpp
    │   ├── hello_gpu.cpp
    │   ├── hexagon_scatter.cpp
    │   ├── histogram.cpp
    │   ├── histogram_equalize.cpp
    │   ├── host_alignment.cpp
    │   ├── image_io.cpp
    │   ├── image_of_lists.cpp
    │   ├── image_wrapper.cpp
    │   ├── implicit_args.cpp
    │   ├── implicit_args_tests.cpp
    │   ├── in_place.cpp
    │   ├── infer_arguments.cpp
    │   ├── inline_reduction.cpp
    │   ├── inlined_generator.cpp
    │   ├── input_image_bounds_check.cpp
    │   ├── input_larger_than_two_gigs.cpp
    │   ├── integer_powers.cpp
    │   ├── interleave.cpp
    │   ├── interleave_rgb.cpp
    │   ├── interleave_x.cpp
    │   ├── interval.cpp
    │   ├── introspection.cpp
    │   ├── inverse.cpp
    │   ├── isnan.cpp
    │   ├── issue_3926.cpp
    │   ├── iterate_over_circle.cpp
    │   ├── lambda.cpp
    │   ├── lazy_convolution.cpp
    │   ├── leak_device_memory.cpp
    │   ├── left_shift_negative.cpp
    │   ├── legal_race_condition.cpp
    │   ├── lerp.cpp
    │   ├── let_in_rdom_bound.cpp
    │   ├── likely.cpp
    │   ├── load_library.cpp
    │   ├── logical.cpp
    │   ├── loop_invariant_extern_calls.cpp
    │   ├── loop_level_generator_param.cpp
    │   ├── lots_of_dimensions.cpp
    │   ├── make_struct.cpp
    │   ├── many_dimensions.cpp
    │   ├── many_small_extern_stages.cpp
    │   ├── many_updates.cpp
    │   ├── math.cpp
    │   ├── median3x3.cpp
    │   ├── memoize.cpp
    │   ├── memoize_cloned.cpp
    │   ├── min_extent.cpp
    │   ├── mod.cpp
    │   ├── mul_div_mod.cpp
    │   ├── multi_output_pipeline_with_bad_sizes.cpp
    │   ├── multi_pass_reduction.cpp
    │   ├── multi_splits_with_diff_tail_strategies.cpp
    │   ├── multi_way_select.cpp
    │   ├── multipass_constraints.cpp
    │   ├── multiple_outputs.cpp
    │   ├── multiple_outputs_extern.cpp
    │   ├── named_updates.cpp
    │   ├── nested_shiftinwards.cpp
    │   ├── newtons_method.cpp
    │   ├── non_vector_aligned_embeded_buffer.cpp
    │   ├── obscure_image_references.cpp
    │   ├── oddly_sized_output.cpp
    │   ├── out_constraint.cpp
    │   ├── out_of_memory.cpp
    │   ├── output_larger_than_two_gigs.cpp
    │   ├── parallel.cpp
    │   ├── parallel_alloc.cpp
    │   ├── parallel_fork.cpp
    │   ├── parallel_gpu_nested.cpp
    │   ├── parallel_nested.cpp
    │   ├── parallel_nested_1.cpp
    │   ├── parallel_reductions.cpp
    │   ├── parallel_rvar.cpp
    │   ├── param.cpp
    │   ├── param_map.cpp
    │   ├── parameter_constraints.cpp
    │   ├── partial_application.cpp
    │   ├── partial_realization.cpp
    │   ├── partition_loops.cpp
    │   ├── partition_loops_bug.cpp
    │   ├── pipeline_set_jit_externs_func.cpp
    │   ├── plain_c_includes.c
    │   ├── popc_clz_ctz_bounds.cpp
    │   ├── predicated_store_load.cpp
    │   ├── prefetch.cpp
    │   ├── print.cpp
    │   ├── process_some_tiles.cpp
    │   ├── pseudostack_shares_slots.cpp
    │   ├── python_extension_gen.cpp
    │   ├── random.cpp
    │   ├── realize_larger_than_two_gigs.cpp
    │   ├── realize_over_shifted_domain.cpp
    │   ├── reduction_chain.cpp
    │   ├── reduction_non_rectangular.cpp
    │   ├── reduction_schedule.cpp
    │   ├── register_shuffle.cpp
    │   ├── reorder_rvars.cpp
    │   ├── reorder_storage.cpp
    │   ├── require.cpp
    │   ├── reschedule.cpp
    │   ├── reuse_stack_alloc.cpp
    │   ├── rfactor.cpp
    │   ├── round.cpp
    │   ├── saturating_casts.cpp
    │   ├── scatter.cpp
    │   ├── set_custom_trace.cpp
    │   ├── shared_self_references.cpp
    │   ├── shifted_image.cpp
    │   ├── side_effects.cpp
    │   ├── simd_op_check.cpp
    │   ├── simplified_away_embedded_image.cpp
    │   ├── simplify.cpp
    │   ├── skip_stages.cpp
    │   ├── skip_stages_external_array_functions.cpp
    │   ├── skip_stages_memoize.cpp
    │   ├── sliding_backwards.cpp
    │   ├── sliding_reduction.cpp
    │   ├── sliding_window.cpp
    │   ├── sort_exprs.cpp
    │   ├── specialize.cpp
    │   ├── specialize_to_gpu.cpp
    │   ├── split_by_non_factor.cpp
    │   ├── split_fuse_rvar.cpp
    │   ├── split_reuse_inner_name_bug.cpp
    │   ├── split_store_compute.cpp
    │   ├── stack_allocations.cpp
    │   ├── stencil_chain_in_update_definitions.cpp
    │   ├── stmt_to_html.cpp
    │   ├── storage_folding.cpp
    │   ├── store_in.cpp
    │   ├── stream_compaction.cpp
    │   ├── strict_float.cpp
    │   ├── strict_float_bounds.cpp
    │   ├── strided_load.cpp
    │   ├── target.cpp
    │   ├── thread_safety.cpp
    │   ├── tracing.cpp
    │   ├── tracing_bounds.cpp
    │   ├── tracing_broadcast.cpp
    │   ├── tracing_stack.cpp
    │   ├── transitive_bounds.cpp
    │   ├── trim_no_ops.cpp
    │   ├── truncated_pyramid.cpp
    │   ├── tuple_partial_update.cpp
    │   ├── tuple_reduction.cpp
    │   ├── tuple_select.cpp
    │   ├── tuple_undef.cpp
    │   ├── tuple_update_ops.cpp
    │   ├── two_vector_args.cpp
    │   ├── undef.cpp
    │   ├── uninitialized_read.cpp
    │   ├── unique_func_image.cpp
    │   ├── unroll_dynamic_loop.cpp
    │   ├── unrolled_reduction.cpp
    │   ├── unsafe_dedup_lets.cpp
    │   ├── unsafe_promises.cpp
    │   ├── unused_func.cpp
    │   ├── update_chunk.cpp
    │   ├── vector_bounds_inference.cpp
    │   ├── vector_cast.cpp
    │   ├── vector_extern.cpp
    │   ├── vector_math.cpp
    │   ├── vector_print_bug.cpp
    │   ├── vectorize_guard_with_if.cpp
    │   ├── vectorize_mixed_widths.cpp
    │   ├── vectorize_varying_allocation_size.cpp
    │   ├── vectorized_gpu_allocation.cpp
    │   ├── vectorized_initialization.cpp
    │   ├── vectorized_load_from_vectorized_allocation.cpp
    │   ├── vectorized_reduction_bug.cpp
    │   └── widening_reduction.cpp
    ├── error
    │   ├── ambiguous_inline_reductions.cpp
    │   ├── async_require_fail.cpp
    │   ├── auto_schedule_no_bounds.cpp
    │   ├── auto_schedule_no_parallel.cpp
    │   ├── auto_schedule_no_reorder.cpp
    │   ├── bad_bound.cpp
    │   ├── bad_compute_at.cpp
    │   ├── bad_compute_with.cpp
    │   ├── bad_compute_with_invalid_specialization.cpp
    │   ├── bad_compute_with_parent_func_not_used.cpp
    │   ├── bad_const_cast.cpp
    │   ├── bad_device_api.cpp
    │   ├── bad_dimensions.cpp
    │   ├── bad_extern_split.cpp
    │   ├── bad_fold.cpp
    │   ├── bad_host_alignment.cpp
    │   ├── bad_rvar_order.cpp
    │   ├── bad_schedule.cpp
    │   ├── bad_store_at.cpp
    │   ├── broken_promise.cpp
    │   ├── buffer_larger_than_two_gigs.cpp
    │   ├── clamp_out_of_range.cpp
    │   ├── constrain_wrong_output_buffer.cpp
    │   ├── constraint_uses_non_param.cpp
    │   ├── define_after_realize.cpp
    │   ├── define_after_use.cpp
    │   ├── device_target_mismatch.cpp
    │   ├── expanding_reduction.cpp
    │   ├── extern_func_self_argument.cpp
    │   ├── five_d_gpu_buffer.cpp
    │   ├── float_arg.cpp
    │   ├── forward_on_undefined_buffer.cpp
    │   ├── implicit_args.cpp
    │   ├── impossible_constraints.cpp
    │   ├── init_def_should_be_all_vars.cpp
    │   ├── inspect_loop_level.cpp
    │   ├── lerp_float_weight_out_of_range.cpp
    │   ├── lerp_mismatch.cpp
    │   ├── lerp_signed_weight.cpp
    │   ├── memoize_different_compute_store.cpp
    │   ├── metal_vector_too_large.cpp
    │   ├── missing_args.cpp
    │   ├── modulo_constant_zero.cpp
    │   ├── no_default_device.cpp
    │   ├── nonexistent_update_stage.cpp
    │   ├── null_host_field.cpp
    │   ├── overflow_during_constant_folding.cpp
    │   ├── pointer_arithmetic.cpp
    │   ├── race_condition.cpp
    │   ├── rdom_undefined.cpp
    │   ├── realize_constantly_larger_than_two_gigs.cpp
    │   ├── reduction_bounds.cpp
    │   ├── reduction_type_mismatch.cpp
    │   ├── require_fail.cpp
    │   ├── reuse_var_in_schedule.cpp
    │   ├── reused_args.cpp
    │   ├── rfactor_inner_dim_non_commutative.cpp
    │   ├── specialize_fail.cpp
    │   ├── split_inner_wrong_tail_strategy.cpp
    │   ├── thread_id_outside_block_id.cpp
    │   ├── too_many_args.cpp
    │   ├── tuple_arg_select_undef.cpp
    │   ├── tuple_val_select_undef.cpp
    │   ├── unbounded_input.cpp
    │   ├── unbounded_output.cpp
    │   ├── undefined_func_compile.cpp
    │   ├── undefined_func_realize.cpp
    │   ├── undefined_loop_level.cpp
    │   ├── undefined_pipeline_compile.cpp
    │   ├── undefined_pipeline_realize.cpp
    │   ├── undefined_rdom_dimension.cpp
    │   ├── unknown_target.cpp
    │   ├── vectorize_dynamic.cpp
    │   ├── vectorize_too_little.cpp
    │   ├── vectorize_too_much.cpp
    │   ├── vectorized_extern.cpp
    │   ├── wrap_custom_after_shared.cpp
    │   ├── wrap_frozen.cpp
    │   ├── wrapper_never_used.cpp
    │   ├── wrong_dimensionality_extern_stage.cpp
    │   └── wrong_type.cpp
    ├── failing_with_issue
    │   ├── 3292_async_specialize.cpp
    │   ├── 3293_storage_folding_async.cpp
    │   └── 3357_vectorize_pred.cpp
    ├── generator
    │   ├── acquire_release_aottest.cpp
    │   ├── acquire_release_generator.cpp
    │   ├── alias_aottest.cpp
    │   ├── alias_generator.cpp
    │   ├── argvcall_aottest.cpp
    │   ├── argvcall_generator.cpp
    │   ├── async_parallel_aottest.cpp
    │   ├── async_parallel_generator.cpp
    │   ├── bit_operations_aottest.cpp
    │   ├── bit_operations_generator.cpp
    │   ├── blur2x2_aottest.cpp
    │   ├── blur2x2_generator.cpp
    │   ├── buffer_copy_aottest.cpp
    │   ├── buffer_copy_generator.cpp
    │   ├── buildmethod_aottest.cpp
    │   ├── buildmethod_generator.cpp
    │   ├── can_use_target_aottest.cpp
    │   ├── can_use_target_generator.cpp
    │   ├── cleanup_on_error_aottest.cpp
    │   ├── cleanup_on_error_generator.cpp
    │   ├── configure_aottest.cpp
    │   ├── configure_generator.cpp
    │   ├── configure_jittest.cpp
    │   ├── cxx_mangling_aottest.cpp
    │   ├── cxx_mangling_define_extern_aottest.cpp
    │   ├── cxx_mangling_define_extern_externs.cpp
    │   ├── cxx_mangling_define_extern_generator.cpp
    │   ├── cxx_mangling_externs.cpp
    │   ├── cxx_mangling_generator.cpp
    │   ├── define_extern_opencl_aottest.cpp
    │   ├── define_extern_opencl_generator.cpp
    │   ├── embed_image_aottest.cpp
    │   ├── embed_image_generator.cpp
    │   ├── error_codes_aottest.cpp
    │   ├── error_codes_generator.cpp
    │   ├── example_aottest.cpp
    │   ├── example_generator.cpp
    │   ├── example_jittest.cpp
    │   ├── extern_output_aottest.cpp
    │   ├── extern_output_generator.cpp
    │   ├── external_code_aottest.cpp
    │   ├── external_code_extern.cpp
    │   ├── external_code_generator.cpp
    │   ├── float16_t_aottest.cpp
    │   ├── float16_t_generator.cpp
    │   ├── gpu_object_lifetime_aottest.cpp
    │   ├── gpu_object_lifetime_generator.cpp
    │   ├── gpu_only_aottest.cpp
    │   ├── gpu_only_generator.cpp
    │   ├── image_from_array_aottest.cpp
    │   ├── image_from_array_generator.cpp
    │   ├── mandelbrot_aottest.cpp
    │   ├── mandelbrot_generator.cpp
    │   ├── matlab_aottest.cpp
    │   ├── matlab_generator.cpp
    │   ├── memory_profiler_mandelbrot_aottest.cpp
    │   ├── memory_profiler_mandelbrot_generator.cpp
    │   ├── metadata_tester_aottest.cpp
    │   ├── metadata_tester_generator.cpp
    │   ├── msan_aottest.cpp
    │   ├── msan_generator.cpp
    │   ├── multitarget_aottest.cpp
    │   ├── multitarget_generator.cpp
    │   ├── nested_externs_aottest.cpp
    │   ├── nested_externs_generator.cpp
    │   ├── old_buffer_t_aottest.cpp
    │   ├── old_buffer_t_generator.cpp
    │   ├── output_assign_aottest.cpp
    │   ├── output_assign_generator.cpp
    │   ├── pyramid_aottest.cpp
    │   ├── pyramid_generator.cpp
    │   ├── rdom_input_aottest.cpp
    │   ├── rdom_input_generator.cpp
    │   ├── registration_test.cpp
    │   ├── rungen_test.cpp
    │   ├── string_param_aottest.cpp
    │   ├── string_param_generator.cpp
    │   ├── stubtest_aottest.cpp
    │   ├── stubtest_generator.cpp
    │   ├── stubtest_jittest.cpp
    │   ├── stubuser_aottest.cpp
    │   ├── stubuser_generator.cpp
    │   ├── tiled_blur_aottest.cpp
    │   ├── tiled_blur_generator.cpp
    │   ├── user_context_aottest.cpp
    │   ├── user_context_generator.cpp
    │   ├── user_context_insanity_aottest.cpp
    │   ├── user_context_insanity_generator.cpp
    │   ├── variable_num_threads_aottest.cpp
    │   └── variable_num_threads_generator.cpp
    ├── internal.cpp
    ├── opengl
    │   ├── conv_select.cpp
    │   ├── copy_pixels.cpp
    │   ├── copy_to_device.cpp
    │   ├── copy_to_host.cpp
    │   ├── float_texture.cpp
    │   ├── inline_reduction.cpp
    │   ├── internal.cpp
    │   ├── lut.cpp
    │   ├── multiple_stages.cpp
    │   ├── produce.cpp
    │   ├── rewrap_texture.cpp
    │   ├── save_state.cpp
    │   ├── select.cpp
    │   ├── set_pixels.cpp
    │   ├── shifted_domains.cpp
    │   ├── special_funcs.cpp
    │   ├── sum_reduction.cpp
    │   ├── sumcolor_reduction.cpp
    │   ├── testing.h
    │   ├── tuples.cpp
    │   ├── vagrant
    │   │   ├── .gitignore
    │   │   ├── README.md
    │   │   ├── Vagrantfile
    │   │   ├── build_tests.sh
    │   │   └── provision
    │   │   │   ├── etc
    │   │   │       ├── environment
    │   │   │       ├── init
    │   │   │       │   └── xdummy.conf
    │   │   │       └── systemd
    │   │   │       │   └── system
    │   │   │       │       └── xdummy.service
    │   │   │   └── usr
    │   │   │       └── share
    │   │   │           └── X11
    │   │   │               └── xorg.conf.d
    │   │   │                   └── xdummy.conf
    │   └── varying.cpp
    ├── performance
    │   ├── async_gpu.cpp
    │   ├── block_transpose.cpp
    │   ├── boundary_conditions.cpp
    │   ├── clamped_vector_load.cpp
    │   ├── const_division.cpp
    │   ├── fan_in.cpp
    │   ├── fast_inverse.cpp
    │   ├── fast_pow.cpp
    │   ├── fast_sine_cosine.cpp
    │   ├── inner_loop_parallel.cpp
    │   ├── jit_stress.cpp
    │   ├── lots_of_inputs.cpp
    │   ├── lots_of_small_allocations.cpp
    │   ├── matrix_multiplication.cpp
    │   ├── memcpy.cpp
    │   ├── memory_profiler.cpp
    │   ├── packed_planar_fusion.cpp
    │   ├── parallel_performance.cpp
    │   ├── profiler.cpp
    │   ├── realize_overhead.cpp
    │   ├── rfactor.cpp
    │   ├── rgb_interleaved.cpp
    │   ├── sort.cpp
    │   ├── thread_safe_jit.cpp
    │   ├── vectorize.cpp
    │   └── wrap.cpp
    ├── scripts
    │   └── build_travis.sh
    └── warning
    │   ├── double_vectorize.cpp
    │   ├── hidden_pure_definition.cpp
    │   └── require_const_false.cpp
├── tools
    ├── GenGen.cpp
    ├── RunGen.h
    ├── RunGenMain.cpp
    ├── binary2cpp.cpp
    ├── build_halide_h.cpp
    ├── find_inverse.cpp
    ├── halide_benchmark.h
    ├── halide_config.cmake.tpl
    ├── halide_config.make.tpl
    ├── halide_image.h
    ├── halide_image_info.h
    ├── halide_image_io.h
    ├── halide_malloc_trace.h
    ├── halide_trace_config.h
    ├── makelib.sh
    └── mex_halide.m
├── tutorial
    ├── .gitignore
    ├── CMakeLists.txt
    ├── clock.h
    ├── figures
    │   ├── generate_figures_17.sh
    │   ├── generate_figures_18.sh
    │   ├── generate_figures_19.sh
    │   ├── generate_figures_5.sh
    │   ├── generate_figures_8.sh
    │   ├── generate_figures_9.sh
    │   ├── generate_output_snippets.sh
    │   ├── lesson_02_input.jpg
    │   ├── lesson_02_output.jpg
    │   ├── lesson_05_col_major.gif
    │   ├── lesson_05_fast.mp4
    │   ├── lesson_05_parallel_tiles.gif
    │   ├── lesson_05_row_major.gif
    │   ├── lesson_05_split_7_by_3.gif
    │   ├── lesson_05_tiled.gif
    │   ├── lesson_05_vectors.gif
    │   ├── lesson_08_compute_root.gif
    │   ├── lesson_08_compute_y.gif
    │   ├── lesson_08_mixed.mp4
    │   ├── lesson_08_store_root_compute_x.gif
    │   ├── lesson_08_store_root_compute_y.gif
    │   ├── lesson_08_tile.gif
    │   ├── lesson_09_compute_at_multiple_updates.mp4
    │   ├── lesson_09_compute_at_pure.gif
    │   ├── lesson_09_compute_at_pure_and_update.gif
    │   ├── lesson_09_compute_at_rvar.gif
    │   ├── lesson_09_compute_at_update.gif
    │   ├── lesson_09_inline_reduction.gif
    │   ├── lesson_09_update.gif
    │   ├── lesson_09_update_rdom.mp4
    │   ├── lesson_09_update_schedule.mp4
    │   ├── lesson_17_rdom_calls_in_predicate.mp4
    │   ├── lesson_17_rdom_circular.mp4
    │   ├── lesson_17_rdom_triangular.mp4
    │   ├── lesson_18_hist_manual_par.mp4
    │   ├── lesson_18_hist_rfactor_par.mp4
    │   ├── lesson_18_hist_rfactor_tile.mp4
    │   ├── lesson_18_hist_rfactor_vec.mp4
    │   ├── lesson_18_hist_serial.mp4
    │   ├── lesson_19_group_updates.mp4
    │   ├── lesson_19_transpose.mp4
    │   ├── lesson_19_wrapper_global.mp4
    │   ├── lesson_19_wrapper_local.mp4
    │   ├── lesson_19_wrapper_unique.mp4
    │   └── lesson_19_wrapper_vary_schedule.mp4
    ├── images
    │   ├── gray.png
    │   └── rgb.png
    ├── lesson_01_basics.cpp
    ├── lesson_02_input_image.cpp
    ├── lesson_03_debugging_1.cpp
    ├── lesson_04_debugging_2.cpp
    ├── lesson_05_scheduling_1.cpp
    ├── lesson_06_realizing_over_shifted_domains.cpp
    ├── lesson_07_multi_stage_pipelines.cpp
    ├── lesson_08_scheduling_2.cpp
    ├── lesson_09_update_definitions.cpp
    ├── lesson_10_aot_compilation_generate.cpp
    ├── lesson_10_aot_compilation_run.cpp
    ├── lesson_11_cross_compilation.cpp
    ├── lesson_12_using_the_gpu.cpp
    ├── lesson_13_tuples.cpp
    ├── lesson_14_types.cpp
    ├── lesson_15_generators.cpp
    ├── lesson_15_generators_usage.sh
    ├── lesson_16_rgb_generate.cpp
    ├── lesson_16_rgb_run.cpp
    ├── lesson_17_predicated_rdom.cpp
    ├── lesson_18_parallel_associative_reductions.cpp
    ├── lesson_19_wrapper_funcs.cpp
    ├── lesson_20_cloning_funcs.cpp
    ├── lesson_21_auto_scheduler_generate.cpp
    ├── lesson_21_auto_scheduler_run.cpp
    └── todo.txt
├── util
    ├── CMakeLists.txt
    ├── Halide-VS2017.natvis
    ├── Halide.natvis
    ├── HalideTraceDump.cpp
    ├── HalideTraceUtils.cpp
    ├── HalideTraceUtils.h
    ├── HalideTraceViz.cpp
    └── inconsolata.h
└── xhalide_examples
    ├── golden
        ├── xhalide_2dfilter.cc
        ├── xhalide_dse_4d_filter_generated.cc
        └── xhalide_generated1.cc
    ├── xhalide-2dconvolution.cpp
    ├── xhalide-convolution-16bits.cpp
    ├── xhalide-convolution-32bit.cpp
    ├── xhalide-convolution.cpp
    ├── xhalide-convolution1.cpp
    ├── xhalide_dse_4d_filter_limited.cpp
    └── xhalide_dse_conv.cpp


/.gitattributes:
--------------------------------------------------------------------------------
 1 | # Set the default behavior, in case people don't have core.autocrlf set.
 2 | * text=auto
 3 | 
 4 | # Explicitly declare text files you want to always be normalized and converted
 5 | # to native line endings on checkout.
 6 | *.cpp text
 7 | *.c text
 8 | *.h text
 9 | 
10 | # Denote all files that are truly binary and should not be modified.
11 | *.png binary
12 | *.jpg binary
13 | *.tiff binary
14 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/.gitmodules


--------------------------------------------------------------------------------
/apps/HelloAndroid/.gitignore:
--------------------------------------------------------------------------------
1 | .gradle/**
2 | gen/**
3 | gradle_build/**
4 | HelloAndroid.iml
5 | local.properties
6 | obj/**
7 | proguard-project.txt
8 | project.properties
9 | 


--------------------------------------------------------------------------------
/apps/HelloAndroid/ant.properties:
--------------------------------------------------------------------------------
 1 | # This file is used to override default values used by the Ant build system.
 2 | #
 3 | # This file must be checked into Version Control Systems, as it is
 4 | # integral to the build system of your project.
 5 | 
 6 | # This file is only used by the Ant script.
 7 | 
 8 | # You can use this to override default values such as
 9 | #  'source.dir' for the location of your java source folder and
10 | #  'out.dir' for the location of your output folder.
11 | 
12 | # You can also use it to define how the release builds are signed by declaring
13 | # the following properties:
14 | #  'key.store' for the location of your keystore and
15 | #  'key.alias' for the name of the key to use.
16 | # The password will be asked during the build when you use the 'release' target.
17 | 
18 | 


--------------------------------------------------------------------------------
/apps/HelloAndroid/build-gradle.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Gradle needs to know where the NDK is.
 4 | # The easiest way is to set the ANDROID_NDK_HOME environment variable.
 5 | # Otherwise, set ndk.dir in local.properties (even though the file itself says
 6 | # that it's only used by ant).
 7 | # However, if you run "android update" (say, via build.sh), the ndk.dir entry
 8 | # will be clobbered.
 9 | ./gradlew build && adb install -r gradle_build/outputs/apk/HelloAndroid-debug.apk && adb shell am start com.example.hellohalide/com.example.hellohalide.CameraActivity
10 | 


--------------------------------------------------------------------------------
/apps/HelloAndroid/gradle/wrapper/gradle-wrapper.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroid/gradle/wrapper/gradle-wrapper.jar


--------------------------------------------------------------------------------
/apps/HelloAndroid/gradle/wrapper/gradle-wrapper.properties:
--------------------------------------------------------------------------------
1 | #Mon Jan 05 14:23:44 PST 2015
2 | distributionBase=GRADLE_USER_HOME
3 | distributionPath=wrapper/dists
4 | zipStoreBase=GRADLE_USER_HOME
5 | zipStorePath=wrapper/dists
6 | distributionUrl=https\://services.gradle.org/distributions/gradle-2.2-bin.zip
7 | 


--------------------------------------------------------------------------------
/apps/HelloAndroid/jni/Android.mk:
--------------------------------------------------------------------------------
 1 | LOCAL_PATH := $(call my-dir)
 2 | 
 3 | include $(CLEAR_VARS)
 4 | 
 5 | LOCAL_MODULE    := HelloAndroid
 6 | LOCAL_ARM_MODE  := arm
 7 | LOCAL_SRC_FILES := hello_wrapper.cpp
 8 | LOCAL_LDFLAGS   := -L$(LOCAL_PATH)/../jni
 9 | LOCAL_LDLIBS    := -lm -llog -landroid $(LOCAL_PATH)/../bin/$(TARGET_ARCH_ABI)/hello.a
10 | LOCAL_STATIC_LIBRARIES := android_native_app_glue
11 | LOCAL_C_INCLUDES := $(LOCAL_PATH)/../../../include $(LOCAL_PATH)/../../../build/include $(LOCAL_PATH)/../bin/$(TARGET_ARCH_ABI)/
12 | 
13 | include $(BUILD_SHARED_LIBRARY)
14 | 
15 | $(call import-module,android/native_app_glue)
16 | 


--------------------------------------------------------------------------------
/apps/HelloAndroid/jni/Application.mk:
--------------------------------------------------------------------------------
1 | # Can't use "APP_ABI = all": 64-bit MIPS currently does not build, because
2 | # LLVM will not compile for the R6 version of the ISA without NaN2008,
3 | # and the gcc toolchain used by the Android build setup requires those
4 | # two options together.
5 | APP_ABI := armeabi armeabi-v7a arm64-v8a mips x86_64 x86
6 | APP_PLATFORM := android-17
7 | APP_STL := gnustl_static
8 | APP_CPPFLAGS := -std=c++11
9 | 


--------------------------------------------------------------------------------
/apps/HelloAndroid/res/drawable-hdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroid/res/drawable-hdpi/ic_launcher.png


--------------------------------------------------------------------------------
/apps/HelloAndroid/res/drawable-ldpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroid/res/drawable-ldpi/ic_launcher.png


--------------------------------------------------------------------------------
/apps/HelloAndroid/res/drawable-mdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroid/res/drawable-mdpi/ic_launcher.png


--------------------------------------------------------------------------------
/apps/HelloAndroid/res/drawable-xhdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroid/res/drawable-xhdpi/ic_launcher.png


--------------------------------------------------------------------------------
/apps/HelloAndroid/res/layout/main.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"
 3 |     android:orientation="horizontal"
 4 |     android:layout_width="fill_parent"
 5 |     android:layout_height="fill_parent"
 6 |     >
 7 | 
 8 |   <FrameLayout
 9 |       android:id="@+id/camera_preview"
10 |       android:layout_width="fill_parent"
11 |       android:layout_height="fill_parent"
12 |       android:layout_weight="1"
13 |       />
14 | 
15 | </LinearLayout>


--------------------------------------------------------------------------------
/apps/HelloAndroid/res/values/strings.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <resources>
3 |     <string name="app_name">HelloHalide</string>
4 | </resources>
5 | 


--------------------------------------------------------------------------------
/apps/HelloAndroid/src/com/example/hellohalide/FrameHandler.java:
--------------------------------------------------------------------------------
 1 | package com.example.hellohalide;
 2 | 
 3 | import android.hardware.Camera;
 4 | import android.util.Log;
 5 | 
 6 | public class FrameHandler implements Camera.PreviewCallback {
 7 |     private static final String TAG = "FrameHandler";
 8 | 
 9 |     public void onPreviewFrame(byte[] data, Camera camera) {
10 |         Log.d(TAG, "Got a frame!");
11 |     }
12 | }


--------------------------------------------------------------------------------
/apps/HelloAndroidCamera2/.gitignore:
--------------------------------------------------------------------------------
1 | .gradle/**
2 | gen/**
3 | gradle_build/**
4 | *.iml
5 | local.properties
6 | obj/**
7 | proguard-project.txt
8 | project.properties
9 | 


--------------------------------------------------------------------------------
/apps/HelloAndroidCamera2/build-gradle.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Gradle needs to know where the NDK is.
 4 | # The easiest way is to set the ANDROID_NDK_HOME environment variable.
 5 | # Otherwise, set ndk.dir in local.properties (even though the file itself says
 6 | # that it's only used by ant).
 7 | # However, if you run "android update" (say, via build.sh), the ndk.dir entry
 8 | # will be clobbered.
 9 | ./gradlew build && adb install -r gradle_build/outputs/apk/HelloAndroidCamera2-debug.apk && adb shell am start com.example.helloandroidcamera2/com.example.helloandroidcamera2.CameraActivity
10 | 


--------------------------------------------------------------------------------
/apps/HelloAndroidCamera2/gradle/wrapper/gradle-wrapper.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroidCamera2/gradle/wrapper/gradle-wrapper.jar


--------------------------------------------------------------------------------
/apps/HelloAndroidCamera2/gradle/wrapper/gradle-wrapper.properties:
--------------------------------------------------------------------------------
1 | #Wed Jul 15 16:34:43 PDT 2015
2 | distributionBase=GRADLE_USER_HOME
3 | distributionPath=wrapper/dists
4 | zipStoreBase=GRADLE_USER_HOME
5 | zipStorePath=wrapper/dists
6 | distributionUrl=https\://services.gradle.org/distributions/gradle-2.2-all.zip
7 | 


--------------------------------------------------------------------------------
/apps/HelloAndroidCamera2/jni/Application.mk:
--------------------------------------------------------------------------------
1 | # Can't use "APP_ABI = all" as 64-bit MIPS currently does not build since
2 | # llvm will not compile for the R6 version of the ISA without Nan2008
3 | # and the gcc toolchain used by the Android build setup requires those
4 | # two options together.
5 | APP_ABI := armeabi armeabi-v7a arm64-v8a mips x86_64 x86
6 | APP_PLATFORM := android-21
7 | APP_STL := c++_static
8 | APP_CPPFLAGS := -std=c++11 -fno-rtti -fexceptions
9 | 


--------------------------------------------------------------------------------
/apps/HelloAndroidCamera2/res/drawable-hdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroidCamera2/res/drawable-hdpi/ic_launcher.png


--------------------------------------------------------------------------------
/apps/HelloAndroidCamera2/res/drawable-ldpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroidCamera2/res/drawable-ldpi/ic_launcher.png


--------------------------------------------------------------------------------
/apps/HelloAndroidCamera2/res/drawable-mdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroidCamera2/res/drawable-mdpi/ic_launcher.png


--------------------------------------------------------------------------------
/apps/HelloAndroidCamera2/res/drawable-xhdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroidCamera2/res/drawable-xhdpi/ic_launcher.png


--------------------------------------------------------------------------------
/apps/HelloAndroidCamera2/res/layout/main.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <!-- Horizontal LinearLayout whose only child is the camera_preview FrameLayout.
 3 |      match_parent replaces the deprecated fill_parent; both mean the same size. -->
 4 | <LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"
 5 |     android:orientation="horizontal"
 6 |     android:layout_width="match_parent"
 7 |     android:layout_height="match_parent"
 8 |     >
 9 | 
10 |   <FrameLayout
11 |       android:id="@+id/camera_preview"
12 |       android:layout_width="match_parent"
13 |       android:layout_height="match_parent"
14 |       android:layout_weight="1"
15 |       />
16 | 
17 | </LinearLayout>


--------------------------------------------------------------------------------
/apps/HelloAndroidCamera2/res/values/strings.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <resources>
3 |     <string name="app_name">HelloHalideCamera2</string>
4 |     <string name="toggle">Toggle Edge Detector</string>
5 | </resources>
6 | 


--------------------------------------------------------------------------------
/apps/HelloAndroidGL/ant.properties:
--------------------------------------------------------------------------------
 1 | # This file is used to override default values used by the Ant build system.
 2 | #
 3 | # This file must be checked into Version Control Systems, as it is
 4 | # integral to the build system of your project.
 5 | 
 6 | # This file is only used by the Ant script.
 7 | 
 8 | # You can use this to override default values such as
 9 | #  'source.dir' for the location of your java source folder and
10 | #  'out.dir' for the location of your output folder.
11 | 
12 | # You can also use it to define how the release builds are signed by declaring
13 | # the following properties:
14 | #  'key.store' for the location of your keystore and
15 | #  'key.alias' for the name of the key to use.
16 | # The password will be asked during the build when you use the 'release' target.
17 | 
18 | 


--------------------------------------------------------------------------------
/apps/HelloAndroidGL/build.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -e
 3 | android update project -p . --target android-17
 4 | cd jni
 5 | c++ -std=c++11 halide_gl_filter.cpp -L ../../../bin -lHalide -I ../../../include -ldl -lpthread -lz
 6 | HL_TARGET=arm-32-android-opengl-debug DYLD_LIBRARY_PATH=../../../bin LD_LIBRARY_PATH=../../../bin ./a.out
 7 | cd ..
 8 | pwd
 9 | ndk-build
10 | ant debug
11 | adb install -r bin/HelloAndroidGL-debug.apk
12 | adb logcat
13 | 


--------------------------------------------------------------------------------
/apps/HelloAndroidGL/jni/Android.mk:
--------------------------------------------------------------------------------
 1 | LOCAL_PATH := $(call my-dir)
 2 | 
 3 | include $(CLEAR_VARS)
 4 | 
 5 | LOCAL_MODULE    := android_halide_gl_native
 6 | LOCAL_ARM_MODE  := arm
 7 | LOCAL_SRC_FILES := android_halide_gl_native.cpp
 8 | LOCAL_LDFLAGS   := -Ljni
 9 | LOCAL_LDLIBS    := -lm -llog -landroid -lEGL -lGLESv2 jni/halide_gl_filter.o
10 | LOCAL_STATIC_LIBRARIES := android_native_app_glue
11 | LOCAL_C_INCLUDES := $(LOCAL_PATH)/../../../include
12 | 
13 | include $(BUILD_SHARED_LIBRARY)
14 | 
15 | $(call import-module,android/native_app_glue)
16 | 


--------------------------------------------------------------------------------
/apps/HelloAndroidGL/jni/Application.mk:
--------------------------------------------------------------------------------
1 | # The ARMv7 is significantly faster due to the use of the hardware FPU
2 | APP_ABI := armeabi-v7a
3 | APP_PLATFORM := android-17
4 | 


--------------------------------------------------------------------------------
/apps/HelloAndroidGL/project.properties:
--------------------------------------------------------------------------------
 1 | # This file is automatically generated by Android Tools.
 2 | # Do not modify this file -- YOUR CHANGES WILL BE ERASED!
 3 | #
 4 | # This file must be checked in Version Control Systems.
 5 | #
 6 | # To customize properties used by the Ant build system edit
 7 | # "ant.properties", and override values to adapt the script to your
 8 | # project structure.
 9 | #
10 | # To enable ProGuard to shrink and obfuscate your code, uncomment this (available properties: sdk.dir, user.home):
11 | #proguard.config=${sdk.dir}/tools/proguard/proguard-android.txt:proguard-project.txt
12 | 
13 | # Project target.
14 | target=android-17
15 | 


--------------------------------------------------------------------------------
/apps/HelloAndroidGL/res/drawable-hdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroidGL/res/drawable-hdpi/ic_launcher.png


--------------------------------------------------------------------------------
/apps/HelloAndroidGL/res/drawable-ldpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroidGL/res/drawable-ldpi/ic_launcher.png


--------------------------------------------------------------------------------
/apps/HelloAndroidGL/res/drawable-mdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroidGL/res/drawable-mdpi/ic_launcher.png


--------------------------------------------------------------------------------
/apps/HelloAndroidGL/res/drawable-xhdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroidGL/res/drawable-xhdpi/ic_launcher.png


--------------------------------------------------------------------------------
/apps/HelloAndroidGL/res/layout/main.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <!-- Horizontal LinearLayout whose only child is the camera_preview FrameLayout.
 3 |      match_parent replaces the deprecated fill_parent; both mean the same size. -->
 4 | <LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"
 5 |     android:orientation="horizontal"
 6 |     android:layout_width="match_parent"
 7 |     android:layout_height="match_parent"
 8 |     >
 9 | 
10 |   <FrameLayout
11 |       android:id="@+id/camera_preview"
12 |       android:layout_width="match_parent"
13 |       android:layout_height="match_parent"
14 |       android:layout_weight="1"
15 |       />
16 | 
17 | </LinearLayout>


--------------------------------------------------------------------------------
/apps/HelloAndroidGL/res/values/strings.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <resources>
3 |     <string name="app_name">Halide GL Demo</string>
4 | </resources>
5 | 


--------------------------------------------------------------------------------
/apps/HelloMatlab/Makefile:
--------------------------------------------------------------------------------
1 | include ../support/Makefile.inc
2 | 
3 | # "test" is an action, not a file; declare it phony so that a stray file
4 | # named "test" can never make the target look up to date.
5 | .PHONY: test
6 | test:
7 | 	./run_blur.sh
8 | 
9 | 


--------------------------------------------------------------------------------
/apps/HelloMatlab/run_blur.m:
--------------------------------------------------------------------------------
 1 | % Add the path to mex_halide.m.
 2 | addpath(fullfile(getenv('HALIDE_DISTRIB_PATH'), 'tools'));
 3 | 
 4 | % Build the mex library from the blur generator.
 5 | mex_halide('iir_blur.cpp', '-g', 'IirBlur');
 6 | 
 7 | % Load the input, create an output buffer of equal size.
 8 | input = cast(imread('../images/rgb.png'), 'single') / 255;
 9 | output = zeros(size(input), 'single');
10 | 
11 | % The blur filter coefficient.
12 | alpha = 0.1;
13 | 
14 | % Call the Halide pipeline.
15 | for i = 1:10
16 |     tic;
17 |     iir_blur(input, alpha, output);
18 |     toc;
19 | end
20 | 
21 | % Write the blurred image.
22 | imwrite(cast(output * 255, 'uint8'), 'blurred.png');
23 | 


--------------------------------------------------------------------------------
/apps/HelloMatlab/run_blur.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Run by the nightly tests to check that mex_halide works: executes
 4 | # run_blur.m under Octave and verifies that blurred.png was produced.
 5 | 
 6 | # No Octave on PATH: report it on stderr and skip (exit status 0).
 7 | if ! command -v octave >/dev/null 2>&1; then
 8 |     echo >&2 "Octave not found.  Aborting."
 9 |     exit 0
10 | fi
11 | 
12 | # Skip when CXX contains -m32, since Octave is (likely) 64-bit.
13 | case "$CXX" in
14 |     *-m32*)
15 |         echo "Not proceeding because Halide is compiled in 32-bit mode but octave is (likely) 64-bit"
16 |         exit 0
17 |         ;;
18 | esac
19 | 
20 | # Remove stale artifacts so the existence check below reflects this run only.
21 | rm -f blurred.png iir_blur.mex
22 | octave run_blur.m
23 | 
24 | if [ ! -f blurred.png ]; then
25 |     echo "Failed to produce blurred.png!"
26 |     exit 1
27 | fi
28 | 
29 | echo "Success!"
30 | exit 0
31 | 


--------------------------------------------------------------------------------
/apps/HelloiOS/HelloiOS/AppDelegate.h:
--------------------------------------------------------------------------------
1 | #import <UIKit/UIKit.h>
2 | 
3 | @interface AppDelegate : UIResponder <UIApplicationDelegate>
4 | 
5 | @property (strong, nonatomic) UIWindow *window;
6 | 
7 | @end
8 | 


--------------------------------------------------------------------------------
/apps/HelloiOS/HelloiOS/HalideViewController.h:
--------------------------------------------------------------------------------
 1 | #ifndef HelloiOS_HalideViewController_h
 2 | #define HelloiOS_HalideViewController_h
 3 | 
 4 | #import "HalideView.h"
 5 | #import <UIKit/UIKit.h>
 6 | 
 7 | 
 8 | @interface HalideViewController : UIViewController
 9 | 
10 | @property HalideView *halide_view;
11 | 
12 | - (void)viewWillAppear:(BOOL)animated;
13 | 
14 | @end
15 | 
16 | #endif


--------------------------------------------------------------------------------
/apps/HelloiOS/HelloiOS/HelloiOS-Prefix.pch:
--------------------------------------------------------------------------------
 1 | //
 2 | //  Prefix header
 3 | //
 4 | //  The contents of this file are implicitly included at the beginning of every source file.
 5 | //
 6 | 
 7 | #import <Availability.h>
 8 | 
 9 | #ifndef __IPHONE_3_0
10 | #warning "This project uses features only available in iOS SDK 3.0 and later."
11 | #endif
12 | 
13 | #ifdef __OBJC__
14 |     #import <UIKit/UIKit.h>
15 |     #import <Foundation/Foundation.h>
16 | #endif
17 | 


--------------------------------------------------------------------------------
/apps/HelloiOS/HelloiOS/en.lproj/InfoPlist.strings:
--------------------------------------------------------------------------------
1 | /* Localized versions of Info.plist keys */
2 | 
3 | 


--------------------------------------------------------------------------------
/apps/HelloiOS/HelloiOS/main.mm:
--------------------------------------------------------------------------------
 1 | #import <UIKit/UIKit.h>
 2 | 
 3 | #import "AppDelegate.h"
 4 | 
 5 | int main(int argc, char * argv[])
 6 | {
 7 |     @autoreleasepool {
 8 |         return UIApplicationMain(argc, argv, nil, NSStringFromClass([AppDelegate class]));
 9 |     }
10 | }
11 | 


--------------------------------------------------------------------------------
/apps/autoscheduler/AutoSchedule.h:
--------------------------------------------------------------------------------
 1 | // Include guard: keeps this header safe to include more than once, as its
 2 | // sibling NetworkSize.h already does.
 3 | #ifndef HALIDE_APPS_AUTOSCHEDULER_AUTO_SCHEDULE_H
 4 | #define HALIDE_APPS_AUTOSCHEDULER_AUTO_SCHEDULE_H
 5 | 
 6 | #include "Halide.h"
 7 | #include "CostModel.h"
 8 | #include "FunctionDAG.h"
 9 | #include "PerfectHashMap.h"
10 | #include "Featurization.h"
11 | #include <vector>
12 | 
13 | namespace Halide {
14 | namespace Internal {
15 | namespace Autoscheduler {
16 | 
17 | // PerfectHashMap from a FunctionDAG::Node::Stage to its ScheduleFeatures.
18 | typedef PerfectHashMap<FunctionDAG::Node::Stage, ScheduleFeatures> StageMapOfScheduleFeatures;
19 | 
20 | // Declaration only; the definition lives outside this header.
21 | // NOTE(review): judging by the name and parameters, this searches for a
22 | // schedule for `outputs` over `dag` (guided by `cost_model`, with a beam of
23 | // `beam_size`) and applies it, reporting per-stage features through
24 | // `schedule_features` -- confirm against the implementation, including
25 | // whether `cost_model` / `schedule_features` may be null.
26 | void find_and_apply_schedule(FunctionDAG& dag, const std::vector<Function> &outputs, const MachineParams &params,
27 |                              CostModel* cost_model, int beam_size, StageMapOfScheduleFeatures* schedule_features);
28 | 
29 | }  // namespace Autoscheduler
30 | }  // namespace Internal
31 | }  // namespace Halide
32 | 
33 | #endif  // HALIDE_APPS_AUTOSCHEDULER_AUTO_SCHEDULE_H
34 | 


--------------------------------------------------------------------------------
/apps/autoscheduler/NetworkSize.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_NETWORK_SIZE_H
 2 | #define HALIDE_NETWORK_SIZE_H
 3 | 
 4 | namespace Halide {
 5 | // The size of the best cost model network found. Needed by the cost
 6 | // model and also the cost model training script.
 7 | const int head1_channels = 8, head1_w = 40, head1_h = 7;
 8 | const int head2_channels = 24, head2_w = 39;
 9 | const int conv1_channels = 32;
10 | }
11 | 
12 | #endif
13 | 


--------------------------------------------------------------------------------
/apps/autoscheduler/weights/head1_conv1_bias.data:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/autoscheduler/weights/head1_conv1_bias.data


--------------------------------------------------------------------------------
/apps/autoscheduler/weights/head1_conv1_weight.data:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/autoscheduler/weights/head1_conv1_weight.data


--------------------------------------------------------------------------------
/apps/autoscheduler/weights/head2_conv1_bias.data:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/autoscheduler/weights/head2_conv1_bias.data


--------------------------------------------------------------------------------
/apps/autoscheduler/weights/head2_conv1_weight.data:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/autoscheduler/weights/head2_conv1_weight.data


--------------------------------------------------------------------------------
/apps/autoscheduler/weights/trunk_conv1_bias.data:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/autoscheduler/weights/trunk_conv1_bias.data


--------------------------------------------------------------------------------
/apps/autoscheduler/weights/trunk_conv1_weight.data:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/autoscheduler/weights/trunk_conv1_weight.data


--------------------------------------------------------------------------------
/apps/bilateral_grid/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Executable built from filter.cpp, with Halide's image I/O support enabled.
 2 | add_executable(bilateral_grid_process filter.cpp)
 3 | halide_use_image_io(bilateral_grid_process)
 4 | 
 5 | halide_generator(bilateral_grid.generator SRCS bilateral_grid_generator.cpp)
 6 | 
 7 | # Produce one library per auto_schedule setting (false, then true) and link
 8 | # each of them into the executable. The auto-scheduled variant gets the
 9 | # "_auto_schedule" suffix.
10 | foreach(AUTO_SCHEDULE false true)
11 |     set(LIB bilateral_grid)
12 |     if(AUTO_SCHEDULE)
13 |         set(LIB "${LIB}_auto_schedule")
14 |     endif()
15 |     halide_library_from_generator(
16 |         ${LIB}
17 |         GENERATOR bilateral_grid.generator
18 |         GENERATOR_ARGS auto_schedule=${AUTO_SCHEDULE}
19 |         EXTRA_OUTPUTS stmt schedule
20 |     )
21 |     target_link_libraries(bilateral_grid_process PRIVATE ${LIB})
22 | endforeach()
23 | 


--------------------------------------------------------------------------------
/apps/bilateral_grid/viz.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Builds $1/filter_viz, runs it with the trace sent to stdout, and pipes that
 3 | # trace through HalideTraceViz into ${HL_AVCONV} to encode
 4 | # $1/bilateral_grid.mp4. $1 is the directory prefix for the binary and outputs.
 5 | echo HL_AVCONV is ${HL_AVCONV}
 6 | export HL_TRACE_FILE=/dev/stdout
 7 | # HL_NUM_THREADS is the spelling documented in Halide's README and used by
 8 | # apps/local_laplacian/viz.sh; this script previously set HL_NUMTHREADS.
 9 | export HL_NUM_THREADS=4
10 | # "$1" is quoted throughout so a directory containing spaces still works.
11 | rm -f "$1/bilateral_grid.mp4"
12 | make "$1/filter_viz" && \
13 | "$1/filter_viz" ../images/gray_small.png "$1/out_small.png" 0.2 0 | \
14 | ../../bin/HalideTraceViz --size 1920 1080 | \
15 | ${HL_AVCONV} -f rawvideo -pix_fmt bgr32 -s 1920x1080 -i /dev/stdin -c:v h264 "$1/bilateral_grid.mp4"
16 | #mplayer -demuxer rawvideo -rawvideo w=1920:h=1080:format=rgba:fps=30 -idle -fixed-vo -
17 | 


--------------------------------------------------------------------------------
/apps/blur/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Generator
 2 | halide_library(halide_blur SRCS halide_blur_generator.cpp)
 3 | 
 4 | # Final executable. Nothing links against an executable, so its dependency is
 5 | # PRIVATE -- the same keyword already used for the OpenMP flags below.
 6 | add_executable(blur_test test.cpp)
 7 | target_link_libraries(blur_test PRIVATE halide_blur)
 8 | 
 9 | if (NOT MSVC)
10 |   target_compile_options(blur_test PRIVATE "-O2")
11 |   if (OPENMP_FOUND)
12 |     # OpenMP available: pass its flags to both the compile and the link step.
13 |     target_compile_options(blur_test PRIVATE ${OpenMP_CXX_FLAGS})
14 |     target_link_libraries(blur_test PRIVATE ${OpenMP_CXX_FLAGS})
15 |   else()
16 |     # No OpenMP: silence warnings about pragmas the compiler does not recognize.
17 |     target_compile_options(blur_test PRIVATE "-Wno-unknown-pragmas")
18 |   endif()
19 | endif()
20 | 


--------------------------------------------------------------------------------
/apps/camera_pipe/viz.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Runs $1/process_viz with the trace sent to stdout and pipes that trace
 3 | # through HalideTraceViz into ${HL_AVCONV} to encode $1/camera_pipe.mp4.
 4 | # $1 is the directory prefix for the binary and the outputs.
 5 | export HL_TRACE_FILE=/dev/stdout
 6 | # HL_NUM_THREADS is the spelling documented in Halide's README and used by
 7 | # apps/local_laplacian/viz.sh; this script previously set HL_NUMTHREADS.
 8 | export HL_NUM_THREADS=4
 9 | # "$1" is quoted throughout so a directory containing spaces still works.
10 | rm -f "$1/camera_pipe.mp4"
11 | # Do trivial partial-overrides of trace settings via flags
12 | # (--zoom and --rlabel) just to demonstrate that it works.
13 | "$1/process_viz" ../images/bayer_small.png 3700 1.8 50 1 1 "$1/out.png" |
14 | ../../bin/HalideTraceViz --timestep 1000 --size 1920 1080 \
15 | --zoom 4 --func sharpen_strength_x32 \
16 | --rlabel curve "tone curve LUT" 0 0 10 \
17 | |\
18 | ${HL_AVCONV} -f rawvideo -pix_fmt bgr32 -s 1920x1080 -i /dev/stdin -c:v h264 "$1/camera_pipe.mp4"
19 | #mplayer -demuxer rawvideo -rawvideo w=1920:h=1080:format=rgba:fps=30 -idle -fixed-vo -
20 | 


--------------------------------------------------------------------------------
/apps/conv_layer/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Executable built from process.cpp, with Halide's image I/O support enabled.
 2 | add_executable(conv_layer_process process.cpp)
 3 | halide_use_image_io(conv_layer_process)
 4 | 
 5 | halide_generator(conv_layer.generator SRCS conv_layer_generator.cpp)
 6 | 
 7 | # Produce one library per auto_schedule setting (false, then true) and link
 8 | # each of them into the executable. The auto-scheduled variant gets the
 9 | # "_auto_schedule" suffix.
10 | foreach(AUTO_SCHEDULE false true)
11 |     set(LIB conv_layer)
12 |     if(AUTO_SCHEDULE)
13 |         set(LIB "${LIB}_auto_schedule")
14 |     endif()
15 |     halide_library_from_generator(
16 |         ${LIB}
17 |         GENERATOR conv_layer.generator
18 |         GENERATOR_ARGS auto_schedule=${AUTO_SCHEDULE}
19 |     )
20 |     target_link_libraries(conv_layer_process PRIVATE ${LIB})
21 | endforeach()
22 | 


--------------------------------------------------------------------------------
/apps/images/bayer_raw.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/images/bayer_raw.png


--------------------------------------------------------------------------------
/apps/images/bayer_small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/images/bayer_small.png


--------------------------------------------------------------------------------
/apps/images/gray.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/images/gray.png


--------------------------------------------------------------------------------
/apps/images/gray_small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/images/gray_small.png


--------------------------------------------------------------------------------
/apps/images/rgb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/images/rgb.png


--------------------------------------------------------------------------------
/apps/images/rgb_small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/images/rgb_small.png


--------------------------------------------------------------------------------
/apps/images/rgb_small16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/images/rgb_small16.png


--------------------------------------------------------------------------------
/apps/images/rgba.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/images/rgba.png


--------------------------------------------------------------------------------
/apps/interpolate/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | halide_project(interpolate "apps" interpolate.cpp)
2 | halide_use_image_io(interpolate)
3 | set_target_properties(interpolate PROPERTIES RUNTIME_OUTPUT_DIRECTORY
4 |                       "${CMAKE_CURRENT_BINARY_DIR}")
5 | 


--------------------------------------------------------------------------------
/apps/interpolate/Makefile:
--------------------------------------------------------------------------------
 1 | include ../support/Makefile.inc
 2 | 
 3 | CXXFLAGS += -g -Wall
 4 | 
 5 | # "clean" and "test" name actions rather than files, so both are phony; a
 6 | # stray file called "test" must not make the target look up to date.
 7 | .PHONY: clean test
 8 | 
 9 | # Build the interpolate binary from interpolate.cpp and the Halide library.
10 | $(BIN)/%/interpolate: interpolate.cpp $(LIB_HALIDE)
11 | 	@mkdir -p $(@D)
12 | 	$(CXX) $(CXXFLAGS) $^ -o $@ $(IMAGE_IO_FLAGS) $(LDFLAGS) $(HALIDE_SYSTEM_LIBS)
13 | 
14 | # Run the binary on rgba.png to produce out.png.
15 | $(BIN)/%/out.png: $(BIN)/%/interpolate
16 | 	@mkdir -p $(@D)
17 | 	$^ $(IMAGES)/rgba.png $@
18 | 
19 | clean:
20 | 	rm -rf $(BIN)
21 | 
22 | test: $(BIN)/$(HL_TARGET)/out.png
23 | 


--------------------------------------------------------------------------------
/apps/lens_blur/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Executable built from process.cpp, with Halide's image I/O support enabled.
 2 | add_executable(lens_blur_process process.cpp)
 3 | halide_use_image_io(lens_blur_process)
 4 | 
 5 | halide_generator(lens_blur.generator SRCS lens_blur_generator.cpp)
 6 | 
 7 | # Produce one library per auto_schedule setting (false, then true) and link
 8 | # each of them into the executable. The auto-scheduled variant gets the
 9 | # "_auto_schedule" suffix.
10 | foreach(AUTO_SCHEDULE false true)
11 |     set(LIB lens_blur)
12 |     if(AUTO_SCHEDULE)
13 |         set(LIB "${LIB}_auto_schedule")
14 |     endif()
15 |     halide_library_from_generator(
16 |         ${LIB}
17 |         GENERATOR lens_blur.generator
18 |         GENERATOR_ARGS auto_schedule=${AUTO_SCHEDULE}
19 |     )
20 |     target_link_libraries(lens_blur_process PRIVATE ${LIB})
21 | endforeach()
22 | 


--------------------------------------------------------------------------------
/apps/linear_algebra/.gitignore:
--------------------------------------------------------------------------------
1 | src/kernels/*
2 | 


--------------------------------------------------------------------------------
/apps/linear_algebra/tests/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | if (NOT CBLAS_FOUND)
 2 |   message(STATUS "linear_algebra: No CBLAS header, skipping CBLAS tests")
 3 |   return()
 4 | endif()
 5 | 
 6 | add_executable(test_halide_blas
 7 |   test_halide_blas.cpp
 8 | )
 9 | target_include_directories(test_halide_blas SYSTEM
10 |   PRIVATE
11 |    ${CBLAS_INCLUDE_DIR}
12 | )
13 | target_include_directories(test_halide_blas BEFORE
14 |   PRIVATE
15 |     ${halide_blas_INCLUDE_DIRS}
16 | )
17 | target_compile_options(test_halide_blas PRIVATE -Wno-unused-variable)
18 | 
19 | target_link_libraries(test_halide_blas
20 |   PRIVATE
21 |    halide_blas
22 |    cblas # XXX fragile
23 |    ${HALIDE_COMPILER_LIB}
24 | )
25 | 
26 | 


--------------------------------------------------------------------------------
/apps/local_laplacian/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Executable built from process.cpp, with Halide's image I/O support enabled.
 2 | add_executable(local_laplacian_process process.cpp)
 3 | halide_use_image_io(local_laplacian_process)
 4 | 
 5 | halide_generator(local_laplacian.generator SRCS local_laplacian_generator.cpp)
 6 | 
 7 | # Produce one library per auto_schedule setting (false, then true) and link
 8 | # each of them into the executable. The auto-scheduled variant gets the
 9 | # "_auto_schedule" suffix.
10 | foreach(AUTO_SCHEDULE false true)
11 |     set(LIB local_laplacian)
12 |     if(AUTO_SCHEDULE)
13 |         set(LIB "${LIB}_auto_schedule")
14 |     endif()
15 |     halide_library_from_generator(
16 |         ${LIB}
17 |         GENERATOR local_laplacian.generator
18 |         GENERATOR_ARGS auto_schedule=${AUTO_SCHEDULE}
19 |     )
20 |     target_link_libraries(local_laplacian_process PRIVATE ${LIB})
21 | endforeach()
22 | 


--------------------------------------------------------------------------------
/apps/local_laplacian/viz.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | export HL_TRACE_FILE=/dev/stdout
 3 | export HL_NUM_THREADS=4
 4 | rm -f $1/local_laplacian.mp4
 5 | make $1/process_viz && \
 6 | ./$1/process_viz ../images/rgb_small.png 4 1 1 0 ./$1/out_small.png | \
 7 | ../../bin/HalideTraceViz \
 8 | --size 1920 1080 --timestep 3000 | \
 9 | ${HL_AVCONV} -f rawvideo -pix_fmt bgr32 -s 1920x1080 -i /dev/stdin -c:v h264 ./$1/local_laplacian.mp4
10 | #mplayer -demuxer rawvideo -rawvideo w=1920:h=1080:format=rgba:fps=30 -idle -fixed-vo -
11 | 


--------------------------------------------------------------------------------
/apps/nl_means/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Executable built from process.cpp, with Halide's image I/O support enabled.
 2 | add_executable(nl_means_process process.cpp)
 3 | halide_use_image_io(nl_means_process)
 4 | 
 5 | halide_generator(nl_means.generator SRCS nl_means_generator.cpp)
 6 | 
 7 | # Produce one library per auto_schedule setting (false, then true) and link
 8 | # each of them into the executable. The auto-scheduled variant gets the
 9 | # "_auto_schedule" suffix.
10 | foreach(AUTO_SCHEDULE false true)
11 |     set(LIB nl_means)
12 |     if(AUTO_SCHEDULE)
13 |         set(LIB "${LIB}_auto_schedule")
14 |     endif()
15 |     halide_library_from_generator(
16 |         ${LIB}
17 |         GENERATOR nl_means.generator
18 |         GENERATOR_ARGS auto_schedule=${AUTO_SCHEDULE}
19 |     )
20 |     target_link_libraries(nl_means_process PRIVATE ${LIB})
21 | endforeach()
22 | 


--------------------------------------------------------------------------------
/apps/nn_ops/AveragePool.sh:
--------------------------------------------------------------------------------
 1 | AVERAGE_POOL=$1
 2 | # Columns are: schedule C W H N stride pad_width pad_height filter_width filter_height output_min output_max
 3 | $AVERAGE_POOL 8 16 16 1 1 0 0 1 1 0 255
 4 | $AVERAGE_POOL 8 16 16 1 1 1 1 3 3 0 255
 5 | $AVERAGE_POOL 8 16 16 1 2 1 1 3 3 0 255
 6 | $AVERAGE_POOL 8 16 16 1 2 2 2 5 5 0 255
 7 | 
 8 | $AVERAGE_POOL 32 7 7 1 1 0 0 1 1 0 255
 9 | $AVERAGE_POOL 32 7 7 1 1 1 1 3 3 0 255
10 | $AVERAGE_POOL 32 7 7 1 2 1 1 3 3 0 255
11 | $AVERAGE_POOL 32 7 7 4 2 2 2 5 5 0 255
12 | 
13 | $AVERAGE_POOL 8 16 16 1 1 0 0 1 1 64 128
14 | $AVERAGE_POOL 8 16 16 1 1 1 1 3 3 64 128
15 | $AVERAGE_POOL 8 16 16 1 2 1 1 3 3 64 128
16 | 


--------------------------------------------------------------------------------
/apps/nn_ops/Convolution.sh:
--------------------------------------------------------------------------------
 1 | CONVOLUTION=$1
 2 | # Columns are: schedule C W H N filter_width, filter_height, output_depth,
 3 | # input_offset, filter_offset, input_depth, stride, pad_width, pad_height,
 4 | # byte_zero, output_multiplier, output_shift, output_offset, output_min,
 5 | # output_max
 6 | 
 7 | $CONVOLUTION 8 17 17 1 1 1 8 -128 -128 8 1 0 0 0
 8 | $CONVOLUTION 8 17 17 1 3 3 8 -128 -128 8 1 1 1 0
 9 | $CONVOLUTION 8 17 17 1 3 3 8 -128 -128 8 2 1 1 0
10 | $CONVOLUTION 8 17 17 1 3 3 16 -128 -128 8 1 1 1 0
11 | $CONVOLUTION 8 17 17 1 3 3 16 -128 -140 8 1 1 1 0
12 | $CONVOLUTION 12 17 17 1 3 3 16 -128 -140 12 1 1 1 0
13 | 


--------------------------------------------------------------------------------
/apps/nn_ops/Im2col.sh:
--------------------------------------------------------------------------------
 1 | IM2COL=$1
 2 | # Columns are: schedule C W H N stride pad_width pad_height filter_width filter_height byte_zero
 3 | $IM2COL 8 16 16 1 1 0 0 1 1 0
 4 | $IM2COL 8 16 16 1 1 1 1 3 3 0
 5 | $IM2COL 8 16 16 1 2 1 1 3 3 0
 6 | $IM2COL 8 16 16 1 2 2 2 5 5 0
 7 | 
 8 | $IM2COL 32 7 7 1 1 0 0 1 1 0
 9 | $IM2COL 32 7 7 1 1 1 1 3 3 0
10 | $IM2COL 32 7 7 1 2 1 1 3 3 0
11 | $IM2COL 32 7 7 4 2 2 2 5 5 0
12 | 
13 | $IM2COL 8 16 16 1 1 0 0 1 1 5
14 | $IM2COL 8 16 16 1 1 1 1 3 3 5
15 | $IM2COL 8 16 16 1 2 1 1 3 3 5
16 | 


--------------------------------------------------------------------------------
/apps/nn_ops/MaxPool.sh:
--------------------------------------------------------------------------------
 1 | MAXPOOL=$1
 2 | # Columns are: schedule C W H N stride pad_width pad_height filter_width filter_height output_min output_max
 3 | $MAXPOOL 8 16 16 1 1 0 0 1 1 0 255
 4 | $MAXPOOL 8 16 16 1 1 1 1 3 3 0 255
 5 | $MAXPOOL 8 16 16 1 2 1 1 3 3 0 255
 6 | $MAXPOOL 8 16 16 1 2 2 2 5 5 0 255
 7 | 
 8 | $MAXPOOL 32 7 7 1 1 0 0 1 1 0 255
 9 | $MAXPOOL 32 7 7 1 1 1 1 3 3 0 255
10 | $MAXPOOL 32 7 7 1 2 1 1 3 3 0 255
11 | $MAXPOOL 32 7 7 4 2 2 2 5 5 0 255
12 | 
13 | $MAXPOOL 8 16 16 1 1 0 0 1 1 64 128
14 | $MAXPOOL 8 16 16 1 1 1 1 3 3 64 128
15 | $MAXPOOL 8 16 16 1 2 1 1 3 3 64 128
16 | 


--------------------------------------------------------------------------------
/apps/nn_ops/common.h:
--------------------------------------------------------------------------------
 1 | // A collection of utility functions shared by the halide generators.
 2 | 
 3 | #ifndef COMMON_HALIDE_H_
 4 | #define COMMON_HALIDE_H_
 5 | 
 6 | #include <Halide.h>
 7 | 
 8 | // This function implements the same computation as the ARMv7 NEON VQRDMULH
 9 | // instruction.
10 | Halide::Expr saturating_rounding_doubling_high_multiply(Halide::Expr a, Halide::Expr b);
11 | 
12 | // Correctly-rounded-to-nearest division by a power-of-two. Also known as
13 | // rounding arithmetic right shift.
14 | Halide::Expr rounding_shift_right(Halide::Expr x, Halide::Expr shift);
15 | 
16 | // Performs right shift and multiply by a multiplier.
17 | Halide::Expr multiply_quantized_multiplier(
18 |     Halide::Expr x, Halide::Expr quantized_multiplier, Halide::Expr shift);
19 | #endif
20 | 


--------------------------------------------------------------------------------
/apps/nn_ops/common_reference.h:
--------------------------------------------------------------------------------
 1 | // A collection of utility functions shared by test apps.
 2 | 
 3 | #ifndef COMMON_REFERENCE_H_
 4 | #define COMMON_REFERENCE_H_
 5 | 
 6 | #include <cstdint>
 7 | 
 8 | // This function implements the same computation as the ARMv7 NEON VQRDMULH
 9 | // instruction.
10 | int32_t saturating_rounding_doubling_high_multiply_reference(int32_t a, int32_t b);
11 | 
12 | // Correctly-rounded-to-nearest division by a power-of-two. Also known as
13 | // rounding arithmetic right shift.
14 | int32_t rounding_shift_right_reference(int32_t x, int32_t shift);
15 | 
16 | // Performs right shift and multiply by a multiplier.
17 | int32_t multiply_quantized_multiplier_reference(int32_t x, int32_t q, int32_t shift);
18 | 
19 | #endif
20 | 


--------------------------------------------------------------------------------
/apps/onnx/common_types.h:
--------------------------------------------------------------------------------
 1 | #ifndef COMMON_TYPES_H_
 2 | #define COMMON_TYPES_H_
 3 | 
 4 | // Standard headers for the std:: types used by HalideModel below, included
 5 | // here so this header does not rely on transitive includes.
 6 | #include <memory>
 7 | #include <string>
 8 | #include <unordered_map>
 9 | #include <vector>
10 | 
11 | #include "Halide.h"
12 | #include "onnx_converter.h"
13 | 
14 | // Groups a Model with a Halide::Pipeline and the names/types of its inputs
15 | // and outputs.
16 | // NOTE(review): Model is not declared in this header; presumably it comes
17 | // from onnx_converter.h -- confirm.
18 | struct HalideModel {
19 |     std::shared_ptr<Model> model;
20 |     std::shared_ptr<Halide::Pipeline> rep;
21 |     // Input names, plus an integer type code keyed by input name.
22 |     // NOTE(review): the meaning of the integer codes is not visible here.
23 |     std::vector<std::string> input_names;
24 |     std::unordered_map<std::string, int> input_types;
25 |     // Output names and integer type codes; presumably parallel vectors -- confirm.
26 |     std::vector<std::string> output_names;
27 |     std::vector<int> output_types;
28 | };
29 | 
30 | #endif
31 | 


--------------------------------------------------------------------------------
/apps/opengl_demo/glfw_helpers.h:
--------------------------------------------------------------------------------
 1 | #ifndef _GLFW_HELPERS_H_
 2 | #define _GLFW_HELPERS_H_
 3 | 
 4 | namespace GlfwHelpers {
 5 | 
 6 |     struct info {
 7 |         float dpi_scale;
 8 |     };
 9 | 
10 |     struct info setup(int width, int height);
11 |     void set_opengl_context();
12 |     void terminate();
13 | }
14 | 
15 | #endif
16 | 


--------------------------------------------------------------------------------
/apps/opengl_demo/image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/opengl_demo/image.png


--------------------------------------------------------------------------------
/apps/opengl_demo/layout.h:
--------------------------------------------------------------------------------
 1 | #ifndef _LAYOUT_HELPERS_H_
 2 | #define _LAYOUT_HELPERS_H_
 3 | 
 4 | // Declarations for the Layout helpers of the OpenGL demo.
 5 | 
 6 | // uint8_t and std::string appear in the declarations below; include their
 7 | // headers here so this header does not depend on the includer's include order.
 8 | #include <cstdint>
 9 | #include <string>
10 | 
11 | #if defined(__APPLE__)
12 | #include <OpenGL/gl.h>
13 | #else
14 | #include <GL/gl.h>
15 | #endif
16 | 
17 | namespace Layout {
18 | 
19 |     // Selects where something is drawn. Presumably upper-left, upper-right,
20 |     // lower-left and lower-right -- confirm against the implementation.
21 |     enum location { UL, UR, LL, LR };
22 | 
23 |     // Window dimensions, as returned by setup().
24 |     struct info {
25 |         int window_width;
26 |         int window_height;
27 |     };
28 | 
29 |     // Takes the image dimensions and returns a reference to the layout info.
30 |     // NOTE(review): the lifetime of the referenced object is not visible here.
31 |     const struct info &setup(int image_width, int image_height);
32 | 
33 |     // Draw raw image data, or an existing GL texture, at the given location
34 |     // together with a text label.
35 |     // NOTE(review): the pixel format of `data` is not visible in this header.
36 |     void draw_image(enum location location, const uint8_t *data, int width, int height, const std::string &label);
37 |     void draw_texture(enum location location, GLuint texture_id, int width, int height, const std::string &label);
38 | }
39 | 
40 | #endif
41 | 
42 | 


--------------------------------------------------------------------------------
/apps/opengl_demo/opengl_helpers.h:
--------------------------------------------------------------------------------
 1 | #ifndef _OPENGL_HELPERS_H_
 2 | #define _OPENGL_HELPERS_H_
 3 | 
 4 | // Declarations for the OpenGL helpers of the OpenGL demo.
 5 | 
 6 | // <cstdint> provides uint8_t, which create_texture() takes; include it here
 7 | // so this header does not depend on the includer's include order.
 8 | #include <cstdint>
 9 | #include <string>
10 | 
11 | #if defined(__APPLE__)
12 | #include <OpenGL/gl.h>
13 | #else
14 | #include <GL/gl.h>
15 | #endif
16 | 
17 | namespace OpenGLHelpers {
18 |     // One-time setup taking a DPI scale factor.
19 |     void setup(float dpi_scale);
20 | 
21 |     // Create a texture from width x height image data and return its GL id.
22 |     // NOTE(review): the pixel format of `data` is not visible in this header.
23 |     GLuint create_texture(int width, int height, const uint8_t *data);
24 | 
25 |     // Delete a texture by id (presumably one returned by create_texture()).
26 |     void delete_texture(GLuint texture_id);
27 | 
28 |     // Display a texture over the rectangle [x0, x1] x [y0, y1].
29 |     // NOTE(review): the coordinate space is not visible in this header.
30 |     void display_texture(GLuint texture_id, float x0, float x1, float y0, float y1);
31 | 
32 |     // Draw a text string at (x, y).
33 |     void draw_text(const std::string &text, float x, float y);
34 | }
35 | 
36 | #endif
37 | 


--------------------------------------------------------------------------------
/apps/opengl_demo/png_helpers.h:
--------------------------------------------------------------------------------
 1 | #ifndef _PNG_HELPERS_
 2 | #define _PNG_HELPERS_
 3 | 
 4 | // Declarations for the PNG loading helper of the OpenGL demo.
 5 | 
 6 | // uint8_t and std::string appear in the declarations below; include their
 7 | // headers here so this header compiles regardless of include order.
 8 | #include <cstdint>
 9 | #include <string>
10 | 
11 | namespace PNGHelpers {
12 | 
13 |     // Dimensions of a loaded image and a pointer to its bytes.
14 |     // NOTE(review): pixel layout and ownership of `data` are not visible here.
15 |     struct image_info {
16 |         unsigned int width;
17 |         unsigned int height;
18 |         const uint8_t *data;
19 |     };
20 | 
21 |     // Load the image at `filepath` and return its description.
22 |     struct image_info load(const std::string &filepath);
23 | }
24 | 
25 | 
26 | #endif
27 | 


--------------------------------------------------------------------------------
/apps/opengl_demo/timer.h:
--------------------------------------------------------------------------------
 1 | #ifndef _TIMER_H_
 2 | #define _TIMER_H_
 3 | 
 4 | // Declarations for the timing helper of the OpenGL demo.
 5 | 
 6 | #include <chrono>
 7 | // std::string is used by info, start() and report(); include it directly
 8 | // rather than relying on <chrono> to pull it in.
 9 | #include <string>
10 | 
11 | namespace Timer
12 | {
13 |     // A label together with a high_resolution_clock time point.
14 |     struct info {
15 |         const std::string what;
16 |         std::chrono::time_point<std::chrono::high_resolution_clock> time;
17 |     };
18 | 
19 |     // Returns an info for the given label.
20 |     // NOTE(review): presumably `time` is set to the current time -- confirm.
21 |     struct info start(const std::string &what);
22 | 
23 |     // Returns a text report for the given info.
24 |     // NOTE(review): presumably the elapsed time since start() -- confirm.
25 |     std::string report(const struct info &);
26 | }
27 | 
28 | #endif
29 | 


--------------------------------------------------------------------------------
/apps/openglcompute/build.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -e
 3 | android update project -p . --target android-21
 4 | make jni-libs
 5 | ant debug
 6 | adb install -r bin/HelloHalideOpenGLCompute-debug.apk
 7 | adb logcat -c
 8 | adb shell am start -n com.example.hellohalideopenglcompute/.HalideOpenGLComputeActivity
 9 | adb logcat | grep "^I/oglc"
10 | 


--------------------------------------------------------------------------------
/apps/openglcompute/jni/Application.mk:
--------------------------------------------------------------------------------
1 | # TODO(aam): Confirm that application builds and runs for all supported targets:
2 | # APP_ABI := armeabi armeabi-v7a arm64-v8a mips x86_64 x86
3 | APP_ABI := armeabi-v7a
4 | APP_PLATFORM := android-17
5 | 
6 | APP_STL := c++_static
7 | LOCAL_C_INCLUDES += ${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/4.8/include
8 | 


--------------------------------------------------------------------------------
/apps/openglcompute/res/drawable-hdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/openglcompute/res/drawable-hdpi/ic_launcher.png


--------------------------------------------------------------------------------
/apps/openglcompute/res/drawable-ldpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/openglcompute/res/drawable-ldpi/ic_launcher.png


--------------------------------------------------------------------------------
/apps/openglcompute/res/drawable-mdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/openglcompute/res/drawable-mdpi/ic_launcher.png


--------------------------------------------------------------------------------
/apps/openglcompute/res/drawable-xhdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/openglcompute/res/drawable-xhdpi/ic_launcher.png


--------------------------------------------------------------------------------
/apps/openglcompute/res/layout/main.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"
 3 |     android:orientation="horizontal"
 4 |     android:layout_width="fill_parent"
 5 |     android:layout_height="fill_parent"
 6 |     >
 7 | 
 8 |   <FrameLayout
 9 |       android:id="@+id/camera_preview"
10 |       android:layout_width="fill_parent"
11 |       android:layout_height="fill_parent"
12 |       android:layout_weight="1"
13 |       />
14 | 
15 | </LinearLayout>


--------------------------------------------------------------------------------
/apps/openglcompute/res/values/strings.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <resources>
3 |     <string name="app_name">HelloHalideAndroidOpenGLCompute</string>
4 | </resources>
5 | 


--------------------------------------------------------------------------------
/apps/stencil_chain/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Executable that runs the stencil_chain pipelines.
 2 | add_executable(stencil_chain_process process.cpp)
 3 | halide_use_image_io(stencil_chain_process)
 4 | 
 5 | # Generator from which both library variants below are produced.
 6 | halide_generator(stencil_chain.generator SRCS stencil_chain_generator.cpp)
 7 | 
 8 | # One library per auto_schedule value, both linked into the executable:
 9 | #   auto_schedule=false -> stencil_chain
10 | #   auto_schedule=true  -> stencil_chain_auto_schedule
11 | foreach(AUTO_SCHEDULE IN ITEMS false true)
12 |     set(LIB stencil_chain)
13 |     if(AUTO_SCHEDULE)
14 |         set(LIB ${LIB}_auto_schedule)
15 |     endif()
16 |     halide_library_from_generator(${LIB}
17 |         GENERATOR stencil_chain.generator
18 |         GENERATOR_ARGS auto_schedule=${AUTO_SCHEDULE})
19 |     target_link_libraries(stencil_chain_process PRIVATE ${LIB})
20 | endforeach()
21 | 


--------------------------------------------------------------------------------
/apps/support/viz_auto.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Runs a filter with Halide tracing enabled, feeds the trace to HalideTraceViz
 4 | # and encodes the rendered frames into an mp4.
 5 | #
 6 | # $1 = filter cmd to run, including args
 7 | # $2 = HalideTraceViz executable
 8 | # $3 = path to output mp4
 9 | #
10 | # The encoder command is read from the HL_AVCONV environment variable.
11 | 
12 | rm -rf "$3"
13 | 
14 | # Use a named pipe for the $1 -> HTV pipe, just in case
15 | # the exe in $1 writes any random output to stdout.
16 | # The pipe lives in a private mktemp directory so that concurrent runs do not
17 | # share (or delete) each other's pipe, and it is removed again on exit.
18 | PIPE_DIR=$(mktemp -d "${TMPDIR:-/tmp}/halide_viz_auto.XXXXXX") || exit 1
19 | trap 'rm -rf "${PIPE_DIR}"' EXIT
20 | PIPE="${PIPE_DIR}/pipe"
21 | mkfifo "${PIPE}" || exit 1
22 | 
23 | # $1 is deliberately left unquoted: it holds a command plus its arguments
24 | # and must be word-split by the shell.
25 | HL_TRACE_FILE="${PIPE}" HL_NUMTHREADS=8 $1 &
26 | 
27 | "$2" --auto_layout --ignore_tags 0<"${PIPE}" | \
28 | ${HL_AVCONV} -y -f rawvideo -pix_fmt bgr32 -s 1920x1080 -i /dev/stdin -c:v h264 "$3"
29 | 


--------------------------------------------------------------------------------
/apps/wavelet/README.md:
--------------------------------------------------------------------------------
1 | wavelet is a trivial app designed to show ahead-of-time Generator usage (with both Make and CMake), as opposed to using direct calls to (e.g.) Func::compile_to_file().
2 | 


--------------------------------------------------------------------------------
/apps/wavelet/daubechies_constants.h:
--------------------------------------------------------------------------------
 1 | #ifndef DAUBECHIES_CONSTANTS_H_
 2 | #define DAUBECHIES_CONSTANTS_H_
 3 | 
 4 | const float D0 = 0.4829629131445341f;
 5 | const float D1 = 0.83651630373780772f;
 6 | const float D2 = 0.22414386804201339f;
 7 | const float D3 = -0.12940952255126034f;
 8 | 
 9 | #endif  // DAUBECHIES_CONSTANTS_H_
10 | 


--------------------------------------------------------------------------------
/apps/wavelet/haar_x_generator.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | #include "daubechies_constants.h"
 4 | 
 5 | namespace {
 6 | 
 7 | Halide::Var x("x"), y("y"), c("c");
 8 | 
 9 | class haar_x : public Halide::Generator<haar_x> {  // Generator: pairwise half-sum / half-difference along x.
10 | public:
11 |     Input<Buffer<float>> in_{"in" , 2};  // 2-D float input, read as (x, y) below.
12 |     Output<Buffer<float>> out_{"out" , 3};  // 3-D float output, written as (x, y, c) below.
13 | 
14 |     void generate() {
15 |         Func in = Halide::BoundaryConditions::repeat_edge(in_);  // Wrap the input with the repeat_edge boundary condition.
16 | 
17 |         out_(x, y, c) = select(c == 0,  // Pairs input columns 2*x and 2*x+1; the whole select is divided by 2.
18 |                               (in(2*x, y) + in(2*x+1, y)),  // c == 0: sum of the pair.
19 |                               (in(2*x, y) - in(2*x+1, y)))/2;  // otherwise: difference of the pair.
20 |         out_.unroll(c, 2);  // Unroll the c loop by a factor of 2.
21 |     }
22 | };
23 | 
24 | }  // namespace
25 | 
26 | HALIDE_REGISTER_GENERATOR(haar_x, haar_x)
27 | 


--------------------------------------------------------------------------------
/apps/wavelet/inverse_haar_x_generator.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | #include "daubechies_constants.h"
 4 | 
 5 | namespace {
 6 | 
 7 | Halide::Var x("x"), y("y"), c("c");
 8 | 
 9 | class inverse_haar_x : public Halide::Generator<inverse_haar_x> {  // Generator: rebuilds a 2-D signal from two channels of a 3-D input.
10 | public:
11 |     Input<Buffer<float>> in_{"in" , 3};  // 3-D float input, read as (x, y, channel) below.
12 |     Output<Buffer<float>> out_{"out" , 2};  // 2-D float output, written as (x, y) below.
13 | 
14 |     void generate() {
15 |         Func in = Halide::BoundaryConditions::repeat_edge(in_);  // Wrap the input with the repeat_edge boundary condition.
16 | 
17 |         out_(x, y) = select(x%2 == 0,  // Output columns 2k and 2k+1 both read input column x/2.
18 |                            in(x/2, y, 0) + in(x/2, y, 1),  // even x: channel 0 plus channel 1.
19 |                            in(x/2, y, 0) - in(x/2, y, 1));  // odd x: channel 0 minus channel 1.
20 |         out_.unroll(x, 2);  // Unroll the x loop by a factor of 2.
21 |     }
22 | };
23 | 
24 | }  // namespace
25 | 
26 | HALIDE_REGISTER_GENERATOR(inverse_haar_x, inverse_haar_x)
27 | 


--------------------------------------------------------------------------------
/python_bindings/correctness/bit_generator.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | using namespace Halide;
 4 | 
 5 | class BitGenerator : public Halide::Generator<BitGenerator> {
 6 | public:
 7 |     Input<Buffer<bool>> bit_input{"input_uint1", 1};
 8 |     Input<bool> bit_constant{"constant_uint1"};
 9 | 
10 |     Output<Buffer<bool>> bit_output{"output_uint1", 1};
11 | 
12 |     Var x, y, z;
13 | 
14 |     void generate() {
15 |         bit_output(x) = bit_input(x) + bit_constant;
16 |     }
17 | 
18 |     void schedule() {
19 |     }
20 | };
21 | 
22 | HALIDE_REGISTER_GENERATOR(BitGenerator, bit)
23 | 


--------------------------------------------------------------------------------
/python_bindings/correctness/rdom.py:
--------------------------------------------------------------------------------
 1 | import halide as hl
 2 | 
 3 | def test_rdom():
 4 |     x = hl.Var("x")
 5 |     y = hl.Var("y")
 6 | 
 7 |     diagonal = hl.Func("diagonal")
 8 |     diagonal[x, y] = 1
 9 | 
10 |     domain_width = 10
11 |     domain_height = 10
12 | 
13 |     r = hl.RDom([(0, domain_width), (0, domain_height)])
14 |     r.where(r.x <= r.y)
15 | 
16 |     diagonal[r.x, r.y] += 2
17 |     output = diagonal.realize(domain_width, domain_height)
18 |     
19 |     for iy in range(domain_height):
20 |         for ix in range(domain_width):
21 |             if ix <= iy:
22 |                 assert output[ix, iy] == 3
23 |             else:
24 |                 assert output[ix, iy] == 1
25 | 
26 |     return 0
27 | 
28 | if __name__ == "__main__":
29 |     test_rdom()
30 | 


--------------------------------------------------------------------------------
/python_bindings/correctness/user_context_generator.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | using namespace Halide;
 4 | 
 5 | class UserContextGenerator : public Halide::Generator<UserContextGenerator> {
 6 | public:
 7 |     Input<uint8_t> constant{"constant"};
 8 |     Output<Buffer<uint8_t>> output{"output", 1};
 9 | 
10 |     Var x;
11 | 
12 |     void generate() {
13 |         output(x) = constant;
14 |     }
15 | 
16 |     void schedule() {
17 |     }
18 | };
19 | 
20 | HALIDE_REGISTER_GENERATOR(UserContextGenerator, user_context)
21 | 


--------------------------------------------------------------------------------
/python_bindings/correctness/user_context_test.py:
--------------------------------------------------------------------------------
 1 | import array
 2 | import user_context
 3 | 
 4 | 
 5 | def test():
 6 |     output = bytearray("\0\0\0\0", "ascii")
 7 |     user_context.user_context(None, ord('q'), output)
 8 |     assert output == bytearray("qqqq", "ascii")
 9 | 
10 | 
11 | if __name__ == "__main__":
12 |     test()
13 | 


--------------------------------------------------------------------------------
/python_bindings/requirements.txt:
--------------------------------------------------------------------------------
 1 | # This file lists the python dependencies, 
 2 | # it is meant to be used with pip (and/or possibly virtualenv, pbundler, etc)
 3 | # See http://pip.readthedocs.org/en/latest/user_guide.html#requirements-files
 4 | # You will probably want to run 
 5 | # something similar to `pip3 install --user -r requirements.txt`
 6 | 
 7 | # science packages
 8 | numpy
 9 | scipy
10 | pillow
11 | 


--------------------------------------------------------------------------------
/python_bindings/src/PyArgument.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYARGUMENT_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYARGUMENT_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_argument(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYARGUMENT_H
15 | 


--------------------------------------------------------------------------------
/python_bindings/src/PyBoundaryConditions.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYBOUNDARYCONDITIONS_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYBOUNDARYCONDITIONS_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_boundary_conditions(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYBOUNDARYCONDITIONS_H
15 | 


--------------------------------------------------------------------------------
/python_bindings/src/PyBuffer.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYBUFFER_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYBUFFER_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_buffer(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYBUFFER_H
15 | 


--------------------------------------------------------------------------------
/python_bindings/src/PyConciseCasts.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYCONCISECASTS_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYCONCISECASTS_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_concise_casts(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYCONCISECASTS_H
15 | 


--------------------------------------------------------------------------------
/python_bindings/src/PyDerivative.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYDERIVATIVE_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYDERIVATIVE_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_derivative(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYDERIVATIVE_H
15 | 


--------------------------------------------------------------------------------
/python_bindings/src/PyEnums.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYENUMS_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYENUMS_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_enums(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYENUMS_H
15 | 


--------------------------------------------------------------------------------
/python_bindings/src/PyError.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYERROR_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYERROR_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_error(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYERROR_H
15 | 


--------------------------------------------------------------------------------
/python_bindings/src/PyExpr.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYEXPR_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYEXPR_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_expr(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYEXPR_H
15 | 


--------------------------------------------------------------------------------
/python_bindings/src/PyExternFuncArgument.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYEXTERNFUNCARGUMENT_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYEXTERNFUNCARGUMENT_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_extern_func_argument(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYEXTERNFUNCARGUMENT_H
15 | 


--------------------------------------------------------------------------------
/python_bindings/src/PyFunc.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYFUNC_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYFUNC_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_func(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYFUNC_H
15 | 


--------------------------------------------------------------------------------
/python_bindings/src/PyFuncRef.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYFUNC_REF_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYFUNC_REF_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_func_ref(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYFUNC_REF_H
15 | 


--------------------------------------------------------------------------------
/python_bindings/src/PyIROperator.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYIROPERATOR_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYIROPERATOR_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_operators(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYIROPERATOR_H
15 | 


--------------------------------------------------------------------------------
/python_bindings/src/PyImageParam.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYIMAGEPARAM_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYIMAGEPARAM_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_image_param(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYIMAGEPARAM_H
15 | 


--------------------------------------------------------------------------------
/python_bindings/src/PyInlineReductions.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYINLINEREDUCTIONS_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYINLINEREDUCTIONS_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_inline_reductions(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYINLINEREDUCTIONS_H
15 | 


--------------------------------------------------------------------------------
/python_bindings/src/PyLambda.cpp:
--------------------------------------------------------------------------------
 1 | #include "PyLambda.h"
 2 | 
 3 | namespace Halide {
 4 | namespace PythonBindings {
 5 | 
 6 | void define_lambda(py::module &m) {  // Adds the module-level function "lambda_func" to m.
 7 |     // TODO: 'lambda' is a reserved word in Python, so we
 8 |     // can't use it for a function. Using 'lambda_func' for now.
 9 |     m.def("lambda_func", [](py::args args) -> Func {  // Takes positional arguments only and returns a new Func.
10 |         auto vars = args_to_vector<Var>(args, 0, 1);  // NOTE(review): (0, 1) looks like "skip none at the front, one at the back" -- confirm against args_to_vector.
11 |         Expr e = args[args.size() - 1].cast<Expr>();  // The last positional argument is cast to Expr and used as the body.
12 |         Func f("lambda" + Internal::unique_name('_'));  // Name is "lambda" followed by a generated unique name.
13 |         f(vars) = e;  // Define f over the converted Vars.
14 |         return f;
15 |     });
16 | }
17 | 
18 | }  // namespace PythonBindings
19 | }  // namespace Halide
20 | 


--------------------------------------------------------------------------------
/python_bindings/src/PyLambda.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYLAMBDA_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYLAMBDA_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_lambda(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYLAMBDA_H
15 | 


--------------------------------------------------------------------------------
/python_bindings/src/PyLoopLevel.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYLOOPLEVEL_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYLOOPLEVEL_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_loop_level(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYLOOPLEVEL_H
15 | 


--------------------------------------------------------------------------------
/python_bindings/src/PyMachineParams.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYMACHINEPARAMS_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYMACHINEPARAMS_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_machine_params(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYMACHINEPARAMS_H
15 | 


--------------------------------------------------------------------------------
/python_bindings/src/PyModule.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYMODULE_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYMODULE_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_module(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYMODULE_H
15 | 


--------------------------------------------------------------------------------
/python_bindings/src/PyOutputs.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYOUTPUTS_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYOUTPUTS_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_outputs(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYOUTPUTS_H
15 | 


--------------------------------------------------------------------------------
/python_bindings/src/PyParam.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYPARAM_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYPARAM_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_param(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYPARAM_H
15 | 


--------------------------------------------------------------------------------
/python_bindings/src/PyPipeline.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYPIPELINE_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYPIPELINE_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_pipeline(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYPIPELINE_H
15 | 


--------------------------------------------------------------------------------
/python_bindings/src/PyRDom.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYRDOM_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYRDOM_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_rdom(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYRDOM_H
15 | 


--------------------------------------------------------------------------------
/python_bindings/src/PyStage.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYSTAGE_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYSTAGE_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_stage(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYSTAGE_H
15 | 


--------------------------------------------------------------------------------
/python_bindings/src/PyTarget.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYTARGET_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYTARGET_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_target(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYTARGET_H
15 | 


--------------------------------------------------------------------------------
/python_bindings/src/PyTuple.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYTUPLE_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYTUPLE_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_tuple(py::module &m);
10 | 
11 | // Generic over the container type so it serves Realization as well as Tuple.
12 | template<typename T>
13 | inline py::tuple to_python_tuple(const T &ht) {
14 |     py::tuple result(ht.size());
15 |     for (size_t idx = 0; idx < ht.size(); ++idx) {
16 |         result[idx] = py::cast(ht[idx]);  // element-wise conversion to a Python object
17 |     }
18 |     return result;
19 | }
20 | 
21 | }  // namespace PythonBindings
22 | }  // namespace Halide
23 | 
24 | #endif  // HALIDE_PYTHON_BINDINGS_PYTUPLE_H
25 | 


--------------------------------------------------------------------------------
/python_bindings/src/PyType.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYTYPE_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYTYPE_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_type(py::module &m);
10 | 
11 | std::string halide_type_to_string(const Type &type);
12 | 
13 | }  // namespace PythonBindings
14 | }  // namespace Halide
15 | 
16 | #endif  // HALIDE_PYTHON_BINDINGS_PYTYPE_H
17 | 


--------------------------------------------------------------------------------
/python_bindings/src/PyVar.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYVAR_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYVAR_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_var(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYVAR_H
15 | 


--------------------------------------------------------------------------------
/python_bindings/src/PyVarOrRVar.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYFUNC_VARORRVAR_H
 2 | #define HALIDE_PYTHON_BINDINGS_PYFUNC_VARORRVAR_H
 3 | 
 4 | #include "PyHalide.h"
 5 | 
 6 | namespace Halide {
 7 | namespace PythonBindings {
 8 | 
 9 | void define_var_or_rvar(py::module &m);
10 | 
11 | }  // namespace PythonBindings
12 | }  // namespace Halide
13 | 
14 | #endif  // HALIDE_PYTHON_BINDINGS_PYFUNC_VARORRVAR_H
15 | 


--------------------------------------------------------------------------------
/src/AddParameterChecks.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_INTERNAL_ADD_PARAMETER_CHECKS_H
 2 | #define HALIDE_INTERNAL_ADD_PARAMETER_CHECKS_H
 3 | 
 4 | /** \file
 5 |  *
 6 |  * Defines the lowering pass that adds the assertions that validate
 7 |  * scalar parameters.
 8 |  */
 9 | 
10 | #include "IR.h"
11 | 
12 | namespace Halide {
13 | 
14 | struct Target;
15 | 
16 | namespace Internal {
17 | 
18 | /** Insert checks to make sure that all referenced parameters meet
19 |  * their constraints. Also injects any custom requirements provided
20 |  * by the user. */
21 | Stmt add_parameter_checks(const std::vector<Stmt> &requirements, Stmt s, const Target &t);
22 | 
23 | }  // namespace Internal
24 | }  // namespace Halide
25 | 
26 | #endif
27 | 


--------------------------------------------------------------------------------
/src/AlignLoads.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_ALIGN_LOADS_H
 2 | #define HALIDE_ALIGN_LOADS_H
 3 | 
 4 | /** \file
 5 |  * Defines a lowering pass that rewrites unaligned loads into
 6 |  * sequences of aligned loads.
 7 |  */
 8 | #include "IR.h"
 9 | #include "ModulusRemainder.h"
10 | #include "Scope.h"
11 | #include "Target.h"
12 | 
13 | namespace Halide {
14 | namespace Internal {
15 | 
16 | /** Attempt to rewrite unaligned loads from buffers which are known to
17 |  * be aligned to instead load aligned vectors that cover the original
18 |  * load, and then slice the original load out of the aligned
19 |  * vectors. */
20 | Stmt align_loads(Stmt s, int alignment);
21 | 
22 | }  // namespace Internal
23 | }  // namespace Halide
24 | 
25 | #endif
26 | 


--------------------------------------------------------------------------------
/src/AllocationBoundsInference.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_ALLOCATION_BOUNDS_INFERENCE_H
 2 | #define HALIDE_ALLOCATION_BOUNDS_INFERENCE_H
 3 | 
 4 | /** \file
 5 |  * Defines the lowering pass that determines how large internal allocations should be.
 6 |  */
 7 | 
 8 | #include "Bounds.h"
 9 | #include "IR.h"
10 | 
11 | namespace Halide {
12 | namespace Internal {
13 | 
14 | /** Take a partially lowered statement with Realize nodes in terms of
15 |  * variables, and define values for those variables. */
16 | Stmt allocation_bounds_inference(Stmt s,
17 |                                  const std::map<std::string, Function> &env,
18 |                                  const std::map<std::pair<std::string, int>, Interval> &func_bounds);
19 | }  // namespace Internal
20 | }  // namespace Halide
21 | 
22 | #endif
23 | 


--------------------------------------------------------------------------------
/src/AsyncProducers.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_ASYNC_PRODUCERS_H
 2 | #define HALIDE_ASYNC_PRODUCERS_H
 3 | 
 4 | /** \file
 5 |  * Defines the lowering pass that injects task parallelism for producers that are scheduled as async.
 6 |  */
 7 | 
 8 | #include "IR.h"
 9 | 
10 | namespace Halide {
11 | namespace Internal {
12 | 
13 | Stmt fork_async_producers(Stmt s, const std::map<std::string, Function> &env);
14 | 
15 | }
16 | }
17 | 
18 | #endif
19 | 


--------------------------------------------------------------------------------
/src/BoundSmallAllocations.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_BOUND_SMALL_ALLOCATIONS
 2 | #define HALIDE_BOUND_SMALL_ALLOCATIONS
 3 | 
 4 | #include "IR.h"
 5 | 
 6 | /** \file
 7 |  * Defines the lowering pass that attempts to rewrite small
 8 |  * allocations to have constant size.
 9 |  */
10 | 
11 | namespace Halide {
12 | namespace Internal {
13 | 
14 | /**
15 |  *
16 |  * Use bounds analysis to attempt to bound the sizes of small
17 |  * allocations. Inside GPU kernels this is necessary in order to
18 |  * compile. On the CPU this is also useful, because it prevents malloc
19 |  * calls for (provably) tiny allocations. */
20 | Stmt bound_small_allocations(const Stmt &s);
21 | 
22 | }  // namespace Internal
23 | }  // namespace Halide
24 | 
25 | #endif
26 | 


--------------------------------------------------------------------------------
/src/CanonicalizeGPUVars.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_CANONICALIZE_GPU_VARS_H
 2 | #define HALIDE_CANONICALIZE_GPU_VARS_H
 3 | 
 4 | /** \file
 5 |  * Defines the lowering pass that canonicalizes the GPU var names.
 6 |  */
 7 | 
 8 | #include "IR.h"
 9 | 
10 | namespace Halide {
11 | namespace Internal {
12 | 
13 | /** Canonicalize GPU var names into some pre-determined block/thread names
14 |  * (i.e. __block_id_x, __thread_id_x, etc.). The x/y/z/w order is determined
15 |  * by the nesting order: innermost is assigned to x and so on. */
16 | Stmt canonicalize_gpu_vars(Stmt s);
17 | 
18 | }  // namespace Internal
19 | }  // namespace Halide
20 | 
21 | #endif
22 | 


--------------------------------------------------------------------------------
/src/CodeGen_RISCV.cpp:
--------------------------------------------------------------------------------
 1 | #include "CodeGen_RISCV.h"
 2 | #include "Util.h"
 3 | #include "LLVM_Headers.h"
 4 | 
 5 | namespace Halide {
 6 | namespace Internal {
 7 | 
 8 | using std::string;
 9 | 
10 | using namespace llvm;
11 | 
12 | CodeGen_RISCV::CodeGen_RISCV(Target t) : CodeGen_Posix(t) {
13 |     #if !defined(WITH_RISCV)
14 |     user_error << "llvm build not configured with RISCV target enabled.\n";
15 |     #endif
16 | }
17 | 
18 | string CodeGen_RISCV::mcpu() const {
19 |     return "";  // always the empty string: no CPU name is supplied
20 | }
21 | 
22 | string CodeGen_RISCV::mattrs() const {
23 |     return "";  // always the empty string: no attribute list is supplied
24 | }
25 | 
26 | bool CodeGen_RISCV::use_soft_float_abi() const {
27 |     return false;  // unconditionally false for this backend
28 | }
29 | 
30 | int CodeGen_RISCV::native_vector_bits() const {
31 |     return 128;  // fixed value; NOTE(review): the basis for 128 is not shown here - confirm
32 | }
33 | 
34 | }}
35 | 


--------------------------------------------------------------------------------
/src/Debug.cpp:
--------------------------------------------------------------------------------
 1 | #include "Debug.h"
 2 | 
 3 | namespace Halide {
 4 | namespace Internal {
 5 | 
 6 | int debug::debug_level() {
 7 |     static const int level = []() -> int {  // initializer runs once; later calls reuse it
 8 |         const std::string setting = get_env_variable("HL_DEBUG_CODEGEN");
 9 |         return setting.empty() ? 0 : atoi(setting.c_str());  // empty value means level 0
10 |     }();
11 |     return level;
12 | }
13 | 
14 | }  // namespace Internal
15 | }  // namespace Halide
16 | 


--------------------------------------------------------------------------------
/src/DebugArguments.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_INTERNAL_DEBUG_ARGUMENTS_H
 2 | #define HALIDE_INTERNAL_DEBUG_ARGUMENTS_H
 3 | 
 4 | #include "Target.h"
 5 | 
 6 | /** \file
 7 |  *
 8 |  * Defines a lowering pass that injects debug statements inside a
 9 |  * LoweredFunc. Intended to be used when Target::Debug is on.
10 |  */
11 | 
12 | namespace Halide {
13 | namespace Internal {
14 | 
15 | struct LoweredFunc;
16 | 
17 | /** Injects debug prints in a LoweredFunc that describe the target and
18 |  * arguments. Mutates the given func. */
19 | void debug_arguments(LoweredFunc *func, const Target &t);
20 | 
21 | }  // namespace Internal
22 | }  // namespace Halide
23 | 
24 | #endif
25 | 


--------------------------------------------------------------------------------
/src/EarlyFree.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_EARLY_FREE_H
 2 | #define HALIDE_EARLY_FREE_H
 3 | 
 4 | /** \file
 5 |  * Defines the lowering pass that injects markers just after
 6 |  * the last use of each buffer so that they can potentially be freed
 7 |  * earlier.
 8 |  */
 9 | 
10 | #include "IR.h"
11 | 
12 | namespace Halide {
13 | namespace Internal {
14 | 
15 | /** Take a statement with allocations and inject markers (of the form
16 |  * of calls to "mark buffer dead") after the last use of each
17 |  * allocation. Targets may use this to free buffers earlier than the
18 |  * close of their Allocate node. */
19 | Stmt inject_early_frees(Stmt s);
20 | 
21 | }  // namespace Internal
22 | }  // namespace Halide
23 | 
24 | #endif
25 | 


--------------------------------------------------------------------------------
/src/HexagonOffload.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_HEXAGON_OFFLOAD_H
 2 | #define HALIDE_HEXAGON_OFFLOAD_H
 3 | 
 4 | /** \file
 5 |  * Defines a lowering pass to pull loops marked with the
 6 |  * Hexagon device API to a separate module, and call them through the
 7 |  * Hexagon host runtime module.
 8 |  */
 9 | 
10 | #include "Module.h"
11 | 
12 | namespace Halide {
13 | namespace Internal {
14 | 
15 | /** Pull loops marked with the Hexagon device API to a separate
16 |  * module, and call them through the Hexagon host runtime module. */
17 | Stmt inject_hexagon_rpc(Stmt s, const Target &host_target, Module &module);
18 | 
19 | Buffer<uint8_t> compile_module_to_hexagon_shared_object(const Module &device_code);
20 | 
21 | }  // namespace Internal
22 | }  // namespace Halide
23 | 
24 | #endif
25 | 


--------------------------------------------------------------------------------
/src/InjectHostDevBufferCopies.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_HOST_GPU_BUFFER_COPIES_H
 2 | #define HALIDE_HOST_GPU_BUFFER_COPIES_H
 3 | 
 4 | /** \file
 5 |  * Defines the lowering passes that deal with host and device buffer flow.
 6 |  */
 7 | 
 8 | #include "IR.h"
 9 | #include "Target.h"
10 | 
11 | namespace Halide {
12 | namespace Internal {
13 | 
14 | /** A helper function to call an extern function, and assert that it
15 |  * returns 0. */
16 | Stmt call_extern_and_assert(const std::string &name, const std::vector<Expr> &args);
17 | 
18 | /** Inject calls to halide_device_malloc, halide_copy_to_device, and
19 |  * halide_copy_to_host as needed. */
20 | Stmt inject_host_dev_buffer_copies(Stmt s, const Target &t);
21 | 
22 | }  // namespace Internal
23 | }  // namespace Halide
24 | 
25 | #endif
26 | 


--------------------------------------------------------------------------------
/src/InjectOpenGLIntrinsics.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_INJECT_OPENGL_INTRINSICS_H
 2 | #define HALIDE_INJECT_OPENGL_INTRINSICS_H
 3 | 
 4 | /** \file
 5 |  * Defines the lowering pass that injects texture loads and texture
 6 |  * stores for opengl.
 7 |  */
 8 | 
 9 | #include "IR.h"
10 | 
11 | namespace Halide {
12 | namespace Internal {
13 | 
14 | /** Take a statement with kernel for loops and turn loads and
15 |  * stores inside the loops into OpenGL texture load and store
16 |  * intrinsics. Should only be run when the OpenGL target is active. */
17 | Stmt inject_opengl_intrinsics(Stmt s);
18 | 
19 | }  // namespace Internal
20 | }  // namespace Halide
21 | 
22 | #endif
23 | 


--------------------------------------------------------------------------------
/src/LICM.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_LICM_H
 2 | #define HALIDE_LICM_H
 3 | 
 4 | /** \file
 5 |  * Methods for lifting loop invariants out of inner loops.
 6 |  */
 7 | 
 8 | #include "IR.h"
 9 | 
10 | namespace Halide {
11 | namespace Internal {
12 | 
13 | /** Hoist loop-invariants out of inner loops. This is especially
14 |  * important in cases where LLVM would not do it for us
15 |  * automatically. For example, it hoists loop invariants out of cuda
16 |  * kernels. */
17 | Stmt loop_invariant_code_motion(Stmt);
18 | 
19 | }  // namespace Internal
20 | }  // namespace Halide
21 | 
22 | #endif
23 | 


--------------------------------------------------------------------------------
/src/Lerp.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_LERP_H
 2 | #define HALIDE_LERP_H
 3 | 
 4 | /** \file
 5 |  * Defines methods for converting a lerp intrinsic into Halide IR.
 6 |  */
 7 | 
 8 | #include "IR.h"
 9 | 
10 | namespace Halide {
11 | namespace Internal {
12 | 
13 | /** Build Halide IR that computes a lerp. Used by codegen targets that
14 |  * don't have a native lerp. */
15 | Expr lower_lerp(Expr zero_val, Expr one_val, Expr weight);
16 | 
17 | }  // namespace Internal
18 | }  // namespace Halide
19 | 
20 | #endif
21 | 


--------------------------------------------------------------------------------
/src/LoopCarry.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_LOOP_CARRY_H
 2 | #define HALIDE_LOOP_CARRY_H
 3 | 
 4 | #include "Expr.h"
 5 | 
 6 | namespace Halide {
 7 | namespace Internal {
 8 | 
 9 | /** Reuse loads done on previous loop iterations by stashing them in
10 |  * induction variables instead of redoing the load. If the loads are
11 |  * predicated, the predicates need to match. Can be an optimization or
12 |  * pessimization depending on how good the L1 cache is on the architecture
13 |  * and how many memory issue slots there are. Currently only intended
14 |  * for Hexagon. */
15 | Stmt loop_carry(Stmt, int max_carried_values = 8);
16 | 
17 | }  // namespace Internal
18 | }  // namespace Halide
19 | 
20 | #endif
21 | 


--------------------------------------------------------------------------------
/src/LowerWarpShuffles.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_LOWER_WARP_SHUFFLES_H
 2 | #define HALIDE_LOWER_WARP_SHUFFLES_H
 3 | 
 4 | /** \file
 5 |  * Defines the lowering pass that injects CUDA warp shuffle
 6 |  * instructions to access storage outside of a GPULane loop.
 7 |  */
 8 | 
 9 | #include "IR.h"
10 | 
11 | namespace Halide {
12 | namespace Internal {
13 | 
14 | /** Rewrite access to things stored outside the loop over GPU lanes to
15 |  * use nvidia's warp shuffle instructions. */
16 | Stmt lower_warp_shuffles(Stmt s);
17 | 
18 | }  // namespace Internal
19 | }  // namespace Halide
20 | 
21 | #endif
22 | 


--------------------------------------------------------------------------------
/src/Monotonic.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_MONOTONIC_H
 2 | #define HALIDE_MONOTONIC_H
 3 | 
 4 | /** \file
 5 |  *
 6 |  * Methods for computing whether expressions are monotonic
 7 |  */
 8 | 
 9 | #include "IR.h"
10 | #include "Scope.h"
11 | 
12 | namespace Halide {
13 | namespace Internal {
14 | 
15 | /**
16 |  * Detect whether an expression is monotonic increasing in a variable,
17 |  * decreasing, or unknown.
18 |  */
19 | enum class Monotonic {Constant, Increasing, Decreasing, Unknown};
20 | Monotonic is_monotonic(Expr e, const std::string &var,
21 |                        const Scope<Monotonic> &scope = Scope<Monotonic>::empty_scope());
22 | 
23 | void is_monotonic_test();
24 | 
25 | }  // namespace Internal
26 | }  // namespace Halide
27 | 
28 | #endif
29 | 


--------------------------------------------------------------------------------
/src/PrintLoopNest.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_INTERNAL_PRINT_LOOP_NEST_H
 2 | #define HALIDE_INTERNAL_PRINT_LOOP_NEST_H
 3 | 
 4 | /** \file
 5 |  *
 6 |  * Defines methods to print out the loop nest corresponding to a schedule.
 7 |  */
 8 | 
 9 | #include <string>
10 | #include <vector>
11 | 
12 | namespace Halide {
13 | namespace Internal {
14 | 
15 | class Function;
16 | 
17 | /** Emit some simple pseudocode that shows the structure of the loop
18 |  * nest specified by this pipeline's schedule, and the schedules of
19 |  * the functions it uses. */
20 | std::string print_loop_nest(const std::vector<Function> &output_funcs);
21 | 
22 | }  // namespace Internal
23 | }  // namespace Halide
24 | 
25 | #endif
26 | 


--------------------------------------------------------------------------------
/src/PythonExtensionGen.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_PYTHON_EXTENSION_GEN_H_
 2 | #define HALIDE_PYTHON_EXTENSION_GEN_H_
 3 | 
 4 | #include <string>
 5 | #include "Module.h"
 6 | #include "Target.h"
 7 | 
 8 | namespace Halide {
 9 | 
10 | class Module;
11 | struct Target;
12 | 
13 | namespace Internal {
14 | 
15 | class PythonExtensionGen {  // NOTE(review): presumably emits Python extension glue code; implementation is not in this header
16 | public:
17 |     PythonExtensionGen(std::ostream &dest, const std::string &header_name, Target target);
18 | 
19 |     void compile(const Module &module);  // overload taking a whole Module
20 |     void compile(const LoweredFunc &f);  // overload taking a single LoweredFunc
21 | private:
22 |     void convert_buffer(std::string name, const LoweredArgument* arg);
23 |     std::ostream &dest;  // reference member: the stream must outlive this object
24 |     std::string header_name;
25 |     Target target;
26 | };
27 | 
28 | }
29 | }
30 | 
31 | #endif // HALIDE_PYTHON_EXTENSION_GEN_H_
32 | 


--------------------------------------------------------------------------------
/src/Qualify.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_QUALIFY_H
 2 | #define HALIDE_QUALIFY_H
 3 | 
 4 | /** \file
 5 |  *
 6 |  * Defines methods for prefixing names in an expression with a prefix string.
 7 |  */
 8 | 
 9 | #include "IR.h"
10 | 
11 | namespace Halide {
12 | namespace Internal {
13 | 
14 | /** Prefix all variable names in the given expression with the prefix string. */
15 | Expr qualify(const std::string &prefix, Expr value);
16 | 
17 | }  // namespace Internal
18 | }  // namespace Halide
19 | 
20 | #endif
21 | 


--------------------------------------------------------------------------------
/src/RemoveDeadAllocations.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_REMOVE_DEAD_ALLOCATIONS_H
 2 | #define HALIDE_REMOVE_DEAD_ALLOCATIONS_H
 3 | 
 4 | /** \file
 5 |  * Defines the lowering pass that removes allocate and free nodes that
 6 |  * are not used.
 7 |  */
 8 | 
 9 | #include "IR.h"
10 | 
11 | namespace Halide {
12 | namespace Internal {
13 | 
14 | /** Find Allocate/Free pairs that are never loaded from or stored to,
15 |  *  and remove them from the Stmt. This doesn't touch Realize/Call
16 |  *  nodes and so must be called after storage_flattening.
17 |  */
18 | Stmt remove_dead_allocations(Stmt s);
19 | 
20 | }  // namespace Internal
21 | }  // namespace Halide
22 | 
23 | #endif
24 | 


--------------------------------------------------------------------------------
/src/RemoveExternLoops.cpp:
--------------------------------------------------------------------------------
 1 | #include "RemoveExternLoops.h"
 2 | #include "IRMutator.h"
 3 | 
 4 | namespace Halide {
 5 | namespace Internal {
 6 | 
 7 | class RemoveExternLoops : public IRMutator {
 8 |     // Members of a class are private by default, so no access label is needed.
 9 |     using IRMutator::visit;
10 | 
11 |     Stmt visit(const For *loop) override {
12 |         if (loop->for_type == ForType::Extern) {
13 |             // Keep just the first iteration: bind the loop variable to the loop min.
14 |             return LetStmt::make(loop->name, loop->min, mutate(loop->body));
15 |         }
16 |         return IRMutator::visit(loop);
17 |     }
18 | };
19 | 
20 | Stmt remove_extern_loops(Stmt s) {  // entry point: run a fresh RemoveExternLoops mutator over s
21 |     return RemoveExternLoops().mutate(s);
22 | }
23 | 
24 | }  // namespace Internal
25 | }  // namespace Halide
26 | 


--------------------------------------------------------------------------------
/src/RemoveExternLoops.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_REMOVE_EXTERN_LOOPS
 2 | #define HALIDE_REMOVE_EXTERN_LOOPS
 3 | 
 4 | #include "IR.h"
 5 | 
 6 | /** \file
 7 |  * Defines a lowering pass that removes placeholder loops for extern stages.
 8 |  */
 9 | 
10 | namespace Halide {
11 | namespace Internal {
12 | 
13 | /** Removes placeholder loops for extern stages. */
14 | Stmt remove_extern_loops(Stmt s);
15 | 
16 | }  // namespace Internal
17 | }  // namespace Halide
18 | 
19 | #endif
20 | 


--------------------------------------------------------------------------------
/src/RemoveUndef.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_REMOVE_UNDEF
 2 | #define HALIDE_REMOVE_UNDEF
 3 | 
 4 | #include "IR.h"
 5 | 
 6 | /** \file
 7 |  * Defines a lowering pass that elides stores that depend on uninitialized values.
 8 |  */
 9 | 
10 | namespace Halide {
11 | namespace Internal {
12 | 
13 | /** Removes stores that depend on undef values, and statements that
14 |  * only contain such stores. */
15 | Stmt remove_undef(Stmt s);
16 | 
17 | }  // namespace Internal
18 | }  // namespace Halide
19 | 
20 | #endif
21 | 


--------------------------------------------------------------------------------
/src/RoundingMode.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_ROUNDING_MODE_H
 2 | #define HALIDE_ROUNDING_MODE_H
 3 | namespace Halide {
 4 | 
 5 | /** Rounding modes (IEEE754 2008 4.3 Rounding-direction attributes) */
 6 | enum class RoundingMode {
 7 |     TowardZero, ///< Round towards zero (IEEE754 2008 4.3.2)
 8 |     ToNearestTiesToEven, ///< Round to nearest, when there is a tie pick even integral significand (IEEE754 2008 4.3.1)
 9 |     ToNearestTiesToAway, ///< Round to nearest, when there is a tie pick value furthest away from zero (IEEE754 2008 4.3.1)
10 |     TowardPositiveInfinity, ///< Round towards positive infinity (IEEE754 2008 4.3.2)
11 |     TowardNegativeInfinity ///< Round towards negative infinity (IEEE754 2008 4.3.2)
12 | };
13 | 
14 | }  // namespace Halide
15 | #endif
16 | 


--------------------------------------------------------------------------------
/src/SelectGPUAPI.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_INTERNAL_SELECT_GPU_API_H
 2 | #define HALIDE_INTERNAL_SELECT_GPU_API_H
 3 | 
 4 | #include "IR.h"
 5 | #include "Target.h"
 6 | 
 7 | /** \file
 8 |  * Defines a lowering pass that selects which GPU api to use for each
 9 |  * gpu for loop
10 |  */
11 | 
12 | namespace Halide {
13 | namespace Internal {
14 | 
15 | /** Replace for loops with GPU_Default device_api with an actual
16 |  * device API depending on what's enabled in the target. Choose the
17 |  * first of the following: opencl, cuda, openglcompute, opengl */
18 | Stmt select_gpu_api(Stmt s, Target t);
19 | 
20 | }  // namespace Internal
21 | }  // namespace Halide
22 | 
23 | #endif
24 | 


--------------------------------------------------------------------------------
/src/SimplifySpecializations.h:
--------------------------------------------------------------------------------
 1 | #ifndef SIMPLIFY_SPECIALIZATIONS_H
 2 | #define SIMPLIFY_SPECIALIZATIONS_H
 3 | 
 4 | /** \file
 5 |  *
 6 |  * Defines a pass that tries to simplify the RHS/LHS of a function's definition
 7 |  * based on its specializations.
 8 |  */
 9 | 
10 | #include <map>
11 | 
12 | #include "IR.h"
13 | 
14 | namespace Halide {
15 | namespace Internal {
16 | 
17 | /** Try to simplify the RHS/LHS of a function's definition based on its
18 |  * specializations. */
19 | void simplify_specializations(std::map<std::string, Function> &env);
20 | 
21 | }  // namespace Internal
22 | }  // namespace Halide
23 | 
24 | #endif
25 | 


--------------------------------------------------------------------------------
/src/SkipStages.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_SKIP_STAGES
 2 | #define HALIDE_SKIP_STAGES
 3 | 
 4 | #include "IR.h"
 5 | 
 6 | /** \file
 7 |  * Defines a pass that dynamically avoids realizing unnecessary stages.
 8 |  */
 9 | 
10 | namespace Halide {
11 | namespace Internal {
12 | 
13 | /** Avoid computing certain stages if we can infer a runtime condition
14 |  * to check that tells us they won't be used. Does this by analyzing
15 |  * all reads of each buffer allocated, and inferring some condition
16 |  * that tells us if the reads occur. If the condition is non-trivial,
17 |  * inject ifs that guard the production. */
18 | Stmt skip_stages(Stmt s, const std::vector<std::string> &order);
19 | 
20 | }  // namespace Internal
21 | }  // namespace Halide
22 | 
23 | #endif
24 | 


--------------------------------------------------------------------------------
/src/SlidingWindow.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_SLIDING_WINDOW_H
 2 | #define HALIDE_SLIDING_WINDOW_H
 3 | 
 4 | /** \file
 5 |  *
 6 |  * Defines the sliding_window lowering optimization pass, which avoids
 7 |  * computing provably-already-computed values.
 8 |  */
 9 | 
10 | #include <map>
11 | 
12 | #include "IR.h"
13 | 
14 | namespace Halide {
15 | namespace Internal {
16 | 
17 | /** Perform sliding window optimizations on a halide
18 |  * statement. I.e. don't bother computing points in a function that
19 |  * have provably already been computed by a previous iteration.
20 |  */
21 | Stmt sliding_window(Stmt s, const std::map<std::string, Function> &env);
22 | 
23 | }  // namespace Internal
24 | }  // namespace Halide
25 | 
26 | #endif
27 | 


--------------------------------------------------------------------------------
/src/SplitTuples.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_SPLIT_TUPLES_H
 2 | #define HALIDE_SPLIT_TUPLES_H
 3 | 
 4 | #include "Expr.h"
 5 | #include "Function.h"
 6 | #include <map>
 7 | 
 8 | /** \file
 9 |  * Defines the lowering pass that breaks up Tuple-valued realizations
10 |  * and productions into several scalar-valued ones. */
11 | 
12 | namespace Halide {
13 | namespace Internal {
14 | 
15 | /** Rewrite all tuple-valued Realizations, Provide nodes, and Call
16 |  * nodes into several scalar-valued ones, so that later lowering
17 |  * passes only need to think about scalar-valued productions. */
18 | 
19 | Stmt split_tuples(Stmt s, const std::map<std::string, Function> &env);
20 | 
21 | }  // namespace Internal
22 | }  // namespace Halide
23 | 
24 | #endif
25 | 


--------------------------------------------------------------------------------
/src/StmtToHtml.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_STMT_TO_HTML
 2 | #define HALIDE_STMT_TO_HTML
 3 | 
 4 | /** \file
 5 |  * Defines a function to dump an HTML-formatted stmt to a file.
 6 |  */
 7 | 
 8 | #include "Module.h"
 9 | 
10 | namespace Halide {
11 | namespace Internal {
12 | 
13 | /**
14 |  * Dump an HTML-formatted print of a Stmt to filename.
15 |  */
16 | void print_to_html(std::string filename, Stmt s);
17 | 
18 | /** Dump an HTML-formatted print of a Module to filename. */
19 | void print_to_html(std::string filename, const Module &m);
20 | 
21 | }  // namespace Internal
22 | }  // namespace Halide
23 | 
24 | #endif
25 | 


--------------------------------------------------------------------------------
/src/TrimNoOps.h:
--------------------------------------------------------------------------------
 1 | #ifndef TRIM_NO_OPS_H
 2 | #define TRIM_NO_OPS_H
 3 | 
 4 | /** \file
 5 |  * Defines a lowering pass that truncates loops to the region over
 6 |  * which they actually do something.
 7 |  */
 8 | 
 9 | #include "IR.h"
10 | 
11 | namespace Halide {
12 | namespace Internal {
13 | 
14 | /** Truncate loop bounds to the region over which they actually do
15 |  * something. For examples see test/correctness/trim_no_ops.cpp */
16 | Stmt trim_no_ops(Stmt s);
17 | 
18 | }  // namespace Internal
19 | }  // namespace Halide
20 | 
21 | #endif
22 | 


--------------------------------------------------------------------------------
/src/Tuple.cpp:
--------------------------------------------------------------------------------
 1 | #include "Tuple.h"
 2 | #include "Debug.h"
 3 | #include "Func.h"
 4 | 
 5 | namespace Halide {
 6 | 
 7 | Tuple::Tuple(const FuncRef &f) : exprs(f.size()) {
 8 |     user_assert(f.size() > 1)
 9 |         << "Can't construct a Tuple from a call to Func \""
10 |         << f.function().name() << "\" because it does not return a Tuple.\n";
11 |     for (size_t i = 0; i < f.size(); i++) {
12 |         exprs[i] = f[i];
13 |     }
14 | }
15 | 
16 | }  // namespace Halide
17 | 


--------------------------------------------------------------------------------
/src/UnifyDuplicateLets.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_UNIFY_DUPLICATE_LETS_H
 2 | #define HALIDE_UNIFY_DUPLICATE_LETS_H
 3 | 
 4 | /** \file
 5 |  * Defines the lowering pass that coalesces redundant let statements
 6 |  */
 7 | 
 8 | #include "IR.h"
 9 | 
10 | namespace Halide {
11 | namespace Internal {
12 | 
13 | /** Find let statements that all define the same value, and make later
14 |  * ones just reuse the symbol names of the earlier ones. */
15 | Stmt unify_duplicate_lets(Stmt s);
16 | 
17 | }  // namespace Internal
18 | }  // namespace Halide
19 | 
20 | #endif
21 | 


--------------------------------------------------------------------------------
/src/UniquifyVariableNames.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_UNIQUIFY_VARIABLE_NAMES
 2 | #define HALIDE_UNIQUIFY_VARIABLE_NAMES
 3 | 
 4 | /** \file
 5 |  * Defines the lowering pass that renames all variables to have unique names.
 6 |  */
 7 | 
 8 | #include "IR.h"
 9 | 
10 | namespace Halide {
11 | namespace Internal {
12 | 
13 | /** Modify a statement so that every internally-defined variable name
14 |  * is unique. This lets later passes assume syntactic equivalence is
15 |  * semantic equivalence. */
16 | Stmt uniquify_variable_names(Stmt s);
17 | 
18 | }  // namespace Internal
19 | }  // namespace Halide
20 | 
21 | #endif
22 | 


--------------------------------------------------------------------------------
/src/UnpackBuffers.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_UNPACK_BUFFERS_H
 2 | #define HALIDE_UNPACK_BUFFERS_H
 3 | 
 4 | /** \file
 5 |  * Defines the lowering pass that unpacks buffer arguments onto the symbol table
 6 |  */
 7 | 
 8 | #include "IR.h"
 9 | 
10 | namespace Halide {
11 | namespace Internal {
12 | 
13 | /** Creates let stmts for the various buffer components
14 |  * (e.g. foo.extent.0) in any referenced concrete buffers or buffer
15 |  * parameters. After this pass, the only undefined symbols should be
16 |  * scalar parameters and the buffers themselves (e.g. foo.buffer). */
17 | Stmt unpack_buffers(Stmt s);
18 | 
19 | }  // namespace Internal
20 | }  // namespace Halide
21 | 
22 | #endif
23 | 


--------------------------------------------------------------------------------
/src/UnrollLoops.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_UNROLL_LOOPS_H
 2 | #define HALIDE_UNROLL_LOOPS_H
 3 | 
 4 | /** \file
 5 |  * Defines the lowering pass that unrolls loops marked as such
 6 |  */
 7 | 
 8 | #include "IR.h"
 9 | 
10 | namespace Halide {
11 | namespace Internal {
12 | 
13 | /** Take a statement with for loops marked for unrolling, and convert
14 |  * each into several copies of the innermost statement. I.e. unroll
15 |  * the loop. */
16 | Stmt unroll_loops(Stmt);
17 | 
18 | }  // namespace Internal
19 | }  // namespace Halide
20 | 
21 | #endif
22 | 


--------------------------------------------------------------------------------
/src/UnsafePromises.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_UNSAFE_PROMISES_H
 2 | #define HALIDE_UNSAFE_PROMISES_H
 3 | 
 4 | /** \file
 5 |  * Defines the lowering pass that removes unsafe promises
 6 |  */
 7 | 
 8 | #include "IR.h"
 9 | #include "Target.h"
10 | 
11 | namespace Halide {
12 | namespace Internal {
13 | 
14 | /** Lower all unsafe promises into either assertions or unchecked
15 |     code, depending on the target. */
16 | Stmt lower_unsafe_promises(Stmt s, const Target &t);
17 | 
18 | }  // namespace Internal
19 | }  // namespace Halide
20 | 
21 | #endif
22 | 


--------------------------------------------------------------------------------
/src/VectorizeLoops.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_VECTORIZE_LOOPS_H
 2 | #define HALIDE_VECTORIZE_LOOPS_H
 3 | 
 4 | /** \file
 5 |  * Defines the lowering pass that vectorizes loops marked as such
 6 |  */
 7 | 
 8 | #include "IR.h"
 9 | #include "Target.h"
10 | 
11 | namespace Halide {
12 | namespace Internal {
13 | 
14 | /** Take a statement with for loops marked for vectorization, and turn
15 |  * them into single statements that operate on vectors. The loops in
16 |  * question must have constant extent.
17 |  */
18 | Stmt vectorize_loops(Stmt s, const Target &t);
19 | 
20 | }  // namespace Internal
21 | }  // namespace Halide
22 | 
23 | #endif
24 | 


--------------------------------------------------------------------------------
/src/WrapCalls.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_WRAP_CALLS_H
 2 | #define HALIDE_WRAP_CALLS_H
 3 | 
 4 | /** \file
 5 |  *
 6 |  * Defines pass to replace calls to wrapped Functions with their wrappers.
 7 |  */
 8 | 
 9 | #include <map>
10 | 
11 | #include "IR.h"
12 | 
13 | namespace Halide {
14 | namespace Internal {
15 | 
16 | /** Replace every call to wrapped Functions in the Functions' definitions with
17 |   * calls to their wrapper functions. */
18 | std::map<std::string, Function> wrap_func_calls(const std::map<std::string, Function> &env);
19 | 
20 | }  // namespace Internal
21 | }  // namespace Halide
22 | 
23 | #endif
24 | 


--------------------------------------------------------------------------------
/src/WrapExternStages.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_WRAP_EXTERN_STAGES_H
 2 | #define HALIDE_WRAP_EXTERN_STAGES_H
 3 | 
 4 | #include "Module.h"
 5 | 
 6 | /** \file
 7 |  *
 8 |  * Defines a pass over a Module that adds wrapper LoweredFuncs to any
 9 |  * extern stages that need them */
10 | 
11 | namespace Halide {
12 | namespace Internal {
13 | 
14 | /** Add a wrapper for a LoweredFunc that accepts old buffers and
15 |  * upgrades them. */
16 | void add_legacy_wrapper(Module m, const LoweredFunc &fn);
17 | 
18 | }  // namespace Internal
19 | }  // namespace Halide
20 | 
21 | #endif
22 | 


--------------------------------------------------------------------------------
/src/runtime/aarch64_cpu_features.cpp:
--------------------------------------------------------------------------------
 1 | #include "HalideRuntime.h"
 2 | #include "cpu_features.h"
 3 | 
 4 | namespace Halide { namespace Runtime { namespace Internal {
 5 | 
 6 | WEAK CpuFeatures halide_get_cpu_features() {
 7 |     // AArch64 has no CPU-specific Features.
 8 |     return CpuFeatures();
 9 | }
10 | 
11 | }}} // namespace Halide::Runtime::Internal
12 | 


--------------------------------------------------------------------------------
/src/runtime/alignment_128.cpp:
--------------------------------------------------------------------------------
 1 | #include "runtime_internal.h"
 2 | 
 3 | namespace Halide {
 4 | namespace Runtime {
 5 | namespace Internal {
 6 | 
 7 | WEAK __attribute__((always_inline)) int halide_malloc_alignment() {
 8 |     return 128;
 9 | }
10 | 
11 | }}}
12 | 


--------------------------------------------------------------------------------
/src/runtime/alignment_32.cpp:
--------------------------------------------------------------------------------
 1 | #include "runtime_internal.h"
 2 | 
 3 | namespace Halide {
 4 | namespace Runtime {
 5 | namespace Internal {
 6 | 
 7 | WEAK __attribute__((always_inline)) int halide_malloc_alignment() {
 8 |     return 32;
 9 | }
10 | 
11 | }}}
12 | 


--------------------------------------------------------------------------------
/src/runtime/alignment_64.cpp:
--------------------------------------------------------------------------------
 1 | #include "runtime_internal.h"
 2 | 
 3 | namespace Halide {
 4 | namespace Runtime {
 5 | namespace Internal {
 6 | 
 7 | WEAK __attribute__((always_inline)) int halide_malloc_alignment() {
 8 |     return 64;
 9 | }
10 | 
11 | }}}
12 | 


--------------------------------------------------------------------------------
/src/runtime/android_clock.cpp:
--------------------------------------------------------------------------------
 1 | #ifdef BITS_64
 2 | #define SYS_CLOCK_GETTIME 113
 3 | #endif
 4 | 
 5 | #ifdef BITS_32
 6 | #define SYS_CLOCK_GETTIME 263
 7 | #endif
 8 | 
 9 | #include "linux_clock.cpp"
10 | 


--------------------------------------------------------------------------------
/src/runtime/android_host_cpu_count.cpp:
--------------------------------------------------------------------------------
 1 | #include "HalideRuntime.h"
 2 | 
 3 | extern "C" {
 4 | 
 5 | extern long sysconf(int);
 6 | 
 7 | WEAK int halide_host_cpu_count() {
 8 |     // Works for Android ARMv7. Probably bogus on other platforms.
 9 |     return sysconf(97);
10 | }
11 | 
12 | }
13 | 


--------------------------------------------------------------------------------
/src/runtime/android_io.cpp:
--------------------------------------------------------------------------------
 1 | #include "HalideRuntime.h"
 2 | 
 3 | extern "C" {
 4 | 
 5 | #define ANDROID_LOG_INFO 4
 6 | 
 7 | extern int __android_log_print(int, const char *, const char *, ...);
 8 | 
 9 | WEAK void halide_default_print(void *user_context, const char * str) {
10 |     __android_log_print(ANDROID_LOG_INFO, "halide", "%s", str);
11 | }
12 | 
13 | }
14 | 


--------------------------------------------------------------------------------
/src/runtime/arm_no_neon.ll:
--------------------------------------------------------------------------------
1 | ; TODO: add specializations for ARMv7 without NEON
2 | 


--------------------------------------------------------------------------------
/src/runtime/destructors.cpp:
--------------------------------------------------------------------------------
 1 | #include "HalideRuntime.h"
 2 | 
 3 | #define INLINE inline __attribute__((weak)) __attribute__((always_inline)) __attribute__((used))
 4 | 
 5 | extern "C" {
 6 | 
 7 | INLINE void call_destructor(void *user_context, void (*fn)(void *user_context, void *object), void **object, bool should_call) {
 8 |     void *o = *object;
 9 |     *object = NULL;
10 |     // Call the function
11 |     if (o && should_call) {
12 |         fn(user_context, o);
13 |     }
14 | }
15 | 
16 | }
17 | 


--------------------------------------------------------------------------------
/src/runtime/fuchsia_host_cpu_count.cpp:
--------------------------------------------------------------------------------
 1 | #include "HalideRuntime.h"
 2 | 
 3 | extern "C" {
 4 | 
 5 | uint32_t zx_system_get_num_cpus(void);
 6 | 
 7 | WEAK int halide_host_cpu_count() {
 8 |   return (int)zx_system_get_num_cpus();
 9 | }
10 | 
11 | }
12 | 


--------------------------------------------------------------------------------
/src/runtime/fuchsia_yield.cpp:
--------------------------------------------------------------------------------
 1 | #include "runtime_internal.h"
 2 | 
 3 | typedef int32_t zx_status_t;
 4 | typedef int64_t zx_time_t;
 5 | extern "C" zx_status_t zx_nanosleep(zx_time_t deadline);
 6 | 
 7 | namespace Halide { namespace Runtime { namespace Internal {
 8 | 
 9 | WEAK void halide_thread_yield() {
10 |     zx_nanosleep(0);
11 | }
12 | 
13 | }}}
14 | 


--------------------------------------------------------------------------------
/src/runtime/hexagon_cpu_features.cpp:
--------------------------------------------------------------------------------
 1 | #include "HalideRuntime.h"
 2 | #include "cpu_features.h"
 3 | 
 4 | namespace Halide { namespace Runtime { namespace Internal {
 5 | 
 6 | WEAK CpuFeatures halide_get_cpu_features() {
 7 |     // Hexagon has no CPU-specific Features.
 8 |     return CpuFeatures();
 9 | }
10 | 
11 | }}} // namespace Halide::Runtime::Internal
12 | 


--------------------------------------------------------------------------------
/src/runtime/hexagon_remote/.gitignore:
--------------------------------------------------------------------------------
1 | !bin/*/*
2 | 


--------------------------------------------------------------------------------
/src/runtime/hexagon_remote/bin/arm-32-android/libhalide_hexagon_host.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/src/runtime/hexagon_remote/bin/arm-32-android/libhalide_hexagon_host.so


--------------------------------------------------------------------------------
/src/runtime/hexagon_remote/bin/arm-64-android/libhalide_hexagon_host.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/src/runtime/hexagon_remote/bin/arm-64-android/libhalide_hexagon_host.so


--------------------------------------------------------------------------------
/src/runtime/hexagon_remote/bin/host/libhalide_hexagon_host.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/src/runtime/hexagon_remote/bin/host/libhalide_hexagon_host.so


--------------------------------------------------------------------------------
/src/runtime/hexagon_remote/bin/v60/hexagon_sim_remote:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/src/runtime/hexagon_remote/bin/v60/hexagon_sim_remote


--------------------------------------------------------------------------------
/src/runtime/hexagon_remote/bin/v60/libhalide_hexagon_remote_skel.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/src/runtime/hexagon_remote/bin/v60/libhalide_hexagon_remote_skel.so


--------------------------------------------------------------------------------
/src/runtime/hexagon_remote/bin/v60/libsim_qurt.a:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/src/runtime/hexagon_remote/bin/v60/libsim_qurt.a


--------------------------------------------------------------------------------
/src/runtime/hexagon_remote/bin/v60/libsim_qurt_vtcm.a:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/src/runtime/hexagon_remote/bin/v60/libsim_qurt_vtcm.a


--------------------------------------------------------------------------------
/src/runtime/hexagon_remote/bin/v60/signed_by_debug/libhalide_hexagon_remote_skel.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/src/runtime/hexagon_remote/bin/v60/signed_by_debug/libhalide_hexagon_remote_skel.so


--------------------------------------------------------------------------------
/src/runtime/hexagon_remote/c11_stubs.cpp:
--------------------------------------------------------------------------------
 1 | extern "C" {
 2 | 
 3 | // Hexagon-tools 8.0.06 and later are dependent on 2 additional symbols:
 4 | //    __cxa_finalize and __cxa_atexit
 5 | // We are providing weak symbol definitions of these functions. They use
 6 | // the signatures specified by the Itanium C++ ABI, so that a caller which
 7 | // passes arguments or reads the result sees well-defined behaviour.
 8 | 
 9 | #include "HAP_farf.h"
10 | 
11 | //#define FARF_LOW 1    // Enable debug output
12 | 
13 | // Stub for the ABI's "run registered handlers" hook. It only issues a
14 | // FARF(LOW, ...) message; no handlers are run. The argument is ignored.
15 | void __attribute__ ((weak)) __cxa_finalize(void *) {
16 |   FARF(LOW, "Finalizing\n");
17 | }
18 | 
19 | // Stub for the ABI's handler-registration hook. It only issues a
20 | // FARF(LOW, ...) message; the handler is not recorded, so it will never
21 | // be called. Returns 0, which the ABI defines as success.
22 | int __attribute__ ((weak)) __cxa_atexit(void (*)(void *), void *, void *) {
23 |   FARF(LOW, "Atexiting\n");
24 |   return 0;
25 | }
26 | 
27 | }  // extern "C"
28 | 


--------------------------------------------------------------------------------
/src/runtime/hexagon_remote/dlib.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_HEXAGON_MMAP_DLIB_H
 2 | #define HALIDE_HEXAGON_MMAP_DLIB_H
 3 | 
 4 | // This is a custom implementation of dlopen/dlsym/dlclose for loading
 5 | // a shared object in memory, based on using mmap/mprotect to load and
 6 | // make data executable. The arguments are the same as their standard
 7 | // counterparts, except mmap_dlopen takes a pointer/size instead of a
 8 | // file, and does not take a flags option. The exported symbols are
 9 | // not actually loaded into the process for use by other
10 | // dlopen/mmap_dlopen calls.
11 | void *mmap_dlopen(const void *code, size_t size);
12 | void *mmap_dlsym(void *dlib, const char *name);
13 | int mmap_dlclose(void *dlib);
14 | 
15 | #endif
16 | 


--------------------------------------------------------------------------------
/src/runtime/hexagon_remote/known_symbols.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_HEXAGON_REMOTE_KNOWN_SYMBOLS_H
 2 | #define HALIDE_HEXAGON_REMOTE_KNOWN_SYMBOLS_H
 3 | 
 4 | // Mapping between a symbol name and an address.
 5 | struct known_symbol {
 6 |     const char *name;
 7 |     char *addr;
 8 | };
 9 | 
10 | // Look up a symbol in an array of known symbols. The map should be
11 | // terminated with a {NULL, NULL} known_symbol.
12 | void *lookup_symbol(const char *sym, const known_symbol *map);
13 | 
14 | // Look up common symbols.
15 | void *get_known_symbol(const char *sym);
16 | 
17 | #endif
18 | 


--------------------------------------------------------------------------------
/src/runtime/hexagon_remote/log.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_HEXAGON_REMOTE_LOG_H
2 | #define HALIDE_HEXAGON_REMOTE_LOG_H
3 | 
4 | void log_printf(const char *fmt, ...);
5 | 
6 | #endif
7 | 


--------------------------------------------------------------------------------
/src/runtime/hexagon_remote/nearbyint.cpp:
--------------------------------------------------------------------------------
 1 | #include <math.h>
 2 | 
 3 | extern "C" {
 4 | 
 5 | // Hexagon doesn't have an implementation of nearbyint/nearbyintf, so
 6 | // we provide one here. Both functions round to the nearest integer,
 7 | // with ties going to the even neighbour. Only floor/floorf are used,
 8 | // because this file already depends on them. The sign of a zero
 9 | // result is not preserved.
10 | 
11 | float nearbyintf(float x) {
12 |     const float lo = floorf(x);
13 |     const float frac = x - lo;  // in [0, 1]; NaN if x is not finite
14 |     if (frac < 0.5f) {
15 |         return lo;
16 |     }
17 |     if (frac > 0.5f) {
18 |         return lo + 1.0f;
19 |     }
20 |     // Exact tie: choose whichever neighbour is even. NaN and
21 |     // infinities also land here and are returned unchanged.
22 |     return (floorf(lo * 0.5f) * 2.0f == lo) ? lo : lo + 1.0f;
23 | }
24 | 
25 | double nearbyint(double x) {
26 |     const double lo = floor(x);
27 |     const double frac = x - lo;  // in [0, 1]; NaN if x is not finite
28 |     if (frac < 0.5) {
29 |         return lo;
30 |     }
31 |     if (frac > 0.5) {
32 |         return lo + 1.0;
33 |     }
34 |     // Exact tie: choose whichever neighbour is even. NaN and
35 |     // infinities also land here and are returned unchanged.
36 |     return (floor(lo * 0.5) * 2.0 == lo) ? lo : lo + 1.0;
37 | }
38 | 
39 | }  // extern "C"
40 | 


--------------------------------------------------------------------------------
/src/runtime/hexagon_remote/sim_protocol.h:
--------------------------------------------------------------------------------
 1 | #ifndef SIM_PROTOCOL_H
 2 | #define SIM_PROTOCOL_H
 3 | 
 4 | namespace Message {
 5 | enum {
 6 |     None = 0,
 7 |     Alloc,
 8 |     Free,
 9 |     LoadLibrary,
10 |     GetSymbol,
11 |     Run,
12 |     ReleaseLibrary,
13 |     Break,
14 | };
15 | }
16 | 
17 | #endif  // SIM_PROTOCOL_H
18 | 


--------------------------------------------------------------------------------
/src/runtime/hexagon_remote/sim_qurt.cpp:
--------------------------------------------------------------------------------
 1 | #include "hexagon_standalone.h"
 2 | 
 3 | extern "C" {
 4 | 
 5 | // Provide an implementation of qurt to redirect to the appropriate
 6 | // simulator calls.
 7 | int qurt_hvx_lock(int mode) {
 8 |     SIM_ACQUIRE_HVX;
 9 |     if (mode == 0) {
10 |         SIM_CLEAR_HVX_DOUBLE_MODE;
11 |     } else {
12 |         SIM_SET_HVX_DOUBLE_MODE;
13 |     }
14 |     return 0;
15 | }
16 | 
17 | int qurt_hvx_unlock() {
18 |     SIM_RELEASE_HVX;
19 |     return 0;
20 | }
21 | 
22 | }  // extern "C"
23 | 


--------------------------------------------------------------------------------
/src/runtime/ios_io.cpp:
--------------------------------------------------------------------------------
 1 | #include "HalideRuntime.h"
 2 | #include "objc_support.h"
 3 | 
 4 | extern "C" {
 5 | 
 6 | WEAK void halide_default_print(void *user_context, const char *str) {
 7 |     void *pool = create_autorelease_pool();
 8 |     ns_log_utf8_string(str);
 9 |     drain_autorelease_pool(pool);
10 | }
11 | 
12 | }  // extern "C"
13 | 


--------------------------------------------------------------------------------
/src/runtime/linux_host_cpu_count.cpp:
--------------------------------------------------------------------------------
 1 | #include "HalideRuntime.h"
 2 | 
 3 | extern "C" {
 4 | 
 5 | extern long sysconf(int);
 6 | 
 7 | WEAK int halide_host_cpu_count() {
 8 |     return sysconf(84);
 9 | }
10 | 
11 | }
12 | 


--------------------------------------------------------------------------------
/src/runtime/linux_yield.cpp:
--------------------------------------------------------------------------------
 1 | #include "runtime_internal.h"
 2 | 
 3 | extern "C" int sched_yield();
 4 | 
 5 | namespace Halide { namespace Runtime { namespace Internal {
 6 | 
 7 | WEAK void halide_thread_yield() {
 8 |     sched_yield();
 9 | }
10 | 
11 | }}}
12 | 


--------------------------------------------------------------------------------
/src/runtime/metadata.cpp:
--------------------------------------------------------------------------------
 1 | #include "HalideRuntime.h"
 2 | 
 3 | namespace Halide { namespace Runtime { namespace Internal {
 4 | 
 5 | // This is unused and expected to be optimized away; it exists solely to ensure
 6 | // that the halide_filter_metadata_t type is in the runtime module, so that
 7 | // Codegen_LLVM can access its description.
 8 | WEAK const halide_filter_metadata_t *unused_function_to_get_halide_filter_metadata_t_declared() { return NULL; }
 9 | 
10 | } } }
11 | 
12 | 


--------------------------------------------------------------------------------
/src/runtime/metal_objc_arm.cpp:
--------------------------------------------------------------------------------
1 | #define ARM_COMPILE 1
2 | #include "metal_objc_platform_dependent.cpp"
3 | 


--------------------------------------------------------------------------------
/src/runtime/metal_objc_platform_dependent.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_OBJC_METAL_PLATFORM_DEPENDENT_H
 2 | #define HALIDE_OBJC_METAL_PLATFORM_DEPENDENT_H
 3 | 
 4 | namespace Halide { namespace Runtime { namespace Internal { namespace Metal {
 5 | 
 6 | struct mtl_compute_command_encoder;
 7 | 
 8 | void dispatch_threadgroups(mtl_compute_command_encoder *encoder,
 9 |                            int32_t blocks_x, int32_t blocks_y, int32_t blocks_z,
10 |                            int32_t threads_x, int32_t threads_y, int32_t threads_z);
11 | 
12 | }}}}
13 | 
14 | #endif
15 | 


--------------------------------------------------------------------------------
/src/runtime/metal_objc_x86.cpp:
--------------------------------------------------------------------------------
1 | #define X86_COMPILE 1
2 | #include "metal_objc_platform_dependent.cpp"
3 | 


--------------------------------------------------------------------------------
/src/runtime/mini_qurt_vtcm.h:
--------------------------------------------------------------------------------
1 | extern "C" {
2 | 
3 | extern void* HAP_request_VTCM (unsigned int size, unsigned int single_page_flag);
4 | extern int HAP_release_VTCM (void* pVA);
5 | 
6 | }
7 | 


--------------------------------------------------------------------------------
/src/runtime/mips_cpu_features.cpp:
--------------------------------------------------------------------------------
 1 | #include "HalideRuntime.h"
 2 | #include "cpu_features.h"
 3 | 
 4 | namespace Halide { namespace Runtime { namespace Internal {
 5 | 
 6 | WEAK CpuFeatures halide_get_cpu_features() {
 7 |     // MIPS has no CPU-specific Features.
 8 |     return CpuFeatures();
 9 | }
10 | 
11 | }}} // namespace Halide::Runtime::Internal
12 | 


--------------------------------------------------------------------------------
/src/runtime/msan_stubs.cpp:
--------------------------------------------------------------------------------
 1 | #include "HalideRuntime.h"
 2 | 
 3 | extern "C" {
 4 | 
 5 | WEAK int halide_msan_annotate_memory_is_initialized(void *user_context, const void *ptr, uint64_t len) { return 0; }
 6 | 
 7 | WEAK int halide_msan_annotate_buffer_is_initialized(void *user_context, halide_buffer_t *b) { return 0; }
 8 | 
 9 | WEAK void halide_msan_annotate_buffer_is_initialized_as_destructor(void *user_context, void *b) {}
10 | 
11 | }
12 | 


--------------------------------------------------------------------------------
/src/runtime/nvidia_libdevice_bitcode/libdevice.compute_20.10.bc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/src/runtime/nvidia_libdevice_bitcode/libdevice.compute_20.10.bc


--------------------------------------------------------------------------------
/src/runtime/nvidia_libdevice_bitcode/libdevice.compute_30.10.bc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/src/runtime/nvidia_libdevice_bitcode/libdevice.compute_30.10.bc


--------------------------------------------------------------------------------
/src/runtime/nvidia_libdevice_bitcode/libdevice.compute_35.10.bc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/src/runtime/nvidia_libdevice_bitcode/libdevice.compute_35.10.bc


--------------------------------------------------------------------------------
/src/runtime/osx_host_cpu_count.cpp:
--------------------------------------------------------------------------------
 1 | #include "HalideRuntime.h"
 2 | 
 3 | extern "C" {
 4 | 
 5 | extern long sysconf(int);
 6 | 
 7 | WEAK int halide_host_cpu_count() {
 8 |     return sysconf(58);
 9 | }
10 | 
11 | }
12 | 


--------------------------------------------------------------------------------
/src/runtime/osx_yield.cpp:
--------------------------------------------------------------------------------
 1 | #include "runtime_internal.h"
 2 | 
 3 | extern "C" int swtch_pri(int);
 4 | 
 5 | namespace Halide { namespace Runtime { namespace Internal {
 6 | 
 7 | WEAK void halide_thread_yield() {
 8 |     swtch_pri(0);
 9 | }
10 | 
11 | }}}
12 | 


--------------------------------------------------------------------------------
/src/runtime/posix_abort.cpp:
--------------------------------------------------------------------------------
 1 | #include "runtime_internal.h"
 2 | 
 3 | extern "C" void abort();
 4 | 
 5 | namespace Halide {
 6 | namespace Runtime {
 7 | namespace Internal {
 8 | 
 9 | WEAK __attribute__((always_inline)) void halide_abort() {
10 |     abort();
11 | }
12 | 
13 | }}}
14 | 


--------------------------------------------------------------------------------
/src/runtime/posix_io.cpp:
--------------------------------------------------------------------------------
 1 | #include "HalideRuntime.h"
 2 | 
 3 | extern "C" {
 4 | 
 5 | WEAK void halide_default_print(void *user_context, const char *str) {
 6 |     write(STDOUT_FILENO, str, strlen(str));
 7 | }
 8 | 
 9 | }
10 | 


--------------------------------------------------------------------------------
/src/runtime/posix_print.cpp:
--------------------------------------------------------------------------------
 1 | #include "HalideRuntime.h"
 2 | 
 3 | extern "C" void halide_default_print(void *, const char *);
 4 | 
 5 | namespace Halide { namespace Runtime { namespace Internal {
 6 | 
 7 | WEAK halide_print_t custom_print = halide_default_print;
 8 | 
 9 | }}} // namespace Halide::Runtime::Internal
10 | 
11 | extern "C" {
12 | 
13 | WEAK void halide_print(void *user_context, const char *msg) {
14 |     (*custom_print)(user_context, msg);
15 | }
16 | 
17 | WEAK halide_print_t halide_set_custom_print(halide_print_t print) {
18 |     halide_print_t result = custom_print;
19 |     custom_print = print;
20 |     return result;
21 | }
22 | 
23 | }
24 | 


--------------------------------------------------------------------------------
/src/runtime/posix_threads_tsan.cpp:
--------------------------------------------------------------------------------
1 | #define TSAN_ANNOTATIONS 1
2 | 
3 | #include "posix_threads.cpp"
4 | 


--------------------------------------------------------------------------------
/src/runtime/prefetch.cpp:
--------------------------------------------------------------------------------
 1 | #include "HalideRuntime.h"
 2 | 
 3 | extern "C" {
 4 | 
 5 | // This needs to be inlined; otherwise the extern call with the ptr
 6 | // parameter breaks a lot of optimizations.
 7 | __attribute__((always_inline))
 8 | WEAK int _halide_prefetch(const void *ptr) {
 9 |     __builtin_prefetch(ptr, 1, 3);
10 |     return 0;
11 | }
12 | 
13 | }
14 | 


--------------------------------------------------------------------------------
/src/runtime/qurt_hvx_vtcm.cpp:
--------------------------------------------------------------------------------
 1 | #include "runtime_internal.h"
 2 | #include "HalideRuntimeQurt.h"
 3 | #include "mini_qurt.h"
 4 | #include "mini_qurt_vtcm.h"
 5 | 
 6 | using namespace Halide::Runtime::Internal::Qurt;
 7 | 
 8 | extern "C" {
 9 | 
10 | WEAK void* halide_vtcm_malloc(void *user_context, int size) {
11 |     return HAP_request_VTCM(size, 1);
12 | }
13 | 
14 | WEAK void halide_vtcm_free(void *user_context, void *addr) {
15 |     HAP_release_VTCM(addr);
16 | }
17 | 
18 | }
19 | 


--------------------------------------------------------------------------------
/src/runtime/qurt_threads_tsan.cpp:
--------------------------------------------------------------------------------
1 | #define TSAN_ANNOTATIONS 1
2 | 
3 | #include "qurt_threads.cpp"
4 | 


--------------------------------------------------------------------------------
/src/runtime/qurt_yield.cpp:
--------------------------------------------------------------------------------
 1 | #include "runtime_internal.h"
 2 | 
 3 | // TODO: what should we use here???
 4 | 
 5 | namespace Halide { namespace Runtime { namespace Internal {
 6 | 
 7 | WEAK void halide_thread_yield() {  // Currently a no-op: the body is empty, so calling this does nothing.
 8 | }
 9 | 
10 | }}}
11 | 


--------------------------------------------------------------------------------
/src/runtime/riscv_cpu_features.cpp:
--------------------------------------------------------------------------------
 1 | #include "HalideRuntime.h"
 2 | #include "cpu_features.h"
 3 | 
 4 | namespace Halide { namespace Runtime { namespace Internal {
 5 | 
 6 | WEAK CpuFeatures halide_get_cpu_features() {
 7 |     // For now, no version specific features, though RISCV promises to have many.
 8 |     return CpuFeatures();
 9 | }
10 | 
11 | }}} // namespace Halide::Runtime::Internal
12 | 


--------------------------------------------------------------------------------
/src/runtime/scoped_mutex_lock.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_RUNTIME_SCOPED_MUTEX_LOCK_H
 2 | #define HALIDE_RUNTIME_SCOPED_MUTEX_LOCK_H
 3 | 
 4 | #include "HalideRuntime.h"
 5 | 
 6 | namespace Halide { namespace Runtime { namespace Internal {
 7 | 
 8 | // An RAII mutex locking operation
 9 | struct ScopedMutexLock {
10 |     halide_mutex *mutex;
11 | 
12 |     ScopedMutexLock(halide_mutex *mutex) __attribute__((always_inline)) : mutex(mutex) {
13 |         halide_mutex_lock(mutex);
14 |     }
15 | 
16 |     ~ScopedMutexLock() __attribute__((always_inline)) {
17 |         halide_mutex_unlock(mutex);
18 |     }
19 | };
20 | 
21 | }}} // namespace Halide::Runtime::Internal
22 | 
23 | #endif
24 | 


--------------------------------------------------------------------------------
/src/runtime/scoped_spin_lock.h:
--------------------------------------------------------------------------------
 1 | #ifndef HALIDE_SCOPED_SPIN_LOCK_H
 2 | #define HALIDE_SCOPED_SPIN_LOCK_H
 3 | 
 4 | namespace Halide { namespace Runtime { namespace Internal {
 5 | 
 6 | // An RAII spin lock.
 7 | struct ScopedSpinLock {
 8 |     volatile int *lock;
 9 | 
10 |     ScopedSpinLock(volatile int *l) __attribute__((always_inline)) : lock(l) {
11 |         while (__sync_lock_test_and_set(lock, 1)) { }
12 |     }
13 | 
14 |     ~ScopedSpinLock() __attribute__((always_inline)) {
15 |         __sync_lock_release(lock);
16 |     }
17 | };
18 | 
19 | }}} // namespace Halide::Runtime::Internal
20 | 
21 | #endif
22 | 


--------------------------------------------------------------------------------
/src/runtime/ssp.cpp:
--------------------------------------------------------------------------------
 1 | #include "HalideRuntime.h"
 2 | #include "runtime_internal.h"
 3 | 
 4 | // LLVM sometimes likes to generate calls to a stack smashing
 5 | // protector, but some build environments (e.g. native client), don't
 6 | // provide libssp reliably. We define two weak symbols here to help
 7 | // things along.
 8 | 
 9 | extern "C" {
10 | 
11 | WEAK char *__stack_chk_guard = (char *)(0xdeadbeef);
12 | 
13 | WEAK void __stack_chk_fail() {  // Weak fallback definition: report the failure, then abort.
14 |     halide_error(NULL, "Memory error: stack smashing protector changed!\n");  // Reported with a NULL first argument.
15 |     Halide::Runtime::Internal::halide_abort();  // Called after the report; NOTE(review): presumably does not return -- confirm.
16 | }
17 | 
18 | }
19 | 


--------------------------------------------------------------------------------
/src/runtime/wasm_cpu_features.cpp:
--------------------------------------------------------------------------------
 1 | #include "HalideRuntime.h"
 2 | #include "cpu_features.h"
 3 | 
 4 | namespace Halide { namespace Runtime { namespace Internal {
 5 | 
 6 | WEAK CpuFeatures halide_get_cpu_features() {
 7 |     CpuFeatures features;
 8 | 
 9 |     // There isn't a way to determine what features are available --
10 |     // if a feature we need isn't available, we couldn't
11 |     // even load. So just declare that all wasm-related features are
12 |     // known and available.
13 |     features.set_known(halide_target_feature_wasm_simd128);
14 |     features.set_available(halide_target_feature_wasm_simd128);
15 | 
16 |     return features;
17 | }
18 | 
19 | }}}  // namespace Halide::Runtime::Internal
20 | 


--------------------------------------------------------------------------------
/src/runtime/windows_cuda.cpp:
--------------------------------------------------------------------------------
1 | #define WINDOWS
2 | #include "cuda.cpp"
3 | 


--------------------------------------------------------------------------------
/src/runtime/windows_io.cpp:
--------------------------------------------------------------------------------
 1 | #include "HalideRuntime.h"
 2 | 
 3 | extern "C" {
 4 | 
 5 | WEAK void halide_default_print(void *user_context, const char *str) {
 6 |     write(STDOUT_FILENO, str, strlen(str));
 7 | }
 8 | 
 9 | }
10 | 


--------------------------------------------------------------------------------
/src/runtime/windows_opencl.cpp:
--------------------------------------------------------------------------------
1 | #define WINDOWS
2 | #include "opencl.cpp"
3 | 


--------------------------------------------------------------------------------
/src/runtime/windows_profiler.cpp:
--------------------------------------------------------------------------------
1 | #define WINDOWS
2 | #include "profiler.cpp"
3 | 


--------------------------------------------------------------------------------
/src/runtime/windows_threads_tsan.cpp:
--------------------------------------------------------------------------------
1 | #define TSAN_ANNOTATIONS 1
2 | 
3 | #include "windows_threads.cpp"
4 | 


--------------------------------------------------------------------------------
/src/runtime/windows_yield.cpp:
--------------------------------------------------------------------------------
 1 | #include "runtime_internal.h"
 2 | 
 3 | #ifdef BITS_64
 4 | #define WIN32API
 5 | #else
 6 | #define WIN32API __stdcall
 7 | #endif
 8 | 
 9 | extern "C" WIN32API int32_t Sleep(int32_t timeout);
10 | 
11 | namespace Halide { namespace Runtime { namespace Internal {
12 | 
13 | WEAK void halide_thread_yield() {  // Implemented via the Sleep function declared above.
14 |     Sleep(0);  // Zero timeout; the returned value is ignored.
15 | }
16 | 
17 | }}}
18 | 


--------------------------------------------------------------------------------
/test/auto_schedule/unused_func.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | using namespace Halide;
 4 | 
 5 | int main(int argc, char **argv) {
 6 |     Var x("x"), y("y");
 7 |     Func f("f"), g("g"), h("h");
 8 | 
 9 |     g(x) = x;
10 |     g(x) += 10;
11 |     h(x) = x*x;
12 |     f(x) = select(false, g(x + 1), h(x + 1));
13 | 
14 |     f.set_estimates({{0, 256}});
15 | 
16 |     Target target = get_jit_target_from_environment();
17 |     Pipeline p(f);
18 | 
19 |     p.auto_schedule(target);
20 | 
21 |     // Inspect the schedule
22 |     f.print_loop_nest();
23 | 
24 |     // Run the schedule
25 |     p.realize(256);
26 | 
27 |     printf("Success!\n");
28 |     return 0;
29 | }
30 | 


--------------------------------------------------------------------------------
/test/common/expect_failure.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Simple bash script that will execute another process, which is *expected* to fail;
 4 | # this is useful mainly for running test/error and other expected-to-fail tests.
 5 | #
 6 | # Usage: expect_failure.sh <program>
 7 | # Exit status: 0 if <program> exits non-zero; non-zero if <program> exits 0
 8 | # or if no program was given.
 9 | #
10 | 
11 | # Without this guard an empty "$1" is run as an empty command, which fails,
12 | # and the script would wrongly report "Success" for a test that never ran.
13 | if [[ -z "$1" ]]
14 | then
15 |   echo "Usage: $0 <program>" >&2
16 |   exit 1
17 | fi
18 | 
19 | # Quoted so a path containing spaces or glob characters is echoed verbatim
20 | # instead of being word-split and pathname-expanded.
21 | echo "Running $1"
22 | 
23 | "$1"
24 | if [[ "$?" -ne "0" ]]
25 | then
26 |   echo "Success"
27 |   exit 0
28 | fi
29 | 
30 | echo "Expected Failure from '$1', but got Success"
31 | exit -1
32 | 


--------------------------------------------------------------------------------
/test/correctness/autoschedule_small_pure_update.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | using namespace Halide;
 4 | 
 5 | int main(int argc, char **argv) {
 6 |     Buffer<float> in(13, 17);
 7 |     ImageParam in_param(Float(32), 2);
 8 | 
 9 |     Func g, h;
10 |     Var x, y;
11 | 
12 |     RDom r(0, 17);
13 |     g(x) += in_param(x, r);
14 | 
15 |     h(x, y) = in_param(x, y) + g(x);
16 | 
17 |     h.set_estimates({{0, 13}, {0, 17}});
18 |     in_param.set_estimates({{0, 13}, {0, 17}});
19 | 
20 |     Pipeline p(h);
21 |     p.auto_schedule(Target("host"));
22 | 
23 |     in_param.set(in);
24 | 
25 |     // Ensure the autoscheduler doesn't try to RoundUp the pure loop
26 |     // in g's update definition.
27 |     p.realize(13, 17);
28 | 
29 |     return 0;
30 | }
31 | 


--------------------------------------------------------------------------------
/test/correctness/bad_likely.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Func f;
 8 |     Var x;
 9 |     // Use a likely intrinsic to tag a disjoint range.
10 |     f(x) = select(x < 10 || x > 20, likely(1), 2);
11 | 
12 |     Buffer<int> im = f.realize(30);
13 |     for (int x = 0; x < 30; x++) {
14 |         int correct = (x < 10 || x > 20) ? 1 : 2;
15 |         if (im(x) != correct) {
16 |             printf("im(%d) = %d instead of %d\n", x, im(x), correct);
17 |             return -1;
18 |         }
19 |     }
20 | 
21 |     printf("Success!\n");
22 |     return 0;
23 | }
24 | 


--------------------------------------------------------------------------------
/test/correctness/bool_compute_root_vectorize.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Var x, y;
 8 | 
 9 |     // pred stores one bool per pixel: true where x < y.
10 |     Func pred("pred");
11 |     pred(x, y) = x < y;
12 | 
13 |     // selector converts the stored bool into an int so it can be summed.
14 |     Func selector("selector");
15 |     selector(x, y) = select(pred(x, y), 1, 0);
16 | 
17 |     // Load a vector of 8 bools
18 |     pred.compute_root();
19 |     selector.compute_root().vectorize(x, 8);
20 | 
21 |     RDom range(0, 100, 0, 100);
22 |     int32_t result = evaluate_may_gpu<int32_t>(sum(selector(range.x, range.y)));
23 | 
24 |     // Number of (x, y) pairs in [0, 100) x [0, 100) with x < y: 100 * 99 / 2.
25 |     const int32_t correct = 4950;
26 | 
27 |     // Checked explicitly rather than with assert(): assert is compiled out
28 |     // when NDEBUG is defined, which would let a wrong result pass silently.
29 |     if (result != correct) {
30 |         printf("result = %d instead of %d\n", result, correct);
31 |         return -1;
32 |     }
33 | 
34 |     printf("Success!\n");
35 |     return 0;
36 | }
37 | 


--------------------------------------------------------------------------------
/test/correctness/bounds_of_monotonic_math.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Func f;
 8 |     Var x;
 9 | 
10 |     ImageParam input(Float(32), 1);
11 | 
12 |     f(x) = input(cast<int>(ceil(0.3f * ceil(0.4f * floor(x * 22.5f)))));
13 | 
14 |     f.infer_input_bounds(10);
15 | 
16 |     Buffer<float> in = input.get();
17 | 
18 |     int correct = 26;
19 |     if (in.width() != correct) {
20 |         printf("Width is %d instead of %d\n", in.width(), correct);
21 |         return -1;
22 |     }
23 | 
24 |     printf("Success!\n");
25 |     return 0;
26 | }
27 | 


--------------------------------------------------------------------------------
/test/correctness/bounds_of_multiply.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | // See https://github.com/halide/Halide/issues/3070
 5 | 
 6 | using namespace Halide;
 7 | 
 8 | template <typename T>
 9 | void test() {
10 |     Param<T> bound;
11 |     ImageParam in(UInt(8), 1);
12 |     Var x;
13 |     Func f;
14 | 
15 |     f(x) = in(clamp(x, 0, bound * 2 - 1));
16 | 
17 |     Buffer<uint8_t> foo(10);
18 |     foo.fill(0);
19 |     in.set(foo);
20 |     bound.set(5);
21 | 
22 |     auto result = f.realize(200);
23 | }
24 | 
25 | int main(int argc, char **argv) {
26 |     printf("Trying int32_t\n");
27 |     test<int32_t>();
28 |     printf("Trying int16_t\n");
29 |     test<int16_t>();
30 |     printf("Success!\n");
31 |     return 0;
32 | }
33 | 


--------------------------------------------------------------------------------
/test/correctness/circular_reference_leak.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 | 
 8 |     // Recursive functions can create circular references. These could
 9 |     // cause leaks. Run this test under valgrind to check.
10 |     for (int i = 0; i < 10000; i++) {
11 |         Func f;
12 |         Var x;
13 |         RDom r(0, 10);
14 |         f(x) = x;
15 |         f(r) = f(r-1) + f(r+1);
16 |     }
17 | 
18 |     printf("Success!\n");
19 |     return 0;
20 | 
21 | }
22 | 


--------------------------------------------------------------------------------
/test/correctness/compare_vars.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     // f is 1 where x == y and 0 everywhere else.
 8 |     Func f;
 9 |     Var x, y;
10 |     f(x, y) = select(x == y, 1, 0);
11 |     const int size = 10;
12 |     Buffer<int> im = f.realize(size, size);
13 | 
14 |     // Compare every realized pixel with the same predicate evaluated in C++.
15 |     for (int row = 0; row < size; row++) {
16 |         for (int col = 0; col < size; col++) {
17 |             const int correct = (col == row);
18 |             if (im(col, row) == correct) continue;
19 |             printf("im(%d, %d) = %d instead of %d\n", col, row, im(col, row), correct);
20 |             return -1;
21 |         }
22 |     }
23 | 
24 |     printf("Success!\n");
25 |     return 0;
26 | }
27 | 


--------------------------------------------------------------------------------
/test/correctness/compute_with_in.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Var x("x"), y("y");
 8 |     Func one("one"), two("two"), three("three"), output("output");
 9 | 
10 |     one(x, y) = x + y;
11 |     two(x, y) = one(x, y) + 2;
12 |     three(x, y) = one(x, y) + 3;
13 |     output(x, y) = two(x, y) + three(x, y);
14 | 
15 |     two.compute_root();
16 |     one.in(three).compute_root().compute_with(two, Var::outermost());
17 |     one.compute_root();
18 |     one.compute_at(two, Var::outermost());
19 | 
20 |     output.realize(64, 64);
21 | 
22 |     printf("Success!\n");
23 |     return 0;
24 | }
25 | 


--------------------------------------------------------------------------------
/test/correctness/computed_index.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | using namespace Halide;
 4 | 
 5 | int main(int argc, char **argv) {
 6 |     Buffer<uint8_t> in1(256, 256);
 7 |     Buffer<uint8_t> in2(256, 256, 10);
 8 | 
 9 |     Func f;
10 |     Var x, y;
11 | 
12 |     f(x, y) = in2(x, y, clamp(in1(x, y), 0, 9));
13 |     Buffer<uint8_t> out = f.realize(256, 256);
14 | 
15 |     printf("Success!\n");
16 |     return 0;
17 | }
18 | 


--------------------------------------------------------------------------------
/test/correctness/dynamic_reduction_bounds.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | #include <stdio.h>
 4 | 
 5 | using namespace Halide;
 6 | 
 7 | int main(int argc, char **argv) {
 8 |     ImageParam input(Float(32), 2);
 9 | 
10 |     Var x, y, z;
11 |     RDom dom(0, input.width()*8);
12 |     Func f;
13 |     Expr hard_to_reason_about = cast<int>(hypot(input.width(), input.height()));
14 |     f(x, y, z) = 1;
15 |     f(x, y, dom / hard_to_reason_about) += 1;
16 |     f.compile_jit();
17 | 
18 |     Buffer<float> im(32, 32);
19 |     input.set(im);
20 | 
21 |     f.realize(100, 100, 16);
22 | 
23 |     printf("Success!\n");
24 |     return 0;
25 | }
26 | 


--------------------------------------------------------------------------------
/test/correctness/explicit_inline_reductions.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Func f("f");
 8 |     Var x("x"), y("y");
 9 |     RDom r1(0, 10, "r1"), r2(0, 10, "r2"), r3(0, 10, "r3");
10 | 
11 |     f(x, y) = product(sum(r1, r1 + r3) + sum(r2, r2 * 2 + r3));
12 |     f(r1, y) += product(r3, sum(r2, r1 + r2 + r3));
13 | 
14 |     Buffer<int> result = f.realize(10, 10);
15 | 
16 |     return 0;
17 | }
18 | 
19 | 


--------------------------------------------------------------------------------
/test/correctness/issue_3926.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | using namespace Halide;
 4 | 
 5 | int main(int argc, char* argv[]) {
 6 |   Func f("f"), g("g");
 7 |   Var x("x"), y("y");
 8 |   Var tx("tx"), ty("ty");
 9 |   Param<bool> param;
10 | 
11 |   f(x) = x;
12 |   g(x, y) = f(x) + select(param, 1, 2);
13 | 
14 |   //g.gpu_tile(x, y, tx, ty, 8, 8, TailStrategy::GuardWithIf);
15 |   // Both specializations (param true / param false) get the same 8x8 tiling;
16 |   // anything else falls through to specialize_fail.
17 |   g.specialize(param).tile(x, y, tx, ty, 8, 8, TailStrategy::GuardWithIf);
18 |   g.specialize(!param).tile(x, y, tx, ty, 8, 8, TailStrategy::GuardWithIf);
19 |   g.specialize_fail("Unknown");
20 |   f.in().compute_at(g, tx);
21 | 
22 |   // 34 is not a multiple of the tile size 8, so the GuardWithIf tail is exercised.
23 |   Buffer<int> out(34, 34);
24 |   param.set(false);
25 |   g.realize(out);
26 | 
27 |   // Same "Success!" marker and explicit return as the other tests.
28 |   printf("Success!\n");
29 |   return 0;
30 | }
31 | 


--------------------------------------------------------------------------------
/test/correctness/many_updates.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     const int N = 20;
 8 | 
 9 |     Func f;
10 |     Var x, y;
11 |     f(x, y) = x + y;
12 |     for (int i = 0; i < N; i++) {
13 |         f(x, i) += 1;
14 |         f(i, y) += 1;
15 |     }
16 |     f.compute_root();
17 | 
18 |     Buffer<int> im = f.realize(N, N);
19 | 
20 |     printf("Success!\n");
21 |     return 0;
22 | 
23 | }
24 | 


--------------------------------------------------------------------------------
/test/correctness/non_vector_aligned_embeded_buffer.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <iostream>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     char storage[5 * sizeof(int32_t)]{0};
 8 |     char *ptr = storage;
 9 |     ptr += sizeof(int32_t);
10 |     Buffer<int32_t> foo((int32_t *)(ptr), 4);
11 | 
12 |     Func f;
13 |     Var x;
14 | 
15 |     f(x) = foo(x);
16 |     f.vectorize(x, 4);
17 |     f.output_buffer().dim(0).set_min(0);
18 |     auto result = f.realize(4);
19 | 
20 |     return 0;
21 | }
22 | 


--------------------------------------------------------------------------------
/test/correctness/parallel.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     // Check that a loop scheduled as parallel still produces x*k at every index.
 8 |     Var x;
 9 |     Func f;
10 |     Param<int> k;
11 |     k.set(3);
12 | 
13 |     f(x) = x * k;
14 |     f.parallel(x);
15 | 
16 |     const int extent = 16;
17 |     Buffer<int> im = f.realize(extent);
18 | 
19 |     for (int idx = 0; idx < extent; idx++) {
20 |         const int expected = idx * 3;
21 |         if (im(idx) == expected) continue;
22 |         printf("im(%d) = %d\n", idx, im(idx));
23 |         return -1;
24 |     }
25 | 
26 |     printf("Success!\n");
27 |     return 0;
28 | }
29 | 


--------------------------------------------------------------------------------
/test/correctness/plain_c_includes.c:
--------------------------------------------------------------------------------
 1 | #ifdef __cplusplus
 2 | #error "This test must be compiled as plain C, without C++ enabled."
 3 | #endif
 4 | 
 5 | #include <stdio.h>
 6 | 
 7 | // Verify that all HalideRuntime*.h files can be compiled without C++
 8 | #include "HalideRuntime.h"
 9 | #include "HalideRuntimeCuda.h"
10 | #include "HalideRuntimeHexagonHost.h"
11 | #include "HalideRuntimeMetal.h"
12 | #include "HalideRuntimeOpenCL.h"
13 | #include "HalideRuntimeOpenGL.h"
14 | #include "HalideRuntimeOpenGLCompute.h"
15 | #include "HalideRuntimeQurt.h"
16 | 
17 | 
18 | int main(int argc, char **argv) {
19 |     printf("Success!\n");
20 |     return 0;
21 | }
22 | 


--------------------------------------------------------------------------------
/test/correctness/shared_self_references.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     // Check that recursive references get tracked properly
 8 |     {
 9 |         Func f;
10 |         Var x;
11 |         f(x) = x;
12 |         {
13 |             Expr e = f(2);
14 |             f(0) = e;
15 |             f(1) = e;
16 |         } // Destroy e
17 |     } // Destroy f
18 | 
19 |     // f should have been cleaned up. valgrind will complain if it
20 |     // hasn't been.
21 | 
22 |     printf("Success!\n");
23 |     return 0;
24 | }
25 | 


--------------------------------------------------------------------------------
/test/correctness/shifted_image.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 | 
 8 |     halide_dimension_t shape[] = {{100, 10, 1},
 9 |                                   {300, 10, 10},
10 |                                   {500, 10, 100},
11 |                                   {400, 10, 1000}};
12 |     Buffer<int> buf(nullptr, 4, shape);
13 |     buf.allocate();
14 | 
15 |     buf.data()[0] = 17;
16 |     if (buf(100, 300, 500, 400) != 17) {
17 |         printf("Image indexing into buffers with non-zero mins is broken\n");
18 |         return -1;
19 |     }
20 | 
21 |     return 0;
22 | }
23 | 


--------------------------------------------------------------------------------
/test/correctness/simplified_away_embedded_image.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 | 
 8 |     // What happens if an embedded image gets simplified away?
 9 |     Buffer<float> input(32, 32);
10 | 
11 |     Var x("x"), y("y");
12 |     Func foo("foo");
13 | 
14 |     foo(x, y) = input(x, y) - input(x, y);
15 | 
16 |     Buffer<float> output(32, 32);
17 | 
18 |     foo.realize(output);
19 | 
20 |     // Any non-error is a success.
21 |     printf("Success!\n");
22 | 
23 |     return 0;
24 | }
25 | 


--------------------------------------------------------------------------------
/test/correctness/split_reuse_inner_name_bug.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Var x("x"), x0, x1, x2, x3;
 8 |     Func f("f");
 9 | 
10 |     f(x) = 1;
11 |     f.compute_root().split(x, x0, x, 16).split(x, x, x1, 2).split(x, x2, x, 4).split(x, x, x3, 2);
12 |     f.realize(1024);
13 | 
14 |     printf("Success!\n");
15 |     return 0;
16 | }
17 | 


--------------------------------------------------------------------------------
/test/correctness/tracing_bounds.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     // Turning on tracing wraps certain Exprs. This shouldn't affect
 8 |     // bounds inference.
 9 | 
10 |     Func f, g;
11 |     Var x;
12 |     f(x) = clamp(x, 0, 100);
13 |     f.compute_root();
14 |     g(x) = f(f(x));
15 |     // f is known to be bounded, so this means we need 101 values of
16 |     // f. This shouldn't be confused by tracing loads of f or stores
17 |     // to g.
18 |     f.trace_loads();
19 |     g.trace_stores();
20 | 
21 |     // Shouldn't throw an error about unbounded access.
22 |     g.compile_jit();
23 | 
24 |     printf("Success!\n");
25 | 
26 |     return 0;
27 | }
28 | 


--------------------------------------------------------------------------------
/test/correctness/transitive_bounds.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Func f, g;
 8 |     Var x;
 9 |     f(x) = x;
10 |     g(x) = f(x);
11 | 
12 |     g.bound(x, 0, 4);
13 | 
14 |     // Should be ok to unroll x because it's bounded by a constant in its only consumer
15 |     f.compute_root().unroll(x);
16 | 
17 |     g.realize(4);
18 | 
19 |     printf("Success!\n");
20 |     return 0;
21 | }
22 | 


--------------------------------------------------------------------------------
/test/correctness/two_vector_args.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 | 
 8 |     Func f, g;
 9 |     Var x, y;
10 | 
11 |     g(x, y) = x+y;
12 | 
13 |     f(x, y) = g(x, x);
14 | 
15 |     f.vectorize(x, 4);
16 | 
17 |     Buffer<int> out = f.realize(4, 4);
18 | 
19 |     printf("Success!\n");
20 | 
21 |     return 0;
22 | }
23 | 
24 | 


--------------------------------------------------------------------------------
/test/correctness/unrolled_reduction.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Var x("x"), y("y"), z("z");
 8 | 
 9 |     Buffer<float> noise(32);
10 |     for (int i = 0; i < 32; i++) {
11 |         noise(i) = (float)rand() / RAND_MAX;
12 |     }
13 | 
14 |     Func f("f");
15 |     Func g("g");
16 |     RDom r(0, 32);
17 | 
18 |     g(x, y) = 0.0f;
19 |     g(r.x, y) += noise(r.x);
20 | 
21 |     f(x, y, z) = g(x, y) + g(x+1, y);
22 | 
23 |     RVar rxo, rxi;
24 |     g.compute_at(f, y).update().split(r.x, rxo, rxi, 2).unroll(rxi);
25 |     f.unroll(z, 2);
26 | 
27 |     Buffer<float> im = f.realize(64, 64, 4);
28 | 
29 |     printf("Success!\n");
30 |     return 0;
31 | }
32 | 


--------------------------------------------------------------------------------
/test/correctness/unused_func.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | using namespace Halide;
 4 | 
 5 | int main(int argc, char **argv) {
 6 |     Var x, y, xi, yi;
 7 | 
 8 |     ImageParam input(Float(32), 2);
 9 | 
10 |     Func filtered;
11 |     filtered(x, y) = input(x, y);
12 |     filtered.compute_root();
13 | 
14 |     Func false_func;
15 |     false_func() = cast<bool>(0);
16 | 
17 |     Func result;
18 |     result(x, y) = select(false_func(), filtered(x, y), input(0, 0));
19 | 
20 |     // The bounds required on the input depend on filtered, but
21 |     // filtered is not going to be computed because it simplified away
22 |     // entirely. This test ensures things compile anyway.
23 |     result.compile_jit();
24 | 
25 |     return 0;
26 | }
27 | 


--------------------------------------------------------------------------------
/test/correctness/update_chunk.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     // This test computes a function within the update step of a reduction
 8 | 
 9 |     Func f, g;
10 |     Var x, y, z;
11 |     RDom r(0, 10);
12 | 
13 |     f(x, y) = x*y;
14 |     g(x, y) = 0;
15 |     g(x, r) = f(r, x)+1;
16 | 
17 |     f.compute_at(g, r);
18 |     g.realize(10, 10);
19 | 
20 |     printf("Success!\n");
21 |     return 0;
22 | }
23 | 


--------------------------------------------------------------------------------
/test/correctness/vector_extern.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | #include <math.h>
 4 | 
 5 | using namespace Halide;
 6 | 
 7 | int main(int argc, char **argv) {
 8 |     Var x, y;
 9 |     Func f, g;
10 | 
11 |     printf("Defining function...\n");
12 | 
13 |     f(x) = sqrt(cast<float>(x));
14 | 
15 |     f.vectorize(x, 4);
16 |     Buffer<float> im = f.realize(32);
17 | 
18 |     for (int i = 0; i < 32; i++) {
19 |         float correct = sqrtf((float)i);
20 |         if (fabs(im(i) - correct) > 0.001) {
21 |             printf("im(%d) = %f instead of %f\n", i, im(i), correct);
22 |             return -1;
23 |         }
24 |     }
25 | 
26 |     printf("Success!\n");
27 |     return 0;
28 | }
29 | 


--------------------------------------------------------------------------------
/test/correctness/vector_print_bug.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | using namespace Halide;
 4 | 
 5 | int main(int argc, char **argv) {
 6 |     Func f;
 7 |     Var x;
 8 |     f(x) = print(x);
 9 |     f.vectorize(x, 4);
10 |     f.realize(8);
11 |     return 0;
12 | }
13 | 


--------------------------------------------------------------------------------
/test/correctness/vectorize_mixed_widths.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 | 
 8 |     Var x("x");
 9 |     Func f("f"), g("g");
10 | 
11 |     f(x) = 2*x;
12 |     g(x) = f(x)/2;
13 | 
14 |     Var xo, xi;
15 |     f.compute_at(g, x).split(x, xo, xi, 16).vectorize(xi, 8).unroll(xi);
16 |     g.compute_root().vectorize(x, 16);
17 | 
18 |     Buffer<int> r = g.realize(16);
19 |     for (int i = 0; i < 16; i++) {
20 |         if (r(i) != i) {
21 |             std::cout << "Error at " << i << ": " << r(i) << std::endl;
22 |             return -1;
23 |         }
24 |     }
25 | 
26 |     printf("Success!\n");
27 |     return 0;
28 | }
29 | 


--------------------------------------------------------------------------------
/test/error/ambiguous_inline_reductions.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Func f("f");
 8 |     Var x("x"), y("y");
 9 |     RDom r1(0, 10, "r1"), r2(0, 10, "r2"), r3(0, 10, "r3");
10 | 
11 |     f(x, y) = product(sum(r1, r1 + r3) + sum(r2, r2 * 2 + r3));
12 | 
13 |     // Is this the product over r1, or r3? It must be r3 because r1 is
14 |     // used on the LHS, but Halide's not smart enough to know
15 |     // that. All it sees is a product over an expression with two
16 |     // reduction domains.
17 |     f(r1, y) += product(sum(r2, r1 + r2 + r3));
18 | 
19 |     Buffer<int> result = f.realize(10, 10);
20 | 
21 |     return 0;
22 | }
23 | 
24 | 


--------------------------------------------------------------------------------
/test/error/bad_bound.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Func f("f");
 8 |     Var x("x"), y("y");
 9 |     // f is defined over x only; y is declared but never becomes a dimension of f.
10 |     f(x) = 0;
11 |     f.bound(y, 0, 10);  // bounds a variable f does not have; presumably the error this test exists to trigger
12 | 
13 |     return 0;
14 | }
15 | 


--------------------------------------------------------------------------------
/test/error/bad_compute_with.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Func f("f");
 8 |     Var x("x"), y("y");
 9 |     // f's update stage is scheduled compute_with f itself (below); presumably the error under test.
10 |     f(x, y) = x + y;
11 |     f(x, y) += 2;
12 |     f.update(0).compute_with(f, x);
13 | 
14 |     f.realize(10, 10);
15 | 
16 |     return 0;
17 | }
18 | 


--------------------------------------------------------------------------------
/test/error/bad_compute_with_invalid_specialization.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Var x("x"), y("y");
 8 |     Func f("f"), g("g"), h("h");
 9 | 
10 |     f(x, y) = x + y;
11 |     g(x, y) = x - y;
12 |     h(x, y) = f(x - 1, y + 1) + g(x + 2, y - 2);
13 | 
14 |     f.compute_root();
15 |     g.compute_root();
16 | 
17 |     Param<bool> tile;
18 |     Var xo("xo"), xi("xi");
19 |     g.specialize(tile).split(x, xo, xi, 8);
20 |     g.compute_with(f.specialize(tile), y, LoopAlignStrategy::AlignEnd);
21 | 
22 |     tile.set(true);
23 |     h.realize(200, 200);
24 | 
25 |     return 0;
26 | }


--------------------------------------------------------------------------------
/test/error/bad_compute_with_parent_func_not_used.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Var x("x"), y("y");
 8 |     Func f("f"), g("g"), h("h"), p("p");
 9 | 
10 |     f(x, y) = x + y;
11 |     g(x, y) = x - y;
12 |     p(x, y) = x * y;
13 |     h(x, y) = g(x + 2, y - 2) + p(x, y);
14 |     h(x, y) += f(x - 1, y + 1);
15 | 
16 |     f.compute_at(h, y);
17 |     g.compute_at(h, y);
18 |     p.compute_at(h, y);
19 | 
20 |     p.compute_with(f, x);
21 |     g.compute_with(f, x);
22 |     h.realize(200, 200);
23 | 
24 |     return 0;
25 | }
26 | 


--------------------------------------------------------------------------------
/test/error/bad_const_cast.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Func f;
 8 |     Var x;
 9 | 
10 |     // The 256 here would be implicitly cast to uint8, and converted to
11 |     // zero. That's bad. So we check for that inside IROperator.cpp.
12 |     f(x) = cast<uint8_t>(x) % 256;
13 | 
14 |     printf("How did I get here?\n");
15 |     return 0;
16 | }
17 | 


--------------------------------------------------------------------------------
/test/error/bad_device_api.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Target t("host");
 8 |     (void)get_device_interface_for_device_api((DeviceAPI)-1, t, "Bad DeviceAPI");
 9 |     
10 |     printf("I should not have reached here\n");
11 | 
12 |     return 0;
13 | }
14 | 


--------------------------------------------------------------------------------
/test/error/bad_dimensions.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     ImageParam im(UInt(8), 2);
 8 |     // im is declared with 2 dimensions.
 9 |     Var x, y;
10 |     Func f;
11 | 
12 |     f(x, y) = im(x, y);
13 |     // b has 3 dimensions, so it does not match the dimensionality im was declared with.
14 |     Buffer<uint8_t> b(10, 10, 3);
15 |     im.set(b);
16 | 
17 |     f.realize(10, 10);
18 |     // Only reached if binding or realizing with the mismatched buffer did not raise an error.
19 |     printf("There should have been an error\n");
20 |     return 0;
21 | }
22 | 


--------------------------------------------------------------------------------
/test/error/bad_extern_split.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Func f;
 8 |     Var x;
 9 |     f.define_extern("test", {}, Int(32), {x});
10 |     Var xo;
11 |     f.split(x, xo, x, 8).reorder(xo, x);
12 | 
13 |     f.compile_jit();
14 |     return 0;
15 | }
16 | 


--------------------------------------------------------------------------------
/test/error/bad_fold.cpp:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include "Halide.h"
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Var x, y, c;
 8 | 
 9 |     Func f, g;
10 |     // g reads f at rows y-1 and y+1, i.e. a window spanning three rows of f.
11 |     f(x, y) = x;
12 |     g(x, y) = f(x-1, y+1) + f(x, y-1);
13 |     f.store_root().compute_at(g, y).fold_storage(y, 2);
14 |     // The fold factor of 2 is smaller than that window; presumably this is the bad fold the test expects.
15 |     Buffer<int> im = g.realize(100, 1000);
16 |     // Reaching this point means no error was raised, so report it and return a failing status.
17 |     printf("Should have gotten a bad fold!\n");
18 |     return -1;
19 | }
20 | 


--------------------------------------------------------------------------------
/test/error/bad_host_alignment.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | using namespace Halide::Internal;
 6 | 
 7 | IRPrinter irp(std::cerr);
 8 | int main(int argc, char **argv) {
 9 |     Func f;
10 |     Var x, y;
11 |     ImageParam in(UInt(8), 2);
12 | 
13 |     Buffer<uint8_t> param_buf(11, 10);
14 |     param_buf.crop(0, 1, 10);
15 | 
16 |     in.set_host_alignment(512);
17 |     f(x, y) = in(x, y);
18 |     f.compute_root();
19 | 
20 |     in.set(param_buf);
21 |     Buffer<uint8_t> result = f.realize(10, 10);
22 | 
23 |     printf("I should not have reached here\n");
24 | 
25 |     return 0;
26 | }
27 | 


--------------------------------------------------------------------------------
/test/error/bad_rvar_order.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     RDom r1(0, 10, 0, 10);
 8 | 
 9 |     Func f("f");
10 |     Var x, y;
11 |     f(x, y) = x + y;
12 |     f(r1.x, r1.y) += f(r1.y, r1.x);
13 | 
14 |     // It's not permitted to change the relative ordering of reduction
15 |     // domain variables when it could change the meaning.
16 |     f.update().reorder(r1.y, r1.x);
17 | 
18 |     f.realize(10, 10);
19 | 
20 |     printf("Success!\n");
21 |     return 0;
22 | }
23 | 


--------------------------------------------------------------------------------
/test/error/bad_schedule.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Func f, g;
 8 |     Var x, y;
 9 | 
10 |     f(x) = x;
11 |     g(x) = f(x);
12 | 
13 |     // f is inlined, so this schedule is bad.
14 |     f.vectorize(x, 4);
15 | 
16 |     g.realize(10);
17 | 
18 |     printf("There should have been an error\n");
19 |     return 0;
20 | }
21 | 


--------------------------------------------------------------------------------
/test/error/bad_store_at.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Func f("f"), g("g"), h("h");
 8 |     Var x("x"), y("y");
 9 | 
10 |     f(x) = x;
11 |     g(x) = f(x);
12 |     h(x, y) = g(x);
13 | 
14 |     g.compute_at(h, y);
15 | 
16 |     // This makes no sense, because the compute_at level is higher than the store_at level
17 |     f.store_at(h, y).compute_root();
18 | 
19 |     h.realize(10, 10);
20 | 
21 |     printf("I should not have reached here\n");
22 |     return 0;
23 | 
24 | }
25 | 


--------------------------------------------------------------------------------
/test/error/buffer_larger_than_two_gigs.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | int main(int argc, char **argv) {
 6 |     if (sizeof(void *) == 8) {  // 8-byte pointers
 7 |         Buffer<uint8_t> result(1 << 24, 1 << 24, 1 << 24);  // 2^24 * 2^24 * 2^24 = 2^72 one-byte elements
 8 |     } else {
 9 |         Buffer<uint8_t> result(1 << 12, 1 << 12, 1 << 8);  // 2^12 * 2^12 * 2^8 = 2^32 one-byte elements (4 GiB)
10 |     }
11 |     printf("Success!\n");  // NOTE(review): per the file name, constructing either buffer is expected to fail before this prints
12 | }
13 | 


--------------------------------------------------------------------------------
/test/error/clamp_out_of_range.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Var x;
 8 |     Func f;
 9 |     // The upper clamp bound 255 does not fit in int8_t (max 127), the type of the value being clamped.
10 |     f(x) = clamp(cast<int8_t>(x), 0, 255);
11 |     Buffer<> result = f.realize(42);
12 | 
13 |     // Getting this far means no error was raised. For an error test that is a
14 |     // failure, so only the failure message is printed (no "Success!").
15 |     printf("I should not have reached here\n");
16 |     return 0;
17 | }
18 | 


--------------------------------------------------------------------------------
/test/error/constrain_wrong_output_buffer.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Func f;
 8 |     Var x;
 9 |     f(x) = Tuple(x, sin(x));
10 | 
11 |     // Don't do this. Instead constrain the size of output buffer 0.
12 |     f.output_buffers()[1].dim(0).set_min(4);
13 | 
14 |     f.compile_jit();
15 | 
16 |     return 0;
17 | }
18 | 


--------------------------------------------------------------------------------
/test/error/constraint_uses_non_param.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | using namespace Halide;
 4 | 
 5 | int main(int argc, char **argv) {
 6 |     Func f, g;
 7 |     Var x, y;
 8 |     f(x, y) = 0;
 9 |     g(x, y) = f(x, y);
10 |     Pipeline p(g);
11 | 
12 |     // This can't possibly be a precondition
13 |     p.add_requirement(x == 4 && f(3, 2) == 5);
14 | 
15 |     p.realize(100, 100);
16 | 
17 |     return 0;
18 | }
19 | 


--------------------------------------------------------------------------------
/test/error/define_after_realize.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Func f, g;
 8 |     Var x;
 9 | 
10 |     f(x) = x;
11 | 
12 |     Buffer<int> im = f.realize(10);
13 | 
14 |     // Now try to add an update definition to f
15 |     f(x) += 1;
16 | 
17 |     printf("There should have been an error\n");
18 |     return 0;
19 | }
20 | 


--------------------------------------------------------------------------------
/test/error/define_after_use.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Func f, g;
 8 |     Var x;
 9 | 
10 |     f(x) = x;
11 |     g(x) = f(x) + 1;
12 | 
13 |     // Now try to add an update definition to f
14 |     f(x) += 1;
15 | 
16 |     printf("There should have been an error\n");
17 |     return 0;
18 | }
19 | 


--------------------------------------------------------------------------------
/test/error/device_target_mismatch.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Target t("host");
 8 |     (void)get_device_interface_for_device_api(DeviceAPI::CUDA, t, "Device Target Mismatch Test");
 9 |     // t was built from "host" with no CUDA feature requested, so the CUDA lookup above is presumably expected to error out.
10 |     printf("I should not have reached here\n");
11 | 
12 |     return 0;
13 | }
14 | 


--------------------------------------------------------------------------------
/test/error/extern_func_self_argument.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | using namespace Halide;
 4 | 
 5 | extern "C"
 6 | int extern_func() {
 7 |     return 0;
 8 | }
 9 | 
10 | int main(int argc, char **argv) {
11 |     Func f("f");
12 | 
13 |     f.define_extern("extern_func", {f}, Int(32), 2);
14 |     f.infer_arguments();
15 | 
16 |     printf("There should have been an error\n");
17 |     return 0;
18 | }
19 | 


--------------------------------------------------------------------------------
/test/error/float_arg.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | #include <stdio.h>
 4 | 
 5 | using namespace Halide;
 6 | 
 7 | int main(int argc, char **argv) {
 8 |     Func f;
 9 |     Var x, y;
10 |     f(x, y) = 3*x + y;
11 | 
12 |     // Should result in an error
13 |     Func g;
14 |     g(x) = f(f(x, 3) * 17.0f, 3);
15 | 
16 |     printf("Success!\n");
17 |     return 0;
18 | }
19 | 
20 | 


--------------------------------------------------------------------------------
/test/error/forward_on_undefined_buffer.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     const Buffer<> foo;
 8 |     foo.raw_buffer();
 9 | 
10 |     printf("I should not have reached here\n");
11 |     return 0;
12 | }
13 | 


--------------------------------------------------------------------------------
/test/error/implicit_args.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Var x("x"), y("y"), z("z");
 8 | 
 9 |     Func f("f"), g("g"), h("h");
10 | 
11 |     g(x, y) = x + y;
12 |     g.compute_root();
13 | 
14 |     h(x, y, z) = x + y + z;
15 |     h.compute_root();
16 | 
17 |     // The initial definition uses 2 implicit vars: f(x, _0, _1) = g(_0, _1) + 2.
18 |     // The update definition, however, calls h(_) which will be expanded into
19 |     // h(_0, _1, _2), which is invalid.
20 |     f(x, _) = g(_) + 2;
21 |     f(x, _) += h(_) + 3;
22 | 
23 |     return 0;
24 | }
25 | 


--------------------------------------------------------------------------------
/test/error/impossible_constraints.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     ImageParam input(Float(32), 2, "in");
 8 | 
 9 |     Func out("out");
10 | 
11 |     // This requires the input to extend beyond its own width and height
12 |     out() = input(input.width(), input.height()) + input(0, 0);
13 | 
14 |     out.infer_input_bounds();
15 | 
16 |     return 0;
17 | }
18 | 
19 | 


--------------------------------------------------------------------------------
/test/error/init_def_should_be_all_vars.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Buffer<int> in(10, 10);
 8 |     // f's initial definition below is indexed by RDom variables rather than Vars; per the file name that is the error under test.
 9 |     Func f("f");
10 |     RDom r(0, in.width(), 0, in.height());
11 |     f(r.x, r.y) = in(r.x, r.y) + 2;
12 |     f.realize(in.width(), in.height());
13 | 
14 |     return 0;
15 | }
16 | 


--------------------------------------------------------------------------------
/test/error/inspect_loop_level.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     LoopLevel root = LoopLevel::root();
 8 | 
 9 |     printf("LoopLevel is %s\n", root.to_string().c_str()); // should fail
10 | 
11 |     printf("I should not have reached here\n");
12 | 
13 |     return 0;
14 | }
15 | 


--------------------------------------------------------------------------------
/test/error/lerp_float_weight_out_of_range.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | #include <stdio.h>
 4 | 
 5 | using namespace Halide;
 6 | 
 7 | int main(int argc, char **argv) {
 8 |     // This should trigger an error.
 9 |     Func f;
10 |     f() = lerp(0, 42, 1.5f);
11 | 
12 |     printf("Success!\n");
13 |     return 0;
14 | }
15 | 


--------------------------------------------------------------------------------
/test/error/lerp_mismatch.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | #include <stdio.h>
 4 | 
 5 | using namespace Halide;
 6 | 
 7 | int main(int argc, char **argv) {
 8 |     // This should trigger an error.
 9 |     Func f;
10 |     f() = lerp(cast<uint16_t>(0), cast<uint8_t>(42), 0.5f);
11 | 
12 |     printf("Success!\n");
13 |     return 0;
14 | }
15 | 


--------------------------------------------------------------------------------
/test/error/lerp_signed_weight.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | #include <stdio.h>
 4 | 
 5 | using namespace Halide;
 6 | 
 7 | int main(int argc, char **argv) {
 8 |     // This should trigger an error.
 9 |     Func f;
10 |     f() = lerp(cast<uint8_t>(0), cast<uint8_t>(42), cast<int8_t>(16));
11 | 
12 |     printf("Success!\n");
13 |     return 0;
14 | }
15 | 


--------------------------------------------------------------------------------
/test/error/memoize_different_compute_store.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Param<float> val;
 8 | 
 9 |     Func f, g;
10 |     Var x, y;
11 | 
12 |     f(x, y) = val + cast<uint8_t>(x);
13 |     g(x, y) = f(x, y) + f(x - 1, y) + f(x + 1, y);
14 | 
15 |     g.split(y, y, _, 16);
16 |     f.store_root();
17 |     f.compute_at(g, y).memoize();
18 | 
19 |     val.set(23.0f);
20 |     Buffer<uint8_t> out = g.realize(128, 128);
21 | 
22 |     for (int32_t i = 0; i < 128; i++) {
23 |         for (int32_t j = 0; j < 128; j++) {
24 |             assert(out(i, j) == (uint8_t)(3 * 23 + i + (i - 1) + (i + 1)));
25 |         }
26 |     }
27 | }
28 | 


--------------------------------------------------------------------------------
/test/error/metal_vector_too_large.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include "test/common/halide_test_dirs.h"
 3 | #include <stdio.h>
 4 | 
 5 | using namespace Halide;
 6 | 
 7 | int main(int argc, char **argv) {
 8 |     ImageParam input(UInt(16), 2, "input");
 9 |     Func f("f");
10 |     Var x("x"), y("y");
11 |     // Schedule: x vectorized 16 wide, y mapped to Metal GPU blocks.
12 |     f(x, y) = input(x, y) + 42;
13 |     f.vectorize(x, 16);
14 |     f.gpu_blocks(y, DeviceAPI::Metal);
15 |     const Target metal_target("osx-metal");
16 |     const std::string object_path = Internal::get_test_tmp_dir() + "metal_vector_too_large.o";
17 |     // Compile ahead-of-time for the Metal target; per the file name this is where the error is expected.
18 |     f.compile_to_object(object_path, {input}, "f", metal_target);
19 | 
20 |     return 0;
21 | }
22 | 


--------------------------------------------------------------------------------
/test/error/missing_args.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Func f;
 8 |     Var x;
 9 |     ImageParam im(Int(8), 2);
10 |     Param<float> arg;
11 |     // f depends on both the image parameter im and the scalar parameter arg.
12 |     f(x) = im(x, x) + arg;
13 |     // The argument list is left empty: neither im nor arg is declared to the compiler.
14 |     std::vector<Argument> args;
15 |     // Omitted on purpose (this is the "missing args" case): args.push_back(im);
16 |     // Omitted on purpose (this is the "missing args" case): args.push_back(arg);
17 |     f.compile_to_object("f.o", args, "f");
18 | 
19 |     printf("Success!\n");
20 |     return 0;
21 | }
22 | 


--------------------------------------------------------------------------------
/test/error/modulo_constant_zero.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | #include <stdio.h>
 4 | 
 5 | using namespace Halide;
 6 | 
 7 | int main(int argc, char **argv) {
 8 |     Func f;
 9 |     Var x;
10 |     f(x) = x % 0;
11 | 
12 |     f.realize(10);
13 | 
14 |     printf("Success!\n");
15 |     return 0;
16 | }
17 | 
18 | 


--------------------------------------------------------------------------------
/test/error/no_default_device.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Target t("host");
 8 |     (void)get_device_interface_for_device_api(DeviceAPI::Default_GPU, t, "No Default Device Test");
 9 |     
10 |     printf("I should not have reached here\n");
11 | 
12 |     return 0;
13 | }
14 | 


--------------------------------------------------------------------------------
/test/error/nonexistent_update_stage.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Func f;
 8 |     Var x;
 9 |     f(x) = x;  // pure definition only; no update definition is ever added
10 |     f.update().vectorize(x, 4);  // asks for an update stage that f does not have
11 | 
12 |     return 0;
13 | }
14 | 


--------------------------------------------------------------------------------
/test/error/overflow_during_constant_folding.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | using namespace Halide;
 4 | 
 5 | int main(int argc, char **argv) {
 6 |     Func f;
 7 |     Var x;
 8 |     f(x) = Expr(0x12345678) * Expr(0x76543210);
 9 |     // Each constant fits in a 32-bit int, but their product (about 6.06e17) is far beyond INT32_MAX (about 2.1e9).
10 |     f.realize(10);
11 | 
12 |     return 0;
13 | }
14 | 


--------------------------------------------------------------------------------
/test/error/pointer_arithmetic.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Param<const char *> p;
 8 |     p.set("Hello, world!\n");
 9 | 
10 |     Func f;
11 |     Var x;
12 |     // Should error out during match_types
13 |     f(x) = p + 2;
14 | 
15 |     return 0;
16 | }
17 | 


--------------------------------------------------------------------------------
/test/error/race_condition.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 | 
 8 |     Func f, g;
 9 |     Var x, y;
10 | 
11 |     f(x, y) = 0;
12 | 
13 |     RDom r(0, 10, 0, 10);
14 |     f(r.x, r.y) += f(r.y, r.x);
15 | 
16 |     // This schedule should be forbidden, because it causes a race condition.
17 |     f.update().parallel(r.y);
18 | 
19 |     // We shouldn't reach here, because there should have been a compile error.
20 |     printf("There should have been an error\n");
21 | 
22 |     return 0;
23 | }
24 | 


--------------------------------------------------------------------------------
/test/error/rdom_undefined.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Expr undef_min, undef_extent;
 8 |     // Both Exprs are default-constructed and never assigned a value.
 9 |     // This should assert-fail
10 |     RDom r(undef_min, undef_extent);
11 | 
12 |     // Just to ensure compiler doesn't optimize-away the RDom ctor
13 |     printf("Dimensions: %d\n", r.dimensions());
14 | 
15 |     printf("Success!\n");
16 |     return 0;
17 | }
18 | 


--------------------------------------------------------------------------------
/test/error/reduction_bounds.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Func f("f"), g("g");
 8 |     Var x("x");
 9 |     RDom r(0, 100, "r");
10 | 
11 |     f(x) = x;
12 | 
13 |     g(x) = 0;
14 |     g(x) = f(g(x-1)) + r;
15 | 
16 |     f.compute_at(g, r.x);
17 | 
18 |     // Use of f is unbounded in g.
19 | 
20 |     g.realize(100);
21 | 
22 |     return 0;
23 | }
24 | 


--------------------------------------------------------------------------------
/test/error/reduction_type_mismatch.cpp:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include "Halide.h"
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Var x;
 8 |     Func f;
 9 |     RDom dom(0, 50);
10 | 
11 |     f(x) = cast<uint8_t>(0); // The type here...
12 |     f(dom) += 1.0f;          // does not match the type here.
13 | 
14 |     // Should result in an error
15 |     Buffer<float> result = f.realize(50);
16 | 
17 |     printf("Success!\n");
18 |     return 0;
19 | }
20 | 


--------------------------------------------------------------------------------
/test/error/require_fail.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | #include <memory>
 4 | 
 5 | using namespace Halide;
 6 | 
 7 | int main(int argc, char **argv) {
 8 |     const int kPrime1 = 7829;
 9 |     const int kPrime2 = 7919;
10 | 
11 |     Buffer<int> result;
12 |     Param<int> p1, p2;
13 |     Var x;
14 |     Func f;
15 |     f(x) = require((p1 + p2) == kPrime1,
16 |                    (p1 + p2) * kPrime2,
17 |                    "The parameters should add to exactly", kPrime1, "but were", p1, p2);
18 |     // choose values that will fail
19 |     p1.set(1);
20 |     p2.set(2);
21 |     result = f.realize(1);
22 | 
23 |     return 0;
24 | 
25 | }
26 | 


--------------------------------------------------------------------------------
/test/error/reuse_var_in_schedule.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | using namespace Halide;
 4 | 
 5 | int main(int argc, char **argv) {
 6 |     Func f;
 7 |     Var x;
 8 | 
 9 |     f(x) = x;
10 |     // The second split below names xi as its inner variable even though the first split already produced xi.
11 |     Var xo, xi;
12 |     f.split(x, xo, xi, 4).split(xo, xo, xi, 4);
13 |     // NOTE(review): per the file name, reusing xi is presumably the error under test.
14 |     return 0;
15 | }
16 | 


--------------------------------------------------------------------------------
/test/error/reused_args.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | using namespace Halide;
 4 | 
 5 | int main(int argc, char **argv) {
 6 |     Func f;
 7 |     Var x;
 8 |     // You can't use the same variable more than once in the LHS of a
 9 |     // pure definition.
10 |     f(x, x) = x;
11 | 
12 |     return 0;
13 | }
14 | 


--------------------------------------------------------------------------------
/test/error/specialize_fail.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Var x;
 8 |     Param<int> p;
 9 | 
10 |     Func f;
11 |     f(x) = x;
12 |     f.specialize(p == 0).vectorize(x, 8);
13 |     f.specialize_fail("Expected failure");
14 | 
15 |     p.set(42);  // arbitrary nonzero value
16 |     f.realize(100);
17 | 
18 |     printf("How did I get here?\n");
19 | 
20 |     return 0;
21 | }
22 | 


--------------------------------------------------------------------------------
/test/error/split_inner_wrong_tail_strategy.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | using namespace Halide;
 4 | 
 5 | int main(int argc, char **argv) {
 6 |     Func f;
 7 |     Var x;
 8 |     f(x) = x;
 9 |     f(x) += 1;
10 |     Var xo, xi, xio, xii;
11 |     // The update-stage split below (xi from a factor-8 split, re-split by 9 with RoundUp) would redundantly redo some +=1, and create incorrect output.
12 |     f.compute_root();
13 |     f.update().split(x, xo, xi, 8).split(xi, xio, xii, 9, TailStrategy::RoundUp);
14 |     // g only consumes f so that realizing g pulls f's scheduled update stage into the pipeline.
15 |     Func g;
16 |     g(x) = f(x);
17 |     g.realize(10);
18 | 
19 |     return 0;
20 | }
21 | 


--------------------------------------------------------------------------------
/test/error/thread_id_outside_block_id.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Target t = get_jit_target_from_environment();
 8 |     t.set_feature(Target::CUDA);
 9 | 
10 |     Func f;
11 |     Var x;
12 |     f(x) = x;
13 |     Var xo, xi;
14 |     f.gpu_tile(x, xo, xi, 16).reorder(xo, xi);
15 | 
16 |     f.compile_jit(t);
17 |     Buffer<int> result = f.realize(16);
18 | 
19 |     printf("There should have been an error\n");
20 |     return 0;
21 | }
22 | 
23 | 


--------------------------------------------------------------------------------
/test/error/too_many_args.cpp:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include "Halide.h"
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Var x, y;
 8 | 
 9 |     Func one_arg;
10 |     one_arg(x) = x * 2;             // One argument
11 | 
12 |     Func bad_call;
13 |     bad_call(x, y) = one_arg(x, y); // Called with two
14 | 
15 |     // Should result in an error
16 |     Buffer<uint32_t> result = bad_call.realize(256, 256);
17 | 
18 |     printf("Success!\n");
19 |     return 0;
20 | }
21 | 


--------------------------------------------------------------------------------
/test/error/tuple_arg_select_undef.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | using namespace Halide::Internal;
 6 | 
 7 | int main(int argc, char **argv) {
 8 |     Var x("x"), y("y");
 9 |     Func f("f"), g("g");
10 | 
11 |     f(x, y) = {0, 0};
12 | 
13 |     RDom r(0, 10);
14 |     Expr arg_0 = clamp(select(r.x < 2, 13, undef<int>()), 0, 20);
15 |     Expr arg_1 = clamp(select(r.x < 5, 23, undef<int>()), 0, 20);
16 |     // Different predicates for the undefs: should result in an error
17 |     f(arg_0, arg_1) = {f(arg_0, arg_1)[0] + 10, f(arg_0, arg_1)[1] + 5};
18 | 
19 |     f.realize(100, 100);
20 | 
21 |     printf("Success!\n");
22 |     return 0;
23 | }
24 | 


--------------------------------------------------------------------------------
/test/error/tuple_val_select_undef.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | using namespace Halide::Internal;
 6 | 
 7 | int main(int argc, char **argv) {
 8 |     Var x("x");
 9 |     Func f("f");
10 | 
11 |     // Should result in an error
12 |     f(x) = {x, select(x < 20, 20*x, undef<int>())};
13 |     f.realize(10);
14 | 
15 |     printf("Success!\n");
16 |     return 0;
17 | }
18 | 


--------------------------------------------------------------------------------
/test/error/unbounded_input.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Func f;
 8 |     Var x, y;
 9 | 
10 |     ImageParam in(Float(32), 2);
11 |     ImageParam x_coord(Int(32), 2);
12 |     ImageParam y_coord(Int(32), 2);
13 | 
14 |     f(x, y) = in(x_coord(x, y), y_coord(x, y));
15 | 
16 |     f.compile_jit();
17 | 
18 |     printf("I should not have reached here\n");
19 | 
20 |     return 0;
21 | }
22 | 


--------------------------------------------------------------------------------
/test/error/unbounded_output.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Func f;
 8 |     Var x, y;
 9 | 
10 |     ImageParam in(Float(32), 2);
11 |     ImageParam x_coord(Int(32), 2);
12 |     ImageParam y_coord(Int(32), 2);
13 | 
14 |     f(x, y) = 0.0f;
15 |     RDom r(0, 100, 0, 100);
16 |     f(x_coord(r.x, r.y), y_coord(r.x, r.y)) += in(r.x, r.y);
17 | 
18 |     f.compile_jit();
19 | 
20 |     printf("I should not have reached here\n");
21 | 
22 |     return 0;
23 | }
24 | 


--------------------------------------------------------------------------------
/test/error/undefined_func_compile.cpp:
--------------------------------------------------------------------------------
 1 | #include <assert.h>
 2 | #include <stdio.h>
 3 | #include "Halide.h"
 4 | 
 5 | #include "test/common/halide_test_dirs.h"
 6 | 
 7 | using namespace Halide;
 8 | 
 9 | int main(int argc, char **argv) {
10 |     Func f("f");
11 | 
12 |     std::string test_object = Internal::get_test_tmp_dir() + "compile_undefined.o";
13 |     f.compile_to_object(test_object, {}, "f");
14 | 
15 |     // We shouldn't reach here, because there should have been a compile error.
16 |     printf("There should have been an error\n");
17 | 
18 |     return 0;
19 | }
20 | 


--------------------------------------------------------------------------------
/test/error/undefined_func_realize.cpp:
--------------------------------------------------------------------------------
 1 | #include <assert.h>
 2 | #include <stdio.h>
 3 | #include "Halide.h"
 4 | 
 5 | #include "test/common/halide_test_dirs.h"
 6 | 
 7 | using namespace Halide;
 8 | 
 9 | int main(int argc, char **argv) {
10 |     Func f("f");
11 | 
12 |     Buffer<int32_t> result = f.realize(100, 5, 3);
13 | 
14 |     // We shouldn't reach here, because there should have been a compile error.
15 |     printf("There should have been an error\n");
16 | 
17 |     return 0;
18 | }
19 | 


--------------------------------------------------------------------------------
/test/error/undefined_loop_level.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     LoopLevel undefined;
 8 | 
 9 |     Var x;
10 |     Func f, g;
11 |     f(x) = x;
12 |     g(x) = f(x);
13 |     f.compute_at(undefined);
14 |     g.compute_root();
15 | 
16 |     // Trying to lower/realize with an undefined LoopLevel should be fatal
17 |     Buffer<int> result = g.realize(1);
18 | 
19 |     printf("I should not have reached here\n");
20 | 
21 |     return 0;
22 | }
23 | 


--------------------------------------------------------------------------------
/test/error/undefined_pipeline_compile.cpp:
--------------------------------------------------------------------------------
 1 | #include <assert.h>
 2 | #include <stdio.h>
 3 | #include "Halide.h"
 4 | 
 5 | #include "test/common/halide_test_dirs.h"
 6 | 
 7 | using namespace Halide;
 8 | 
 9 | int main(int argc, char **argv) {
10 |     Func f("f");
11 | 
12 |     Pipeline p(f);
13 |     std::string test_object = Internal::get_test_tmp_dir() + "compile_undefined.o";
14 |     p.compile_to_object(test_object, {}, "f");
15 | 
16 |     // We shouldn't reach here, because there should have been a compile error.
17 |     printf("There should have been an error\n");
18 | 
19 |     return 0;
20 | }
21 | 


--------------------------------------------------------------------------------
/test/error/undefined_pipeline_realize.cpp:
--------------------------------------------------------------------------------
 1 | #include <assert.h>
 2 | #include <stdio.h>
 3 | #include "Halide.h"
 4 | 
 5 | using namespace Halide;
 6 | 
 7 | int main(int argc, char **argv) {
 8 |     Func f("f");
 9 | 
10 |     Pipeline p(f);
11 |     Buffer<int32_t> result = p.realize(100, 5, 3);
12 | 
13 |     // We shouldn't reach here, because there should have been a compile error.
14 |     printf("There should have been an error\n");
15 | 
16 |     return 0;
17 | }
18 | 


--------------------------------------------------------------------------------
/test/error/undefined_rdom_dimension.cpp:
--------------------------------------------------------------------------------
 1 | #include <assert.h>
 2 | #include <stdio.h>
 3 | #include "Halide.h"
 4 | 
 5 | int error_occurred = false;
 6 | void halide_error(void *ctx, const char *msg) {
 7 |     printf("Expected: %s\n", msg);
 8 |     error_occurred = true;
 9 | }
10 | 
11 | using namespace Halide;
12 | 
13 | int main(int argc, char **argv) {
14 |     Func f("f"), g("g"), h("h");
15 |     Var x("x"), y("y"), c("c");
16 | 
17 |     RDom r(1, 99, "r");
18 |     g(x, y, c) = 42;
19 |     h(x, y, c) = 88;
20 |     f(x, y, c) = g(x, y, c);
21 |     f(r.x, r.y, c) = f(r.x-1, r.y, c) + h(r.x, r.y, c);
22 | 
23 |     f.set_error_handler(&halide_error);
24 |     Buffer<int32_t> result = f.realize(100, 5, 3);
25 | 
26 |     assert(error_occurred);
27 |     printf("Success!\n");
28 | }
29 | 


--------------------------------------------------------------------------------
/test/error/unknown_target.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Target t;
 8 | 
 9 |     // Calling natural_vector_size() on a Target with Unknown fields
10 |     // should generate user_error.
11 |     (void) t.natural_vector_size<float>();
12 | 
13 |     printf("I should not have reached here\n");
14 |     return 0;
15 | }
16 | 


--------------------------------------------------------------------------------
/test/error/vectorize_dynamic.cpp:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include "Halide.h"
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Var x, y;
 8 | 
 9 |     Buffer<int> input(5, 5);
10 |     Func f;
11 |     f(x, y) = input(x, y) * 2;
12 |     Var xo, xi;
13 | 
14 |     Param<int> vector_size;
15 | 
16 |     // You can only vectorize across compile-time-constant sizes.
17 |     f.split(x, xo, xi, vector_size).vectorize(xi);
18 | 
19 |     // Should result in an error
20 |     vector_size.set(4);
21 |     Buffer<int> out = f.realize(5, 5);
22 | 
23 |     printf("Success!\n");
24 |     return 0;
25 | }
26 | 


--------------------------------------------------------------------------------
/test/error/vectorize_too_little.cpp:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include "Halide.h"
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Var x, y;
 8 | 
 9 |     Buffer<int> input(5, 5);
10 |     Func f;
11 |     f(x, y) = input(x, y) * 2;
12 |     f.vectorize(x, 0);
13 | 
14 |     // Should result in an error
15 |     Buffer<int> out = f.realize(5, 5);
16 | 
17 |     printf("Success!\n");
18 |     return 0;
19 | }
20 | 


--------------------------------------------------------------------------------
/test/error/vectorize_too_much.cpp:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include "Halide.h"
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Var x, y;
 8 | 
 9 |     Buffer<int> input(5, 5);
10 |     Func f;
11 |     f(x, y) = input(x, y) * 2;
12 |     f.vectorize(x, 8).vectorize(y, 8);
13 | 
14 |     // Should result in an error
15 |     Buffer<int> out = f.realize(5, 5);
16 | 
17 |     printf("Success!\n");
18 |     return 0;
19 | }
20 | 


--------------------------------------------------------------------------------
/test/error/vectorized_extern.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Func f;
 8 |     Var x;
 9 |     f.define_extern("test", {}, Int(32), {x});  // f is defined through define_extern rather than a Halide expression
10 |     Var xo;
11 |     f.split(x, xo, x, 8).vectorize(xo);  // split x by 8, then vectorize the xo variable of that extern-defined Func
12 | 
13 |     f.compile_jit();  // NOTE(review): presumably where the error is raised (file lives under test/error) -- confirm
14 |     return 0;
15 | }
16 | 


--------------------------------------------------------------------------------
/test/error/wrap_custom_after_shared.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Func f("f"), g1("g1"), g2("g2"), g3("g3"), g4("g4");
 8 |     Var x("x"), y("y");
 9 | 
10 |     f(x) = x;
11 |     g1(x, y) = f(x);
12 |     g2(x, y) = f(x);
13 |     g3(x, y) = f(x);
14 | 
15 |     // It's not valid to call f.in(g3) after defining a shared wrapper for
16 |     // {g1, g4, g3}
17 |     Func wrapper1 = f.in({g1, g4, g3});
18 |     Func wrapper2 = f.in(g3);
19 |     return 0;
20 | }
21 | 


--------------------------------------------------------------------------------
/test/error/wrap_frozen.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Func f("f"), g("g");
 8 |     Var x("x"), y("y");
 9 | 
10 |     f(x) = x;
11 |     g(x) = f(x);
12 |     Func wrapper = f.in(g);
13 |     wrapper(x) += 1;
14 | 
15 |     return 0;
16 | }
17 | 


--------------------------------------------------------------------------------
/test/error/wrapper_never_used.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | using namespace Halide;
 4 | using namespace Halide::Internal;
 5 | 
 6 | int main() {
 7 |     Var x("x"), y("y");
 8 |     Func f("f"), g("g"), h("h");
 9 |     f(x, y) = x + y;
10 |     g(x, y) = 5;
11 |     h(x, y) = f(x, y) + g(x, y);
12 | 
13 |     f.compute_root();
14 |     f.in(g).compute_root();
15 | 
16 |     // This should cause an error since f.in(g) was called but 'f' is
17 |     // never used in 'g'.
18 |     h.realize(5, 5);
19 | 
20 |     return 0;
21 | }


--------------------------------------------------------------------------------
/test/error/wrong_dimensionality_extern_stage.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | using namespace Halide;
 4 | 
 5 | int main(int argc, char **argv) {
 6 |     Func f, g;
 7 |     Var x, y;
 8 | 
 9 |     g.define_extern("foo", {}, UInt(16), 3);
10 | 
11 |     // Should throw an error immediately because g was defined with 3 dimensions.
12 |     f(x, y) = cast<float>(g(x, y));
13 | 
14 |     return 0;
15 | }
16 | 


--------------------------------------------------------------------------------
/test/error/wrong_type.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Func f;
 8 |     Var x;
 9 |     f(x) = x;
10 |     Buffer<float> im = f.realize(100);  // NOTE(review): presumably the intended failure -- f is defined as the Var x but realized into a float buffer; confirm
11 | 
12 |     return 0;
13 | }
14 | 


--------------------------------------------------------------------------------
/test/generator/alias_generator.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | namespace {
 4 | 
 5 | class Alias : public Halide::Generator<Alias> {
 6 | public:
 7 |     GeneratorParam<int32_t> offset{ "offset", 0 };
 8 |     Input<Buffer<int32_t>>  input{ "input", 1 };
 9 |     Output<Buffer<int32_t>> output{ "output", 1 };
10 | 
11 |     void generate() {
12 |         Var x;
13 |         output(x) = input(x) + offset;
14 |     }
15 | };
16 | 
17 | }  // namespace
18 | 
19 | HALIDE_REGISTER_GENERATOR(Alias, alias)
20 | HALIDE_REGISTER_GENERATOR_ALIAS(alias_with_offset_42, alias, { { "offset", "42" }})
21 | 


--------------------------------------------------------------------------------
/test/generator/argvcall_generator.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | namespace {
 4 | 
 5 | class ArgvCall : public Halide::Generator<ArgvCall> {
 6 | public:
 7 |     Input<float> f1{ "f1", 1.0 };
 8 |     Input<float> f2{ "f2", 1.0 };
 9 | 
10 |     Output<Buffer<int32_t>> output{ "output", 3 };
11 | 
12 |     void generate() {
13 |         Var x, y, c;
14 |         Func f("f");
15 | 
16 |         f(x, y) = max(x, y);
17 |         output(x, y, c) = cast<int32_t>(f(x, y) * c * f1 / f2);
18 | 
19 |         output.bound(c, 0, 3).reorder(c, x, y).unroll(c);
20 | 
21 |         output.vectorize(x, natural_vector_size<float>());
22 |     }
23 | };
24 | 
25 | }  // namespace
26 | 
27 | HALIDE_REGISTER_GENERATOR(ArgvCall, argvcall)
28 | 


--------------------------------------------------------------------------------
/test/generator/can_use_target_generator.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | namespace {
 4 | 
 5 | class CanUseTarget : public Halide::Generator<CanUseTarget> {
 6 | public:
 7 |     Output<Buffer<uint32_t>> output{"output", 2};
 8 | 
 9 |     // Currently really just a placeholder: can_use_target_aottest.cpp just
10 |     // needs to test the runtime itself, not the generator function.
11 |     void generate() {
12 |         Var x, y;
13 |         output(x, y) = cast<uint32_t>((int32_t)0xdeadbeef);
14 |     }
15 | };
16 | 
17 | }  // namespace
18 | 
19 | HALIDE_REGISTER_GENERATOR(CanUseTarget, can_use_target)
20 | 
21 | 


--------------------------------------------------------------------------------
/test/generator/cxx_mangling_externs.cpp:
--------------------------------------------------------------------------------
 1 | #include <cstdint>
 2 | 
 3 | // These are the HalideExtern functions referenced by cxx_mangling_generator.cpp
 4 | int32_t extract_value_global(int32_t *arg) {
 5 |     return *arg;
 6 | }
 7 | 
 8 | namespace HalideTest {
 9 | 
10 | int32_t extract_value_ns(const int32_t *arg) {
11 |     return *arg;
12 | }
13 | 
14 | }
15 | 
16 | 


--------------------------------------------------------------------------------
/test/generator/error_codes_generator.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | namespace {
 4 | 
 5 | class ErrorCodes : public Halide::Generator<ErrorCodes> {
 6 | public:
 7 |     Input<Buffer<int32_t>>  input{ "input", 2};
 8 |     Input<int>              f_explicit_bound{"f_explicit_bound", 1, 0, 64};
 9 | 
10 |     Output<Buffer<int32_t>> output{"output", 2};
11 | 
12 |     void generate() {
13 |         assert(!get_target().has_feature(Target::LargeBuffers));
14 |         Var x, y;
15 | 
16 |         output(x, y) = input(x, y);
17 |         output.bound(x, 0, f_explicit_bound);
18 | 
19 |         add_requirement(input.dim(1).extent() == 123);
20 |     }
21 | };
22 | 
23 | }  // namespace
24 | 
25 | HALIDE_REGISTER_GENERATOR(ErrorCodes, error_codes)
26 | 


--------------------------------------------------------------------------------
/test/generator/external_code_extern.cpp:
--------------------------------------------------------------------------------
1 | // C-linkage helper: returns its argument increased by 42.
2 | extern "C" float gen_extern_tester(float in) {
3 |     const float shifted = in + 42;
4 |     return shifted;
5 | }
6 | 


--------------------------------------------------------------------------------
/test/generator/float16_t_generator.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | class Float16T : public Halide::Generator<Float16T> {
 4 | public:
 5 |     Output<Buffer<int32_t>> output{"output", 1};
 6 | 
 7 |     void generate() {
 8 |         // Currently the float16 aot test just exercises the
 9 |         // runtime. More interesting code may go here in the future.
10 |         Var x;
11 |         output(x) = x;
12 |     }
13 | };
14 | 
15 | HALIDE_REGISTER_GENERATOR(Float16T, float16_t)
16 | 


--------------------------------------------------------------------------------
/test/generator/gpu_object_lifetime_generator.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | namespace {
 4 | 
 5 | class GpuObjectLifetime : public Halide::Generator<GpuObjectLifetime> {
 6 | public:
 7 |     Output<Buffer<int32_t>> output{"output", 1};
 8 | 
 9 |     void generate() {
10 |         Var x;
11 | 
12 |         output(x) = x;
13 | 
14 |         Target target = get_target();
15 |         if (target.has_gpu_feature()) {
16 |             Var xo, xi;
17 |             output.gpu_tile(x, xo, xi, 16);
18 |         }
19 |     }
20 | };
21 | 
22 | }  // namespace
23 | 
24 | HALIDE_REGISTER_GENERATOR(GpuObjectLifetime, gpu_object_lifetime)
25 | 


--------------------------------------------------------------------------------
/test/generator/gpu_only_generator.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | namespace {
 4 | 
 5 | class GpuOnly : public Halide::Generator<GpuOnly> {
 6 | public:
 7 |     Input<Buffer<int32_t>> input{"input", 2};
 8 | 
 9 |     Output<Buffer<int32_t>>  output{"output", 2};
10 | 
11 |     void generate() {
12 |         Var x("x"), y("y");
13 | 
14 |         // Create a simple pipeline that scales pixel values by 2.
15 |         output(x, y) = input(x, y) * 2;
16 | 
17 |         Target target = get_target();
18 |         if (target.has_gpu_feature()) {
19 |             Var xo, yo, xi, yi;
20 |             output.gpu_tile(x, y, xo, yo, xi, yi, 16, 16);
21 |         }
22 |     }
23 | };
24 | 
25 | }  // namespace
26 | 
27 | HALIDE_REGISTER_GENERATOR(GpuOnly, gpu_only)
28 | 


--------------------------------------------------------------------------------
/test/generator/image_from_array_generator.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | namespace {
 4 | 
 5 | class ImageFromArray : public Halide::Generator<ImageFromArray> {
 6 | public:
 7 |     Output<Buffer<int32_t>> output{"output", 1};
 8 | 
 9 |     void generate() {
10 |         // Currently the test just exercises halide_image.h.
11 |         Var x;
12 |         output(x) = x;
13 |     }
14 | };
15 | 
16 | }  // namespace
17 | 
18 | HALIDE_REGISTER_GENERATOR(ImageFromArray, image_from_array)
19 | 
20 | 


--------------------------------------------------------------------------------
/test/generator/matlab_generator.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | using namespace Halide;
 4 | 
 5 | namespace {
 6 | 
 7 | class Matlab : public Halide::Generator<Matlab> {
 8 | public:
 9 |     Input<Buffer<float>>  input{"input", 2};
10 |     Input<float>          scale{"scale"};
11 |     Input<bool>           negate{"negate"};
12 | 
13 |     Output<Buffer<float>> output{"output", 2};
14 | 
15 |     void generate() {
16 |         Var x, y;
17 |         output(x, y) = input(x, y) * scale * select(negate, -1.0f, 1.0f);
18 |     }
19 | };
20 | 
21 | }  // namespace
22 | 
23 | HALIDE_REGISTER_GENERATOR(Matlab, matlab)
24 | 


--------------------------------------------------------------------------------
/test/generator/multitarget_generator.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | namespace {
 4 | 
 5 | class Multitarget : public Halide::Generator<Multitarget> {
 6 | public:
 7 |     Output<Buffer<uint32_t>> output{"output", 2};
 8 | 
 9 |     void generate() {
10 |         Var x, y;
11 |         if (get_target().has_feature(Target::Debug)) {
12 |             output(x, y) = cast<uint32_t>((int32_t)0xdeadbeef);
13 |         } else {
14 |             output(x, y) = cast<uint32_t>((int32_t)0xf00dcafe);
15 |         }
16 |     }
17 | };
18 | 
19 | }  // namespace
20 | 
21 | HALIDE_REGISTER_GENERATOR(Multitarget, multitarget)
22 | 


--------------------------------------------------------------------------------
/test/generator/nested_externs_aottest.cpp:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | #include "HalideBuffer.h"
 5 | #include "nested_externs_root.h"
 6 | 
 7 | using namespace Halide::Runtime;
 8 | 
 9 | int main(int argc, char **argv) {
10 |     auto buf = Buffer<float>::make_interleaved(100, 200, 3);
11 | 
12 |     nested_externs_root(38.5f, buf);
13 | 
14 |     buf.for_each_element([&](int x, int y, int c) {
15 |         const float correct = 158.0f;
16 |         const float actual = buf(x, y, c);
17 |         if (actual != correct) {
18 |             printf("result(%d, %d, %d) = %f instead of %f\n",
19 |                    x, y, c, actual, correct);
20 |             exit(-1);
21 |         }
22 |     });
23 | 
24 |     printf("Success!\n");
25 |     return 0;
26 | }
27 | 


--------------------------------------------------------------------------------
/test/generator/string_param_aottest.cpp:
--------------------------------------------------------------------------------
 1 | #include "HalideBuffer.h"
 2 | #include "HalideRuntime.h"
 3 | #include "string_param.h"
 4 | #include <iostream>
 5 | #include <stdio.h>  // printf is used below; do not rely on a transitive include
 6 | 
 7 | // AOT test driver: runs string_param into a 3x3 buffer and checks that
 8 | // output(x, y) == 5 * y + x at every coordinate.
 9 | // Returns 0 on success, -1 at the first mismatching element.
10 | int main(int argc, char **argv) {
11 |     Halide::Runtime::Buffer<int> output(3, 3);
12 |     string_param(output);
13 | 
14 |     for (int x = 0; x < 3; ++x) {
15 |         for (int y = 0; y < 3; ++y) {
16 |             int expected_value = (5 * y + x);
17 |             if (output(x, y) != expected_value) {
18 |                 printf("Unexpected output value : %d at output(%d, %d)\n", output(x, y), x, y);
19 |                 return -1;
20 |             }
21 |         }
22 |     }
23 | 
24 |     printf("Success!\n");
25 |     return 0;
26 | }
27 | 


--------------------------------------------------------------------------------
/test/generator/user_context_insanity_generator.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | namespace {
 4 | 
 5 | class UserContextInsanity : public Halide::Generator<UserContextInsanity> {
 6 | public:
 7 |     Input<Buffer<float>>  input{"input", 2};
 8 |     Output<Buffer<float>> output{"output", 2};
 9 | 
10 |     void generate() {
11 |         Var x, y;
12 | 
13 |         Func g;
14 |         g(x, y) = input(x, y) * 2;
15 |         g.compute_root();
16 | 
17 |         output(x, y) = g(x, y);
18 | 
19 |         output.parallel(y);
20 |         output.trace_stores();
21 |     }
22 | };
23 | 
24 | }  // namespace
25 | 
26 | HALIDE_REGISTER_GENERATOR(UserContextInsanity, user_context_insanity)
27 | 


--------------------------------------------------------------------------------
/test/generator/variable_num_threads_generator.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | namespace {
 4 | 
 5 | class VariableNumThreads : public Halide::Generator<VariableNumThreads> {
 6 | public:
 7 |     Output<Buffer<float>> output{"output", 2};
 8 | 
 9 |     void generate() {
10 |         // A job with lots of nested parallelism
11 |         Var x, y;
12 | 
13 |         output(x, y) = sqrt(sqrt(x*y));
14 |         output.parallel(x).parallel(y);
15 |     }
16 | };
17 | 
18 | }  // namespace
19 | 
20 | HALIDE_REGISTER_GENERATOR(VariableNumThreads, variable_num_threads)
21 | 


--------------------------------------------------------------------------------
/test/opengl/inline_reduction.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | #include "testing.h"
 5 | 
 6 | using namespace Halide;
 7 | 
 8 | int main() {
 9 |     // This test must be run with an OpenGL target.
10 |     const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL);
11 | 
12 |     Func f;
13 |     Var x, y, c;
14 |     RDom r(0, 10);
15 |     f(x, y, c) = sum(cast<float>(r));
16 |     f.bound(c, 0, 3).glsl(x, y, c);
17 | 
18 |     Buffer<float> result = f.realize(100, 100, 3, target);
19 | 
20 |     if (!Testing::check_result<float>(result, [&](int x, int y, int c) { return 45; })) {
21 |         return 1;
22 |     }
23 | 
24 |     printf("Success!\n");
25 | 
26 |     return 0;
27 | }
28 | 


--------------------------------------------------------------------------------
/test/opengl/internal.cpp:
--------------------------------------------------------------------------------
 1 | #include "../../src/CodeGen_OpenGL_Dev.h"
 2 | 
 3 | using namespace Halide;
 4 | using namespace Halide::Internal;
 5 | 
 6 | int main() {
 7 |     CodeGen_GLSL::test();
 8 | 
 9 |     return 0;
10 | }
11 | 


--------------------------------------------------------------------------------
/test/opengl/set_pixels.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | #include "testing.h"
 5 | 
 6 | using namespace Halide;
 7 | 
 8 | int main() {
 9 |     // This test must be run with an OpenGL target.
10 |     const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL);
11 | 
12 |     Func f;
13 |     Var x, y, c;
14 | 
15 |     f(x, y, c) = cast<uint8_t>(42);
16 | 
17 |     Buffer<uint8_t> out(10, 10, 3);
18 |     f.bound(c, 0, 3).glsl(x, y, c);
19 |     f.realize(out, target);
20 | 
21 |     out.copy_to_host();
22 |     if (!Testing::check_result<uint8_t>(out, [](int x, int y, int c) { return 42; })) {
23 |         return 1;
24 |     }
25 | 
26 |     printf("Success!\n");
27 |     return 0;
28 | }
29 | 


--------------------------------------------------------------------------------
/test/opengl/vagrant/.gitignore:
--------------------------------------------------------------------------------
1 | .vagrant
2 | 


--------------------------------------------------------------------------------
/test/opengl/vagrant/build_tests.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh -x
2 | # Build Halide in ~/halide_build and run the OpenGL tests. Abort if that
3 | # directory is unavailable, so nothing below runs in the starting directory.
4 | mkdir -p ~/halide_build || exit 1
5 | cd ~/halide_build || exit 1
6 | ln -s -f /Halide/Makefile .
7 | make -j 3
8 | make -k test_opengl
9 | 


--------------------------------------------------------------------------------
/test/opengl/vagrant/provision/etc/environment:
--------------------------------------------------------------------------------
1 | PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games"
2 | LC_ALL=C
3 | DISPLAY=:0.0
4 | LLVM_CONFIG=/usr/bin/llvm-config-3.8
5 | CLANG=/usr/bin/clang-3.8
6 | HL_TARGET=host-opengl
7 | HL_JIT_TARGET=host-opengl
8 | 


--------------------------------------------------------------------------------
/test/opengl/vagrant/provision/etc/init/xdummy.conf:
--------------------------------------------------------------------------------
1 | description "Dummy X server providing DISPLAY=:0.0"
2 | 
3 | expect fork
4 | 
5 | script
6 |     /usr/bin/Xorg -noreset +extension GLX +extension RANDR +extension RENDER -logfile /var/log/Xorg.log :0 &
7 | end script
8 | 


--------------------------------------------------------------------------------
/test/opengl/vagrant/provision/etc/systemd/system/xdummy.service:
--------------------------------------------------------------------------------
1 | # Runs Xorg on display :0 (see ExecStart) as a dummy X server.
2 | [Unit]
3 | Description=Dummy X server providing DISPLAY=:0.0
4 | 
5 | [Service]
6 | Type=simple
7 | ExecStart=/usr/bin/Xorg -noreset +extension GLX +extension RANDR +extension RENDER -config /dev/null -logfile /var/log/Xorg.log :0
8 | 


--------------------------------------------------------------------------------
/test/performance/jit_stress.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | 
 3 | #include <cstdio>
 4 | #include "halide_benchmark.h"
 5 | 
 6 | using namespace Halide;
 7 | using namespace Halide::Tools;
 8 | 
 9 | int main(int argc, char **argv) {
10 |     Var x;
11 | 
12 |     ImageParam a(Int(32), 1);
13 |     Buffer<int> b(1), c(1);
14 |     b(0) = 17;
15 |     c(0) = 0;
16 |     a.set(c);
17 | 
18 |     // Every benchmark iteration builds a fresh Func computing a(x) + b(x)
19 |     // and realizes it into c. Since a is bound to c and b(0) is 17, c(0)
20 |     // is expected to grow by 17 per iteration.
21 |     int expected = 0;
22 |     bool all_correct = true;
23 |     double t = benchmark([&]() {
24 |         Func f;
25 |         f(x) = a(x) + b(x);
26 |         f.realize(c);
27 |         expected += 17;
28 |         // Checked explicitly rather than with assert(): assert() is compiled
29 |         // out under NDEBUG, which would let a wrong result go unnoticed.
30 |         if (c(0) != expected) {
31 |             all_correct = false;
32 |         }
33 |     });
34 | 
35 |     if (!all_correct) {
36 |         printf("c(0) did not increase by 17 on every run (final value %d, expected %d)\n",
37 |                c(0), expected);
38 |         return -1;
39 |     }
40 | 
41 |     printf("%g ms per jit compilation\n", t * 1e3);
42 | 
43 |     printf("Success!\n");
44 |     return 0;
45 | }
46 | 


--------------------------------------------------------------------------------
/test/warning/double_vectorize.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Func f, g;
 8 |     Var x, y;
 9 |     f(x, y) = x + y;
10 |     g(x, y) = f(x, y) + f(x + 1, y);
11 | 
12 |     // Nested vectorization should cause a warning.
13 |     Var xi;
14 |     g.split(x, x, xi, 8).vectorize(xi);
15 |     f.compute_at(g, xi).vectorize(x);
16 | 
17 |     g.realize(16, 16);
18 | 
19 |     return 0;
20 | }
21 | 


--------------------------------------------------------------------------------
/test/warning/hidden_pure_definition.cpp:
--------------------------------------------------------------------------------
 1 | #include "Halide.h"
 2 | #include <stdio.h>
 3 | 
 4 | using namespace Halide;
 5 | 
 6 | int main(int argc, char **argv) {
 7 |     Func f;
 8 |     Var x;
 9 | 
10 |     f(x) = x;
11 | 
12 |     // Hide the previous definition.
13 |     f(x) = 2;
14 | 
15 |     return 0;
16 | }
17 | 


--------------------------------------------------------------------------------
/tools/GenGen.cpp:
--------------------------------------------------------------------------------
1 | #include "Halide.h"
2 | 
3 | int main(int argc, char **argv) {
4 |   return Halide::Internal::generate_filter_main(argc, argv, std::cerr);
5 | }
6 | 
7 | 
8 | 
9 | 


--------------------------------------------------------------------------------
/tools/halide_config.cmake.tpl:
--------------------------------------------------------------------------------
1 | # Machine-Generated: Do Not Edit
2 | set(HALIDE_SYSTEM_LIBS @HALIDE_SYSTEM_LIBS_RAW@)
3 | set(HALIDE_RTTI @HALIDE_RTTI_RAW@)
4 | 


--------------------------------------------------------------------------------
/tools/halide_config.make.tpl:
--------------------------------------------------------------------------------
1 | # Machine-Generated: Do Not Edit (change the template tools/halide_config.make.tpl instead)
2 | HALIDE_SYSTEM_LIBS=@HALIDE_SYSTEM_LIBS_RAW@
3 | HALIDE_RTTI=@HALIDE_RTTI_RAW@
4 | 


--------------------------------------------------------------------------------
/tutorial/.gitignore:
--------------------------------------------------------------------------------
1 | lesson_01
2 | lesson_02
3 | lesson_03
4 | lesson_04
5 | lesson_05
6 | blurred.png
7 | brighter.png
8 | 


--------------------------------------------------------------------------------
/tutorial/figures/lesson_02_input.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_02_input.jpg


--------------------------------------------------------------------------------
/tutorial/figures/lesson_02_output.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_02_output.jpg


--------------------------------------------------------------------------------
/tutorial/figures/lesson_05_col_major.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_05_col_major.gif


--------------------------------------------------------------------------------
/tutorial/figures/lesson_05_fast.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_05_fast.mp4


--------------------------------------------------------------------------------
/tutorial/figures/lesson_05_parallel_tiles.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_05_parallel_tiles.gif


--------------------------------------------------------------------------------
/tutorial/figures/lesson_05_row_major.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_05_row_major.gif


--------------------------------------------------------------------------------
/tutorial/figures/lesson_05_split_7_by_3.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_05_split_7_by_3.gif


--------------------------------------------------------------------------------
/tutorial/figures/lesson_05_tiled.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_05_tiled.gif


--------------------------------------------------------------------------------
/tutorial/figures/lesson_05_vectors.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_05_vectors.gif


--------------------------------------------------------------------------------
/tutorial/figures/lesson_08_compute_root.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_08_compute_root.gif


--------------------------------------------------------------------------------
/tutorial/figures/lesson_08_compute_y.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_08_compute_y.gif


--------------------------------------------------------------------------------
/tutorial/figures/lesson_08_mixed.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_08_mixed.mp4


--------------------------------------------------------------------------------
/tutorial/figures/lesson_08_store_root_compute_x.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_08_store_root_compute_x.gif


--------------------------------------------------------------------------------
/tutorial/figures/lesson_08_store_root_compute_y.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_08_store_root_compute_y.gif


--------------------------------------------------------------------------------
/tutorial/figures/lesson_08_tile.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_08_tile.gif


--------------------------------------------------------------------------------
/tutorial/figures/lesson_09_compute_at_multiple_updates.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_09_compute_at_multiple_updates.mp4


--------------------------------------------------------------------------------
/tutorial/figures/lesson_09_compute_at_pure.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_09_compute_at_pure.gif


--------------------------------------------------------------------------------
/tutorial/figures/lesson_09_compute_at_pure_and_update.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_09_compute_at_pure_and_update.gif


--------------------------------------------------------------------------------
/tutorial/figures/lesson_09_compute_at_rvar.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_09_compute_at_rvar.gif


--------------------------------------------------------------------------------
/tutorial/figures/lesson_09_compute_at_update.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_09_compute_at_update.gif


--------------------------------------------------------------------------------
/tutorial/figures/lesson_09_inline_reduction.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_09_inline_reduction.gif


--------------------------------------------------------------------------------
/tutorial/figures/lesson_09_update.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_09_update.gif


--------------------------------------------------------------------------------
/tutorial/figures/lesson_09_update_rdom.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_09_update_rdom.mp4


--------------------------------------------------------------------------------
/tutorial/figures/lesson_09_update_schedule.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_09_update_schedule.mp4


--------------------------------------------------------------------------------
/tutorial/figures/lesson_17_rdom_calls_in_predicate.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_17_rdom_calls_in_predicate.mp4


--------------------------------------------------------------------------------
/tutorial/figures/lesson_17_rdom_circular.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_17_rdom_circular.mp4


--------------------------------------------------------------------------------
/tutorial/figures/lesson_17_rdom_triangular.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_17_rdom_triangular.mp4


--------------------------------------------------------------------------------
/tutorial/figures/lesson_18_hist_manual_par.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_18_hist_manual_par.mp4


--------------------------------------------------------------------------------
/tutorial/figures/lesson_18_hist_rfactor_par.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_18_hist_rfactor_par.mp4


--------------------------------------------------------------------------------
/tutorial/figures/lesson_18_hist_rfactor_tile.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_18_hist_rfactor_tile.mp4


--------------------------------------------------------------------------------
/tutorial/figures/lesson_18_hist_rfactor_vec.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_18_hist_rfactor_vec.mp4


--------------------------------------------------------------------------------
/tutorial/figures/lesson_18_hist_serial.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_18_hist_serial.mp4


--------------------------------------------------------------------------------
/tutorial/figures/lesson_19_group_updates.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_19_group_updates.mp4


--------------------------------------------------------------------------------
/tutorial/figures/lesson_19_transpose.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_19_transpose.mp4


--------------------------------------------------------------------------------
/tutorial/figures/lesson_19_wrapper_global.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_19_wrapper_global.mp4


--------------------------------------------------------------------------------
/tutorial/figures/lesson_19_wrapper_local.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_19_wrapper_local.mp4


--------------------------------------------------------------------------------
/tutorial/figures/lesson_19_wrapper_unique.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_19_wrapper_unique.mp4


--------------------------------------------------------------------------------
/tutorial/figures/lesson_19_wrapper_vary_schedule.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_19_wrapper_vary_schedule.mp4


--------------------------------------------------------------------------------
/tutorial/images/gray.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/images/gray.png


--------------------------------------------------------------------------------
/tutorial/images/rgb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/images/rgb.png


--------------------------------------------------------------------------------
/tutorial/todo.txt:
--------------------------------------------------------------------------------
 1 | - debug_to_file
 2 | - the bounds query interface
 3 | - multi-output pipelines
 4 | - overriding the runtime
 5 | - lambdas
 6 |   useful tricks, e.g., `Buffer<float> image_buf = lambda(x, y, (sin(x+y)+1)/2).realize(10 * tile_size, 10 * tile_size);`
 7 | - extern functions and extern stages
 8 | - common scheduling patterns
 9 | - scheduling rvars
10 | - tail strategies
11 | - Wrapping existing memory in a Halide::Buffer
12 | 


--------------------------------------------------------------------------------
/util/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | halide_project(HalideTraceViz "utils" HalideTraceViz.cpp)  # HalideTraceViz from one source; "utils" is presumably a grouping/folder label (halide_project is defined elsewhere - confirm)
2 | halide_project(HalideTraceDump "utils" HalideTraceDump.cpp HalideTraceUtils.cpp)  # HalideTraceDump also compiles the HalideTraceUtils.cpp helper source
3 | halide_use_image_io(HalideTraceDump)  # presumably wires image I/O dependencies into HalideTraceDump (helper defined elsewhere - confirm)
4 | 


--------------------------------------------------------------------------------