├── .clang-format
├── .gitattributes
├── .gitignore
├── .gitmodules
├── .travis.yml
├── CMakeLists.txt
├── Doxyfile
├── Doxyfile.in
├── LICENSE.txt
├── Makefile
├── README.md
├── README_cmake.md
├── README_rungen.md
├── README_webassembly.md
├── ai_scripts
├── conv.prx
├── data
│ ├── data.txt
│ ├── kernelAndInImage_256x16_k3_gaussblur.txt
│ ├── old_data.txt
│ └── refImage_256x16_k3_gaussblur.txt
├── data_points.txt
├── execute.py
├── explore.bash
├── global.h
├── main.cc
├── naive.cc
├── run.bash
├── run1.bash
├── run2.bash
├── run3.bash
├── run4.bash
├── run_single.bash
└── xhalide_generated.cc
├── apps
├── CMakeLists.txt
├── HelloAndroid
│ ├── .gitignore
│ ├── AndroidManifest.xml
│ ├── README.md
│ ├── ant.properties
│ ├── build-gradle.sh
│ ├── build.gradle
│ ├── build.sh
│ ├── build.xml
│ ├── gradle
│ │ └── wrapper
│ │ │ ├── gradle-wrapper.jar
│ │ │ └── gradle-wrapper.properties
│ ├── gradlew
│ ├── gradlew.bat
│ ├── jni
│ │ ├── Android.mk
│ │ ├── Application.mk
│ │ ├── hello_generator.cpp
│ │ └── hello_wrapper.cpp
│ ├── res
│ │ ├── drawable-hdpi
│ │ │ └── ic_launcher.png
│ │ ├── drawable-ldpi
│ │ │ └── ic_launcher.png
│ │ ├── drawable-mdpi
│ │ │ └── ic_launcher.png
│ │ ├── drawable-xhdpi
│ │ │ └── ic_launcher.png
│ │ ├── layout
│ │ │ └── main.xml
│ │ └── values
│ │ │ └── strings.xml
│ └── src
│ │ └── com
│ │ └── example
│ │ └── hellohalide
│ │ ├── CameraActivity.java
│ │ ├── CameraPreview.java
│ │ └── FrameHandler.java
├── HelloAndroidCamera2
│ ├── .gitignore
│ ├── AndroidManifest.xml
│ ├── README.md
│ ├── ant.properties
│ ├── build-gradle.sh
│ ├── build.gradle
│ ├── build.sh
│ ├── build.xml
│ ├── gradle
│ │ └── wrapper
│ │ │ ├── gradle-wrapper.jar
│ │ │ └── gradle-wrapper.properties
│ ├── gradlew
│ ├── gradlew.bat
│ ├── jni
│ │ ├── Android.mk
│ │ ├── AndroidBufferUtilities.cpp
│ │ ├── AndroidBufferUtilities.h
│ │ ├── Application.mk
│ │ ├── HalideFilters.cpp
│ │ ├── LockedSurface.cpp
│ │ ├── LockedSurface.h
│ │ ├── YuvBufferT.cpp
│ │ ├── YuvBufferT.h
│ │ ├── deinterleave_generator.cpp
│ │ └── edge_detect_generator.cpp
│ ├── res
│ │ ├── drawable-hdpi
│ │ │ └── ic_launcher.png
│ │ ├── drawable-ldpi
│ │ │ └── ic_launcher.png
│ │ ├── drawable-mdpi
│ │ │ └── ic_launcher.png
│ │ ├── drawable-xhdpi
│ │ │ └── ic_launcher.png
│ │ ├── layout
│ │ │ ├── activity_camera.xml
│ │ │ ├── fragment_camera2_basic.xml
│ │ │ └── main.xml
│ │ └── values
│ │ │ ├── strings.xml
│ │ │ └── styles.xml
│ └── src
│ │ └── com
│ │ ├── android
│ │ └── ex
│ │ │ └── camera2
│ │ │ ├── blocking
│ │ │ ├── BlockingCameraManager.java
│ │ │ ├── BlockingCaptureCallback.java
│ │ │ ├── BlockingSessionCallback.java
│ │ │ └── BlockingStateCallback.java
│ │ │ ├── exceptions
│ │ │ └── TimeoutRuntimeException.java
│ │ │ ├── pos
│ │ │ └── AutoFocusStateMachine.java
│ │ │ └── utils
│ │ │ ├── StateChangeListener.java
│ │ │ ├── StateWaiter.java
│ │ │ └── SysTrace.java
│ │ └── example
│ │ └── helloandroidcamera2
│ │ ├── AndroidBufferUtilities.java
│ │ ├── AutoFitSurfaceView.java
│ │ ├── Camera2BasicFragment.java
│ │ ├── CameraActivity.java
│ │ ├── HalideFilters.java
│ │ ├── HalideYuvBufferT.java
│ │ └── NativeSurfaceHandle.java
├── HelloAndroidGL
│ ├── AndroidManifest.xml
│ ├── ant.properties
│ ├── build.sh
│ ├── build.xml
│ ├── jni
│ │ ├── Android.mk
│ │ ├── Application.mk
│ │ ├── android_halide_gl_native.cpp
│ │ └── halide_gl_filter.cpp
│ ├── project.properties
│ ├── res
│ │ ├── drawable-hdpi
│ │ │ └── ic_launcher.png
│ │ ├── drawable-ldpi
│ │ │ └── ic_launcher.png
│ │ ├── drawable-mdpi
│ │ │ └── ic_launcher.png
│ │ ├── drawable-xhdpi
│ │ │ └── ic_launcher.png
│ │ ├── layout
│ │ │ └── main.xml
│ │ └── values
│ │ │ └── strings.xml
│ └── src
│ │ └── org
│ │ └── halide_lang
│ │ └── hellohalidegl
│ │ └── HelloHalideGL.java
├── HelloMatlab
│ ├── Makefile
│ ├── iir_blur.cpp
│ ├── run_blur.m
│ └── run_blur.sh
├── HelloiOS
│ ├── HelloiOS.xcodeproj
│ │ └── project.pbxproj
│ └── HelloiOS
│ │ ├── AppDelegate.h
│ │ ├── AppDelegate.mm
│ │ ├── HalideView.h
│ │ ├── HalideView.mm
│ │ ├── HalideViewController.h
│ │ ├── HalideViewController.mm
│ │ ├── HelloiOS-Info.plist
│ │ ├── HelloiOS-Prefix.pch
│ │ ├── Images.xcassets
│ │ ├── AppIcon.appiconset
│ │ │ └── Contents.json
│ │ └── LaunchImage.launchimage
│ │ │ └── Contents.json
│ │ ├── en.lproj
│ │ └── InfoPlist.strings
│ │ ├── main.mm
│ │ └── reaction_diffusion_2_generator.cpp
├── auto_viz
│ ├── Makefile
│ ├── auto_viz_demo.cpp
│ └── auto_viz_demo_generator.cpp
├── autoscheduler
│ ├── ASLog.cpp
│ ├── ASLog.h
│ ├── AutoSchedule.cpp
│ ├── AutoSchedule.h
│ ├── CostModel.h
│ ├── DefaultCostModel.cpp
│ ├── Errors.h
│ ├── Featurization.h
│ ├── FunctionDAG.h
│ ├── Makefile
│ ├── NetworkSize.h
│ ├── PerfectHashMap.h
│ ├── autotune_loop.sh
│ ├── cost_model_generator.cpp
│ ├── cost_model_schedule.h
│ ├── demo_generator.cpp
│ ├── featurization_to_sample.cpp
│ ├── test.cpp
│ ├── test_perfect_hash_map.cpp
│ ├── train_cost_model.cpp
│ └── weights
│ │ ├── head1_conv1_bias.data
│ │ ├── head1_conv1_weight.data
│ │ ├── head2_conv1_bias.data
│ │ ├── head2_conv1_weight.data
│ │ ├── trunk_conv1_bias.data
│ │ └── trunk_conv1_weight.data
├── bilateral_grid
│ ├── CMakeLists.txt
│ ├── Makefile
│ ├── bilateral_grid_generator.cpp
│ ├── filter.cpp
│ └── viz.sh
├── blur
│ ├── CMakeLists.txt
│ ├── Makefile
│ ├── adb_run_on_device.sh
│ ├── halide_blur_generator.cpp
│ └── test.cpp
├── c_backend
│ ├── CMakeLists.txt
│ ├── Makefile
│ ├── pipeline_cpp_generator.cpp
│ ├── pipeline_generator.cpp
│ ├── run.cpp
│ └── run_cpp.cpp
├── camera_pipe
│ ├── CMakeLists.txt
│ ├── Makefile
│ ├── camera_pipe_generator.cpp
│ ├── process.cpp
│ └── viz.sh
├── conv_layer
│ ├── CMakeLists.txt
│ ├── Makefile
│ ├── conv_layer_generator.cpp
│ └── process.cpp
├── cuda_mat_mul
│ ├── Makefile
│ ├── mat_mul_generator.cpp
│ └── runner.cpp
├── fft
│ ├── Makefile
│ ├── complex.h
│ ├── fft.cpp
│ ├── fft.h
│ ├── fft_aot_test.cpp
│ ├── fft_generator.cpp
│ ├── funct.h
│ └── main.cpp
├── glsl
│ ├── CMakeLists.txt
│ ├── Makefile
│ ├── halide_blur_glsl_generator.cpp
│ ├── halide_ycc_glsl_generator.cpp
│ └── opengl_test.cpp
├── hexagon_benchmarks
│ ├── Makefile
│ ├── adb_run_on_device.sh
│ ├── conv3x3_generator.cpp
│ ├── dilate3x3_generator.cpp
│ ├── gaussian5x5_generator.cpp
│ ├── median3x3_generator.cpp
│ ├── process.cpp
│ ├── process.h
│ └── sobel_generator.cpp
├── hexagon_dma
│ ├── Makefile
│ ├── mock_dma_implementation.cpp
│ ├── pipeline_raw_linear_interleaved_basic.cpp
│ ├── pipeline_yuv_linear_basic.cpp
│ ├── process_raw_linear_interleaved_basic.cpp
│ └── process_yuv_linear_basic.cpp
├── images
│ ├── bayer_raw.png
│ ├── bayer_small.png
│ ├── gray.png
│ ├── gray_small.png
│ ├── rgb.png
│ ├── rgb_small.png
│ ├── rgb_small16.png
│ └── rgba.png
├── interpolate
│ ├── CMakeLists.txt
│ ├── Makefile
│ └── interpolate.cpp
├── lens_blur
│ ├── CMakeLists.txt
│ ├── Makefile
│ ├── lens_blur_generator.cpp
│ └── process.cpp
├── linear_algebra
│ ├── .gitignore
│ ├── CMakeLists.txt
│ ├── Makefile
│ ├── benchmarks
│ │ ├── CMakeLists.txt
│ │ ├── cblas_benchmarks.cpp
│ │ ├── clock.h
│ │ ├── eigen_benchmarks.cpp
│ │ ├── halide_benchmarks.cpp
│ │ └── macros.h
│ ├── src
│ │ ├── CMakeLists.txt
│ │ ├── blas_l1_generators.cpp
│ │ ├── blas_l2_generators.cpp
│ │ ├── blas_l3_generators.cpp
│ │ ├── halide_blas.cpp
│ │ └── halide_blas.h
│ └── tests
│ │ ├── CMakeLists.txt
│ │ └── test_halide_blas.cpp
├── linear_blur
│ ├── CMakeLists.txt
│ ├── linear_blur_generator.cpp
│ ├── linear_to_srgb_generator.cpp
│ ├── run_linear_blur.cpp
│ ├── simple_blur_generator.cpp
│ └── srgb_to_linear_generator.cpp
├── local_laplacian
│ ├── CMakeLists.txt
│ ├── Makefile
│ ├── local_laplacian_generator.cpp
│ ├── process.cpp
│ └── viz.sh
├── nl_means
│ ├── CMakeLists.txt
│ ├── Makefile
│ ├── nl_means_generator.cpp
│ └── process.cpp
├── nn_ops
│ ├── AveragePool.cpp
│ ├── AveragePool.sh
│ ├── AveragePool_generator.cpp
│ ├── Convolution.cpp
│ ├── Convolution.sh
│ ├── Convolution_generator.cpp
│ ├── DepthwiseConvolution.cpp
│ ├── DepthwiseConvolution.sh
│ ├── DepthwiseConvolution_generator.cpp
│ ├── Im2col.cpp
│ ├── Im2col.sh
│ ├── Im2col_generator.cpp
│ ├── Makefile
│ ├── MatrixMultiply.cpp
│ ├── MatrixMultiply.sh
│ ├── MatrixMultiply_generator.cpp
│ ├── MaxPool.cpp
│ ├── MaxPool.sh
│ ├── MaxPool_generator.cpp
│ ├── README.md
│ ├── adb_run_on_device.sh
│ ├── common.cpp
│ ├── common.h
│ ├── common_reference.cpp
│ └── common_reference.h
├── onnx
│ ├── Makefile
│ ├── common_types.h
│ ├── halide_as_onnx_backend.py
│ ├── halide_as_onnx_backend_test.py
│ ├── model.cpp
│ ├── model.py
│ ├── model_test.py
│ ├── onnx_converter.cc
│ ├── onnx_converter.h
│ ├── onnx_converter_generator.cc
│ ├── onnx_converter_generator_test.cc
│ ├── onnx_converter_test.cc
│ └── test_model_proto.txt
├── opengl_demo
│ ├── Makefile
│ ├── README.md
│ ├── glfw_helpers.cpp
│ ├── glfw_helpers.h
│ ├── image.png
│ ├── layout.cpp
│ ├── layout.h
│ ├── main.cpp
│ ├── opengl_helpers.cpp
│ ├── opengl_helpers.h
│ ├── png_helpers.cpp
│ ├── png_helpers.h
│ ├── sample_filter_generator.cpp
│ ├── timer.cpp
│ └── timer.h
├── openglcompute
│ ├── AndroidManifest.xml
│ ├── Makefile
│ ├── build.sh
│ ├── build.xml
│ ├── jni
│ │ ├── Android.mk
│ │ ├── Application.mk
│ │ ├── oglc_run.cpp
│ │ └── oglc_two_kernels_run.cpp
│ ├── res
│ │ ├── drawable-hdpi
│ │ │ └── ic_launcher.png
│ │ ├── drawable-ldpi
│ │ │ └── ic_launcher.png
│ │ ├── drawable-mdpi
│ │ │ └── ic_launcher.png
│ │ ├── drawable-xhdpi
│ │ │ └── ic_launcher.png
│ │ ├── layout
│ │ │ └── main.xml
│ │ └── values
│ │ │ └── strings.xml
│ ├── src
│ │ └── com
│ │ │ └── example
│ │ │ └── hellohalideopenglcompute
│ │ │ └── HalideOpenGLComputeActivity.java
│ ├── test_oglc_avg.cpp
│ └── test_two_kernels.cpp
├── resize
│ ├── CMakeLists.txt
│ ├── Makefile
│ ├── resize.cpp
│ └── resize_generator.cpp
├── resnet_50
│ ├── Makefile
│ ├── Resnet50Generator.cpp
│ ├── load_weights.py
│ ├── process.cpp
│ └── validate_resnet50_output.py
├── simd_op_check
│ ├── Makefile
│ └── driver.cpp
├── stencil_chain
│ ├── CMakeLists.txt
│ ├── Makefile
│ ├── process.cpp
│ └── stencil_chain_generator.cpp
├── support
│ ├── Makefile.inc
│ ├── autoscheduler.inc
│ └── viz_auto.sh
└── wavelet
│ ├── CMakeLists.txt
│ ├── Makefile
│ ├── README.md
│ ├── daubechies_constants.h
│ ├── daubechies_x_generator.cpp
│ ├── haar_x_generator.cpp
│ ├── inverse_daubechies_x_generator.cpp
│ ├── inverse_haar_x_generator.cpp
│ └── wavelet.cpp
├── halide.cmake
├── python_bindings
├── Makefile
├── apps
│ ├── bilateral_grid.py
│ ├── blur.py
│ ├── erode.py
│ ├── interpolate.py
│ └── local_laplacian.py
├── correctness
│ ├── addconstant_generator.cpp
│ ├── addconstant_test.py
│ ├── autodiff.py
│ ├── basics.py
│ ├── bit_generator.cpp
│ ├── bit_test.py
│ ├── boundary_conditions.py
│ ├── buffer.py
│ ├── compile_to.py
│ ├── complexstub_generator.cpp
│ ├── division.py
│ ├── extern.py
│ ├── iroperator.py
│ ├── multipass_constraints.py
│ ├── nobuildmethod_generator.cpp
│ ├── partialbuildmethod_generator.cpp
│ ├── pystub.py
│ ├── rdom.py
│ ├── simplestub_generator.cpp
│ ├── target.py
│ ├── the_sort_function.c
│ ├── tuple_select.py
│ ├── type.py
│ ├── user_context_generator.cpp
│ ├── user_context_test.py
│ └── var.py
├── readme.md
├── requirements.txt
├── src
│ ├── PyArgument.cpp
│ ├── PyArgument.h
│ ├── PyBinaryOperators.h
│ ├── PyBoundaryConditions.cpp
│ ├── PyBoundaryConditions.h
│ ├── PyBuffer.cpp
│ ├── PyBuffer.h
│ ├── PyConciseCasts.cpp
│ ├── PyConciseCasts.h
│ ├── PyDerivative.cpp
│ ├── PyDerivative.h
│ ├── PyEnums.cpp
│ ├── PyEnums.h
│ ├── PyError.cpp
│ ├── PyError.h
│ ├── PyExpr.cpp
│ ├── PyExpr.h
│ ├── PyExternFuncArgument.cpp
│ ├── PyExternFuncArgument.h
│ ├── PyFunc.cpp
│ ├── PyFunc.h
│ ├── PyFuncRef.cpp
│ ├── PyFuncRef.h
│ ├── PyHalide.cpp
│ ├── PyHalide.h
│ ├── PyIROperator.cpp
│ ├── PyIROperator.h
│ ├── PyImageParam.cpp
│ ├── PyImageParam.h
│ ├── PyInlineReductions.cpp
│ ├── PyInlineReductions.h
│ ├── PyLambda.cpp
│ ├── PyLambda.h
│ ├── PyLoopLevel.cpp
│ ├── PyLoopLevel.h
│ ├── PyMachineParams.cpp
│ ├── PyMachineParams.h
│ ├── PyModule.cpp
│ ├── PyModule.h
│ ├── PyOutputs.cpp
│ ├── PyOutputs.h
│ ├── PyParam.cpp
│ ├── PyParam.h
│ ├── PyPipeline.cpp
│ ├── PyPipeline.h
│ ├── PyRDom.cpp
│ ├── PyRDom.h
│ ├── PyScheduleMethods.h
│ ├── PyStage.cpp
│ ├── PyStage.h
│ ├── PyTarget.cpp
│ ├── PyTarget.h
│ ├── PyTuple.cpp
│ ├── PyTuple.h
│ ├── PyType.cpp
│ ├── PyType.h
│ ├── PyVar.cpp
│ ├── PyVar.h
│ ├── PyVarOrRVar.cpp
│ └── PyVarOrRVar.h
├── stub
│ ├── PyStub.cpp
│ └── PyStubImpl.cpp
├── todo.txt
└── tutorial
│ ├── lesson_01_basics.py
│ ├── lesson_02_input_image.py
│ ├── lesson_03_debugging_1.py
│ ├── lesson_04_debugging_2.py
│ ├── lesson_05_scheduling_1.py
│ ├── lesson_06_realizing_over_shifted_domains.py
│ ├── lesson_07_multi_stage_pipelines.py
│ ├── lesson_08_scheduling_2.py
│ ├── lesson_09_update_definitions.py
│ ├── lesson_10_aot_compilation_generate.py
│ ├── lesson_10_aot_compilation_run.py
│ ├── lesson_11_cross_compilation.py
│ ├── lesson_12_using_the_gpu.py
│ ├── lesson_13_tuples.py
│ └── lesson_14_types.py
├── src
├── AddImageChecks.cpp
├── AddImageChecks.h
├── AddParameterChecks.cpp
├── AddParameterChecks.h
├── AlignLoads.cpp
├── AlignLoads.h
├── AllocationBoundsInference.cpp
├── AllocationBoundsInference.h
├── ApplySplit.cpp
├── ApplySplit.h
├── Argument.cpp
├── Argument.h
├── AssociativeOpsTable.cpp
├── AssociativeOpsTable.h
├── Associativity.cpp
├── Associativity.h
├── AsyncProducers.cpp
├── AsyncProducers.h
├── AutoSchedule.cpp
├── AutoSchedule.h
├── AutoScheduleUtils.cpp
├── AutoScheduleUtils.h
├── BoundSmallAllocations.cpp
├── BoundSmallAllocations.h
├── BoundaryConditions.cpp
├── BoundaryConditions.h
├── Bounds.cpp
├── Bounds.h
├── BoundsInference.cpp
├── BoundsInference.h
├── Buffer.cpp
├── Buffer.h
├── CMakeLists.txt
├── CPlusPlusMangle.cpp
├── CPlusPlusMangle.h
├── CSE.cpp
├── CSE.h
├── CanonicalizeGPUVars.cpp
├── CanonicalizeGPUVars.h
├── Closure.cpp
├── Closure.h
├── CodeGen_ARM.cpp
├── CodeGen_ARM.h
├── CodeGen_C.cpp
├── CodeGen_C.h
├── CodeGen_D3D12Compute_Dev.cpp
├── CodeGen_D3D12Compute_Dev.h
├── CodeGen_GPU_Dev.cpp
├── CodeGen_GPU_Dev.h
├── CodeGen_GPU_Host.cpp
├── CodeGen_GPU_Host.h
├── CodeGen_Hexagon.cpp
├── CodeGen_Hexagon.h
├── CodeGen_Internal.cpp
├── CodeGen_Internal.h
├── CodeGen_LLVM.cpp
├── CodeGen_LLVM.h
├── CodeGen_MIPS.cpp
├── CodeGen_MIPS.h
├── CodeGen_Metal_Dev.cpp
├── CodeGen_Metal_Dev.h
├── CodeGen_OpenCL_Dev.cpp
├── CodeGen_OpenCL_Dev.h
├── CodeGen_OpenGLCompute_Dev.cpp
├── CodeGen_OpenGLCompute_Dev.h
├── CodeGen_OpenGL_Dev.cpp
├── CodeGen_OpenGL_Dev.h
├── CodeGen_PTX_Dev.cpp
├── CodeGen_PTX_Dev.h
├── CodeGen_Posix.cpp
├── CodeGen_Posix.h
├── CodeGen_PowerPC.cpp
├── CodeGen_PowerPC.h
├── CodeGen_RISCV.cpp
├── CodeGen_RISCV.h
├── CodeGen_WebAssembly.cpp
├── CodeGen_WebAssembly.h
├── CodeGen_X86.cpp
├── CodeGen_X86.h
├── ConciseCasts.h
├── ConvolutionsCompilerForAICore.cpp
├── ConvolutionsCompilerForAICore.h
├── Debug.cpp
├── Debug.h
├── DebugArguments.cpp
├── DebugArguments.h
├── DebugToFile.cpp
├── DebugToFile.h
├── Definition.cpp
├── Definition.h
├── Deinterleave.cpp
├── Deinterleave.h
├── Derivative.cpp
├── Derivative.h
├── DerivativeUtils.cpp
├── DerivativeUtils.h
├── DeviceArgument.cpp
├── DeviceArgument.h
├── DeviceInterface.cpp
├── DeviceInterface.h
├── Dimension.cpp
├── Dimension.h
├── EarlyFree.cpp
├── EarlyFree.h
├── Elf.cpp
├── Elf.h
├── EliminateBoolVectors.cpp
├── EliminateBoolVectors.h
├── Error.cpp
├── Error.h
├── Expr.h
├── ExprUsesVar.h
├── Extern.h
├── ExternalCode.h
├── FastIntegerDivide.cpp
├── FastIntegerDivide.h
├── FindCalls.cpp
├── FindCalls.h
├── Float16.cpp
├── Float16.h
├── Func.cpp
├── Func.h
├── Function.cpp
├── Function.h
├── FunctionPtr.h
├── FuseGPUThreadLoops.cpp
├── FuseGPUThreadLoops.h
├── FuzzFloatStores.cpp
├── FuzzFloatStores.h
├── Generator.cpp
├── Generator.h
├── HexagonAlignment.h
├── HexagonOffload.cpp
├── HexagonOffload.h
├── HexagonOptimize.cpp
├── HexagonOptimize.h
├── IR.cpp
├── IR.h
├── IREquality.cpp
├── IREquality.h
├── IRMatch.cpp
├── IRMatch.h
├── IRMutator.cpp
├── IRMutator.h
├── IROperator.cpp
├── IROperator.h
├── IRPrinter.cpp
├── IRPrinter.h
├── IRVisitor.cpp
├── IRVisitor.h
├── ImageParam.cpp
├── ImageParam.h
├── InferArguments.cpp
├── InferArguments.h
├── InjectHostDevBufferCopies.cpp
├── InjectHostDevBufferCopies.h
├── InjectOpenGLIntrinsics.cpp
├── InjectOpenGLIntrinsics.h
├── Inline.cpp
├── Inline.h
├── InlineReductions.cpp
├── InlineReductions.h
├── IntegerDivisionTable.cpp
├── IntegerDivisionTable.h
├── Interval.cpp
├── Interval.h
├── Introspection.cpp
├── Introspection.h
├── IntrusivePtr.h
├── JITModule.cpp
├── JITModule.h
├── LICM.cpp
├── LICM.h
├── LLVM_Headers.h
├── LLVM_Output.cpp
├── LLVM_Output.h
├── LLVM_Runtime_Linker.cpp
├── LLVM_Runtime_Linker.h
├── Lambda.h
├── Lerp.cpp
├── Lerp.h
├── LoopCarry.cpp
├── LoopCarry.h
├── Lower.cpp
├── Lower.h
├── LowerWarpShuffles.cpp
├── LowerWarpShuffles.h
├── MainPage.h
├── MatlabWrapper.cpp
├── MatlabWrapper.h
├── Memoization.cpp
├── Memoization.h
├── Module.cpp
├── Module.h
├── ModulusRemainder.cpp
├── ModulusRemainder.h
├── Monotonic.cpp
├── Monotonic.h
├── ObjectInstanceRegistry.cpp
├── ObjectInstanceRegistry.h
├── OutputImageParam.cpp
├── OutputImageParam.h
├── Outputs.h
├── ParallelRVar.cpp
├── ParallelRVar.h
├── Param.h
├── ParamMap.cpp
├── ParamMap.h
├── Parameter.cpp
├── Parameter.h
├── PartitionLoops.cpp
├── PartitionLoops.h
├── Pipeline.cpp
├── Pipeline.h
├── Prefetch.cpp
├── Prefetch.h
├── PrintLoopNest.cpp
├── PrintLoopNest.h
├── Profiling.cpp
├── Profiling.h
├── PurifyIndexMath.cpp
├── PurifyIndexMath.h
├── PythonExtensionGen.cpp
├── PythonExtensionGen.h
├── Qualify.cpp
├── Qualify.h
├── RDom.cpp
├── RDom.h
├── Random.cpp
├── Random.h
├── RealizationOrder.cpp
├── RealizationOrder.h
├── Reduction.cpp
├── Reduction.h
├── RegionCosts.cpp
├── RegionCosts.h
├── RemoveDeadAllocations.cpp
├── RemoveDeadAllocations.h
├── RemoveExternLoops.cpp
├── RemoveExternLoops.h
├── RemoveUndef.cpp
├── RemoveUndef.h
├── RoundingMode.h
├── Schedule.cpp
├── Schedule.h
├── ScheduleFunctions.cpp
├── ScheduleFunctions.h
├── Scope.h
├── SelectGPUAPI.cpp
├── SelectGPUAPI.h
├── Simplify.cpp
├── Simplify.h
├── SimplifyCorrelatedDifferences.cpp
├── SimplifyCorrelatedDifferences.h
├── SimplifySpecializations.cpp
├── SimplifySpecializations.h
├── Simplify_Add.cpp
├── Simplify_And.cpp
├── Simplify_Call.cpp
├── Simplify_Cast.cpp
├── Simplify_Div.cpp
├── Simplify_EQ.cpp
├── Simplify_Exprs.cpp
├── Simplify_Internal.h
├── Simplify_LT.cpp
├── Simplify_Let.cpp
├── Simplify_Max.cpp
├── Simplify_Min.cpp
├── Simplify_Mod.cpp
├── Simplify_Mul.cpp
├── Simplify_Not.cpp
├── Simplify_Or.cpp
├── Simplify_Select.cpp
├── Simplify_Shuffle.cpp
├── Simplify_Stmts.cpp
├── Simplify_Sub.cpp
├── SkipStages.cpp
├── SkipStages.h
├── SlidingWindow.cpp
├── SlidingWindow.h
├── Solve.cpp
├── Solve.h
├── SplitTuples.cpp
├── SplitTuples.h
├── StmtToHtml.cpp
├── StmtToHtml.h
├── StorageFlattening.cpp
├── StorageFlattening.h
├── StorageFolding.cpp
├── StorageFolding.h
├── StrictifyFloat.cpp
├── StrictifyFloat.h
├── Substitute.cpp
├── Substitute.h
├── Target.cpp
├── Target.h
├── ThreadPool.h
├── Tracing.cpp
├── Tracing.h
├── TrimNoOps.cpp
├── TrimNoOps.h
├── Tuple.cpp
├── Tuple.h
├── Type.cpp
├── Type.h
├── UnifyDuplicateLets.cpp
├── UnifyDuplicateLets.h
├── UniquifyVariableNames.cpp
├── UniquifyVariableNames.h
├── UnpackBuffers.cpp
├── UnpackBuffers.h
├── UnrollLoops.cpp
├── UnrollLoops.h
├── UnsafePromises.cpp
├── UnsafePromises.h
├── Util.cpp
├── Util.h
├── Var.cpp
├── Var.h
├── VaryingAttributes.cpp
├── VaryingAttributes.h
├── VectorizeLoops.cpp
├── VectorizeLoops.h
├── WasmExecutor.cpp
├── WasmExecutor.h
├── WrapCalls.cpp
├── WrapCalls.h
├── WrapExternStages.cpp
├── WrapExternStages.h
└── runtime
│ ├── HalideBuffer.h
│ ├── HalideRuntime.h
│ ├── HalideRuntimeCuda.h
│ ├── HalideRuntimeD3D12Compute.h
│ ├── HalideRuntimeHexagonDma.h
│ ├── HalideRuntimeHexagonHost.h
│ ├── HalideRuntimeMetal.h
│ ├── HalideRuntimeOpenCL.h
│ ├── HalideRuntimeOpenGL.h
│ ├── HalideRuntimeOpenGLCompute.h
│ ├── HalideRuntimeQurt.h
│ ├── aarch64.ll
│ ├── aarch64_cpu_features.cpp
│ ├── alignment_128.cpp
│ ├── alignment_32.cpp
│ ├── alignment_64.cpp
│ ├── android_clock.cpp
│ ├── android_host_cpu_count.cpp
│ ├── android_io.cpp
│ ├── android_ioctl.h
│ ├── arm.ll
│ ├── arm_cpu_features.cpp
│ ├── arm_no_neon.ll
│ ├── buffer_t.cpp
│ ├── cache.cpp
│ ├── can_use_target.cpp
│ ├── cl_functions.h
│ ├── cpu_features.h
│ ├── cuda.cpp
│ ├── cuda_functions.h
│ ├── d3d12_abi_patch_64.h
│ ├── d3d12_abi_patch_64.ll
│ ├── d3d12compute.cpp
│ ├── destructors.cpp
│ ├── device_buffer_utils.h
│ ├── device_interface.cpp
│ ├── device_interface.h
│ ├── errors.cpp
│ ├── fake_get_symbol.cpp
│ ├── fake_thread_pool.cpp
│ ├── float16_t.cpp
│ ├── fuchsia_clock.cpp
│ ├── fuchsia_host_cpu_count.cpp
│ ├── fuchsia_yield.cpp
│ ├── gpu_device_selection.cpp
│ ├── hashmap.h
│ ├── hexagon_cache_allocator.cpp
│ ├── hexagon_cpu_features.cpp
│ ├── hexagon_dma.cpp
│ ├── hexagon_dma_pool.cpp
│ ├── hexagon_dma_pool.h
│ ├── hexagon_host.cpp
│ ├── hexagon_remote
│ ├── .gitignore
│ ├── Makefile
│ ├── bin
│ │ ├── arm-32-android
│ │ │ └── libhalide_hexagon_host.so
│ │ ├── arm-64-android
│ │ │ └── libhalide_hexagon_host.so
│ │ ├── host
│ │ │ └── libhalide_hexagon_host.so
│ │ ├── src
│ │ │ ├── halide_hexagon_remote.h
│ │ │ ├── halide_hexagon_remote_skel.c
│ │ │ └── halide_hexagon_remote_stub.c
│ │ └── v60
│ │ │ ├── hexagon_sim_remote
│ │ │ ├── libhalide_hexagon_remote_skel.so
│ │ │ ├── libsim_qurt.a
│ │ │ ├── libsim_qurt_vtcm.a
│ │ │ └── signed_by_debug
│ │ │ └── libhalide_hexagon_remote_skel.so
│ ├── c11_stubs.cpp
│ ├── dlib.cpp
│ ├── dlib.h
│ ├── halide_hexagon_remote.idl
│ ├── halide_remote.cpp
│ ├── host_malloc.cpp
│ ├── host_shim.cpp
│ ├── instruction_encodings.txt
│ ├── known_symbols.cpp
│ ├── known_symbols.h
│ ├── libadsprpc_shim.cpp
│ ├── log.cpp
│ ├── log.h
│ ├── nearbyint.cpp
│ ├── pipeline_context.h
│ ├── sim_host.cpp
│ ├── sim_protocol.h
│ ├── sim_qurt.cpp
│ ├── sim_qurt_vtcm.cpp
│ └── sim_remote.cpp
│ ├── hvx_128.ll
│ ├── hvx_64.ll
│ ├── ios_io.cpp
│ ├── linux_clock.cpp
│ ├── linux_host_cpu_count.cpp
│ ├── linux_yield.cpp
│ ├── matlab.cpp
│ ├── metadata.cpp
│ ├── metal.cpp
│ ├── metal_objc_arm.cpp
│ ├── metal_objc_platform_dependent.cpp
│ ├── metal_objc_platform_dependent.h
│ ├── metal_objc_x86.cpp
│ ├── mex_functions.h
│ ├── mingw_math.cpp
│ ├── mini_cl.h
│ ├── mini_cuda.h
│ ├── mini_d3d12.h
│ ├── mini_hexagon_dma.h
│ ├── mini_opengl.h
│ ├── mini_qurt.h
│ ├── mini_qurt_vtcm.h
│ ├── mips.ll
│ ├── mips_cpu_features.cpp
│ ├── module_aot_ref_count.cpp
│ ├── module_jit_ref_count.cpp
│ ├── msan.cpp
│ ├── msan_stubs.cpp
│ ├── nvidia_libdevice_bitcode
│ ├── libdevice.compute_20.10.bc
│ ├── libdevice.compute_30.10.bc
│ └── libdevice.compute_35.10.bc
│ ├── objc_support.h
│ ├── old_buffer_t.cpp
│ ├── opencl.cpp
│ ├── opengl.cpp
│ ├── opengl_egl_context.cpp
│ ├── opengl_glx_context.cpp
│ ├── openglcompute.cpp
│ ├── osx_clock.cpp
│ ├── osx_get_symbol.cpp
│ ├── osx_host_cpu_count.cpp
│ ├── osx_opengl_context.cpp
│ ├── osx_yield.cpp
│ ├── posix_abort.cpp
│ ├── posix_allocator.cpp
│ ├── posix_clock.cpp
│ ├── posix_error_handler.cpp
│ ├── posix_get_symbol.cpp
│ ├── posix_io.cpp
│ ├── posix_math.ll
│ ├── posix_print.cpp
│ ├── posix_threads.cpp
│ ├── posix_threads_tsan.cpp
│ ├── powerpc.ll
│ ├── powerpc_cpu_features.cpp
│ ├── prefetch.cpp
│ ├── printer.h
│ ├── profiler.cpp
│ ├── profiler_inlined.cpp
│ ├── pseudostack.cpp
│ ├── ptx_dev.ll
│ ├── qurt_allocator.cpp
│ ├── qurt_hvx.cpp
│ ├── qurt_hvx_vtcm.cpp
│ ├── qurt_init_fini.cpp
│ ├── qurt_threads.cpp
│ ├── qurt_threads_tsan.cpp
│ ├── qurt_yield.cpp
│ ├── riscv_cpu_features.cpp
│ ├── runtime_api.cpp
│ ├── runtime_internal.h
│ ├── scoped_mutex_lock.h
│ ├── scoped_spin_lock.h
│ ├── ssp.cpp
│ ├── synchronization_common.h
│ ├── thread_pool_common.h
│ ├── to_string.cpp
│ ├── trace_helper.cpp
│ ├── tracing.cpp
│ ├── wasm_cpu_features.cpp
│ ├── wasm_math.ll
│ ├── win32_math.ll
│ ├── windows_abort.cpp
│ ├── windows_clock.cpp
│ ├── windows_cuda.cpp
│ ├── windows_get_symbol.cpp
│ ├── windows_io.cpp
│ ├── windows_opencl.cpp
│ ├── windows_profiler.cpp
│ ├── windows_threads.cpp
│ ├── windows_threads_tsan.cpp
│ ├── windows_yield.cpp
│ ├── write_debug_image.cpp
│ ├── x86.ll
│ ├── x86_avx.ll
│ ├── x86_avx2.ll
│ ├── x86_cpu_features.cpp
│ └── x86_sse41.ll
├── test
├── CMakeLists.txt
├── auto_schedule
│ ├── cost_function.cpp
│ ├── data_dependent.cpp
│ ├── extern.cpp
│ ├── fibonacci.cpp
│ ├── harris.cpp
│ ├── histogram.cpp
│ ├── iir.cpp
│ ├── interpolate.cpp
│ ├── large_window.cpp
│ ├── mat_mul.cpp
│ ├── max_filter.cpp
│ ├── multi_output.cpp
│ ├── overlap.cpp
│ ├── param.cpp
│ ├── reorder.cpp
│ ├── tile_vs_inline.cpp
│ ├── unbounded_nonpure.cpp
│ ├── unsharp.cpp
│ ├── unused_func.cpp
│ └── vectorize_var_in_update.cpp
├── common
│ ├── check_call_graphs.h
│ ├── expect_failure.sh
│ ├── gpu_object_lifetime_tracker.h
│ └── halide_test_dirs.h
├── correctness
│ ├── align_bounds.cpp
│ ├── argmax.cpp
│ ├── assertion_failure_in_parallel_for.cpp
│ ├── async.cpp
│ ├── async_copy_chain.cpp
│ ├── async_device_copy.cpp
│ ├── autodiff.cpp
│ ├── autoschedule_small_pure_update.cpp
│ ├── autotune_bug.cpp
│ ├── autotune_bug_2.cpp
│ ├── autotune_bug_3.cpp
│ ├── autotune_bug_4.cpp
│ ├── autotune_bug_5.cpp
│ ├── bad_likely.cpp
│ ├── bit_counting.cpp
│ ├── bitwise_ops.cpp
│ ├── bool_compute_root_vectorize.cpp
│ ├── bound.cpp
│ ├── bound_small_allocations.cpp
│ ├── boundary_conditions.cpp
│ ├── bounds.cpp
│ ├── bounds_inference.cpp
│ ├── bounds_inference_chunk.cpp
│ ├── bounds_inference_complex.cpp
│ ├── bounds_inference_outer_split.cpp
│ ├── bounds_of_abs.cpp
│ ├── bounds_of_cast.cpp
│ ├── bounds_of_func.cpp
│ ├── bounds_of_monotonic_math.cpp
│ ├── bounds_of_multiply.cpp
│ ├── bounds_query.cpp
│ ├── buffer_t.cpp
│ ├── c_function.cpp
│ ├── cascaded_filters.cpp
│ ├── cast.cpp
│ ├── cast_handle.cpp
│ ├── chunk.cpp
│ ├── chunk_sharing.cpp
│ ├── circular_reference_leak.cpp
│ ├── code_explosion.cpp
│ ├── compare_vars.cpp
│ ├── compile_to.cpp
│ ├── compile_to_bitcode.cpp
│ ├── compile_to_lowered_stmt.cpp
│ ├── compile_to_multitarget.cpp
│ ├── compute_at_reordered_update_stage.cpp
│ ├── compute_at_split_rvar.cpp
│ ├── compute_outermost.cpp
│ ├── compute_with.cpp
│ ├── compute_with_in.cpp
│ ├── compute_with_inlined.cpp
│ ├── computed_index.cpp
│ ├── concat.cpp
│ ├── constant_expr.cpp
│ ├── constant_type.cpp
│ ├── constraints.cpp
│ ├── convolution.cpp
│ ├── convolution_multiple_kernels.cpp
│ ├── cross_compilation.cpp
│ ├── custom_allocator.cpp
│ ├── custom_auto_scheduler.cpp
│ ├── custom_error_reporter.cpp
│ ├── custom_lowering_pass.cpp
│ ├── debug_to_file.cpp
│ ├── debug_to_file_multiple_outputs.cpp
│ ├── debug_to_file_reorder.cpp
│ ├── deferred_loop_level.cpp
│ ├── deinterleave4.cpp
│ ├── device_buffer_copy.cpp
│ ├── device_crop.cpp
│ ├── device_slice.cpp
│ ├── dilate3x3.cpp
│ ├── dynamic_reduction_bounds.cpp
│ ├── embed_bitcode.cpp
│ ├── erf.cpp
│ ├── exception.cpp
│ ├── explicit_inline_reductions.cpp
│ ├── extern_bounds_inference.cpp
│ ├── extern_consumer.cpp
│ ├── extern_consumer_tiled.cpp
│ ├── extern_error.cpp
│ ├── extern_output_expansion.cpp
│ ├── extern_partial.cpp
│ ├── extern_producer.cpp
│ ├── extern_reorder_storage.cpp
│ ├── extern_sort.cpp
│ ├── extern_stage.cpp
│ ├── extern_stage_on_device.cpp
│ ├── external_code.cpp
│ ├── failed_unroll.cpp
│ ├── fast_trigonometric.cpp
│ ├── fibonacci.cpp
│ ├── fit_function.cpp
│ ├── float16_t_comparison.cpp
│ ├── float16_t_constants.cpp
│ ├── float16_t_image_type.cpp
│ ├── for_each_element.cpp
│ ├── force_onto_stack.cpp
│ ├── func_clone.cpp
│ ├── func_lifetime.cpp
│ ├── func_lifetime_2.cpp
│ ├── func_wrapper.cpp
│ ├── fuse.cpp
│ ├── fuse_gpu_threads.cpp
│ ├── fused_where_inner_extent_is_zero.cpp
│ ├── fuzz_cse.cpp
│ ├── fuzz_float_stores.cpp
│ ├── fuzz_simplify.cpp
│ ├── gameoflife.cpp
│ ├── gather.cpp
│ ├── gpu_assertion_in_kernel.cpp
│ ├── gpu_bounds_inference_failure.cpp
│ ├── gpu_cpu_simultaneous_read.cpp
│ ├── gpu_data_flows.cpp
│ ├── gpu_dynamic_shared.cpp
│ ├── gpu_free_sync.cpp
│ ├── gpu_give_input_buffers_device_allocations.cpp
│ ├── gpu_jit_explicit_copy_to_device.cpp
│ ├── gpu_large_alloc.cpp
│ ├── gpu_mixed_dimensionality.cpp
│ ├── gpu_mixed_shared_mem_types.cpp
│ ├── gpu_multi_device.cpp
│ ├── gpu_multi_kernel.cpp
│ ├── gpu_non_contiguous_copy.cpp
│ ├── gpu_object_lifetime_1.cpp
│ ├── gpu_object_lifetime_2.cpp
│ ├── gpu_object_lifetime_3.cpp
│ ├── gpu_param_allocation.cpp
│ ├── gpu_reuse_shared_memory.cpp
│ ├── gpu_specialize.cpp
│ ├── gpu_sum_scan.cpp
│ ├── gpu_thread_barrier.cpp
│ ├── gpu_transpose.cpp
│ ├── gpu_vectorized_shared_memory.cpp
│ ├── halide_buffer.cpp
│ ├── handle.cpp
│ ├── heap_cleanup.cpp
│ ├── hello_gpu.cpp
│ ├── hexagon_scatter.cpp
│ ├── histogram.cpp
│ ├── histogram_equalize.cpp
│ ├── host_alignment.cpp
│ ├── image_io.cpp
│ ├── image_of_lists.cpp
│ ├── image_wrapper.cpp
│ ├── implicit_args.cpp
│ ├── implicit_args_tests.cpp
│ ├── in_place.cpp
│ ├── infer_arguments.cpp
│ ├── inline_reduction.cpp
│ ├── inlined_generator.cpp
│ ├── input_image_bounds_check.cpp
│ ├── input_larger_than_two_gigs.cpp
│ ├── integer_powers.cpp
│ ├── interleave.cpp
│ ├── interleave_rgb.cpp
│ ├── interleave_x.cpp
│ ├── interval.cpp
│ ├── introspection.cpp
│ ├── inverse.cpp
│ ├── isnan.cpp
│ ├── issue_3926.cpp
│ ├── iterate_over_circle.cpp
│ ├── lambda.cpp
│ ├── lazy_convolution.cpp
│ ├── leak_device_memory.cpp
│ ├── left_shift_negative.cpp
│ ├── legal_race_condition.cpp
│ ├── lerp.cpp
│ ├── let_in_rdom_bound.cpp
│ ├── likely.cpp
│ ├── load_library.cpp
│ ├── logical.cpp
│ ├── loop_invariant_extern_calls.cpp
│ ├── loop_level_generator_param.cpp
│ ├── lots_of_dimensions.cpp
│ ├── make_struct.cpp
│ ├── many_dimensions.cpp
│ ├── many_small_extern_stages.cpp
│ ├── many_updates.cpp
│ ├── math.cpp
│ ├── median3x3.cpp
│ ├── memoize.cpp
│ ├── memoize_cloned.cpp
│ ├── min_extent.cpp
│ ├── mod.cpp
│ ├── mul_div_mod.cpp
│ ├── multi_output_pipeline_with_bad_sizes.cpp
│ ├── multi_pass_reduction.cpp
│ ├── multi_splits_with_diff_tail_strategies.cpp
│ ├── multi_way_select.cpp
│ ├── multipass_constraints.cpp
│ ├── multiple_outputs.cpp
│ ├── multiple_outputs_extern.cpp
│ ├── named_updates.cpp
│ ├── nested_shiftinwards.cpp
│ ├── newtons_method.cpp
│ ├── non_vector_aligned_embeded_buffer.cpp
│ ├── obscure_image_references.cpp
│ ├── oddly_sized_output.cpp
│ ├── out_constraint.cpp
│ ├── out_of_memory.cpp
│ ├── output_larger_than_two_gigs.cpp
│ ├── parallel.cpp
│ ├── parallel_alloc.cpp
│ ├── parallel_fork.cpp
│ ├── parallel_gpu_nested.cpp
│ ├── parallel_nested.cpp
│ ├── parallel_nested_1.cpp
│ ├── parallel_reductions.cpp
│ ├── parallel_rvar.cpp
│ ├── param.cpp
│ ├── param_map.cpp
│ ├── parameter_constraints.cpp
│ ├── partial_application.cpp
│ ├── partial_realization.cpp
│ ├── partition_loops.cpp
│ ├── partition_loops_bug.cpp
│ ├── pipeline_set_jit_externs_func.cpp
│ ├── plain_c_includes.c
│ ├── popc_clz_ctz_bounds.cpp
│ ├── predicated_store_load.cpp
│ ├── prefetch.cpp
│ ├── print.cpp
│ ├── process_some_tiles.cpp
│ ├── pseudostack_shares_slots.cpp
│ ├── python_extension_gen.cpp
│ ├── random.cpp
│ ├── realize_larger_than_two_gigs.cpp
│ ├── realize_over_shifted_domain.cpp
│ ├── reduction_chain.cpp
│ ├── reduction_non_rectangular.cpp
│ ├── reduction_schedule.cpp
│ ├── register_shuffle.cpp
│ ├── reorder_rvars.cpp
│ ├── reorder_storage.cpp
│ ├── require.cpp
│ ├── reschedule.cpp
│ ├── reuse_stack_alloc.cpp
│ ├── rfactor.cpp
│ ├── round.cpp
│ ├── saturating_casts.cpp
│ ├── scatter.cpp
│ ├── set_custom_trace.cpp
│ ├── shared_self_references.cpp
│ ├── shifted_image.cpp
│ ├── side_effects.cpp
│ ├── simd_op_check.cpp
│ ├── simplified_away_embedded_image.cpp
│ ├── simplify.cpp
│ ├── skip_stages.cpp
│ ├── skip_stages_external_array_functions.cpp
│ ├── skip_stages_memoize.cpp
│ ├── sliding_backwards.cpp
│ ├── sliding_reduction.cpp
│ ├── sliding_window.cpp
│ ├── sort_exprs.cpp
│ ├── specialize.cpp
│ ├── specialize_to_gpu.cpp
│ ├── split_by_non_factor.cpp
│ ├── split_fuse_rvar.cpp
│ ├── split_reuse_inner_name_bug.cpp
│ ├── split_store_compute.cpp
│ ├── stack_allocations.cpp
│ ├── stencil_chain_in_update_definitions.cpp
│ ├── stmt_to_html.cpp
│ ├── storage_folding.cpp
│ ├── store_in.cpp
│ ├── stream_compaction.cpp
│ ├── strict_float.cpp
│ ├── strict_float_bounds.cpp
│ ├── strided_load.cpp
│ ├── target.cpp
│ ├── thread_safety.cpp
│ ├── tracing.cpp
│ ├── tracing_bounds.cpp
│ ├── tracing_broadcast.cpp
│ ├── tracing_stack.cpp
│ ├── transitive_bounds.cpp
│ ├── trim_no_ops.cpp
│ ├── truncated_pyramid.cpp
│ ├── tuple_partial_update.cpp
│ ├── tuple_reduction.cpp
│ ├── tuple_select.cpp
│ ├── tuple_undef.cpp
│ ├── tuple_update_ops.cpp
│ ├── two_vector_args.cpp
│ ├── undef.cpp
│ ├── uninitialized_read.cpp
│ ├── unique_func_image.cpp
│ ├── unroll_dynamic_loop.cpp
│ ├── unrolled_reduction.cpp
│ ├── unsafe_dedup_lets.cpp
│ ├── unsafe_promises.cpp
│ ├── unused_func.cpp
│ ├── update_chunk.cpp
│ ├── vector_bounds_inference.cpp
│ ├── vector_cast.cpp
│ ├── vector_extern.cpp
│ ├── vector_math.cpp
│ ├── vector_print_bug.cpp
│ ├── vectorize_guard_with_if.cpp
│ ├── vectorize_mixed_widths.cpp
│ ├── vectorize_varying_allocation_size.cpp
│ ├── vectorized_gpu_allocation.cpp
│ ├── vectorized_initialization.cpp
│ ├── vectorized_load_from_vectorized_allocation.cpp
│ ├── vectorized_reduction_bug.cpp
│ └── widening_reduction.cpp
├── error
│ ├── ambiguous_inline_reductions.cpp
│ ├── async_require_fail.cpp
│ ├── auto_schedule_no_bounds.cpp
│ ├── auto_schedule_no_parallel.cpp
│ ├── auto_schedule_no_reorder.cpp
│ ├── bad_bound.cpp
│ ├── bad_compute_at.cpp
│ ├── bad_compute_with.cpp
│ ├── bad_compute_with_invalid_specialization.cpp
│ ├── bad_compute_with_parent_func_not_used.cpp
│ ├── bad_const_cast.cpp
│ ├── bad_device_api.cpp
│ ├── bad_dimensions.cpp
│ ├── bad_extern_split.cpp
│ ├── bad_fold.cpp
│ ├── bad_host_alignment.cpp
│ ├── bad_rvar_order.cpp
│ ├── bad_schedule.cpp
│ ├── bad_store_at.cpp
│ ├── broken_promise.cpp
│ ├── buffer_larger_than_two_gigs.cpp
│ ├── clamp_out_of_range.cpp
│ ├── constrain_wrong_output_buffer.cpp
│ ├── constraint_uses_non_param.cpp
│ ├── define_after_realize.cpp
│ ├── define_after_use.cpp
│ ├── device_target_mismatch.cpp
│ ├── expanding_reduction.cpp
│ ├── extern_func_self_argument.cpp
│ ├── five_d_gpu_buffer.cpp
│ ├── float_arg.cpp
│ ├── forward_on_undefined_buffer.cpp
│ ├── implicit_args.cpp
│ ├── impossible_constraints.cpp
│ ├── init_def_should_be_all_vars.cpp
│ ├── inspect_loop_level.cpp
│ ├── lerp_float_weight_out_of_range.cpp
│ ├── lerp_mismatch.cpp
│ ├── lerp_signed_weight.cpp
│ ├── memoize_different_compute_store.cpp
│ ├── metal_vector_too_large.cpp
│ ├── missing_args.cpp
│ ├── modulo_constant_zero.cpp
│ ├── no_default_device.cpp
│ ├── nonexistent_update_stage.cpp
│ ├── null_host_field.cpp
│ ├── overflow_during_constant_folding.cpp
│ ├── pointer_arithmetic.cpp
│ ├── race_condition.cpp
│ ├── rdom_undefined.cpp
│ ├── realize_constantly_larger_than_two_gigs.cpp
│ ├── reduction_bounds.cpp
│ ├── reduction_type_mismatch.cpp
│ ├── require_fail.cpp
│ ├── reuse_var_in_schedule.cpp
│ ├── reused_args.cpp
│ ├── rfactor_inner_dim_non_commutative.cpp
│ ├── specialize_fail.cpp
│ ├── split_inner_wrong_tail_strategy.cpp
│ ├── thread_id_outside_block_id.cpp
│ ├── too_many_args.cpp
│ ├── tuple_arg_select_undef.cpp
│ ├── tuple_val_select_undef.cpp
│ ├── unbounded_input.cpp
│ ├── unbounded_output.cpp
│ ├── undefined_func_compile.cpp
│ ├── undefined_func_realize.cpp
│ ├── undefined_loop_level.cpp
│ ├── undefined_pipeline_compile.cpp
│ ├── undefined_pipeline_realize.cpp
│ ├── undefined_rdom_dimension.cpp
│ ├── unknown_target.cpp
│ ├── vectorize_dynamic.cpp
│ ├── vectorize_too_little.cpp
│ ├── vectorize_too_much.cpp
│ ├── vectorized_extern.cpp
│ ├── wrap_custom_after_shared.cpp
│ ├── wrap_frozen.cpp
│ ├── wrapper_never_used.cpp
│ ├── wrong_dimensionality_extern_stage.cpp
│ └── wrong_type.cpp
├── failing_with_issue
│ ├── 3292_async_specialize.cpp
│ ├── 3293_storage_folding_async.cpp
│ └── 3357_vectorize_pred.cpp
├── generator
│ ├── acquire_release_aottest.cpp
│ ├── acquire_release_generator.cpp
│ ├── alias_aottest.cpp
│ ├── alias_generator.cpp
│ ├── argvcall_aottest.cpp
│ ├── argvcall_generator.cpp
│ ├── async_parallel_aottest.cpp
│ ├── async_parallel_generator.cpp
│ ├── bit_operations_aottest.cpp
│ ├── bit_operations_generator.cpp
│ ├── blur2x2_aottest.cpp
│ ├── blur2x2_generator.cpp
│ ├── buffer_copy_aottest.cpp
│ ├── buffer_copy_generator.cpp
│ ├── buildmethod_aottest.cpp
│ ├── buildmethod_generator.cpp
│ ├── can_use_target_aottest.cpp
│ ├── can_use_target_generator.cpp
│ ├── cleanup_on_error_aottest.cpp
│ ├── cleanup_on_error_generator.cpp
│ ├── configure_aottest.cpp
│ ├── configure_generator.cpp
│ ├── configure_jittest.cpp
│ ├── cxx_mangling_aottest.cpp
│ ├── cxx_mangling_define_extern_aottest.cpp
│ ├── cxx_mangling_define_extern_externs.cpp
│ ├── cxx_mangling_define_extern_generator.cpp
│ ├── cxx_mangling_externs.cpp
│ ├── cxx_mangling_generator.cpp
│ ├── define_extern_opencl_aottest.cpp
│ ├── define_extern_opencl_generator.cpp
│ ├── embed_image_aottest.cpp
│ ├── embed_image_generator.cpp
│ ├── error_codes_aottest.cpp
│ ├── error_codes_generator.cpp
│ ├── example_aottest.cpp
│ ├── example_generator.cpp
│ ├── example_jittest.cpp
│ ├── extern_output_aottest.cpp
│ ├── extern_output_generator.cpp
│ ├── external_code_aottest.cpp
│ ├── external_code_extern.cpp
│ ├── external_code_generator.cpp
│ ├── float16_t_aottest.cpp
│ ├── float16_t_generator.cpp
│ ├── gpu_object_lifetime_aottest.cpp
│ ├── gpu_object_lifetime_generator.cpp
│ ├── gpu_only_aottest.cpp
│ ├── gpu_only_generator.cpp
│ ├── image_from_array_aottest.cpp
│ ├── image_from_array_generator.cpp
│ ├── mandelbrot_aottest.cpp
│ ├── mandelbrot_generator.cpp
│ ├── matlab_aottest.cpp
│ ├── matlab_generator.cpp
│ ├── memory_profiler_mandelbrot_aottest.cpp
│ ├── memory_profiler_mandelbrot_generator.cpp
│ ├── metadata_tester_aottest.cpp
│ ├── metadata_tester_generator.cpp
│ ├── msan_aottest.cpp
│ ├── msan_generator.cpp
│ ├── multitarget_aottest.cpp
│ ├── multitarget_generator.cpp
│ ├── nested_externs_aottest.cpp
│ ├── nested_externs_generator.cpp
│ ├── old_buffer_t_aottest.cpp
│ ├── old_buffer_t_generator.cpp
│ ├── output_assign_aottest.cpp
│ ├── output_assign_generator.cpp
│ ├── pyramid_aottest.cpp
│ ├── pyramid_generator.cpp
│ ├── rdom_input_aottest.cpp
│ ├── rdom_input_generator.cpp
│ ├── registration_test.cpp
│ ├── rungen_test.cpp
│ ├── string_param_aottest.cpp
│ ├── string_param_generator.cpp
│ ├── stubtest_aottest.cpp
│ ├── stubtest_generator.cpp
│ ├── stubtest_jittest.cpp
│ ├── stubuser_aottest.cpp
│ ├── stubuser_generator.cpp
│ ├── tiled_blur_aottest.cpp
│ ├── tiled_blur_generator.cpp
│ ├── user_context_aottest.cpp
│ ├── user_context_generator.cpp
│ ├── user_context_insanity_aottest.cpp
│ ├── user_context_insanity_generator.cpp
│ ├── variable_num_threads_aottest.cpp
│ └── variable_num_threads_generator.cpp
├── internal.cpp
├── opengl
│ ├── conv_select.cpp
│ ├── copy_pixels.cpp
│ ├── copy_to_device.cpp
│ ├── copy_to_host.cpp
│ ├── float_texture.cpp
│ ├── inline_reduction.cpp
│ ├── internal.cpp
│ ├── lut.cpp
│ ├── multiple_stages.cpp
│ ├── produce.cpp
│ ├── rewrap_texture.cpp
│ ├── save_state.cpp
│ ├── select.cpp
│ ├── set_pixels.cpp
│ ├── shifted_domains.cpp
│ ├── special_funcs.cpp
│ ├── sum_reduction.cpp
│ ├── sumcolor_reduction.cpp
│ ├── testing.h
│ ├── tuples.cpp
│ ├── vagrant
│ │ ├── .gitignore
│ │ ├── README.md
│ │ ├── Vagrantfile
│ │ ├── build_tests.sh
│ │ └── provision
│ │ │ ├── etc
│ │ │ ├── environment
│ │ │ ├── init
│ │ │ │ └── xdummy.conf
│ │ │ └── systemd
│ │ │ │ └── system
│ │ │ │ └── xdummy.service
│ │ │ └── usr
│ │ │ └── share
│ │ │ └── X11
│ │ │ └── xorg.conf.d
│ │ │ └── xdummy.conf
│ └── varying.cpp
├── performance
│ ├── async_gpu.cpp
│ ├── block_transpose.cpp
│ ├── boundary_conditions.cpp
│ ├── clamped_vector_load.cpp
│ ├── const_division.cpp
│ ├── fan_in.cpp
│ ├── fast_inverse.cpp
│ ├── fast_pow.cpp
│ ├── fast_sine_cosine.cpp
│ ├── inner_loop_parallel.cpp
│ ├── jit_stress.cpp
│ ├── lots_of_inputs.cpp
│ ├── lots_of_small_allocations.cpp
│ ├── matrix_multiplication.cpp
│ ├── memcpy.cpp
│ ├── memory_profiler.cpp
│ ├── packed_planar_fusion.cpp
│ ├── parallel_performance.cpp
│ ├── profiler.cpp
│ ├── realize_overhead.cpp
│ ├── rfactor.cpp
│ ├── rgb_interleaved.cpp
│ ├── sort.cpp
│ ├── thread_safe_jit.cpp
│ ├── vectorize.cpp
│ └── wrap.cpp
├── scripts
│ └── build_travis.sh
└── warning
│ ├── double_vectorize.cpp
│ ├── hidden_pure_definition.cpp
│ └── require_const_false.cpp
├── tools
├── GenGen.cpp
├── RunGen.h
├── RunGenMain.cpp
├── binary2cpp.cpp
├── build_halide_h.cpp
├── find_inverse.cpp
├── halide_benchmark.h
├── halide_config.cmake.tpl
├── halide_config.make.tpl
├── halide_image.h
├── halide_image_info.h
├── halide_image_io.h
├── halide_malloc_trace.h
├── halide_trace_config.h
├── makelib.sh
└── mex_halide.m
├── tutorial
├── .gitignore
├── CMakeLists.txt
├── clock.h
├── figures
│ ├── generate_figures_17.sh
│ ├── generate_figures_18.sh
│ ├── generate_figures_19.sh
│ ├── generate_figures_5.sh
│ ├── generate_figures_8.sh
│ ├── generate_figures_9.sh
│ ├── generate_output_snippets.sh
│ ├── lesson_02_input.jpg
│ ├── lesson_02_output.jpg
│ ├── lesson_05_col_major.gif
│ ├── lesson_05_fast.mp4
│ ├── lesson_05_parallel_tiles.gif
│ ├── lesson_05_row_major.gif
│ ├── lesson_05_split_7_by_3.gif
│ ├── lesson_05_tiled.gif
│ ├── lesson_05_vectors.gif
│ ├── lesson_08_compute_root.gif
│ ├── lesson_08_compute_y.gif
│ ├── lesson_08_mixed.mp4
│ ├── lesson_08_store_root_compute_x.gif
│ ├── lesson_08_store_root_compute_y.gif
│ ├── lesson_08_tile.gif
│ ├── lesson_09_compute_at_multiple_updates.mp4
│ ├── lesson_09_compute_at_pure.gif
│ ├── lesson_09_compute_at_pure_and_update.gif
│ ├── lesson_09_compute_at_rvar.gif
│ ├── lesson_09_compute_at_update.gif
│ ├── lesson_09_inline_reduction.gif
│ ├── lesson_09_update.gif
│ ├── lesson_09_update_rdom.mp4
│ ├── lesson_09_update_schedule.mp4
│ ├── lesson_17_rdom_calls_in_predicate.mp4
│ ├── lesson_17_rdom_circular.mp4
│ ├── lesson_17_rdom_triangular.mp4
│ ├── lesson_18_hist_manual_par.mp4
│ ├── lesson_18_hist_rfactor_par.mp4
│ ├── lesson_18_hist_rfactor_tile.mp4
│ ├── lesson_18_hist_rfactor_vec.mp4
│ ├── lesson_18_hist_serial.mp4
│ ├── lesson_19_group_updates.mp4
│ ├── lesson_19_transpose.mp4
│ ├── lesson_19_wrapper_global.mp4
│ ├── lesson_19_wrapper_local.mp4
│ ├── lesson_19_wrapper_unique.mp4
│ └── lesson_19_wrapper_vary_schedule.mp4
├── images
│ ├── gray.png
│ └── rgb.png
├── lesson_01_basics.cpp
├── lesson_02_input_image.cpp
├── lesson_03_debugging_1.cpp
├── lesson_04_debugging_2.cpp
├── lesson_05_scheduling_1.cpp
├── lesson_06_realizing_over_shifted_domains.cpp
├── lesson_07_multi_stage_pipelines.cpp
├── lesson_08_scheduling_2.cpp
├── lesson_09_update_definitions.cpp
├── lesson_10_aot_compilation_generate.cpp
├── lesson_10_aot_compilation_run.cpp
├── lesson_11_cross_compilation.cpp
├── lesson_12_using_the_gpu.cpp
├── lesson_13_tuples.cpp
├── lesson_14_types.cpp
├── lesson_15_generators.cpp
├── lesson_15_generators_usage.sh
├── lesson_16_rgb_generate.cpp
├── lesson_16_rgb_run.cpp
├── lesson_17_predicated_rdom.cpp
├── lesson_18_parallel_associative_reductions.cpp
├── lesson_19_wrapper_funcs.cpp
├── lesson_20_cloning_funcs.cpp
├── lesson_21_auto_scheduler_generate.cpp
├── lesson_21_auto_scheduler_run.cpp
└── todo.txt
├── util
├── CMakeLists.txt
├── Halide-VS2017.natvis
├── Halide.natvis
├── HalideTraceDump.cpp
├── HalideTraceUtils.cpp
├── HalideTraceUtils.h
├── HalideTraceViz.cpp
└── inconsolata.h
└── xhalide_examples
├── golden
├── xhalide_2dfilter.cc
├── xhalide_dse_4d_filter_generated.cc
└── xhalide_generated1.cc
├── xhalide-2dconvolution.cpp
├── xhalide-convolution-16bits.cpp
├── xhalide-convolution-32bit.cpp
├── xhalide-convolution.cpp
├── xhalide-convolution1.cpp
├── xhalide_dse_4d_filter_limited.cpp
└── xhalide_dse_conv.cpp
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Set the default behavior, in case people don't have core.autocrlf set.
2 | * text=auto
3 |
4 | # Explicitly declare text files you want to always be normalized and converted
5 | # to native line endings on checkout.
6 | *.cpp text
7 | *.c text
8 | *.h text
9 |
10 | # Denote all files that are truly binary and should not be modified.
11 | *.png binary
12 | *.jpg binary
13 | *.tiff binary
14 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/.gitmodules
--------------------------------------------------------------------------------
/apps/HelloAndroid/.gitignore:
--------------------------------------------------------------------------------
1 | .gradle/**
2 | gen/**
3 | gradle_build/**
4 | HelloAndroid.iml
5 | local.properties
6 | obj/**
7 | proguard-project.txt
8 | project.properties
9 |
--------------------------------------------------------------------------------
/apps/HelloAndroid/ant.properties:
--------------------------------------------------------------------------------
1 | # This file is used to override default values used by the Ant build system.
2 | #
3 | # This file must be checked into Version Control Systems, as it is
4 | # integral to the build system of your project.
5 |
6 | # This file is only used by the Ant script.
7 |
8 | # You can use this to override default values such as
9 | # 'source.dir' for the location of your java source folder and
10 | # 'out.dir' for the location of your output folder.
11 |
12 | # You can also use it to define how the release builds are signed by declaring
13 | # the following properties:
14 | # 'key.store' for the location of your keystore and
15 | # 'key.alias' for the name of the key to use.
16 | # The password will be asked during the build when you use the 'release' target.
17 |
18 |
--------------------------------------------------------------------------------
/apps/HelloAndroid/build-gradle.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Gradle needs to know where the NDK is.
4 | # The easiest way is to set the ANDROID_NDK_HOME environment variable.
5 | # Otherwise, set ndk.dir in local.properties (even though the file itself says
6 | # that it's only used by ant).
7 | # However, if you run "android update" (say, via build.sh), this variable will
8 | # be clobbered.
9 | ./gradlew build && adb install -r gradle_build/outputs/apk/HelloAndroid-debug.apk && adb shell am start com.example.hellohalide/com.example.hellohalide.CameraActivity
10 |
--------------------------------------------------------------------------------
/apps/HelloAndroid/gradle/wrapper/gradle-wrapper.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroid/gradle/wrapper/gradle-wrapper.jar
--------------------------------------------------------------------------------
/apps/HelloAndroid/gradle/wrapper/gradle-wrapper.properties:
--------------------------------------------------------------------------------
1 | #Mon Jan 05 14:23:44 PST 2015
2 | distributionBase=GRADLE_USER_HOME
3 | distributionPath=wrapper/dists
4 | zipStoreBase=GRADLE_USER_HOME
5 | zipStorePath=wrapper/dists
6 | distributionUrl=https\://services.gradle.org/distributions/gradle-2.2-bin.zip
7 |
--------------------------------------------------------------------------------
/apps/HelloAndroid/jni/Android.mk:
--------------------------------------------------------------------------------
1 | LOCAL_PATH := $(call my-dir)
2 |
3 | include $(CLEAR_VARS)
4 |
5 | LOCAL_MODULE := HelloAndroid
6 | LOCAL_ARM_MODE := arm
7 | LOCAL_SRC_FILES := hello_wrapper.cpp
8 | LOCAL_LDFLAGS := -L$(LOCAL_PATH)/../jni
9 | LOCAL_LDLIBS := -lm -llog -landroid $(LOCAL_PATH)/../bin/$(TARGET_ARCH_ABI)/hello.a
10 | LOCAL_STATIC_LIBRARIES := android_native_app_glue
11 | LOCAL_C_INCLUDES := $(LOCAL_PATH)/../../../include $(LOCAL_PATH)/../../../build/include $(LOCAL_PATH)/../bin/$(TARGET_ARCH_ABI)/
12 |
13 | include $(BUILD_SHARED_LIBRARY)
14 |
15 | $(call import-module,android/native_app_glue)
16 |
--------------------------------------------------------------------------------
/apps/HelloAndroid/jni/Application.mk:
--------------------------------------------------------------------------------
1 | # Can't use "APP_ABI = all" as 64-bit MIPS currently does not build since
2 | # llvm will not compile for the R6 version of the ISA without Nan2008
3 | # and the gcc toolchain used by the Android build setup requires those
4 | # two options together.
5 | APP_ABI := armeabi armeabi-v7a arm64-v8a mips x86_64 x86
6 | APP_PLATFORM := android-17
7 | APP_STL := gnustl_static
8 | APP_CPPFLAGS := -std=c++11
9 |
--------------------------------------------------------------------------------
/apps/HelloAndroid/res/drawable-hdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroid/res/drawable-hdpi/ic_launcher.png
--------------------------------------------------------------------------------
/apps/HelloAndroid/res/drawable-ldpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroid/res/drawable-ldpi/ic_launcher.png
--------------------------------------------------------------------------------
/apps/HelloAndroid/res/drawable-mdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroid/res/drawable-mdpi/ic_launcher.png
--------------------------------------------------------------------------------
/apps/HelloAndroid/res/drawable-xhdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroid/res/drawable-xhdpi/ic_launcher.png
--------------------------------------------------------------------------------
/apps/HelloAndroid/res/layout/main.xml:
--------------------------------------------------------------------------------
1 |
2 |
7 |
8 |
14 |
15 |
--------------------------------------------------------------------------------
/apps/HelloAndroid/res/values/strings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | HelloHalide
4 |
5 |
--------------------------------------------------------------------------------
/apps/HelloAndroid/src/com/example/hellohalide/FrameHandler.java:
--------------------------------------------------------------------------------
1 | package com.example.hellohalide;
2 |
3 | import android.hardware.Camera;
4 | import android.util.Log;
5 |
6 | public class FrameHandler implements Camera.PreviewCallback { // Preview callback that only logs each frame it is handed.
7 | private static final String TAG = "FrameHandler"; // Log tag passed to Log.d below.
8 |
9 | public void onPreviewFrame(byte[] data, Camera camera) { // Neither data nor camera is read here.
10 | Log.d(TAG, "Got a frame!");
11 | }
12 | }
--------------------------------------------------------------------------------
/apps/HelloAndroidCamera2/.gitignore:
--------------------------------------------------------------------------------
1 | .gradle/**
2 | gen/**
3 | gradle_build/**
4 | *.iml
5 | local.properties
6 | obj/**
7 | proguard-project.txt
8 | project.properties
9 |
--------------------------------------------------------------------------------
/apps/HelloAndroidCamera2/build-gradle.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Gradle needs to know where the NDK is.
4 | # The easiest way is to set the ANDROID_NDK_HOME environment variable.
5 | # Otherwise, set ndk.dir in local.properties (even though the file itself says
6 | # that it's only used by ant).
7 | # However, if you run "android update" (say, via build.sh), this variable will
8 | # be clobbered.
9 | ./gradlew build && adb install -r gradle_build/outputs/apk/HelloAndroidCamera2-debug.apk && adb shell am start com.example.helloandroidcamera2/com.example.helloandroidcamera2.CameraActivity
10 |
--------------------------------------------------------------------------------
/apps/HelloAndroidCamera2/gradle/wrapper/gradle-wrapper.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroidCamera2/gradle/wrapper/gradle-wrapper.jar
--------------------------------------------------------------------------------
/apps/HelloAndroidCamera2/gradle/wrapper/gradle-wrapper.properties:
--------------------------------------------------------------------------------
1 | #Wed Jul 15 16:34:43 PDT 2015
2 | distributionBase=GRADLE_USER_HOME
3 | distributionPath=wrapper/dists
4 | zipStoreBase=GRADLE_USER_HOME
5 | zipStorePath=wrapper/dists
6 | distributionUrl=https\://services.gradle.org/distributions/gradle-2.2-all.zip
7 |
--------------------------------------------------------------------------------
/apps/HelloAndroidCamera2/jni/Application.mk:
--------------------------------------------------------------------------------
1 | # Can't use "APP_ABI = all" as 64-bit MIPS currently does not build since
2 | # llvm will not compile for the R6 version of the ISA without Nan2008
3 | # and the gcc toolchain used by the Android build setup requires those
4 | # two options together.
5 | APP_ABI := armeabi armeabi-v7a arm64-v8a mips x86_64 x86
6 | APP_PLATFORM := android-21
7 | APP_STL := c++_static
8 | APP_CPPFLAGS := -std=c++11 -fno-rtti -fexceptions
9 |
--------------------------------------------------------------------------------
/apps/HelloAndroidCamera2/res/drawable-hdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroidCamera2/res/drawable-hdpi/ic_launcher.png
--------------------------------------------------------------------------------
/apps/HelloAndroidCamera2/res/drawable-ldpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroidCamera2/res/drawable-ldpi/ic_launcher.png
--------------------------------------------------------------------------------
/apps/HelloAndroidCamera2/res/drawable-mdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroidCamera2/res/drawable-mdpi/ic_launcher.png
--------------------------------------------------------------------------------
/apps/HelloAndroidCamera2/res/drawable-xhdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroidCamera2/res/drawable-xhdpi/ic_launcher.png
--------------------------------------------------------------------------------
/apps/HelloAndroidCamera2/res/layout/main.xml:
--------------------------------------------------------------------------------
1 |
2 |
7 |
8 |
14 |
15 |
--------------------------------------------------------------------------------
/apps/HelloAndroidCamera2/res/values/strings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | HelloHalideCamera2
4 | Toggle Edge Detector
5 |
6 |
--------------------------------------------------------------------------------
/apps/HelloAndroidGL/ant.properties:
--------------------------------------------------------------------------------
1 | # This file is used to override default values used by the Ant build system.
2 | #
3 | # This file must be checked into Version Control Systems, as it is
4 | # integral to the build system of your project.
5 |
6 | # This file is only used by the Ant script.
7 |
8 | # You can use this to override default values such as
9 | # 'source.dir' for the location of your java source folder and
10 | # 'out.dir' for the location of your output folder.
11 |
12 | # You can also use it to define how the release builds are signed by declaring
13 | # the following properties:
14 | # 'key.store' for the location of your keystore and
15 | # 'key.alias' for the name of the key to use.
16 | # The password will be asked during the build when you use the 'release' target.
17 |
18 |
--------------------------------------------------------------------------------
/apps/HelloAndroidGL/build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 | android update project -p . --target android-17
4 | cd jni
5 | c++ -std=c++11 halide_gl_filter.cpp -L ../../../bin -lHalide -I ../../../include -ldl -lpthread -lz
6 | HL_TARGET=arm-32-android-opengl-debug DYLD_LIBRARY_PATH=../../../bin LD_LIBRARY_PATH=../../../bin ./a.out
7 | cd ..
8 | pwd
9 | ndk-build
10 | ant debug
11 | adb install -r bin/HelloAndroidGL-debug.apk
12 | adb logcat
13 |
--------------------------------------------------------------------------------
/apps/HelloAndroidGL/jni/Android.mk:
--------------------------------------------------------------------------------
1 | LOCAL_PATH := $(call my-dir)
2 |
3 | include $(CLEAR_VARS)
4 |
5 | LOCAL_MODULE := android_halide_gl_native
6 | LOCAL_ARM_MODE := arm
7 | LOCAL_SRC_FILES := android_halide_gl_native.cpp
8 | LOCAL_LDFLAGS := -Ljni
9 | LOCAL_LDLIBS := -lm -llog -landroid -lEGL -lGLESv2 jni/halide_gl_filter.o
10 | LOCAL_STATIC_LIBRARIES := android_native_app_glue
11 | LOCAL_C_INCLUDES := $(LOCAL_PATH)/../../../include
12 |
13 | include $(BUILD_SHARED_LIBRARY)
14 |
15 | $(call import-module,android/native_app_glue)
16 |
--------------------------------------------------------------------------------
/apps/HelloAndroidGL/jni/Application.mk:
--------------------------------------------------------------------------------
1 | # The ARMv7 is significantly faster due to the use of the hardware FPU
2 | APP_ABI := armeabi-v7a
3 | APP_PLATFORM := android-17
4 |
--------------------------------------------------------------------------------
/apps/HelloAndroidGL/project.properties:
--------------------------------------------------------------------------------
1 | # This file is automatically generated by Android Tools.
2 | # Do not modify this file -- YOUR CHANGES WILL BE ERASED!
3 | #
4 | # This file must be checked in Version Control Systems.
5 | #
6 | # To customize properties used by the Ant build system edit
7 | # "ant.properties", and override values to adapt the script to your
8 | # project structure.
9 | #
10 | # To enable ProGuard to shrink and obfuscate your code, uncomment this (available properties: sdk.dir, user.home):
11 | #proguard.config=${sdk.dir}/tools/proguard/proguard-android.txt:proguard-project.txt
12 |
13 | # Project target.
14 | target=android-17
15 |
--------------------------------------------------------------------------------
/apps/HelloAndroidGL/res/drawable-hdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroidGL/res/drawable-hdpi/ic_launcher.png
--------------------------------------------------------------------------------
/apps/HelloAndroidGL/res/drawable-ldpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroidGL/res/drawable-ldpi/ic_launcher.png
--------------------------------------------------------------------------------
/apps/HelloAndroidGL/res/drawable-mdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroidGL/res/drawable-mdpi/ic_launcher.png
--------------------------------------------------------------------------------
/apps/HelloAndroidGL/res/drawable-xhdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroidGL/res/drawable-xhdpi/ic_launcher.png
--------------------------------------------------------------------------------
/apps/HelloAndroidGL/res/layout/main.xml:
--------------------------------------------------------------------------------
1 |
2 |
7 |
8 |
14 |
15 |
--------------------------------------------------------------------------------
/apps/HelloAndroidGL/res/values/strings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | Halide GL Demo
4 |
5 |
--------------------------------------------------------------------------------
/apps/HelloMatlab/Makefile:
--------------------------------------------------------------------------------
1 | include ../support/Makefile.inc
2 |
3 |
4 | test:
5 | ./run_blur.sh
6 |
7 |
--------------------------------------------------------------------------------
/apps/HelloMatlab/run_blur.m:
--------------------------------------------------------------------------------
1 | % Add the path to mex_halide.m.
2 | addpath(fullfile(getenv('HALIDE_DISTRIB_PATH'), 'tools'));
3 |
4 | % Build the mex library from the blur generator.
5 | mex_halide('iir_blur.cpp', '-g', 'IirBlur');
6 |
7 | % Load the input, create an output buffer of equal size.
8 | input = cast(imread('../images/rgb.png'), 'single') / 255;
9 | output = zeros(size(input), 'single');
10 |
11 | % The blur filter coefficient.
12 | alpha = 0.1;
13 |
14 | % Call the Halide pipeline.
15 | for i = 1:10
16 | tic;
17 | iir_blur(input, alpha, output);
18 | toc;
19 | end
20 |
21 | % Write the blurred image.
22 | imwrite(cast(output * 255, 'uint8'), 'blurred.png');
23 |
--------------------------------------------------------------------------------
/apps/HelloMatlab/run_blur.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # This script is run by the nightly tests to check that mex_halide works.
4 |
5 | command -v octave >/dev/null 2>&1 || { echo >&2 "Octave not found. Aborting."; exit 0; } # A missing octave exits with status 0, not an error status.
6 |
7 | if [[ $CXX == *"-m32"* ]]; then # Also exits with status 0 when $CXX contains -m32.
8 | echo "Not proceeding because Halide is compiled in 32-bit mode but octave is (likely) 64-bit"
9 | exit 0
10 | fi
11 |
12 | rm -f blurred.png iir_blur.mex # Remove leftovers so the blurred.png check below cannot pass on a file from an earlier run.
13 | octave run_blur.m
14 |
15 | if [ -f blurred.png ] # Success is judged only by blurred.png existing; octave's exit status is not checked.
16 | then
17 | echo "Success!"
18 | exit 0
19 | fi
20 |
21 | echo "Failed to produce blurred.png!"
22 | exit 1
23 |
--------------------------------------------------------------------------------
/apps/HelloiOS/HelloiOS/AppDelegate.h:
--------------------------------------------------------------------------------
1 | #import
2 |
3 | @interface AppDelegate : UIResponder
4 |
5 | @property (strong, nonatomic) UIWindow *window;
6 |
7 | @end
8 |
--------------------------------------------------------------------------------
/apps/HelloiOS/HelloiOS/HalideViewController.h:
--------------------------------------------------------------------------------
1 | #ifndef HelloiOS_HalideViewController_h
2 | #define HelloiOS_HalideViewController_h
3 |
4 | #import "HalideView.h"
5 | #import
6 |
7 |
8 | @interface HalideViewController : UIViewController
9 |
10 | @property HalideView *halide_view;
11 |
12 | - (void)viewWillAppear:(BOOL)animated;
13 |
14 | @end
15 |
16 | #endif
--------------------------------------------------------------------------------
/apps/HelloiOS/HelloiOS/HelloiOS-Prefix.pch:
--------------------------------------------------------------------------------
1 | //
2 | // Prefix header
3 | //
4 | // The contents of this file are implicitly included at the beginning of every source file.
5 | //
6 |
7 | #import
8 |
9 | #ifndef __IPHONE_3_0
10 | #warning "This project uses features only available in iOS SDK 3.0 and later."
11 | #endif
12 |
13 | #ifdef __OBJC__
14 | #import
15 | #import
16 | #endif
17 |
--------------------------------------------------------------------------------
/apps/HelloiOS/HelloiOS/en.lproj/InfoPlist.strings:
--------------------------------------------------------------------------------
1 | /* Localized versions of Info.plist keys */
2 |
3 |
--------------------------------------------------------------------------------
/apps/HelloiOS/HelloiOS/main.mm:
--------------------------------------------------------------------------------
1 | #import
2 |
3 | #import "AppDelegate.h"
4 |
5 | int main(int argc, char * argv[])
6 | {
7 | @autoreleasepool {
8 | return UIApplicationMain(argc, argv, nil, NSStringFromClass([AppDelegate class]));
9 | }
10 | }
11 |
--------------------------------------------------------------------------------
/apps/autoscheduler/AutoSchedule.h:
--------------------------------------------------------------------------------
1 | #include "Halide.h"
2 | #include "CostModel.h"
3 | #include "FunctionDAG.h"
4 | #include "PerfectHashMap.h"
5 | #include "Featurization.h"
6 | #include
7 |
8 | namespace Halide {
9 | namespace Internal {
10 | namespace Autoscheduler {
11 |
12 | typedef PerfectHashMap StageMapOfScheduleFeatures;
13 |
14 | void find_and_apply_schedule(FunctionDAG& dag, const std::vector &outputs, const MachineParams &params,
15 | CostModel* cost_model, int beam_size, StageMapOfScheduleFeatures* schedule_features);
16 |
17 | }
18 | }
19 | }
20 |
--------------------------------------------------------------------------------
/apps/autoscheduler/NetworkSize.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_NETWORK_SIZE_H
2 | #define HALIDE_NETWORK_SIZE_H
3 |
4 | namespace Halide {
5 | // The size of the best cost model network found. Needed by the cost
6 | // model and also the cost model training script.
7 | const int head1_channels = 8, head1_w = 40, head1_h = 7;
8 | const int head2_channels = 24, head2_w = 39;
9 | const int conv1_channels = 32;
10 | }
11 |
12 | #endif
13 |
--------------------------------------------------------------------------------
/apps/autoscheduler/weights/head1_conv1_bias.data:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/autoscheduler/weights/head1_conv1_bias.data
--------------------------------------------------------------------------------
/apps/autoscheduler/weights/head1_conv1_weight.data:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/autoscheduler/weights/head1_conv1_weight.data
--------------------------------------------------------------------------------
/apps/autoscheduler/weights/head2_conv1_bias.data:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/autoscheduler/weights/head2_conv1_bias.data
--------------------------------------------------------------------------------
/apps/autoscheduler/weights/head2_conv1_weight.data:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/autoscheduler/weights/head2_conv1_weight.data
--------------------------------------------------------------------------------
/apps/autoscheduler/weights/trunk_conv1_bias.data:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/autoscheduler/weights/trunk_conv1_bias.data
--------------------------------------------------------------------------------
/apps/autoscheduler/weights/trunk_conv1_weight.data:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/autoscheduler/weights/trunk_conv1_weight.data
--------------------------------------------------------------------------------
/apps/bilateral_grid/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_executable(bilateral_grid_process filter.cpp)
2 | halide_use_image_io(bilateral_grid_process)
3 |
4 | halide_generator(bilateral_grid.generator SRCS bilateral_grid_generator.cpp)
5 | foreach(AUTO_SCHEDULE false true)
6 | if(${AUTO_SCHEDULE})
7 | set(LIB bilateral_grid_auto_schedule)
8 | else()
9 | set(LIB bilateral_grid)
10 | endif()
11 | halide_library_from_generator(${LIB}
12 | GENERATOR bilateral_grid.generator
13 | GENERATOR_ARGS auto_schedule=${AUTO_SCHEDULE}
14 | EXTRA_OUTPUTS stmt schedule)
15 | target_link_libraries(bilateral_grid_process PRIVATE ${LIB})
16 | endforeach()
17 |
--------------------------------------------------------------------------------
/apps/bilateral_grid/viz.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Renders a trace visualization of the bilateral_grid filter into
3 | # $1/bilateral_grid.mp4; $1 is the directory that holds filter_viz.
4 | echo HL_AVCONV is ${HL_AVCONV}
5 | # Send the trace to stdout so it can be piped into HalideTraceViz.
6 | export HL_TRACE_FILE=/dev/stdout
7 | # HL_NUM_THREADS matches the spelling used by local_laplacian/viz.sh.
8 | export HL_NUM_THREADS=4
9 | # $1 is quoted so the shell does not split or glob the directory name.
10 | rm -f "$1/bilateral_grid.mp4"
11 | make "$1/filter_viz" && \
12 | "$1/filter_viz" ../images/gray_small.png "$1/out_small.png" 0.2 0 | \
13 | ../../bin/HalideTraceViz --size 1920 1080 | \
14 | ${HL_AVCONV} -f rawvideo -pix_fmt bgr32 -s 1920x1080 -i /dev/stdin -c:v h264 "$1/bilateral_grid.mp4"
15 | #mplayer -demuxer rawvideo -rawvideo w=1920:h=1080:format=rgba:fps=30 -idle -fixed-vo -
16 |
--------------------------------------------------------------------------------
/apps/blur/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Generator
2 | halide_library(halide_blur SRCS halide_blur_generator.cpp)
3 |
4 | # Final executable
5 | add_executable(blur_test test.cpp)
6 | target_link_libraries(blur_test PUBLIC halide_blur)
7 |
8 | if (NOT MSVC)
9 | target_compile_options(blur_test PRIVATE "-O2")
10 | if (OPENMP_FOUND)
11 | target_compile_options(blur_test PRIVATE ${OpenMP_CXX_FLAGS})
12 | target_link_libraries(blur_test PRIVATE ${OpenMP_CXX_FLAGS})
13 | else()
14 | target_compile_options(blur_test PRIVATE "-Wno-unknown-pragmas")
15 | endif()
16 | endif()
17 |
--------------------------------------------------------------------------------
/apps/camera_pipe/viz.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Renders a trace visualization of camera_pipe into $1/camera_pipe.mp4;
3 | # $1 is the directory that holds process_viz.
4 | # Send the trace to stdout so it can be piped into HalideTraceViz.
5 | export HL_TRACE_FILE=/dev/stdout
6 | # HL_NUM_THREADS matches the spelling used by local_laplacian/viz.sh.
7 | export HL_NUM_THREADS=4
8 | # $1 is quoted so the shell does not split or glob the directory name.
9 | rm -f "$1/camera_pipe.mp4"
10 | # Do trivial partial-overrides of trace settings via flags
11 | # (--zoom and --rlabel) just to demonstrate that it works.
12 | "$1/process_viz" ../images/bayer_small.png 3700 1.8 50 1 1 "$1/out.png" |
13 | ../../bin/HalideTraceViz --timestep 1000 --size 1920 1080 \
14 | --zoom 4 --func sharpen_strength_x32 \
15 | --rlabel curve "tone curve LUT" 0 0 10 \
16 | |\
17 | ${HL_AVCONV} -f rawvideo -pix_fmt bgr32 -s 1920x1080 -i /dev/stdin -c:v h264 "$1/camera_pipe.mp4"
18 | #mplayer -demuxer rawvideo -rawvideo w=1920:h=1080:format=rgba:fps=30 -idle -fixed-vo -
19 |
--------------------------------------------------------------------------------
/apps/conv_layer/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_executable(conv_layer_process process.cpp)
2 | halide_use_image_io(conv_layer_process)
3 |
4 | halide_generator(conv_layer.generator SRCS conv_layer_generator.cpp)
5 | foreach(AUTO_SCHEDULE false true)
6 | if(${AUTO_SCHEDULE})
7 | set(LIB conv_layer_auto_schedule)
8 | else()
9 | set(LIB conv_layer)
10 | endif()
11 | halide_library_from_generator(${LIB}
12 | GENERATOR conv_layer.generator
13 | GENERATOR_ARGS auto_schedule=${AUTO_SCHEDULE})
14 | target_link_libraries(conv_layer_process PRIVATE ${LIB})
15 | endforeach()
16 |
--------------------------------------------------------------------------------
/apps/images/bayer_raw.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/images/bayer_raw.png
--------------------------------------------------------------------------------
/apps/images/bayer_small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/images/bayer_small.png
--------------------------------------------------------------------------------
/apps/images/gray.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/images/gray.png
--------------------------------------------------------------------------------
/apps/images/gray_small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/images/gray_small.png
--------------------------------------------------------------------------------
/apps/images/rgb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/images/rgb.png
--------------------------------------------------------------------------------
/apps/images/rgb_small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/images/rgb_small.png
--------------------------------------------------------------------------------
/apps/images/rgb_small16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/images/rgb_small16.png
--------------------------------------------------------------------------------
/apps/images/rgba.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/images/rgba.png
--------------------------------------------------------------------------------
/apps/interpolate/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | halide_project(interpolate "apps" interpolate.cpp)
2 | halide_use_image_io(interpolate)
3 | set_target_properties(interpolate PROPERTIES RUNTIME_OUTPUT_DIRECTORY
4 | "${CMAKE_CURRENT_BINARY_DIR}")
5 |
--------------------------------------------------------------------------------
/apps/interpolate/Makefile:
--------------------------------------------------------------------------------
1 | include ../support/Makefile.inc
2 |
3 | CXXFLAGS += -g -Wall
4 |
5 | .PHONY: clean
6 |
7 | $(BIN)/%/interpolate: interpolate.cpp $(LIB_HALIDE)
8 | @mkdir -p $(@D)
9 | $(CXX) $(CXXFLAGS) $^ -o $@ $(IMAGE_IO_FLAGS) $(LDFLAGS) $(HALIDE_SYSTEM_LIBS)
10 |
11 | $(BIN)/%/out.png: $(BIN)/%/interpolate
12 | @mkdir -p $(@D)
13 | $^ $(IMAGES)/rgba.png $@
14 |
15 | clean:
16 | rm -rf $(BIN)
17 |
18 | test: $(BIN)/$(HL_TARGET)/out.png
19 |
--------------------------------------------------------------------------------
/apps/lens_blur/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_executable(lens_blur_process process.cpp)
2 | halide_use_image_io(lens_blur_process)
3 |
4 | halide_generator(lens_blur.generator SRCS lens_blur_generator.cpp)
5 | foreach(AUTO_SCHEDULE false true)
6 | if(${AUTO_SCHEDULE})
7 | set(LIB lens_blur_auto_schedule)
8 | else()
9 | set(LIB lens_blur)
10 | endif()
11 | halide_library_from_generator(${LIB}
12 | GENERATOR lens_blur.generator
13 | GENERATOR_ARGS auto_schedule=${AUTO_SCHEDULE})
14 | target_link_libraries(lens_blur_process PRIVATE ${LIB})
15 | endforeach()
16 |
--------------------------------------------------------------------------------
/apps/linear_algebra/.gitignore:
--------------------------------------------------------------------------------
1 | src/kernels/*
2 |
--------------------------------------------------------------------------------
/apps/linear_algebra/tests/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | if (NOT CBLAS_FOUND)
2 | message(STATUS "linear_algebra: No CBLAS header, skipping CBLAS tests")
3 | return()
4 | endif()
5 |
6 | add_executable(test_halide_blas
7 | test_halide_blas.cpp
8 | )
9 | target_include_directories(test_halide_blas SYSTEM
10 | PRIVATE
11 | ${CBLAS_INCLUDE_DIR}
12 | )
13 | target_include_directories(test_halide_blas BEFORE
14 | PRIVATE
15 | ${halide_blas_INCLUDE_DIRS}
16 | )
17 | target_compile_options(test_halide_blas PRIVATE -Wno-unused-variable)
18 |
19 | target_link_libraries(test_halide_blas
20 | PRIVATE
21 | halide_blas
22 | cblas # XXX fragile
23 | ${HALIDE_COMPILER_LIB}
24 | )
25 |
26 |
--------------------------------------------------------------------------------
/apps/local_laplacian/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_executable(local_laplacian_process process.cpp)
2 | halide_use_image_io(local_laplacian_process)
3 |
4 | halide_generator(local_laplacian.generator SRCS local_laplacian_generator.cpp)
5 | foreach(AUTO_SCHEDULE false true)
6 | if(${AUTO_SCHEDULE})
7 | set(LIB local_laplacian_auto_schedule)
8 | else()
9 | set(LIB local_laplacian)
10 | endif()
11 | halide_library_from_generator(${LIB}
12 | GENERATOR local_laplacian.generator
13 | GENERATOR_ARGS auto_schedule=${AUTO_SCHEDULE})
14 | target_link_libraries(local_laplacian_process PRIVATE ${LIB})
15 | endforeach()
16 |
--------------------------------------------------------------------------------
/apps/local_laplacian/viz.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | export HL_TRACE_FILE=/dev/stdout
3 | export HL_NUM_THREADS=4
4 | rm -f $1/local_laplacian.mp4
5 | make $1/process_viz && \
6 | ./$1/process_viz ../images/rgb_small.png 4 1 1 0 ./$1/out_small.png | \
7 | ../../bin/HalideTraceViz \
8 | --size 1920 1080 --timestep 3000 | \
9 | ${HL_AVCONV} -f rawvideo -pix_fmt bgr32 -s 1920x1080 -i /dev/stdin -c:v h264 ./$1/local_laplacian.mp4
10 | #mplayer -demuxer rawvideo -rawvideo w=1920:h=1080:format=rgba:fps=30 -idle -fixed-vo -
11 |
--------------------------------------------------------------------------------
/apps/nl_means/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_executable(nl_means_process process.cpp)
2 | halide_use_image_io(nl_means_process)
3 |
4 | halide_generator(nl_means.generator SRCS nl_means_generator.cpp)
5 | foreach(AUTO_SCHEDULE false true)
6 | if(${AUTO_SCHEDULE})
7 | set(LIB nl_means_auto_schedule)
8 | else()
9 | set(LIB nl_means)
10 | endif()
11 | halide_library_from_generator(${LIB}
12 | GENERATOR nl_means.generator
13 | GENERATOR_ARGS auto_schedule=${AUTO_SCHEDULE})
14 | target_link_libraries(nl_means_process PRIVATE ${LIB})
15 | endforeach()
16 |
--------------------------------------------------------------------------------
/apps/nn_ops/AveragePool.sh:
--------------------------------------------------------------------------------
1 | AVERAGE_POOL=$1
2 | # Columns are: schedule C W H N stride pad_width pad_height filter_width filter_height output_min output_max
3 | $AVERAGE_POOL 8 16 16 1 1 0 0 1 1 0 255
4 | $AVERAGE_POOL 8 16 16 1 1 1 1 3 3 0 255
5 | $AVERAGE_POOL 8 16 16 1 2 1 1 3 3 0 255
6 | $AVERAGE_POOL 8 16 16 1 2 2 2 5 5 0 255
7 |
8 | $AVERAGE_POOL 32 7 7 1 1 0 0 1 1 0 255
9 | $AVERAGE_POOL 32 7 7 1 1 1 1 3 3 0 255
10 | $AVERAGE_POOL 32 7 7 1 2 1 1 3 3 0 255
11 | $AVERAGE_POOL 32 7 7 4 2 2 2 5 5 0 255
12 |
13 | $AVERAGE_POOL 8 16 16 1 1 0 0 1 1 64 128
14 | $AVERAGE_POOL 8 16 16 1 1 1 1 3 3 64 128
15 | $AVERAGE_POOL 8 16 16 1 2 1 1 3 3 64 128
16 |
--------------------------------------------------------------------------------
/apps/nn_ops/Convolution.sh:
--------------------------------------------------------------------------------
1 | CONVOLUTION=$1
2 | # Columns are: schedule C W H N filter_width, filter_height, output_depth,
3 | # input_offset, filter_offset, input_depth, stride, pad_width, pad_height,
4 | # byte_zero, output_multiplier, output_shift, output_offset, output_min,
5 | # output_max
6 |
7 | $CONVOLUTION 8 17 17 1 1 1 8 -128 -128 8 1 0 0 0
8 | $CONVOLUTION 8 17 17 1 3 3 8 -128 -128 8 1 1 1 0
9 | $CONVOLUTION 8 17 17 1 3 3 8 -128 -128 8 2 1 1 0
10 | $CONVOLUTION 8 17 17 1 3 3 16 -128 -128 8 1 1 1 0
11 | $CONVOLUTION 8 17 17 1 3 3 16 -128 -140 8 1 1 1 0
12 | $CONVOLUTION 12 17 17 1 3 3 16 -128 -140 12 1 1 1 0
13 |
--------------------------------------------------------------------------------
/apps/nn_ops/Im2col.sh:
--------------------------------------------------------------------------------
1 | IM2COL=$1
2 | # Columns are: schedule C W H N stride pad_width pad_height filter_width filter_height byte_zero
3 | $IM2COL 8 16 16 1 1 0 0 1 1 0
4 | $IM2COL 8 16 16 1 1 1 1 3 3 0
5 | $IM2COL 8 16 16 1 2 1 1 3 3 0
6 | $IM2COL 8 16 16 1 2 2 2 5 5 0
7 |
8 | $IM2COL 32 7 7 1 1 0 0 1 1 0
9 | $IM2COL 32 7 7 1 1 1 1 3 3 0
10 | $IM2COL 32 7 7 1 2 1 1 3 3 0
11 | $IM2COL 32 7 7 4 2 2 2 5 5 0
12 |
13 | $IM2COL 8 16 16 1 1 0 0 1 1 5
14 | $IM2COL 8 16 16 1 1 1 1 3 3 5
15 | $IM2COL 8 16 16 1 2 1 1 3 3 5
16 |
--------------------------------------------------------------------------------
/apps/nn_ops/MaxPool.sh:
--------------------------------------------------------------------------------
1 | MAXPOOL=$1
2 | # Columns are: schedule C W H N stride pad_width pad_height filter_width filter_height output_min output_max
3 | $MAXPOOL 8 16 16 1 1 0 0 1 1 0 255
4 | $MAXPOOL 8 16 16 1 1 1 1 3 3 0 255
5 | $MAXPOOL 8 16 16 1 2 1 1 3 3 0 255
6 | $MAXPOOL 8 16 16 1 2 2 2 5 5 0 255
7 |
8 | $MAXPOOL 32 7 7 1 1 0 0 1 1 0 255
9 | $MAXPOOL 32 7 7 1 1 1 1 3 3 0 255
10 | $MAXPOOL 32 7 7 1 2 1 1 3 3 0 255
11 | $MAXPOOL 32 7 7 4 2 2 2 5 5 0 255
12 |
13 | $MAXPOOL 8 16 16 1 1 0 0 1 1 64 128
14 | $MAXPOOL 8 16 16 1 1 1 1 3 3 64 128
15 | $MAXPOOL 8 16 16 1 2 1 1 3 3 64 128
16 |
--------------------------------------------------------------------------------
/apps/nn_ops/common.h:
--------------------------------------------------------------------------------
1 | // A collection of utility functions shared by the halide generators.
2 |
3 | #ifndef COMMON_HALIDE_H_
4 | #define COMMON_HALIDE_H_
5 |
6 | #include
7 |
8 | // This function implements the same computation as the ARMv7 NEON VQRDMULH
9 | // instruction.
10 | Halide::Expr saturating_rounding_doubling_high_multiply(Halide::Expr a, Halide::Expr b);
11 |
12 | // Correctly-rounded-to-nearest division by a power-of-two. Also known as
13 | // rounding arithmetic right shift.
14 | Halide::Expr rounding_shift_right(Halide::Expr x, Halide::Expr shift);
15 |
16 | // Performs right shift and multiply by a multiplier.
17 | Halide::Expr multiply_quantized_multiplier(
18 | Halide::Expr x, Halide::Expr quantized_multiplier, Halide::Expr shift);
19 | #endif
20 |
--------------------------------------------------------------------------------
/apps/nn_ops/common_reference.h:
--------------------------------------------------------------------------------
1 | // A collection of utility functions shared by test apps.
2 |
3 | #ifndef COMMON_REFERENCE_H_
4 | #define COMMON_REFERENCE_H_
5 |
6 | #include
7 |
8 | // This function implements the same computation as the ARMv7 NEON VQRDMULH
9 | // instruction.
10 | int32_t saturating_rounding_doubling_high_multiply_reference(int32_t a, int32_t b);
11 |
12 | // Correctly-rounded-to-nearest division by a power-of-two. Also known as
13 | // rounding arithmetic right shift.
14 | int32_t rounding_shift_right_reference(int32_t x, int32_t shift);
15 |
16 | // Performs right shift and multiply by a multiplier.
17 | int32_t multiply_quantized_multiplier_reference(int32_t x, int32_t q, int32_t shift);
18 |
19 | #endif
20 |
--------------------------------------------------------------------------------
/apps/onnx/common_types.h:
--------------------------------------------------------------------------------
1 | #ifndef COMMON_TYPES_H_
2 | #define COMMON_TYPES_H_
3 |
4 | #include "Halide.h"
5 | #include "onnx_converter.h"
6 |
7 | struct HalideModel {
8 | std::shared_ptr model;
9 | std::shared_ptr rep;
10 | std::vector input_names;
11 | std::unordered_map input_types;
12 | std::vector output_names;
13 | std::vector output_types;
14 | };
15 |
16 | #endif
17 |
--------------------------------------------------------------------------------
/apps/opengl_demo/glfw_helpers.h:
--------------------------------------------------------------------------------
1 | #ifndef _GLFW_HELPERS_H_
2 | #define _GLFW_HELPERS_H_
3 |
4 | namespace GlfwHelpers {
5 |
6 | struct info {
7 | float dpi_scale;
8 | };
9 |
10 | struct info setup(int width, int height);
11 | void set_opengl_context();
12 | void terminate();
13 | }
14 |
15 | #endif
16 |
--------------------------------------------------------------------------------
/apps/opengl_demo/image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/opengl_demo/image.png
--------------------------------------------------------------------------------
/apps/opengl_demo/layout.h:
--------------------------------------------------------------------------------
1 | #ifndef _LAYOUT_HELPERS_H_
2 | #define _LAYOUT_HELPERS_H_
3 |
4 | #if defined(__APPLE__)
5 | #include
6 | #else
7 | #include
8 | #endif
9 |
10 | namespace Layout {
11 |
12 | enum location { UL, UR, LL, LR };
13 |
14 | struct info {
15 | int window_width;
16 | int window_height;
17 | };
18 |
19 | const struct info &setup(int image_width, int image_height);
20 |
21 | void draw_image(enum location location, const uint8_t *data, int width, int height, const std::string &label);
22 | void draw_texture(enum location location, GLuint texture_id, int width, int height, const std::string &label);
23 | }
24 |
25 | #endif
26 |
27 |
--------------------------------------------------------------------------------
/apps/opengl_demo/opengl_helpers.h:
--------------------------------------------------------------------------------
1 | #ifndef _OPENGL_HELPERS_H_
2 | #define _OPENGL_HELPERS_H_
3 |
4 | #include
5 |
6 | #if defined(__APPLE__)
7 | #include
8 | #else
9 | #include
10 | #endif
11 |
12 | namespace OpenGLHelpers {
13 | void setup(float dpi_scale);
14 | GLuint create_texture(int width, int height, const uint8_t *data);
15 | void delete_texture(GLuint texture_id);
16 | void display_texture(GLuint texture_id, float x0, float x1, float y0, float y1);
17 | void draw_text(const std::string &text, float x, float y);
18 | }
19 |
20 | #endif
21 |
--------------------------------------------------------------------------------
/apps/opengl_demo/png_helpers.h:
--------------------------------------------------------------------------------
1 | #ifndef _PNG_HELPERS_
2 | #define _PNG_HELPERS_
3 |
4 | // Included here so this header compiles regardless of include order:
5 | // it uses uint8_t and std::string below.
6 | #include <stdint.h>
7 | #include <string>
8 |
9 | namespace PNGHelpers {
10 |
11 | // Dimensions and data pointer of an image, as returned by load().
12 | struct image_info {
13 |     unsigned int width;
14 |     unsigned int height;
15 |     const uint8_t *data;
16 | };
17 |
18 | // Loads the image at filepath and returns its image_info.
19 | struct image_info load(const std::string &filepath);
20 | }
21 |
22 |
23 | #endif
24 |
--------------------------------------------------------------------------------
/apps/opengl_demo/timer.h:
--------------------------------------------------------------------------------
1 | #ifndef _TIMER_H_
2 | #define _TIMER_H_
3 |
4 | #include
5 |
6 | namespace Timer
7 | {
8 | struct info {
9 | const std::string what;
10 | std::chrono::time_point time;
11 | };
12 |
13 | struct info start(const std::string &what);
14 | std::string report(const struct info &);
15 | }
16 |
17 | #endif
18 |
--------------------------------------------------------------------------------
/apps/openglcompute/build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 | android update project -p . --target android-21
4 | make jni-libs
5 | ant debug
6 | adb install -r bin/HelloHalideOpenGLCompute-debug.apk
7 | adb logcat -c
8 | adb shell am start -n com.example.hellohalideopenglcompute/.HalideOpenGLComputeActivity
9 | adb logcat | grep "^I/oglc"
10 |
--------------------------------------------------------------------------------
/apps/openglcompute/jni/Application.mk:
--------------------------------------------------------------------------------
1 | # TODO(aam): Confirm that application builds and runs for all supported targets:
2 | # APP_ABI := armeabi armeabi-v7a arm64-v8a mips x86_64 x86
3 | APP_ABI := armeabi-v7a
4 | APP_PLATFORM := android-17
5 |
6 | APP_STL := c++_static
7 | LOCAL_C_INCLUDES += ${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/4.8/include
8 |
--------------------------------------------------------------------------------
/apps/openglcompute/res/drawable-hdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/openglcompute/res/drawable-hdpi/ic_launcher.png
--------------------------------------------------------------------------------
/apps/openglcompute/res/drawable-ldpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/openglcompute/res/drawable-ldpi/ic_launcher.png
--------------------------------------------------------------------------------
/apps/openglcompute/res/drawable-mdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/openglcompute/res/drawable-mdpi/ic_launcher.png
--------------------------------------------------------------------------------
/apps/openglcompute/res/drawable-xhdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/openglcompute/res/drawable-xhdpi/ic_launcher.png
--------------------------------------------------------------------------------
/apps/openglcompute/res/layout/main.xml:
--------------------------------------------------------------------------------
1 |
2 |
7 |
8 |
14 |
15 |
--------------------------------------------------------------------------------
/apps/openglcompute/res/values/strings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | HelloHalideAndroidOpenGLCompute
4 |
5 |
--------------------------------------------------------------------------------
/apps/stencil_chain/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_executable(stencil_chain_process process.cpp)
2 | halide_use_image_io(stencil_chain_process)
3 |
4 | halide_generator(stencil_chain.generator SRCS stencil_chain_generator.cpp)
5 | foreach(AUTO_SCHEDULE false true)
6 | if(${AUTO_SCHEDULE})
7 | set(LIB stencil_chain_auto_schedule)
8 | else()
9 | set(LIB stencil_chain)
10 | endif()
11 | halide_library_from_generator(${LIB}
12 | GENERATOR stencil_chain.generator
13 | GENERATOR_ARGS auto_schedule=${AUTO_SCHEDULE})
14 | target_link_libraries(stencil_chain_process PRIVATE ${LIB})
15 | endforeach()
16 |
--------------------------------------------------------------------------------
/apps/support/viz_auto.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # $1 = filter cmd to run, including args
4 | # $2 = HalideTraceViz executable
5 | # $3 = path to output mp4
6 |
7 | rm -rf "$3"
8 |
9 | # Use a named pipe for the $1 -> HTV pipe, just in case
10 | # the exe in $1 writes any random output to stdout.
11 | # The pipe lives in a private mktemp directory so concurrent runs (or
12 | # other users of /tmp) cannot collide on a fixed, predictable path.
13 | PIPE_DIR=$(mktemp -d "${TMPDIR:-/tmp}/halide_viz_auto.XXXXXX") || exit 1
14 | # Remove the pipe directory however the script exits.
15 | trap 'rm -rf "$PIPE_DIR"' EXIT
16 | PIPE="$PIPE_DIR/pipe"
17 | mkfifo "$PIPE" || exit 1
18 |
19 | # $1 is deliberately unquoted: it carries the command and its arguments.
20 | # HL_NUM_THREADS matches the spelling used by local_laplacian/viz.sh.
21 | HL_TRACE_FILE="$PIPE" HL_NUM_THREADS=8 $1 &
22 |
23 | $2 --auto_layout --ignore_tags 0<"$PIPE" | \
24 | ${HL_AVCONV} -y -f rawvideo -pix_fmt bgr32 -s 1920x1080 -i /dev/stdin -c:v h264 "$3"
25 |
--------------------------------------------------------------------------------
/apps/wavelet/README.md:
--------------------------------------------------------------------------------
1 | wavelet is a trivial app designed to show ahead-of-time Generator usage (with both Make and CMake), as opposed to using direct calls to (e.g.) Func::compile_to_file().
2 |
--------------------------------------------------------------------------------
/apps/wavelet/daubechies_constants.h:
--------------------------------------------------------------------------------
1 | #ifndef DAUBECHIES_CONSTANTS_H_
2 | #define DAUBECHIES_CONSTANTS_H_
3 |
4 | const float D0 = 0.4829629131445341f;
5 | const float D1 = 0.83651630373780772f;
6 | const float D2 = 0.22414386804201339f;
7 | const float D3 = -0.12940952255126034f;
8 |
9 | #endif // DAUBECHIES_CONSTANTS_H_
10 |
--------------------------------------------------------------------------------
/apps/wavelet/haar_x_generator.cpp:
--------------------------------------------------------------------------------
1 | #include "Halide.h"
2 |
3 | #include "daubechies_constants.h"
4 |
5 | namespace {
6 |
7 | Halide::Var x("x"), y("y"), c("c");
8 |
9 | class haar_x : public Halide::Generator {
10 | public:
11 | Input> in_{"in" , 2};
12 | Output> out_{"out" , 3};
13 |
14 | void generate() {
15 | Func in = Halide::BoundaryConditions::repeat_edge(in_);
16 |
17 | out_(x, y, c) = select(c == 0,
18 | (in(2*x, y) + in(2*x+1, y)),
19 | (in(2*x, y) - in(2*x+1, y)))/2;
20 | out_.unroll(c, 2);
21 | }
22 | };
23 |
24 | } // namespace
25 |
26 | HALIDE_REGISTER_GENERATOR(haar_x, haar_x)
27 |
--------------------------------------------------------------------------------
/apps/wavelet/inverse_haar_x_generator.cpp:
--------------------------------------------------------------------------------
1 | #include "Halide.h"
2 |
3 | #include "daubechies_constants.h"
4 |
5 | namespace {
6 |
7 | Halide::Var x("x"), y("y"), c("c");
8 |
9 | class inverse_haar_x : public Halide::Generator {
10 | public:
11 | Input> in_{"in" , 3};
12 | Output> out_{"out" , 2};
13 |
14 | void generate() {
15 | Func in = Halide::BoundaryConditions::repeat_edge(in_);
16 |
17 | out_(x, y) = select(x%2 == 0,
18 | in(x/2, y, 0) + in(x/2, y, 1),
19 | in(x/2, y, 0) - in(x/2, y, 1));
20 | out_.unroll(x, 2);
21 | }
22 | };
23 |
24 | } // namespace
25 |
26 | HALIDE_REGISTER_GENERATOR(inverse_haar_x, inverse_haar_x)
27 |
--------------------------------------------------------------------------------
/python_bindings/correctness/bit_generator.cpp:
--------------------------------------------------------------------------------
1 | #include "Halide.h"
2 |
3 | using namespace Halide;
4 |
5 | class BitGenerator : public Halide::Generator {
6 | public:
7 | Input> bit_input{"input_uint1", 1};  // 1-D input, read as bit_input(x) below
8 | Input bit_constant{"constant_uint1"};  // scalar input added to every element
9 | // NOTE(review): the "uint1" names suggest 1-bit element types - confirm against the declared template arguments.
10 | Output> bit_output{"output_uint1", 1};  // 1-D output, written as bit_output(x) below
11 |
12 | Var x, y, z;  // only x is referenced in this class; y and z are unused here
13 | // generate(): each output element is the matching input element plus bit_constant.
14 | void generate() {
15 | bit_output(x) = bit_input(x) + bit_constant;
16 | }
17 | // schedule(): empty body, so no scheduling directives are applied.
18 | void schedule() {
19 | }
20 | };
21 |
22 | HALIDE_REGISTER_GENERATOR(BitGenerator, bit)
23 |
--------------------------------------------------------------------------------
/python_bindings/correctness/rdom.py:
--------------------------------------------------------------------------------
1 | import halide as hl
2 |
3 | def test_rdom():
4 | x = hl.Var("x")
5 | y = hl.Var("y")
6 | # Pure definition: every point of `diagonal` starts at 1.
7 | diagonal = hl.Func("diagonal")
8 | diagonal[x, y] = 1
9 | # Extent of the reduction domain and of the realized output in each dimension.
10 | domain_width = 10
11 | domain_height = 10
12 | # Reduction domain over [0, width) x [0, height), restricted by the predicate r.x <= r.y.
13 | r = hl.RDom([(0, domain_width), (0, domain_height)])
14 | r.where(r.x <= r.y)
15 | # Update definition: add 2 at every point of the restricted reduction domain.
16 | diagonal[r.x, r.y] += 2
17 | output = diagonal.realize(domain_width, domain_height)
18 | # Points with ix <= iy received the update (1 + 2 == 3); all other points keep the initial 1.
19 | for iy in range(domain_height):
20 | for ix in range(domain_width):
21 | if ix <= iy:
22 | assert output[ix, iy] == 3
23 | else:
24 | assert output[ix, iy] == 1
25 | # Reaching this point means every assertion passed.
26 | return 0
27 |
28 | if __name__ == "__main__":
29 | test_rdom()
30 |
--------------------------------------------------------------------------------
/python_bindings/correctness/user_context_generator.cpp:
--------------------------------------------------------------------------------
1 | #include "Halide.h"
2 |
3 | using namespace Halide;
4 |
5 | class UserContextGenerator : public Halide::Generator {
6 | public:
7 | Input constant{"constant"};  // scalar input written to every output element
8 | Output> output{"output", 1};  // 1-D output, written as output(x) below
9 |
10 | Var x;  // index variable for the output
11 | // generate(): fills every element of the output with `constant`.
12 | void generate() {
13 | output(x) = constant;
14 | }
15 | // schedule(): empty body, so no scheduling directives are applied.
16 | void schedule() {
17 | }
18 | };
19 |
20 | HALIDE_REGISTER_GENERATOR(UserContextGenerator, user_context)
21 |
--------------------------------------------------------------------------------
/python_bindings/correctness/user_context_test.py:
--------------------------------------------------------------------------------
1 | import array
2 | import user_context
3 |
4 |
5 | def test():
6 | output = bytearray("\0\0\0\0", "ascii")  # four zero bytes; the call below is expected to overwrite them
7 | user_context.user_context(None, ord('q'), output)  # first arg is None (presumably the user context - confirm), then the constant and the output buffer
8 | assert output == bytearray("qqqq", "ascii")  # every byte must now equal ord('q')
9 |
10 |
11 | if __name__ == "__main__":
12 | test()
13 |
--------------------------------------------------------------------------------
/python_bindings/requirements.txt:
--------------------------------------------------------------------------------
1 | # This file lists the python dependencies,
2 | # it is meant to be used with pip (and/or possibly virtualenv, pbundler, etc)
3 | # See http://pip.readthedocs.org/en/latest/user_guide.html#requirements-files
4 | # You will probably want to run
5 | # something similar to `pip3 install --user -r requirements.txt`
6 |
7 | # science packages
8 | numpy
9 | scipy
10 | pillow
11 |
--------------------------------------------------------------------------------
/python_bindings/src/PyArgument.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_PYTHON_BINDINGS_PYARGUMENT_H
2 | #define HALIDE_PYTHON_BINDINGS_PYARGUMENT_H
3 |
4 | #include "PyHalide.h"
5 |
6 | namespace Halide {
7 | namespace PythonBindings {
8 |
9 | void define_argument(py::module &m);
10 |
11 | } // namespace PythonBindings
12 | } // namespace Halide
13 |
14 | #endif // HALIDE_PYTHON_BINDINGS_PYARGUMENT_H
15 |
--------------------------------------------------------------------------------
/python_bindings/src/PyBoundaryConditions.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_PYTHON_BINDINGS_PYBOUNDARYCONDITIONS_H
2 | #define HALIDE_PYTHON_BINDINGS_PYBOUNDARYCONDITIONS_H
3 |
4 | #include "PyHalide.h"
5 |
6 | namespace Halide {
7 | namespace PythonBindings {
8 |
9 | void define_boundary_conditions(py::module &m);
10 |
11 | } // namespace PythonBindings
12 | } // namespace Halide
13 |
14 | #endif // HALIDE_PYTHON_BINDINGS_PYBOUNDARYCONDITIONS_H
15 |
--------------------------------------------------------------------------------
/python_bindings/src/PyBuffer.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_PYTHON_BINDINGS_PYBUFFER_H
2 | #define HALIDE_PYTHON_BINDINGS_PYBUFFER_H
3 |
4 | #include "PyHalide.h"
5 |
6 | namespace Halide {
7 | namespace PythonBindings {
8 |
9 | void define_buffer(py::module &m);
10 |
11 | } // namespace PythonBindings
12 | } // namespace Halide
13 |
14 | #endif // HALIDE_PYTHON_BINDINGS_PYBUFFER_H
15 |
--------------------------------------------------------------------------------
/python_bindings/src/PyConciseCasts.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_PYTHON_BINDINGS_PYCONCISECASTS_H
2 | #define HALIDE_PYTHON_BINDINGS_PYCONCISECASTS_H
3 |
4 | #include "PyHalide.h"
5 |
6 | namespace Halide {
7 | namespace PythonBindings {
8 |
9 | void define_concise_casts(py::module &m);
10 |
11 | } // namespace PythonBindings
12 | } // namespace Halide
13 |
14 | #endif // HALIDE_PYTHON_BINDINGS_PYCONCISECASTS_H
15 |
--------------------------------------------------------------------------------
/python_bindings/src/PyDerivative.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_PYTHON_BINDINGS_PYDERIVATIVE_H
2 | #define HALIDE_PYTHON_BINDINGS_PYDERIVATIVE_H
3 |
4 | #include "PyHalide.h"
5 |
6 | namespace Halide {
7 | namespace PythonBindings {
8 |
9 | void define_derivative(py::module &m);
10 |
11 | } // namespace PythonBindings
12 | } // namespace Halide
13 |
14 | #endif // HALIDE_PYTHON_BINDINGS_PYDERIVATIVE_H
15 |
--------------------------------------------------------------------------------
/python_bindings/src/PyEnums.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_PYTHON_BINDINGS_PYENUMS_H
2 | #define HALIDE_PYTHON_BINDINGS_PYENUMS_H
3 |
4 | #include "PyHalide.h"
5 |
6 | namespace Halide {
7 | namespace PythonBindings {
8 |
9 | void define_enums(py::module &m);
10 |
11 | } // namespace PythonBindings
12 | } // namespace Halide
13 |
14 | #endif // HALIDE_PYTHON_BINDINGS_PYENUMS_H
15 |
--------------------------------------------------------------------------------
/python_bindings/src/PyError.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_PYTHON_BINDINGS_PYERROR_H
2 | #define HALIDE_PYTHON_BINDINGS_PYERROR_H
3 |
4 | #include "PyHalide.h"
5 |
6 | namespace Halide {
7 | namespace PythonBindings {
8 |
9 | void define_error(py::module &m);
10 |
11 | } // namespace PythonBindings
12 | } // namespace Halide
13 |
14 | #endif // HALIDE_PYTHON_BINDINGS_PYERROR_H
15 |
--------------------------------------------------------------------------------
/python_bindings/src/PyExpr.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_PYTHON_BINDINGS_PYEXPR_H
2 | #define HALIDE_PYTHON_BINDINGS_PYEXPR_H
3 |
4 | #include "PyHalide.h"
5 |
6 | namespace Halide {
7 | namespace PythonBindings {
8 |
9 | void define_expr(py::module &m);
10 |
11 | } // namespace PythonBindings
12 | } // namespace Halide
13 |
14 | #endif // HALIDE_PYTHON_BINDINGS_PYEXPR_H
15 |
--------------------------------------------------------------------------------
/python_bindings/src/PyExternFuncArgument.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_PYTHON_BINDINGS_PYEXTERNFUNCARGUMENT_H
2 | #define HALIDE_PYTHON_BINDINGS_PYEXTERNFUNCARGUMENT_H
3 |
4 | #include "PyHalide.h"
5 |
6 | namespace Halide {
7 | namespace PythonBindings {
8 |
9 | void define_extern_func_argument(py::module &m);
10 |
11 | } // namespace PythonBindings
12 | } // namespace Halide
13 |
14 | #endif // HALIDE_PYTHON_BINDINGS_PYEXTERNFUNCARGUMENT_H
15 |
--------------------------------------------------------------------------------
/python_bindings/src/PyFunc.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_PYTHON_BINDINGS_PYFUNC_H
2 | #define HALIDE_PYTHON_BINDINGS_PYFUNC_H
3 |
4 | #include "PyHalide.h"
5 |
6 | namespace Halide {
7 | namespace PythonBindings {
8 |
9 | void define_func(py::module &m);
10 |
11 | } // namespace PythonBindings
12 | } // namespace Halide
13 |
14 | #endif // HALIDE_PYTHON_BINDINGS_PYFUNC_H
15 |
--------------------------------------------------------------------------------
/python_bindings/src/PyFuncRef.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_PYTHON_BINDINGS_PYFUNC_REF_H
2 | #define HALIDE_PYTHON_BINDINGS_PYFUNC_REF_H
3 |
4 | #include "PyHalide.h"
5 |
6 | namespace Halide {
7 | namespace PythonBindings {
8 |
9 | void define_func_ref(py::module &m);
10 |
11 | } // namespace PythonBindings
12 | } // namespace Halide
13 |
14 | #endif // HALIDE_PYTHON_BINDINGS_PYFUNC_REF_H
15 |
--------------------------------------------------------------------------------
/python_bindings/src/PyIROperator.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_PYTHON_BINDINGS_PYIROPERATOR_H
2 | #define HALIDE_PYTHON_BINDINGS_PYIROPERATOR_H
3 |
4 | #include "PyHalide.h"
5 |
6 | namespace Halide {
7 | namespace PythonBindings {
8 |
9 | void define_operators(py::module &m);
10 |
11 | } // namespace PythonBindings
12 | } // namespace Halide
13 |
14 | #endif // HALIDE_PYTHON_BINDINGS_PYIROPERATOR_H
15 |
--------------------------------------------------------------------------------
/python_bindings/src/PyImageParam.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_PYTHON_BINDINGS_PYIMAGEPARAM_H
2 | #define HALIDE_PYTHON_BINDINGS_PYIMAGEPARAM_H
3 |
4 | #include "PyHalide.h"
5 |
6 | namespace Halide {
7 | namespace PythonBindings {
8 |
9 | void define_image_param(py::module &m);
10 |
11 | } // namespace PythonBindings
12 | } // namespace Halide
13 |
14 | #endif // HALIDE_PYTHON_BINDINGS_PYIMAGEPARAM_H
15 |
--------------------------------------------------------------------------------
/python_bindings/src/PyInlineReductions.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_PYTHON_BINDINGS_PYINLINEREDUCTIONS_H
2 | #define HALIDE_PYTHON_BINDINGS_PYINLINEREDUCTIONS_H
3 |
4 | #include "PyHalide.h"
5 |
6 | namespace Halide {
7 | namespace PythonBindings {
8 |
9 | void define_inline_reductions(py::module &m);
10 |
11 | } // namespace PythonBindings
12 | } // namespace Halide
13 |
14 | #endif // HALIDE_PYTHON_BINDINGS_PYINLINEREDUCTIONS_H
15 |
--------------------------------------------------------------------------------
/python_bindings/src/PyLambda.cpp:
--------------------------------------------------------------------------------
1 | #include "PyLambda.h"
2 |
3 | namespace Halide {
4 | namespace PythonBindings {
5 |
6 | void define_lambda(py::module &m) {
7 | // TODO: 'lambda' is a reserved word in Python, so we
8 | // can't use it for a function. Using 'lambda_func' for now.
9 | m.def("lambda_func", [](py::args args) -> Func {
10 | auto vars = args_to_vector(args, 0, 1);  // NOTE(review): presumably every argument except the last - confirm against args_to_vector
11 | Expr e = args[args.size() - 1].cast();  // last positional argument is the expression; NOTE(review): no guard for an empty args is visible here
12 | Func f("lambda" + Internal::unique_name('_'));  // name is "lambda" followed by a generated unique name
13 | f(vars) = e;  // define f over vars as the expression e
14 | return f;
15 | });
16 | }
17 |
18 | } // namespace PythonBindings
19 | } // namespace Halide
20 |
--------------------------------------------------------------------------------
/python_bindings/src/PyLambda.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_PYTHON_BINDINGS_PYLAMBDA_H
2 | #define HALIDE_PYTHON_BINDINGS_PYLAMBDA_H
3 |
4 | #include "PyHalide.h"
5 |
6 | namespace Halide {
7 | namespace PythonBindings {
8 |
9 | void define_lambda(py::module &m);
10 |
11 | } // namespace PythonBindings
12 | } // namespace Halide
13 |
14 | #endif // HALIDE_PYTHON_BINDINGS_PYLAMBDA_H
15 |
--------------------------------------------------------------------------------
/python_bindings/src/PyLoopLevel.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_PYTHON_BINDINGS_PYLOOPLEVEL_H
2 | #define HALIDE_PYTHON_BINDINGS_PYLOOPLEVEL_H
3 |
4 | #include "PyHalide.h"
5 |
6 | namespace Halide {
7 | namespace PythonBindings {
8 |
9 | void define_loop_level(py::module &m);
10 |
11 | } // namespace PythonBindings
12 | } // namespace Halide
13 |
14 | #endif // HALIDE_PYTHON_BINDINGS_PYLOOPLEVEL_H
15 |
--------------------------------------------------------------------------------
/python_bindings/src/PyMachineParams.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_PYTHON_BINDINGS_PYMACHINEPARAMS_H
2 | #define HALIDE_PYTHON_BINDINGS_PYMACHINEPARAMS_H
3 |
4 | #include "PyHalide.h"
5 |
6 | namespace Halide {
7 | namespace PythonBindings {
8 |
9 | void define_machine_params(py::module &m);
10 |
11 | } // namespace PythonBindings
12 | } // namespace Halide
13 |
14 | #endif // HALIDE_PYTHON_BINDINGS_PYMACHINEPARAMS_H
15 |
--------------------------------------------------------------------------------
/python_bindings/src/PyModule.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_PYTHON_BINDINGS_PYMODULE_H
2 | #define HALIDE_PYTHON_BINDINGS_PYMODULE_H
3 |
4 | #include "PyHalide.h"
5 |
6 | namespace Halide {
7 | namespace PythonBindings {
8 |
9 | void define_module(py::module &m);
10 |
11 | } // namespace PythonBindings
12 | } // namespace Halide
13 |
14 | #endif // HALIDE_PYTHON_BINDINGS_PYMODULE_H
15 |
--------------------------------------------------------------------------------
/python_bindings/src/PyOutputs.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_PYTHON_BINDINGS_PYOUTPUTS_H
2 | #define HALIDE_PYTHON_BINDINGS_PYOUTPUTS_H
3 |
4 | #include "PyHalide.h"
5 |
6 | namespace Halide {
7 | namespace PythonBindings {
8 |
9 | void define_outputs(py::module &m);
10 |
11 | } // namespace PythonBindings
12 | } // namespace Halide
13 |
14 | #endif // HALIDE_PYTHON_BINDINGS_PYOUTPUTS_H
15 |
--------------------------------------------------------------------------------
/python_bindings/src/PyParam.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_PYTHON_BINDINGS_PYPARAM_H
2 | #define HALIDE_PYTHON_BINDINGS_PYPARAM_H
3 |
4 | #include "PyHalide.h"
5 |
6 | namespace Halide {
7 | namespace PythonBindings {
8 |
9 | void define_param(py::module &m);
10 |
11 | } // namespace PythonBindings
12 | } // namespace Halide
13 |
14 | #endif // HALIDE_PYTHON_BINDINGS_PYPARAM_H
15 |
--------------------------------------------------------------------------------
/python_bindings/src/PyPipeline.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_PYTHON_BINDINGS_PYPIPELINE_H
2 | #define HALIDE_PYTHON_BINDINGS_PYPIPELINE_H
3 |
4 | #include "PyHalide.h"
5 |
6 | namespace Halide {
7 | namespace PythonBindings {
8 |
9 | void define_pipeline(py::module &m);
10 |
11 | } // namespace PythonBindings
12 | } // namespace Halide
13 |
14 | #endif // HALIDE_PYTHON_BINDINGS_PYPIPELINE_H
15 |
--------------------------------------------------------------------------------
/python_bindings/src/PyRDom.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_PYTHON_BINDINGS_PYRDOM_H
2 | #define HALIDE_PYTHON_BINDINGS_PYRDOM_H
3 |
4 | #include "PyHalide.h"
5 |
6 | namespace Halide {
7 | namespace PythonBindings {
8 |
9 | void define_rdom(py::module &m);
10 |
11 | } // namespace PythonBindings
12 | } // namespace Halide
13 |
14 | #endif // HALIDE_PYTHON_BINDINGS_PYRDOM_H
15 |
--------------------------------------------------------------------------------
/python_bindings/src/PyStage.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_PYTHON_BINDINGS_PYSTAGE_H
2 | #define HALIDE_PYTHON_BINDINGS_PYSTAGE_H
3 |
4 | #include "PyHalide.h"
5 |
6 | namespace Halide {
7 | namespace PythonBindings {
8 |
9 | void define_stage(py::module &m);
10 |
11 | } // namespace PythonBindings
12 | } // namespace Halide
13 |
14 | #endif // HALIDE_PYTHON_BINDINGS_PYSTAGE_H
15 |
--------------------------------------------------------------------------------
/python_bindings/src/PyTarget.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_PYTHON_BINDINGS_PYTARGET_H
2 | #define HALIDE_PYTHON_BINDINGS_PYTARGET_H
3 |
4 | #include "PyHalide.h"
5 |
6 | namespace Halide {
7 | namespace PythonBindings {
8 |
9 | void define_target(py::module &m);
10 |
11 | } // namespace PythonBindings
12 | } // namespace Halide
13 |
14 | #endif // HALIDE_PYTHON_BINDINGS_PYTARGET_H
15 |
--------------------------------------------------------------------------------
/python_bindings/src/PyTuple.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_PYTHON_BINDINGS_PYTUPLE_H
2 | #define HALIDE_PYTHON_BINDINGS_PYTUPLE_H
3 |
4 | #include "PyHalide.h"
5 |
6 | namespace Halide {
7 | namespace PythonBindings {
8 |
9 | void define_tuple(py::module &m);
10 |
11 | // Builds a py::tuple of ht.size() elements, each py::cast from ht[i]; templated to allow for use with Realization as well as Tuple.
12 | template
13 | inline py::tuple to_python_tuple(const T &ht) {
14 | py::tuple pt(ht.size());  // allocate the tuple with its final length up front
15 | for (size_t i = 0; i < ht.size(); i++) {
16 | pt[i] = py::cast(ht[i]);
17 | }
18 | return pt;
19 | }
20 |
21 | } // namespace PythonBindings
22 | } // namespace Halide
23 |
24 | #endif // HALIDE_PYTHON_BINDINGS_PYTUPLE_H
25 |
--------------------------------------------------------------------------------
/python_bindings/src/PyType.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_PYTHON_BINDINGS_PYTYPE_H
2 | #define HALIDE_PYTHON_BINDINGS_PYTYPE_H
3 |
4 | #include "PyHalide.h"
5 |
6 | namespace Halide {
7 | namespace PythonBindings {
8 |
9 | void define_type(py::module &m);
10 |
11 | std::string halide_type_to_string(const Type &type);
12 |
13 | } // namespace PythonBindings
14 | } // namespace Halide
15 |
16 | #endif // HALIDE_PYTHON_BINDINGS_PYTYPE_H
17 |
--------------------------------------------------------------------------------
/python_bindings/src/PyVar.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_PYTHON_BINDINGS_PYVAR_H
2 | #define HALIDE_PYTHON_BINDINGS_PYVAR_H
3 |
4 | #include "PyHalide.h"
5 |
6 | namespace Halide {
7 | namespace PythonBindings {
8 |
9 | void define_var(py::module &m);
10 |
11 | } // namespace PythonBindings
12 | } // namespace Halide
13 |
14 | #endif // HALIDE_PYTHON_BINDINGS_PYVAR_H
15 |
--------------------------------------------------------------------------------
/python_bindings/src/PyVarOrRVar.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_PYTHON_BINDINGS_PYFUNC_VARORRVAR_H
2 | #define HALIDE_PYTHON_BINDINGS_PYFUNC_VARORRVAR_H
3 |
4 | #include "PyHalide.h"
5 |
6 | namespace Halide {
7 | namespace PythonBindings {
8 |
9 | void define_var_or_rvar(py::module &m);
10 |
11 | } // namespace PythonBindings
12 | } // namespace Halide
13 |
14 | #endif // HALIDE_PYTHON_BINDINGS_PYFUNC_VARORRVAR_H
15 |
--------------------------------------------------------------------------------
/src/AddParameterChecks.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_INTERNAL_ADD_PARAMETER_CHECKS_H
2 | #define HALIDE_INTERNAL_ADD_PARAMETER_CHECKS_H
3 |
4 | /** \file
5 | *
6 | * Defines the lowering pass that adds the assertions that validate
7 | * scalar parameters.
8 | */
9 |
10 | #include "IR.h"
11 |
12 | namespace Halide {
13 |
14 | struct Target;
15 |
16 | namespace Internal {
17 |
18 | /** Insert checks to make sure that all referenced parameters meet
19 | * their constraints. Also injects any custom requirements provided
20 | * by the user. */
21 | Stmt add_parameter_checks(const std::vector &requirements, Stmt s, const Target &t);
22 |
23 | } // namespace Internal
24 | } // namespace Halide
25 |
26 | #endif
27 |
--------------------------------------------------------------------------------
/src/AlignLoads.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_ALIGN_LOADS_H
2 | #define HALIDE_ALIGN_LOADS_H
3 |
4 | /** \file
5 | * Defines a lowering pass that rewrites unaligned loads into
6 | * sequences of aligned loads.
7 | */
8 | #include "IR.h"
9 | #include "ModulusRemainder.h"
10 | #include "Scope.h"
11 | #include "Target.h"
12 |
13 | namespace Halide {
14 | namespace Internal {
15 |
16 | /** Attempt to rewrite unaligned loads from buffers which are known to
17 | * be aligned to instead load aligned vectors that cover the original
18 | * load, and then slice the original load out of the aligned
19 | * vectors. */
20 | Stmt align_loads(Stmt s, int alignment);
21 |
22 | } // namespace Internal
23 | } // namespace Halide
24 |
25 | #endif
26 |
--------------------------------------------------------------------------------
/src/AllocationBoundsInference.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_ALLOCATION_BOUNDS_INFERENCE_H
2 | #define HALIDE_ALLOCATION_BOUNDS_INFERENCE_H
3 |
4 | /** \file
5 | * Defines the lowering pass that determines how large internal allocations should be.
6 | */
7 |
8 | #include "Bounds.h"
9 | #include "IR.h"
10 |
11 | namespace Halide {
12 | namespace Internal {
13 |
14 | /** Take a partially lowered statement with Realize nodes in terms of
15 | * variables, and define values for those variables. */
16 | Stmt allocation_bounds_inference(Stmt s,
17 | const std::map &env,
18 | const std::map, Interval> &func_bounds);
19 | } // namespace Internal
20 | } // namespace Halide
21 |
22 | #endif
23 |
--------------------------------------------------------------------------------
/src/AsyncProducers.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_ASYNC_PRODUCERS_H
2 | #define HALIDE_ASYNC_PRODUCERS_H
3 |
4 | /** \file
5 | * Defines the lowering pass that injects task parallelism for producers that are scheduled as async.
6 | */
7 |
8 | #include "IR.h"
9 |
10 | namespace Halide {
11 | namespace Internal {
12 | /** Lowering pass that injects task parallelism for producers that are scheduled as async. */
13 | Stmt fork_async_producers(Stmt s, const std::map &env);
14 |
15 | }  // namespace Internal
16 | }  // namespace Halide
17 |
18 | #endif
19 |
--------------------------------------------------------------------------------
/src/BoundSmallAllocations.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_BOUND_SMALL_ALLOCATIONS
2 | #define HALIDE_BOUND_SMALL_ALLOCATIONS
3 |
4 | #include "IR.h"
5 |
6 | /** \file
7 | * Defines the lowering pass that attempts to rewrite small
8 | * allocations to have constant size.
9 | */
10 |
11 | namespace Halide {
12 | namespace Internal {
13 |
14 | /**
15 | *
16 | * Use bounds analysis to attempt to bound the sizes of small
17 | * allocations. Inside GPU kernels this is necessary in order to
18 | * compile. On the CPU this is also useful, because it prevents malloc
19 | * calls for (provably) tiny allocations. */
20 | Stmt bound_small_allocations(const Stmt &s);
21 |
22 | } // namespace Internal
23 | } // namespace Halide
24 |
25 | #endif
26 |
--------------------------------------------------------------------------------
/src/CanonicalizeGPUVars.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_CANONICALIZE_GPU_VARS_H
2 | #define HALIDE_CANONICALIZE_GPU_VARS_H
3 |
4 | /** \file
5 | * Defines the lowering pass that canonicalizes the GPU var names.
6 | */
7 |
8 | #include "IR.h"
9 |
10 | namespace Halide {
11 | namespace Internal {
12 |
13 | /** Canonicalize GPU var names into some pre-determined block/thread names
14 | * (i.e. __block_id_x, __thread_id_x, etc.). The x/y/z/w order is determined
15 | * by the nesting order: innermost is assigned to x and so on. */
16 | Stmt canonicalize_gpu_vars(Stmt s);
17 |
18 | } // namespace Internal
19 | } // namespace Halide
20 |
21 | #endif
22 |
--------------------------------------------------------------------------------
/src/CodeGen_RISCV.cpp:
--------------------------------------------------------------------------------
1 | #include "CodeGen_RISCV.h"
2 | #include "Util.h"
3 | #include "LLVM_Headers.h"
4 |
5 | namespace Halide {
6 | namespace Internal {
7 |
8 | using std::string;
9 |
10 | using namespace llvm;
11 | // Constructor: forwards the target to CodeGen_Posix and raises a user error when WITH_RISCV is not defined.
12 | CodeGen_RISCV::CodeGen_RISCV(Target t) : CodeGen_Posix(t) {
13 | #if !defined(WITH_RISCV)
14 | user_error << "llvm build not configured with RISCV target enabled.\n";
15 | #endif
16 | }
17 | // Returns an empty string, i.e. no CPU name is supplied.
18 | string CodeGen_RISCV::mcpu() const {
19 | return "";
20 | }
21 | // Returns an empty string, i.e. no target attributes are supplied.
22 | string CodeGen_RISCV::mattrs() const {
23 | return "";
24 | }
25 | // Always reports false for the soft-float ABI query.
26 | bool CodeGen_RISCV::use_soft_float_abi() const {
27 | return false;
28 | }
29 | // Reports a fixed native vector width of 128 bits.
30 | int CodeGen_RISCV::native_vector_bits() const {
31 | return 128;
32 | }
33 |
34 | }}  // namespace Internal, namespace Halide
35 |
--------------------------------------------------------------------------------
/src/Debug.cpp:
--------------------------------------------------------------------------------
1 | #include "Debug.h"
2 |
3 | namespace Halide {
4 | namespace Internal {
5 |
6 | int debug::debug_level() {
7 | static int cached_debug_level = ([]() -> int {  // function-local static: the lambda runs once, on the first call
8 | std::string lvl = get_env_variable("HL_DEBUG_CODEGEN");
9 | return !lvl.empty() ? atoi(lvl.c_str()) : 0;  // an empty value yields level 0; otherwise the value is parsed with atoi
10 | })();
11 | return cached_debug_level;  // later calls return the cached value without re-reading the variable
12 | }
13 |
14 | } // namespace Internal
15 | } // namespace Halide
16 |
--------------------------------------------------------------------------------
/src/DebugArguments.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_INTERNAL_DEBUG_ARGUMENTS_H
2 | #define HALIDE_INTERNAL_DEBUG_ARGUMENTS_H
3 |
4 | #include "Target.h"
5 |
6 | /** \file
7 | *
8 | * Defines a lowering pass that injects debug statements inside a
9 | * LoweredFunc. Intended to be used when Target::Debug is on.
10 | */
11 |
12 | namespace Halide {
13 | namespace Internal {
14 |
15 | struct LoweredFunc;
16 |
17 | /** Injects debug prints in a LoweredFunc that describe the target and
18 | * arguments. Mutates the given func. */
19 | void debug_arguments(LoweredFunc *func, const Target &t);
20 |
21 | } // namespace Internal
22 | } // namespace Halide
23 |
24 | #endif
25 |
--------------------------------------------------------------------------------
/src/EarlyFree.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_EARLY_FREE_H
2 | #define HALIDE_EARLY_FREE_H
3 |
4 | /** \file
5 | * Defines the lowering pass that injects markers just after
6 | * the last use of each buffer so that they can potentially be freed
7 | * earlier.
8 | */
9 |
10 | #include "IR.h"
11 |
12 | namespace Halide {
13 | namespace Internal {
14 |
15 | /** Take a statement with allocations and inject markers (of the form
16 | * of calls to "mark buffer dead") after the last use of each
17 | * allocation. Targets may use this to free buffers earlier than the
18 | * close of their Allocate node. */
19 | Stmt inject_early_frees(Stmt s);
20 |
21 | } // namespace Internal
22 | } // namespace Halide
23 |
24 | #endif
25 |
--------------------------------------------------------------------------------
/src/HexagonOffload.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_HEXAGON_OFFLOAD_H
2 | #define HALIDE_HEXAGON_OFFLOAD_H
3 |
4 | /** \file
5 | * Defines a lowering pass to pull loops marked with the
6 | * Hexagon device API to a separate module, and call them through the
7 | * Hexagon host runtime module.
8 | */
9 |
10 | #include "Module.h"
11 |
12 | namespace Halide {
13 | namespace Internal {
14 |
15 | /** Pull loops marked with the Hexagon device API to a separate
16 | * module, and call them through the Hexagon host runtime module. */
17 | Stmt inject_hexagon_rpc(Stmt s, const Target &host_target, Module &module);
18 |
19 | Buffer compile_module_to_hexagon_shared_object(const Module &device_code);
20 |
21 | } // namespace Internal
22 | } // namespace Halide
23 |
24 | #endif
25 |
--------------------------------------------------------------------------------
/src/InjectHostDevBufferCopies.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_HOST_GPU_BUFFER_COPIES_H
2 | #define HALIDE_HOST_GPU_BUFFER_COPIES_H
3 |
4 | /** \file
5 | * Defines the lowering passes that deal with host and device buffer flow.
6 | */
7 |
8 | #include "IR.h"
9 | #include "Target.h"
10 |
11 | namespace Halide {
12 | namespace Internal {
13 |
14 | /** A helper function to call an extern function, and assert that it
15 | * returns 0. */
16 | Stmt call_extern_and_assert(const std::string &name, const std::vector &args);
17 |
18 | /** Inject calls to halide_device_malloc, halide_copy_to_device, and
19 | * halide_copy_to_host as needed. */
20 | Stmt inject_host_dev_buffer_copies(Stmt s, const Target &t);
21 |
22 | } // namespace Internal
23 | } // namespace Halide
24 |
25 | #endif
26 |
--------------------------------------------------------------------------------
/src/InjectOpenGLIntrinsics.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_INJECT_OPENGL_INTRINSICS_H
2 | #define HALIDE_INJECT_OPENGL_INTRINSICS_H
3 |
4 | /** \file
5 | * Defines the lowering pass that injects texture loads and texture
6 | * stores for opengl.
7 | */
8 |
9 | #include "IR.h"
10 |
11 | namespace Halide {
12 | namespace Internal {
13 |
14 | /** Take a statement with kernel for loops and turn loads and
15 | * stores inside the loops into OpenGL texture load and store
16 | * intrinsics. Should only be run when the OpenGL target is active. */
17 | Stmt inject_opengl_intrinsics(Stmt s);
18 |
19 | } // namespace Internal
20 | } // namespace Halide
21 |
22 | #endif
23 |
--------------------------------------------------------------------------------
/src/LICM.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_LICM_H
2 | #define HALIDE_LICM_H
3 |
4 | /** \file
5 | * Methods for lifting loop invariants out of inner loops.
6 | */
7 |
8 | #include "IR.h"
9 |
10 | namespace Halide {
11 | namespace Internal {
12 |
13 | /** Hoist loop-invariants out of inner loops. This is especially
14 | * important in cases where LLVM would not do it for us
15 | * automatically. For example, it hoists loop invariants out of cuda
16 | * kernels. */
17 | Stmt loop_invariant_code_motion(Stmt);
18 |
19 | } // namespace Internal
20 | } // namespace Halide
21 |
22 | #endif
23 |
--------------------------------------------------------------------------------
/src/Lerp.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_LERP_H
2 | #define HALIDE_LERP_H
3 |
4 | /** \file
5 | * Defines methods for converting a lerp intrinsic into Halide IR.
6 | */
7 |
8 | #include "IR.h"
9 |
10 | namespace Halide {
11 | namespace Internal {
12 |
13 | /** Build Halide IR that computes a lerp. Used by codegen targets that
14 | * don't have a native lerp. */
15 | Expr lower_lerp(Expr zero_val, Expr one_val, Expr weight);
16 |
17 | } // namespace Internal
18 | } // namespace Halide
19 |
20 | #endif
21 |
--------------------------------------------------------------------------------
/src/LoopCarry.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_LOOP_CARRY_H
2 | #define HALIDE_LOOP_CARRY_H
3 |
4 | #include "Expr.h"
5 |
6 | namespace Halide {
7 | namespace Internal {
8 |
9 | /** Reuse loads done on previous loop iterations by stashing them in
10 | * induction variables instead of redoing the load. If the loads are
11 | * predicated, the predicates need to match. Can be an optimization or
12 | * pessimization depending on how good the L1 cache is on the architecture
13 | * and how many memory issue slots there are. Currently only intended
14 | * for Hexagon. */
15 | Stmt loop_carry(Stmt, int max_carried_values = 8);
16 |
17 | } // namespace Internal
18 | } // namespace Halide
19 |
20 | #endif
21 |
--------------------------------------------------------------------------------
/src/LowerWarpShuffles.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_LOWER_WARP_SHUFFLES_H
2 | #define HALIDE_LOWER_WARP_SHUFFLES_H
3 |
4 | /** \file
5 | * Defines the lowering pass that injects CUDA warp shuffle
6 | * instructions to access storage outside of a GPULane loop.
7 | */
8 |
9 | #include "IR.h"
10 |
11 | namespace Halide {
12 | namespace Internal {
13 |
14 | /** Rewrite access to things stored outside the loop over GPU lanes to
15 | * use nvidia's warp shuffle instructions. */
16 | Stmt lower_warp_shuffles(Stmt s);
17 |
18 | } // namespace Internal
19 | } // namespace Halide
20 |
21 | #endif
22 |
--------------------------------------------------------------------------------
/src/Monotonic.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_MONOTONIC_H
2 | #define HALIDE_MONOTONIC_H
3 |
4 | /** \file
5 | *
6 | * Methods for computing whether expressions are monotonic
7 | */
8 |
9 | #include "IR.h"
10 | #include "Scope.h"
11 |
12 | namespace Halide {
13 | namespace Internal {
14 |
15 | /** Classification of how an expression varies with respect to a variable. */
16 | enum class Monotonic {Constant, Increasing, Decreasing, Unknown};
17 |
18 | /** Detect whether an expression is constant, monotonic increasing, or
19 | * monotonic decreasing in a variable, or unknown. The scope defaults to the empty scope. */
20 | Monotonic is_monotonic(Expr e, const std::string &var,
21 | const Scope<Monotonic> &scope = Scope<Monotonic>::empty_scope());
22 |
23 | void is_monotonic_test();
24 |
25 | } // namespace Internal
26 | } // namespace Halide
27 |
28 | #endif
29 |
--------------------------------------------------------------------------------
/src/PrintLoopNest.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_INTERNAL_PRINT_LOOP_NEST_H
2 | #define HALIDE_INTERNAL_PRINT_LOOP_NEST_H
3 |
4 | /** \file
5 | *
6 | * Defines methods to print out the loop nest corresponding to a schedule.
7 | */
8 |
9 | #include <string>
10 | #include <vector>
11 |
12 | namespace Halide {
13 | namespace Internal {
14 |
15 | class Function;
16 |
17 | /** Emit some simple pseudocode that shows the structure of the loop
18 | * nest specified by this pipeline's schedule, and the schedules of
19 | * the functions it uses. Returns the pseudocode as a string. */
20 | std::string print_loop_nest(const std::vector<Function> &output_funcs);
21 |
22 | } // namespace Internal
23 | } // namespace Halide
24 |
25 | #endif
26 |
--------------------------------------------------------------------------------
/src/PythonExtensionGen.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_PYTHON_EXTENSION_GEN_H_
2 | #define HALIDE_PYTHON_EXTENSION_GEN_H_
3 |
4 | #include
5 | #include "Module.h"
6 | #include "Target.h"
7 |
8 | namespace Halide {
9 |
10 | class Module;
11 | struct Target;
12 |
13 | namespace Internal {
14 | /** NOTE(review): presumably emits Python extension glue code for a compiled Module/LoweredFunc to `dest`; only the declaration is visible here -- confirm against the implementation. */
15 | class PythonExtensionGen {
16 | public:
17 | PythonExtensionGen(std::ostream &dest, const std::string &header_name, Target target);
18 |
19 | void compile(const Module &module);
20 | void compile(const LoweredFunc &f);
21 | private:
22 | void convert_buffer(std::string name, const LoweredArgument* arg);
23 | std::ostream &dest; // Non-owning reference member: the stream must outlive this object.
24 | std::string header_name;
25 | Target target;
26 | };
27 |
28 | } // namespace Internal
29 | } // namespace Halide
30 |
31 | #endif // HALIDE_PYTHON_EXTENSION_GEN_H_
32 |
--------------------------------------------------------------------------------
/src/Qualify.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_QUALIFY_H
2 | #define HALIDE_QUALIFY_H
3 |
4 | /** \file
5 | *
6 | * Defines methods for prefixing names in an expression with a prefix string.
7 | */
8 |
9 | #include "IR.h"
10 |
11 | namespace Halide {
12 | namespace Internal {
13 |
14 | /** Prefix all variable names in the given expression with the prefix string. */
15 | Expr qualify(const std::string &prefix, Expr value);
16 |
17 | } // namespace Internal
18 | } // namespace Halide
19 |
20 | #endif
21 |
--------------------------------------------------------------------------------
/src/RemoveDeadAllocations.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_REMOVE_DEAD_ALLOCATIONS_H
2 | #define HALIDE_REMOVE_DEAD_ALLOCATIONS_H
3 |
4 | /** \file
5 | * Defines the lowering pass that removes allocate and free nodes that
6 | * are not used.
7 | */
8 |
9 | #include "IR.h"
10 |
11 | namespace Halide {
12 | namespace Internal {
13 |
14 | /** Find Allocate/Free pairs that are never loaded from or stored to,
15 | * and remove them from the Stmt. This doesn't touch Realize/Call
16 | * nodes and so must be called after storage_flattening.
17 | */
18 | Stmt remove_dead_allocations(Stmt s);
19 |
20 | } // namespace Internal
21 | } // namespace Halide
22 |
23 | #endif
24 |
--------------------------------------------------------------------------------
/src/RemoveExternLoops.cpp:
--------------------------------------------------------------------------------
1 | #include "RemoveExternLoops.h"
2 | #include "IRMutator.h"
3 |
4 | namespace Halide {
5 | namespace Internal {
6 |
7 | /** Mutator that replaces each extern for loop with a let binding the loop variable to the loop min, i.e. only its first iteration. */
8 | class RemoveExternLoops : public IRMutator {
9 | private:
10 |     using IRMutator::visit;
11 |     Stmt visit(const For *loop) override {
12 |         if (loop->for_type == ForType::Extern) {
13 |             return LetStmt::make(loop->name, loop->min, mutate(loop->body));
14 |         }
15 |         return IRMutator::visit(loop);
16 |     }
17 | };
18 |
19 | Stmt remove_extern_loops(Stmt s) {
20 |     RemoveExternLoops remover;
21 |     return remover.mutate(s);
22 | }
23 |
24 | } // namespace Internal
25 | } // namespace Halide
26 |
--------------------------------------------------------------------------------
/src/RemoveExternLoops.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_REMOVE_EXTERN_LOOPS
2 | #define HALIDE_REMOVE_EXTERN_LOOPS
3 |
4 | #include "IR.h"
5 |
6 | /** \file
7 | * Defines a lowering pass that removes placeholder loops for extern stages.
8 | */
9 |
10 | namespace Halide {
11 | namespace Internal {
12 |
13 | /** Removes placeholder loops for extern stages. */
14 | Stmt remove_extern_loops(Stmt s);
15 |
16 | } // namespace Internal
17 | } // namespace Halide
18 |
19 | #endif
20 |
--------------------------------------------------------------------------------
/src/RemoveUndef.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_REMOVE_UNDEF
2 | #define HALIDE_REMOVE_UNDEF
3 |
4 | #include "IR.h"
5 |
6 | /** \file
7 | * Defines a lowering pass that elides stores that depend on uninitialized values.
8 | */
9 |
10 | namespace Halide {
11 | namespace Internal {
12 |
13 | /** Removes stores that depend on undef values, and statements that
14 | * only contain such stores. */
15 | Stmt remove_undef(Stmt s);
16 |
17 | } // namespace Internal
18 | } // namespace Halide
19 |
20 | #endif
21 |
--------------------------------------------------------------------------------
/src/RoundingMode.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_ROUNDING_MODE_H
2 | #define HALIDE_ROUNDING_MODE_H
3 | namespace Halide {
4 |
5 | /** Rounding modes (IEEE754 2008 4.3 Rounding-direction attributes) */
6 | enum class RoundingMode {
7 | TowardZero, ///< Round towards zero (IEEE754 2008 4.3.2)
8 | ToNearestTiesToEven, ///< Round to nearest, when there is a tie pick even integral significand (IEEE754 2008 4.3.1)
9 | ToNearestTiesToAway, ///< Round to nearest, when there is a tie pick value furthest away from zero (IEEE754 2008 4.3.1)
10 | TowardPositiveInfinity, ///< Round towards positive infinity (IEEE754 2008 4.3.2)
11 | TowardNegativeInfinity ///< Round towards negative infinity (IEEE754 2008 4.3.2)
12 | };
13 |
14 | } // namespace Halide
15 | #endif
16 |
--------------------------------------------------------------------------------
/src/SelectGPUAPI.h:
--------------------------------------------------------------------------------
1 | #ifndef HALIDE_INTERNAL_SELECT_GPU_API_H
2 | #define HALIDE_INTERNAL_SELECT_GPU_API_H
3 |
4 | #include "IR.h"
5 | #include "Target.h"
6 |
7 | /** \file
8 | * Defines a lowering pass that selects which GPU api to use for each
9 | * gpu for loop
10 | */
11 |
12 | namespace Halide {
13 | namespace Internal {
14 |
15 | /** Replace for loops with GPU_Default device_api with an actual
16 | * device API depending on what's enabled in the target. Choose the
17 | * first of the following: opencl, cuda, openglcompute, opengl */
18 | Stmt select_gpu_api(Stmt s, Target t);
19 |
20 | } // namespace Internal
21 | } // namespace Halide
22 |
23 | #endif
24 |
--------------------------------------------------------------------------------
/src/SimplifySpecializations.h:
--------------------------------------------------------------------------------
1 | #ifndef SIMPLIFY_SPECIALIZATIONS_H
2 | #define SIMPLIFY_SPECIALIZATIONS_H
3 |
4 | /** \file
5 | *
6 | * Defines a pass that tries to simplify the RHS/LHS of a function's definition
7 | * based on its specializations.
8 | */
9 |
10 | #include