├── .clang-format ├── .gitattributes ├── .gitignore ├── .gitmodules ├── .travis.yml ├── CMakeLists.txt ├── Doxyfile ├── Doxyfile.in ├── LICENSE.txt ├── Makefile ├── README.md ├── README_cmake.md ├── README_rungen.md ├── README_webassembly.md ├── ai_scripts ├── conv.prx ├── data │ ├── data.txt │ ├── kernelAndInImage_256x16_k3_gaussblur.txt │ ├── old_data.txt │ └── refImage_256x16_k3_gaussblur.txt ├── data_points.txt ├── execute.py ├── explore.bash ├── global.h ├── main.cc ├── naive.cc ├── run.bash ├── run1.bash ├── run2.bash ├── run3.bash ├── run4.bash ├── run_single.bash └── xhalide_generated.cc ├── apps ├── CMakeLists.txt ├── HelloAndroid │ ├── .gitignore │ ├── AndroidManifest.xml │ ├── README.md │ ├── ant.properties │ ├── build-gradle.sh │ ├── build.gradle │ ├── build.sh │ ├── build.xml │ ├── gradle │ │ └── wrapper │ │ │ ├── gradle-wrapper.jar │ │ │ └── gradle-wrapper.properties │ ├── gradlew │ ├── gradlew.bat │ ├── jni │ │ ├── Android.mk │ │ ├── Application.mk │ │ ├── hello_generator.cpp │ │ └── hello_wrapper.cpp │ ├── res │ │ ├── drawable-hdpi │ │ │ └── ic_launcher.png │ │ ├── drawable-ldpi │ │ │ └── ic_launcher.png │ │ ├── drawable-mdpi │ │ │ └── ic_launcher.png │ │ ├── drawable-xhdpi │ │ │ └── ic_launcher.png │ │ ├── layout │ │ │ └── main.xml │ │ └── values │ │ │ └── strings.xml │ └── src │ │ └── com │ │ └── example │ │ └── hellohalide │ │ ├── CameraActivity.java │ │ ├── CameraPreview.java │ │ └── FrameHandler.java ├── HelloAndroidCamera2 │ ├── .gitignore │ ├── AndroidManifest.xml │ ├── README.md │ ├── ant.properties │ ├── build-gradle.sh │ ├── build.gradle │ ├── build.sh │ ├── build.xml │ ├── gradle │ │ └── wrapper │ │ │ ├── gradle-wrapper.jar │ │ │ └── gradle-wrapper.properties │ ├── gradlew │ ├── gradlew.bat │ ├── jni │ │ ├── Android.mk │ │ ├── AndroidBufferUtilities.cpp │ │ ├── AndroidBufferUtilities.h │ │ ├── Application.mk │ │ ├── HalideFilters.cpp │ │ ├── LockedSurface.cpp │ │ ├── LockedSurface.h │ │ ├── YuvBufferT.cpp │ │ ├── YuvBufferT.h │ │ ├── 
deinterleave_generator.cpp │ │ └── edge_detect_generator.cpp │ ├── res │ │ ├── drawable-hdpi │ │ │ └── ic_launcher.png │ │ ├── drawable-ldpi │ │ │ └── ic_launcher.png │ │ ├── drawable-mdpi │ │ │ └── ic_launcher.png │ │ ├── drawable-xhdpi │ │ │ └── ic_launcher.png │ │ ├── layout │ │ │ ├── activity_camera.xml │ │ │ ├── fragment_camera2_basic.xml │ │ │ └── main.xml │ │ └── values │ │ │ ├── strings.xml │ │ │ └── styles.xml │ └── src │ │ └── com │ │ ├── android │ │ └── ex │ │ │ └── camera2 │ │ │ ├── blocking │ │ │ ├── BlockingCameraManager.java │ │ │ ├── BlockingCaptureCallback.java │ │ │ ├── BlockingSessionCallback.java │ │ │ └── BlockingStateCallback.java │ │ │ ├── exceptions │ │ │ └── TimeoutRuntimeException.java │ │ │ ├── pos │ │ │ └── AutoFocusStateMachine.java │ │ │ └── utils │ │ │ ├── StateChangeListener.java │ │ │ ├── StateWaiter.java │ │ │ └── SysTrace.java │ │ └── example │ │ └── helloandroidcamera2 │ │ ├── AndroidBufferUtilities.java │ │ ├── AutoFitSurfaceView.java │ │ ├── Camera2BasicFragment.java │ │ ├── CameraActivity.java │ │ ├── HalideFilters.java │ │ ├── HalideYuvBufferT.java │ │ └── NativeSurfaceHandle.java ├── HelloAndroidGL │ ├── AndroidManifest.xml │ ├── ant.properties │ ├── build.sh │ ├── build.xml │ ├── jni │ │ ├── Android.mk │ │ ├── Application.mk │ │ ├── android_halide_gl_native.cpp │ │ └── halide_gl_filter.cpp │ ├── project.properties │ ├── res │ │ ├── drawable-hdpi │ │ │ └── ic_launcher.png │ │ ├── drawable-ldpi │ │ │ └── ic_launcher.png │ │ ├── drawable-mdpi │ │ │ └── ic_launcher.png │ │ ├── drawable-xhdpi │ │ │ └── ic_launcher.png │ │ ├── layout │ │ │ └── main.xml │ │ └── values │ │ │ └── strings.xml │ └── src │ │ └── org │ │ └── halide_lang │ │ └── hellohalidegl │ │ └── HelloHalideGL.java ├── HelloMatlab │ ├── Makefile │ ├── iir_blur.cpp │ ├── run_blur.m │ └── run_blur.sh ├── HelloiOS │ ├── HelloiOS.xcodeproj │ │ └── project.pbxproj │ └── HelloiOS │ │ ├── AppDelegate.h │ │ ├── AppDelegate.mm │ │ ├── HalideView.h │ │ ├── HalideView.mm │ │ 
├── HalideViewController.h │ │ ├── HalideViewController.mm │ │ ├── HelloiOS-Info.plist │ │ ├── HelloiOS-Prefix.pch │ │ ├── Images.xcassets │ │ ├── AppIcon.appiconset │ │ │ └── Contents.json │ │ └── LaunchImage.launchimage │ │ │ └── Contents.json │ │ ├── en.lproj │ │ └── InfoPlist.strings │ │ ├── main.mm │ │ └── reaction_diffusion_2_generator.cpp ├── auto_viz │ ├── Makefile │ ├── auto_viz_demo.cpp │ └── auto_viz_demo_generator.cpp ├── autoscheduler │ ├── ASLog.cpp │ ├── ASLog.h │ ├── AutoSchedule.cpp │ ├── AutoSchedule.h │ ├── CostModel.h │ ├── DefaultCostModel.cpp │ ├── Errors.h │ ├── Featurization.h │ ├── FunctionDAG.h │ ├── Makefile │ ├── NetworkSize.h │ ├── PerfectHashMap.h │ ├── autotune_loop.sh │ ├── cost_model_generator.cpp │ ├── cost_model_schedule.h │ ├── demo_generator.cpp │ ├── featurization_to_sample.cpp │ ├── test.cpp │ ├── test_perfect_hash_map.cpp │ ├── train_cost_model.cpp │ └── weights │ │ ├── head1_conv1_bias.data │ │ ├── head1_conv1_weight.data │ │ ├── head2_conv1_bias.data │ │ ├── head2_conv1_weight.data │ │ ├── trunk_conv1_bias.data │ │ └── trunk_conv1_weight.data ├── bilateral_grid │ ├── CMakeLists.txt │ ├── Makefile │ ├── bilateral_grid_generator.cpp │ ├── filter.cpp │ └── viz.sh ├── blur │ ├── CMakeLists.txt │ ├── Makefile │ ├── adb_run_on_device.sh │ ├── halide_blur_generator.cpp │ └── test.cpp ├── c_backend │ ├── CMakeLists.txt │ ├── Makefile │ ├── pipeline_cpp_generator.cpp │ ├── pipeline_generator.cpp │ ├── run.cpp │ └── run_cpp.cpp ├── camera_pipe │ ├── CMakeLists.txt │ ├── Makefile │ ├── camera_pipe_generator.cpp │ ├── process.cpp │ └── viz.sh ├── conv_layer │ ├── CMakeLists.txt │ ├── Makefile │ ├── conv_layer_generator.cpp │ └── process.cpp ├── cuda_mat_mul │ ├── Makefile │ ├── mat_mul_generator.cpp │ └── runner.cpp ├── fft │ ├── Makefile │ ├── complex.h │ ├── fft.cpp │ ├── fft.h │ ├── fft_aot_test.cpp │ ├── fft_generator.cpp │ ├── funct.h │ └── main.cpp ├── glsl │ ├── CMakeLists.txt │ ├── Makefile │ ├── halide_blur_glsl_generator.cpp 
│ ├── halide_ycc_glsl_generator.cpp │ └── opengl_test.cpp ├── hexagon_benchmarks │ ├── Makefile │ ├── adb_run_on_device.sh │ ├── conv3x3_generator.cpp │ ├── dilate3x3_generator.cpp │ ├── gaussian5x5_generator.cpp │ ├── median3x3_generator.cpp │ ├── process.cpp │ ├── process.h │ └── sobel_generator.cpp ├── hexagon_dma │ ├── Makefile │ ├── mock_dma_implementation.cpp │ ├── pipeline_raw_linear_interleaved_basic.cpp │ ├── pipeline_yuv_linear_basic.cpp │ ├── process_raw_linear_interleaved_basic.cpp │ └── process_yuv_linear_basic.cpp ├── images │ ├── bayer_raw.png │ ├── bayer_small.png │ ├── gray.png │ ├── gray_small.png │ ├── rgb.png │ ├── rgb_small.png │ ├── rgb_small16.png │ └── rgba.png ├── interpolate │ ├── CMakeLists.txt │ ├── Makefile │ └── interpolate.cpp ├── lens_blur │ ├── CMakeLists.txt │ ├── Makefile │ ├── lens_blur_generator.cpp │ └── process.cpp ├── linear_algebra │ ├── .gitignore │ ├── CMakeLists.txt │ ├── Makefile │ ├── benchmarks │ │ ├── CMakeLists.txt │ │ ├── cblas_benchmarks.cpp │ │ ├── clock.h │ │ ├── eigen_benchmarks.cpp │ │ ├── halide_benchmarks.cpp │ │ └── macros.h │ ├── src │ │ ├── CMakeLists.txt │ │ ├── blas_l1_generators.cpp │ │ ├── blas_l2_generators.cpp │ │ ├── blas_l3_generators.cpp │ │ ├── halide_blas.cpp │ │ └── halide_blas.h │ └── tests │ │ ├── CMakeLists.txt │ │ └── test_halide_blas.cpp ├── linear_blur │ ├── CMakeLists.txt │ ├── linear_blur_generator.cpp │ ├── linear_to_srgb_generator.cpp │ ├── run_linear_blur.cpp │ ├── simple_blur_generator.cpp │ └── srgb_to_linear_generator.cpp ├── local_laplacian │ ├── CMakeLists.txt │ ├── Makefile │ ├── local_laplacian_generator.cpp │ ├── process.cpp │ └── viz.sh ├── nl_means │ ├── CMakeLists.txt │ ├── Makefile │ ├── nl_means_generator.cpp │ └── process.cpp ├── nn_ops │ ├── AveragePool.cpp │ ├── AveragePool.sh │ ├── AveragePool_generator.cpp │ ├── Convolution.cpp │ ├── Convolution.sh │ ├── Convolution_generator.cpp │ ├── DepthwiseConvolution.cpp │ ├── DepthwiseConvolution.sh │ ├── 
DepthwiseConvolution_generator.cpp │ ├── Im2col.cpp │ ├── Im2col.sh │ ├── Im2col_generator.cpp │ ├── Makefile │ ├── MatrixMultiply.cpp │ ├── MatrixMultiply.sh │ ├── MatrixMultiply_generator.cpp │ ├── MaxPool.cpp │ ├── MaxPool.sh │ ├── MaxPool_generator.cpp │ ├── README.md │ ├── adb_run_on_device.sh │ ├── common.cpp │ ├── common.h │ ├── common_reference.cpp │ └── common_reference.h ├── onnx │ ├── Makefile │ ├── common_types.h │ ├── halide_as_onnx_backend.py │ ├── halide_as_onnx_backend_test.py │ ├── model.cpp │ ├── model.py │ ├── model_test.py │ ├── onnx_converter.cc │ ├── onnx_converter.h │ ├── onnx_converter_generator.cc │ ├── onnx_converter_generator_test.cc │ ├── onnx_converter_test.cc │ └── test_model_proto.txt ├── opengl_demo │ ├── Makefile │ ├── README.md │ ├── glfw_helpers.cpp │ ├── glfw_helpers.h │ ├── image.png │ ├── layout.cpp │ ├── layout.h │ ├── main.cpp │ ├── opengl_helpers.cpp │ ├── opengl_helpers.h │ ├── png_helpers.cpp │ ├── png_helpers.h │ ├── sample_filter_generator.cpp │ ├── timer.cpp │ └── timer.h ├── openglcompute │ ├── AndroidManifest.xml │ ├── Makefile │ ├── build.sh │ ├── build.xml │ ├── jni │ │ ├── Android.mk │ │ ├── Application.mk │ │ ├── oglc_run.cpp │ │ └── oglc_two_kernels_run.cpp │ ├── res │ │ ├── drawable-hdpi │ │ │ └── ic_launcher.png │ │ ├── drawable-ldpi │ │ │ └── ic_launcher.png │ │ ├── drawable-mdpi │ │ │ └── ic_launcher.png │ │ ├── drawable-xhdpi │ │ │ └── ic_launcher.png │ │ ├── layout │ │ │ └── main.xml │ │ └── values │ │ │ └── strings.xml │ ├── src │ │ └── com │ │ │ └── example │ │ │ └── hellohalideopenglcompute │ │ │ └── HalideOpenGLComputeActivity.java │ ├── test_oglc_avg.cpp │ └── test_two_kernels.cpp ├── resize │ ├── CMakeLists.txt │ ├── Makefile │ ├── resize.cpp │ └── resize_generator.cpp ├── resnet_50 │ ├── Makefile │ ├── Resnet50Generator.cpp │ ├── load_weights.py │ ├── process.cpp │ └── validate_resnet50_output.py ├── simd_op_check │ ├── Makefile │ └── driver.cpp ├── stencil_chain │ ├── CMakeLists.txt │ ├── Makefile │ 
├── process.cpp │ └── stencil_chain_generator.cpp ├── support │ ├── Makefile.inc │ ├── autoscheduler.inc │ └── viz_auto.sh └── wavelet │ ├── CMakeLists.txt │ ├── Makefile │ ├── README.md │ ├── daubechies_constants.h │ ├── daubechies_x_generator.cpp │ ├── haar_x_generator.cpp │ ├── inverse_daubechies_x_generator.cpp │ ├── inverse_haar_x_generator.cpp │ └── wavelet.cpp ├── halide.cmake ├── python_bindings ├── Makefile ├── apps │ ├── bilateral_grid.py │ ├── blur.py │ ├── erode.py │ ├── interpolate.py │ └── local_laplacian.py ├── correctness │ ├── addconstant_generator.cpp │ ├── addconstant_test.py │ ├── autodiff.py │ ├── basics.py │ ├── bit_generator.cpp │ ├── bit_test.py │ ├── boundary_conditions.py │ ├── buffer.py │ ├── compile_to.py │ ├── complexstub_generator.cpp │ ├── division.py │ ├── extern.py │ ├── iroperator.py │ ├── multipass_constraints.py │ ├── nobuildmethod_generator.cpp │ ├── partialbuildmethod_generator.cpp │ ├── pystub.py │ ├── rdom.py │ ├── simplestub_generator.cpp │ ├── target.py │ ├── the_sort_function.c │ ├── tuple_select.py │ ├── type.py │ ├── user_context_generator.cpp │ ├── user_context_test.py │ └── var.py ├── readme.md ├── requirements.txt ├── src │ ├── PyArgument.cpp │ ├── PyArgument.h │ ├── PyBinaryOperators.h │ ├── PyBoundaryConditions.cpp │ ├── PyBoundaryConditions.h │ ├── PyBuffer.cpp │ ├── PyBuffer.h │ ├── PyConciseCasts.cpp │ ├── PyConciseCasts.h │ ├── PyDerivative.cpp │ ├── PyDerivative.h │ ├── PyEnums.cpp │ ├── PyEnums.h │ ├── PyError.cpp │ ├── PyError.h │ ├── PyExpr.cpp │ ├── PyExpr.h │ ├── PyExternFuncArgument.cpp │ ├── PyExternFuncArgument.h │ ├── PyFunc.cpp │ ├── PyFunc.h │ ├── PyFuncRef.cpp │ ├── PyFuncRef.h │ ├── PyHalide.cpp │ ├── PyHalide.h │ ├── PyIROperator.cpp │ ├── PyIROperator.h │ ├── PyImageParam.cpp │ ├── PyImageParam.h │ ├── PyInlineReductions.cpp │ ├── PyInlineReductions.h │ ├── PyLambda.cpp │ ├── PyLambda.h │ ├── PyLoopLevel.cpp │ ├── PyLoopLevel.h │ ├── PyMachineParams.cpp │ ├── PyMachineParams.h │ ├── PyModule.cpp 
│ ├── PyModule.h │ ├── PyOutputs.cpp │ ├── PyOutputs.h │ ├── PyParam.cpp │ ├── PyParam.h │ ├── PyPipeline.cpp │ ├── PyPipeline.h │ ├── PyRDom.cpp │ ├── PyRDom.h │ ├── PyScheduleMethods.h │ ├── PyStage.cpp │ ├── PyStage.h │ ├── PyTarget.cpp │ ├── PyTarget.h │ ├── PyTuple.cpp │ ├── PyTuple.h │ ├── PyType.cpp │ ├── PyType.h │ ├── PyVar.cpp │ ├── PyVar.h │ ├── PyVarOrRVar.cpp │ └── PyVarOrRVar.h ├── stub │ ├── PyStub.cpp │ └── PyStubImpl.cpp ├── todo.txt └── tutorial │ ├── lesson_01_basics.py │ ├── lesson_02_input_image.py │ ├── lesson_03_debugging_1.py │ ├── lesson_04_debugging_2.py │ ├── lesson_05_scheduling_1.py │ ├── lesson_06_realizing_over_shifted_domains.py │ ├── lesson_07_multi_stage_pipelines.py │ ├── lesson_08_scheduling_2.py │ ├── lesson_09_update_definitions.py │ ├── lesson_10_aot_compilation_generate.py │ ├── lesson_10_aot_compilation_run.py │ ├── lesson_11_cross_compilation.py │ ├── lesson_12_using_the_gpu.py │ ├── lesson_13_tuples.py │ └── lesson_14_types.py ├── src ├── AddImageChecks.cpp ├── AddImageChecks.h ├── AddParameterChecks.cpp ├── AddParameterChecks.h ├── AlignLoads.cpp ├── AlignLoads.h ├── AllocationBoundsInference.cpp ├── AllocationBoundsInference.h ├── ApplySplit.cpp ├── ApplySplit.h ├── Argument.cpp ├── Argument.h ├── AssociativeOpsTable.cpp ├── AssociativeOpsTable.h ├── Associativity.cpp ├── Associativity.h ├── AsyncProducers.cpp ├── AsyncProducers.h ├── AutoSchedule.cpp ├── AutoSchedule.h ├── AutoScheduleUtils.cpp ├── AutoScheduleUtils.h ├── BoundSmallAllocations.cpp ├── BoundSmallAllocations.h ├── BoundaryConditions.cpp ├── BoundaryConditions.h ├── Bounds.cpp ├── Bounds.h ├── BoundsInference.cpp ├── BoundsInference.h ├── Buffer.cpp ├── Buffer.h ├── CMakeLists.txt ├── CPlusPlusMangle.cpp ├── CPlusPlusMangle.h ├── CSE.cpp ├── CSE.h ├── CanonicalizeGPUVars.cpp ├── CanonicalizeGPUVars.h ├── Closure.cpp ├── Closure.h ├── CodeGen_ARM.cpp ├── CodeGen_ARM.h ├── CodeGen_C.cpp ├── CodeGen_C.h ├── CodeGen_D3D12Compute_Dev.cpp ├── 
CodeGen_D3D12Compute_Dev.h ├── CodeGen_GPU_Dev.cpp ├── CodeGen_GPU_Dev.h ├── CodeGen_GPU_Host.cpp ├── CodeGen_GPU_Host.h ├── CodeGen_Hexagon.cpp ├── CodeGen_Hexagon.h ├── CodeGen_Internal.cpp ├── CodeGen_Internal.h ├── CodeGen_LLVM.cpp ├── CodeGen_LLVM.h ├── CodeGen_MIPS.cpp ├── CodeGen_MIPS.h ├── CodeGen_Metal_Dev.cpp ├── CodeGen_Metal_Dev.h ├── CodeGen_OpenCL_Dev.cpp ├── CodeGen_OpenCL_Dev.h ├── CodeGen_OpenGLCompute_Dev.cpp ├── CodeGen_OpenGLCompute_Dev.h ├── CodeGen_OpenGL_Dev.cpp ├── CodeGen_OpenGL_Dev.h ├── CodeGen_PTX_Dev.cpp ├── CodeGen_PTX_Dev.h ├── CodeGen_Posix.cpp ├── CodeGen_Posix.h ├── CodeGen_PowerPC.cpp ├── CodeGen_PowerPC.h ├── CodeGen_RISCV.cpp ├── CodeGen_RISCV.h ├── CodeGen_WebAssembly.cpp ├── CodeGen_WebAssembly.h ├── CodeGen_X86.cpp ├── CodeGen_X86.h ├── ConciseCasts.h ├── ConvolutionsCompilerForAICore.cpp ├── ConvolutionsCompilerForAICore.h ├── Debug.cpp ├── Debug.h ├── DebugArguments.cpp ├── DebugArguments.h ├── DebugToFile.cpp ├── DebugToFile.h ├── Definition.cpp ├── Definition.h ├── Deinterleave.cpp ├── Deinterleave.h ├── Derivative.cpp ├── Derivative.h ├── DerivativeUtils.cpp ├── DerivativeUtils.h ├── DeviceArgument.cpp ├── DeviceArgument.h ├── DeviceInterface.cpp ├── DeviceInterface.h ├── Dimension.cpp ├── Dimension.h ├── EarlyFree.cpp ├── EarlyFree.h ├── Elf.cpp ├── Elf.h ├── EliminateBoolVectors.cpp ├── EliminateBoolVectors.h ├── Error.cpp ├── Error.h ├── Expr.h ├── ExprUsesVar.h ├── Extern.h ├── ExternalCode.h ├── FastIntegerDivide.cpp ├── FastIntegerDivide.h ├── FindCalls.cpp ├── FindCalls.h ├── Float16.cpp ├── Float16.h ├── Func.cpp ├── Func.h ├── Function.cpp ├── Function.h ├── FunctionPtr.h ├── FuseGPUThreadLoops.cpp ├── FuseGPUThreadLoops.h ├── FuzzFloatStores.cpp ├── FuzzFloatStores.h ├── Generator.cpp ├── Generator.h ├── HexagonAlignment.h ├── HexagonOffload.cpp ├── HexagonOffload.h ├── HexagonOptimize.cpp ├── HexagonOptimize.h ├── IR.cpp ├── IR.h ├── IREquality.cpp ├── IREquality.h ├── IRMatch.cpp ├── IRMatch.h ├── 
IRMutator.cpp ├── IRMutator.h ├── IROperator.cpp ├── IROperator.h ├── IRPrinter.cpp ├── IRPrinter.h ├── IRVisitor.cpp ├── IRVisitor.h ├── ImageParam.cpp ├── ImageParam.h ├── InferArguments.cpp ├── InferArguments.h ├── InjectHostDevBufferCopies.cpp ├── InjectHostDevBufferCopies.h ├── InjectOpenGLIntrinsics.cpp ├── InjectOpenGLIntrinsics.h ├── Inline.cpp ├── Inline.h ├── InlineReductions.cpp ├── InlineReductions.h ├── IntegerDivisionTable.cpp ├── IntegerDivisionTable.h ├── Interval.cpp ├── Interval.h ├── Introspection.cpp ├── Introspection.h ├── IntrusivePtr.h ├── JITModule.cpp ├── JITModule.h ├── LICM.cpp ├── LICM.h ├── LLVM_Headers.h ├── LLVM_Output.cpp ├── LLVM_Output.h ├── LLVM_Runtime_Linker.cpp ├── LLVM_Runtime_Linker.h ├── Lambda.h ├── Lerp.cpp ├── Lerp.h ├── LoopCarry.cpp ├── LoopCarry.h ├── Lower.cpp ├── Lower.h ├── LowerWarpShuffles.cpp ├── LowerWarpShuffles.h ├── MainPage.h ├── MatlabWrapper.cpp ├── MatlabWrapper.h ├── Memoization.cpp ├── Memoization.h ├── Module.cpp ├── Module.h ├── ModulusRemainder.cpp ├── ModulusRemainder.h ├── Monotonic.cpp ├── Monotonic.h ├── ObjectInstanceRegistry.cpp ├── ObjectInstanceRegistry.h ├── OutputImageParam.cpp ├── OutputImageParam.h ├── Outputs.h ├── ParallelRVar.cpp ├── ParallelRVar.h ├── Param.h ├── ParamMap.cpp ├── ParamMap.h ├── Parameter.cpp ├── Parameter.h ├── PartitionLoops.cpp ├── PartitionLoops.h ├── Pipeline.cpp ├── Pipeline.h ├── Prefetch.cpp ├── Prefetch.h ├── PrintLoopNest.cpp ├── PrintLoopNest.h ├── Profiling.cpp ├── Profiling.h ├── PurifyIndexMath.cpp ├── PurifyIndexMath.h ├── PythonExtensionGen.cpp ├── PythonExtensionGen.h ├── Qualify.cpp ├── Qualify.h ├── RDom.cpp ├── RDom.h ├── Random.cpp ├── Random.h ├── RealizationOrder.cpp ├── RealizationOrder.h ├── Reduction.cpp ├── Reduction.h ├── RegionCosts.cpp ├── RegionCosts.h ├── RemoveDeadAllocations.cpp ├── RemoveDeadAllocations.h ├── RemoveExternLoops.cpp ├── RemoveExternLoops.h ├── RemoveUndef.cpp ├── RemoveUndef.h ├── RoundingMode.h ├── Schedule.cpp ├── 
Schedule.h ├── ScheduleFunctions.cpp ├── ScheduleFunctions.h ├── Scope.h ├── SelectGPUAPI.cpp ├── SelectGPUAPI.h ├── Simplify.cpp ├── Simplify.h ├── SimplifyCorrelatedDifferences.cpp ├── SimplifyCorrelatedDifferences.h ├── SimplifySpecializations.cpp ├── SimplifySpecializations.h ├── Simplify_Add.cpp ├── Simplify_And.cpp ├── Simplify_Call.cpp ├── Simplify_Cast.cpp ├── Simplify_Div.cpp ├── Simplify_EQ.cpp ├── Simplify_Exprs.cpp ├── Simplify_Internal.h ├── Simplify_LT.cpp ├── Simplify_Let.cpp ├── Simplify_Max.cpp ├── Simplify_Min.cpp ├── Simplify_Mod.cpp ├── Simplify_Mul.cpp ├── Simplify_Not.cpp ├── Simplify_Or.cpp ├── Simplify_Select.cpp ├── Simplify_Shuffle.cpp ├── Simplify_Stmts.cpp ├── Simplify_Sub.cpp ├── SkipStages.cpp ├── SkipStages.h ├── SlidingWindow.cpp ├── SlidingWindow.h ├── Solve.cpp ├── Solve.h ├── SplitTuples.cpp ├── SplitTuples.h ├── StmtToHtml.cpp ├── StmtToHtml.h ├── StorageFlattening.cpp ├── StorageFlattening.h ├── StorageFolding.cpp ├── StorageFolding.h ├── StrictifyFloat.cpp ├── StrictifyFloat.h ├── Substitute.cpp ├── Substitute.h ├── Target.cpp ├── Target.h ├── ThreadPool.h ├── Tracing.cpp ├── Tracing.h ├── TrimNoOps.cpp ├── TrimNoOps.h ├── Tuple.cpp ├── Tuple.h ├── Type.cpp ├── Type.h ├── UnifyDuplicateLets.cpp ├── UnifyDuplicateLets.h ├── UniquifyVariableNames.cpp ├── UniquifyVariableNames.h ├── UnpackBuffers.cpp ├── UnpackBuffers.h ├── UnrollLoops.cpp ├── UnrollLoops.h ├── UnsafePromises.cpp ├── UnsafePromises.h ├── Util.cpp ├── Util.h ├── Var.cpp ├── Var.h ├── VaryingAttributes.cpp ├── VaryingAttributes.h ├── VectorizeLoops.cpp ├── VectorizeLoops.h ├── WasmExecutor.cpp ├── WasmExecutor.h ├── WrapCalls.cpp ├── WrapCalls.h ├── WrapExternStages.cpp ├── WrapExternStages.h └── runtime │ ├── HalideBuffer.h │ ├── HalideRuntime.h │ ├── HalideRuntimeCuda.h │ ├── HalideRuntimeD3D12Compute.h │ ├── HalideRuntimeHexagonDma.h │ ├── HalideRuntimeHexagonHost.h │ ├── HalideRuntimeMetal.h │ ├── HalideRuntimeOpenCL.h │ ├── HalideRuntimeOpenGL.h │ ├── 
HalideRuntimeOpenGLCompute.h │ ├── HalideRuntimeQurt.h │ ├── aarch64.ll │ ├── aarch64_cpu_features.cpp │ ├── alignment_128.cpp │ ├── alignment_32.cpp │ ├── alignment_64.cpp │ ├── android_clock.cpp │ ├── android_host_cpu_count.cpp │ ├── android_io.cpp │ ├── android_ioctl.h │ ├── arm.ll │ ├── arm_cpu_features.cpp │ ├── arm_no_neon.ll │ ├── buffer_t.cpp │ ├── cache.cpp │ ├── can_use_target.cpp │ ├── cl_functions.h │ ├── cpu_features.h │ ├── cuda.cpp │ ├── cuda_functions.h │ ├── d3d12_abi_patch_64.h │ ├── d3d12_abi_patch_64.ll │ ├── d3d12compute.cpp │ ├── destructors.cpp │ ├── device_buffer_utils.h │ ├── device_interface.cpp │ ├── device_interface.h │ ├── errors.cpp │ ├── fake_get_symbol.cpp │ ├── fake_thread_pool.cpp │ ├── float16_t.cpp │ ├── fuchsia_clock.cpp │ ├── fuchsia_host_cpu_count.cpp │ ├── fuchsia_yield.cpp │ ├── gpu_device_selection.cpp │ ├── hashmap.h │ ├── hexagon_cache_allocator.cpp │ ├── hexagon_cpu_features.cpp │ ├── hexagon_dma.cpp │ ├── hexagon_dma_pool.cpp │ ├── hexagon_dma_pool.h │ ├── hexagon_host.cpp │ ├── hexagon_remote │ ├── .gitignore │ ├── Makefile │ ├── bin │ │ ├── arm-32-android │ │ │ └── libhalide_hexagon_host.so │ │ ├── arm-64-android │ │ │ └── libhalide_hexagon_host.so │ │ ├── host │ │ │ └── libhalide_hexagon_host.so │ │ ├── src │ │ │ ├── halide_hexagon_remote.h │ │ │ ├── halide_hexagon_remote_skel.c │ │ │ └── halide_hexagon_remote_stub.c │ │ └── v60 │ │ │ ├── hexagon_sim_remote │ │ │ ├── libhalide_hexagon_remote_skel.so │ │ │ ├── libsim_qurt.a │ │ │ ├── libsim_qurt_vtcm.a │ │ │ └── signed_by_debug │ │ │ └── libhalide_hexagon_remote_skel.so │ ├── c11_stubs.cpp │ ├── dlib.cpp │ ├── dlib.h │ ├── halide_hexagon_remote.idl │ ├── halide_remote.cpp │ ├── host_malloc.cpp │ ├── host_shim.cpp │ ├── instruction_encodings.txt │ ├── known_symbols.cpp │ ├── known_symbols.h │ ├── libadsprpc_shim.cpp │ ├── log.cpp │ ├── log.h │ ├── nearbyint.cpp │ ├── pipeline_context.h │ ├── sim_host.cpp │ ├── sim_protocol.h │ ├── sim_qurt.cpp │ ├── sim_qurt_vtcm.cpp │ 
└── sim_remote.cpp │ ├── hvx_128.ll │ ├── hvx_64.ll │ ├── ios_io.cpp │ ├── linux_clock.cpp │ ├── linux_host_cpu_count.cpp │ ├── linux_yield.cpp │ ├── matlab.cpp │ ├── metadata.cpp │ ├── metal.cpp │ ├── metal_objc_arm.cpp │ ├── metal_objc_platform_dependent.cpp │ ├── metal_objc_platform_dependent.h │ ├── metal_objc_x86.cpp │ ├── mex_functions.h │ ├── mingw_math.cpp │ ├── mini_cl.h │ ├── mini_cuda.h │ ├── mini_d3d12.h │ ├── mini_hexagon_dma.h │ ├── mini_opengl.h │ ├── mini_qurt.h │ ├── mini_qurt_vtcm.h │ ├── mips.ll │ ├── mips_cpu_features.cpp │ ├── module_aot_ref_count.cpp │ ├── module_jit_ref_count.cpp │ ├── msan.cpp │ ├── msan_stubs.cpp │ ├── nvidia_libdevice_bitcode │ ├── libdevice.compute_20.10.bc │ ├── libdevice.compute_30.10.bc │ └── libdevice.compute_35.10.bc │ ├── objc_support.h │ ├── old_buffer_t.cpp │ ├── opencl.cpp │ ├── opengl.cpp │ ├── opengl_egl_context.cpp │ ├── opengl_glx_context.cpp │ ├── openglcompute.cpp │ ├── osx_clock.cpp │ ├── osx_get_symbol.cpp │ ├── osx_host_cpu_count.cpp │ ├── osx_opengl_context.cpp │ ├── osx_yield.cpp │ ├── posix_abort.cpp │ ├── posix_allocator.cpp │ ├── posix_clock.cpp │ ├── posix_error_handler.cpp │ ├── posix_get_symbol.cpp │ ├── posix_io.cpp │ ├── posix_math.ll │ ├── posix_print.cpp │ ├── posix_threads.cpp │ ├── posix_threads_tsan.cpp │ ├── powerpc.ll │ ├── powerpc_cpu_features.cpp │ ├── prefetch.cpp │ ├── printer.h │ ├── profiler.cpp │ ├── profiler_inlined.cpp │ ├── pseudostack.cpp │ ├── ptx_dev.ll │ ├── qurt_allocator.cpp │ ├── qurt_hvx.cpp │ ├── qurt_hvx_vtcm.cpp │ ├── qurt_init_fini.cpp │ ├── qurt_threads.cpp │ ├── qurt_threads_tsan.cpp │ ├── qurt_yield.cpp │ ├── riscv_cpu_features.cpp │ ├── runtime_api.cpp │ ├── runtime_internal.h │ ├── scoped_mutex_lock.h │ ├── scoped_spin_lock.h │ ├── ssp.cpp │ ├── synchronization_common.h │ ├── thread_pool_common.h │ ├── to_string.cpp │ ├── trace_helper.cpp │ ├── tracing.cpp │ ├── wasm_cpu_features.cpp │ ├── wasm_math.ll │ ├── win32_math.ll │ ├── windows_abort.cpp │ ├── 
windows_clock.cpp │ ├── windows_cuda.cpp │ ├── windows_get_symbol.cpp │ ├── windows_io.cpp │ ├── windows_opencl.cpp │ ├── windows_profiler.cpp │ ├── windows_threads.cpp │ ├── windows_threads_tsan.cpp │ ├── windows_yield.cpp │ ├── write_debug_image.cpp │ ├── x86.ll │ ├── x86_avx.ll │ ├── x86_avx2.ll │ ├── x86_cpu_features.cpp │ └── x86_sse41.ll ├── test ├── CMakeLists.txt ├── auto_schedule │ ├── cost_function.cpp │ ├── data_dependent.cpp │ ├── extern.cpp │ ├── fibonacci.cpp │ ├── harris.cpp │ ├── histogram.cpp │ ├── iir.cpp │ ├── interpolate.cpp │ ├── large_window.cpp │ ├── mat_mul.cpp │ ├── max_filter.cpp │ ├── multi_output.cpp │ ├── overlap.cpp │ ├── param.cpp │ ├── reorder.cpp │ ├── tile_vs_inline.cpp │ ├── unbounded_nonpure.cpp │ ├── unsharp.cpp │ ├── unused_func.cpp │ └── vectorize_var_in_update.cpp ├── common │ ├── check_call_graphs.h │ ├── expect_failure.sh │ ├── gpu_object_lifetime_tracker.h │ └── halide_test_dirs.h ├── correctness │ ├── align_bounds.cpp │ ├── argmax.cpp │ ├── assertion_failure_in_parallel_for.cpp │ ├── async.cpp │ ├── async_copy_chain.cpp │ ├── async_device_copy.cpp │ ├── autodiff.cpp │ ├── autoschedule_small_pure_update.cpp │ ├── autotune_bug.cpp │ ├── autotune_bug_2.cpp │ ├── autotune_bug_3.cpp │ ├── autotune_bug_4.cpp │ ├── autotune_bug_5.cpp │ ├── bad_likely.cpp │ ├── bit_counting.cpp │ ├── bitwise_ops.cpp │ ├── bool_compute_root_vectorize.cpp │ ├── bound.cpp │ ├── bound_small_allocations.cpp │ ├── boundary_conditions.cpp │ ├── bounds.cpp │ ├── bounds_inference.cpp │ ├── bounds_inference_chunk.cpp │ ├── bounds_inference_complex.cpp │ ├── bounds_inference_outer_split.cpp │ ├── bounds_of_abs.cpp │ ├── bounds_of_cast.cpp │ ├── bounds_of_func.cpp │ ├── bounds_of_monotonic_math.cpp │ ├── bounds_of_multiply.cpp │ ├── bounds_query.cpp │ ├── buffer_t.cpp │ ├── c_function.cpp │ ├── cascaded_filters.cpp │ ├── cast.cpp │ ├── cast_handle.cpp │ ├── chunk.cpp │ ├── chunk_sharing.cpp │ ├── circular_reference_leak.cpp │ ├── code_explosion.cpp │ ├── 
compare_vars.cpp │ ├── compile_to.cpp │ ├── compile_to_bitcode.cpp │ ├── compile_to_lowered_stmt.cpp │ ├── compile_to_multitarget.cpp │ ├── compute_at_reordered_update_stage.cpp │ ├── compute_at_split_rvar.cpp │ ├── compute_outermost.cpp │ ├── compute_with.cpp │ ├── compute_with_in.cpp │ ├── compute_with_inlined.cpp │ ├── computed_index.cpp │ ├── concat.cpp │ ├── constant_expr.cpp │ ├── constant_type.cpp │ ├── constraints.cpp │ ├── convolution.cpp │ ├── convolution_multiple_kernels.cpp │ ├── cross_compilation.cpp │ ├── custom_allocator.cpp │ ├── custom_auto_scheduler.cpp │ ├── custom_error_reporter.cpp │ ├── custom_lowering_pass.cpp │ ├── debug_to_file.cpp │ ├── debug_to_file_multiple_outputs.cpp │ ├── debug_to_file_reorder.cpp │ ├── deferred_loop_level.cpp │ ├── deinterleave4.cpp │ ├── device_buffer_copy.cpp │ ├── device_crop.cpp │ ├── device_slice.cpp │ ├── dilate3x3.cpp │ ├── dynamic_reduction_bounds.cpp │ ├── embed_bitcode.cpp │ ├── erf.cpp │ ├── exception.cpp │ ├── explicit_inline_reductions.cpp │ ├── extern_bounds_inference.cpp │ ├── extern_consumer.cpp │ ├── extern_consumer_tiled.cpp │ ├── extern_error.cpp │ ├── extern_output_expansion.cpp │ ├── extern_partial.cpp │ ├── extern_producer.cpp │ ├── extern_reorder_storage.cpp │ ├── extern_sort.cpp │ ├── extern_stage.cpp │ ├── extern_stage_on_device.cpp │ ├── external_code.cpp │ ├── failed_unroll.cpp │ ├── fast_trigonometric.cpp │ ├── fibonacci.cpp │ ├── fit_function.cpp │ ├── float16_t_comparison.cpp │ ├── float16_t_constants.cpp │ ├── float16_t_image_type.cpp │ ├── for_each_element.cpp │ ├── force_onto_stack.cpp │ ├── func_clone.cpp │ ├── func_lifetime.cpp │ ├── func_lifetime_2.cpp │ ├── func_wrapper.cpp │ ├── fuse.cpp │ ├── fuse_gpu_threads.cpp │ ├── fused_where_inner_extent_is_zero.cpp │ ├── fuzz_cse.cpp │ ├── fuzz_float_stores.cpp │ ├── fuzz_simplify.cpp │ ├── gameoflife.cpp │ ├── gather.cpp │ ├── gpu_assertion_in_kernel.cpp │ ├── gpu_bounds_inference_failure.cpp │ ├── gpu_cpu_simultaneous_read.cpp │ ├── 
gpu_data_flows.cpp │ ├── gpu_dynamic_shared.cpp │ ├── gpu_free_sync.cpp │ ├── gpu_give_input_buffers_device_allocations.cpp │ ├── gpu_jit_explicit_copy_to_device.cpp │ ├── gpu_large_alloc.cpp │ ├── gpu_mixed_dimensionality.cpp │ ├── gpu_mixed_shared_mem_types.cpp │ ├── gpu_multi_device.cpp │ ├── gpu_multi_kernel.cpp │ ├── gpu_non_contiguous_copy.cpp │ ├── gpu_object_lifetime_1.cpp │ ├── gpu_object_lifetime_2.cpp │ ├── gpu_object_lifetime_3.cpp │ ├── gpu_param_allocation.cpp │ ├── gpu_reuse_shared_memory.cpp │ ├── gpu_specialize.cpp │ ├── gpu_sum_scan.cpp │ ├── gpu_thread_barrier.cpp │ ├── gpu_transpose.cpp │ ├── gpu_vectorized_shared_memory.cpp │ ├── halide_buffer.cpp │ ├── handle.cpp │ ├── heap_cleanup.cpp │ ├── hello_gpu.cpp │ ├── hexagon_scatter.cpp │ ├── histogram.cpp │ ├── histogram_equalize.cpp │ ├── host_alignment.cpp │ ├── image_io.cpp │ ├── image_of_lists.cpp │ ├── image_wrapper.cpp │ ├── implicit_args.cpp │ ├── implicit_args_tests.cpp │ ├── in_place.cpp │ ├── infer_arguments.cpp │ ├── inline_reduction.cpp │ ├── inlined_generator.cpp │ ├── input_image_bounds_check.cpp │ ├── input_larger_than_two_gigs.cpp │ ├── integer_powers.cpp │ ├── interleave.cpp │ ├── interleave_rgb.cpp │ ├── interleave_x.cpp │ ├── interval.cpp │ ├── introspection.cpp │ ├── inverse.cpp │ ├── isnan.cpp │ ├── issue_3926.cpp │ ├── iterate_over_circle.cpp │ ├── lambda.cpp │ ├── lazy_convolution.cpp │ ├── leak_device_memory.cpp │ ├── left_shift_negative.cpp │ ├── legal_race_condition.cpp │ ├── lerp.cpp │ ├── let_in_rdom_bound.cpp │ ├── likely.cpp │ ├── load_library.cpp │ ├── logical.cpp │ ├── loop_invariant_extern_calls.cpp │ ├── loop_level_generator_param.cpp │ ├── lots_of_dimensions.cpp │ ├── make_struct.cpp │ ├── many_dimensions.cpp │ ├── many_small_extern_stages.cpp │ ├── many_updates.cpp │ ├── math.cpp │ ├── median3x3.cpp │ ├── memoize.cpp │ ├── memoize_cloned.cpp │ ├── min_extent.cpp │ ├── mod.cpp │ ├── mul_div_mod.cpp │ ├── multi_output_pipeline_with_bad_sizes.cpp │ ├── 
multi_pass_reduction.cpp │ ├── multi_splits_with_diff_tail_strategies.cpp │ ├── multi_way_select.cpp │ ├── multipass_constraints.cpp │ ├── multiple_outputs.cpp │ ├── multiple_outputs_extern.cpp │ ├── named_updates.cpp │ ├── nested_shiftinwards.cpp │ ├── newtons_method.cpp │ ├── non_vector_aligned_embeded_buffer.cpp │ ├── obscure_image_references.cpp │ ├── oddly_sized_output.cpp │ ├── out_constraint.cpp │ ├── out_of_memory.cpp │ ├── output_larger_than_two_gigs.cpp │ ├── parallel.cpp │ ├── parallel_alloc.cpp │ ├── parallel_fork.cpp │ ├── parallel_gpu_nested.cpp │ ├── parallel_nested.cpp │ ├── parallel_nested_1.cpp │ ├── parallel_reductions.cpp │ ├── parallel_rvar.cpp │ ├── param.cpp │ ├── param_map.cpp │ ├── parameter_constraints.cpp │ ├── partial_application.cpp │ ├── partial_realization.cpp │ ├── partition_loops.cpp │ ├── partition_loops_bug.cpp │ ├── pipeline_set_jit_externs_func.cpp │ ├── plain_c_includes.c │ ├── popc_clz_ctz_bounds.cpp │ ├── predicated_store_load.cpp │ ├── prefetch.cpp │ ├── print.cpp │ ├── process_some_tiles.cpp │ ├── pseudostack_shares_slots.cpp │ ├── python_extension_gen.cpp │ ├── random.cpp │ ├── realize_larger_than_two_gigs.cpp │ ├── realize_over_shifted_domain.cpp │ ├── reduction_chain.cpp │ ├── reduction_non_rectangular.cpp │ ├── reduction_schedule.cpp │ ├── register_shuffle.cpp │ ├── reorder_rvars.cpp │ ├── reorder_storage.cpp │ ├── require.cpp │ ├── reschedule.cpp │ ├── reuse_stack_alloc.cpp │ ├── rfactor.cpp │ ├── round.cpp │ ├── saturating_casts.cpp │ ├── scatter.cpp │ ├── set_custom_trace.cpp │ ├── shared_self_references.cpp │ ├── shifted_image.cpp │ ├── side_effects.cpp │ ├── simd_op_check.cpp │ ├── simplified_away_embedded_image.cpp │ ├── simplify.cpp │ ├── skip_stages.cpp │ ├── skip_stages_external_array_functions.cpp │ ├── skip_stages_memoize.cpp │ ├── sliding_backwards.cpp │ ├── sliding_reduction.cpp │ ├── sliding_window.cpp │ ├── sort_exprs.cpp │ ├── specialize.cpp │ ├── specialize_to_gpu.cpp │ ├── split_by_non_factor.cpp │ ├── 
split_fuse_rvar.cpp │ ├── split_reuse_inner_name_bug.cpp │ ├── split_store_compute.cpp │ ├── stack_allocations.cpp │ ├── stencil_chain_in_update_definitions.cpp │ ├── stmt_to_html.cpp │ ├── storage_folding.cpp │ ├── store_in.cpp │ ├── stream_compaction.cpp │ ├── strict_float.cpp │ ├── strict_float_bounds.cpp │ ├── strided_load.cpp │ ├── target.cpp │ ├── thread_safety.cpp │ ├── tracing.cpp │ ├── tracing_bounds.cpp │ ├── tracing_broadcast.cpp │ ├── tracing_stack.cpp │ ├── transitive_bounds.cpp │ ├── trim_no_ops.cpp │ ├── truncated_pyramid.cpp │ ├── tuple_partial_update.cpp │ ├── tuple_reduction.cpp │ ├── tuple_select.cpp │ ├── tuple_undef.cpp │ ├── tuple_update_ops.cpp │ ├── two_vector_args.cpp │ ├── undef.cpp │ ├── uninitialized_read.cpp │ ├── unique_func_image.cpp │ ├── unroll_dynamic_loop.cpp │ ├── unrolled_reduction.cpp │ ├── unsafe_dedup_lets.cpp │ ├── unsafe_promises.cpp │ ├── unused_func.cpp │ ├── update_chunk.cpp │ ├── vector_bounds_inference.cpp │ ├── vector_cast.cpp │ ├── vector_extern.cpp │ ├── vector_math.cpp │ ├── vector_print_bug.cpp │ ├── vectorize_guard_with_if.cpp │ ├── vectorize_mixed_widths.cpp │ ├── vectorize_varying_allocation_size.cpp │ ├── vectorized_gpu_allocation.cpp │ ├── vectorized_initialization.cpp │ ├── vectorized_load_from_vectorized_allocation.cpp │ ├── vectorized_reduction_bug.cpp │ └── widening_reduction.cpp ├── error │ ├── ambiguous_inline_reductions.cpp │ ├── async_require_fail.cpp │ ├── auto_schedule_no_bounds.cpp │ ├── auto_schedule_no_parallel.cpp │ ├── auto_schedule_no_reorder.cpp │ ├── bad_bound.cpp │ ├── bad_compute_at.cpp │ ├── bad_compute_with.cpp │ ├── bad_compute_with_invalid_specialization.cpp │ ├── bad_compute_with_parent_func_not_used.cpp │ ├── bad_const_cast.cpp │ ├── bad_device_api.cpp │ ├── bad_dimensions.cpp │ ├── bad_extern_split.cpp │ ├── bad_fold.cpp │ ├── bad_host_alignment.cpp │ ├── bad_rvar_order.cpp │ ├── bad_schedule.cpp │ ├── bad_store_at.cpp │ ├── broken_promise.cpp │ ├── buffer_larger_than_two_gigs.cpp │ 
├── clamp_out_of_range.cpp │ ├── constrain_wrong_output_buffer.cpp │ ├── constraint_uses_non_param.cpp │ ├── define_after_realize.cpp │ ├── define_after_use.cpp │ ├── device_target_mismatch.cpp │ ├── expanding_reduction.cpp │ ├── extern_func_self_argument.cpp │ ├── five_d_gpu_buffer.cpp │ ├── float_arg.cpp │ ├── forward_on_undefined_buffer.cpp │ ├── implicit_args.cpp │ ├── impossible_constraints.cpp │ ├── init_def_should_be_all_vars.cpp │ ├── inspect_loop_level.cpp │ ├── lerp_float_weight_out_of_range.cpp │ ├── lerp_mismatch.cpp │ ├── lerp_signed_weight.cpp │ ├── memoize_different_compute_store.cpp │ ├── metal_vector_too_large.cpp │ ├── missing_args.cpp │ ├── modulo_constant_zero.cpp │ ├── no_default_device.cpp │ ├── nonexistent_update_stage.cpp │ ├── null_host_field.cpp │ ├── overflow_during_constant_folding.cpp │ ├── pointer_arithmetic.cpp │ ├── race_condition.cpp │ ├── rdom_undefined.cpp │ ├── realize_constantly_larger_than_two_gigs.cpp │ ├── reduction_bounds.cpp │ ├── reduction_type_mismatch.cpp │ ├── require_fail.cpp │ ├── reuse_var_in_schedule.cpp │ ├── reused_args.cpp │ ├── rfactor_inner_dim_non_commutative.cpp │ ├── specialize_fail.cpp │ ├── split_inner_wrong_tail_strategy.cpp │ ├── thread_id_outside_block_id.cpp │ ├── too_many_args.cpp │ ├── tuple_arg_select_undef.cpp │ ├── tuple_val_select_undef.cpp │ ├── unbounded_input.cpp │ ├── unbounded_output.cpp │ ├── undefined_func_compile.cpp │ ├── undefined_func_realize.cpp │ ├── undefined_loop_level.cpp │ ├── undefined_pipeline_compile.cpp │ ├── undefined_pipeline_realize.cpp │ ├── undefined_rdom_dimension.cpp │ ├── unknown_target.cpp │ ├── vectorize_dynamic.cpp │ ├── vectorize_too_little.cpp │ ├── vectorize_too_much.cpp │ ├── vectorized_extern.cpp │ ├── wrap_custom_after_shared.cpp │ ├── wrap_frozen.cpp │ ├── wrapper_never_used.cpp │ ├── wrong_dimensionality_extern_stage.cpp │ └── wrong_type.cpp ├── failing_with_issue │ ├── 3292_async_specialize.cpp │ ├── 3293_storage_folding_async.cpp │ └── 
3357_vectorize_pred.cpp ├── generator │ ├── acquire_release_aottest.cpp │ ├── acquire_release_generator.cpp │ ├── alias_aottest.cpp │ ├── alias_generator.cpp │ ├── argvcall_aottest.cpp │ ├── argvcall_generator.cpp │ ├── async_parallel_aottest.cpp │ ├── async_parallel_generator.cpp │ ├── bit_operations_aottest.cpp │ ├── bit_operations_generator.cpp │ ├── blur2x2_aottest.cpp │ ├── blur2x2_generator.cpp │ ├── buffer_copy_aottest.cpp │ ├── buffer_copy_generator.cpp │ ├── buildmethod_aottest.cpp │ ├── buildmethod_generator.cpp │ ├── can_use_target_aottest.cpp │ ├── can_use_target_generator.cpp │ ├── cleanup_on_error_aottest.cpp │ ├── cleanup_on_error_generator.cpp │ ├── configure_aottest.cpp │ ├── configure_generator.cpp │ ├── configure_jittest.cpp │ ├── cxx_mangling_aottest.cpp │ ├── cxx_mangling_define_extern_aottest.cpp │ ├── cxx_mangling_define_extern_externs.cpp │ ├── cxx_mangling_define_extern_generator.cpp │ ├── cxx_mangling_externs.cpp │ ├── cxx_mangling_generator.cpp │ ├── define_extern_opencl_aottest.cpp │ ├── define_extern_opencl_generator.cpp │ ├── embed_image_aottest.cpp │ ├── embed_image_generator.cpp │ ├── error_codes_aottest.cpp │ ├── error_codes_generator.cpp │ ├── example_aottest.cpp │ ├── example_generator.cpp │ ├── example_jittest.cpp │ ├── extern_output_aottest.cpp │ ├── extern_output_generator.cpp │ ├── external_code_aottest.cpp │ ├── external_code_extern.cpp │ ├── external_code_generator.cpp │ ├── float16_t_aottest.cpp │ ├── float16_t_generator.cpp │ ├── gpu_object_lifetime_aottest.cpp │ ├── gpu_object_lifetime_generator.cpp │ ├── gpu_only_aottest.cpp │ ├── gpu_only_generator.cpp │ ├── image_from_array_aottest.cpp │ ├── image_from_array_generator.cpp │ ├── mandelbrot_aottest.cpp │ ├── mandelbrot_generator.cpp │ ├── matlab_aottest.cpp │ ├── matlab_generator.cpp │ ├── memory_profiler_mandelbrot_aottest.cpp │ ├── memory_profiler_mandelbrot_generator.cpp │ ├── metadata_tester_aottest.cpp │ ├── metadata_tester_generator.cpp │ ├── msan_aottest.cpp │ ├── 
msan_generator.cpp │ ├── multitarget_aottest.cpp │ ├── multitarget_generator.cpp │ ├── nested_externs_aottest.cpp │ ├── nested_externs_generator.cpp │ ├── old_buffer_t_aottest.cpp │ ├── old_buffer_t_generator.cpp │ ├── output_assign_aottest.cpp │ ├── output_assign_generator.cpp │ ├── pyramid_aottest.cpp │ ├── pyramid_generator.cpp │ ├── rdom_input_aottest.cpp │ ├── rdom_input_generator.cpp │ ├── registration_test.cpp │ ├── rungen_test.cpp │ ├── string_param_aottest.cpp │ ├── string_param_generator.cpp │ ├── stubtest_aottest.cpp │ ├── stubtest_generator.cpp │ ├── stubtest_jittest.cpp │ ├── stubuser_aottest.cpp │ ├── stubuser_generator.cpp │ ├── tiled_blur_aottest.cpp │ ├── tiled_blur_generator.cpp │ ├── user_context_aottest.cpp │ ├── user_context_generator.cpp │ ├── user_context_insanity_aottest.cpp │ ├── user_context_insanity_generator.cpp │ ├── variable_num_threads_aottest.cpp │ └── variable_num_threads_generator.cpp ├── internal.cpp ├── opengl │ ├── conv_select.cpp │ ├── copy_pixels.cpp │ ├── copy_to_device.cpp │ ├── copy_to_host.cpp │ ├── float_texture.cpp │ ├── inline_reduction.cpp │ ├── internal.cpp │ ├── lut.cpp │ ├── multiple_stages.cpp │ ├── produce.cpp │ ├── rewrap_texture.cpp │ ├── save_state.cpp │ ├── select.cpp │ ├── set_pixels.cpp │ ├── shifted_domains.cpp │ ├── special_funcs.cpp │ ├── sum_reduction.cpp │ ├── sumcolor_reduction.cpp │ ├── testing.h │ ├── tuples.cpp │ ├── vagrant │ │ ├── .gitignore │ │ ├── README.md │ │ ├── Vagrantfile │ │ ├── build_tests.sh │ │ └── provision │ │ │ ├── etc │ │ │ ├── environment │ │ │ ├── init │ │ │ │ └── xdummy.conf │ │ │ └── systemd │ │ │ │ └── system │ │ │ │ └── xdummy.service │ │ │ └── usr │ │ │ └── share │ │ │ └── X11 │ │ │ └── xorg.conf.d │ │ │ └── xdummy.conf │ └── varying.cpp ├── performance │ ├── async_gpu.cpp │ ├── block_transpose.cpp │ ├── boundary_conditions.cpp │ ├── clamped_vector_load.cpp │ ├── const_division.cpp │ ├── fan_in.cpp │ ├── fast_inverse.cpp │ ├── fast_pow.cpp │ ├── fast_sine_cosine.cpp │ ├── 
inner_loop_parallel.cpp │ ├── jit_stress.cpp │ ├── lots_of_inputs.cpp │ ├── lots_of_small_allocations.cpp │ ├── matrix_multiplication.cpp │ ├── memcpy.cpp │ ├── memory_profiler.cpp │ ├── packed_planar_fusion.cpp │ ├── parallel_performance.cpp │ ├── profiler.cpp │ ├── realize_overhead.cpp │ ├── rfactor.cpp │ ├── rgb_interleaved.cpp │ ├── sort.cpp │ ├── thread_safe_jit.cpp │ ├── vectorize.cpp │ └── wrap.cpp ├── scripts │ └── build_travis.sh └── warning │ ├── double_vectorize.cpp │ ├── hidden_pure_definition.cpp │ └── require_const_false.cpp ├── tools ├── GenGen.cpp ├── RunGen.h ├── RunGenMain.cpp ├── binary2cpp.cpp ├── build_halide_h.cpp ├── find_inverse.cpp ├── halide_benchmark.h ├── halide_config.cmake.tpl ├── halide_config.make.tpl ├── halide_image.h ├── halide_image_info.h ├── halide_image_io.h ├── halide_malloc_trace.h ├── halide_trace_config.h ├── makelib.sh └── mex_halide.m ├── tutorial ├── .gitignore ├── CMakeLists.txt ├── clock.h ├── figures │ ├── generate_figures_17.sh │ ├── generate_figures_18.sh │ ├── generate_figures_19.sh │ ├── generate_figures_5.sh │ ├── generate_figures_8.sh │ ├── generate_figures_9.sh │ ├── generate_output_snippets.sh │ ├── lesson_02_input.jpg │ ├── lesson_02_output.jpg │ ├── lesson_05_col_major.gif │ ├── lesson_05_fast.mp4 │ ├── lesson_05_parallel_tiles.gif │ ├── lesson_05_row_major.gif │ ├── lesson_05_split_7_by_3.gif │ ├── lesson_05_tiled.gif │ ├── lesson_05_vectors.gif │ ├── lesson_08_compute_root.gif │ ├── lesson_08_compute_y.gif │ ├── lesson_08_mixed.mp4 │ ├── lesson_08_store_root_compute_x.gif │ ├── lesson_08_store_root_compute_y.gif │ ├── lesson_08_tile.gif │ ├── lesson_09_compute_at_multiple_updates.mp4 │ ├── lesson_09_compute_at_pure.gif │ ├── lesson_09_compute_at_pure_and_update.gif │ ├── lesson_09_compute_at_rvar.gif │ ├── lesson_09_compute_at_update.gif │ ├── lesson_09_inline_reduction.gif │ ├── lesson_09_update.gif │ ├── lesson_09_update_rdom.mp4 │ ├── lesson_09_update_schedule.mp4 │ ├── 
lesson_17_rdom_calls_in_predicate.mp4 │ ├── lesson_17_rdom_circular.mp4 │ ├── lesson_17_rdom_triangular.mp4 │ ├── lesson_18_hist_manual_par.mp4 │ ├── lesson_18_hist_rfactor_par.mp4 │ ├── lesson_18_hist_rfactor_tile.mp4 │ ├── lesson_18_hist_rfactor_vec.mp4 │ ├── lesson_18_hist_serial.mp4 │ ├── lesson_19_group_updates.mp4 │ ├── lesson_19_transpose.mp4 │ ├── lesson_19_wrapper_global.mp4 │ ├── lesson_19_wrapper_local.mp4 │ ├── lesson_19_wrapper_unique.mp4 │ └── lesson_19_wrapper_vary_schedule.mp4 ├── images │ ├── gray.png │ └── rgb.png ├── lesson_01_basics.cpp ├── lesson_02_input_image.cpp ├── lesson_03_debugging_1.cpp ├── lesson_04_debugging_2.cpp ├── lesson_05_scheduling_1.cpp ├── lesson_06_realizing_over_shifted_domains.cpp ├── lesson_07_multi_stage_pipelines.cpp ├── lesson_08_scheduling_2.cpp ├── lesson_09_update_definitions.cpp ├── lesson_10_aot_compilation_generate.cpp ├── lesson_10_aot_compilation_run.cpp ├── lesson_11_cross_compilation.cpp ├── lesson_12_using_the_gpu.cpp ├── lesson_13_tuples.cpp ├── lesson_14_types.cpp ├── lesson_15_generators.cpp ├── lesson_15_generators_usage.sh ├── lesson_16_rgb_generate.cpp ├── lesson_16_rgb_run.cpp ├── lesson_17_predicated_rdom.cpp ├── lesson_18_parallel_associative_reductions.cpp ├── lesson_19_wrapper_funcs.cpp ├── lesson_20_cloning_funcs.cpp ├── lesson_21_auto_scheduler_generate.cpp ├── lesson_21_auto_scheduler_run.cpp └── todo.txt ├── util ├── CMakeLists.txt ├── Halide-VS2017.natvis ├── Halide.natvis ├── HalideTraceDump.cpp ├── HalideTraceUtils.cpp ├── HalideTraceUtils.h ├── HalideTraceViz.cpp └── inconsolata.h └── xhalide_examples ├── golden ├── xhalide_2dfilter.cc ├── xhalide_dse_4d_filter_generated.cc └── xhalide_generated1.cc ├── xhalide-2dconvolution.cpp ├── xhalide-convolution-16bits.cpp ├── xhalide-convolution-32bit.cpp ├── xhalide-convolution.cpp ├── xhalide-convolution1.cpp ├── xhalide_dse_4d_filter_limited.cpp └── xhalide_dse_conv.cpp /.gitattributes: 
-------------------------------------------------------------------------------- 1 | # Set the default behavior, in case people don't have core.autocrlf set. 2 | * text=auto 3 | 4 | # Explicitly declare text files you want to always be normalized and converted 5 | # to native line endings on checkout. 6 | *.cpp text 7 | *.c text 8 | *.h text 9 | 10 | # Denote all files that are truly binary and should not be modified. 11 | *.png binary 12 | *.jpg binary 13 | *.tiff binary 14 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/.gitmodules -------------------------------------------------------------------------------- /apps/HelloAndroid/.gitignore: -------------------------------------------------------------------------------- 1 | .gradle/** 2 | gen/** 3 | gradle_build/** 4 | HelloAndroid.iml 5 | local.properties 6 | obj/** 7 | proguard-project.txt 8 | project.properties 9 | -------------------------------------------------------------------------------- /apps/HelloAndroid/ant.properties: -------------------------------------------------------------------------------- 1 | # This file is used to override default values used by the Ant build system. 2 | # 3 | # This file must be checked into Version Control Systems, as it is 4 | # integral to the build system of your project. 5 | 6 | # This file is only used by the Ant script. 7 | 8 | # You can use this to override default values such as 9 | # 'source.dir' for the location of your java source folder and 10 | # 'out.dir' for the location of your output folder. 11 | 12 | # You can also use it define how the release builds are signed by declaring 13 | # the following properties: 14 | # 'key.store' for the location of your keystore and 15 | # 'key.alias' for the name of the key to use. 
16 | # The password will be asked during the build when you use the 'release' target. 17 | 18 | -------------------------------------------------------------------------------- /apps/HelloAndroid/build-gradle.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Gradle needs to know where the NDK is. 4 | # The easiest way is to set the ANDROID_NDK_HOME environment variable. 5 | # Otherwise, set ndk.dir in local.properties (even though the file itself says 6 | # that it's only used by ant). 7 | # However, if you run "android update" (say, via build.sh), this variable will 8 | # be clobbered. 9 | ./gradlew build && adb install -r gradle_build/outputs/apk/HelloAndroid-debug.apk && adb shell am start com.example.hellohalide/com.example.hellohalide.CameraActivity 10 | -------------------------------------------------------------------------------- /apps/HelloAndroid/gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroid/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /apps/HelloAndroid/gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | #Mon Jan 05 14:23:44 PST 2015 2 | distributionBase=GRADLE_USER_HOME 3 | distributionPath=wrapper/dists 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | distributionUrl=https\://services.gradle.org/distributions/gradle-2.2-bin.zip 7 | -------------------------------------------------------------------------------- /apps/HelloAndroid/jni/Android.mk: -------------------------------------------------------------------------------- 1 | LOCAL_PATH := $(call my-dir) 2 | 3 | include $(CLEAR_VARS) 4 | 5 | LOCAL_MODULE := HelloAndroid 6 | LOCAL_ARM_MODE := 
arm 7 | LOCAL_SRC_FILES := hello_wrapper.cpp 8 | LOCAL_LDFLAGS := -L$(LOCAL_PATH)/../jni 9 | LOCAL_LDLIBS := -lm -llog -landroid $(LOCAL_PATH)/../bin/$(TARGET_ARCH_ABI)/hello.a 10 | LOCAL_STATIC_LIBRARIES := android_native_app_glue 11 | LOCAL_C_INCLUDES := $(LOCAL_PATH)/../../../include $(LOCAL_PATH)/../../../build/include $(LOCAL_PATH)/../bin/$(TARGET_ARCH_ABI)/ 12 | 13 | include $(BUILD_SHARED_LIBRARY) 14 | 15 | $(call import-module,android/native_app_glue) 16 | -------------------------------------------------------------------------------- /apps/HelloAndroid/jni/Application.mk: -------------------------------------------------------------------------------- 1 | # Can't use "APP_ABI = all" as 64-bit MIPS currently does not build since 2 | # llvm will not compile for the R6 version of the ISA without Nan2008 3 | # and the gcc toolchain used by the Android build setup requires those 4 | # two options together. 5 | APP_ABI := armeabi armeabi-v7a arm64-v8a mips x86_64 x86 6 | APP_PLATFORM := android-17 7 | APP_STL := gnustl_static 8 | APP_CPPFLAGS := -std=c++11 9 | -------------------------------------------------------------------------------- /apps/HelloAndroid/res/drawable-hdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroid/res/drawable-hdpi/ic_launcher.png -------------------------------------------------------------------------------- /apps/HelloAndroid/res/drawable-ldpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroid/res/drawable-ldpi/ic_launcher.png -------------------------------------------------------------------------------- /apps/HelloAndroid/res/drawable-mdpi/ic_launcher.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroid/res/drawable-mdpi/ic_launcher.png -------------------------------------------------------------------------------- /apps/HelloAndroid/res/drawable-xhdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroid/res/drawable-xhdpi/ic_launcher.png -------------------------------------------------------------------------------- /apps/HelloAndroid/res/layout/main.xml: -------------------------------------------------------------------------------- 1 | 2 | 7 | 8 | 14 | 15 | -------------------------------------------------------------------------------- /apps/HelloAndroid/res/values/strings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | HelloHalide 4 | 5 | -------------------------------------------------------------------------------- /apps/HelloAndroid/src/com/example/hellohalide/FrameHandler.java: -------------------------------------------------------------------------------- 1 | package com.example.hellohalide; 2 | 3 | import android.hardware.Camera; 4 | import android.util.Log; 5 | 6 | public class FrameHandler implements Camera.PreviewCallback { 7 | private static final String TAG = "FrameHandler"; 8 | 9 | public void onPreviewFrame(byte[] data, Camera camera) { 10 | Log.d(TAG, "Got a frame!"); 11 | } 12 | } -------------------------------------------------------------------------------- /apps/HelloAndroidCamera2/.gitignore: -------------------------------------------------------------------------------- 1 | .gradle/** 2 | gen/** 3 | gradle_build/** 4 | *.iml 5 | local.properties 6 | obj/** 7 | proguard-project.txt 8 | project.properties 9 | 
-------------------------------------------------------------------------------- /apps/HelloAndroidCamera2/build-gradle.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Gradle needs to know where the NDK is. 4 | # The easiest way is to set the ANDROID_NDK_HOME environment variable. 5 | # Otherwise, set ndk.dir in local.properties (even though the file itself says 6 | # that it's only used by ant). 7 | # However, if you run "android update" (say, via build.sh), this variable will 8 | # be clobbered. 9 | ./gradlew build && adb install -r gradle_build/outputs/apk/HelloAndroidCamera2-debug.apk && adb shell am start com.example.helloandroidcamera2/com.example.helloandroidcamera2.CameraActivity 10 | -------------------------------------------------------------------------------- /apps/HelloAndroidCamera2/gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroidCamera2/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /apps/HelloAndroidCamera2/gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | #Wed Jul 15 16:34:43 PDT 2015 2 | distributionBase=GRADLE_USER_HOME 3 | distributionPath=wrapper/dists 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | distributionUrl=https\://services.gradle.org/distributions/gradle-2.2-all.zip 7 | -------------------------------------------------------------------------------- /apps/HelloAndroidCamera2/jni/Application.mk: -------------------------------------------------------------------------------- 1 | # Can't use "APP_ABI = all" as 64-bit MIPS currently does not build since 2 | # llvm will not compile for the R6 version of the ISA without Nan2008 3 | 
# and the gcc toolchain used by the Android build setup requires those 4 | # two options together. 5 | APP_ABI := armeabi armeabi-v7a arm64-v8a mips x86_64 x86 6 | APP_PLATFORM := android-21 7 | APP_STL := c++_static 8 | APP_CPPFLAGS := -std=c++11 -fno-rtti -fexceptions 9 | -------------------------------------------------------------------------------- /apps/HelloAndroidCamera2/res/drawable-hdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroidCamera2/res/drawable-hdpi/ic_launcher.png -------------------------------------------------------------------------------- /apps/HelloAndroidCamera2/res/drawable-ldpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroidCamera2/res/drawable-ldpi/ic_launcher.png -------------------------------------------------------------------------------- /apps/HelloAndroidCamera2/res/drawable-mdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroidCamera2/res/drawable-mdpi/ic_launcher.png -------------------------------------------------------------------------------- /apps/HelloAndroidCamera2/res/drawable-xhdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroidCamera2/res/drawable-xhdpi/ic_launcher.png -------------------------------------------------------------------------------- /apps/HelloAndroidCamera2/res/layout/main.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 7 | 8 | 14 | 15 | -------------------------------------------------------------------------------- /apps/HelloAndroidCamera2/res/values/strings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | HelloHalideCamera2 4 | Toggle Edge Detector 5 | 6 | -------------------------------------------------------------------------------- /apps/HelloAndroidGL/ant.properties: -------------------------------------------------------------------------------- 1 | # This file is used to override default values used by the Ant build system. 2 | # 3 | # This file must be checked into Version Control Systems, as it is 4 | # integral to the build system of your project. 5 | 6 | # This file is only used by the Ant script. 7 | 8 | # You can use this to override default values such as 9 | # 'source.dir' for the location of your java source folder and 10 | # 'out.dir' for the location of your output folder. 11 | 12 | # You can also use it define how the release builds are signed by declaring 13 | # the following properties: 14 | # 'key.store' for the location of your keystore and 15 | # 'key.alias' for the name of the key to use. 16 | # The password will be asked during the build when you use the 'release' target. 17 | 18 | -------------------------------------------------------------------------------- /apps/HelloAndroidGL/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | android update project -p . --target android-17 4 | cd jni 5 | c++ -std=c++11 halide_gl_filter.cpp -L ../../../bin -lHalide -I ../../../include -ldl -lpthread -lz 6 | HL_TARGET=arm-32-android-opengl-debug DYLD_LIBRARY_PATH=../../../bin LD_LIBRARY_PATH=../../../bin ./a.out 7 | cd .. 
8 | pwd 9 | ndk-build 10 | ant debug 11 | adb install -r bin/HelloAndroidGL-debug.apk 12 | adb logcat 13 | -------------------------------------------------------------------------------- /apps/HelloAndroidGL/jni/Android.mk: -------------------------------------------------------------------------------- 1 | LOCAL_PATH := $(call my-dir) 2 | 3 | include $(CLEAR_VARS) 4 | 5 | LOCAL_MODULE := android_halide_gl_native 6 | LOCAL_ARM_MODE := arm 7 | LOCAL_SRC_FILES := android_halide_gl_native.cpp 8 | LOCAL_LDFLAGS := -Ljni 9 | LOCAL_LDLIBS := -lm -llog -landroid -lEGL -lGLESv2 jni/halide_gl_filter.o 10 | LOCAL_STATIC_LIBRARIES := android_native_app_glue 11 | LOCAL_C_INCLUDES := $(LOCAL_PATH)/../../../include 12 | 13 | include $(BUILD_SHARED_LIBRARY) 14 | 15 | $(call import-module,android/native_app_glue) 16 | -------------------------------------------------------------------------------- /apps/HelloAndroidGL/jni/Application.mk: -------------------------------------------------------------------------------- 1 | # The ARMv7 is significanly faster due to the use of the hardware FPU 2 | APP_ABI := armeabi-v7a 3 | APP_PLATFORM := android-17 4 | -------------------------------------------------------------------------------- /apps/HelloAndroidGL/project.properties: -------------------------------------------------------------------------------- 1 | # This file is automatically generated by Android Tools. 2 | # Do not modify this file -- YOUR CHANGES WILL BE ERASED! 3 | # 4 | # This file must be checked in Version Control Systems. 5 | # 6 | # To customize properties used by the Ant build system edit 7 | # "ant.properties", and override values to adapt the script to your 8 | # project structure. 9 | # 10 | # To enable ProGuard to shrink and obfuscate your code, uncomment this (available properties: sdk.dir, user.home): 11 | #proguard.config=${sdk.dir}/tools/proguard/proguard-android.txt:proguard-project.txt 12 | 13 | # Project target. 
14 | target=android-17 15 | -------------------------------------------------------------------------------- /apps/HelloAndroidGL/res/drawable-hdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroidGL/res/drawable-hdpi/ic_launcher.png -------------------------------------------------------------------------------- /apps/HelloAndroidGL/res/drawable-ldpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroidGL/res/drawable-ldpi/ic_launcher.png -------------------------------------------------------------------------------- /apps/HelloAndroidGL/res/drawable-mdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroidGL/res/drawable-mdpi/ic_launcher.png -------------------------------------------------------------------------------- /apps/HelloAndroidGL/res/drawable-xhdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/HelloAndroidGL/res/drawable-xhdpi/ic_launcher.png -------------------------------------------------------------------------------- /apps/HelloAndroidGL/res/layout/main.xml: -------------------------------------------------------------------------------- 1 | 2 | 7 | 8 | 14 | 15 | -------------------------------------------------------------------------------- /apps/HelloAndroidGL/res/values/strings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | Halide GL Demo 4 | 5 | 
-------------------------------------------------------------------------------- /apps/HelloMatlab/Makefile: -------------------------------------------------------------------------------- 1 | include ../support/Makefile.inc 2 | 3 | 4 | test: 5 | ./run_blur.sh 6 | 7 | -------------------------------------------------------------------------------- /apps/HelloMatlab/run_blur.m: -------------------------------------------------------------------------------- 1 | % Add the path to mex_halide.m. 2 | addpath(fullfile(getenv('HALIDE_DISTRIB_PATH'), 'tools')); 3 | 4 | % Build the mex library from the blur generator. 5 | mex_halide('iir_blur.cpp', '-g', 'IirBlur'); 6 | 7 | % Load the input, create an output buffer of equal size. 8 | input = cast(imread('../images/rgb.png'), 'single') / 255; 9 | output = zeros(size(input), 'single'); 10 | 11 | % The blur filter coefficient. 12 | alpha = 0.1; 13 | 14 | % Call the Halide pipeline. 15 | for i = 1:10 16 | tic; 17 | iir_blur(input, alpha, output); 18 | toc; 19 | end 20 | 21 | % Write the blurred image. 22 | imwrite(cast(output * 255, 'uint8'), 'blurred.png'); 23 | -------------------------------------------------------------------------------- /apps/HelloMatlab/run_blur.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script is run by the nightly tests to check that mex_halide works. 4 | 5 | command -v octave >/dev/null 2>&1 || { echo >&2 "Octave not found. Aborting."; exit 0; } 6 | 7 | if [[ $CXX == *"-m32"* ]]; then 8 | echo "Not proceeding because Halide is compiled in 32-bit mode but octave is (likely) 64-bit" 9 | exit 0 10 | fi 11 | 12 | rm -f blurred.png iir_blur.mex 13 | octave run_blur.m 14 | 15 | if [ -f blurred.png ] 16 | then 17 | echo "Success!" 18 | exit 0 19 | fi 20 | 21 | echo "Failed to produce blurred.png!" 
22 | exit 1 23 | -------------------------------------------------------------------------------- /apps/HelloiOS/HelloiOS/AppDelegate.h: -------------------------------------------------------------------------------- 1 | #import 2 | 3 | @interface AppDelegate : UIResponder 4 | 5 | @property (strong, nonatomic) UIWindow *window; 6 | 7 | @end 8 | -------------------------------------------------------------------------------- /apps/HelloiOS/HelloiOS/HalideViewController.h: -------------------------------------------------------------------------------- 1 | #ifndef HelloiOS_HalideViewController_h 2 | #define HelloiOS_HalideViewController_h 3 | 4 | #import "HalideView.h" 5 | #import 6 | 7 | 8 | @interface HalideViewController : UIViewController 9 | 10 | @property HalideView *halide_view; 11 | 12 | - (void)viewWillAppear:(BOOL)animated; 13 | 14 | @end 15 | 16 | #endif -------------------------------------------------------------------------------- /apps/HelloiOS/HelloiOS/HelloiOS-Prefix.pch: -------------------------------------------------------------------------------- 1 | // 2 | // Prefix header 3 | // 4 | // The contents of this file are implicitly included at the beginning of every source file. 5 | // 6 | 7 | #import 8 | 9 | #ifndef __IPHONE_3_0 10 | #warning "This project uses features only available in iOS SDK 3.0 and later." 
11 | #endif 12 | 13 | #ifdef __OBJC__ 14 | #import 15 | #import 16 | #endif 17 | -------------------------------------------------------------------------------- /apps/HelloiOS/HelloiOS/en.lproj/InfoPlist.strings: -------------------------------------------------------------------------------- 1 | /* Localized versions of Info.plist keys */ 2 | 3 | -------------------------------------------------------------------------------- /apps/HelloiOS/HelloiOS/main.mm: -------------------------------------------------------------------------------- 1 | #import 2 | 3 | #import "AppDelegate.h" 4 | 5 | int main(int argc, char * argv[]) 6 | { 7 | @autoreleasepool { 8 | return UIApplicationMain(argc, argv, nil, NSStringFromClass([AppDelegate class])); 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /apps/autoscheduler/AutoSchedule.h: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include "CostModel.h" 3 | #include "FunctionDAG.h" 4 | #include "PerfectHashMap.h" 5 | #include "Featurization.h" 6 | #include 7 | 8 | namespace Halide { 9 | namespace Internal { 10 | namespace Autoscheduler { 11 | 12 | typedef PerfectHashMap StageMapOfScheduleFeatures; 13 | 14 | void find_and_apply_schedule(FunctionDAG& dag, const std::vector &outputs, const MachineParams ¶ms, 15 | CostModel* cost_model, int beam_size, StageMapOfScheduleFeatures* schedule_features); 16 | 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /apps/autoscheduler/NetworkSize.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_NETWORK_SIZE_H 2 | #define HALIDE_NETWORK_SIZE_H 3 | 4 | namespace Halide { 5 | // The size of the best cost model network found. Needed by the cost 6 | // model and also the cost model training script. 
7 | const int head1_channels = 8, head1_w = 40, head1_h = 7; 8 | const int head2_channels = 24, head2_w = 39; 9 | const int conv1_channels = 32; 10 | } 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- /apps/autoscheduler/weights/head1_conv1_bias.data: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/autoscheduler/weights/head1_conv1_bias.data -------------------------------------------------------------------------------- /apps/autoscheduler/weights/head1_conv1_weight.data: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/autoscheduler/weights/head1_conv1_weight.data -------------------------------------------------------------------------------- /apps/autoscheduler/weights/head2_conv1_bias.data: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/autoscheduler/weights/head2_conv1_bias.data -------------------------------------------------------------------------------- /apps/autoscheduler/weights/head2_conv1_weight.data: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/autoscheduler/weights/head2_conv1_weight.data -------------------------------------------------------------------------------- /apps/autoscheduler/weights/trunk_conv1_bias.data: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/autoscheduler/weights/trunk_conv1_bias.data 
-------------------------------------------------------------------------------- /apps/autoscheduler/weights/trunk_conv1_weight.data: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/autoscheduler/weights/trunk_conv1_weight.data -------------------------------------------------------------------------------- /apps/bilateral_grid/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(bilateral_grid_process filter.cpp) 2 | halide_use_image_io(bilateral_grid_process) 3 | 4 | halide_generator(bilateral_grid.generator SRCS bilateral_grid_generator.cpp) 5 | foreach(AUTO_SCHEDULE false true) 6 | if(${AUTO_SCHEDULE}) 7 | set(LIB bilateral_grid_auto_schedule) 8 | else() 9 | set(LIB bilateral_grid) 10 | endif() 11 | halide_library_from_generator(${LIB} 12 | GENERATOR bilateral_grid.generator 13 | GENERATOR_ARGS auto_schedule=${AUTO_SCHEDULE} 14 | EXTRA_OUTPUTS stmt schedule) 15 | target_link_libraries(bilateral_grid_process PRIVATE ${LIB}) 16 | endforeach() 17 | -------------------------------------------------------------------------------- /apps/bilateral_grid/viz.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo HL_AVCONV is ${HL_AVCONV} 3 | export HL_TRACE_FILE=/dev/stdout 4 | export HL_NUMTHREADS=4 5 | rm -f $1/bilateral_grid.mp4 6 | make $1/filter_viz && \ 7 | $1/filter_viz ../images/gray_small.png $1/out_small.png 0.2 0 | \ 8 | ../../bin/HalideTraceViz --size 1920 1080 | \ 9 | ${HL_AVCONV} -f rawvideo -pix_fmt bgr32 -s 1920x1080 -i /dev/stdin -c:v h264 $1/bilateral_grid.mp4 10 | #mplayer -demuxer rawvideo -rawvideo w=1920:h=1080:format=rgba:fps=30 -idle -fixed-vo - 11 | -------------------------------------------------------------------------------- /apps/blur/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | # Generator 2 | halide_library(halide_blur SRCS halide_blur_generator.cpp) 3 | 4 | # Final executable 5 | add_executable(blur_test test.cpp) 6 | target_link_libraries(blur_test PUBLIC halide_blur) 7 | 8 | if (NOT MSVC) 9 | target_compile_options(blur_test PRIVATE "-O2") 10 | if (OPENMP_FOUND) 11 | target_compile_options(blur_test PRIVATE ${OpenMP_CXX_FLAGS}) 12 | target_link_libraries(blur_test PRIVATE ${OpenMP_CXX_FLAGS}) 13 | else() 14 | target_compile_options(blur_test PRIVATE "-Wno-unknown-pragmas") 15 | endif() 16 | endif() 17 | -------------------------------------------------------------------------------- /apps/camera_pipe/viz.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export HL_TRACE_FILE=/dev/stdout 3 | export HL_NUMTHREADS=4 4 | rm -f $1/camera_pipe.mp4 5 | # Do trivial partial-overrides of trace settings via flags 6 | # (--zoom and --rlabel) just to demonstrate that it works. 
7 | $1/process_viz ../images/bayer_small.png 3700 1.8 50 1 1 $1/out.png | 8 | ../../bin/HalideTraceViz --timestep 1000 --size 1920 1080 \ 9 | --zoom 4 --func sharpen_strength_x32 \ 10 | --rlabel curve "tone curve LUT" 0 0 10 \ 11 | |\ 12 | ${HL_AVCONV} -f rawvideo -pix_fmt bgr32 -s 1920x1080 -i /dev/stdin -c:v h264 $1/camera_pipe.mp4 13 | #mplayer -demuxer rawvideo -rawvideo w=1920:h=1080:format=rgba:fps=30 -idle -fixed-vo - 14 | -------------------------------------------------------------------------------- /apps/conv_layer/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(conv_layer_process process.cpp) 2 | halide_use_image_io(conv_layer_process) 3 | 4 | halide_generator(conv_layer.generator SRCS conv_layer_generator.cpp) 5 | foreach(AUTO_SCHEDULE false true) 6 | if(${AUTO_SCHEDULE}) 7 | set(LIB conv_layer_auto_schedule) 8 | else() 9 | set(LIB conv_layer) 10 | endif() 11 | halide_library_from_generator(${LIB} 12 | GENERATOR conv_layer.generator 13 | GENERATOR_ARGS auto_schedule=${AUTO_SCHEDULE}) 14 | target_link_libraries(conv_layer_process PRIVATE ${LIB}) 15 | endforeach() 16 | -------------------------------------------------------------------------------- /apps/images/bayer_raw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/images/bayer_raw.png -------------------------------------------------------------------------------- /apps/images/bayer_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/images/bayer_small.png -------------------------------------------------------------------------------- /apps/images/gray.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/images/gray.png -------------------------------------------------------------------------------- /apps/images/gray_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/images/gray_small.png -------------------------------------------------------------------------------- /apps/images/rgb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/images/rgb.png -------------------------------------------------------------------------------- /apps/images/rgb_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/images/rgb_small.png -------------------------------------------------------------------------------- /apps/images/rgb_small16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/images/rgb_small16.png -------------------------------------------------------------------------------- /apps/images/rgba.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/images/rgba.png -------------------------------------------------------------------------------- /apps/interpolate/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | halide_project(interpolate "apps" interpolate.cpp) 2 | halide_use_image_io(interpolate) 3 | 
set_target_properties(interpolate PROPERTIES RUNTIME_OUTPUT_DIRECTORY 4 | "${CMAKE_CURRENT_BINARY_DIR}") 5 | -------------------------------------------------------------------------------- /apps/interpolate/Makefile: -------------------------------------------------------------------------------- 1 | include ../support/Makefile.inc 2 | 3 | CXXFLAGS += -g -Wall 4 | 5 | .PHONY: clean 6 | 7 | $(BIN)/%/interpolate: interpolate.cpp $(LIB_HALIDE) 8 | @mkdir -p $(@D) 9 | $(CXX) $(CXXFLAGS) $^ -o $@ $(IMAGE_IO_FLAGS) $(LDFLAGS) $(HALIDE_SYSTEM_LIBS) 10 | 11 | $(BIN)/%/out.png: $(BIN)/%/interpolate 12 | @mkdir -p $(@D) 13 | $^ $(IMAGES)/rgba.png $@ 14 | 15 | clean: 16 | rm -rf $(BIN) 17 | 18 | test: $(BIN)/$(HL_TARGET)/out.png 19 | -------------------------------------------------------------------------------- /apps/lens_blur/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(lens_blur_process process.cpp) 2 | halide_use_image_io(lens_blur_process) 3 | 4 | halide_generator(lens_blur.generator SRCS lens_blur_generator.cpp) 5 | foreach(AUTO_SCHEDULE false true) 6 | if(${AUTO_SCHEDULE}) 7 | set(LIB lens_blur_auto_schedule) 8 | else() 9 | set(LIB lens_blur) 10 | endif() 11 | halide_library_from_generator(${LIB} 12 | GENERATOR lens_blur.generator 13 | GENERATOR_ARGS auto_schedule=${AUTO_SCHEDULE}) 14 | target_link_libraries(lens_blur_process PRIVATE ${LIB}) 15 | endforeach() 16 | -------------------------------------------------------------------------------- /apps/linear_algebra/.gitignore: -------------------------------------------------------------------------------- 1 | src/kernels/* 2 | -------------------------------------------------------------------------------- /apps/linear_algebra/tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if (NOT CBLAS_FOUND) 2 | message(STATUS "linear_algebra: No CBLAS header, skipping CBLAS tests") 3 | return() 4 | 
endif() 5 | 6 | add_executable(test_halide_blas 7 | test_halide_blas.cpp 8 | ) 9 | target_include_directories(test_halide_blas SYSTEM 10 | PRIVATE 11 | ${CBLAS_INCLUDE_DIR} 12 | ) 13 | target_include_directories(test_halide_blas BEFORE 14 | PRIVATE 15 | ${halide_blas_INCLUDE_DIRS} 16 | ) 17 | target_compile_options(test_halide_blas PRIVATE -Wno-unused-variable) 18 | 19 | target_link_libraries(test_halide_blas 20 | PRIVATE 21 | halide_blas 22 | cblas # XXX fragile 23 | ${HALIDE_COMPILER_LIB} 24 | ) 25 | 26 | -------------------------------------------------------------------------------- /apps/local_laplacian/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(local_laplacian_process process.cpp) 2 | halide_use_image_io(local_laplacian_process) 3 | 4 | halide_generator(local_laplacian.generator SRCS local_laplacian_generator.cpp) 5 | foreach(AUTO_SCHEDULE false true) 6 | if(${AUTO_SCHEDULE}) 7 | set(LIB local_laplacian_auto_schedule) 8 | else() 9 | set(LIB local_laplacian) 10 | endif() 11 | halide_library_from_generator(${LIB} 12 | GENERATOR local_laplacian.generator 13 | GENERATOR_ARGS auto_schedule=${AUTO_SCHEDULE}) 14 | target_link_libraries(local_laplacian_process PRIVATE ${LIB}) 15 | endforeach() 16 | -------------------------------------------------------------------------------- /apps/local_laplacian/viz.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export HL_TRACE_FILE=/dev/stdout 3 | export HL_NUM_THREADS=4 4 | rm -f $1/local_laplacian.mp4 5 | make $1/process_viz && \ 6 | ./$1/process_viz ../images/rgb_small.png 4 1 1 0 ./$1/out_small.png | \ 7 | ../../bin/HalideTraceViz \ 8 | --size 1920 1080 --timestep 3000 | \ 9 | ${HL_AVCONV} -f rawvideo -pix_fmt bgr32 -s 1920x1080 -i /dev/stdin -c:v h264 ./$1/local_laplacian.mp4 10 | #mplayer -demuxer rawvideo -rawvideo w=1920:h=1080:format=rgba:fps=30 -idle -fixed-vo - 11 | 
-------------------------------------------------------------------------------- /apps/nl_means/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(nl_means_process process.cpp) 2 | halide_use_image_io(nl_means_process) 3 | 4 | halide_generator(nl_means.generator SRCS nl_means_generator.cpp) 5 | foreach(AUTO_SCHEDULE false true) 6 | if(${AUTO_SCHEDULE}) 7 | set(LIB nl_means_auto_schedule) 8 | else() 9 | set(LIB nl_means) 10 | endif() 11 | halide_library_from_generator(${LIB} 12 | GENERATOR nl_means.generator 13 | GENERATOR_ARGS auto_schedule=${AUTO_SCHEDULE}) 14 | target_link_libraries(nl_means_process PRIVATE ${LIB}) 15 | endforeach() 16 | -------------------------------------------------------------------------------- /apps/nn_ops/AveragePool.sh: -------------------------------------------------------------------------------- 1 | AVERAGE_POOL=$1 2 | # Columns are: schedule C W H N stride pad_width pad_height filter_width filter_height output_min output_max 3 | $AVERAGE_POOL 8 16 16 1 1 0 0 1 1 0 255 4 | $AVERAGE_POOL 8 16 16 1 1 1 1 3 3 0 255 5 | $AVERAGE_POOL 8 16 16 1 2 1 1 3 3 0 255 6 | $AVERAGE_POOL 8 16 16 1 2 2 2 5 5 0 255 7 | 8 | $AVERAGE_POOL 32 7 7 1 1 0 0 1 1 0 255 9 | $AVERAGE_POOL 32 7 7 1 1 1 1 3 3 0 255 10 | $AVERAGE_POOL 32 7 7 1 2 1 1 3 3 0 255 11 | $AVERAGE_POOL 32 7 7 4 2 2 2 5 5 0 255 12 | 13 | $AVERAGE_POOL 8 16 16 1 1 0 0 1 1 64 128 14 | $AVERAGE_POOL 8 16 16 1 1 1 1 3 3 64 128 15 | $AVERAGE_POOL 8 16 16 1 2 1 1 3 3 64 128 16 | -------------------------------------------------------------------------------- /apps/nn_ops/Convolution.sh: -------------------------------------------------------------------------------- 1 | CONVOLUTION=$1 2 | # Columns are: schedule C W H N filter_width, filter_height, output_depth, 3 | # input_offset, filter_offset, input_depth, stride, pad_width, pad_height, 4 | # byte_zero, output_multiplier, output_shift, output_offset, output_min, 5 | # 
output_max 6 | 7 | $CONVOLUTION 8 17 17 1 1 1 8 -128 -128 8 1 0 0 0 8 | $CONVOLUTION 8 17 17 1 3 3 8 -128 -128 8 1 1 1 0 9 | $CONVOLUTION 8 17 17 1 3 3 8 -128 -128 8 2 1 1 0 10 | $CONVOLUTION 8 17 17 1 3 3 16 -128 -128 8 1 1 1 0 11 | $CONVOLUTION 8 17 17 1 3 3 16 -128 -140 8 1 1 1 0 12 | $CONVOLUTION 12 17 17 1 3 3 16 -128 -140 12 1 1 1 0 13 | -------------------------------------------------------------------------------- /apps/nn_ops/Im2col.sh: -------------------------------------------------------------------------------- 1 | IM2COL=$1 2 | # Columns are: schedule C W H N stride pad_width pad_height filter_width filter_height byte zero 3 | $IM2COL 8 16 16 1 1 0 0 1 1 0 4 | $IM2COL 8 16 16 1 1 1 1 3 3 0 5 | $IM2COL 8 16 16 1 2 1 1 3 3 0 6 | $IM2COL 8 16 16 1 2 2 2 5 5 0 7 | 8 | $IM2COL 32 7 7 1 1 0 0 1 1 0 9 | $IM2COL 32 7 7 1 1 1 1 3 3 0 10 | $IM2COL 32 7 7 1 2 1 1 3 3 0 11 | $IM2COL 32 7 7 4 2 2 2 5 5 0 12 | 13 | $IM2COL 8 16 16 1 1 0 0 1 1 5 14 | $IM2COL 8 16 16 1 1 1 1 3 3 5 15 | $IM2COL 8 16 16 1 2 1 1 3 3 5 16 | -------------------------------------------------------------------------------- /apps/nn_ops/MaxPool.sh: -------------------------------------------------------------------------------- 1 | MAXPOOL=$1 2 | # Columns are: schedule C W H N stride pad_width pad_height filter_width filter_height output_min output_max 3 | $MAXPOOL 8 16 16 1 1 0 0 1 1 0 255 4 | $MAXPOOL 8 16 16 1 1 1 1 3 3 0 255 5 | $MAXPOOL 8 16 16 1 2 1 1 3 3 0 255 6 | $MAXPOOL 8 16 16 1 2 2 2 5 5 0 255 7 | 8 | $MAXPOOL 32 7 7 1 1 0 0 1 1 0 255 9 | $MAXPOOL 32 7 7 1 1 1 1 3 3 0 255 10 | $MAXPOOL 32 7 7 1 2 1 1 3 3 0 255 11 | $MAXPOOL 32 7 7 4 2 2 2 5 5 0 255 12 | 13 | $MAXPOOL 8 16 16 1 1 0 0 1 1 64 128 14 | $MAXPOOL 8 16 16 1 1 1 1 3 3 64 128 15 | $MAXPOOL 8 16 16 1 2 1 1 3 3 64 128 16 | -------------------------------------------------------------------------------- /apps/nn_ops/common.h: -------------------------------------------------------------------------------- 1 | // A 
collection of utility functions shared by the halide generators. 2 | 3 | #ifndef COMMON_HALIDE_H_ 4 | #define COMMON_HALIDE_H_ 5 | 6 | #include 7 | 8 | // This function implements the same computation as the ARMv7 NEON VQRDMULH 9 | // instruction. 10 | Halide::Expr saturating_rounding_doubling_high_multiply(Halide::Expr a, Halide::Expr b); 11 | 12 | // Correctly-rounded-to-nearest division by a power-of-two. Also known as 13 | // rounding arithmetic right shift. 14 | Halide::Expr rounding_shift_right(Halide::Expr x, Halide::Expr shift); 15 | 16 | // Performs right shift and multiply by a multiplier. 17 | Halide::Expr multiply_quantized_multiplier( 18 | Halide::Expr x, Halide::Expr quantized_multiplier, Halide::Expr shift); 19 | #endif 20 | -------------------------------------------------------------------------------- /apps/nn_ops/common_reference.h: -------------------------------------------------------------------------------- 1 | // A collection of utility functions shared by test apps. 2 | 3 | #ifndef COMMON_REFERENCE_H_ 4 | #define COMMON_REFERENCE_H_ 5 | 6 | #include 7 | 8 | // This function implements the same computation as the ARMv7 NEON VQRDMULH 9 | // instruction. 10 | int32_t saturating_rounding_doubling_high_multiply_reference(int32_t a, int32_t b); 11 | 12 | // Correctly-rounded-to-nearest division by a power-of-two. Also known as 13 | // rounding arithmetic right shift. 14 | int32_t rounding_shift_right_reference(int32_t x, int32_t shift); 15 | 16 | // Performs right shift and multiply by a multiplier. 
17 | int32_t multiply_quantized_multiplier_reference(int32_t x, int32_t q, int32_t shift); 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /apps/onnx/common_types.h: -------------------------------------------------------------------------------- 1 | #ifndef COMMON_TYPES_H_ 2 | #define COMMON_TYPES_H_ 3 | 4 | #include "Halide.h" 5 | #include "onnx_converter.h" 6 | 7 | struct HalideModel { 8 | std::shared_ptr model; 9 | std::shared_ptr rep; 10 | std::vector input_names; 11 | std::unordered_map input_types; 12 | std::vector output_names; 13 | std::vector output_types; 14 | }; 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /apps/opengl_demo/glfw_helpers.h: -------------------------------------------------------------------------------- 1 | #ifndef _GLFW_HELPERS_H_ 2 | #define _GLFW_HELPERS_H_ 3 | 4 | namespace GlfwHelpers { 5 | 6 | struct info { 7 | float dpi_scale; 8 | }; 9 | 10 | struct info setup(int width, int height); 11 | void set_opengl_context(); 12 | void terminate(); 13 | } 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /apps/opengl_demo/image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/opengl_demo/image.png -------------------------------------------------------------------------------- /apps/opengl_demo/layout.h: -------------------------------------------------------------------------------- 1 | #ifndef _LAYOUT_HELPERS_H_ 2 | #define _LAYOUT_HELPERS_H_ 3 | 4 | #if defined(__APPLE__) 5 | #include 6 | #else 7 | #include 8 | #endif 9 | 10 | namespace Layout { 11 | 12 | enum location { UL, UR, LL, LR }; 13 | 14 | struct info { 15 | int window_width; 16 | int window_height; 17 | }; 18 | 19 | const struct info &setup(int image_width, int image_height); 
20 | 21 | void draw_image(enum location location, const uint8_t *data, int width, int height, const std::string &label); 22 | void draw_texture(enum location location, GLuint texture_id, int width, int height, const std::string &label); 23 | } 24 | 25 | #endif 26 | 27 | -------------------------------------------------------------------------------- /apps/opengl_demo/opengl_helpers.h: -------------------------------------------------------------------------------- 1 | #ifndef _OPENGL_HELPERS_H_ 2 | #define _OPENGL_HELPERS_H_ 3 | 4 | #include 5 | 6 | #if defined(__APPLE__) 7 | #include 8 | #else 9 | #include 10 | #endif 11 | 12 | namespace OpenGLHelpers { 13 | void setup(float dpi_scale); 14 | GLuint create_texture(int width, int height, const uint8_t *data); 15 | void delete_texture(GLuint texture_id); 16 | void display_texture(GLuint texture_id, float x0, float x1, float y0, float y1); 17 | void draw_text(const std::string &text, float x, float y); 18 | } 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /apps/opengl_demo/png_helpers.h: -------------------------------------------------------------------------------- 1 | #ifndef _PNG_HELPERS_ 2 | #define _PNG_HELPERS_ 3 | 4 | namespace PNGHelpers { 5 | 6 | struct image_info { 7 | unsigned int width; 8 | unsigned int height; 9 | const uint8_t *data; 10 | }; 11 | 12 | struct image_info load(const std::string &filepath); 13 | } 14 | 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /apps/opengl_demo/timer.h: -------------------------------------------------------------------------------- 1 | #ifndef _TIMER_H_ 2 | #define _TIMER_H_ 3 | 4 | #include 5 | 6 | namespace Timer 7 | { 8 | struct info { 9 | const std::string what; 10 | std::chrono::time_point time; 11 | }; 12 | 13 | struct info start(const std::string &what); 14 | std::string report(const struct info &); 15 | } 16 | 17 | #endif 18 | 
-------------------------------------------------------------------------------- /apps/openglcompute/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | android update project -p . --target android-21 4 | make jni-libs 5 | ant debug 6 | adb install -r bin/HelloHalideOpenGLCompute-debug.apk 7 | adb logcat -c 8 | adb shell am start -n com.example.hellohalideopenglcompute/.HalideOpenGLComputeActivity 9 | adb logcat | grep "^I/oglc" 10 | -------------------------------------------------------------------------------- /apps/openglcompute/jni/Application.mk: -------------------------------------------------------------------------------- 1 | # TODO(aam): Confirm that application builds and runs for all supported targets: 2 | # APP_ABI := armeabi armeabi-v7a arm64-v8a mips x86_64 x86 3 | APP_ABI := armeabi-v7a 4 | APP_PLATFORM := android-17 5 | 6 | APP_STL := c++_static 7 | LOCAL_C_INCLUDES += ${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/4.8/include 8 | -------------------------------------------------------------------------------- /apps/openglcompute/res/drawable-hdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/openglcompute/res/drawable-hdpi/ic_launcher.png -------------------------------------------------------------------------------- /apps/openglcompute/res/drawable-ldpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/openglcompute/res/drawable-ldpi/ic_launcher.png -------------------------------------------------------------------------------- /apps/openglcompute/res/drawable-mdpi/ic_launcher.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/openglcompute/res/drawable-mdpi/ic_launcher.png -------------------------------------------------------------------------------- /apps/openglcompute/res/drawable-xhdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/apps/openglcompute/res/drawable-xhdpi/ic_launcher.png -------------------------------------------------------------------------------- /apps/openglcompute/res/layout/main.xml: -------------------------------------------------------------------------------- 1 | 2 | 7 | 8 | 14 | 15 | -------------------------------------------------------------------------------- /apps/openglcompute/res/values/strings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | HelloHalideAndroidOpenGLCompute 4 | 5 | -------------------------------------------------------------------------------- /apps/stencil_chain/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(stencil_chain_process process.cpp) 2 | halide_use_image_io(stencil_chain_process) 3 | 4 | halide_generator(stencil_chain.generator SRCS stencil_chain_generator.cpp) 5 | foreach(AUTO_SCHEDULE false true) 6 | if(${AUTO_SCHEDULE}) 7 | set(LIB stencil_chain_auto_schedule) 8 | else() 9 | set(LIB stencil_chain) 10 | endif() 11 | halide_library_from_generator(${LIB} 12 | GENERATOR stencil_chain.generator 13 | GENERATOR_ARGS auto_schedule=${AUTO_SCHEDULE}) 14 | target_link_libraries(stencil_chain_process PRIVATE ${LIB}) 15 | endforeach() 16 | -------------------------------------------------------------------------------- /apps/support/viz_auto.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # $1 = 
filter cmd to run, including args 4 | # $2 = HalideTraceViz executable 5 | # $3 = path to output mp4 6 | 7 | rm -rf "$3" 8 | 9 | # Use a named pipe for the $1 -> HTV pipe, just in case 10 | # the exe in $1 writes any random output to stdout. 11 | PIPE=/tmp/halide_viz_auto_pipe 12 | rm -rf $PIPE 13 | mkfifo $PIPE 14 | 15 | HL_TRACE_FILE=${PIPE} HL_NUMTHREADS=8 $1 & 16 | 17 | $2 --auto_layout --ignore_tags 0<${PIPE} | \ 18 | ${HL_AVCONV} -y -f rawvideo -pix_fmt bgr32 -s 1920x1080 -i /dev/stdin -c:v h264 "$3" 19 | -------------------------------------------------------------------------------- /apps/wavelet/README.md: -------------------------------------------------------------------------------- 1 | wavelet is a trivial app designed to show ahead-of-time Generator usage (with both Make and CMake), as opposed to using direct calls to (e.g.) Func::compile_to_file(). 2 | -------------------------------------------------------------------------------- /apps/wavelet/daubechies_constants.h: -------------------------------------------------------------------------------- 1 | #ifndef DAUBECHIES_CONSTANTS_H_ 2 | #define DAUBECHIES_CONSTANTS_H_ 3 | 4 | const float D0 = 0.4829629131445341f; 5 | const float D1 = 0.83651630373780772f; 6 | const float D2 = 0.22414386804201339f; 7 | const float D3 = -0.12940952255126034f; 8 | 9 | #endif // DAUBECHIES_CONSTANTS_H_ 10 | -------------------------------------------------------------------------------- /apps/wavelet/haar_x_generator.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | #include "daubechies_constants.h" 4 | 5 | namespace { 6 | 7 | Halide::Var x("x"), y("y"), c("c"); 8 | 9 | class haar_x : public Halide::Generator { 10 | public: 11 | Input> in_{"in" , 2}; 12 | Output> out_{"out" , 3}; 13 | 14 | void generate() { 15 | Func in = Halide::BoundaryConditions::repeat_edge(in_); 16 | 17 | out_(x, y, c) = select(c == 0, 18 | (in(2*x, y) + in(2*x+1, y)), 19 | 
(in(2*x, y) - in(2*x+1, y)))/2; 20 | out_.unroll(c, 2); 21 | } 22 | }; 23 | 24 | } // namespace 25 | 26 | HALIDE_REGISTER_GENERATOR(haar_x, haar_x) 27 | -------------------------------------------------------------------------------- /apps/wavelet/inverse_haar_x_generator.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | #include "daubechies_constants.h" 4 | 5 | namespace { 6 | 7 | Halide::Var x("x"), y("y"), c("c"); 8 | 9 | class inverse_haar_x : public Halide::Generator { 10 | public: 11 | Input> in_{"in" , 3}; 12 | Output> out_{"out" , 2}; 13 | 14 | void generate() { 15 | Func in = Halide::BoundaryConditions::repeat_edge(in_); 16 | 17 | out_(x, y) = select(x%2 == 0, 18 | in(x/2, y, 0) + in(x/2, y, 1), 19 | in(x/2, y, 0) - in(x/2, y, 1)); 20 | out_.unroll(x, 2); 21 | } 22 | }; 23 | 24 | } // namespace 25 | 26 | HALIDE_REGISTER_GENERATOR(inverse_haar_x, inverse_haar_x) 27 | -------------------------------------------------------------------------------- /python_bindings/correctness/bit_generator.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | using namespace Halide; 4 | 5 | class BitGenerator : public Halide::Generator { 6 | public: 7 | Input> bit_input{"input_uint1", 1}; 8 | Input bit_constant{"constant_uint1"}; 9 | 10 | Output> bit_output{"output_uint1", 1}; 11 | 12 | Var x, y, z; 13 | 14 | void generate() { 15 | bit_output(x) = bit_input(x) + bit_constant; 16 | } 17 | 18 | void schedule() { 19 | } 20 | }; 21 | 22 | HALIDE_REGISTER_GENERATOR(BitGenerator, bit) 23 | -------------------------------------------------------------------------------- /python_bindings/correctness/rdom.py: -------------------------------------------------------------------------------- 1 | import halide as hl 2 | 3 | def test_rdom(): 4 | x = hl.Var("x") 5 | y = hl.Var("y") 6 | 7 | diagonal = hl.Func("diagonal") 8 | diagonal[x, y] = 1 9 | 10 | 
domain_width = 10 11 | domain_height = 10 12 | 13 | r = hl.RDom([(0, domain_width), (0, domain_height)]) 14 | r.where(r.x <= r.y) 15 | 16 | diagonal[r.x, r.y] += 2 17 | output = diagonal.realize(domain_width, domain_height) 18 | 19 | for iy in range(domain_height): 20 | for ix in range(domain_width): 21 | if ix <= iy: 22 | assert output[ix, iy] == 3 23 | else: 24 | assert output[ix, iy] == 1 25 | 26 | return 0 27 | 28 | if __name__ == "__main__": 29 | test_rdom() 30 | -------------------------------------------------------------------------------- /python_bindings/correctness/user_context_generator.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | using namespace Halide; 4 | 5 | class UserContextGenerator : public Halide::Generator { 6 | public: 7 | Input constant{"constant"}; 8 | Output> output{"output", 1}; 9 | 10 | Var x; 11 | 12 | void generate() { 13 | output(x) = constant; 14 | } 15 | 16 | void schedule() { 17 | } 18 | }; 19 | 20 | HALIDE_REGISTER_GENERATOR(UserContextGenerator, user_context) 21 | -------------------------------------------------------------------------------- /python_bindings/correctness/user_context_test.py: -------------------------------------------------------------------------------- 1 | import array 2 | import user_context 3 | 4 | 5 | def test(): 6 | output = bytearray("\0\0\0\0", "ascii") 7 | user_context.user_context(None, ord('q'), output) 8 | assert output == bytearray("qqqq", "ascii") 9 | 10 | 11 | if __name__ == "__main__": 12 | test() 13 | -------------------------------------------------------------------------------- /python_bindings/requirements.txt: -------------------------------------------------------------------------------- 1 | # This file lists the python dependencies, 2 | # it is meant to be used with pip (and/or possibly virtualenv, pbundler, etc) 3 | # See http://pip.readthedocs.org/en/latest/user_guide.html#requirements-files 4 | # You will 
probably want to run 5 | # something similar to `pip3 install --user -r requirements.txt` 6 | 7 | # science packages 8 | numpy 9 | scipy 10 | pillow 11 | -------------------------------------------------------------------------------- /python_bindings/src/PyArgument.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYARGUMENT_H 2 | #define HALIDE_PYTHON_BINDINGS_PYARGUMENT_H 3 | 4 | #include "PyHalide.h" 5 | 6 | namespace Halide { 7 | namespace PythonBindings { 8 | 9 | void define_argument(py::module &m); 10 | 11 | } // namespace PythonBindings 12 | } // namespace Halide 13 | 14 | #endif // HALIDE_PYTHON_BINDINGS_PYARGUMENT_H 15 | -------------------------------------------------------------------------------- /python_bindings/src/PyBoundaryConditions.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYBOUNDARYCONDITIONS_H 2 | #define HALIDE_PYTHON_BINDINGS_PYBOUNDARYCONDITIONS_H 3 | 4 | #include "PyHalide.h" 5 | 6 | namespace Halide { 7 | namespace PythonBindings { 8 | 9 | void define_boundary_conditions(py::module &m); 10 | 11 | } // namespace PythonBindings 12 | } // namespace Halide 13 | 14 | #endif // HALIDE_PYTHON_BINDINGS_PYBOUNDARYCONDITIONS_H 15 | -------------------------------------------------------------------------------- /python_bindings/src/PyBuffer.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYBUFFER_H 2 | #define HALIDE_PYTHON_BINDINGS_PYBUFFER_H 3 | 4 | #include "PyHalide.h" 5 | 6 | namespace Halide { 7 | namespace PythonBindings { 8 | 9 | void define_buffer(py::module &m); 10 | 11 | } // namespace PythonBindings 12 | } // namespace Halide 13 | 14 | #endif // HALIDE_PYTHON_BINDINGS_PYBUFFER_H 15 | -------------------------------------------------------------------------------- /python_bindings/src/PyConciseCasts.h: 
-------------------------------------------------------------------------------- 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYCONCISECASTS_H 2 | #define HALIDE_PYTHON_BINDINGS_PYCONCISECASTS_H 3 | 4 | #include "PyHalide.h" 5 | 6 | namespace Halide { 7 | namespace PythonBindings { 8 | 9 | void define_concise_casts(py::module &m); 10 | 11 | } // namespace PythonBindings 12 | } // namespace Halide 13 | 14 | #endif // HALIDE_PYTHON_BINDINGS_PYCONCISECASTS_H 15 | -------------------------------------------------------------------------------- /python_bindings/src/PyDerivative.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYDERIVATIVE_H 2 | #define HALIDE_PYTHON_BINDINGS_PYDERIVATIVE_H 3 | 4 | #include "PyHalide.h" 5 | 6 | namespace Halide { 7 | namespace PythonBindings { 8 | 9 | void define_derivative(py::module &m); 10 | 11 | } // namespace PythonBindings 12 | } // namespace Halide 13 | 14 | #endif // HALIDE_PYTHON_BINDINGS_PYDERIVATIVE_H 15 | -------------------------------------------------------------------------------- /python_bindings/src/PyEnums.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYENUMS_H 2 | #define HALIDE_PYTHON_BINDINGS_PYENUMS_H 3 | 4 | #include "PyHalide.h" 5 | 6 | namespace Halide { 7 | namespace PythonBindings { 8 | 9 | void define_enums(py::module &m); 10 | 11 | } // namespace PythonBindings 12 | } // namespace Halide 13 | 14 | #endif // HALIDE_PYTHON_BINDINGS_PYENUMS_H 15 | -------------------------------------------------------------------------------- /python_bindings/src/PyError.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYERROR_H 2 | #define HALIDE_PYTHON_BINDINGS_PYERROR_H 3 | 4 | #include "PyHalide.h" 5 | 6 | namespace Halide { 7 | namespace PythonBindings { 8 | 9 | void define_error(py::module &m); 10 | 11 | } // namespace 
PythonBindings 12 | } // namespace Halide 13 | 14 | #endif // HALIDE_PYTHON_BINDINGS_PYERROR_H 15 | -------------------------------------------------------------------------------- /python_bindings/src/PyExpr.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYEXPR_H 2 | #define HALIDE_PYTHON_BINDINGS_PYEXPR_H 3 | 4 | #include "PyHalide.h" 5 | 6 | namespace Halide { 7 | namespace PythonBindings { 8 | 9 | void define_expr(py::module &m); 10 | 11 | } // namespace PythonBindings 12 | } // namespace Halide 13 | 14 | #endif // HALIDE_PYTHON_BINDINGS_PYEXPR_H 15 | -------------------------------------------------------------------------------- /python_bindings/src/PyExternFuncArgument.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYEXTERNFUNCARGUMENT_H 2 | #define HALIDE_PYTHON_BINDINGS_PYEXTERNFUNCARGUMENT_H 3 | 4 | #include "PyHalide.h" 5 | 6 | namespace Halide { 7 | namespace PythonBindings { 8 | 9 | void define_extern_func_argument(py::module &m); 10 | 11 | } // namespace PythonBindings 12 | } // namespace Halide 13 | 14 | #endif // HALIDE_PYTHON_BINDINGS_PYEXTERNFUNCARGUMENT_H 15 | -------------------------------------------------------------------------------- /python_bindings/src/PyFunc.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYFUNC_H 2 | #define HALIDE_PYTHON_BINDINGS_PYFUNC_H 3 | 4 | #include "PyHalide.h" 5 | 6 | namespace Halide { 7 | namespace PythonBindings { 8 | 9 | void define_func(py::module &m); 10 | 11 | } // namespace PythonBindings 12 | } // namespace Halide 13 | 14 | #endif // HALIDE_PYTHON_BINDINGS_PYFUNC_H 15 | -------------------------------------------------------------------------------- /python_bindings/src/PyFuncRef.h: -------------------------------------------------------------------------------- 1 | #ifndef 
HALIDE_PYTHON_BINDINGS_PYFUNC_REF_H 2 | #define HALIDE_PYTHON_BINDINGS_PYFUNC_REF_H 3 | 4 | #include "PyHalide.h" 5 | 6 | namespace Halide { 7 | namespace PythonBindings { 8 | 9 | void define_func_ref(py::module &m); 10 | 11 | } // namespace PythonBindings 12 | } // namespace Halide 13 | 14 | #endif // HALIDE_PYTHON_BINDINGS_PYFUNC_REF_H 15 | -------------------------------------------------------------------------------- /python_bindings/src/PyIROperator.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYIROPERATOR_H 2 | #define HALIDE_PYTHON_BINDINGS_PYIROPERATOR_H 3 | 4 | #include "PyHalide.h" 5 | 6 | namespace Halide { 7 | namespace PythonBindings { 8 | 9 | void define_operators(py::module &m); 10 | 11 | } // namespace PythonBindings 12 | } // namespace Halide 13 | 14 | #endif // HALIDE_PYTHON_BINDINGS_PYIROPERATOR_H 15 | -------------------------------------------------------------------------------- /python_bindings/src/PyImageParam.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYIMAGEPARAM_H 2 | #define HALIDE_PYTHON_BINDINGS_PYIMAGEPARAM_H 3 | 4 | #include "PyHalide.h" 5 | 6 | namespace Halide { 7 | namespace PythonBindings { 8 | 9 | void define_image_param(py::module &m); 10 | 11 | } // namespace PythonBindings 12 | } // namespace Halide 13 | 14 | #endif // HALIDE_PYTHON_BINDINGS_PYIMAGEPARAM_H 15 | -------------------------------------------------------------------------------- /python_bindings/src/PyInlineReductions.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYINLINEREDUCTIONS_H 2 | #define HALIDE_PYTHON_BINDINGS_PYINLINEREDUCTIONS_H 3 | 4 | #include "PyHalide.h" 5 | 6 | namespace Halide { 7 | namespace PythonBindings { 8 | 9 | void define_inline_reductions(py::module &m); 10 | 11 | } // namespace PythonBindings 12 | } // namespace Halide 
13 | 14 | #endif // HALIDE_PYTHON_BINDINGS_PYINLINEREDUCTIONS_H 15 | -------------------------------------------------------------------------------- /python_bindings/src/PyLambda.cpp: -------------------------------------------------------------------------------- 1 | #include "PyLambda.h" 2 | 3 | namespace Halide { 4 | namespace PythonBindings { 5 | 6 | void define_lambda(py::module &m) { 7 | // TODO: 'lambda' is a reserved word in Python, so we 8 | // can't use it for a function. Using 'lambda_func' for now. 9 | m.def("lambda_func", [](py::args args) -> Func { 10 | auto vars = args_to_vector(args, 0, 1); 11 | Expr e = args[args.size() - 1].cast(); 12 | Func f("lambda" + Internal::unique_name('_')); 13 | f(vars) = e; 14 | return f; 15 | }); 16 | } 17 | 18 | } // namespace PythonBindings 19 | } // namespace Halide 20 | -------------------------------------------------------------------------------- /python_bindings/src/PyLambda.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYLAMBDA_H 2 | #define HALIDE_PYTHON_BINDINGS_PYLAMBDA_H 3 | 4 | #include "PyHalide.h" 5 | 6 | namespace Halide { 7 | namespace PythonBindings { 8 | 9 | void define_lambda(py::module &m); 10 | 11 | } // namespace PythonBindings 12 | } // namespace Halide 13 | 14 | #endif // HALIDE_PYTHON_BINDINGS_PYLAMBDA_H 15 | -------------------------------------------------------------------------------- /python_bindings/src/PyLoopLevel.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYLOOPLEVEL_H 2 | #define HALIDE_PYTHON_BINDINGS_PYLOOPLEVEL_H 3 | 4 | #include "PyHalide.h" 5 | 6 | namespace Halide { 7 | namespace PythonBindings { 8 | 9 | void define_loop_level(py::module &m); 10 | 11 | } // namespace PythonBindings 12 | } // namespace Halide 13 | 14 | #endif // HALIDE_PYTHON_BINDINGS_PYLOOPLEVEL_H 15 | 
-------------------------------------------------------------------------------- /python_bindings/src/PyMachineParams.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYMACHINEPARAMS_H 2 | #define HALIDE_PYTHON_BINDINGS_PYMACHINEPARAMS_H 3 | 4 | #include "PyHalide.h" 5 | 6 | namespace Halide { 7 | namespace PythonBindings { 8 | 9 | void define_machine_params(py::module &m); 10 | 11 | } // namespace PythonBindings 12 | } // namespace Halide 13 | 14 | #endif // HALIDE_PYTHON_BINDINGS_PYMACHINEPARAMS_H 15 | -------------------------------------------------------------------------------- /python_bindings/src/PyModule.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYMODULE_H 2 | #define HALIDE_PYTHON_BINDINGS_PYMODULE_H 3 | 4 | #include "PyHalide.h" 5 | 6 | namespace Halide { 7 | namespace PythonBindings { 8 | 9 | void define_module(py::module &m); 10 | 11 | } // namespace PythonBindings 12 | } // namespace Halide 13 | 14 | #endif // HALIDE_PYTHON_BINDINGS_PYMODULE_H 15 | -------------------------------------------------------------------------------- /python_bindings/src/PyOutputs.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYOUTPUTS_H 2 | #define HALIDE_PYTHON_BINDINGS_PYOUTPUTS_H 3 | 4 | #include "PyHalide.h" 5 | 6 | namespace Halide { 7 | namespace PythonBindings { 8 | 9 | void define_outputs(py::module &m); 10 | 11 | } // namespace PythonBindings 12 | } // namespace Halide 13 | 14 | #endif // HALIDE_PYTHON_BINDINGS_PYOUTPUTS_H 15 | -------------------------------------------------------------------------------- /python_bindings/src/PyParam.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYPARAM_H 2 | #define HALIDE_PYTHON_BINDINGS_PYPARAM_H 3 | 4 | #include "PyHalide.h" 5 | 6 | 
namespace Halide { 7 | namespace PythonBindings { 8 | 9 | void define_param(py::module &m); 10 | 11 | } // namespace PythonBindings 12 | } // namespace Halide 13 | 14 | #endif // HALIDE_PYTHON_BINDINGS_PYPARAM_H 15 | -------------------------------------------------------------------------------- /python_bindings/src/PyPipeline.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYPIPELINE_H 2 | #define HALIDE_PYTHON_BINDINGS_PYPIPELINE_H 3 | 4 | #include "PyHalide.h" 5 | 6 | namespace Halide { 7 | namespace PythonBindings { 8 | 9 | void define_pipeline(py::module &m); 10 | 11 | } // namespace PythonBindings 12 | } // namespace Halide 13 | 14 | #endif // HALIDE_PYTHON_BINDINGS_PYPIPELINE_H 15 | -------------------------------------------------------------------------------- /python_bindings/src/PyRDom.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYRDOM_H 2 | #define HALIDE_PYTHON_BINDINGS_PYRDOM_H 3 | 4 | #include "PyHalide.h" 5 | 6 | namespace Halide { 7 | namespace PythonBindings { 8 | 9 | void define_rdom(py::module &m); 10 | 11 | } // namespace PythonBindings 12 | } // namespace Halide 13 | 14 | #endif // HALIDE_PYTHON_BINDINGS_PYRDOM_H 15 | -------------------------------------------------------------------------------- /python_bindings/src/PyStage.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYSTAGE_H 2 | #define HALIDE_PYTHON_BINDINGS_PYSTAGE_H 3 | 4 | #include "PyHalide.h" 5 | 6 | namespace Halide { 7 | namespace PythonBindings { 8 | 9 | void define_stage(py::module &m); 10 | 11 | } // namespace PythonBindings 12 | } // namespace Halide 13 | 14 | #endif // HALIDE_PYTHON_BINDINGS_PYSTAGE_H 15 | -------------------------------------------------------------------------------- /python_bindings/src/PyTarget.h:
-------------------------------------------------------------------------------- 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYTARGET_H 2 | #define HALIDE_PYTHON_BINDINGS_PYTARGET_H 3 | 4 | #include "PyHalide.h" 5 | 6 | namespace Halide { 7 | namespace PythonBindings { 8 | 9 | void define_target(py::module &m); 10 | 11 | } // namespace PythonBindings 12 | } // namespace Halide 13 | 14 | #endif // HALIDE_PYTHON_BINDINGS_PYTARGET_H 15 | -------------------------------------------------------------------------------- /python_bindings/src/PyTuple.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYTUPLE_H 2 | #define HALIDE_PYTHON_BINDINGS_PYTUPLE_H 3 | 4 | #include "PyHalide.h" 5 | 6 | namespace Halide { 7 | namespace PythonBindings { 8 | 9 | void define_tuple(py::module &m); 10 | 11 | // Templated function to allow for use with Realization as well as Tuple 12 | template 13 | inline py::tuple to_python_tuple(const T &ht) { 14 | py::tuple pt(ht.size()); 15 | for (size_t i = 0; i < ht.size(); i++) { 16 | pt[i] = py::cast(ht[i]); 17 | } 18 | return pt; 19 | } 20 | 21 | } // namespace PythonBindings 22 | } // namespace Halide 23 | 24 | #endif // HALIDE_PYTHON_BINDINGS_PYTUPLE_H 25 | -------------------------------------------------------------------------------- /python_bindings/src/PyType.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYTYPE_H 2 | #define HALIDE_PYTHON_BINDINGS_PYTYPE_H 3 | 4 | #include "PyHalide.h" 5 | 6 | namespace Halide { 7 | namespace PythonBindings { 8 | 9 | void define_type(py::module &m); 10 | 11 | std::string halide_type_to_string(const Type &type); 12 | 13 | } // namespace PythonBindings 14 | } // namespace Halide 15 | 16 | #endif // HALIDE_PYTHON_BINDINGS_PYTYPE_H 17 | -------------------------------------------------------------------------------- /python_bindings/src/PyVar.h: 
-------------------------------------------------------------------------------- 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYVAR_H 2 | #define HALIDE_PYTHON_BINDINGS_PYVAR_H 3 | 4 | #include "PyHalide.h" 5 | 6 | namespace Halide { 7 | namespace PythonBindings { 8 | 9 | void define_var(py::module &m); 10 | 11 | } // namespace PythonBindings 12 | } // namespace Halide 13 | 14 | #endif // HALIDE_PYTHON_BINDINGS_PYVAR_H 15 | -------------------------------------------------------------------------------- /python_bindings/src/PyVarOrRVar.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_PYTHON_BINDINGS_PYFUNC_VARORRVAR_H 2 | #define HALIDE_PYTHON_BINDINGS_PYFUNC_VARORRVAR_H 3 | 4 | #include "PyHalide.h" 5 | 6 | namespace Halide { 7 | namespace PythonBindings { 8 | 9 | void define_var_or_rvar(py::module &m); 10 | 11 | } // namespace PythonBindings 12 | } // namespace Halide 13 | 14 | #endif // HALIDE_PYTHON_BINDINGS_PYFUNC_VARORRVAR_H 15 | -------------------------------------------------------------------------------- /src/AddParameterChecks.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_INTERNAL_ADD_PARAMETER_CHECKS_H 2 | #define HALIDE_INTERNAL_ADD_PARAMETER_CHECKS_H 3 | 4 | /** \file 5 | * 6 | * Defines the lowering pass that adds the assertions that validate 7 | * scalar parameters. 8 | */ 9 | 10 | #include "IR.h" 11 | 12 | namespace Halide { 13 | 14 | struct Target; 15 | 16 | namespace Internal { 17 | 18 | /** Insert checks to make sure that all referenced parameters meet 19 | * their constraints. Also injects any custom requirements provided 20 | * by the user. 
*/ 21 | Stmt add_parameter_checks(const std::vector &requirements, Stmt s, const Target &t); 22 | 23 | } // namespace Internal 24 | } // namespace Halide 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /src/AlignLoads.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_ALIGN_LOADS_H 2 | #define HALIDE_ALIGN_LOADS_H 3 | 4 | /** \file 5 | * Defines a lowering pass that rewrites unaligned loads into 6 | * sequences of aligned loads. 7 | */ 8 | #include "IR.h" 9 | #include "ModulusRemainder.h" 10 | #include "Scope.h" 11 | #include "Target.h" 12 | 13 | namespace Halide { 14 | namespace Internal { 15 | 16 | /** Attempt to rewrite unaligned loads from buffers which are known to 17 | * be aligned to instead load aligned vectors that cover the original 18 | * load, and then slice the original load out of the aligned 19 | * vectors. */ 20 | Stmt align_loads(Stmt s, int alignment); 21 | 22 | } // namespace Internal 23 | } // namespace Halide 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /src/AllocationBoundsInference.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_ALLOCATION_BOUNDS_INFERENCE_H 2 | #define HALIDE_ALLOCATION_BOUNDS_INFERENCE_H 3 | 4 | /** \file 5 | * Defines the lowering pass that determines how large internal allocations should be. 6 | */ 7 | 8 | #include "Bounds.h" 9 | #include "IR.h" 10 | 11 | namespace Halide { 12 | namespace Internal { 13 | 14 | /** Take a partially lowered statement with Realize nodes in terms of 15 | * variables, and define values for those variables.
*/ 16 | Stmt allocation_bounds_inference(Stmt s, 17 | const std::map &env, 18 | const std::map, Interval> &func_bounds); 19 | } // namespace Internal 20 | } // namespace Halide 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /src/AsyncProducers.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_ASYNC_PRODUCERS_H 2 | #define HALIDE_ASYNC_PRODUCERS_H 3 | 4 | /** \file 5 | * Defines the lowering pass that injects task parallelism for producers that are scheduled as async. 6 | */ 7 | 8 | #include "IR.h" 9 | 10 | namespace Halide { 11 | namespace Internal { 12 | 13 | Stmt fork_async_producers(Stmt s, const std::map &env); 14 | 15 | } 16 | } 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /src/BoundSmallAllocations.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_BOUND_SMALL_ALLOCATIONS 2 | #define HALIDE_BOUND_SMALL_ALLOCATIONS 3 | 4 | #include "IR.h" 5 | 6 | /** \file 7 | * Defines the lowering pass that attempts to rewrite small 8 | * allocations to have constant size. 9 | */ 10 | 11 | namespace Halide { 12 | namespace Internal { 13 | 14 | /** \file 15 | * 16 | * Use bounds analysis to attempt to bound the sizes of small 17 | * allocations. Inside GPU kernels this is necessary in order to 18 | * compile. On the CPU this is also useful, because it prevents malloc 19 | * calls for (provably) tiny allocations. 
*/ 20 | Stmt bound_small_allocations(const Stmt &s); 21 | 22 | } // namespace Internal 23 | } // namespace Halide 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /src/CanonicalizeGPUVars.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_CANONICALIZE_GPU_VARS_H 2 | #define HALIDE_CANONICALIZE_GPU_VARS_H 3 | 4 | /** \file 5 | * Defines the lowering pass that canonicalizes the GPU var names. 6 | */ 7 | 8 | #include "IR.h" 9 | 10 | namespace Halide { 11 | namespace Internal { 12 | 13 | /** Canonicalize GPU var names into some pre-determined block/thread names 14 | * (i.e. __block_id_x, __thread_id_x, etc.). The x/y/z/w order is determined 15 | * by the nesting order: innermost is assigned to x and so on. */ 16 | Stmt canonicalize_gpu_vars(Stmt s); 17 | 18 | } // namespace Internal 19 | } // namespace Halide 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /src/CodeGen_RISCV.cpp: -------------------------------------------------------------------------------- 1 | #include "CodeGen_RISCV.h" 2 | #include "Util.h" 3 | #include "LLVM_Headers.h" 4 | 5 | namespace Halide { 6 | namespace Internal { 7 | 8 | using std::string; 9 | 10 | using namespace llvm; 11 | 12 | CodeGen_RISCV::CodeGen_RISCV(Target t) : CodeGen_Posix(t) { 13 | #if !defined(WITH_RISCV) 14 | user_error << "llvm build not configured with RISCV target enabled.\n"; 15 | #endif 16 | } 17 | 18 | string CodeGen_RISCV::mcpu() const { 19 | return ""; 20 | } 21 | 22 | string CodeGen_RISCV::mattrs() const { 23 | return ""; 24 | } 25 | 26 | bool CodeGen_RISCV::use_soft_float_abi() const { 27 | return false; 28 | } 29 | 30 | int CodeGen_RISCV::native_vector_bits() const { 31 | return 128; 32 | } 33 | 34 | }} 35 | -------------------------------------------------------------------------------- /src/Debug.cpp:
-------------------------------------------------------------------------------- 1 | #include "Debug.h" 2 | 3 | namespace Halide { 4 | namespace Internal { 5 | 6 | int debug::debug_level() { 7 | static int cached_debug_level = ([]() -> int { 8 | std::string lvl = get_env_variable("HL_DEBUG_CODEGEN"); 9 | return !lvl.empty() ? atoi(lvl.c_str()) : 0; 10 | })(); 11 | return cached_debug_level; 12 | } 13 | 14 | } // namespace Internal 15 | } // namespace Halide 16 | -------------------------------------------------------------------------------- /src/DebugArguments.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_INTERNAL_DEBUG_ARGUMENTS_H 2 | #define HALIDE_INTERNAL_DEBUG_ARGUMENTS_H 3 | 4 | #include "Target.h" 5 | 6 | /** \file 7 | * 8 | * Defines a lowering pass that injects debug statements inside a 9 | * LoweredFunc. Intended to be used when Target::Debug is on. 10 | */ 11 | 12 | namespace Halide { 13 | namespace Internal { 14 | 15 | struct LoweredFunc; 16 | 17 | /** Injects debug prints in a LoweredFunc that describe the target and 18 | * arguments. Mutates the given func. */ 19 | void debug_arguments(LoweredFunc *func, const Target &t); 20 | 21 | } // namespace Internal 22 | } // namespace Halide 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /src/EarlyFree.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_EARLY_FREE_H 2 | #define HALIDE_EARLY_FREE_H 3 | 4 | /** \file 5 | * Defines the lowering pass that injects markers just after 6 | * the last use of each buffer so that they can potentially be freed 7 | * earlier. 8 | */ 9 | 10 | #include "IR.h" 11 | 12 | namespace Halide { 13 | namespace Internal { 14 | 15 | /** Take a statement with allocations and inject markers (of the form 16 | * of calls to "mark buffer dead") after the last use of each 17 | * allocation. 
Targets may use this to free buffers earlier than the 18 | * close of their Allocate node. */ 19 | Stmt inject_early_frees(Stmt s); 20 | 21 | } // namespace Internal 22 | } // namespace Halide 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /src/HexagonOffload.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_HEXAGON_OFFLOAD_H 2 | #define HALIDE_HEXAGON_OFFLOAD_H 3 | 4 | /** \file 5 | * Defines a lowering pass to pull loops marked with the 6 | * Hexagon device API to a separate module, and call them through the 7 | * Hexagon host runtime module. 8 | */ 9 | 10 | #include "Module.h" 11 | 12 | namespace Halide { 13 | namespace Internal { 14 | 15 | /** Pull loops marked with the Hexagon device API to a separate 16 | * module, and call them through the Hexagon host runtime module. */ 17 | Stmt inject_hexagon_rpc(Stmt s, const Target &host_target, Module &module); 18 | 19 | Buffer compile_module_to_hexagon_shared_object(const Module &device_code); 20 | 21 | } // namespace Internal 22 | } // namespace Halide 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /src/InjectHostDevBufferCopies.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_HOST_GPU_BUFFER_COPIES_H 2 | #define HALIDE_HOST_GPU_BUFFER_COPIES_H 3 | 4 | /** \file 5 | * Defines the lowering passes that deal with host and device buffer flow. 6 | */ 7 | 8 | #include "IR.h" 9 | #include "Target.h" 10 | 11 | namespace Halide { 12 | namespace Internal { 13 | 14 | /** A helper function to call an extern function, and assert that it 15 | * returns 0. */ 16 | Stmt call_extern_and_assert(const std::string &name, const std::vector &args); 17 | 18 | /** Inject calls to halide_device_malloc, halide_copy_to_device, and 19 | * halide_copy_to_host as needed. 
*/ 20 | Stmt inject_host_dev_buffer_copies(Stmt s, const Target &t); 21 | 22 | } // namespace Internal 23 | } // namespace Halide 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /src/InjectOpenGLIntrinsics.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_INJECT_OPENGL_INTRINSICS_H 2 | #define HALIDE_INJECT_OPENGL_INTRINSICS_H 3 | 4 | /** \file 5 | * Defines the lowering pass that injects texture loads and texture 6 | * stores for opengl. 7 | */ 8 | 9 | #include "IR.h" 10 | 11 | namespace Halide { 12 | namespace Internal { 13 | 14 | /** Take a statement with kernel for loops and turn loads and 15 | * stores inside the loops into OpenGL texture load and store 16 | * intrinsics. Should only be run when the OpenGL target is active. */ 17 | Stmt inject_opengl_intrinsics(Stmt s); 18 | 19 | } // namespace Internal 20 | } // namespace Halide 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /src/LICM.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_LICM_H 2 | #define HALIDE_LICM_H 3 | 4 | /** \file 5 | * Methods for lifting loop invariants out of inner loops. 6 | */ 7 | 8 | #include "IR.h" 9 | 10 | namespace Halide { 11 | namespace Internal { 12 | 13 | /** Hoist loop-invariants out of inner loops. This is especially 14 | * important in cases where LLVM would not do it for us 15 | * automatically. For example, it hoists loop invariants out of cuda 16 | * kernels.
*/ 17 | Stmt loop_invariant_code_motion(Stmt); 18 | 19 | } // namespace Internal 20 | } // namespace Halide 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /src/Lerp.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_LERP_H 2 | #define HALIDE_LERP_H 3 | 4 | /** \file 5 | * Defines methods for converting a lerp intrinsic into Halide IR. 6 | */ 7 | 8 | #include "IR.h" 9 | 10 | namespace Halide { 11 | namespace Internal { 12 | 13 | /** Build Halide IR that computes a lerp. Used by codegen targets that 14 | * don't have a native lerp. */ 15 | Expr lower_lerp(Expr zero_val, Expr one_val, Expr weight); 16 | 17 | } // namespace Internal 18 | } // namespace Halide 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /src/LoopCarry.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_LOOP_CARRY_H 2 | #define HALIDE_LOOP_CARRY_H 3 | 4 | #include "Expr.h" 5 | 6 | namespace Halide { 7 | namespace Internal { 8 | 9 | /** Reuse loads done on previous loop iterations by stashing them in 10 | * induction variables instead of redoing the load. If the loads are 11 | * predicated, the predicates need to match. Can be an optimization or 12 | * pessimization depending on how good the L1 cache is on the architecture 13 | * and how many memory issue slots there are. Currently only intended 14 | * for Hexagon.
*/ 15 | Stmt loop_carry(Stmt, int max_carried_values = 8); 16 | 17 | } // namespace Internal 18 | } // namespace Halide 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /src/LowerWarpShuffles.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_LOWER_WARP_SHUFFLES_H 2 | #define HALIDE_LOWER_WARP_SHUFFLES_H 3 | 4 | /** \file 5 | * Defines the lowering pass that injects CUDA warp shuffle 6 | * instructions to access storage outside of a GPULane loop. 7 | */ 8 | 9 | #include "IR.h" 10 | 11 | namespace Halide { 12 | namespace Internal { 13 | 14 | /** Rewrite access to things stored outside the loop over GPU lanes to 15 | * use nvidia's warp shuffle instructions. */ 16 | Stmt lower_warp_shuffles(Stmt s); 17 | 18 | } // namespace Internal 19 | } // namespace Halide 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /src/Monotonic.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_MONOTONIC_H 2 | #define HALIDE_MONOTONIC_H 3 | 4 | /** \file 5 | * 6 | * Methods for computing whether expressions are monotonic 7 | */ 8 | 9 | #include "IR.h" 10 | #include "Scope.h" 11 | 12 | namespace Halide { 13 | namespace Internal { 14 | 15 | /** 16 | * Detect whether an expression is monotonic increasing in a variable, 17 | * decreasing, or unknown. 
18 | */ 19 | enum class Monotonic {Constant, Increasing, Decreasing, Unknown}; 20 | Monotonic is_monotonic(Expr e, const std::string &var, 21 | const Scope &scope = Scope::empty_scope()); 22 | 23 | void is_monotonic_test(); 24 | 25 | } // namespace Internal 26 | } // namespace Halide 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /src/PrintLoopNest.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_INTERNAL_PRINT_LOOP_NEST_H 2 | #define HALIDE_INTERNAL_PRINT_LOOP_NEST_H 3 | 4 | /** \file 5 | * 6 | * Defines methods to print out the loop nest corresponding to a schedule. 7 | */ 8 | 9 | #include 10 | #include 11 | 12 | namespace Halide { 13 | namespace Internal { 14 | 15 | class Function; 16 | 17 | /** Emit some simple pseudocode that shows the structure of the loop 18 | * nest specified by this pipeline's schedule, and the schedules of 19 | * the functions it uses. */ 20 | std::string print_loop_nest(const std::vector &output_funcs); 21 | 22 | } // namespace Internal 23 | } // namespace Halide 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /src/PythonExtensionGen.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_PYTHON_EXTENSION_GEN_H_ 2 | #define HALIDE_PYTHON_EXTENSION_GEN_H_ 3 | 4 | #include 5 | #include "Module.h" 6 | #include "Target.h" 7 | 8 | namespace Halide { 9 | 10 | class Module; 11 | struct Target; 12 | 13 | namespace Internal { 14 | 15 | class PythonExtensionGen { 16 | public: 17 | PythonExtensionGen(std::ostream &dest, const std::string &header_name, Target target); 18 | 19 | void compile(const Module &module); 20 | void compile(const LoweredFunc &f); 21 | private: 22 | void convert_buffer(std::string name, const LoweredArgument* arg); 23 | std::ostream &dest; 24 | std::string header_name; 25 | Target target; 26 | }; 27 | 28 | } 29 | } 30 
| 31 | #endif // HALIDE_PYTHON_EXTENSION_GEN_H_ 32 | -------------------------------------------------------------------------------- /src/Qualify.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_QUALIFY_H 2 | #define HALIDE_QUALIFY_H 3 | 4 | /** \file 5 | * 6 | * Defines methods for prefixing names in an expression with a prefix string. 7 | */ 8 | 9 | #include "IR.h" 10 | 11 | namespace Halide { 12 | namespace Internal { 13 | 14 | /** Prefix all variable names in the given expression with the prefix string. */ 15 | Expr qualify(const std::string &prefix, Expr value); 16 | 17 | } // namespace Internal 18 | } // namespace Halide 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /src/RemoveDeadAllocations.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_REMOVE_DEAD_ALLOCATIONS_H 2 | #define HALIDE_REMOVE_DEAD_ALLOCATIONS_H 3 | 4 | /** \file 5 | * Defines the lowering pass that removes allocate and free nodes that 6 | * are not used. 7 | */ 8 | 9 | #include "IR.h" 10 | 11 | namespace Halide { 12 | namespace Internal { 13 | 14 | /** Find Allocate/Free pairs that are never loaded from or stored to, 15 | * and remove them from the Stmt. This doesn't touch Realize/Call 16 | * nodes and so must be called after storage_flattening. 
17 | */ 18 | Stmt remove_dead_allocations(Stmt s); 19 | 20 | } // namespace Internal 21 | } // namespace Halide 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /src/RemoveExternLoops.cpp: -------------------------------------------------------------------------------- 1 | #include "RemoveExternLoops.h" 2 | #include "IRMutator.h" 3 | 4 | namespace Halide { 5 | namespace Internal { 6 | 7 | class RemoveExternLoops : public IRMutator { 8 | private: 9 | using IRMutator::visit; 10 | 11 | Stmt visit(const For *op) override { 12 | if (op->for_type != ForType::Extern) { 13 | return IRMutator::visit(op); 14 | } 15 | // Replace the for with its first iteration (implemented with a let). 16 | return LetStmt::make(op->name, op->min, mutate(op->body)); 17 | } 18 | }; 19 | 20 | Stmt remove_extern_loops(Stmt s) { 21 | return RemoveExternLoops().mutate(s); 22 | } 23 | 24 | } // namespace Internal 25 | } // namespace Halide 26 | -------------------------------------------------------------------------------- /src/RemoveExternLoops.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_REMOVE_EXTERN_LOOPS 2 | #define HALIDE_REMOVE_EXTERN_LOOPS 3 | 4 | #include "IR.h" 5 | 6 | /** \file 7 | * Defines a lowering pass that removes placeholder loops for extern stages. 8 | */ 9 | 10 | namespace Halide { 11 | namespace Internal { 12 | 13 | /** Removes placeholder loops for extern stages. */ 14 | Stmt remove_extern_loops(Stmt s); 15 | 16 | } // namespace Internal 17 | } // namespace Halide 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /src/RemoveUndef.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_REMOVE_UNDEF 2 | #define HALIDE_REMOVE_UNDEF 3 | 4 | #include "IR.h" 5 | 6 | /** \file 7 | * Defines a lowering pass that elides stores that depend on uninitialized values. 
8 | */ 9 | 10 | namespace Halide { 11 | namespace Internal { 12 | 13 | /** Removes stores that depend on undef values, and statements that 14 | * only contain such stores. */ 15 | Stmt remove_undef(Stmt s); 16 | 17 | } // namespace Internal 18 | } // namespace Halide 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /src/RoundingMode.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_ROUNDING_MODE_H 2 | #define HALIDE_ROUNDING_MODE_H 3 | namespace Halide { 4 | 5 | /** Rounding modes (IEEE754 2008 4.3 Rounding-direction attributes) */ 6 | enum class RoundingMode { 7 | TowardZero, ///< Round towards zero (IEEE754 2008 4.3.2) 8 | ToNearestTiesToEven, ///< Round to nearest, when there is a tie pick even integral significand (IEEE754 2008 4.3.1) 9 | ToNearestTiesToAway, ///< Round to nearest, when there is a tie pick value furthest away from zero (IEEE754 2008 4.3.1) 10 | TowardPositiveInfinity, ///< Round towards positive infinity (IEEE754 2008 4.3.2) 11 | TowardNegativeInfinity ///< Round towards negative infinity (IEEE754 2008 4.3.2) 12 | }; 13 | 14 | } // namespace Halide 15 | #endif 16 | -------------------------------------------------------------------------------- /src/SelectGPUAPI.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_INTERNAL_SELECT_GPU_API_H 2 | #define HALIDE_INTERNAL_SELECT_GPU_API_H 3 | 4 | #include "IR.h" 5 | #include "Target.h" 6 | 7 | /** \file 8 | * Defines a lowering pass that selects which GPU api to use for each 9 | * gpu for loop 10 | */ 11 | 12 | namespace Halide { 13 | namespace Internal { 14 | 15 | /** Replace for loops with GPU_Default device_api with an actual 16 | * device API depending on what's enabled in the target. 
Choose the 17 | * first of the following: opencl, cuda, openglcompute, opengl */ 18 | Stmt select_gpu_api(Stmt s, Target t); 19 | 20 | } // namespace Internal 21 | } // namespace Halide 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /src/SimplifySpecializations.h: -------------------------------------------------------------------------------- 1 | #ifndef SIMPLIFY_SPECIALIZATIONS_H 2 | #define SIMPLIFY_SPECIALIZATIONS_H 3 | 4 | /** \file 5 | * 6 | * Defines a pass that tries to simplify the RHS/LHS of a function's definition 7 | * based on its specializations. 8 | */ 9 | 10 | #include 11 | 12 | #include "IR.h" 13 | 14 | namespace Halide { 15 | namespace Internal { 16 | 17 | /** Try to simplify the RHS/LHS of a function's definition based on its 18 | * specializations. */ 19 | void simplify_specializations(std::map &env); 20 | 21 | } // namespace Internal 22 | } // namespace Halide 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /src/SkipStages.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_SKIP_STAGES 2 | #define HALIDE_SKIP_STAGES 3 | 4 | #include "IR.h" 5 | 6 | /** \file 7 | * Defines a pass that dynamically avoids realizing unnecessary stages. 8 | */ 9 | 10 | namespace Halide { 11 | namespace Internal { 12 | 13 | /** Avoid computing certain stages if we can infer a runtime condition 14 | * to check that tells us they won't be used. Does this by analyzing 15 | * all reads of each buffer allocated, and inferring some condition 16 | * that tells us if the reads occur. If the condition is non-trivial, 17 | * inject ifs that guard the production. 
*/ 18 | Stmt skip_stages(Stmt s, const std::vector &order); 19 | 20 | } // namespace Internal 21 | } // namespace Halide 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /src/SlidingWindow.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_SLIDING_WINDOW_H 2 | #define HALIDE_SLIDING_WINDOW_H 3 | 4 | /** \file 5 | * 6 | * Defines the sliding_window lowering optimization pass, which avoids 7 | * computing provably-already-computed values. 8 | */ 9 | 10 | #include 11 | 12 | #include "IR.h" 13 | 14 | namespace Halide { 15 | namespace Internal { 16 | 17 | /** Perform sliding window optimizations on a halide 18 | * statement. I.e. don't bother computing points in a function that 19 | * have provably already been computed by a previous iteration. 20 | */ 21 | Stmt sliding_window(Stmt s, const std::map &env); 22 | 23 | } // namespace Internal 24 | } // namespace Halide 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /src/SplitTuples.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_SPLIT_TUPLES_H 2 | #define HALIDE_SPLIT_TUPLES_H 3 | 4 | #include "Expr.h" 5 | #include "Function.h" 6 | #include 7 | 8 | /** \file 9 | * Defines the lowering pass that breaks up Tuple-valued realization 10 | * and productions into several scalar-valued ones. */ 11 | 12 | namespace Halide { 13 | namespace Internal { 14 | 15 | /** Rewrite all tuple-valued Realizations, Provide nodes, and Call 16 | * nodes into several scalar-valued ones, so that later lowering 17 | * passes only need to think about scalar-valued productions. 
*/ 18 | 19 | Stmt split_tuples(Stmt s, const std::map &env); 20 | 21 | } // namespace Internal 22 | } // namespace Halide 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /src/StmtToHtml.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_STMT_TO_HTML 2 | #define HALIDE_STMT_TO_HTML 3 | 4 | /** \file 5 | * Defines a function to dump an HTML-formatted stmt to a file. 6 | */ 7 | 8 | #include "Module.h" 9 | 10 | namespace Halide { 11 | namespace Internal { 12 | 13 | /** 14 | * Dump an HTML-formatted print of a Stmt to filename. 15 | */ 16 | void print_to_html(std::string filename, Stmt s); 17 | 18 | /** Dump an HTML-formatted print of a Module to filename. */ 19 | void print_to_html(std::string filename, const Module &m); 20 | 21 | } // namespace Internal 22 | } // namespace Halide 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /src/TrimNoOps.h: -------------------------------------------------------------------------------- 1 | #ifndef TRIM_NO_OPS_H 2 | #define TRIM_NO_OPS_H 3 | 4 | /** \file 5 | * Defines a lowering pass that truncates loops to the region over 6 | * which they actually do something. 7 | */ 8 | 9 | #include "IR.h" 10 | 11 | namespace Halide { 12 | namespace Internal { 13 | 14 | /** Truncate loop bounds to the region over which they actually do 15 | * something. 
For examples see test/correctness/trim_no_ops.cpp */ 16 | Stmt trim_no_ops(Stmt s); 17 | 18 | } // namespace Internal 19 | } // namespace Halide 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /src/Tuple.cpp: -------------------------------------------------------------------------------- 1 | #include "Tuple.h" 2 | #include "Debug.h" 3 | #include "Func.h" 4 | 5 | namespace Halide { 6 | 7 | Tuple::Tuple(const FuncRef &f) : exprs(f.size()) { 8 | user_assert(f.size() > 1) 9 | << "Can't construct a Tuple from a call to Func \"" 10 | << f.function().name() << "\" because it does not return a Tuple.\n"; 11 | for (size_t i = 0; i < f.size(); i++) { 12 | exprs[i] = f[i]; 13 | } 14 | } 15 | 16 | } // namespace Halide 17 | -------------------------------------------------------------------------------- /src/UnifyDuplicateLets.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_UNIFY_DUPLICATE_LETS_H 2 | #define HALIDE_UNIFY_DUPLICATE_LETS_H 3 | 4 | /** \file 5 | * Defines the lowering pass that coalesces redundant let statements 6 | */ 7 | 8 | #include "IR.h" 9 | 10 | namespace Halide { 11 | namespace Internal { 12 | 13 | /** Find let statements that all define the same value, and make later 14 | * ones just reuse the symbol names of the earlier ones. */ 15 | Stmt unify_duplicate_lets(Stmt s); 16 | 17 | } // namespace Internal 18 | } // namespace Halide 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /src/UniquifyVariableNames.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_UNIQUIFY_VARIABLE_NAMES 2 | #define HALIDE_UNIQUIFY_VARIABLE_NAMES 3 | 4 | /** \file 5 | * Defines the lowering pass that renames all variables to have unique names. 
6 | */ 7 | 8 | #include "IR.h" 9 | 10 | namespace Halide { 11 | namespace Internal { 12 | 13 | /** Modify a statement so that every internally-defined variable name 14 | * is unique. This lets later passes assume syntactic equivalence is 15 | * semantic equivalence. */ 16 | Stmt uniquify_variable_names(Stmt s); 17 | 18 | } // namespace Internal 19 | } // namespace Halide 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /src/UnpackBuffers.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_UNPACK_BUFFERS_H 2 | #define HALIDE_UNPACK_BUFFERS_H 3 | 4 | /** \file 5 | * Defines the lowering pass that unpacks buffer arguments onto the symbol table 6 | */ 7 | 8 | #include "IR.h" 9 | 10 | namespace Halide { 11 | namespace Internal { 12 | 13 | /** Creates let stmts for the various buffer components 14 | * (e.g. foo.extent.0) in any referenced concrete buffers or buffer 15 | * parameters. After this pass, the only undefined symbols should be 16 | * scalar parameters and the buffers themselves (e.g. foo.buffer). */ 17 | Stmt unpack_buffers(Stmt s); 18 | 19 | } // namespace Internal 20 | } // namespace Halide 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /src/UnrollLoops.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_UNROLL_LOOPS_H 2 | #define HALIDE_UNROLL_LOOPS_H 3 | 4 | /** \file 5 | * Defines the lowering pass that unrolls loops marked as such 6 | */ 7 | 8 | #include "IR.h" 9 | 10 | namespace Halide { 11 | namespace Internal { 12 | 13 | /** Take a statement with for loops marked for unrolling, and convert 14 | * each into several copies of the innermost statement. I.e. unroll 15 | * the loop. 
*/ 16 | Stmt unroll_loops(Stmt); 17 | 18 | } // namespace Internal 19 | } // namespace Halide 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /src/UnsafePromises.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_UNSAFE_PROMISES_H 2 | #define HALIDE_UNSAFE_PROMISES_H 3 | 4 | /** \file 5 | * Defines the lowering pass that removes unsafe promises 6 | */ 7 | 8 | #include "IR.h" 9 | #include "Target.h" 10 | 11 | namespace Halide { 12 | namespace Internal { 13 | 14 | /** Lower all unsafe promises into either assertions or unchecked 15 | code, depending on the target. */ 16 | Stmt lower_unsafe_promises(Stmt s, const Target &t); 17 | 18 | } // namespace Internal 19 | } // namespace Halide 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /src/VectorizeLoops.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_VECTORIZE_LOOPS_H 2 | #define HALIDE_VECTORIZE_LOOPS_H 3 | 4 | /** \file 5 | * Defines the lowering pass that vectorizes loops marked as such 6 | */ 7 | 8 | #include "IR.h" 9 | #include "Target.h" 10 | 11 | namespace Halide { 12 | namespace Internal { 13 | 14 | /** Take a statement with for loops marked for vectorization, and turn 15 | * them into single statements that operate on vectors. The loops in 16 | * question must have constant extent. 17 | */ 18 | Stmt vectorize_loops(Stmt s, const Target &t); 19 | 20 | } // namespace Internal 21 | } // namespace Halide 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /src/WrapCalls.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_WRAP_CALLS_H 2 | #define HALIDE_WRAP_CALLS_H 3 | 4 | /** \file 5 | * 6 | * Defines pass to replace calls to wrapped Functions with their wrappers. 
7 | */ 8 | 9 | #include 10 | 11 | #include "IR.h" 12 | 13 | namespace Halide { 14 | namespace Internal { 15 | 16 | /** Replace every call to wrapped Functions in the Functions' definitions with 17 | * calls to their wrapper functions. */ 18 | std::map wrap_func_calls(const std::map &env); 19 | 20 | } // namespace Internal 21 | } // namespace Halide 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /src/WrapExternStages.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_WRAP_EXTERN_STAGES_H 2 | #define HALIDE_WRAP_EXTERN_STAGES_H 3 | 4 | #include "Module.h" 5 | 6 | /** \file 7 | * 8 | * Defines a pass over a Module that adds wrapper LoweredFuncs to any 9 | * extern stages that need them */ 10 | 11 | namespace Halide { 12 | namespace Internal { 13 | 14 | /** Add a wrapper for a LoweredFunc that accepts old buffers and 15 | * upgrades them. */ 16 | void add_legacy_wrapper(Module m, const LoweredFunc &fn); 17 | 18 | } // namespace Internal 19 | } // namespace Halide 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /src/runtime/aarch64_cpu_features.cpp: -------------------------------------------------------------------------------- 1 | #include "HalideRuntime.h" 2 | #include "cpu_features.h" 3 | 4 | namespace Halide { namespace Runtime { namespace Internal { 5 | 6 | WEAK CpuFeatures halide_get_cpu_features() { 7 | // AArch64 has no CPU-specific Features. 
8 | return CpuFeatures(); 9 | } 10 | 11 | }}} // namespace Halide::Runtime::Internal 12 | -------------------------------------------------------------------------------- /src/runtime/alignment_128.cpp: -------------------------------------------------------------------------------- 1 | #include "runtime_internal.h" 2 | 3 | namespace Halide { 4 | namespace Runtime { 5 | namespace Internal { 6 | 7 | WEAK __attribute__((always_inline)) int halide_malloc_alignment() { 8 | return 128; 9 | } 10 | 11 | }}} 12 | -------------------------------------------------------------------------------- /src/runtime/alignment_32.cpp: -------------------------------------------------------------------------------- 1 | #include "runtime_internal.h" 2 | 3 | namespace Halide { 4 | namespace Runtime { 5 | namespace Internal { 6 | 7 | WEAK __attribute__((always_inline)) int halide_malloc_alignment() { 8 | return 32; 9 | } 10 | 11 | }}} 12 | -------------------------------------------------------------------------------- /src/runtime/alignment_64.cpp: -------------------------------------------------------------------------------- 1 | #include "runtime_internal.h" 2 | 3 | namespace Halide { 4 | namespace Runtime { 5 | namespace Internal { 6 | 7 | WEAK __attribute__((always_inline)) int halide_malloc_alignment() { 8 | return 64; 9 | } 10 | 11 | }}} 12 | -------------------------------------------------------------------------------- /src/runtime/android_clock.cpp: -------------------------------------------------------------------------------- 1 | #ifdef BITS_64 2 | #define SYS_CLOCK_GETTIME 113 3 | #endif 4 | 5 | #ifdef BITS_32 6 | #define SYS_CLOCK_GETTIME 263 7 | #endif 8 | 9 | #include "linux_clock.cpp" 10 | -------------------------------------------------------------------------------- /src/runtime/android_host_cpu_count.cpp: -------------------------------------------------------------------------------- 1 | #include "HalideRuntime.h" 2 | 3 | extern "C" { 4 | 5 | extern long 
sysconf(int); 6 | 7 | WEAK int halide_host_cpu_count() { 8 | // Works for Android ARMv7. Probably bogus on other platforms. 9 | return sysconf(97); 10 | } 11 | 12 | } 13 | -------------------------------------------------------------------------------- /src/runtime/android_io.cpp: -------------------------------------------------------------------------------- 1 | #include "HalideRuntime.h" 2 | 3 | extern "C" { 4 | 5 | #define ANDROID_LOG_INFO 4 6 | 7 | extern int __android_log_print(int, const char *, const char *, ...); 8 | 9 | WEAK void halide_default_print(void *user_context, const char * str) { 10 | __android_log_print(ANDROID_LOG_INFO, "halide", "%s", str); 11 | } 12 | 13 | } 14 | -------------------------------------------------------------------------------- /src/runtime/arm_no_neon.ll: -------------------------------------------------------------------------------- 1 | ; TODO: add specializations for ARMv7 without NEON 2 | -------------------------------------------------------------------------------- /src/runtime/destructors.cpp: -------------------------------------------------------------------------------- 1 | #include "HalideRuntime.h" 2 | 3 | #define INLINE inline __attribute__((weak)) __attribute__((always_inline)) __attribute__((used)) 4 | 5 | extern "C" { 6 | 7 | INLINE void call_destructor(void *user_context, void (*fn)(void *user_context, void *object), void **object, bool should_call) { 8 | void *o = *object; 9 | *object = NULL; 10 | // Call the function 11 | if (o && should_call) { 12 | fn(user_context, o); 13 | } 14 | } 15 | 16 | } 17 | -------------------------------------------------------------------------------- /src/runtime/fuchsia_host_cpu_count.cpp: -------------------------------------------------------------------------------- 1 | #include "HalideRuntime.h" 2 | 3 | extern "C" { 4 | 5 | uint32_t zx_system_get_num_cpus(void); 6 | 7 | WEAK int halide_host_cpu_count() { 8 | return (int)zx_system_get_num_cpus(); 9 | } 10 | 11 | } 12 | 
-------------------------------------------------------------------------------- /src/runtime/fuchsia_yield.cpp: -------------------------------------------------------------------------------- 1 | #include "runtime_internal.h" 2 | 3 | typedef int32_t zx_status_t; 4 | typedef int64_t zx_time_t; 5 | extern "C" zx_status_t zx_nanosleep(zx_time_t deadline); 6 | 7 | namespace Halide { namespace Runtime { namespace Internal { 8 | 9 | WEAK void halide_thread_yield() { 10 | zx_nanosleep(0); 11 | } 12 | 13 | }}} 14 | -------------------------------------------------------------------------------- /src/runtime/hexagon_cpu_features.cpp: -------------------------------------------------------------------------------- 1 | #include "HalideRuntime.h" 2 | #include "cpu_features.h" 3 | 4 | namespace Halide { namespace Runtime { namespace Internal { 5 | 6 | WEAK CpuFeatures halide_get_cpu_features() { 7 | // Hexagon has no CPU-specific Features. 8 | return CpuFeatures(); 9 | } 10 | 11 | }}} // namespace Halide::Runtime::Internal 12 | -------------------------------------------------------------------------------- /src/runtime/hexagon_remote/.gitignore: -------------------------------------------------------------------------------- 1 | !bin/*/* 2 | -------------------------------------------------------------------------------- /src/runtime/hexagon_remote/bin/arm-32-android/libhalide_hexagon_host.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/src/runtime/hexagon_remote/bin/arm-32-android/libhalide_hexagon_host.so -------------------------------------------------------------------------------- /src/runtime/hexagon_remote/bin/arm-64-android/libhalide_hexagon_host.so: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/src/runtime/hexagon_remote/bin/arm-64-android/libhalide_hexagon_host.so -------------------------------------------------------------------------------- /src/runtime/hexagon_remote/bin/host/libhalide_hexagon_host.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/src/runtime/hexagon_remote/bin/host/libhalide_hexagon_host.so -------------------------------------------------------------------------------- /src/runtime/hexagon_remote/bin/v60/hexagon_sim_remote: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/src/runtime/hexagon_remote/bin/v60/hexagon_sim_remote -------------------------------------------------------------------------------- /src/runtime/hexagon_remote/bin/v60/libhalide_hexagon_remote_skel.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/src/runtime/hexagon_remote/bin/v60/libhalide_hexagon_remote_skel.so -------------------------------------------------------------------------------- /src/runtime/hexagon_remote/bin/v60/libsim_qurt.a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/src/runtime/hexagon_remote/bin/v60/libsim_qurt.a -------------------------------------------------------------------------------- /src/runtime/hexagon_remote/bin/v60/libsim_qurt_vtcm.a: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/src/runtime/hexagon_remote/bin/v60/libsim_qurt_vtcm.a -------------------------------------------------------------------------------- /src/runtime/hexagon_remote/bin/v60/signed_by_debug/libhalide_hexagon_remote_skel.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/src/runtime/hexagon_remote/bin/v60/signed_by_debug/libhalide_hexagon_remote_skel.so -------------------------------------------------------------------------------- /src/runtime/hexagon_remote/c11_stubs.cpp: -------------------------------------------------------------------------------- 1 | extern "C" { 2 | 3 | // Hexagon-tools 8.0.06 and later are dependent on 2 additional symbols: 4 | // __cxa_finalize and __cxa_atexit 5 | // We are providing weak symbol definitions of these functions. 6 | 7 | #include "HAP_farf.h" 8 | 9 | //#define FARF_LOW 1 // Enable debug output 10 | 11 | void __attribute__ ((weak)) __cxa_finalize() { 12 | FARF(LOW, "Finalizing\n"); 13 | return; 14 | } 15 | 16 | void __attribute__ ((weak)) __cxa_atexit() { 17 | FARF(LOW, "Atexiting\n"); 18 | return; 19 | } 20 | 21 | } // extern "C" 22 | -------------------------------------------------------------------------------- /src/runtime/hexagon_remote/dlib.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_HEXAGON_MMAP_DLIB_H 2 | #define HALIDE_HEXAGON_MMAP_DLIB_H 3 | 4 | // This is a custom implementation of dlopen/dlsym/dlclose for loading 5 | // a shared object in memory, based on using mmap/mprotect to load and 6 | // make data executable. The arguments are the same as their standard 7 | // counterparts, except mmap_dlopen takes a pointer/size instead of a 8 | // file, and does not take a flags option. 
The exported symbols are 9 | // not actually loaded into the process for use by other 10 | // dlopen/mmap_dlopen calls. 11 | void *mmap_dlopen(const void *code, size_t size); 12 | void *mmap_dlsym(void *dlib, const char *name); 13 | int mmap_dlclose(void *dlib); 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /src/runtime/hexagon_remote/known_symbols.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_HEXAGON_REMOTE_KNOWN_SYMBOLS_H 2 | #define HALIDE_HEXAGON_REMOTE_KNOWN_SYMBOLS_H 3 | 4 | // Mapping between a symbol name and an address. 5 | struct known_symbol { 6 | const char *name; 7 | char *addr; 8 | }; 9 | 10 | // Look up a symbol in an array of known symbols. The map should be 11 | // terminated with a {NULL, NULL} known_symbol. 12 | void *lookup_symbol(const char *sym, const known_symbol *map); 13 | 14 | // Look up common symbols. 15 | void *get_known_symbol(const char *sym); 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /src/runtime/hexagon_remote/log.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_HEXAGON_REMOTE_LOG_H 2 | #define HALIDE_HEXAGON_REMOTE_LOG_H 3 | 4 | void log_printf(const char *fmt, ...); 5 | 6 | #endif 7 | -------------------------------------------------------------------------------- /src/runtime/hexagon_remote/nearbyint.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern "C" { 4 | 5 | // Hexagon doesn't have an implementation of nearbyint/nearbyintf, so 6 | // we provide one here. This implementation is not great, nearbyint is 7 | // supposed to round to nearest even in the case of a tie. 
8 | 9 | float nearbyintf(float x) { 10 | return floorf(x + 0.5f); 11 | } 12 | 13 | double nearbyint(double x) { 14 | return floor(x + 0.5); 15 | } 16 | 17 | } // extern "C" 18 | -------------------------------------------------------------------------------- /src/runtime/hexagon_remote/sim_protocol.h: -------------------------------------------------------------------------------- 1 | #ifndef SIM_PROTOCOL_H 2 | #define SIM_PROTOCOL_H 3 | 4 | namespace Message { 5 | enum { 6 | None = 0, 7 | Alloc, 8 | Free, 9 | LoadLibrary, 10 | GetSymbol, 11 | Run, 12 | ReleaseLibrary, 13 | Break, 14 | }; 15 | } 16 | 17 | #endif // SIM_PROTOCOL_H 18 | -------------------------------------------------------------------------------- /src/runtime/hexagon_remote/sim_qurt.cpp: -------------------------------------------------------------------------------- 1 | #include "hexagon_standalone.h" 2 | 3 | extern "C" { 4 | 5 | // Provide an implementation of qurt to redirect to the appropriate 6 | // simulator calls. 
7 | int qurt_hvx_lock(int mode) { 8 | SIM_ACQUIRE_HVX; 9 | if (mode == 0) { 10 | SIM_CLEAR_HVX_DOUBLE_MODE; 11 | } else { 12 | SIM_SET_HVX_DOUBLE_MODE; 13 | } 14 | return 0; 15 | } 16 | 17 | int qurt_hvx_unlock() { 18 | SIM_RELEASE_HVX; 19 | return 0; 20 | } 21 | 22 | } // extern "C" 23 | -------------------------------------------------------------------------------- /src/runtime/ios_io.cpp: -------------------------------------------------------------------------------- 1 | #include "HalideRuntime.h" 2 | #include "objc_support.h" 3 | 4 | extern "C" { 5 | 6 | WEAK void halide_default_print(void *user_context, const char *str) { 7 | void *pool = create_autorelease_pool(); 8 | ns_log_utf8_string(str); 9 | drain_autorelease_pool(pool); 10 | } 11 | 12 | } // extern "C" 13 | -------------------------------------------------------------------------------- /src/runtime/linux_host_cpu_count.cpp: -------------------------------------------------------------------------------- 1 | #include "HalideRuntime.h" 2 | 3 | extern "C" { 4 | 5 | extern long sysconf(int); 6 | 7 | WEAK int halide_host_cpu_count() { 8 | return sysconf(84); 9 | } 10 | 11 | } 12 | -------------------------------------------------------------------------------- /src/runtime/linux_yield.cpp: -------------------------------------------------------------------------------- 1 | #include "runtime_internal.h" 2 | 3 | extern "C" int sched_yield(); 4 | 5 | namespace Halide { namespace Runtime { namespace Internal { 6 | 7 | WEAK void halide_thread_yield() { 8 | sched_yield(); 9 | } 10 | 11 | }}} 12 | -------------------------------------------------------------------------------- /src/runtime/metadata.cpp: -------------------------------------------------------------------------------- 1 | #include "HalideRuntime.h" 2 | 3 | namespace Halide { namespace Runtime { namespace Internal { 4 | 5 | // This is unused and expected to be optimized away; it exists solely to ensure 6 | // that the halide_filter_metadata_t type 
is in the runtime module, so that 7 | // Codegen_LLVM can access its description. 8 | WEAK const halide_filter_metadata_t *unused_function_to_get_halide_filter_metadata_t_declared() { return NULL; } 9 | 10 | } } } 11 | 12 | -------------------------------------------------------------------------------- /src/runtime/metal_objc_arm.cpp: -------------------------------------------------------------------------------- 1 | #define ARM_COMPILE 1 2 | #include "metal_objc_platform_dependent.cpp" 3 | -------------------------------------------------------------------------------- /src/runtime/metal_objc_platform_dependent.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_OBJC_METAL_PLATFORM_DEPENDENT_H 2 | #define HALIDE_OBJC_METAL_PLATFORM_DEPENDENT_H 3 | 4 | namespace Halide { namespace Runtime { namespace Internal { namespace Metal { 5 | 6 | struct mtl_compute_command_encoder; 7 | 8 | void dispatch_threadgroups(mtl_compute_command_encoder *encoder, 9 | int32_t blocks_x, int32_t blocks_y, int32_t blocks_z, 10 | int32_t threads_x, int32_t threads_y, int32_t threads_z); 11 | 12 | }}}} 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /src/runtime/metal_objc_x86.cpp: -------------------------------------------------------------------------------- 1 | #define X86_COMPILE 1 2 | #include "metal_objc_platform_dependent.cpp" 3 | -------------------------------------------------------------------------------- /src/runtime/mini_qurt_vtcm.h: -------------------------------------------------------------------------------- 1 | extern "C" { 2 | 3 | extern void* HAP_request_VTCM (unsigned int size, unsigned int single_page_flag); 4 | extern int HAP_release_VTCM (void* pVA); 5 | 6 | } 7 | -------------------------------------------------------------------------------- /src/runtime/mips_cpu_features.cpp: -------------------------------------------------------------------------------- 
1 | #include "HalideRuntime.h" 2 | #include "cpu_features.h" 3 | 4 | namespace Halide { namespace Runtime { namespace Internal { 5 | 6 | WEAK CpuFeatures halide_get_cpu_features() { 7 | // MIPS has no CPU-specific Features. 8 | return CpuFeatures(); 9 | } 10 | 11 | }}} // namespace Halide::Runtime::Internal 12 | -------------------------------------------------------------------------------- /src/runtime/msan_stubs.cpp: -------------------------------------------------------------------------------- 1 | #include "HalideRuntime.h" 2 | 3 | extern "C" { 4 | 5 | WEAK int halide_msan_annotate_memory_is_initialized(void *user_context, const void *ptr, uint64_t len) { return 0; } 6 | 7 | WEAK int halide_msan_annotate_buffer_is_initialized(void *user_context, halide_buffer_t *b) { return 0; } 8 | 9 | WEAK void halide_msan_annotate_buffer_is_initialized_as_destructor(void *user_context, void *b) {} 10 | 11 | } 12 | -------------------------------------------------------------------------------- /src/runtime/nvidia_libdevice_bitcode/libdevice.compute_20.10.bc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/src/runtime/nvidia_libdevice_bitcode/libdevice.compute_20.10.bc -------------------------------------------------------------------------------- /src/runtime/nvidia_libdevice_bitcode/libdevice.compute_30.10.bc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/src/runtime/nvidia_libdevice_bitcode/libdevice.compute_30.10.bc -------------------------------------------------------------------------------- /src/runtime/nvidia_libdevice_bitcode/libdevice.compute_35.10.bc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/src/runtime/nvidia_libdevice_bitcode/libdevice.compute_35.10.bc -------------------------------------------------------------------------------- /src/runtime/osx_host_cpu_count.cpp: -------------------------------------------------------------------------------- 1 | #include "HalideRuntime.h" 2 | 3 | extern "C" { 4 | 5 | extern long sysconf(int); 6 | 7 | WEAK int halide_host_cpu_count() { 8 | return sysconf(58); 9 | } 10 | 11 | } 12 | -------------------------------------------------------------------------------- /src/runtime/osx_yield.cpp: -------------------------------------------------------------------------------- 1 | #include "runtime_internal.h" 2 | 3 | extern "C" int swtch_pri(int); 4 | 5 | namespace Halide { namespace Runtime { namespace Internal { 6 | 7 | WEAK void halide_thread_yield() { 8 | swtch_pri(0); 9 | } 10 | 11 | }}} 12 | -------------------------------------------------------------------------------- /src/runtime/posix_abort.cpp: -------------------------------------------------------------------------------- 1 | #include "runtime_internal.h" 2 | 3 | extern "C" void abort(); 4 | 5 | namespace Halide { 6 | namespace Runtime { 7 | namespace Internal { 8 | 9 | WEAK __attribute__((always_inline)) void halide_abort() { 10 | abort(); 11 | } 12 | 13 | }}} 14 | -------------------------------------------------------------------------------- /src/runtime/posix_io.cpp: -------------------------------------------------------------------------------- 1 | #include "HalideRuntime.h" 2 | 3 | extern "C" { 4 | 5 | WEAK void halide_default_print(void *user_context, const char *str) { 6 | write(STDOUT_FILENO, str, strlen(str)); 7 | } 8 | 9 | } 10 | -------------------------------------------------------------------------------- /src/runtime/posix_print.cpp: -------------------------------------------------------------------------------- 1 | #include 
"HalideRuntime.h" 2 | 3 | extern "C" void halide_default_print(void *, const char *); 4 | 5 | namespace Halide { namespace Runtime { namespace Internal { 6 | 7 | WEAK halide_print_t custom_print = halide_default_print; 8 | 9 | }}} // namespace Halide::Runtime::Internal 10 | 11 | extern "C" { 12 | 13 | WEAK void halide_print(void *user_context, const char *msg) { 14 | (*custom_print)(user_context, msg); 15 | } 16 | 17 | WEAK halide_print_t halide_set_custom_print(halide_print_t print) { 18 | halide_print_t result = custom_print; 19 | custom_print = print; 20 | return result; 21 | } 22 | 23 | } 24 | -------------------------------------------------------------------------------- /src/runtime/posix_threads_tsan.cpp: -------------------------------------------------------------------------------- 1 | #define TSAN_ANNOTATIONS 1 2 | 3 | #include "posix_threads.cpp" 4 | -------------------------------------------------------------------------------- /src/runtime/prefetch.cpp: -------------------------------------------------------------------------------- 1 | #include "HalideRuntime.h" 2 | 3 | extern "C" { 4 | 5 | // These need to inline, otherwise the extern call with the ptr 6 | // parameter breaks a lot of optimizations. 
7 | __attribute__((always_inline)) 8 | WEAK int _halide_prefetch(const void *ptr) { 9 | __builtin_prefetch(ptr, 1, 3); 10 | return 0; 11 | } 12 | 13 | } 14 | -------------------------------------------------------------------------------- /src/runtime/qurt_hvx_vtcm.cpp: -------------------------------------------------------------------------------- 1 | #include "runtime_internal.h" 2 | #include "HalideRuntimeQurt.h" 3 | #include "mini_qurt.h" 4 | #include "mini_qurt_vtcm.h" 5 | 6 | using namespace Halide::Runtime::Internal::Qurt; 7 | 8 | extern "C" { 9 | 10 | WEAK void* halide_vtcm_malloc(void *user_context, int size) { 11 | return HAP_request_VTCM(size, 1); 12 | } 13 | 14 | WEAK void halide_vtcm_free(void *user_context, void *addr) { 15 | HAP_release_VTCM(addr); 16 | } 17 | 18 | } 19 | -------------------------------------------------------------------------------- /src/runtime/qurt_threads_tsan.cpp: -------------------------------------------------------------------------------- 1 | #define TSAN_ANNOTATIONS 1 2 | 3 | #include "qurt_threads.cpp" 4 | -------------------------------------------------------------------------------- /src/runtime/qurt_yield.cpp: -------------------------------------------------------------------------------- 1 | #include "runtime_internal.h" 2 | 3 | // TODO: what should we use here??? 4 | 5 | namespace Halide { namespace Runtime { namespace Internal { 6 | 7 | WEAK void halide_thread_yield() { 8 | } 9 | 10 | }}} 11 | -------------------------------------------------------------------------------- /src/runtime/riscv_cpu_features.cpp: -------------------------------------------------------------------------------- 1 | #include "HalideRuntime.h" 2 | #include "cpu_features.h" 3 | 4 | namespace Halide { namespace Runtime { namespace Internal { 5 | 6 | WEAK CpuFeatures halide_get_cpu_features() { 7 | // For now, no version specific features, though RISCV promises to have many. 
8 | return CpuFeatures(); 9 | } 10 | 11 | }}} // namespace Halide::Runtime::Internal 12 | -------------------------------------------------------------------------------- /src/runtime/scoped_mutex_lock.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_RUNTIME_SCOPED_MUTEX_LOCK_H 2 | #define HALIDE_RUNTIME_SCOPED_MUTEX_LOCK_H 3 | 4 | #include "HalideRuntime.h" 5 | 6 | namespace Halide { namespace Runtime { namespace Internal { 7 | 8 | // An RAII mutex locking operation 9 | struct ScopedMutexLock { 10 | halide_mutex *mutex; 11 | 12 | ScopedMutexLock(halide_mutex *mutex) __attribute__((always_inline)) : mutex(mutex) { 13 | halide_mutex_lock(mutex); 14 | } 15 | 16 | ~ScopedMutexLock() __attribute__((always_inline)) { 17 | halide_mutex_unlock(mutex); 18 | } 19 | }; 20 | 21 | }}} // namespace Halide::Runtime::Internal 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /src/runtime/scoped_spin_lock.h: -------------------------------------------------------------------------------- 1 | #ifndef HALIDE_SCOPED_SPIN_LOCK_H 2 | #define HALIDE_SCOPED_SPIN_LOCK_H 3 | 4 | namespace Halide { namespace Runtime { namespace Internal { 5 | 6 | // An RAII spin lock. 
7 | struct ScopedSpinLock { 8 | volatile int *lock; 9 | 10 | ScopedSpinLock(volatile int *l) __attribute__((always_inline)) : lock(l) { 11 | while (__sync_lock_test_and_set(lock, 1)) { } 12 | } 13 | 14 | ~ScopedSpinLock() __attribute__((always_inline)) { 15 | __sync_lock_release(lock); 16 | } 17 | }; 18 | 19 | }}} // namespace Halide::Runtime::Internal 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /src/runtime/ssp.cpp: -------------------------------------------------------------------------------- 1 | #include "HalideRuntime.h" 2 | #include "runtime_internal.h" 3 | 4 | // LLVM sometimes likes to generate calls to a stack smashing 5 | // protector, but some build environments (e.g. native client), don't 6 | // provide libssp reliably. We define two weak symbols here to help 7 | // things along. 8 | 9 | extern "C" { 10 | 11 | WEAK char *__stack_chk_guard = (char *)(0xdeadbeef); 12 | 13 | WEAK void __stack_chk_fail() { 14 | halide_error(NULL, "Memory error: stack smashing protector changed!\n"); 15 | Halide::Runtime::Internal::halide_abort(); 16 | } 17 | 18 | } 19 | -------------------------------------------------------------------------------- /src/runtime/wasm_cpu_features.cpp: -------------------------------------------------------------------------------- 1 | #include "HalideRuntime.h" 2 | #include "cpu_features.h" 3 | 4 | namespace Halide { namespace Runtime { namespace Internal { 5 | 6 | WEAK CpuFeatures halide_get_cpu_features() { 7 | CpuFeatures features; 8 | 9 | // There isn't a way to determine what features are available -- 10 | // if a feature we need isn't available, we couldn't 11 | // even load. So just declare that all wasm-related features are 12 | // known and available. 
13 | features.set_known(halide_target_feature_wasm_simd128); 14 | features.set_available(halide_target_feature_wasm_simd128); 15 | 16 | return features; 17 | } 18 | 19 | }}} // namespace Halide::Runtime::Internal 20 | -------------------------------------------------------------------------------- /src/runtime/windows_cuda.cpp: -------------------------------------------------------------------------------- 1 | #define WINDOWS 2 | #include "cuda.cpp" 3 | -------------------------------------------------------------------------------- /src/runtime/windows_io.cpp: -------------------------------------------------------------------------------- 1 | #include "HalideRuntime.h" 2 | 3 | extern "C" { 4 | 5 | WEAK void halide_default_print(void *user_context, const char *str) { 6 | write(STDOUT_FILENO, str, strlen(str)); 7 | } 8 | 9 | } 10 | -------------------------------------------------------------------------------- /src/runtime/windows_opencl.cpp: -------------------------------------------------------------------------------- 1 | #define WINDOWS 2 | #include "opencl.cpp" 3 | -------------------------------------------------------------------------------- /src/runtime/windows_profiler.cpp: -------------------------------------------------------------------------------- 1 | #define WINDOWS 2 | #include "profiler.cpp" 3 | -------------------------------------------------------------------------------- /src/runtime/windows_threads_tsan.cpp: -------------------------------------------------------------------------------- 1 | #define TSAN_ANNOTATIONS 1 2 | 3 | #include "windows_threads.cpp" 4 | -------------------------------------------------------------------------------- /src/runtime/windows_yield.cpp: -------------------------------------------------------------------------------- 1 | #include "runtime_internal.h" 2 | 3 | #ifdef BITS_64 4 | #define WIN32API 5 | #else 6 | #define WIN32API __stdcall 7 | #endif 8 | 9 | extern "C" WIN32API int32_t Sleep(int32_t timeout); 
10 | 11 | namespace Halide { namespace Runtime { namespace Internal { 12 | 13 | WEAK void halide_thread_yield() { 14 | Sleep(0); 15 | } 16 | 17 | }}} 18 | -------------------------------------------------------------------------------- /test/auto_schedule/unused_func.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | using namespace Halide; 4 | 5 | int main(int argc, char **argv) { 6 | Var x("x"), y("y"); 7 | Func f("f"), g("g"), h("h"); 8 | 9 | g(x) = x; 10 | g(x) += 10; 11 | h(x) = x*x; 12 | f(x) = select(false, g(x + 1), h(x + 1)); 13 | 14 | f.set_estimates({{0, 256}}); 15 | 16 | Target target = get_jit_target_from_environment(); 17 | Pipeline p(f); 18 | 19 | p.auto_schedule(target); 20 | 21 | // Inspect the schedule 22 | f.print_loop_nest(); 23 | 24 | // Run the schedule 25 | p.realize(256); 26 | 27 | printf("Success!\n"); 28 | return 0; 29 | } 30 | -------------------------------------------------------------------------------- /test/common/expect_failure.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Simple bash script that will execute another process, which is *expected* to fail; 4 | # this is useful mainly for running test/error and other expected-to-fail tests. 5 | # 6 | 7 | echo Running $1 8 | 9 | "$1" 10 | if [[ "$?" 
-ne "0" ]] 11 | then 12 | echo "Success" 13 | exit 0 14 | fi 15 | 16 | echo "Expected Failure from '$1', but got Success" 17 | exit -1 18 | -------------------------------------------------------------------------------- /test/correctness/autoschedule_small_pure_update.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | using namespace Halide; 4 | 5 | int main(int argc, char **argv) { 6 | Buffer in(13, 17); 7 | ImageParam in_param(Float(32), 2); 8 | 9 | Func g, h; 10 | Var x, y; 11 | 12 | RDom r(0, 17); 13 | g(x) += in_param(x, r); 14 | 15 | h(x, y) = in_param(x, y) + g(x); 16 | 17 | h.set_estimates({{0, 13}, {0, 17}}); 18 | in_param.set_estimates({{0, 13}, {0, 17}}); 19 | 20 | Pipeline p(h); 21 | p.auto_schedule(Target("host")); 22 | 23 | in_param.set(in); 24 | 25 | // Ensure the autoscheduler doesn't try to RoundUp the pure loop 26 | // in g's update definition. 27 | p.realize(13, 17); 28 | 29 | return 0; 30 | } 31 | -------------------------------------------------------------------------------- /test/correctness/bad_likely.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Func f; 8 | Var x; 9 | // Use a likely intrinsic to tag a disjoint range. 10 | f(x) = select(x < 10 || x > 20, likely(1), 2); 11 | 12 | Buffer im = f.realize(30); 13 | for (int x = 0; x < 30; x++) { 14 | int correct = (x < 10 || x > 20) ? 
1 : 2; 15 | if (im(x) != correct) { 16 | printf("im(%d) = %d instead of %d\n", x, im(x), correct); 17 | return -1; 18 | } 19 | } 20 | 21 | printf("Success!\n"); 22 | return 0; 23 | } 24 | -------------------------------------------------------------------------------- /test/correctness/bool_compute_root_vectorize.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Var x, y; 8 | 9 | Func pred("pred"); 10 | pred(x, y) = x < y; 11 | 12 | Func selector("selector"); 13 | selector(x, y) = select(pred(x, y), 1, 0); 14 | 15 | // Load a vector of 8 bools 16 | pred.compute_root(); 17 | selector.compute_root().vectorize(x, 8); 18 | 19 | RDom range(0, 100, 0, 100); 20 | int32_t result = evaluate_may_gpu(sum(selector(range.x, range.y))); 21 | 22 | assert(result == 4950); 23 | 24 | printf("Success!\n"); 25 | return 0; 26 | } 27 | -------------------------------------------------------------------------------- /test/correctness/bounds_of_monotonic_math.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Func f; 8 | Var x; 9 | 10 | ImageParam input(Float(32), 1); 11 | 12 | f(x) = input(cast(ceil(0.3f * ceil(0.4f * floor(x * 22.5f))))); 13 | 14 | f.infer_input_bounds(10); 15 | 16 | Buffer in = input.get(); 17 | 18 | int correct = 26; 19 | if (in.width() != correct) { 20 | printf("Width is %d instead of %d\n", in.width(), correct); 21 | return -1; 22 | } 23 | 24 | printf("Success!\n"); 25 | return 0; 26 | } 27 | -------------------------------------------------------------------------------- /test/correctness/bounds_of_multiply.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | // See 
https://github.com/halide/Halide/issues/3070 5 | 6 | using namespace Halide; 7 | 8 | template 9 | void test() { 10 | Param bound; 11 | ImageParam in(UInt(8), 1); 12 | Var x; 13 | Func f; 14 | 15 | f(x) = in(clamp(x, 0, bound * 2 - 1)); 16 | 17 | Buffer foo(10); 18 | foo.fill(0); 19 | in.set(foo); 20 | bound.set(5); 21 | 22 | auto result = f.realize(200); 23 | } 24 | 25 | int main(int argc, char **argv) { 26 | printf("Trying int32_t\n"); 27 | test(); 28 | printf("Trying int16_t\n"); 29 | test(); 30 | printf("Success!\n"); 31 | return 0; 32 | } 33 | -------------------------------------------------------------------------------- /test/correctness/circular_reference_leak.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | 8 | // Recursive functions can create circular references. These could 9 | // cause leaks. Run this test under valgrind to check. 10 | for (int i = 0; i < 10000; i++) { 11 | Func f; 12 | Var x; 13 | RDom r(0, 10); 14 | f(x) = x; 15 | f(r) = f(r-1) + f(r+1); 16 | } 17 | 18 | printf("Success!\n"); 19 | return 0; 20 | 21 | } 22 | -------------------------------------------------------------------------------- /test/correctness/compare_vars.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Func f; 8 | Var x, y; 9 | f(x, y) = select(x == y, 1, 0); 10 | 11 | Buffer im = f.realize(10, 10); 12 | 13 | for (int y = 0; y < 10; y++) { 14 | for (int x = 0; x < 10; x++) { 15 | int correct = (x == y) ? 
1 : 0; 16 | if (im(x, y) != correct) { 17 | printf("im(%d, %d) = %d instead of %d\n", 18 | x, y, im(x, y), correct); 19 | return -1; 20 | } 21 | } 22 | } 23 | 24 | printf("Success!\n"); 25 | return 0; 26 | } 27 | -------------------------------------------------------------------------------- /test/correctness/compute_with_in.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Var x("x"), y("y"); 8 | Func one("one"), two("two"), three("three"), output("output"); 9 | 10 | one(x, y) = x + y; 11 | two(x, y) = one(x, y) + 2; 12 | three(x, y) = one(x, y) + 3; 13 | output(x, y) = two(x, y) + three(x, y); 14 | 15 | two.compute_root(); 16 | one.in(three).compute_root().compute_with(two, Var::outermost()); 17 | one.compute_root(); 18 | one.compute_at(two, Var::outermost()); 19 | 20 | output.realize(64, 64); 21 | 22 | printf("Success!\n"); 23 | return 0; 24 | } 25 | -------------------------------------------------------------------------------- /test/correctness/computed_index.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | using namespace Halide; 4 | 5 | int main(int argc, char **argv) { 6 | Buffer in1(256, 256); 7 | Buffer in2(256, 256, 10); 8 | 9 | Func f; 10 | Var x, y; 11 | 12 | f(x, y) = in2(x, y, clamp(in1(x, y), 0, 9)); 13 | Buffer out = f.realize(256, 256); 14 | 15 | printf("Success!\n"); 16 | return 0; 17 | } 18 | -------------------------------------------------------------------------------- /test/correctness/dynamic_reduction_bounds.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | #include 4 | 5 | using namespace Halide; 6 | 7 | int main(int argc, char **argv) { 8 | ImageParam input(Float(32), 2); 9 | 10 | Var x, y, z; 11 | RDom dom(0, input.width()*8); 12 | Func f; 13 
| Expr hard_to_reason_about = cast(hypot(input.width(), input.height())); 14 | f(x, y, z) = 1; 15 | f(x, y, dom / hard_to_reason_about) += 1; 16 | f.compile_jit(); 17 | 18 | Buffer im(32, 32); 19 | input.set(im); 20 | 21 | f.realize(100, 100, 16); 22 | 23 | printf("Success!\n"); 24 | return 0; 25 | } 26 | -------------------------------------------------------------------------------- /test/correctness/explicit_inline_reductions.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Func f("f"); 8 | Var x("x"), y("y"); 9 | RDom r1(0, 10, "r1"), r2(0, 10, "r2"), r3(0, 10, "r3"); 10 | 11 | f(x, y) = product(sum(r1, r1 + r3) + sum(r2, r2 * 2 + r3)); 12 | f(r1, y) += product(r3, sum(r2, r1 + r2 + r3)); 13 | 14 | Buffer result = f.realize(10, 10); 15 | 16 | return 0; 17 | } 18 | 19 | -------------------------------------------------------------------------------- /test/correctness/issue_3926.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | using namespace Halide; 4 | 5 | int main(int argc, char* argv[]) { 6 | Func f("f"), g("g"); 7 | Var x("x"), y("y"); 8 | Var tx("tx"), ty("ty"); 9 | Param param; 10 | 11 | f(x) = x; 12 | g(x, y) = f(x) + select(param, 1, 2); 13 | 14 | //g.gpu_tile(x, y, tx, ty, 8, 8, TailStrategy::GuardWithIf); 15 | g.specialize(param).tile(x, y, tx, ty, 8, 8, TailStrategy::GuardWithIf); 16 | g.specialize(!param).tile(x, y, tx, ty, 8, 8, TailStrategy::GuardWithIf); 17 | g.specialize_fail("Unknown"); 18 | f.in().compute_at(g, tx); 19 | 20 | Buffer out(34, 34); 21 | param.set(false); 22 | g.realize(out); 23 | 24 | printf("Success\n"); 25 | 26 | } 27 | -------------------------------------------------------------------------------- /test/correctness/many_updates.cpp: 
-------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | const int N = 20; 8 | 9 | Func f; 10 | Var x, y; 11 | f(x, y) = x + y; 12 | for (int i = 0; i < N; i++) { 13 | f(x, i) += 1; 14 | f(i, y) += 1; 15 | } 16 | f.compute_root(); 17 | 18 | Buffer im = f.realize(N, N); 19 | 20 | printf("Success!\n"); 21 | return 0; 22 | 23 | } 24 | -------------------------------------------------------------------------------- /test/correctness/non_vector_aligned_embeded_buffer.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | char storage[5 * sizeof(int32_t)]{0}; 8 | char *ptr = storage; 9 | ptr += sizeof(int32_t); 10 | Buffer foo((int32_t *)(ptr), 4); 11 | 12 | Func f; 13 | Var x; 14 | 15 | f(x) = foo(x); 16 | f.vectorize(x, 4); 17 | f.output_buffer().dim(0).set_min(0); 18 | auto result = f.realize(4); 19 | 20 | return 0; 21 | } 22 | -------------------------------------------------------------------------------- /test/correctness/parallel.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Var x; 8 | Func f; 9 | 10 | Param k; 11 | k.set(3); 12 | 13 | f(x) = x*k; 14 | 15 | f.parallel(x); 16 | 17 | Buffer im = f.realize(16); 18 | 19 | for (int i = 0; i < 16; i++) { 20 | if (im(i) != i*3) { 21 | printf("im(%d) = %d\n", i, im(i)); 22 | return -1; 23 | } 24 | } 25 | 26 | printf("Success!\n"); 27 | return 0; 28 | } 29 | -------------------------------------------------------------------------------- /test/correctness/plain_c_includes.c: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | #error 
"This test must be compiled as plain C, without C++ enabled." 3 | #endif 4 | 5 | #include 6 | 7 | // Verify that all HalideRuntime*.h files can be compiled without C++ 8 | #include "HalideRuntime.h" 9 | #include "HalideRuntimeCuda.h" 10 | #include "HalideRuntimeHexagonHost.h" 11 | #include "HalideRuntimeMetal.h" 12 | #include "HalideRuntimeOpenCL.h" 13 | #include "HalideRuntimeOpenGL.h" 14 | #include "HalideRuntimeOpenGLCompute.h" 15 | #include "HalideRuntimeQurt.h" 16 | 17 | 18 | int main(int argc, char **argv) { 19 | printf("Success!\n"); 20 | return 0; 21 | } 22 | -------------------------------------------------------------------------------- /test/correctness/shared_self_references.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | // Check that recursive references get tracked properly 8 | { 9 | Func f; 10 | Var x; 11 | f(x) = x; 12 | { 13 | Expr e = f(2); 14 | f(0) = e; 15 | f(1) = e; 16 | } // Destroy e 17 | } // Destroy f 18 | 19 | // f should have been cleaned up. valgrind will complain if it 20 | // hasn't been. 
21 | 22 | printf("Success!\n"); 23 | return 0; 24 | } 25 | -------------------------------------------------------------------------------- /test/correctness/shifted_image.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | 8 | halide_dimension_t shape[] = {{100, 10, 1}, 9 | {300, 10, 10}, 10 | {500, 10, 100}, 11 | {400, 10, 1000}}; 12 | Buffer buf(nullptr, 4, shape); 13 | buf.allocate(); 14 | 15 | buf.data()[0] = 17; 16 | if (buf(100, 300, 500, 400) != 17) { 17 | printf("Image indexing into buffers with non-zero mins is broken\n"); 18 | return -1; 19 | } 20 | 21 | return 0; 22 | } 23 | -------------------------------------------------------------------------------- /test/correctness/simplified_away_embedded_image.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | 8 | // What happens if an emedded image gets simplified away? 9 | Buffer input(32, 32); 10 | 11 | Var x("x"), y("y"); 12 | Func foo("foo"); 13 | 14 | foo(x, y) = input(x, y) - input(x, y); 15 | 16 | Buffer output(32, 32); 17 | 18 | foo.realize(output); 19 | 20 | // Any non-error is a success. 
21 | printf("Success!\n"); 22 | 23 | return 0; 24 | } 25 | -------------------------------------------------------------------------------- /test/correctness/split_reuse_inner_name_bug.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Var x("x"), x0, x1, x2, x3; 8 | Func f("f"); 9 | 10 | f(x) = 1; 11 | f.compute_root().split(x, x0, x, 16).split(x, x, x1, 2).split(x, x2, x, 4).split(x, x, x3, 2); 12 | f.realize(1024); 13 | 14 | printf("Success!\n"); 15 | return 0; 16 | } 17 | -------------------------------------------------------------------------------- /test/correctness/tracing_bounds.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | // Turning on tracing wraps certain Exprs. This shouldn't effect 8 | // bounds inference. 9 | 10 | Func f, g; 11 | Var x; 12 | f(x) = clamp(x, 0, 100); 13 | f.compute_root(); 14 | g(x) = f(f(x)); 15 | // f is known to be bounded, so this means we need 101 values of 16 | // f. This shouldn't be confused by tracing loads of f or stores 17 | // to g. 18 | f.trace_loads(); 19 | g.trace_stores(); 20 | 21 | // Shouldn't throw an error about unbounded access. 
22 | g.compile_jit(); 23 | 24 | printf("Success!\n"); 25 | 26 | return 0; 27 | } 28 | -------------------------------------------------------------------------------- /test/correctness/transitive_bounds.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Func f, g; 8 | Var x; 9 | f(x) = x; 10 | g(x) = f(x); 11 | 12 | g.bound(x, 0, 4); 13 | 14 | // Should be ok to unroll x because it's bounded by a constant in its only consumer 15 | f.compute_root().unroll(x); 16 | 17 | g.realize(4); 18 | 19 | printf("Success!\n"); 20 | return 0; 21 | } 22 | -------------------------------------------------------------------------------- /test/correctness/two_vector_args.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | 8 | Func f, g; 9 | Var x, y; 10 | 11 | g(x, y) = x+y; 12 | 13 | f(x, y) = g(x, x); 14 | 15 | f.vectorize(x, 4); 16 | 17 | Buffer out = f.realize(4, 4); 18 | 19 | printf("Success!\n"); 20 | 21 | return 0; 22 | } 23 | 24 | -------------------------------------------------------------------------------- /test/correctness/unrolled_reduction.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Var x("x"), y("y"), z("z"); 8 | 9 | Buffer noise(32); 10 | for (int i = 0; i < 32; i++) { 11 | noise(i) = (float)rand() / RAND_MAX; 12 | } 13 | 14 | Func f("f"); 15 | Func g("g"); 16 | RDom r(0, 32); 17 | 18 | g(x, y) = 0.0f; 19 | g(r.x, y) += noise(r.x); 20 | 21 | f(x, y, z) = g(x, y) + g(x+1, y); 22 | 23 | RVar rxo, rxi; 24 | g.compute_at(f, y).update().split(r.x, rxo, rxi, 2).unroll(rxi); 25 | f.unroll(z, 2); 26 | 27 | Buffer im = 
f.realize(64, 64, 4); 28 | 29 | printf("Success!\n"); 30 | return 0; 31 | } 32 | -------------------------------------------------------------------------------- /test/correctness/unused_func.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | using namespace Halide; 4 | 5 | int main(int argc, char **argv) { 6 | Var x, y, xi, yi; 7 | 8 | ImageParam input(Float(32), 2); 9 | 10 | Func filtered; 11 | filtered(x, y) = input(x, y); 12 | filtered.compute_root(); 13 | 14 | Func false_func; 15 | false_func() = cast(0); 16 | 17 | Func result; 18 | result(x, y) = select(false_func(), filtered(x, y), input(0, 0)); 19 | 20 | // The bounds required on the input depend on filtered, but 21 | // filtered is not going to be computed because it simplified away 22 | // entirely. This test ensures things compile anyway. 23 | result.compile_jit(); 24 | 25 | return 0; 26 | } 27 | -------------------------------------------------------------------------------- /test/correctness/update_chunk.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | // This test computes a function within the update step of a reduction 8 | 9 | Func f, g; 10 | Var x, y, z; 11 | RDom r(0, 10); 12 | 13 | f(x, y) = x*y; 14 | g(x, y) = 0; 15 | g(x, r) = f(r, x)+1; 16 | 17 | f.compute_at(g, r); 18 | g.realize(10, 10); 19 | 20 | printf("Success!\n"); 21 | return 0; 22 | } 23 | -------------------------------------------------------------------------------- /test/correctness/vector_extern.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | #include 4 | 5 | using namespace Halide; 6 | 7 | int main(int argc, char **argv) { 8 | Var x, y; 9 | Func f, g; 10 | 11 | printf("Defining function...\n"); 12 | 13 | f(x) = sqrt(cast(x)); 14 | 15 | 
f.vectorize(x, 4); 16 | Buffer im = f.realize(32); 17 | 18 | for (int i = 0; i < 32; i++) { 19 | float correct = sqrtf((float)i); 20 | if (fabs(im(i) - correct) > 0.001) { 21 | printf("im(%d) = %f instead of %f\n", i, im(i), correct); 22 | return -1; 23 | } 24 | } 25 | 26 | printf("Success!\n"); 27 | return 0; 28 | } 29 | -------------------------------------------------------------------------------- /test/correctness/vector_print_bug.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | using namespace Halide; 4 | 5 | int main(int argc, char **argv) { 6 | Func f; 7 | Var x; 8 | f(x) = print(x); 9 | f.vectorize(x, 4); 10 | f.realize(8); 11 | return 0; 12 | } 13 | -------------------------------------------------------------------------------- /test/correctness/vectorize_mixed_widths.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | 8 | Var x("x"); 9 | Func f("f"), g("g"); 10 | 11 | f(x) = 2*x; 12 | g(x) = f(x)/2; 13 | 14 | Var xo, xi; 15 | f.compute_at(g, x).split(x, xo, xi, 16).vectorize(xi, 8).unroll(xi); 16 | g.compute_root().vectorize(x, 16); 17 | 18 | Buffer r = g.realize(16); 19 | for (int i = 0; i < 16; i++) { 20 | if (r(i) != i) { 21 | std::cout << "Error at " << i << ": " << r(i) << std::endl; 22 | return -1; 23 | } 24 | } 25 | 26 | printf("Success!\n"); 27 | return 0; 28 | } 29 | -------------------------------------------------------------------------------- /test/error/ambiguous_inline_reductions.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Func f("f"); 8 | Var x("x"), y("y"); 9 | RDom r1(0, 10, "r1"), r2(0, 10, "r2"), r3(0, 10, "r3"); 10 | 11 | f(x, y) = product(sum(r1, r1 + r3) + 
sum(r2, r2 * 2 + r3)); 12 | 13 | // Is this the product over r1, or r3? It must be r3 because r1 is 14 | // used on the LHS, but Halide's not smart enough to know 15 | // that. All it sees is a product over an expression with two 16 | // reduction domains. 17 | f(r1, y) += product(sum(r2, r1 + r2 + r3)); 18 | 19 | Buffer result = f.realize(10, 10); 20 | 21 | return 0; 22 | } 23 | 24 | -------------------------------------------------------------------------------- /test/error/bad_bound.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Func f("f"); 8 | Var x("x"), y("y"); 9 | 10 | f(x) = 0; 11 | f.bound(y, 0, 10); 12 | 13 | return 0; 14 | } 15 | -------------------------------------------------------------------------------- /test/error/bad_compute_with.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Func f("f"); 8 | Var x("x"), y("y"); 9 | 10 | f(x, y) = x + y; 11 | f(x, y) += 2; 12 | f.update(0).compute_with(f, x); 13 | 14 | f.realize(10, 10); 15 | 16 | return 0; 17 | } 18 | -------------------------------------------------------------------------------- /test/error/bad_compute_with_invalid_specialization.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Var x("x"), y("y"); 8 | Func f("f"), g("g"), h("h"); 9 | 10 | f(x, y) = x + y; 11 | g(x, y) = x - y; 12 | h(x, y) = f(x - 1, y + 1) + g(x + 2, y - 2); 13 | 14 | f.compute_root(); 15 | g.compute_root(); 16 | 17 | Param tile; 18 | Var xo("xo"), xi("xi"); 19 | g.specialize(tile).split(x, xo, xi, 8); 20 | g.compute_with(f.specialize(tile), y, LoopAlignStrategy::AlignEnd); 
21 | 22 | tile.set(true); 23 | h.realize(200, 200); 24 | 25 | return 0; 26 | } -------------------------------------------------------------------------------- /test/error/bad_compute_with_parent_func_not_used.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Var x("x"), y("y"); 8 | Func f("f"), g("g"), h("h"), p("p"); 9 | 10 | f(x, y) = x + y; 11 | g(x, y) = x - y; 12 | p(x, y) = x * y; 13 | h(x, y) = g(x + 2, y - 2) + p(x, y); 14 | h(x, y) += f(x - 1, y + 1); 15 | 16 | f.compute_at(h, y); 17 | g.compute_at(h, y); 18 | p.compute_at(h, y); 19 | 20 | p.compute_with(f, x); 21 | g.compute_with(f, x); 22 | h.realize(200, 200); 23 | 24 | return 0; 25 | } 26 | -------------------------------------------------------------------------------- /test/error/bad_const_cast.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Func f; 8 | Var x; 9 | 10 | // The 256 here would be implicitly cast to uint8, and converted to 11 | // zero. That's bad. So we check for that inside IROperator.cpp. 
12 | f(x) = cast(x) % 256; 13 | 14 | printf("How did I get here?\n"); 15 | return 0; 16 | } 17 | -------------------------------------------------------------------------------- /test/error/bad_device_api.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Target t("host"); 8 | (void)get_device_interface_for_device_api((DeviceAPI)-1, t, "Bad DeviceAPI"); 9 | 10 | printf("I should not have reached here\n"); 11 | 12 | return 0; 13 | } 14 | -------------------------------------------------------------------------------- /test/error/bad_dimensions.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | ImageParam im(UInt(8), 2); 8 | 9 | Var x, y; 10 | Func f; 11 | 12 | f(x, y) = im(x, y); 13 | 14 | Buffer b(10, 10, 3); 15 | im.set(b); 16 | 17 | f.realize(10, 10); 18 | 19 | printf("There should have been an error\n"); 20 | return 0; 21 | } 22 | -------------------------------------------------------------------------------- /test/error/bad_extern_split.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Func f; 8 | Var x; 9 | f.define_extern("test", {}, Int(32), {x}); 10 | Var xo; 11 | f.split(x, xo, x, 8).reorder(xo, x); 12 | 13 | f.compile_jit(); 14 | return 0; 15 | } 16 | -------------------------------------------------------------------------------- /test/error/bad_fold.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "Halide.h" 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Var x, y, c; 8 | 9 | Func f, g; 10 | 11 | f(x, y) = x; 12 | g(x, 
y) = f(x-1, y+1) + f(x, y-1); 13 | f.store_root().compute_at(g, y).fold_storage(y, 2); 14 | 15 | Buffer im = g.realize(100, 1000); 16 | 17 | printf("Should have gotten a bad fold!\n"); 18 | return -1; 19 | } 20 | -------------------------------------------------------------------------------- /test/error/bad_host_alignment.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | using namespace Halide::Internal; 6 | 7 | IRPrinter irp(std::cerr); 8 | int main(int argc, char **argv) { 9 | Func f; 10 | Var x, y; 11 | ImageParam in(UInt(8), 2); 12 | 13 | Buffer param_buf(11, 10); 14 | param_buf.crop(0, 1, 10); 15 | 16 | in.set_host_alignment(512); 17 | f(x, y) = in(x, y); 18 | f.compute_root(); 19 | 20 | in.set(param_buf); 21 | Buffer result = f.realize(10, 10); 22 | 23 | printf("I should not have reached here\n"); 24 | 25 | return 0; 26 | } 27 | -------------------------------------------------------------------------------- /test/error/bad_rvar_order.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | RDom r1(0, 10, 0, 10); 8 | 9 | Func f("f"); 10 | Var x, y; 11 | f(x, y) = x + y; 12 | f(r1.x, r1.y) += f(r1.y, r1.x); 13 | 14 | // It's not permitted to change the relative ordering of reduction 15 | // domain variables when it could change the meaning. 
16 | f.update().reorder(r1.y, r1.x); 17 | 18 | f.realize(10, 10); 19 | 20 | printf("Success!\n"); 21 | return 0; 22 | } 23 | -------------------------------------------------------------------------------- /test/error/bad_schedule.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Func f, g; 8 | Var x, y; 9 | 10 | f(x) = x; 11 | g(x) = f(x); 12 | 13 | // f is inlined, so this schedule is bad. 14 | f.vectorize(x, 4); 15 | 16 | g.realize(10); 17 | 18 | printf("There should have been an error\n"); 19 | return 0; 20 | } 21 | -------------------------------------------------------------------------------- /test/error/bad_store_at.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Func f("f"), g("g"), h("h"); 8 | Var x("x"), y("y"); 9 | 10 | f(x) = x; 11 | g(x) = f(x); 12 | h(x, y) = g(x); 13 | 14 | g.compute_at(h, y); 15 | 16 | // This makes no sense, because the compute_at level is higher than the store_at level 17 | f.store_at(h, y).compute_root(); 18 | 19 | h.realize(10, 10); 20 | 21 | printf("I should not have reached here\n"); 22 | return 0; 23 | 24 | } 25 | -------------------------------------------------------------------------------- /test/error/buffer_larger_than_two_gigs.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | int main(int argc, char **argv) { 6 | if (sizeof(void *) == 8) { 7 | Buffer result(1 << 24, 1 << 24, 1 << 24); 8 | } else { 9 | Buffer result(1 << 12, 1 << 12, 1 << 8); 10 | } 11 | printf("Success!\n"); 12 | } 13 | -------------------------------------------------------------------------------- /test/error/clamp_out_of_range.cpp: 
-------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Var x; 8 | Func f; 9 | 10 | f(x) = clamp(cast(x), 0, 255); 11 | Buffer<> result = f.realize(42); 12 | 13 | printf("Success!\n"); 14 | 15 | printf("I should not have reached here\n"); 16 | return 0; 17 | } 18 | -------------------------------------------------------------------------------- /test/error/constrain_wrong_output_buffer.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Func f; 8 | Var x; 9 | f(x) = Tuple(x, sin(x)); 10 | 11 | // Don't do this. Instead constrain the size of output buffer 0. 12 | f.output_buffers()[1].dim(0).set_min(4); 13 | 14 | f.compile_jit(); 15 | 16 | return 0; 17 | } 18 | -------------------------------------------------------------------------------- /test/error/constraint_uses_non_param.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | using namespace Halide; 4 | 5 | int main(int argc, char **argv) { 6 | Func f, g; 7 | Var x, y; 8 | f(x, y) = 0; 9 | g(x, y) = f(x, y); 10 | Pipeline p(g); 11 | 12 | // This can't possibly be a precondition 13 | p.add_requirement(x == 4 && f(3, 2) == 5); 14 | 15 | p.realize(100, 100); 16 | 17 | return 0; 18 | } 19 | -------------------------------------------------------------------------------- /test/error/define_after_realize.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Func f, g; 8 | Var x; 9 | 10 | f(x) = x; 11 | 12 | Buffer im = f.realize(10); 13 | 14 | // Now try to add an update definition to f 15 | f(x) += 1; 16 | 17 | printf("There 
should have been an error\n"); 18 | return 0; 19 | } 20 | -------------------------------------------------------------------------------- /test/error/define_after_use.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Func f, g; 8 | Var x; 9 | 10 | f(x) = x; 11 | g(x) = f(x) + 1; 12 | 13 | // Now try to add an update definition to f 14 | f(x) += 1; 15 | 16 | printf("There should have been an error\n"); 17 | return 0; 18 | } 19 | -------------------------------------------------------------------------------- /test/error/device_target_mismatch.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Target t("host"); 8 | (void)get_device_interface_for_device_api(DeviceAPI::CUDA, t, "Device Target Mistmatch Test"); 9 | 10 | printf("I should not have reached here\n"); 11 | 12 | return 0; 13 | } 14 | -------------------------------------------------------------------------------- /test/error/extern_func_self_argument.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | using namespace Halide; 4 | 5 | extern "C" 6 | int extern_func() { 7 | return 0; 8 | } 9 | 10 | int main(int argc, char **argv) { 11 | Func f("f"); 12 | 13 | f.define_extern("extern_func", {f}, Int(32), 2); 14 | f.infer_arguments(); 15 | 16 | printf("There should have been an error\n"); 17 | return 0; 18 | } 19 | -------------------------------------------------------------------------------- /test/error/float_arg.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | #include 4 | 5 | using namespace Halide; 6 | 7 | int main(int argc, char **argv) { 8 | Func f; 9 | Var x, y; 10 | f(x, y) = 
3*x + y; 11 | 12 | // Should result in an error 13 | Func g; 14 | g(x) = f(f(x, 3) * 17.0f, 3); 15 | 16 | printf("Success!\n"); 17 | return 0; 18 | } 19 | 20 | -------------------------------------------------------------------------------- /test/error/forward_on_undefined_buffer.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | const Buffer<> foo; 8 | foo.raw_buffer(); 9 | 10 | printf("I should not have reached here\n"); 11 | return 0; 12 | } 13 | -------------------------------------------------------------------------------- /test/error/implicit_args.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Var x("x"), y("y"), z("z"); 8 | 9 | Func f("f"), g("g"), h("h"); 10 | 11 | g(x, y) = x + y; 12 | g.compute_root(); 13 | 14 | h(x, y, z) = x + y + z; 15 | h.compute_root(); 16 | 17 | // The initial definition uses 2 implicit vars: f(x, _0, _1) = g(_0, _1) + 2. 18 | // The update definition, however, calls h(_) which will be expanded into 19 | // h(_0, _1, _2), which is invalid. 
20 | f(x, _) = g(_) + 2; 21 | f(x, _) += h(_) + 3; 22 | 23 | return 0; 24 | } 25 | -------------------------------------------------------------------------------- /test/error/impossible_constraints.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | ImageParam input(Float(32), 2, "in"); 8 | 9 | Func out("out"); 10 | 11 | // The requires that the input be larger than the input 12 | out() = input(input.width(), input.height()) + input(0, 0); 13 | 14 | out.infer_input_bounds(); 15 | 16 | return 0; 17 | } 18 | 19 | -------------------------------------------------------------------------------- /test/error/init_def_should_be_all_vars.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Buffer in(10, 10); 8 | 9 | Func f("f"); 10 | RDom r(0, in.width(), 0, in.height()); 11 | f(r.x, r.y) = in(r.x, r.y) + 2; 12 | f.realize(in.width(), in.height()); 13 | 14 | return 0; 15 | } 16 | -------------------------------------------------------------------------------- /test/error/inspect_loop_level.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | LoopLevel root = LoopLevel::root(); 8 | 9 | printf("LoopLevel is %s\n", root.to_string().c_str()); // should fail 10 | 11 | printf("I should not have reached here\n"); 12 | 13 | return 0; 14 | } 15 | -------------------------------------------------------------------------------- /test/error/lerp_float_weight_out_of_range.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | #include 4 | 5 | using namespace Halide; 6 | 7 | int main(int 
argc, char **argv) { 8 | // This should trigger an error. 9 | Func f; 10 | f() = lerp(0, 42, 1.5f); 11 | 12 | printf("Success!\n"); 13 | return 0; 14 | } 15 | -------------------------------------------------------------------------------- /test/error/lerp_mismatch.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | #include 4 | 5 | using namespace Halide; 6 | 7 | int main(int argc, char **argv) { 8 | // This should trigger an error. 9 | Func f; 10 | f() = lerp(cast(0), cast(42), 0.5f); 11 | 12 | printf("Success!\n"); 13 | return 0; 14 | } 15 | -------------------------------------------------------------------------------- /test/error/lerp_signed_weight.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | #include 4 | 5 | using namespace Halide; 6 | 7 | int main(int argc, char **argv) { 8 | // This should trigger an error. 9 | Func f; 10 | f() = lerp(cast(0), cast(42), cast(16)); 11 | 12 | printf("Success!\n"); 13 | return 0; 14 | } 15 | -------------------------------------------------------------------------------- /test/error/memoize_different_compute_store.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Param val; 8 | 9 | Func f, g; 10 | Var x, y; 11 | 12 | f(x, y) = val + cast(x); 13 | g(x, y) = f(x, y) + f(x - 1, y) + f(x + 1, y); 14 | 15 | g.split(y, y, _, 16); 16 | f.store_root(); 17 | f.compute_at(g, y).memoize(); 18 | 19 | val.set(23.0f); 20 | Buffer out = g.realize(128, 128); 21 | 22 | for (int32_t i = 0; i < 128; i++) { 23 | for (int32_t j = 0; j < 128; j++) { 24 | assert(out(i, j) == (uint8_t)(3 * 23 + i + (i - 1) + (i + 1))); 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /test/error/metal_vector_too_large.cpp: 
-------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include "test/common/halide_test_dirs.h" 3 | #include 4 | 5 | using namespace Halide; 6 | 7 | int main(int argc, char **argv) { 8 | ImageParam input(UInt(16), 2, "input"); 9 | Func f("f"); 10 | Var x("x"), y("y"); 11 | 12 | f(x, y) = input(x, y) + 42; 13 | f.vectorize(x ,16).gpu_blocks(y, DeviceAPI::Metal); 14 | 15 | std::string test_object = Internal::get_test_tmp_dir() + "metal_vector_too_large.o"; 16 | Target mac_target("osx-metal"); 17 | 18 | f.compile_to_object(test_object, { input }, "f", mac_target); 19 | 20 | return 0; 21 | } 22 | -------------------------------------------------------------------------------- /test/error/missing_args.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Func f; 8 | Var x; 9 | ImageParam im(Int(8), 2); 10 | Param arg; 11 | 12 | f(x) = im(x, x) + arg; 13 | 14 | std::vector args; 15 | //args.push_back(im); 16 | //args.push_back(arg); 17 | f.compile_to_object("f.o", args, "f"); 18 | 19 | printf("Success!\n"); 20 | return 0; 21 | } 22 | -------------------------------------------------------------------------------- /test/error/modulo_constant_zero.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | #include 4 | 5 | using namespace Halide; 6 | 7 | int main(int argc, char **argv) { 8 | Func f; 9 | Var x; 10 | f(x) = x % 0; 11 | 12 | f.realize(10); 13 | 14 | printf("Success!\n"); 15 | return 0; 16 | } 17 | 18 | -------------------------------------------------------------------------------- /test/error/no_default_device.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 
7 | Target t("host"); 8 | (void)get_device_interface_for_device_api(DeviceAPI::Default_GPU, t, "No Default Device Test"); 9 | 10 | printf("I should not have reached here\n"); 11 | 12 | return 0; 13 | } 14 | -------------------------------------------------------------------------------- /test/error/nonexistent_update_stage.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Func f; 8 | Var x; 9 | f(x) = x; 10 | f.update().vectorize(x, 4); 11 | 12 | return 0; 13 | } 14 | -------------------------------------------------------------------------------- /test/error/overflow_during_constant_folding.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | using namespace Halide; 4 | 5 | int main(int argc, char **argv) { 6 | Func f; 7 | Var x; 8 | f(x) = Expr(0x12345678) * Expr(0x76543210); 9 | 10 | f.realize(10); 11 | 12 | return 0; 13 | } 14 | -------------------------------------------------------------------------------- /test/error/pointer_arithmetic.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Param p; 8 | p.set("Hello, world!\n"); 9 | 10 | Func f; 11 | Var x; 12 | // Should error out during match_types 13 | f(x) = p + 2; 14 | 15 | return 0; 16 | } 17 | -------------------------------------------------------------------------------- /test/error/race_condition.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | 8 | Func f, g; 9 | Var x, y; 10 | 11 | f(x, y) = 0; 12 | 13 | RDom r(0, 10, 0, 10); 14 | f(r.x, r.y) += f(r.y, r.x); 15 | 16 | // This schedule should be 
forbidden, because it causes a race condition. 17 | f.update().parallel(r.y); 18 | 19 | // We shouldn't reach here, because there should have been a compile error. 20 | printf("There should have been an error\n"); 21 | 22 | return 0; 23 | } 24 | -------------------------------------------------------------------------------- /test/error/rdom_undefined.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Expr undef_min, undef_extent; 8 | 9 | // This should assert-fail 10 | RDom r(undef_min, undef_min); 11 | 12 | // Just to ensure compiler doesn't optimize-away the RDom ctor 13 | printf("Dimensions: %d\n", r.dimensions()); 14 | 15 | printf("Success!\n"); 16 | return 0; 17 | } 18 | -------------------------------------------------------------------------------- /test/error/reduction_bounds.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Func f("f"), g("g"); 8 | Var x("x"); 9 | RDom r(0, 100, "r"); 10 | 11 | f(x) = x; 12 | 13 | g(x) = 0; 14 | g(x) = f(g(x-1)) + r; 15 | 16 | f.compute_at(g, r.x); 17 | 18 | // Use of f is unbounded in g. 19 | 20 | g.realize(100); 21 | 22 | return 0; 23 | } 24 | -------------------------------------------------------------------------------- /test/error/reduction_type_mismatch.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "Halide.h" 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Var x; 8 | Func f; 9 | RDom dom(0, 50); 10 | 11 | f(x) = cast(0); // The type here... 12 | f(dom) += 1.0f; // does not match the type here. 
13 | 14 | // Should result in an error 15 | Buffer result = f.realize(50); 16 | 17 | printf("Success!\n"); 18 | return 0; 19 | } 20 | -------------------------------------------------------------------------------- /test/error/require_fail.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | #include 4 | 5 | using namespace Halide; 6 | 7 | int main(int argc, char **argv) { 8 | const int kPrime1 = 7829; 9 | const int kPrime2 = 7919; 10 | 11 | Buffer result; 12 | Param p1, p2; 13 | Var x; 14 | Func f; 15 | f(x) = require((p1 + p2) == kPrime1, 16 | (p1 + p2) * kPrime2, 17 | "The parameters should add to exactly", kPrime1, "but were", p1, p2); 18 | // choose values that will fail 19 | p1.set(1); 20 | p2.set(2); 21 | result = f.realize(1); 22 | 23 | return 0; 24 | 25 | } 26 | -------------------------------------------------------------------------------- /test/error/reuse_var_in_schedule.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | using namespace Halide; 4 | 5 | int main(int argc, char **argv) { 6 | Func f; 7 | Var x; 8 | 9 | f(x) = x; 10 | 11 | Var xo, xi; 12 | f.split(x, xo, xi, 4).split(xo, xo, xi, 4); 13 | 14 | return 0; 15 | } 16 | -------------------------------------------------------------------------------- /test/error/reused_args.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | using namespace Halide; 4 | 5 | int main(int argc, char **argv) { 6 | Func f; 7 | Var x; 8 | // You can't use the same variable more than once in the LHS of a 9 | // pure definition. 
10 | f(x, x) = x; 11 | 12 | return 0; 13 | } 14 | -------------------------------------------------------------------------------- /test/error/specialize_fail.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Var x; 8 | Param p; 9 | 10 | Func f; 11 | f(x) = x; 12 | f.specialize(p == 0).vectorize(x, 8); 13 | f.specialize_fail("Expected failure"); 14 | 15 | p.set(42); // arbitrary nonzero value 16 | f.realize(100); 17 | 18 | printf("How did I get here?\n"); 19 | 20 | return 0; 21 | } 22 | -------------------------------------------------------------------------------- /test/error/split_inner_wrong_tail_strategy.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | using namespace Halide; 4 | 5 | int main(int argc, char **argv) { 6 | Func f; 7 | Var x; 8 | f(x) = x; 9 | f(x) += 1; 10 | Var xo, xi, xio, xii; 11 | // Would redundantly redo some +=1, and create incorrect output. 
12 | f.compute_root(); 13 | f.update().split(x, xo, xi, 8).split(xi, xio, xii, 9, TailStrategy::RoundUp); 14 | 15 | Func g; 16 | g(x) = f(x); 17 | g.realize(10); 18 | 19 | return 0; 20 | } 21 | -------------------------------------------------------------------------------- /test/error/thread_id_outside_block_id.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Target t = get_jit_target_from_environment(); 8 | t.set_feature(Target::CUDA); 9 | 10 | Func f; 11 | Var x; 12 | f(x) = x; 13 | Var xo, xi; 14 | f.gpu_tile(x, xo, xi, 16).reorder(xo, xi); 15 | 16 | f.compile_jit(t); 17 | Buffer result = f.realize(16); 18 | 19 | printf("There should have been an error\n"); 20 | return 0; 21 | } 22 | 23 | -------------------------------------------------------------------------------- /test/error/too_many_args.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "Halide.h" 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Var x, y; 8 | 9 | Func one_arg; 10 | one_arg(x) = x * 2; // One argument 11 | 12 | Func bad_call; 13 | bad_call(x, y) = one_arg(x, y); // Called with two 14 | 15 | // Should result in an error 16 | Buffer result = bad_call.realize(256, 256); 17 | 18 | printf("Success!\n"); 19 | return 0; 20 | } 21 | -------------------------------------------------------------------------------- /test/error/tuple_arg_select_undef.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | using namespace Halide::Internal; 6 | 7 | int main(int argc, char **argv) { 8 | Var x("x"), y("y"); 9 | Func f("f"), g("g"); 10 | 11 | f(x, y) = {0, 0}; 12 | 13 | RDom r(0, 10); 14 | Expr arg_0 = clamp(select(r.x < 2, 13, undef()), 0, 20); 15 | Expr arg_1 = 
clamp(select(r.x < 5, 23, undef()), 0, 20); 16 | // Different predicates for the undefs: should result in an error 17 | f(arg_0, arg_1) = {f(arg_0, arg_1)[0] + 10, f(arg_0, arg_1)[1] + 5}; 18 | 19 | f.realize(100, 100); 20 | 21 | printf("Success!\n"); 22 | return 0; 23 | } 24 | -------------------------------------------------------------------------------- /test/error/tuple_val_select_undef.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | using namespace Halide::Internal; 6 | 7 | int main(int argc, char **argv) { 8 | Var x("x"); 9 | Func f("f"); 10 | 11 | // Should result in an error 12 | f(x) = {x, select(x < 20, 20*x, undef())}; 13 | f.realize(10); 14 | 15 | printf("Success!\n"); 16 | return 0; 17 | } 18 | -------------------------------------------------------------------------------- /test/error/unbounded_input.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Func f; 8 | Var x, y; 9 | 10 | ImageParam in(Float(32), 2); 11 | ImageParam x_coord(Int(32), 2); 12 | ImageParam y_coord(Int(32), 2); 13 | 14 | f(x, y) = in(x_coord(x, y), y_coord(x, y)); 15 | 16 | f.compile_jit(); 17 | 18 | printf("I should not have reached here\n"); 19 | 20 | return 0; 21 | } 22 | -------------------------------------------------------------------------------- /test/error/unbounded_output.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Func f; 8 | Var x, y; 9 | 10 | ImageParam in(Float(32), 2); 11 | ImageParam x_coord(Int(32), 2); 12 | ImageParam y_coord(Int(32), 2); 13 | 14 | f(x, y) = 0.0f; 15 | RDom r(0, 100, 0, 100); 16 | f(x_coord(r.x, r.y), y_coord(r.x, r.y)) += in(r.x, 
r.y); 17 | 18 | f.compile_jit(); 19 | 20 | printf("I should not have reached here\n"); 21 | 22 | return 0; 23 | } 24 | -------------------------------------------------------------------------------- /test/error/undefined_func_compile.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "Halide.h" 4 | 5 | #include "test/common/halide_test_dirs.h" 6 | 7 | using namespace Halide; 8 | 9 | int main(int argc, char **argv) { 10 | Func f("f"); 11 | 12 | std::string test_object = Internal::get_test_tmp_dir() + "compile_undefined.o"; 13 | f.compile_to_object(test_object, {}, "f"); 14 | 15 | // We shouldn't reach here, because there should have been a compile error. 16 | printf("There should have been an error\n"); 17 | 18 | return 0; 19 | } 20 | -------------------------------------------------------------------------------- /test/error/undefined_func_realize.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "Halide.h" 4 | 5 | #include "test/common/halide_test_dirs.h" 6 | 7 | using namespace Halide; 8 | 9 | int main(int argc, char **argv) { 10 | Func f("f"); 11 | 12 | Buffer result = f.realize(100, 5, 3); 13 | 14 | // We shouldn't reach here, because there should have been a compile error. 
15 | printf("There should have been an error\n"); 16 | 17 | return 0; 18 | } 19 | -------------------------------------------------------------------------------- /test/error/undefined_loop_level.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | LoopLevel undefined; 8 | 9 | Var x; 10 | Func f, g; 11 | f(x) = x; 12 | g(x) = f(x); 13 | f.compute_at(undefined); 14 | g.compute_root(); 15 | 16 | // Trying to lower/realize with an undefined LoopLevel should be fatal 17 | Buffer result = g.realize(1); 18 | 19 | printf("I should not have reached here\n"); 20 | 21 | return 0; 22 | } 23 | -------------------------------------------------------------------------------- /test/error/undefined_pipeline_compile.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "Halide.h" 4 | 5 | #include "test/common/halide_test_dirs.h" 6 | 7 | using namespace Halide; 8 | 9 | int main(int argc, char **argv) { 10 | Func f("f"); 11 | 12 | Pipeline p(f); 13 | std::string test_object = Internal::get_test_tmp_dir() + "compile_undefined.o"; 14 | p.compile_to_object(test_object, {}, "f"); 15 | 16 | // We shouldn't reach here, because there should have been a compile error. 17 | printf("There should have been an error\n"); 18 | 19 | return 0; 20 | } 21 | -------------------------------------------------------------------------------- /test/error/undefined_pipeline_realize.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "Halide.h" 4 | 5 | using namespace Halide; 6 | 7 | int main(int argc, char **argv) { 8 | Func f("f"); 9 | 10 | Pipeline p(f); 11 | Buffer result = p.realize(100, 5, 3); 12 | 13 | // We shouldn't reach here, because there should have been a compile error. 
14 | printf("There should have been an error\n"); 15 | 16 | return 0; 17 | } 18 | -------------------------------------------------------------------------------- /test/error/undefined_rdom_dimension.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "Halide.h" 4 | 5 | int error_occurred = false; 6 | void halide_error(void *ctx, const char *msg) { 7 | printf("Expected: %s\n", msg); 8 | error_occurred = true; 9 | } 10 | 11 | using namespace Halide; 12 | 13 | int main(int argc, char **argv) { 14 | Func f("f"), g("g"), h("h"); 15 | Var x("x"), y("y"), c("c"); 16 | 17 | RDom r(1, 99, "r"); 18 | g(x, y, c) = 42; 19 | h(x, y, c) = 88; 20 | f(x, y, c) = g(x, y, c); 21 | f(r.x, r.y, c) = f(r.x-1, r.y, c) + h(r.x, r.y, c); 22 | 23 | f.set_error_handler(&halide_error); 24 | Buffer result = f.realize(100, 5, 3); 25 | 26 | assert(error_occurred); 27 | printf("Success!\n"); 28 | } 29 | -------------------------------------------------------------------------------- /test/error/unknown_target.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Target t; 8 | 9 | // Calling natural_vector_size() on a Target with Unknown fields 10 | // should generate user_error. 
11 | (void) t.natural_vector_size(); 12 | 13 | printf("I should not have reached here\n"); 14 | return 0; 15 | } 16 | -------------------------------------------------------------------------------- /test/error/vectorize_dynamic.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "Halide.h" 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Var x, y; 8 | 9 | Buffer input(5, 5); 10 | Func f; 11 | f(x, y) = input(x, y) * 2; 12 | Var xo, xi; 13 | 14 | Param vector_size; 15 | 16 | // You can only vectorize across compile-time-constant sizes. 17 | f.split(x, xo, xi, vector_size).vectorize(xi); 18 | 19 | // Should result in an error 20 | vector_size.set(4); 21 | Buffer out = f.realize(5, 5); 22 | 23 | printf("Success!\n"); 24 | return 0; 25 | } 26 | -------------------------------------------------------------------------------- /test/error/vectorize_too_little.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "Halide.h" 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Var x, y; 8 | 9 | Buffer input(5, 5); 10 | Func f; 11 | f(x, y) = input(x, y) * 2; 12 | f.vectorize(x, 0); 13 | 14 | // Should result in an error 15 | Buffer out = f.realize(5, 5); 16 | 17 | printf("Success!\n"); 18 | return 0; 19 | } 20 | -------------------------------------------------------------------------------- /test/error/vectorize_too_much.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "Halide.h" 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Var x, y; 8 | 9 | Buffer input(5, 5); 10 | Func f; 11 | f(x, y) = input(x, y) * 2; 12 | f.vectorize(x, 8).vectorize(y, 8); 13 | 14 | // Should result in an error 15 | Buffer out = f.realize(5, 5); 16 | 17 | printf("Success!\n"); 18 | return 0; 19 | } 20 | 
-------------------------------------------------------------------------------- /test/error/vectorized_extern.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Func f; 8 | Var x; 9 | f.define_extern("test", {}, Int(32), {x}); 10 | Var xo; 11 | f.split(x, xo, x, 8).vectorize(xo); 12 | 13 | f.compile_jit(); 14 | return 0; 15 | } 16 | -------------------------------------------------------------------------------- /test/error/wrap_custom_after_shared.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Func f("f"), g1("g1"), g2("g2"), g3("g3"), g4("g4"); 8 | Var x("x"), y("y"); 9 | 10 | f(x) = x; 11 | g1(x, y) = f(x); 12 | g2(x, y) = f(x); 13 | g3(x, y) = f(x); 14 | 15 | // It's not valid to call f.in(g1) after defining a shared wrapper for 16 | // {g1, g2, g3} 17 | Func wrapper1 = f.in({g1, g4, g3}); 18 | Func wrapper2 = f.in(g3); 19 | return 0; 20 | } 21 | -------------------------------------------------------------------------------- /test/error/wrap_frozen.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Func f("f"), g("g"); 8 | Var x("x"), y("y"); 9 | 10 | f(x) = x; 11 | g(x) = f(x); 12 | Func wrapper = f.in(g); 13 | wrapper(x) += 1; 14 | 15 | return 0; 16 | } 17 | -------------------------------------------------------------------------------- /test/error/wrapper_never_used.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | using namespace Halide; 4 | using namespace Halide::Internal; 5 | 6 | int main() { 7 | Var x("x"), y("y"); 8 | Func f("f"), 
g("g"), h("h"); 9 | f(x, y) = x + y; 10 | g(x, y) = 5; 11 | h(x, y) = f(x, y) + g(x, y); 12 | 13 | f.compute_root(); 14 | f.in(g).compute_root(); 15 | 16 | // This should cause an error since f.in(g) was called but 'f' is 17 | // never used in 'g'. 18 | h.realize(5, 5); 19 | 20 | return 0; 21 | } -------------------------------------------------------------------------------- /test/error/wrong_dimensionality_extern_stage.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | using namespace Halide; 4 | 5 | int main(int argc, char **argv) { 6 | Func f, g; 7 | Var x, y; 8 | 9 | g.define_extern("foo", {}, UInt(16), 3); 10 | 11 | // Should throw an error immediately because g was defined with 3 dimensions. 12 | f(x, y) = cast(g(x, y)); 13 | 14 | return 0; 15 | } 16 | -------------------------------------------------------------------------------- /test/error/wrong_type.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Func f; 8 | Var x; 9 | f(x) = x; 10 | Buffer im = f.realize(100); 11 | 12 | return 0; 13 | } 14 | -------------------------------------------------------------------------------- /test/generator/alias_generator.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | namespace { 4 | 5 | class Alias : public Halide::Generator { 6 | public: 7 | GeneratorParam offset{ "offset", 0 }; 8 | Input> input{ "input", 1 }; 9 | Output> output{ "output", 1 }; 10 | 11 | void generate() { 12 | Var x; 13 | output(x) = input(x) + offset; 14 | } 15 | }; 16 | 17 | } // namespace 18 | 19 | HALIDE_REGISTER_GENERATOR(Alias, alias) 20 | HALIDE_REGISTER_GENERATOR_ALIAS(alias_with_offset_42, alias, { { "offset", "42" }}) 21 | --------------------------------------------------------------------------------
/test/generator/argvcall_generator.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | namespace { 4 | 5 | class ArgvCall : public Halide::Generator { 6 | public: 7 | Input f1{ "f1", 1.0 }; 8 | Input f2{ "f2", 1.0 }; 9 | 10 | Output> output{ "output", 3 }; 11 | 12 | void generate() { 13 | Var x, y, c; 14 | Func f("f"); 15 | 16 | f(x, y) = max(x, y); 17 | output(x, y, c) = cast(f(x, y) * c * f1 / f2); 18 | 19 | output.bound(c, 0, 3).reorder(c, x, y).unroll(c); 20 | 21 | output.vectorize(x, natural_vector_size()); 22 | } 23 | }; 24 | 25 | } // namespace 26 | 27 | HALIDE_REGISTER_GENERATOR(ArgvCall, argvcall) 28 | -------------------------------------------------------------------------------- /test/generator/can_use_target_generator.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | namespace { 4 | 5 | class CanUseTarget : public Halide::Generator { 6 | public: 7 | Output> output{"output", 2}; 8 | 9 | // Currently really just a placeholder: can_use_target_aottest.cpp just 10 | // needs to test the runtime itself, not the generator function.
11 | void generate() { 12 | Var x, y; 13 | output(x, y) = cast((int32_t)0xdeadbeef); 14 | } 15 | }; 16 | 17 | } // namespace 18 | 19 | HALIDE_REGISTER_GENERATOR(CanUseTarget, can_use_target) 20 | 21 | -------------------------------------------------------------------------------- /test/generator/cxx_mangling_externs.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | // These are the HalideExtern functions referenced by cxx_mangling_generator.cpp 4 | int32_t extract_value_global(int32_t *arg) { 5 | return *arg; 6 | } 7 | 8 | namespace HalideTest { 9 | 10 | int32_t extract_value_ns(const int32_t *arg) { 11 | return *arg; 12 | } 13 | 14 | } 15 | 16 | -------------------------------------------------------------------------------- /test/generator/error_codes_generator.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | namespace { 4 | 5 | class ErrorCodes : public Halide::Generator { 6 | public: 7 | Input> input{ "input", 2}; 8 | Input f_explicit_bound{"f_explicit_bound", 1, 0, 64}; 9 | 10 | Output> output{"output", 2}; 11 | 12 | void generate() { 13 | assert(!get_target().has_feature(Target::LargeBuffers)); 14 | Var x, y; 15 | 16 | output(x, y) = input(x, y); 17 | output.bound(x, 0, f_explicit_bound); 18 | 19 | add_requirement(input.dim(1).extent() == 123); 20 | } 21 | }; 22 | 23 | } // namespace 24 | 25 | HALIDE_REGISTER_GENERATOR(ErrorCodes, error_codes) 26 | -------------------------------------------------------------------------------- /test/generator/external_code_extern.cpp: -------------------------------------------------------------------------------- 1 | extern "C" float gen_extern_tester(float in) { 2 | return in + 42; 3 | } 4 | -------------------------------------------------------------------------------- /test/generator/float16_t_generator.cpp: -------------------------------------------------------------------------------- 1 | 
#include "Halide.h" 2 | 3 | class Float16T : public Halide::Generator { 4 | public: 5 | Output> output{"output", 1}; 6 | 7 | void generate() { 8 | // Currently the float16 aot test just exercises the 9 | // runtime. More interesting code may go here in the future. 10 | Var x; 11 | output(x) = x; 12 | } 13 | }; 14 | 15 | HALIDE_REGISTER_GENERATOR(Float16T, float16_t) 16 | -------------------------------------------------------------------------------- /test/generator/gpu_object_lifetime_generator.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | namespace { 4 | 5 | class GpuObjectLifetime : public Halide::Generator { 6 | public: 7 | Output> output{"output", 1}; 8 | 9 | void generate() { 10 | Var x; 11 | 12 | output(x) = x; 13 | 14 | Target target = get_target(); 15 | if (target.has_gpu_feature()) { 16 | Var xo, xi; 17 | output.gpu_tile(x, xo, xi, 16); 18 | } 19 | } 20 | }; 21 | 22 | } // namespace 23 | 24 | HALIDE_REGISTER_GENERATOR(GpuObjectLifetime, gpu_object_lifetime) 25 | -------------------------------------------------------------------------------- /test/generator/gpu_only_generator.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | namespace { 4 | 5 | class GpuOnly : public Halide::Generator { 6 | public: 7 | Input> input{"input", 2}; 8 | 9 | Output> output{"output", 2}; 10 | 11 | void generate() { 12 | Var x("x"), y("y"); 13 | 14 | // Create a simple pipeline that scales pixel values by 2. 
15 | output(x, y) = input(x, y) * 2; 16 | 17 | Target target = get_target(); 18 | if (target.has_gpu_feature()) { 19 | Var xo, yo, xi, yi; 20 | output.gpu_tile(x, y, xo, yo, xi, yi, 16, 16); 21 | } 22 | } 23 | }; 24 | 25 | } // namespace 26 | 27 | HALIDE_REGISTER_GENERATOR(GpuOnly, gpu_only) 28 | -------------------------------------------------------------------------------- /test/generator/image_from_array_generator.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | namespace { 4 | 5 | class ImageFromArray : public Halide::Generator { 6 | public: 7 | Output> output{"output", 1}; 8 | 9 | void generate() { 10 | // Currently the test just exercises halide_image.h. 11 | Var x; 12 | output(x) = x; 13 | } 14 | }; 15 | 16 | } // namespace 17 | 18 | HALIDE_REGISTER_GENERATOR(ImageFromArray, image_from_array) 19 | 20 | -------------------------------------------------------------------------------- /test/generator/matlab_generator.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | using namespace Halide; 4 | 5 | namespace { 6 | 7 | class Matlab : public Halide::Generator { 8 | public: 9 | Input> input{"input", 2}; 10 | Input scale{"scale"}; 11 | Input negate{"negate"}; 12 | 13 | Output> output{"output", 2}; 14 | 15 | void generate() { 16 | Var x, y; 17 | output(x, y) = input(x, y) * scale * select(negate, -1.0f, 1.0f); 18 | } 19 | }; 20 | 21 | } // namespace 22 | 23 | HALIDE_REGISTER_GENERATOR(Matlab, matlab) 24 | -------------------------------------------------------------------------------- /test/generator/multitarget_generator.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | namespace { 4 | 5 | class Multitarget : public Halide::Generator { 6 | public: 7 | Output> output{"output", 2}; 8 | 9 | void generate() { 10 | Var x, y; 11 | if 
(get_target().has_feature(Target::Debug)) { 12 | output(x, y) = cast((int32_t)0xdeadbeef); 13 | } else { 14 | output(x, y) = cast((int32_t)0xf00dcafe); 15 | } 16 | } 17 | }; 18 | 19 | } // namespace 20 | 21 | HALIDE_REGISTER_GENERATOR(Multitarget, multitarget) 22 | -------------------------------------------------------------------------------- /test/generator/nested_externs_aottest.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "HalideBuffer.h" 5 | #include "nested_externs_root.h" 6 | 7 | using namespace Halide::Runtime; 8 | 9 | int main(int argc, char **argv) { 10 | auto buf = Buffer::make_interleaved(100, 200, 3); 11 | 12 | nested_externs_root(38.5f, buf); 13 | 14 | buf.for_each_element([&](int x, int y, int c) { 15 | const float correct = 158.0f; 16 | const float actual = buf(x, y, c); 17 | if (actual != correct) { 18 | printf("result(%d, %d, %d) = %f instead of %f\n", 19 | x, y, c, actual, correct); 20 | exit(-1); 21 | } 22 | }); 23 | 24 | printf("Success!\n"); 25 | return 0; 26 | } 27 | -------------------------------------------------------------------------------- /test/generator/string_param_aottest.cpp: -------------------------------------------------------------------------------- 1 | #include "HalideBuffer.h" 2 | #include "HalideRuntime.h" 3 | #include "string_param.h" 4 | #include 5 | 6 | int main(int argc, char **argv) { 7 | Halide::Runtime::Buffer output(3, 3); 8 | string_param(output); 9 | 10 | for (int x = 0; x < 3; ++x) { 11 | for (int y = 0; y < 3; ++y) { 12 | int expected_value = (5 * y + x); 13 | if (output(x, y) != expected_value) { 14 | printf("Unexpected output value : %d at output(%d, %d)\n", output(x, y), x, y); 15 | return -1; 16 | } 17 | } 18 | } 19 | 20 | printf("Success!\n"); 21 | return 0; 22 | } 23 | -------------------------------------------------------------------------------- /test/generator/user_context_insanity_generator.cpp: 
-------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | namespace { 4 | 5 | class UserContextInsanity : public Halide::Generator { 6 | public: 7 | Input> input{"input", 2}; 8 | Output> output{"output", 2}; 9 | 10 | void generate() { 11 | Var x, y; 12 | 13 | Func g; 14 | g(x, y) = input(x, y) * 2; 15 | g.compute_root(); 16 | 17 | output(x, y) = g(x, y); 18 | 19 | output.parallel(y); 20 | output.trace_stores(); 21 | } 22 | }; 23 | 24 | } // namespace 25 | 26 | HALIDE_REGISTER_GENERATOR(UserContextInsanity, user_context_insanity) 27 | -------------------------------------------------------------------------------- /test/generator/variable_num_threads_generator.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | namespace { 4 | 5 | class VariableNumThreads : public Halide::Generator { 6 | public: 7 | Output> output{"output", 2}; 8 | 9 | void generate() { 10 | // A job with lots of nested parallelism 11 | Var x, y; 12 | 13 | output(x, y) = sqrt(sqrt(x*y)); 14 | output.parallel(x).parallel(y); 15 | } 16 | }; 17 | 18 | } // namespace 19 | 20 | HALIDE_REGISTER_GENERATOR(VariableNumThreads, variable_num_threads) 21 | -------------------------------------------------------------------------------- /test/opengl/inline_reduction.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | #include "testing.h" 5 | 6 | using namespace Halide; 7 | 8 | int main() { 9 | // This test must be run with an OpenGL target. 
10 | const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL); 11 | 12 | Func f; 13 | Var x, y, c; 14 | RDom r(0, 10); 15 | f(x, y, c) = sum(cast(r)); 16 | f.bound(c, 0, 3).glsl(x, y, c); 17 | 18 | Buffer result = f.realize(100, 100, 3, target); 19 | 20 | if (!Testing::check_result(result, [&](int x, int y, int c) { return 45; })) { 21 | return 1; 22 | } 23 | 24 | printf("Success!\n"); 25 | 26 | return 0; 27 | } 28 | -------------------------------------------------------------------------------- /test/opengl/internal.cpp: -------------------------------------------------------------------------------- 1 | #include "../../src/CodeGen_OpenGL_Dev.h" 2 | 3 | using namespace Halide; 4 | using namespace Halide::Internal; 5 | 6 | int main() { 7 | CodeGen_GLSL::test(); 8 | 9 | return 0; 10 | } 11 | -------------------------------------------------------------------------------- /test/opengl/set_pixels.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | #include "testing.h" 5 | 6 | using namespace Halide; 7 | 8 | int main() { 9 | // This test must be run with an OpenGL target. 
10 | const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL); 11 | 12 | Func f; 13 | Var x, y, c; 14 | 15 | f(x, y, c) = cast(42); 16 | 17 | Buffer out(10, 10, 3); 18 | f.bound(c, 0, 3).glsl(x, y, c); 19 | f.realize(out, target); 20 | 21 | out.copy_to_host(); 22 | if (!Testing::check_result(out, [](int x, int y, int c) { return 42; })) { 23 | return 1; 24 | } 25 | 26 | printf("Success!\n"); 27 | return 0; 28 | } 29 | -------------------------------------------------------------------------------- /test/opengl/vagrant/.gitignore: -------------------------------------------------------------------------------- 1 | .vagrant 2 | -------------------------------------------------------------------------------- /test/opengl/vagrant/build_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh -x 2 | mkdir -p ~/halide_build 3 | cd ~/halide_build 4 | ln -s -f /Halide/Makefile . 5 | make -j 3 6 | make -k test_opengl 7 | -------------------------------------------------------------------------------- /test/opengl/vagrant/provision/etc/environment: -------------------------------------------------------------------------------- 1 | PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games" 2 | LC_ALL=C 3 | DISPLAY=:0.0 4 | LLVM_CONFIG=/usr/bin/llvm-config-3.8 5 | CLANG=/usr/bin/clang-3.8 6 | HL_TARGET=host-opengl 7 | HL_JIT_TARGET=host-opengl 8 | -------------------------------------------------------------------------------- /test/opengl/vagrant/provision/etc/init/xdummy.conf: -------------------------------------------------------------------------------- 1 | description "Dummy X server providing DISPLAY=:0.0" 2 | 3 | expect fork 4 | 5 | script 6 | /usr/bin/Xorg -noreset +extension GLX +extension RANDR +extension RENDER -logfile /var/log/Xorg.log :0 & 7 | end script 8 | -------------------------------------------------------------------------------- 
/test/opengl/vagrant/provision/etc/systemd/system/xdummy.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Dummy X server providing DISPLAY=:0.0 3 | 4 | [Service] 5 | Type=simple 6 | ExecStart=/usr/bin/Xorg -noreset +extension GLX +extension RANDR +extension RENDER -config /dev/null -logfile /var/log/Xorg.log :0 7 | -------------------------------------------------------------------------------- /test/performance/jit_stress.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | #include 4 | #include "halide_benchmark.h" 5 | 6 | using namespace Halide; 7 | using namespace Halide::Tools; 8 | 9 | int main(int argc, char **argv) { 10 | Var x; 11 | 12 | ImageParam a(Int(32), 1); 13 | Buffer b(1), c(1); 14 | b(0) = 17; 15 | c(0) = 0; 16 | a.set(c); 17 | 18 | int expected = 0; 19 | double t = benchmark([&]() { 20 | Func f; 21 | f(x) = a(x) + b(x); 22 | f.realize(c); 23 | expected += 17; 24 | assert(c(0) == expected); 25 | }); 26 | 27 | printf("%g ms per jit compilation\n", t * 1e3); 28 | 29 | printf("Success!\n"); 30 | return 0; 31 | } 32 | -------------------------------------------------------------------------------- /test/warning/double_vectorize.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Func f, g; 8 | Var x, y; 9 | f(x, y) = x + y; 10 | g(x, y) = f(x, y) + f(x + 1, y); 11 | 12 | // Nested vectorization should cause a warning.
13 | Var xi; 14 | g.split(x, x, xi, 8).vectorize(xi); 15 | f.compute_at(g, xi).vectorize(x); 16 | 17 | g.realize(16, 16); 18 | 19 | return 0; 20 | } 21 | -------------------------------------------------------------------------------- /test/warning/hidden_pure_definition.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | #include 3 | 4 | using namespace Halide; 5 | 6 | int main(int argc, char **argv) { 7 | Func f; 8 | Var x; 9 | 10 | f(x) = x; 11 | 12 | // Hide the previous definition. 13 | f(x) = 2; 14 | 15 | return 0; 16 | } 17 | -------------------------------------------------------------------------------- /tools/GenGen.cpp: -------------------------------------------------------------------------------- 1 | #include "Halide.h" 2 | 3 | int main(int argc, char **argv) { 4 | return Halide::Internal::generate_filter_main(argc, argv, std::cerr); 5 | } 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /tools/halide_config.cmake.tpl: -------------------------------------------------------------------------------- 1 | # Machine-Generated: Do Not Edit 2 | set(HALIDE_SYSTEM_LIBS @HALIDE_SYSTEM_LIBS_RAW@) 3 | set(HALIDE_RTTI @HALIDE_RTTI_RAW@) 4 | -------------------------------------------------------------------------------- /tools/halide_config.make.tpl: -------------------------------------------------------------------------------- 1 | # Machine-Generated: Do Not Edit 2 | HALIDE_SYSTEM_LIBS=@HALIDE_SYSTEM_LIBS_RAW@ 3 | HALIDE_RTTI=@HALIDE_RTTI_RAW@ 4 | -------------------------------------------------------------------------------- /tutorial/.gitignore: -------------------------------------------------------------------------------- 1 | lesson_01 2 | lesson_02 3 | lesson_03 4 | lesson_04 5 | lesson_05 6 | blurred.png 7 | brighter.png 8 | -------------------------------------------------------------------------------- /tutorial/figures/lesson_02_input.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_02_input.jpg -------------------------------------------------------------------------------- /tutorial/figures/lesson_02_output.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_02_output.jpg -------------------------------------------------------------------------------- /tutorial/figures/lesson_05_col_major.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_05_col_major.gif -------------------------------------------------------------------------------- /tutorial/figures/lesson_05_fast.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_05_fast.mp4 -------------------------------------------------------------------------------- /tutorial/figures/lesson_05_parallel_tiles.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_05_parallel_tiles.gif -------------------------------------------------------------------------------- /tutorial/figures/lesson_05_row_major.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_05_row_major.gif 
-------------------------------------------------------------------------------- /tutorial/figures/lesson_05_split_7_by_3.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_05_split_7_by_3.gif -------------------------------------------------------------------------------- /tutorial/figures/lesson_05_tiled.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_05_tiled.gif -------------------------------------------------------------------------------- /tutorial/figures/lesson_05_vectors.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_05_vectors.gif -------------------------------------------------------------------------------- /tutorial/figures/lesson_08_compute_root.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_08_compute_root.gif -------------------------------------------------------------------------------- /tutorial/figures/lesson_08_compute_y.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_08_compute_y.gif -------------------------------------------------------------------------------- /tutorial/figures/lesson_08_mixed.mp4: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_08_mixed.mp4 -------------------------------------------------------------------------------- /tutorial/figures/lesson_08_store_root_compute_x.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_08_store_root_compute_x.gif -------------------------------------------------------------------------------- /tutorial/figures/lesson_08_store_root_compute_y.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_08_store_root_compute_y.gif -------------------------------------------------------------------------------- /tutorial/figures/lesson_08_tile.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_08_tile.gif -------------------------------------------------------------------------------- /tutorial/figures/lesson_09_compute_at_multiple_updates.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_09_compute_at_multiple_updates.mp4 -------------------------------------------------------------------------------- /tutorial/figures/lesson_09_compute_at_pure.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_09_compute_at_pure.gif 
-------------------------------------------------------------------------------- /tutorial/figures/lesson_09_compute_at_pure_and_update.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_09_compute_at_pure_and_update.gif -------------------------------------------------------------------------------- /tutorial/figures/lesson_09_compute_at_rvar.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_09_compute_at_rvar.gif -------------------------------------------------------------------------------- /tutorial/figures/lesson_09_compute_at_update.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_09_compute_at_update.gif -------------------------------------------------------------------------------- /tutorial/figures/lesson_09_inline_reduction.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_09_inline_reduction.gif -------------------------------------------------------------------------------- /tutorial/figures/lesson_09_update.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_09_update.gif -------------------------------------------------------------------------------- /tutorial/figures/lesson_09_update_rdom.mp4: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_09_update_rdom.mp4 -------------------------------------------------------------------------------- /tutorial/figures/lesson_09_update_schedule.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_09_update_schedule.mp4 -------------------------------------------------------------------------------- /tutorial/figures/lesson_17_rdom_calls_in_predicate.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_17_rdom_calls_in_predicate.mp4 -------------------------------------------------------------------------------- /tutorial/figures/lesson_17_rdom_circular.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_17_rdom_circular.mp4 -------------------------------------------------------------------------------- /tutorial/figures/lesson_17_rdom_triangular.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_17_rdom_triangular.mp4 -------------------------------------------------------------------------------- /tutorial/figures/lesson_18_hist_manual_par.mp4: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_18_hist_manual_par.mp4 -------------------------------------------------------------------------------- /tutorial/figures/lesson_18_hist_rfactor_par.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_18_hist_rfactor_par.mp4 -------------------------------------------------------------------------------- /tutorial/figures/lesson_18_hist_rfactor_tile.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_18_hist_rfactor_tile.mp4 -------------------------------------------------------------------------------- /tutorial/figures/lesson_18_hist_rfactor_vec.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_18_hist_rfactor_vec.mp4 -------------------------------------------------------------------------------- /tutorial/figures/lesson_18_hist_serial.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_18_hist_serial.mp4 -------------------------------------------------------------------------------- /tutorial/figures/lesson_19_group_updates.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_19_group_updates.mp4 
-------------------------------------------------------------------------------- /tutorial/figures/lesson_19_transpose.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_19_transpose.mp4 -------------------------------------------------------------------------------- /tutorial/figures/lesson_19_wrapper_global.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_19_wrapper_global.mp4 -------------------------------------------------------------------------------- /tutorial/figures/lesson_19_wrapper_local.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_19_wrapper_local.mp4 -------------------------------------------------------------------------------- /tutorial/figures/lesson_19_wrapper_unique.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_19_wrapper_unique.mp4 -------------------------------------------------------------------------------- /tutorial/figures/lesson_19_wrapper_vary_schedule.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/figures/lesson_19_wrapper_vary_schedule.mp4 -------------------------------------------------------------------------------- /tutorial/images/gray.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/images/gray.png -------------------------------------------------------------------------------- /tutorial/images/rgb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenneuendorffer/vyasa/8dc1c8be2c1fd51e9c68c1b52542c3ba6cc432be/tutorial/images/rgb.png -------------------------------------------------------------------------------- /tutorial/todo.txt: -------------------------------------------------------------------------------- 1 | - debug_to_file 2 | - the bounds query interface 3 | - multi-output pipelines 4 | - overriding the runtime 5 | - lambdas 6 | useful tricks, e.g., `Buffer image_buf = lambda(x, y, (sin(x+y)+1)/2).realize(10 * tile_size, 10 * tile_size);` 7 | - extern functions and extern stages 8 | - common scheduling patterns 9 | - scheduling rvars 10 | - tail strategies 11 | - Wrapping existing memory in a Halide::Buffer 12 | -------------------------------------------------------------------------------- /util/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | halide_project(HalideTraceViz "utils" HalideTraceViz.cpp) 2 | halide_project(HalideTraceDump "utils" HalideTraceDump.cpp HalideTraceUtils.cpp) 3 | halide_use_image_io(HalideTraceDump) 4 | --------------------------------------------------------------------------------