├── aten ├── src │ ├── ATen │ │ ├── cudnn │ │ │ ├── Exceptions.h │ │ │ ├── Handles.h │ │ │ ├── Handle.h │ │ │ ├── Types.h │ │ │ ├── README.md │ │ │ ├── cudnn-wrapper.h │ │ │ ├── Types.cpp │ │ │ └── Utils.h │ │ ├── .gitignore │ │ ├── native │ │ │ ├── LegacyBridge.cpp │ │ │ ├── quantized │ │ │ │ ├── cpu │ │ │ │ │ ├── qnnpack │ │ │ │ │ │ ├── wrappers │ │ │ │ │ │ │ ├── dummy.c │ │ │ │ │ │ │ ├── x8lut │ │ │ │ │ │ │ │ └── scalar.c │ │ │ │ │ │ │ ├── sgemm │ │ │ │ │ │ │ │ ├── 6x8-psimd.c │ │ │ │ │ │ │ │ ├── 5x8-neon.c │ │ │ │ │ │ │ │ └── 6x8-neon.c │ │ │ │ │ │ │ ├── u8lut32norm │ │ │ │ │ │ │ │ └── scalar.c │ │ │ │ │ │ │ ├── requantization │ │ │ │ │ │ │ │ ├── fp32-psimd.c │ │ │ │ │ │ │ │ ├── q31-scalar.c │ │ │ │ │ │ │ │ ├── fp32-scalar.c │ │ │ │ │ │ │ │ ├── gemmlowp-scalar.c │ │ │ │ │ │ │ │ ├── precise-psimd.c │ │ │ │ │ │ │ │ ├── precise-scalar.c │ │ │ │ │ │ │ │ ├── fp32-neon.c │ │ │ │ │ │ │ │ ├── q31-neon.c │ │ │ │ │ │ │ │ ├── gemmlowp-neon.c │ │ │ │ │ │ │ │ ├── precise-neon.c │ │ │ │ │ │ │ │ ├── q31-sse2.c │ │ │ │ │ │ │ │ ├── q31-sse4.c │ │ │ │ │ │ │ │ ├── fp32-sse2.c │ │ │ │ │ │ │ │ ├── q31-ssse3.c │ │ │ │ │ │ │ │ ├── gemmlowp-sse2.c │ │ │ │ │ │ │ │ ├── gemmlowp-sse4.c │ │ │ │ │ │ │ │ ├── precise-sse2.c │ │ │ │ │ │ │ │ ├── precise-sse4.c │ │ │ │ │ │ │ │ ├── precise-ssse3.c │ │ │ │ │ │ │ │ └── gemmlowp-ssse3.c │ │ │ │ │ │ │ ├── q8conv │ │ │ │ │ │ │ │ ├── 4x8-aarch32-neon.S │ │ │ │ │ │ │ │ ├── 8x8-aarch64-neon.S │ │ │ │ │ │ │ │ ├── 4x8-neon.c │ │ │ │ │ │ │ │ ├── 8x8-neon.c │ │ │ │ │ │ │ │ └── 4x4c2-sse2.c │ │ │ │ │ │ │ ├── q8gemm │ │ │ │ │ │ │ │ ├── 4x8-aarch32-neon.S │ │ │ │ │ │ │ │ ├── 8x8-aarch64-neon.S │ │ │ │ │ │ │ │ ├── 4x8c2-xzp-aarch32-neon.S │ │ │ │ │ │ │ │ ├── 4x8-neon.c │ │ │ │ │ │ │ │ ├── 6x4-neon.c │ │ │ │ │ │ │ │ ├── 8x8-neon.c │ │ │ │ │ │ │ │ ├── 4x8c2-xzp-neon.c │ │ │ │ │ │ │ │ ├── 4x-sumrows-neon.c │ │ │ │ │ │ │ │ ├── 2x4c8-sse2.c │ │ │ │ │ │ │ │ └── 4x4c2-sse2.c │ │ │ │ │ │ │ ├── q8dwconv │ │ │ │ │ │ │ │ ├── up8x9-aarch32-neon.S │ │ │ │ │ │ │ │ ├── 
up8x9-neon.c │ │ │ │ │ │ │ │ ├── mp8x25-neon.c │ │ │ │ │ │ │ │ ├── mp8x25-sse2.c │ │ │ │ │ │ │ │ └── up8x9-sse2.c │ │ │ │ │ │ │ ├── hgemm │ │ │ │ │ │ │ │ └── 8x8-aarch32-neonfp16arith.S │ │ │ │ │ │ │ ├── q8vadd │ │ │ │ │ │ │ │ ├── neon.c │ │ │ │ │ │ │ │ └── sse2.c │ │ │ │ │ │ │ ├── u8rmax │ │ │ │ │ │ │ │ ├── neon.c │ │ │ │ │ │ │ │ └── sse2.c │ │ │ │ │ │ │ ├── u8clamp │ │ │ │ │ │ │ │ ├── neon.c │ │ │ │ │ │ │ │ └── sse2.c │ │ │ │ │ │ │ ├── x8zip │ │ │ │ │ │ │ │ ├── x2-neon.c │ │ │ │ │ │ │ │ ├── x3-neon.c │ │ │ │ │ │ │ │ ├── x4-neon.c │ │ │ │ │ │ │ │ ├── xm-neon.c │ │ │ │ │ │ │ │ ├── x2-sse2.c │ │ │ │ │ │ │ │ ├── x3-sse2.c │ │ │ │ │ │ │ │ ├── x4-sse2.c │ │ │ │ │ │ │ │ └── xm-sse2.c │ │ │ │ │ │ │ ├── q8avgpool │ │ │ │ │ │ │ │ ├── up8x9-neon.c │ │ │ │ │ │ │ │ ├── up8xm-neon.c │ │ │ │ │ │ │ │ ├── mp8x9p8q-neon.c │ │ │ │ │ │ │ │ ├── up8x9-sse2.c │ │ │ │ │ │ │ │ ├── up8xm-sse2.c │ │ │ │ │ │ │ │ └── mp8x9p8q-sse2.c │ │ │ │ │ │ │ ├── q8gavgpool │ │ │ │ │ │ │ │ ├── up8x7-neon.c │ │ │ │ │ │ │ │ ├── up8xm-neon.c │ │ │ │ │ │ │ │ ├── mp8x7p7q-neon.c │ │ │ │ │ │ │ │ ├── up8x7-sse2.c │ │ │ │ │ │ │ │ ├── up8xm-sse2.c │ │ │ │ │ │ │ │ └── mp8x7p7q-sse2.c │ │ │ │ │ │ │ └── u8maxpool │ │ │ │ │ │ │ │ ├── sub16-neon.c │ │ │ │ │ │ │ │ ├── 16x9p8q-neon.c │ │ │ │ │ │ │ │ ├── sub16-sse2.c │ │ │ │ │ │ │ │ └── 16x9p8q-sse2.c │ │ │ │ │ │ ├── deps │ │ │ │ │ │ │ └── clog │ │ │ │ │ │ │ │ ├── confu.yaml │ │ │ │ │ │ │ │ ├── .gitignore │ │ │ │ │ │ │ │ └── cmake │ │ │ │ │ │ │ │ └── DownloadGoogleTest.cmake │ │ │ │ │ │ ├── CODE_OF_CONDUCT.md │ │ │ │ │ │ ├── .gitignore │ │ │ │ │ │ ├── confu.yaml │ │ │ │ │ │ ├── src │ │ │ │ │ │ │ ├── requantization │ │ │ │ │ │ │ │ ├── runtime-sse2.h │ │ │ │ │ │ │ │ ├── runtime-neon.h │ │ │ │ │ │ │ │ └── runtime-assembly.h │ │ │ │ │ │ │ ├── operator-delete.c │ │ │ │ │ │ │ └── qnnpack │ │ │ │ │ │ │ │ ├── x8lut.h │ │ │ │ │ │ │ │ ├── u8rmax.h │ │ │ │ │ │ │ │ ├── u8lut32norm.h │ │ │ │ │ │ │ │ ├── assembly.h │ │ │ │ │ │ │ │ └── math.h │ │ │ │ │ │ └── cmake │ │ │ │ │ │ │ ├── 
DownloadFP16.cmake │ │ │ │ │ │ │ ├── DownloadFXdiv.cmake │ │ │ │ │ │ │ ├── DownloadPSimd.cmake │ │ │ │ │ │ │ ├── DownloadCpuinfo.cmake │ │ │ │ │ │ │ ├── DownloadPThreadPool.cmake │ │ │ │ │ │ │ ├── DownloadGoogleTest.cmake │ │ │ │ │ │ │ └── DownloadGoogleBenchmark.cmake │ │ │ │ │ ├── init_qnnpack.h │ │ │ │ │ ├── init_qnnpack.cpp │ │ │ │ │ └── fake_quantize_core.h │ │ │ │ ├── Copy.h │ │ │ │ └── cuda │ │ │ │ │ └── fake_quantize_core.h │ │ │ ├── TypeProperties.h │ │ │ ├── Unfold2d.cpp │ │ │ ├── mkldnn │ │ │ │ ├── IDeepRegistration.cpp │ │ │ │ ├── TensorShape.h │ │ │ │ └── Utils.h │ │ │ ├── Sorting.h │ │ │ ├── Cross.h │ │ │ ├── Copy.h │ │ │ ├── Fill.h │ │ │ ├── cuda │ │ │ │ ├── LaunchUtils.h │ │ │ │ ├── CrossKernel.cu │ │ │ │ ├── TensorShapeCUDA.cpp │ │ │ │ ├── SparseMM.cu │ │ │ │ ├── DeviceSqrt.cuh │ │ │ │ ├── FillKernel.cu │ │ │ │ └── CUDAScalar.cu │ │ │ ├── sparse │ │ │ │ └── SparseTensorMath.h │ │ │ ├── cpu │ │ │ │ ├── TensorCompareKernel.h │ │ │ │ ├── DepthwiseConvKernel.h │ │ │ │ ├── SoftmaxKernel.h │ │ │ │ └── GridSamplerKernel.h │ │ │ ├── Pow.h │ │ │ ├── PointwiseOps.h │ │ │ ├── Lerp.h │ │ │ ├── Unfold2d.h │ │ │ ├── Indexing.h │ │ │ ├── Distance.h │ │ │ ├── utils │ │ │ │ └── ParamUtils.h │ │ │ ├── Activation.h │ │ │ └── c10_utils.h │ │ ├── stub │ │ │ └── CombinedStub.cpp │ │ ├── Dimname.h │ │ ├── core │ │ │ ├── Scalar.h │ │ │ ├── blob.cpp │ │ │ ├── typeid.h │ │ │ ├── ScalarType.h │ │ │ ├── ATenGeneral.cpp │ │ │ ├── Macros.h │ │ │ ├── UndefinedTensorImpl.h │ │ │ ├── ATenGeneral.h │ │ │ ├── Backtrace.h │ │ │ ├── README.md │ │ │ ├── TensorImpl_test.cpp │ │ │ ├── Range.cpp │ │ │ ├── OpsAlreadyMovedToC10.h │ │ │ ├── DimVector.h │ │ │ ├── EnableNamedTensor.h │ │ │ ├── LegacyTypeDispatch.cpp │ │ │ ├── Tensor.h │ │ │ ├── Range.h │ │ │ ├── Reduction.h │ │ │ ├── dispatch │ │ │ │ └── README.md │ │ │ ├── UnsafeFromTH.h │ │ │ ├── LegacyDeviceTypeInit.cpp │ │ │ ├── Formatting.h │ │ │ ├── grad_mode.h │ │ │ ├── grad_mode.cpp │ │ │ └── DeprecatedTypeProperties.cpp │ │ ├── 
Formatting.h │ │ ├── NamedTensor.h │ │ ├── Device.h │ │ ├── Layout.h │ │ ├── ArrayRef.h │ │ ├── Backend.h │ │ ├── Scalar.h │ │ ├── Storage.h │ │ ├── Backtrace.h │ │ ├── DimVector.h │ │ ├── Generator.h │ │ ├── SmallVector.h │ │ ├── TensorOptions.h │ │ ├── TensorAccessor.h │ │ ├── miopen │ │ │ ├── miopen-wrapper.h │ │ │ ├── Handle.h │ │ │ ├── Types.h │ │ │ ├── Types.cpp │ │ │ ├── Utils.h │ │ │ └── Handle.cpp │ │ ├── mkldnn │ │ │ └── Runtime.cpp │ │ ├── test │ │ │ ├── test_install │ │ │ │ ├── main.cpp │ │ │ │ └── CMakeLists.txt │ │ │ ├── cuda_cudnn_test.cpp │ │ │ ├── verify_api_visibility.cpp │ │ │ ├── reduce_ops_test.cpp │ │ │ ├── dlconvertor_test.cpp │ │ │ └── cuda_optional_test.cu │ │ ├── ScalarType.h │ │ ├── detail │ │ │ ├── CPUGuardImpl.cpp │ │ │ ├── ScalarTypeConversions.h │ │ │ └── HIPHooksInterface.cpp │ │ ├── cuda │ │ │ ├── ATenCUDAGeneral.h │ │ │ ├── PinnedMemoryAllocator.h │ │ │ ├── nvrtc_stub │ │ │ │ └── ATenNVRTC.cpp │ │ │ ├── CUDATensorMethods.cuh │ │ │ ├── CUDADevice.h │ │ │ ├── PinnedMemoryAllocator.cpp │ │ │ ├── CUDAUtils.h │ │ │ ├── CUDAConfig.h.in │ │ │ └── detail │ │ │ │ └── IndexUtils.cuh │ │ ├── div_rtn.h │ │ ├── Utils.cpp │ │ ├── ATenConfig.cmake.in │ │ ├── quantized │ │ │ ├── QTensorImpl.cpp │ │ │ └── CMakeLists.txt │ │ ├── TensorGeometry.cpp │ │ ├── Version.h │ │ ├── mkl │ │ │ ├── Limits.h │ │ │ ├── README.md │ │ │ └── Exceptions.h │ │ ├── DynamicLibrary.h │ │ ├── env.py │ │ ├── PTThreadPool.h │ │ ├── Tensor.h │ │ ├── cpu │ │ │ ├── tbb │ │ │ │ └── extra │ │ │ │ │ └── version_string.ver.in │ │ │ └── FlushDenormal.h │ │ ├── InitialTensorOptions.h │ │ ├── hip │ │ │ └── impl │ │ │ │ ├── HIPCachingAllocatorMasqueradingAsCUDA.h │ │ │ │ ├── HIPCachingAllocatorMasqueradingAsCUDA.cpp │ │ │ │ └── HIPGuardImplMasqueradingAsCUDA.cpp │ │ ├── ThreadLocalDebugInfo.cpp │ │ ├── templates │ │ │ ├── LegacyTHFunctions.h │ │ │ └── TypeDefault.cpp │ │ ├── Config.h.in │ │ ├── DLConvertor.h │ │ ├── ATen.h │ │ └── WrapDimUtilsMulti.h │ ├── THC │ │ ├── THCStream.cpp │ 
│ ├── THCTensorMathReduce.cu │ │ ├── generated │ │ │ ├── THCTensorMathCompareBool.cu │ │ │ ├── THCTensorMathCompareByte.cu │ │ │ ├── THCTensorMathCompareChar.cu │ │ │ ├── THCTensorMathCompareFloat.cu │ │ │ ├── THCTensorMathCompareHalf.cu │ │ │ ├── THCTensorMathCompareInt.cu │ │ │ ├── THCTensorMathCompareLong.cu │ │ │ ├── THCTensorMathCompareShort.cu │ │ │ ├── THCTensorMathCompareTInt.cu │ │ │ ├── THCTensorMathCompareDouble.cu │ │ │ ├── THCTensorMathCompareTBool.cu │ │ │ ├── THCTensorMathCompareTByte.cu │ │ │ ├── THCTensorMathCompareTChar.cu │ │ │ ├── THCTensorMathCompareTDouble.cu │ │ │ ├── THCTensorMathCompareTFloat.cu │ │ │ ├── THCTensorMathCompareTHalf.cu │ │ │ ├── THCTensorMathCompareTLong.cu │ │ │ ├── THCTensorMathCompareTShort.cu │ │ │ ├── THCTensorSortByte.cu │ │ │ ├── THCTensorSortChar.cu │ │ │ ├── THCTensorSortHalf.cu │ │ │ ├── THCTensorSortInt.cu │ │ │ ├── THCTensorSortLong.cu │ │ │ ├── THCTensorMaskedInt.cu │ │ │ ├── THCTensorSortDouble.cu │ │ │ ├── THCTensorSortFloat.cu │ │ │ ├── THCTensorSortShort.cu │ │ │ ├── THCTensorMaskedBool.cu │ │ │ ├── THCTensorMaskedByte.cu │ │ │ ├── THCTensorMaskedChar.cu │ │ │ ├── THCTensorMaskedDouble.cu │ │ │ ├── THCTensorMaskedFloat.cu │ │ │ ├── THCTensorMaskedHalf.cu │ │ │ ├── THCTensorMaskedLong.cu │ │ │ ├── THCTensorMaskedShort.cu │ │ │ ├── THCTensorMaskedBFloat16.cu │ │ │ ├── THCTensorMathReduceBool.cu │ │ │ ├── THCTensorMathReduceByte.cu │ │ │ ├── THCTensorMathReduceChar.cu │ │ │ ├── THCTensorMathReduceFloat.cu │ │ │ ├── THCTensorMathReduceHalf.cu │ │ │ ├── THCTensorMathReduceInt.cu │ │ │ ├── THCTensorMathReduceLong.cu │ │ │ ├── THCTensorMathReduceShort.cu │ │ │ ├── THCTensorMathPointwiseInt.cu │ │ │ ├── THCTensorMathReduceDouble.cu │ │ │ ├── THCTensorMathPointwiseBool.cu │ │ │ ├── THCTensorMathPointwiseByte.cu │ │ │ ├── THCTensorMathPointwiseChar.cu │ │ │ ├── THCTensorMathPointwiseFloat.cu │ │ │ ├── THCTensorMathPointwiseHalf.cu │ │ │ ├── THCTensorMathPointwiseLong.cu │ │ │ ├── THCTensorMathPointwiseShort.cu │ │ │ 
├── THCTensorMathReduceBFloat16.cu │ │ │ └── THCTensorMathPointwiseDouble.cu │ │ ├── generic │ │ │ ├── THCTensor.cu │ │ │ ├── THCTensorMathScan.h │ │ │ ├── THCTensorCopy.h │ │ │ ├── THCTensorTopK.h │ │ │ ├── THCTensorRandom.h │ │ │ ├── THCTensorMode.h │ │ │ ├── THCTensorScatterGather.h │ │ │ ├── THCTensorMathMagma.h │ │ │ ├── THCStorage.cu │ │ │ └── THCTensorIndex.h │ │ ├── THCSleep.h │ │ ├── THCTensor.cu │ │ ├── THCTensorMathBlas.cu │ │ ├── THCSortUtils.cu │ │ ├── THCStorageCopy.cpp │ │ ├── THCAllocator.h │ │ ├── THCStorageCopy.h │ │ ├── THCTensorCopy.h │ │ ├── THC.h │ │ ├── THCStorage.h │ │ ├── THCStorageCopy.cu │ │ ├── THCGenerateIntType.h │ │ ├── THCGenerateCharType.h │ │ ├── THCGenerateByteType.h │ │ ├── THCGenerateLongType.h │ │ ├── THCGenerateShortType.h │ │ ├── THCTensorRandom.h │ │ ├── THCSleep.cu │ │ ├── THCGenerateBoolType.h │ │ ├── THCGenerateDoubleType.h │ │ ├── THCStorage.cu │ │ ├── THCTensorMathMagma.cuh │ │ ├── THCTensorMode.cu │ │ ├── THCTensorTopK.cu │ │ ├── THCGenerateHalfType.h │ │ ├── THCGenerateBFloat16Type.h │ │ ├── THCTensor.h │ │ ├── THCGenerateFloatType.h │ │ ├── THCTensorCopy.hpp │ │ ├── THCThrustAllocator.cuh │ │ ├── THCTensorMathMagma.cu │ │ ├── THCTensorMathCompare.cuh │ │ ├── THCStorage.hpp │ │ ├── THCGeneral.hpp │ │ └── THCGenerateFloatTypes.h │ ├── TH │ │ ├── THLapack.cpp │ │ ├── THHalf.h │ │ ├── THStorage.h │ │ ├── THTensorConv.cpp │ │ ├── THTensorLapack.cpp │ │ ├── THBlas.cpp │ │ ├── generic │ │ │ ├── THTensorFill.h │ │ │ ├── THTensorLapack.h │ │ │ └── THTensorFill.cpp │ │ ├── THTensorRandom.cpp │ │ ├── README.md │ │ ├── THBlas.h │ │ ├── THLogAdd.h │ │ ├── THTensorMath.cpp │ │ ├── THTensorMoreMath.cpp │ │ ├── THTensorFill.cpp │ │ ├── THMemoryFile.h │ │ ├── THSize.h │ │ ├── THGenerateAllTypes.h │ │ ├── THGenerateFloatTypes.h │ │ ├── TH.h │ │ ├── THGenerateQTypes.h │ │ ├── THTensorEvenMoreMath.cpp │ │ ├── THSize.cpp │ │ ├── THGenerateIntTypes.h │ │ ├── THGenerateBFloat16Type.h │ │ ├── THGenerateQInt8Type.h │ │ ├── 
THGenerateQInt32Type.h │ │ ├── THGenerateQUInt8Type.h │ │ ├── THVector.h │ │ ├── THGenerateBoolType.h │ │ ├── THGenerateFloatType.h │ │ ├── THGenerateDoubleType.h │ │ ├── THDiskFile.h │ │ ├── THGenerateIntType.h │ │ ├── THGenerateCharType.h │ │ ├── THGenerateByteType.h │ │ ├── THGenerateLongType.h │ │ ├── THGenerateHalfType.h │ │ ├── THGenerateShortType.h │ │ └── vector │ │ │ └── AVX2.h │ ├── THNN │ │ ├── CMakeLists.txt │ │ └── THNN.h │ └── THCUNN │ │ ├── THCUNN.h │ │ ├── SpatialConvolutionMM.cu │ │ ├── Tanh.cu │ │ └── SharedMem.cuh ├── tools │ ├── valgrind.sup │ └── test_install.sh └── conda │ ├── meta.yaml │ └── build.sh └── cmake ├── Modules_CUDA_fix ├── upstream │ └── README.md └── FindCUDA.cmake ├── public ├── mkl.cmake ├── mkldnn.cmake └── threads.cmake ├── TorchConfigVersion.cmake.in ├── Caffe2ConfigVersion.cmake.in ├── Modules ├── FindCUB.cmake ├── Findpybind11.cmake ├── FindNuma.cmake └── FindBenchmark.cmake ├── External └── rccl.cmake ├── GoogleTestPatch.cmake └── Whitelist.cmake /aten/src/ATen/cudnn/Exceptions.h: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /aten/src/ATen/.gitignore: -------------------------------------------------------------------------------- 1 | Config.h 2 | -------------------------------------------------------------------------------- /aten/src/ATen/native/LegacyBridge.cpp: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /aten/src/ATen/stub/CombinedStub.cpp: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /aten/src/ATen/Dimname.h: -------------------------------------------------------------------------------- 1 | #include 2 | 
-------------------------------------------------------------------------------- /aten/src/ATen/core/Scalar.h: -------------------------------------------------------------------------------- 1 | #include 2 | -------------------------------------------------------------------------------- /aten/src/ATen/core/blob.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | -------------------------------------------------------------------------------- /aten/src/ATen/core/typeid.h: -------------------------------------------------------------------------------- 1 | #include 2 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/dummy.c: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /aten/src/THC/THCStream.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | -------------------------------------------------------------------------------- /aten/src/ATen/Formatting.h: -------------------------------------------------------------------------------- 1 | #include 2 | -------------------------------------------------------------------------------- /aten/src/ATen/NamedTensor.h: -------------------------------------------------------------------------------- 1 | #include 2 | -------------------------------------------------------------------------------- /aten/src/ATen/core/ScalarType.h: -------------------------------------------------------------------------------- 1 | #include 2 | -------------------------------------------------------------------------------- /aten/src/ATen/Device.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | -------------------------------------------------------------------------------- 
/aten/src/ATen/Layout.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | -------------------------------------------------------------------------------- /aten/src/ATen/core/ATenGeneral.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | -------------------------------------------------------------------------------- /aten/src/ATen/ArrayRef.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | -------------------------------------------------------------------------------- /aten/src/ATen/Backend.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | -------------------------------------------------------------------------------- /aten/src/ATen/Scalar.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | -------------------------------------------------------------------------------- /aten/src/ATen/Storage.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | -------------------------------------------------------------------------------- /aten/src/ATen/Backtrace.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | -------------------------------------------------------------------------------- /aten/src/ATen/DimVector.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | -------------------------------------------------------------------------------- /aten/src/ATen/Generator.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 
-------------------------------------------------------------------------------- /aten/src/ATen/SmallVector.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | -------------------------------------------------------------------------------- /aten/src/ATen/core/Macros.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | -------------------------------------------------------------------------------- /aten/src/ATen/cudnn/Handles.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | -------------------------------------------------------------------------------- /aten/src/ATen/TensorOptions.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | -------------------------------------------------------------------------------- /aten/src/ATen/core/UndefinedTensorImpl.h: -------------------------------------------------------------------------------- 1 | #include 2 | -------------------------------------------------------------------------------- /aten/src/ATen/TensorAccessor.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | -------------------------------------------------------------------------------- /aten/src/ATen/core/ATenGeneral.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | -------------------------------------------------------------------------------- /aten/src/ATen/miopen/miopen-wrapper.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | -------------------------------------------------------------------------------- /aten/src/ATen/core/Backtrace.h: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | -------------------------------------------------------------------------------- /aten/src/THC/THCTensorMathReduce.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareBool.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareByte.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareChar.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareFloat.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareHalf.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareInt.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareLong.cu: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareShort.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareTInt.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/TH/THLapack.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareDouble.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareTBool.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareTByte.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareTChar.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareTDouble.cu: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareTFloat.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareTHalf.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareTLong.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareTShort.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/ATen/mkldnn/Runtime.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace at { namespace native { 4 | 5 | }} // namespace at::native 6 | -------------------------------------------------------------------------------- /aten/src/TH/THHalf.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_HALF_H 2 | #define TH_HALF_H 3 | 4 | #include 5 | 6 | #define THHalf at::Half 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /aten/src/ATen/test/test_install/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() { 4 | std::cout << at::ones({3,4}, at::CPU(at::kFloat)) 
<< "\n"; 5 | } 6 | -------------------------------------------------------------------------------- /aten/src/TH/THStorage.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | // Compatability header. Use THStorageFunctions.h instead if you need this. 5 | -------------------------------------------------------------------------------- /aten/src/ATen/ScalarType.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include // for BC reasons 3 | #include 4 | #include 5 | -------------------------------------------------------------------------------- /aten/src/ATen/native/TypeProperties.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace at { namespace native { 4 | 5 | ScalarType result_type(TensorList tensors); 6 | 7 | }} 8 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/x8lut/scalar.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #include 4 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/sgemm/6x8-psimd.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #include 4 | -------------------------------------------------------------------------------- /aten/src/TH/THTensorConv.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | 7 | -------------------------------------------------------------------------------- /aten/src/TH/THTensorLapack.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/u8lut32norm/scalar.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #include 4 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/deps/clog/confu.yaml: -------------------------------------------------------------------------------- 1 | name: clog 2 | title: C-style (a-la printf) logging library 3 | license: Simplified BSD 4 | deps: 5 | - name: googletest 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/fp32-psimd.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #include 4 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/q31-scalar.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #include 4 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/fp32-scalar.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #include 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorSortByte.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorSortChar.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorSortHalf.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorSortInt.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorSortLong.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/gemmlowp-scalar.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by 
generate-wrappers.py script. Do not modify */ 2 | 3 | #include 4 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/precise-psimd.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #include 4 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/precise-scalar.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #include 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMaskedInt.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorSortDouble.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorSortFloat.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorSortShort.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMaskedBool.cu: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMaskedByte.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMaskedChar.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMaskedDouble.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMaskedFloat.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMaskedHalf.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMaskedLong.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMaskedShort.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 
#include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/ATen/core/README.md: -------------------------------------------------------------------------------- 1 | ATen Core 2 | --------- 3 | 4 | ATen Core is a minimal subset of ATen which is suitable for deployment 5 | on mobile. Binary size of files in this folder is an important constraint. 6 | -------------------------------------------------------------------------------- /aten/src/ATen/detail/CPUGuardImpl.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace at { 4 | namespace detail { 5 | 6 | C10_REGISTER_GUARD_IMPL(CPU, CPUGuardImpl); 7 | 8 | }} // namespace at::detail 9 | -------------------------------------------------------------------------------- /aten/src/ATen/miopen/Handle.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace at { namespace native { 6 | 7 | miopenHandle_t getMiopenHandle(); 8 | 9 | }} // namespace 10 | -------------------------------------------------------------------------------- /aten/src/ATen/native/Unfold2d.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace at { namespace native { 4 | 5 | DEFINE_DISPATCH(unfolded2d_copy_stub); 6 | DEFINE_DISPATCH(unfolded2d_acc_stub); 7 | 8 | }} 9 | -------------------------------------------------------------------------------- /aten/src/TH/THBlas.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMaskedBFloat16.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 
| #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathReduceBool.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathReduceByte.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathReduceChar.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathReduceFloat.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathReduceHalf.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathReduceInt.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathReduceLong.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | 
-------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathReduceShort.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THNN/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(ATen_CPU_SRCS ${ATen_CPU_SRCS} 2 | ${CMAKE_CURRENT_SOURCE_DIR}/init.cpp 3 | PARENT_SCOPE) 4 | INSTALL(FILES generic/THNN.h DESTINATION "${ATEN_INSTALL_INCLUDE_SUBDIR}/THNN/generic") 5 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathPointwiseInt.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathReduceDouble.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathPointwiseBool.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathPointwiseByte.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathPointwiseChar.cu: -------------------------------------------------------------------------------- 1 | #include 2 
| #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathPointwiseFloat.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathPointwiseHalf.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathPointwiseLong.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathPointwiseShort.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathReduceBFloat16.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathPointwiseDouble.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/ATen/core/TensorImpl_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | TEST(TensorImplTest, Caffe2Constructor) { 5 | 
caffe2::Tensor tensor(caffe2::CPU); 6 | ASSERT_EQ(tensor.strides()[0], 1); 7 | } 8 | -------------------------------------------------------------------------------- /aten/src/ATen/cuda/ATenCUDAGeneral.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | // Use TORCH_CUDA_API for exports from this folder 10 | -------------------------------------------------------------------------------- /aten/src/ATen/native/mkldnn/IDeepRegistration.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #if AT_MKLDNN_ENABLED() 4 | 5 | // needs to be included only once in library. 6 | #include 7 | 8 | #endif // AT_MKLDNN_ENALBED() 9 | -------------------------------------------------------------------------------- /aten/src/ATen/cuda/PinnedMemoryAllocator.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace at { namespace cuda { 6 | 7 | TORCH_CUDA_API at::Allocator* getPinnedMemoryAllocator(); 8 | 9 | }} // namespace at::cuda 10 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8conv/4x8-aarch32-neon.S: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) 4 | #include 5 | #endif /* defined(__arm__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8gemm/4x8-aarch32-neon.S: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__arm__) 4 | #include 5 | #endif /* defined(__arm__) */ 6 | -------------------------------------------------------------------------------- /aten/tools/valgrind.sup: -------------------------------------------------------------------------------- 1 | { 2 | 3 | Memcheck:Cond 4 | fun:index 5 | fun:expand_dynamic_string_token 6 | fun:_dl_map_object 7 | fun:map_doit 8 | fun:_dl_catch_error 9 | fun:handle_ld_preload 10 | ... 11 | } 12 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/Copy.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace at { 6 | namespace native { 7 | 8 | Tensor& quantized_copy_from_float_(Tensor& self, const Tensor& src); 9 | 10 | } 11 | } // namespace at 12 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/init_qnnpack.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef USE_PYTORCH_QNNPACK 4 | 5 | namespace at { 6 | namespace native { 7 | 8 | void initQNNPACK(); 9 | 10 | } // namespace native 11 | } // namespace at 12 | 13 | #endif 14 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8conv/8x8-aarch64-neon.S: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__aarch64__) 4 | #include 5 | #endif /* defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8dwconv/up8x9-aarch32-neon.S: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__arm__) 4 | #include 5 | #endif /* defined(__arm__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8gemm/8x8-aarch64-neon.S: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__aarch64__) 4 | #include 5 | #endif /* defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/TH/generic/THTensorFill.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #define TH_GENERIC_FILE "TH/generic/THTensorFill.h" 3 | #else 4 | 5 | TH_API void THTensor_(fill)(THTensor *r_, scalar_t value); 6 | TH_API void THTensor_(zero)(THTensor *r_); 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8gemm/4x8c2-xzp-aarch32-neon.S: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) 4 | #include 5 | #endif /* defined(__arm__) */ 6 | -------------------------------------------------------------------------------- /aten/src/TH/THTensorRandom.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/hgemm/8x8-aarch32-neonfp16arith.S: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__arm__) 4 | #include 5 | #endif /* defined(__arm__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8vadd/neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/u8rmax/neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/tools/test_install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | set -e 4 | INSTALL_ROOT=$1 5 | SRC_ROOT=$2 6 | rm -rf test_build 7 | mkdir test_build 8 | cd test_build 9 | cmake -DCMAKE_PREFIX_PATH=$INSTALL_ROOT $SRC_ROOT/src/ATen/test/test_install 10 | make 11 | ./main 12 | -------------------------------------------------------------------------------- /aten/src/ATen/cudnn/Handle.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { namespace native { 7 | 8 | TORCH_CUDA_API cudnnHandle_t getCudnnHandle(); 9 | 10 | }} // namespace at::native 11 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/sgemm/5x8-neon.c: -------------------------------------------------------------------------------- 
1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/sgemm/6x8-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/u8clamp/neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/x8zip/x2-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/x8zip/x3-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/x8zip/x4-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/x8zip/xm-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/div_rtn.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // Integer division rounding to -Infinity 4 | template 5 | static inline T div_rtn(T x, T y) { 6 | int q = x/y; 7 | int r = x%y; 8 | if ((r!=0) && ((r<0) != (y<0))) --q; 9 | return q; 10 | } 11 | 12 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8conv/4x8-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8conv/8x8-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8gemm/4x8-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8gemm/6x4-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8gemm/8x8-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/THC/generic/THCTensor.cu: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #define THC_GENERIC_FILE "THC/generic/THCTensor.cu" 3 | #else 4 | 5 | int THCTensor_(getDevice)(THCState* state, const THCTensor* tensor) { 6 | return THCTensor_getDevice(state, tensor); 7 | } 8 | 9 | #endif 10 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8dwconv/up8x9-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8avgpool/up8x9-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8avgpool/up8xm-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8dwconv/mp8x25-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8gavgpool/up8x7-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8gavgpool/up8xm-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8gemm/4x8c2-xzp-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/u8maxpool/sub16-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/THC/THCSleep.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_SPIN_INC 2 | #define THC_SPIN_INC 3 | 4 | #include 5 | #include 6 | 7 | // enqueues a kernel that spins for the specified number of cycles 8 | THC_API void THC_sleep(THCState* state, int64_t cycles); 9 | 10 | #endif 11 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8avgpool/mp8x9p8q-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8gavgpool/mp8x7p7q-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8gemm/4x-sumrows-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/fp32-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/q31-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/u8maxpool/16x9p8q-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/Utils.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | namespace at { 8 | 9 | int _crash_if_asan(int arg) { 10 | volatile char x[3]; 11 | x[arg] = 0; 12 | return x[0]; 13 | } 14 | 15 | } // at 16 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/gemmlowp-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/precise-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/cudnn/Types.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { namespace native { 7 | 8 | cudnnDataType_t getCudnnDataType(const at::Tensor& tensor); 9 | 10 | int64_t cudnn_version(); 11 | 12 | }} // namespace at::cudnn 13 | -------------------------------------------------------------------------------- /aten/src/ATen/core/Range.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | namespace at { 6 | 7 | std::ostream& operator<<(std::ostream& out, const Range& range) { 8 | out << "Range[" << range.begin << ", " << range.end << "]"; 9 | return out; 10 | } 11 | 12 | } // namespace at 13 | -------------------------------------------------------------------------------- /aten/src/ATen/miopen/Types.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { namespace native { 7 | 8 | miopenDataType_t getMiopenDataType(const at::Tensor& tensor); 9 | 10 | int64_t miopen_version(); 11 | 12 | }} // namespace at::miopen 13 | -------------------------------------------------------------------------------- /aten/src/ATen/native/mkldnn/TensorShape.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace at { 6 | namespace native { 7 | 8 | Tensor mkldnn_view(const Tensor& self, IntArrayRef size); 9 | 10 | Tensor mkldnn_clone(const Tensor& self); 11 | 12 | } // namespace native 13 | } // namespace at 14 | -------------------------------------------------------------------------------- 
/aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8vadd/sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/u8clamp/sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/u8rmax/sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/x8zip/x2-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/x8zip/x3-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/x8zip/x4-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/x8zip/xm-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | Facebook has adopted a Code of Conduct that we expect project participants to adhere to. 
4 | Please read the [full text](https://code.fb.com/codeofconduct/) 5 | so that you can understand what actions will and will not be tolerated. 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8conv/4x4c2-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8gemm/2x4c8-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8gemm/4x4c2-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/core/OpsAlreadyMovedToC10.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace c10 { 6 | struct OperatorName; 7 | } 8 | 9 | namespace at { 10 | 11 | // list of ATen ops that come from native_functions.yaml 12 | CAFFE2_API bool is_aten_op(const c10::OperatorName& opName); 13 | 14 | } 15 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8avgpool/up8x9-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8avgpool/up8xm-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8dwconv/mp8x25-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8dwconv/up8x9-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/u8maxpool/sub16-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/core/DimVector.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { 7 | 8 | constexpr size_t kDimVectorStaticSize = 5; 9 | 10 | /// A container for sizes or strides 11 | using DimVector = SmallVector; 12 | 13 | } // namespace at 14 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/deps/clog/.gitignore: -------------------------------------------------------------------------------- 1 | # Ninja files 2 | build.ninja 3 | 4 | # Build objects and artifacts 5 | deps/ 6 | build/ 7 | bin/ 8 | lib/ 9 | *.pyc 10 | *.pyo 11 | 12 | # System files 13 | .DS_Store 14 | .DS_Store? 
15 | ._* 16 | .Spotlight-V100 17 | .Trashes 18 | ehthumbs.db 19 | Thumbs.db 20 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8avgpool/mp8x9p8q-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8gavgpool/up8x7-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8gavgpool/up8xm-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/q31-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/q31-sse4.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/u8maxpool/16x9p8q-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/Sorting.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { namespace native { 7 | 8 | using topk_fn = void(*)(Tensor&, Tensor&, const Tensor&, int64_t, int64_t, bool, bool); 9 | 10 | DECLARE_DISPATCH(topk_fn, topk_stub); 11 | 12 | }} // at::native 13 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8gavgpool/mp8x7p7q-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/fp32-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/q31-ssse3.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/gemmlowp-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/gemmlowp-sse4.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/precise-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/precise-sse4.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/precise-ssse3.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/TH/README.md: -------------------------------------------------------------------------------- 1 | Environment variables control the disabling of certain explicit SIMD optimizations. 
2 | 3 | ``` 4 | x64 options: 5 | TH_NO_AVX2=1 # disable AVX2 codepaths 6 | TH_NO_AVX=1 # disable AVX codepaths 7 | TH_NO_SSE=1 # disable SSE codepaths 8 | 9 | ppc64le options: 10 | TH_NO_VSX=1 # disable VSX codepaths 11 | ``` 12 | -------------------------------------------------------------------------------- /aten/src/THCUNN/THCUNN.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define THCIndexTensor THCudaLongTensor 4 | #define THCIndexTensor_(NAME) THCudaLongTensor_ ## NAME 5 | typedef int64_t THCIndex_t; 6 | 7 | #define THNN_(NAME) TH_CONCAT_3(THNN_, CReal, NAME) 8 | 9 | #include 10 | #include 11 | -------------------------------------------------------------------------------- /cmake/Modules_CUDA_fix/upstream/README.md: -------------------------------------------------------------------------------- 1 | If you need to update files under this folder, we recommend you issue PRs 2 | against [the CMake mainline branch](https://gitlab.kitware.com/cmake/cmake/tree/master/Modules/FindCUDA.cmake), 3 | and then backport it here for earlier CMake compatibility. 4 | 5 | See [this](../README.md) for more details. 
6 | -------------------------------------------------------------------------------- /aten/src/ATen/ATenConfig.cmake.in: -------------------------------------------------------------------------------- 1 | # Find the TH includes and library 2 | # 3 | # ATEN_INCLUDE_DIR -- where to find the includes 4 | # ATEN_LIBRARIES -- list of libraries to link against 5 | # ATEN_FOUND -- set to 1 if found 6 | 7 | SET(ATEN_FOUND 1) 8 | SET(ATEN_INCLUDE_DIR "@ATEN_INCLUDE_DIR@") 9 | SET(ATEN_LIBRARIES "@ATEN_LIBRARIES@") 10 | -------------------------------------------------------------------------------- /aten/src/ATen/native/Cross.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { namespace native { 7 | 8 | using cross_fn = void(*)(Tensor&, const Tensor&, const Tensor&, const int64_t d); 9 | 10 | DECLARE_DISPATCH(cross_fn, cross_stub); 11 | 12 | }} // namespace at::native 13 | 14 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/gemmlowp-ssse3.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/THC/generic/THCTensorMathScan.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #define THC_GENERIC_FILE "THC/generic/THCTensorMathScan.h" 3 | #else 4 | 5 | THC_API void THCTensor_(cumsum)(THCState *state, THCTensor *self, THCTensor *src, int dim); 6 | THC_API void THCTensor_(cumprod)(THCState *state, THCTensor *self, THCTensor *src, int dim); 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /aten/src/THCUNN/SpatialConvolutionMM.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | -------------------------------------------------------------------------------- /aten/src/ATen/quantized/QTensorImpl.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace at { 4 | 5 | QTensorImpl::QTensorImpl( 6 | Storage&& storage, 7 | TensorTypeSet type_set, 8 | QuantizerPtr quantizer) 9 | : TensorImpl(std::move(storage), type_set), 10 | quantizer_(quantizer) {} 11 | 12 | } // namespace at 13 | -------------------------------------------------------------------------------- /aten/src/TH/THBlas.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_BLAS_INC 2 | #define TH_BLAS_INC 3 | 4 | #include 5 | 6 | #define THBlas_(NAME) TH_CONCAT_4(TH,Real,Blas_,NAME) 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- 
/aten/src/THC/THCTensor.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | -------------------------------------------------------------------------------- /aten/src/ATen/core/EnableNamedTensor.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | // We are working on removing the BUILD_NAMEDTENSOR flag from the codebase. 6 | // 7 | // PyTorch's codegen also uses a similar flag. You can find it in 8 | // - aten/src/ATen/env.py 9 | #ifndef BUILD_NAMEDTENSOR 10 | #define BUILD_NAMEDTENSOR 11 | #endif 12 | -------------------------------------------------------------------------------- /aten/src/ATen/cuda/nvrtc_stub/ATenNVRTC.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace at { namespace cuda { 5 | 6 | NVRTC* load_nvrtc() { 7 | auto self = new NVRTC(); 8 | #define CREATE_ASSIGN(name) self->name = name; 9 | AT_FORALL_NVRTC(CREATE_ASSIGN) 10 | return self; 11 | } 12 | 13 | }} // at::cuda 14 | -------------------------------------------------------------------------------- /aten/src/ATen/test/test_install/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.0) 2 | find_package(ATen REQUIRED) 3 | include_directories(${ATEN_INCLUDE_DIR}) 4 | 5 | # C++11 6 | if (not MSVC) 7 | set(CMAKE_CXX_FLAGS "--std=c++11 ${CMAKE_CXX_FLAGS}") 8 | endif() 9 | add_executable(main main.cpp) 10 | target_link_libraries(main ${ATEN_LIBRARIES}) 11 | -------------------------------------------------------------------------------- /aten/src/THC/THCTensorMathBlas.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 
| #include 7 | #include 8 | 9 | #include 10 | #include 11 | -------------------------------------------------------------------------------- /aten/src/ATen/TensorGeometry.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | namespace at { 7 | 8 | bool TensorGeometry::is_contiguous() const { 9 | if (numel_ == 0) { 10 | return true; 11 | } 12 | return at::geometry_is_contiguous(sizes_, strides_); 13 | } 14 | 15 | } // namespace at 16 | -------------------------------------------------------------------------------- /cmake/public/mkl.cmake: -------------------------------------------------------------------------------- 1 | find_package(MKL QUIET) 2 | 3 | if(NOT TARGET caffe2::mkl) 4 | add_library(caffe2::mkl INTERFACE IMPORTED) 5 | endif() 6 | 7 | set_property( 8 | TARGET caffe2::mkl PROPERTY INTERFACE_INCLUDE_DIRECTORIES 9 | ${MKL_INCLUDE_DIR}) 10 | set_property( 11 | TARGET caffe2::mkl PROPERTY INTERFACE_LINK_LIBRARIES 12 | ${MKL_LIBRARIES}) 13 | -------------------------------------------------------------------------------- /aten/src/ATen/cuda/CUDATensorMethods.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | namespace at { 11 | template <> 12 | inline __half* Tensor::data() const { 13 | return reinterpret_cast<__half*>(data()); 14 | } 15 | } // namespace at 16 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/.gitignore: -------------------------------------------------------------------------------- 1 | # Ninja files 2 | build.ninja 3 | 4 | # Build objects and artifacts 5 | deps/ 6 | build/ 7 | build-*/ 8 | bin/ 9 | lib/ 10 | out/ 11 | obj/ 12 | libs/ 13 | *.pyc 14 | *.pyo 15 | *.log 16 | 17 | # System files 18 | .DS_Store 19 | .DS_Store? 
20 | ._* 21 | .Spotlight-V100 22 | .Trashes 23 | ehthumbs.db 24 | Thumbs.db 25 | -------------------------------------------------------------------------------- /aten/src/ATen/native/Copy.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { 7 | 8 | struct TensorIterator; 9 | 10 | namespace native { 11 | 12 | using copy_fn = void (*)(TensorIterator&, bool non_blocking); 13 | 14 | DECLARE_DISPATCH(copy_fn, copy_stub); 15 | 16 | } // namespace native 17 | } // namespace at 18 | -------------------------------------------------------------------------------- /aten/src/ATen/Version.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace at { 4 | 5 | /// Returns a detailed string describing the configuration PyTorch. 6 | CAFFE2_API std::string show_config(); 7 | 8 | CAFFE2_API std::string get_mkl_version(); 9 | 10 | CAFFE2_API std::string get_mkldnn_version(); 11 | 12 | CAFFE2_API std::string get_openmp_version(); 13 | 14 | } // namespace at 15 | -------------------------------------------------------------------------------- /aten/src/ATen/core/LegacyTypeDispatch.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace at { 4 | 5 | // TODO: This could be bad juju if someone calls globalContext() in the 6 | // destructor of an object with static lifetime. 
7 | LegacyTypeDispatch & globalLegacyTypeDispatch() { 8 | static LegacyTypeDispatch singleton; 9 | return singleton; 10 | } 11 | 12 | } 13 | -------------------------------------------------------------------------------- /aten/src/ATen/mkl/Limits.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace at { namespace native { 6 | 7 | // Since size of MKL_LONG varies on different platforms (linux 64 bit, windows 8 | // 32 bit), we need to programmatically calculate the max. 9 | static int64_t MKL_LONG_MAX = ((1LL << (sizeof(MKL_LONG) * 8 - 2)) - 1) * 2 + 1; 10 | 11 | }} // namespace 12 | -------------------------------------------------------------------------------- /aten/src/THC/THCSortUtils.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | // Returns 2^(ceil(lg(n)) from Stanford bit twiddling hacks 4 | uint64_t nextHighestPowerOf2(uint64_t n) { 5 | n--; 6 | n |= n >> 1; 7 | n |= n >> 2; 8 | n |= n >> 4; 9 | n |= n >> 8; 10 | n |= n >> 16; 11 | #ifndef _MSC_VER 12 | n |= n >> 32; 13 | #endif 14 | n++; 15 | 16 | return n; 17 | } 18 | -------------------------------------------------------------------------------- /aten/src/ATen/test/cuda_cudnn_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace at; 9 | using namespace at::native; 10 | 11 | TEST(CUDNNTest, CUDNNTestCUDA) { 12 | if (!at::cuda::is_available()) return; 13 | manual_seed(123); 14 | } 15 | -------------------------------------------------------------------------------- /aten/src/ATen/mkl/README.md: -------------------------------------------------------------------------------- 1 | All files living in this directory are written with the assumption that MKL is available, 2 | which means that these code are not guarded by `#if 
AT_MKL_ENABLED()`. Therefore, whenever 3 | you need to use definitions from here, please guard the `#include` and 4 | definition usages with `#if AT_MKL_ENABLED()` macro, e.g. [SpectralOps.cpp](native/mkl/SpectralOps.cpp). 5 | -------------------------------------------------------------------------------- /aten/src/ATen/native/Fill.h: -------------------------------------------------------------------------------- 1 | // Functions that fill Tensors with constants. Implementations are in Fill.cpp. 2 | 3 | #pragma once 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | namespace at { namespace native { 10 | 11 | DECLARE_DISPATCH(void(*)(TensorIterator&, Scalar), fill_stub); 12 | 13 | }} // namespace at::native 14 | -------------------------------------------------------------------------------- /aten/src/TH/THLogAdd.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_LOG_ADD_INC 2 | #define TH_LOG_ADD_INC 3 | 4 | #include 5 | 6 | TH_API const double THLog2Pi; 7 | TH_API const double THLogZero; 8 | TH_API const double THLogOne; 9 | 10 | TH_API double THLogAdd(double log_a, double log_b); 11 | TH_API double THLogSub(double log_a, double log_b); 12 | TH_API double THExpMinusApprox(const double x); 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /aten/src/TH/THTensorMath.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | -------------------------------------------------------------------------------- /aten/src/ATen/cudnn/README.md: -------------------------------------------------------------------------------- 1 | All files living in this directory are written with the assumption that cuDNN is available, 2 | which means that these code are not guarded by `#if AT_CUDNN_ENABLED()`. 
Therefore, whenever 3 | you need to use definitions from here, please guard the `#include` and 4 | definition usages with `#if AT_CUDNN_ENABLED()` macro, e.g. [native/cudnn/BatchNorm.cpp](native/cudnn/BatchNorm.cpp). 5 | -------------------------------------------------------------------------------- /aten/src/ATen/native/cuda/LaunchUtils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | namespace at { 5 | namespace native { 6 | 7 | // returns 2**floor(log2(n)) 8 | static int lastPow2(unsigned int n) { 9 | n |= (n >> 1); 10 | n |= (n >> 2); 11 | n |= (n >> 4); 12 | n |= (n >> 8); 13 | n |= (n >> 16); 14 | return std::max(1, n - (n >> 1)); 15 | } 16 | 17 | } // namespace native 18 | } // namespace at 19 | -------------------------------------------------------------------------------- /aten/src/THC/THCStorageCopy.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | -------------------------------------------------------------------------------- /aten/src/TH/THTensorMoreMath.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | -------------------------------------------------------------------------------- /aten/src/ATen/native/sparse/SparseTensorMath.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { namespace native { 7 | 8 | sparse::SparseTensor& mul_out_sparse_scalar(sparse::SparseTensor& r, const sparse::SparseTensor& t, Scalar value); 9 | sparse::SparseTensor& mul_out_sparse_zerodim(sparse::SparseTensor& r, const sparse::SparseTensor& t, 
const Tensor& value); 10 | 11 | }} 12 | -------------------------------------------------------------------------------- /aten/src/TH/THTensorFill.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | -------------------------------------------------------------------------------- /cmake/public/mkldnn.cmake: -------------------------------------------------------------------------------- 1 | set(MKLDNN_USE_NATIVE_ARCH ${USE_NATIVE_ARCH}) 2 | 3 | find_package(MKLDNN QUIET) 4 | 5 | if(NOT TARGET caffe2::mkldnn) 6 | add_library(caffe2::mkldnn INTERFACE IMPORTED) 7 | endif() 8 | 9 | set_property( 10 | TARGET caffe2::mkldnn PROPERTY INTERFACE_INCLUDE_DIRECTORIES 11 | ${MKLDNN_INCLUDE_DIR}) 12 | set_property( 13 | TARGET caffe2::mkldnn PROPERTY INTERFACE_LINK_LIBRARIES 14 | ${MKLDNN_LIBRARIES}) 15 | -------------------------------------------------------------------------------- /aten/src/ATen/cuda/CUDADevice.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | namespace at { 9 | namespace cuda { 10 | 11 | inline Device getDeviceFromPtr(void* ptr) { 12 | cudaPointerAttributes attr; 13 | AT_CUDA_CHECK(cudaPointerGetAttributes(&attr, ptr)); 14 | return {DeviceType::CUDA, static_cast(attr.device)}; 15 | } 16 | 17 | }} // namespace at::cuda 18 | -------------------------------------------------------------------------------- /aten/src/ATen/native/cpu/TensorCompareKernel.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace at { namespace native { 8 | 9 | using reduce_fn = 10 | void (*)(Tensor&, Tensor&, const Tensor&, c10::optional); 11 | 12 | DECLARE_DISPATCH(reduce_fn, max_kernel); 
13 | DECLARE_DISPATCH(reduce_fn, min_kernel); 14 | 15 | }} // namespace at::native 16 | -------------------------------------------------------------------------------- /cmake/TorchConfigVersion.cmake.in: -------------------------------------------------------------------------------- 1 | set(PACKAGE_VERSION "@TORCH_VERSION@") 2 | 3 | # Check whether the requested PACKAGE_FIND_VERSION is compatible 4 | if("${PACKAGE_VERSION}" VERSION_LESS "${PACKAGE_FIND_VERSION}") 5 | set(PACKAGE_VERSION_COMPATIBLE FALSE) 6 | else() 7 | set(PACKAGE_VERSION_COMPATIBLE TRUE) 8 | if ("${PACKAGE_VERSION}" VERSION_EQUAL "${PACKAGE_FIND_VERSION}") 9 | set(PACKAGE_VERSION_EXACT TRUE) 10 | endif() 11 | endif() 12 | -------------------------------------------------------------------------------- /aten/src/TH/THMemoryFile.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_MEMORY_FILE_INC 2 | #define TH_MEMORY_FILE_INC 3 | 4 | #include 5 | #include 6 | 7 | TH_API THFile *THMemoryFile_newWithStorage(THCharStorage *storage, const char *mode); 8 | TH_API THFile *THMemoryFile_new(const char *mode); 9 | 10 | TH_API THCharStorage *THMemoryFile_storage(THFile *self); 11 | TH_API void THMemoryFile_longSize(THFile *self, int size); 12 | 13 | #endif 14 | -------------------------------------------------------------------------------- /cmake/Caffe2ConfigVersion.cmake.in: -------------------------------------------------------------------------------- 1 | set(PACKAGE_VERSION "@CAFFE2_VERSION@") 2 | 3 | # Check whether the requested PACKAGE_FIND_VERSION is compatible 4 | if("${PACKAGE_VERSION}" VERSION_LESS "${PACKAGE_FIND_VERSION}") 5 | set(PACKAGE_VERSION_COMPATIBLE FALSE) 6 | else() 7 | set(PACKAGE_VERSION_COMPATIBLE TRUE) 8 | if ("${PACKAGE_VERSION}" VERSION_EQUAL "${PACKAGE_FIND_VERSION}") 9 | set(PACKAGE_VERSION_EXACT TRUE) 10 | endif() 11 | endif() 12 | -------------------------------------------------------------------------------- 
/aten/src/ATen/DynamicLibrary.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { 7 | 8 | struct DynamicLibrary { 9 | AT_DISALLOW_COPY_AND_ASSIGN(DynamicLibrary); 10 | 11 | CAFFE2_API DynamicLibrary(const char* name); 12 | 13 | CAFFE2_API void* sym(const char* name); 14 | 15 | CAFFE2_API ~DynamicLibrary(); 16 | 17 | private: 18 | void* handle = nullptr; 19 | }; 20 | 21 | } // namespace at 22 | -------------------------------------------------------------------------------- /aten/src/THC/THCAllocator.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_ALLOCATOR_INC 2 | #define THC_ALLOCATOR_INC 3 | 4 | #include 5 | 6 | // IPC doesn't support (re)allocation 7 | 8 | class TORCH_CUDA_API THCIpcDeleter { 9 | public: 10 | THCIpcDeleter(std::shared_ptr basePtr); 11 | ~THCIpcDeleter(); 12 | static at::DataPtr makeDataPtr(std::shared_ptr basePtr, void* data); 13 | private: 14 | std::shared_ptr basePtr_; 15 | }; 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /aten/src/ATen/cuda/PinnedMemoryAllocator.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | namespace at { namespace cuda { 11 | 12 | at::Allocator* getPinnedMemoryAllocator() { 13 | auto state = globalContext().lazyInitCUDA(); 14 | return state->cudaHostAllocator; 15 | } 16 | 17 | }} // namespace at::cuda 18 | -------------------------------------------------------------------------------- /aten/src/ATen/native/cuda/CrossKernel.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | namespace at { namespace native { 7 | 8 | void cross_kernel_impl(Tensor& result, const Tensor& x1, const Tensor& x2, 
const int64_t dim) { 9 | legacy::cuda::_th_cross_kernel_out(result, x1, x2, dim); 10 | } 11 | 12 | REGISTER_DISPATCH(cross_stub, &cross_kernel_impl); 13 | 14 | }} 15 | 16 | -------------------------------------------------------------------------------- /aten/src/ATen/env.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # This file copied from tools/setup_helpers/env.py 4 | # PLEASE DO NOT ADD ANYTHING TO THIS FILE, the BUILD_NAMEDTENSOR flag is temporary. 5 | def check_env_flag(name, default=''): 6 | return os.getenv(name, default).upper() in ['ON', '1', 'YES', 'TRUE', 'Y'] 7 | 8 | 9 | def check_negative_env_flag(name, default=''): 10 | return os.getenv(name, default).upper() in ['OFF', '0', 'NO', 'FALSE', 'N'] 11 | 12 | BUILD_NAMEDTENSOR = True 13 | -------------------------------------------------------------------------------- /aten/src/THC/THCStorageCopy.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_STORAGE_COPY_INC 2 | #define THC_STORAGE_COPY_INC 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /aten/src/THC/THCTensorCopy.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_CUDA_TENSOR_COPY_INC 2 | #define TH_CUDA_TENSOR_COPY_INC 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /aten/src/ATen/PTThreadPool.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { 7 | 8 | class CAFFE2_API PTThreadPool : public c10::ThreadPool { 
9 | public: 10 | explicit PTThreadPool( 11 | int pool_size, 12 | int numa_node_id = -1) 13 | : c10::ThreadPool(pool_size, numa_node_id, [](){ 14 | c10::setThreadName("PTThreadPool"); 15 | at::init_num_threads(); 16 | }) {} 17 | }; 18 | 19 | } // namespace at 20 | -------------------------------------------------------------------------------- /aten/src/TH/THSize.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_SIZE_INC 2 | #define TH_SIZE_INC 3 | 4 | #include 5 | #include 6 | 7 | // THTensor functions that would work on a THSize if we had such a class in C++, 8 | // i.e. THTensor functions that depend only on the shape of the tensor, not the type. 9 | 10 | TH_API int THSize_isSameSizeAs(const int64_t *sizeA, int64_t dimsA, const int64_t *sizeB, int64_t dimsB); 11 | TH_API ptrdiff_t THSize_nElement(int64_t dims, int64_t *size); 12 | 13 | #endif 14 | -------------------------------------------------------------------------------- /aten/src/ATen/Tensor.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | /* 4 | * We split Tensor.h into TensorBody.h and TensorMethods.h because we want 5 | * all TensorMethods to be inlined, but they depend on the Dispatcher, 6 | * which in turn depends on many other things, which then depend back on Tensor. 7 | * 8 | * We can break this dependency chain by having the dispatcher only depend on 9 | * TensorBody.h and not TensorMethods.h. 
10 | */ 11 | #include 12 | #include 13 | -------------------------------------------------------------------------------- /aten/src/ATen/mkl/Exceptions.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace at { namespace native { 9 | 10 | static inline void MKL_DFTI_CHECK(MKL_INT status) 11 | { 12 | if (status && !DftiErrorClass(status, DFTI_NO_ERROR)) { 13 | std::ostringstream ss; 14 | ss << "MKL FFT error: " << DftiErrorMessage(status); 15 | throw std::runtime_error(ss.str()); 16 | } 17 | } 18 | 19 | }} // namespace at::native 20 | -------------------------------------------------------------------------------- /aten/src/THC/THC.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_INC 2 | #define THC_INC 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /aten/src/THC/THCStorage.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_STORAGE_INC 2 | #define THC_STORAGE_INC 3 | 4 | #include 5 | #include 6 | 7 | #define THCStorage_(NAME) TH_CONCAT_4(TH,CReal,Storage_,NAME) 8 | 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /aten/src/THC/THCStorageCopy.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | 
-------------------------------------------------------------------------------- /aten/src/ATen/core/Tensor.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | /* 4 | * We split Tensor.h into TensorBody.h and TensorMethods.h because we want 5 | * all TensorMethods to be inlined, but they depend on the Dispatcher, 6 | * which in turn depends on many other things, which then depend back on Tensor. 7 | * 8 | * We can break this dependency chain by having the dispatcher only depend on 9 | * TensorBody.h and not TensorMethods.h. 10 | */ 11 | #include 12 | #include 13 | -------------------------------------------------------------------------------- /aten/src/TH/THGenerateAllTypes.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #error "You must define TH_GENERIC_FILE before including THGenerateAllTypes.h" 3 | #endif 4 | 5 | #ifndef THGenerateManyTypes 6 | #define THAllLocalGenerateManyTypes 7 | #define THGenerateManyTypes 8 | #endif 9 | 10 | #include 11 | #include 12 | 13 | #ifdef THAllLocalGenerateManyTypes 14 | #undef THAllLocalGenerateManyTypes 15 | #undef THGenerateManyTypes 16 | #undef TH_GENERIC_FILE 17 | #endif 18 | -------------------------------------------------------------------------------- /aten/src/ATen/quantized/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | FILE(GLOB_RECURSE ATen_QUANTIZED_HEADERS "*.h") 2 | FILE(GLOB_RECURSE ATen_QUANTIZED_SRCS "*.cpp") 3 | FILE(GLOB_RECURSE ATen_QUANTIZED_TEST_SRCS "*_test.cpp") 4 | EXCLUDE(ATen_QUANTIZED_SRCS "${ATen_QUANTIZED_SRCS}" ${ATen_QUANTIZED_TEST_SRCS}) 5 | 6 | # Pass to parent 7 | set(ATen_QUANTIZED_HEADERS ${ATen_QUANTIZED_HEADERS} PARENT_SCOPE) 8 | set(ATen_QUANTIZED_SRCS ${ATen_QUANTIZED_SRCS} PARENT_SCOPE) 9 | set(ATen_QUANTIZED_TEST_SRCS ${ATen_QUANTIZED_TEST_SRCS} PARENT_SCOPE) 10 | 
-------------------------------------------------------------------------------- /aten/src/TH/THGenerateFloatTypes.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #error "You must define TH_GENERIC_FILE before including THGenerateFloatTypes.h" 3 | #endif 4 | 5 | #ifndef THGenerateManyTypes 6 | #define THFloatLocalGenerateManyTypes 7 | #define THGenerateManyTypes 8 | #endif 9 | 10 | #include 11 | #include 12 | 13 | #ifdef THFloatLocalGenerateManyTypes 14 | #undef THFloatLocalGenerateManyTypes 15 | #undef THGenerateManyTypes 16 | #undef TH_GENERIC_FILE 17 | #endif 18 | -------------------------------------------------------------------------------- /aten/src/THC/generic/THCTensorCopy.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #define THC_GENERIC_FILE "THC/generic/THCTensorCopy.h" 3 | #else 4 | 5 | THC_API void THCTensor_(copy)(THCState *state, THCTensor *self, THCTensor *src); 6 | THC_API void THCTensor_(copyIgnoringOverlaps)(THCState *state, THCTensor *self, THCTensor *src); 7 | 8 | THC_API void THCTensor_(copyAsyncCPU)(THCState *state, THCTensor *self, THTensor *src); 9 | THC_API void THTensor_(copyAsyncCuda)(THCState *state, THTensor *self, THCTensor *src); 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /aten/src/TH/TH.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_INC 2 | #define TH_INC 3 | 4 | #include 5 | 6 | #include 7 | #ifdef USE_LAPACK 8 | #include 9 | #endif 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /aten/src/THC/THCGenerateIntType.h: 
-------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #error "You must define THC_GENERIC_FILE before including THGenerateIntType.h" 3 | #endif 4 | 5 | #define scalar_t int32_t 6 | #define accreal int64_t 7 | #define Real Int 8 | #define CReal CudaInt 9 | #define THC_REAL_IS_INT 10 | #line 1 THC_GENERIC_FILE 11 | #include THC_GENERIC_FILE 12 | #undef scalar_t 13 | #undef accreal 14 | #undef Real 15 | #undef CReal 16 | #undef THC_REAL_IS_INT 17 | 18 | #ifndef THCGenerateAllTypes 19 | #undef THC_GENERIC_FILE 20 | #endif 21 | -------------------------------------------------------------------------------- /aten/src/ATen/cpu/tbb/extra/version_string.ver.in: -------------------------------------------------------------------------------- 1 | #define __TBB_VERSION_STRINGS(N) \ 2 | #N": BUILD_HOST @CMAKE_SYSTEM_NAME@" ENDL \ 3 | #N": BUILD_OS @CMAKE_SYSTEM@" ENDL \ 4 | #N": BUILD_KERNEL @CMAKE_SYSTEM_VERSION@" ENDL \ 5 | #N": BUILD_GCC @CMAKE_CXX_COMPILER_ID@" ENDL \ 6 | #N": BUILD_LIBC Unknown" ENDL \ 7 | #N": BUILD_LD Unknown" ENDL \ 8 | #N": BUILD_TARGET Unknown" ENDL \ 9 | #N": BUILD_COMMAND Unknown" ENDL 10 | 11 | #define __TBB_DATETIME "@_configure_date@" 12 | -------------------------------------------------------------------------------- /aten/src/THC/THCGenerateCharType.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #error "You must define THC_GENERIC_FILE before including THGenerateCharType.h" 3 | #endif 4 | 5 | #define scalar_t int8_t 6 | #define accreal int64_t 7 | #define Real Char 8 | #define CReal CudaChar 9 | #define THC_REAL_IS_CHAR 10 | #line 1 THC_GENERIC_FILE 11 | #include THC_GENERIC_FILE 12 | #undef scalar_t 13 | #undef accreal 14 | #undef Real 15 | #undef CReal 16 | #undef THC_REAL_IS_CHAR 17 | 18 | #ifndef THCGenerateAllTypes 19 | #undef THC_GENERIC_FILE 20 | #endif 21 | 
-------------------------------------------------------------------------------- /aten/src/ATen/InitialTensorOptions.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace at { 6 | 7 | // Represents the initial TensorOptions, before the "defaults" are ever changed. 8 | // This is designed to be used in library code, where the explicit devices, dtypes, etc. are known. 9 | // NOTE: this is not a stable API. 10 | inline TensorOptions initialTensorOptions() { 11 | return TensorOptions(kCPU).dtype(kFloat).layout(kStrided) 12 | .requires_grad(false); 13 | } 14 | 15 | } 16 | -------------------------------------------------------------------------------- /aten/src/ATen/native/Pow.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { 7 | 8 | struct TensorIterator; 9 | 10 | namespace native { 11 | 12 | using pow_tensor_tensor_fn = void (*)(TensorIterator&); 13 | using pow_tensor_scalar_fn = void (*)(TensorIterator&, Scalar); 14 | 15 | DECLARE_DISPATCH(pow_tensor_tensor_fn, pow_tensor_tensor_stub); 16 | DECLARE_DISPATCH(pow_tensor_scalar_fn, pow_tensor_scalar_stub); 17 | 18 | } // namespace native 19 | 20 | } // namespace at 21 | -------------------------------------------------------------------------------- /aten/src/THC/THCGenerateByteType.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #error "You must define THC_GENERIC_FILE before including THGenerateByteType.h" 3 | #endif 4 | 5 | #define scalar_t uint8_t 6 | #define accreal int64_t 7 | #define Real Byte 8 | #define CReal CudaByte 9 | #define THC_REAL_IS_BYTE 10 | #line 1 THC_GENERIC_FILE 11 | #include THC_GENERIC_FILE 12 | #undef scalar_t 13 | #undef accreal 14 | #undef Real 15 | #undef CReal 16 | #undef THC_REAL_IS_BYTE 17 | 18 | #ifndef THCGenerateAllTypes 19 | #undef 
THC_GENERIC_FILE 20 | #endif 21 | -------------------------------------------------------------------------------- /aten/src/THC/THCGenerateLongType.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #error "You must define THC_GENERIC_FILE before including THGenerateLongType.h" 3 | #endif 4 | 5 | #define scalar_t int64_t 6 | #define accreal int64_t 7 | #define Real Long 8 | #define CReal CudaLong 9 | #define THC_REAL_IS_LONG 10 | #line 1 THC_GENERIC_FILE 11 | #include THC_GENERIC_FILE 12 | #undef scalar_t 13 | #undef accreal 14 | #undef Real 15 | #undef CReal 16 | #undef THC_REAL_IS_LONG 17 | 18 | #ifndef THCGenerateAllTypes 19 | #undef THC_GENERIC_FILE 20 | #endif 21 | -------------------------------------------------------------------------------- /aten/src/ATen/cuda/CUDAUtils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace at { namespace cuda { 6 | 7 | // Check if every tensor in a list of tensors matches the current 8 | // device. 
9 | inline bool check_device(ArrayRef ts) { 10 | if (ts.empty()) { 11 | return true; 12 | } 13 | Device curDevice = Device(kCUDA, current_device()); 14 | for (const Tensor& t : ts) { 15 | if (t.device() != curDevice) return false; 16 | } 17 | return true; 18 | } 19 | 20 | }} // namespace at::cuda 21 | -------------------------------------------------------------------------------- /aten/src/TH/THGenerateQTypes.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #error "You must define TH_GENERIC_FILE before including THGenerateQTypes.h" 3 | #endif 4 | 5 | #ifndef THGenerateManyTypes 6 | #define THQLocalGenerateManyTypes 7 | #define THGenerateManyTypes 8 | #endif 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | #ifdef THQLocalGenerateManyTypes 15 | #undef THQLocalGenerateManyTypes 16 | #undef THGenerateManyTypes 17 | #undef TH_GENERIC_FILE 18 | #endif 19 | -------------------------------------------------------------------------------- /aten/src/TH/THTensorEvenMoreMath.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | -------------------------------------------------------------------------------- /aten/src/THC/THCGenerateShortType.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #error "You must define THC_GENERIC_FILE before including THGenerateShortType.h" 3 | #endif 4 | 5 | #define scalar_t int16_t 6 | #define accreal int64_t 7 | #define Real Short 8 | #define CReal CudaShort 9 | #define THC_REAL_IS_SHORT 10 | #line 1 THC_GENERIC_FILE 11 | #include THC_GENERIC_FILE 12 | #undef scalar_t 13 | #undef accreal 14 | #undef Real 15 | #undef CReal 16 | #undef THC_REAL_IS_SHORT 17 | 18 | #ifndef THCGenerateAllTypes 19 
| #undef THC_GENERIC_FILE 20 | #endif 21 | -------------------------------------------------------------------------------- /aten/src/ATen/core/Range.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { 7 | 8 | struct Range { 9 | Range(int64_t begin, int64_t end) 10 | : begin(begin) 11 | , end(end) {} 12 | 13 | int64_t size() const { return end - begin; } 14 | 15 | Range operator/(int64_t divisor) { 16 | return Range(begin / divisor, end / divisor); 17 | } 18 | 19 | int64_t begin; 20 | int64_t end; 21 | }; 22 | 23 | std::ostream& operator<<(std::ostream& out, const Range& range); 24 | 25 | } // namespace at 26 | -------------------------------------------------------------------------------- /aten/src/ATen/core/Reduction.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace at { 4 | namespace Reduction { 5 | 6 | // NB: Keep this in sync with Reduction class in torch/nn/_reduction.py 7 | // These constants control the reduction behavior of loss functions. 8 | // Ideally, this would be a scoped enum, but jit doesn't support that 9 | enum Reduction { 10 | None, // Do not reduce 11 | Mean, // (Possibly weighted) mean of losses 12 | Sum, // Sum losses 13 | END 14 | }; 15 | } // namespace Reduction 16 | } // namespace at 17 | -------------------------------------------------------------------------------- /cmake/Modules/FindCUB.cmake: -------------------------------------------------------------------------------- 1 | # Try to find the CUB library and headers. 
2 | # CUB_FOUND - system has CUB 3 | # CUB_INCLUDE_DIRS - the CUB include directory 4 | 5 | find_path(CUB_INCLUDE_DIR 6 | NAMES cub/cub.cuh 7 | DOC "The directory where CUB includes reside" 8 | ) 9 | 10 | set(CUB_INCLUDE_DIRS ${CUB_INCLUDE_DIR}) 11 | 12 | include(FindPackageHandleStandardArgs) 13 | find_package_handle_standard_args(CUB 14 | FOUND_VAR CUB_FOUND 15 | REQUIRED_VARS CUB_INCLUDE_DIR 16 | ) 17 | 18 | mark_as_advanced(CUB_FOUND) 19 | -------------------------------------------------------------------------------- /aten/src/ATen/hip/impl/HIPCachingAllocatorMasqueradingAsCUDA.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace c10 { namespace hip { 8 | namespace HIPCachingAllocatorMasqueradingAsCUDA { 9 | 10 | Allocator* get(); 11 | C10_HIP_API void recordStreamMasqueradingAsCUDA(void *ptr, HIPStreamMasqueradingAsCUDA stream); 12 | 13 | } // namespace HIPCachingAllocatorMasqueradingAsCUDA 14 | }} // namespace c10::hip 15 | -------------------------------------------------------------------------------- /aten/src/THC/THCTensorRandom.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_CUDA_TENSOR_RANDOM_INC 2 | #define TH_CUDA_TENSOR_RANDOM_INC 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | #include 13 | 14 | THC_API void THCRandom_getRNGState(at::Generator *gen_, THByteTensor *rng_state); 15 | THC_API void THCRandom_setRNGState(at::Generator *gen_, THByteTensor *rng_state); 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /aten/src/THC/generic/THCTensorTopK.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #define THC_GENERIC_FILE "THC/generic/THCTensorTopK.h" 3 | #else 4 | 5 | /* Returns the set of all kth smallest (or largest) elements, 
depending */ 6 | /* on `dir` */ 7 | THC_API void THCTensor_(topk)(THCState* state, 8 | THCTensor* topK, 9 | THCudaLongTensor* indices, 10 | THCTensor* input, 11 | int64_t k, int dim, int dir, int sorted); 12 | 13 | #endif // THC_GENERIC_FILE 14 | -------------------------------------------------------------------------------- /aten/src/ATen/cuda/CUDAConfig.h.in: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // Test these using #if AT_CUDNN_ENABLED(), not #ifdef, so that it's 4 | // obvious if you forgot to include Config.h 5 | // c.f. https://stackoverflow.com/questions/33759787/generating-an-error-if-checked-boolean-macro-is-not-defined 6 | // 7 | // NB: This header MUST NOT be included from other headers; it should 8 | // only be included from C++ files. 9 | 10 | #define AT_CUDNN_ENABLED() @AT_CUDNN_ENABLED@ 11 | #define AT_ROCM_ENABLED() @AT_ROCM_ENABLED@ 12 | 13 | #define NVCC_FLAGS_EXTRA "@NVCC_FLAGS_EXTRA@" 14 | -------------------------------------------------------------------------------- /aten/src/ATen/native/cpu/DepthwiseConvKernel.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | /* 7 | Depthwise 3x3 Winograd convolution operator 8 | */ 9 | 10 | namespace at { 11 | namespace native { 12 | 13 | using convolution_depthwise3x3_winograd_fn = 14 | Tensor (*)(const Tensor &, const Tensor &, const Tensor &,IntArrayRef, IntArrayRef, int64_t); 15 | 16 | DECLARE_DISPATCH(convolution_depthwise3x3_winograd_fn, convolution_depthwise3x3_winograd_stub); 17 | 18 | } // namespace native 19 | } // namespace at 20 | -------------------------------------------------------------------------------- /aten/src/ATen/test/verify_api_visibility.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifdef AT_CUDNN_ENABLED 4 | #error "AT_CUDNN_ENABLED should not be visible in public 
headers" 5 | #endif 6 | 7 | #ifdef AT_MKL_ENABLED 8 | #error "AT_MKL_ENABLED should not be visible in public headers" 9 | #endif 10 | 11 | #ifdef AT_MKLDNN_ENABLED 12 | #error "AT_MKLDNN_ENABLED should not be visible in public headers" 13 | #endif 14 | 15 | #ifdef CAFFE2_STATIC_LINK_CUDA 16 | #error "CAFFE2_STATIC_LINK_CUDA should not be visible in public headers" 17 | #endif 18 | 19 | auto main() -> int {} 20 | -------------------------------------------------------------------------------- /aten/src/THC/THCSleep.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | 4 | __global__ void spin_kernel(int64_t cycles) 5 | { 6 | // see concurrentKernels CUDA sampl 7 | int64_t start_clock = clock64(); 8 | int64_t clock_offset = 0; 9 | while (clock_offset < cycles) 10 | { 11 | clock_offset = clock64() - start_clock; 12 | } 13 | } 14 | 15 | void THC_sleep(THCState* state, int64_t cycles) 16 | { 17 | dim3 grid(1); 18 | dim3 block(1); 19 | spin_kernel<<>>(cycles); 20 | THCudaCheck(cudaGetLastError()); 21 | } 22 | -------------------------------------------------------------------------------- /aten/src/ATen/native/cuda/TensorShapeCUDA.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace at { 5 | namespace native { 6 | 7 | // this needs to be split along CPU/CUDA lines because we don't have a consistent 8 | // way of getting the allocator to use for a device (c10::GetAllocator is not 9 | // the same as at::cuda::getCUDADeviceAllocator(). 
10 | Tensor& set_cuda_(Tensor& result) { 11 | Storage storage(result.dtype(), 0, at::cuda::getCUDADeviceAllocator(), true); 12 | return result.set_(storage, 0, {0}, {}); 13 | } 14 | 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /aten/src/THC/THCGenerateBoolType.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #error "You must define THC_GENERIC_FILE before including THCGenerateBoolType.h" 3 | #endif 4 | 5 | #define scalar_t bool 6 | #define ureal bool 7 | #define accreal int64_t 8 | #define Real Bool 9 | #define CReal CudaBool 10 | #define THC_REAL_IS_BOOL 11 | #line 1 THC_GENERIC_FILE 12 | #include THC_GENERIC_FILE 13 | #undef scalar_t 14 | #undef ureal 15 | #undef accreal 16 | #undef Real 17 | #undef CReal 18 | #undef THC_REAL_IS_BOOL 19 | 20 | #ifndef THCGenerateBoolType 21 | #undef THC_GENERIC_FILE 22 | #endif 23 | -------------------------------------------------------------------------------- /aten/src/ATen/ThreadLocalDebugInfo.cpp: -------------------------------------------------------------------------------- 1 | #include "ATen/ThreadLocalDebugInfo.h" 2 | 3 | namespace at { 4 | 5 | namespace { 6 | thread_local std::shared_ptr debug_info; 7 | } 8 | 9 | std::shared_ptr getThreadLocalDebugInfo() noexcept { 10 | return debug_info; 11 | } 12 | 13 | std::shared_ptr setThreadLocalDebugInfo( 14 | std::shared_ptr info) noexcept { 15 | auto ret = std::move(debug_info); 16 | debug_info = std::move(info); 17 | return ret; 18 | } 19 | 20 | } // namespace at 21 | -------------------------------------------------------------------------------- /aten/src/ATen/cudnn/cudnn-wrapper.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #define STRINGIFY(x) #x 6 | #define STRING(x) STRINGIFY(x) 7 | 8 | #if CUDNN_MAJOR < 6 9 | #pragma message ("CuDNN v" STRING(CUDNN_MAJOR) " found, but 
need at least CuDNN v6. You can get the latest version of CuDNN from https://developer.nvidia.com/cudnn or disable CuDNN with USE_CUDNN=0") 10 | #pragma message "We strongly encourage you to move to 6.0 and above." 11 | #pragma message "This message is intended to annoy you enough to update." 12 | #endif 13 | 14 | #undef STRINGIFY 15 | #undef STRING 16 | 17 | -------------------------------------------------------------------------------- /aten/src/THC/THCGenerateDoubleType.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #error "You must define THC_GENERIC_FILE before including THGenerateDoubleType.h" 3 | #endif 4 | 5 | #define scalar_t double 6 | #define accreal double 7 | #define Real Double 8 | #define CReal CudaDouble 9 | #define THC_REAL_IS_DOUBLE 10 | #line 1 THC_GENERIC_FILE 11 | #include THC_GENERIC_FILE 12 | #undef scalar_t 13 | #undef accreal 14 | #undef Real 15 | #undef CReal 16 | #undef THC_REAL_IS_DOUBLE 17 | 18 | #ifndef THCGenerateAllTypes 19 | #ifndef THCGenerateFloatTypes 20 | #undef THC_GENERIC_FILE 21 | #endif 22 | #endif 23 | -------------------------------------------------------------------------------- /aten/src/ATen/native/cpu/SoftmaxKernel.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { 7 | namespace native { 8 | 9 | using forward_fn = void(*)(Tensor &, const Tensor &); 10 | using backward_fn = void(*)(Tensor &, const Tensor &, const Tensor&); 11 | 12 | DECLARE_DISPATCH(forward_fn, softmax_lastdim_kernel); 13 | DECLARE_DISPATCH(forward_fn, log_softmax_lastdim_kernel); 14 | DECLARE_DISPATCH(backward_fn, softmax_backward_lastdim_kernel); 15 | DECLARE_DISPATCH(backward_fn, log_softmax_backward_lastdim_kernel); 16 | 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /aten/src/ATen/templates/LegacyTHFunctions.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // ${generated_comment} 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | namespace c10 { 10 | class Scalar; 11 | } 12 | namespace at { 13 | struct Generator; 14 | class Tensor; 15 | struct Type; 16 | } // namespace at 17 | 18 | namespace at { 19 | namespace native { 20 | namespace legacy { 21 | namespace ${namespace} { 22 | 23 | ${legacy_th_declarations} 24 | 25 | } // namespace th 26 | } // namespace legacy 27 | } // namespace native 28 | } // namespace at 29 | -------------------------------------------------------------------------------- /aten/src/THC/THCStorage.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | #if CUDA_VERSION >= 7000 || defined(__HIP_PLATFORM_HCC__) 7 | #include 8 | #endif 9 | 10 | #include 11 | 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | 18 | #include 19 | #include 20 | -------------------------------------------------------------------------------- /cmake/Modules_CUDA_fix/FindCUDA.cmake: -------------------------------------------------------------------------------- 1 | # This is a wrapper of the upstream `./upstream/FindCUDA.cmake` that 2 | # automatically includes `./upstream/CMakeInitializeConfigs.cmake` before 3 | # `./upstream/FindCUDA.cmake`. The `CMakeInitializeConfigs.cmake`, which is 4 | # absent in old CMake versions, creates some necessary variables for the later 5 | # to run. 6 | # See ./README.md for details. 
7 | 8 | set(UPSTREAM_FIND_CUDA_DIR "${CMAKE_CURRENT_LIST_DIR}/upstream/") 9 | 10 | include("${UPSTREAM_FIND_CUDA_DIR}/CMakeInitializeConfigs.cmake") 11 | include("${UPSTREAM_FIND_CUDA_DIR}/FindCUDA.cmake") 12 | -------------------------------------------------------------------------------- /aten/src/ATen/core/dispatch/README.md: -------------------------------------------------------------------------------- 1 | This folder contains the c10 dispatcher. This dispatcher is a single point 2 | through which we are planning to route all kernel calls. 3 | Existing dispatch mechanisms from legacy PyTorch or caffe2 are planned to 4 | be replaced. 5 | 6 | This folder contains the following files: 7 | - Dispatcher.h: Main facade interface. Code using the dispatcher should only use this. 8 | - DispatchTable.h: Implementation of the actual dispatch mechanism. Hash table with kernels, lookup, ... 9 | - KernelFunction.h: The core interface (i.e. function pointer) for calling a kernel 10 | -------------------------------------------------------------------------------- /aten/src/THC/generic/THCTensorRandom.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #define THC_GENERIC_FILE "THC/generic/THCTensorRandom.h" 3 | #else 4 | 5 | #include "ATen/core/Generator.h" 6 | 7 | #if defined(THC_REAL_IS_FLOAT) || defined(THC_REAL_IS_DOUBLE) || defined(THC_REAL_IS_HALF) 8 | 9 | THC_API void THCTensor_(multinomialAliasSetup)(struct THCState *state, THCTensor *probs, THCudaLongTensor *J, THCTensor *q); 10 | THC_API void THCTensor_(multinomialAliasDraw)(THCState *state, THCudaLongTensor *self, THCTensor *_q, THCudaLongTensor *_J, int n_sample, at::Generator* gen_); 11 | 12 | #endif 13 | #endif 14 | -------------------------------------------------------------------------------- /aten/src/ATen/Config.h.in: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // Test these 
using #if AT_MKL_ENABLED(), not #ifdef, so that it's 4 | // obvious if you forgot to include Config.h 5 | // c.f. https://stackoverflow.com/questions/33759787/generating-an-error-if-checked-boolean-macro-is-not-defined 6 | // 7 | // DO NOT put the macros for CUDA libraries in this file; they belong in cuda/CUDAConfig.h 8 | 9 | #define AT_MKLDNN_ENABLED() @AT_MKLDNN_ENABLED@ 10 | #define AT_MKL_ENABLED() @AT_MKL_ENABLED@ 11 | #define AT_NNPACK_ENABLED() @AT_NNPACK_ENABLED@ 12 | #define CAFFE2_STATIC_LINK_CUDA() @CAFFE2_STATIC_LINK_CUDA_INT@ 13 | -------------------------------------------------------------------------------- /aten/src/ATen/native/PointwiseOps.h: -------------------------------------------------------------------------------- 1 | // Ternary and higher-order pointwise operations 2 | #pragma once 3 | 4 | #include 5 | #include 6 | 7 | namespace at { 8 | 9 | struct TensorIterator; 10 | 11 | namespace native { 12 | 13 | using pointwise_fn = void (*)(TensorIterator&, Scalar scalar); 14 | 15 | DECLARE_DISPATCH(pointwise_fn, addcmul_stub); 16 | DECLARE_DISPATCH(pointwise_fn, addcdiv_stub); 17 | DECLARE_DISPATCH(pointwise_fn, smooth_l1_backward_stub); 18 | DECLARE_DISPATCH(pointwise_fn, mse_backward_stub); 19 | 20 | } // namespace native 21 | } // namespace at 22 | -------------------------------------------------------------------------------- /aten/src/THC/generic/THCTensorMode.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #define THC_GENERIC_FILE "THC/generic/THCTensorMode.h" 3 | #else 4 | 5 | /* Returns the mode, and index of the mode, for the set of values 6 | * along a given dimension in the input tensor. 
*/ 7 | THC_API void THCTensor_(mode)(THCState *state, 8 | THCTensor *values, 9 | THCudaLongTensor *indices, 10 | THCTensor *input, 11 | int dimension, 12 | int keepdim); 13 | 14 | #endif // THC_GENERIC_FILE 15 | -------------------------------------------------------------------------------- /aten/conda/meta.yaml: -------------------------------------------------------------------------------- 1 | {% set version = "0.1.dev" %} 2 | 3 | package: 4 | name: aten 5 | version: {{ version }} 6 | 7 | source: 8 | path: .. 9 | 10 | build: 11 | number: 1 12 | skip: True # [win] 13 | script_env: 14 | - CONDA_CMAKE_ARGS 15 | 16 | requirements: 17 | build: 18 | - cmake 19 | - pyyaml 20 | - setuptools 21 | - python 22 | - mkl # [not osx] 23 | run: 24 | - mkl # [not osx] 25 | 26 | about: 27 | home: https://github.com/zdevito/ATen 28 | license: BSD 29 | summary: A TENsor library for C++11 30 | 31 | extra: 32 | recipe-maintainers: 33 | - ezyang 34 | -------------------------------------------------------------------------------- /aten/src/TH/THSize.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int THSize_isSameSizeAs(const int64_t *sizeA, int64_t dimsA, const int64_t *sizeB, int64_t dimsB) { 4 | int d; 5 | if (dimsA != dimsB) 6 | return 0; 7 | for(d = 0; d < dimsA; ++d) 8 | { 9 | if(sizeA[d] != sizeB[d]) 10 | return 0; 11 | } 12 | return 1; 13 | } 14 | 15 | ptrdiff_t THSize_nElement(int64_t dims, int64_t *size) { 16 | if(dims == 0) 17 | return 0; 18 | else 19 | { 20 | ptrdiff_t nElement = 1; 21 | int d; 22 | for(d = 0; d < dims; d++) 23 | nElement *= size[d]; 24 | return nElement; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /aten/src/THC/THCTensorMathMagma.cuh: -------------------------------------------------------------------------------- 1 | #ifndef THC_TENSOR_MATH_MAGMA_CUH 2 | #define THC_TENSOR_MATH_MAGMA_CUH 3 | 4 | #ifdef USE_MAGMA 5 | #include 6 | #else 7 | 
#include 8 | #endif 9 | 10 | #ifdef USE_MAGMA 11 | template 12 | static inline T* th_magma_malloc_pinned(size_t n) 13 | { 14 | void* ptr; 15 | if (MAGMA_SUCCESS != magma_malloc_pinned(&ptr, n * sizeof(T))) 16 | THError("$ Torch: not enough memory: you tried to allocate %dGB. Buy new RAM!", n/268435456); 17 | return reinterpret_cast(ptr); 18 | } 19 | 20 | #endif 21 | 22 | #endif // THC_TENSOR_MATH_MAGMA_CUH 23 | -------------------------------------------------------------------------------- /aten/src/ATen/hip/impl/HIPCachingAllocatorMasqueradingAsCUDA.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace c10 { namespace hip { 4 | namespace HIPCachingAllocatorMasqueradingAsCUDA { 5 | 6 | Allocator* get() { 7 | static HIPAllocatorMasqueradingAsCUDA allocator(HIPCachingAllocator::get()); 8 | return &allocator; 9 | } 10 | 11 | void recordStreamMasqueradingAsCUDA(void *ptr, HIPStreamMasqueradingAsCUDA stream) { 12 | HIPCachingAllocator::recordStream(ptr, stream.hip_stream()); 13 | } 14 | 15 | } // namespace HIPCachingAllocatorMasqueradingAsCUDA 16 | }} // namespace c10::hip 17 | -------------------------------------------------------------------------------- /aten/src/ATen/native/mkldnn/Utils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { namespace native { 7 | 8 | std::vector conv_output_size( 9 | IntArrayRef input_size, 10 | IntArrayRef kernel_size, 11 | IntArrayRef padding, 12 | IntArrayRef stride, 13 | IntArrayRef dilation); 14 | 15 | std::vector pool_output_sizes( 16 | IntArrayRef input_size, 17 | IntArrayRef kernel_size, 18 | IntArrayRef stride, 19 | IntArrayRef padding_l, 20 | IntArrayRef padding_r, 21 | IntArrayRef dilation, 22 | bool ceil_mode); 23 | }} 24 | -------------------------------------------------------------------------------- /aten/src/TH/THGenerateIntTypes.h: 
-------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #error "You must define TH_GENERIC_FILE before including THGenerateIntTypes.h" 3 | #endif 4 | 5 | #ifndef THGenerateManyTypes 6 | #define THIntLocalGenerateManyTypes 7 | #define THGenerateManyTypes 8 | #endif 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #ifdef THIntLocalGenerateManyTypes 17 | #undef THIntLocalGenerateManyTypes 18 | #undef THGenerateManyTypes 19 | #undef TH_GENERIC_FILE 20 | #endif 21 | -------------------------------------------------------------------------------- /aten/src/THC/THCTensorMode.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | 17 | #include 18 | #include 19 | -------------------------------------------------------------------------------- /aten/src/ATen/cpu/FlushDenormal.h: -------------------------------------------------------------------------------- 1 | /// Flush-To-Zero and Denormals-Are-Zero mode 2 | /// 3 | /// Flush-To-Zero (FTZ) and Denormals-Are-Zero (DAZ) are modes that bypass 4 | /// IEEE 754 methods of dealing with denormal floating-point numbers on x86-64 5 | /// and some x86 CPUs. They result in reduced precision for values near zero, 6 | /// but increased performance. 
7 | /// 8 | /// See https://software.intel.com/en-us/articles/x87-and-sse-floating-point-assists-in-ia-32-flush-to-zero-ftz-and-denormals-are-zero-daz 9 | 10 | namespace at { namespace cpu { 11 | 12 | bool set_flush_denormal(bool on); 13 | 14 | }} // namespace at::cpu 15 | -------------------------------------------------------------------------------- /aten/src/THC/THCTensorTopK.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include // for std::min 10 | 11 | #if CUDA_VERSION >= 7000 || defined __HIP_PLATFORM_HCC__ 12 | #include 13 | #endif 14 | 15 | #include 16 | 17 | #include 18 | #include 19 | 20 | -------------------------------------------------------------------------------- /cmake/Modules/Findpybind11.cmake: -------------------------------------------------------------------------------- 1 | # Try to find the pybind11 library and headers. 
2 | # pybind11_FOUND - system has pybind11 3 | # pybind11_INCLUDE_DIRS - the pybind11 include directory 4 | 5 | find_path(pybind11_INCLUDE_DIR 6 | NAMES pybind11/pybind11.h 7 | DOC "The directory where pybind11 includes reside" 8 | ) 9 | 10 | set(pybind11_INCLUDE_DIRS ${pybind11_INCLUDE_DIR}) 11 | 12 | include(FindPackageHandleStandardArgs) 13 | find_package_handle_standard_args(pybind11 14 | FOUND_VAR pybind11_FOUND 15 | REQUIRED_VARS pybind11_INCLUDE_DIR 16 | ) 17 | 18 | mark_as_advanced(pybind11_FOUND) 19 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/confu.yaml: -------------------------------------------------------------------------------- 1 | name: qnnpack 2 | title: Quantized UINT8 Functions for Mobile 3 | license: Apache 2.0 4 | deps: 5 | - name: cpuinfo 6 | url: https://github.com/pytorch/cpuinfo.git 7 | - name: fxdiv 8 | url: https://github.com/Maratyszcza/FXdiv.git 9 | - name: psimd 10 | url: https://github.com/Maratyszcza/psimd.git 11 | - name: pthreadpool 12 | url: https://github.com/Maratyszcza/pthreadpool.git 13 | - name: FP16 14 | url: https://github.com/Maratyszcza/FP16.git 15 | - name: clog 16 | dir: deps/clog 17 | - name: googletest 18 | - name: googlebenchmark 19 | -------------------------------------------------------------------------------- /aten/src/THC/THCGenerateHalfType.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #error "You must define THC_GENERIC_FILE before including THGenerateHalfType.h" 3 | #endif 4 | 5 | #include 6 | 7 | #define scalar_t THHalf 8 | #define accreal float 9 | #define Real Half 10 | 11 | #define CReal CudaHalf 12 | 13 | #define THC_REAL_IS_HALF 14 | #line 1 THC_GENERIC_FILE 15 | #include THC_GENERIC_FILE 16 | #undef scalar_t 17 | #undef accreal 18 | #undef Real 19 | 20 | #undef CReal 21 | 22 | #undef THC_REAL_IS_HALF 23 | 24 | #ifndef THCGenerateAllTypes 25 | #ifndef 
THCGenerateFloatTypes 26 | #undef THC_GENERIC_FILE 27 | #endif 28 | #endif 29 | -------------------------------------------------------------------------------- /aten/src/TH/THGenerateBFloat16Type.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #error "You must define TH_GENERIC_FILE before including THGenerateBFloat16Type.h" 3 | #endif 4 | 5 | #include 6 | #define scalar_t at::BFloat16 7 | #define accreal double 8 | #define TH_CONVERT_ACCREAL_TO_REAL(_val) (scalar_t)(_val) 9 | #define Real BFloat16 10 | #define TH_REAL_IS_BFLOAT16 11 | #line 1 TH_GENERIC_FILE 12 | #include TH_GENERIC_FILE 13 | #undef accreal 14 | #undef scalar_t 15 | #undef Real 16 | #undef TH_REAL_IS_BFLOAT16 17 | #undef TH_CONVERT_ACCREAL_TO_REAL 18 | 19 | #ifndef THGenerateManyTypes 20 | #undef TH_GENERIC_FILE 21 | #endif 22 | -------------------------------------------------------------------------------- /aten/src/TH/THGenerateQInt8Type.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #error "You must define TH_GENERIC_FILE before including THGenerateQInt8Type.h" 3 | #endif 4 | 5 | #define quantized_t c10::qint8 6 | #define scalar_t int8_t 7 | #define Real QInt8 8 | #define RealUnderlying Char 9 | #define THQUANTIZED 10 | #define THQINT8 11 | #define TH_REAL_IS_BYTE 12 | #line 1 TH_GENERIC_FILE 13 | #include TH_GENERIC_FILE 14 | #undef scalar_t 15 | #undef quantized_t 16 | #undef Real 17 | #undef RealUnderlying 18 | #undef TH_REAL_IS_BYTE 19 | #undef THQINT8 20 | #undef THQUANTIZED 21 | 22 | #ifndef THGenerateManyTypes 23 | #undef TH_GENERIC_FILE 24 | #endif 25 | -------------------------------------------------------------------------------- /aten/src/TH/THGenerateQInt32Type.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #error "You must define TH_GENERIC_FILE before including 
THGenerateQInt32Type.h" 3 | #endif 4 | 5 | #define quantized_t c10::qint32 6 | #define scalar_t int32_t 7 | #define Real QInt32 8 | #define RealUnderlying Int 9 | #define THQUANTIZED 10 | #define THQINT32 11 | #define TH_REAL_IS_BYTE 12 | #line 1 TH_GENERIC_FILE 13 | #include TH_GENERIC_FILE 14 | #undef scalar_t 15 | #undef quantized_t 16 | #undef Real 17 | #undef RealUnderlying 18 | #undef TH_REAL_IS_BYTE 19 | #undef THQINT32 20 | #undef THQUANTIZED 21 | 22 | #ifndef THGenerateManyTypes 23 | #undef TH_GENERIC_FILE 24 | #endif 25 | -------------------------------------------------------------------------------- /aten/src/TH/THGenerateQUInt8Type.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #error "You must define TH_GENERIC_FILE before including THGenerateQUInt8Type.h" 3 | #endif 4 | 5 | #define quantized_t c10::quint8 6 | #define scalar_t uint8_t 7 | #define Real QUInt8 8 | #define RealUnderlying Byte 9 | #define THQUANTIZED 10 | #define THQUINT8 11 | #define TH_REAL_IS_BYTE 12 | #line 1 TH_GENERIC_FILE 13 | #include TH_GENERIC_FILE 14 | #undef scalar_t 15 | #undef quantized_t 16 | #undef Real 17 | #undef RealUnderlying 18 | #undef TH_REAL_IS_BYTE 19 | #undef THQUINT8 20 | #undef THQUANTIZED 21 | 22 | #ifndef THGenerateManyTypes 23 | #undef TH_GENERIC_FILE 24 | #endif 25 | -------------------------------------------------------------------------------- /aten/src/TH/THVector.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_VECTOR_INC 2 | #define TH_VECTOR_INC 3 | 4 | #include 5 | #define THVector_(NAME) TH_CONCAT_4(TH,Real,Vector_,NAME) 6 | 7 | /* We are going to use dynamic dispatch, and want only to generate declarations 8 | * of the vector functions */ 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | 18 | #include 19 | #include 20 | 21 | #endif // TH_VECTOR_INC 22 | 
-------------------------------------------------------------------------------- /aten/src/THC/generic/THCTensorScatterGather.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #define THC_GENERIC_FILE "THC/generic/THCTensorScatterGather.h" 3 | #else 4 | 5 | THC_API void THCTensor_(gather)(THCState* state, THCTensor *tensor, THCTensor *src, int dim, THCudaLongTensor *index); 6 | THC_API void THCTensor_(scatter)(THCState* state, THCTensor *tensor, int dim, THCudaLongTensor *index, THCTensor *src); 7 | THC_API void THCTensor_(scatterAdd)(THCState* state, THCTensor *tensor, int dim, THCudaLongTensor *index, THCTensor *src); 8 | THC_API void THCTensor_(scatterFill)(THCState* state, THCTensor *tensor, int dim, THCudaLongTensor *index, scalar_t value); 9 | 10 | #endif 11 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/src/requantization/runtime-sse2.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Facebook, Inc. and its affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | PYTORCH_QNNP_INLINE __m128i 14 | sub_zero_point(const __m128i va, const __m128i vzp) { 15 | #if PYTORCH_QNNPACK_RUNTIME_QUANTIZATION 16 | // Run-time quantization 17 | return _mm_sub_epi16(va, vzp); 18 | #else 19 | // Design-time quantization (no-op) 20 | return va; 21 | #endif 22 | } 23 | -------------------------------------------------------------------------------- /aten/src/THC/THCGenerateBFloat16Type.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #error "You must define THC_GENERIC_FILE before including THCGenerateBFloat16Type.h" 3 | #endif 4 | #include 5 | 6 | #define scalar_t at::BFloat16 7 | #define accreal float 8 | #define Real BFloat16 9 | 10 | #define CReal CudaBFloat16 11 | 12 | #define THC_REAL_IS_BFLOAT16 13 | #line 1 THC_GENERIC_FILE 14 | #include THC_GENERIC_FILE 15 | #undef scalar_t 16 | #undef accreal 17 | #undef Real 18 | 19 | #undef CReal 20 | 21 | #undef THC_REAL_IS_BFLOAT16 22 | 23 | #ifndef THCGenerateAllTypes 24 | #ifndef THCGenerateFloatTypes 25 | #undef THC_GENERIC_FILE 26 | #endif 27 | #endif 28 | -------------------------------------------------------------------------------- /cmake/public/threads.cmake: -------------------------------------------------------------------------------- 1 | find_package(Threads REQUIRED) 2 | # For newer CMake, Threads::Threads is already defined. Otherwise, we will 3 | # provide a backward compatible wrapper for Threads::Threads. 
4 | if(THREADS_FOUND AND NOT TARGET Threads::Threads) 5 | add_library(Threads::Threads INTERFACE IMPORTED) 6 | 7 | if(THREADS_HAVE_PTHREAD_ARG) 8 | set_property(TARGET Threads::Threads 9 | PROPERTY INTERFACE_COMPILE_OPTIONS "-pthread") 10 | endif() 11 | 12 | if(CMAKE_THREAD_LIBS_INIT) 13 | set_property(TARGET Threads::Threads 14 | PROPERTY INTERFACE_LINK_LIBRARIES "${CMAKE_THREAD_LIBS_INIT}") 15 | endif() 16 | endif() 17 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/src/requantization/runtime-neon.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Facebook, Inc. and its affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | PYTORCH_QNNP_INLINE uint16x8_t 14 | sub_zero_point(const uint8x8_t va, const uint8x8_t vzp) { 15 | #if PYTORCH_QNNPACK_RUNTIME_QUANTIZATION 16 | // Run-time quantization 17 | return vsubl_u8(va, vzp); 18 | #else 19 | // Design-time quantization 20 | return vmovl_u8(va); 21 | #endif 22 | } 23 | -------------------------------------------------------------------------------- /aten/src/THC/THCTensor.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_TENSOR_INC 2 | #define THC_TENSOR_INC 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #define THCTensor_(NAME) TH_CONCAT_4(TH,CReal,Tensor_,NAME) 9 | 10 | #define THC_DESC_BUFF_LEN 64 11 | 12 | typedef struct THC_CLASS THCDescBuff 13 | { 14 | char str[THC_DESC_BUFF_LEN]; 15 | } THCDescBuff; 16 | 17 | #include 18 | #include 19 | 20 | #include 21 | #include 22 | 23 | #include 24 | #include 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /aten/src/ATen/native/Lerp.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { 7 | namespace native { 8 | 9 | using lerp_fn_scalar = void (*)( 10 | at::Tensor& ret, 11 | const at::Tensor& self, 12 | const at::Tensor& end, 13 | Scalar weight); 14 | 15 | using lerp_fn_tensor = void (*)( 16 | at::Tensor& ret, 17 | const at::Tensor& self, 18 | const at::Tensor& end, 19 | const at::Tensor& weights); 20 | 21 | DECLARE_DISPATCH(lerp_fn_scalar, lerp_kernel_scalar_weight); 22 | DECLARE_DISPATCH(lerp_fn_tensor, lerp_kernel_tensor_weight); 23 | 24 | } // namespace native 25 | } // namespace at 26 | -------------------------------------------------------------------------------- /aten/src/TH/generic/THTensorLapack.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #define TH_GENERIC_FILE "TH/generic/THTensorLapack.h" 3 | #else 4 | 5 | TH_API void THTensor_(gels)(THTensor *rb_, THTensor *ra_, THTensor *b_, THTensor *a_); 6 | TH_API void THTensor_(geev)(THTensor *re_, THTensor *rv_, THTensor *a_, bool eigenvectors); 7 | TH_API void THTensor_(potri)(THTensor *ra_, THTensor *a, bool upper); 8 | TH_API void THTensor_(geqrf)(THTensor *ra_, THTensor *rtau_, THTensor *a); 9 | TH_API void THTensor_(orgqr)(THTensor *ra_, THTensor *a, THTensor *tau); 10 | TH_API void THTensor_(ormqr)(THTensor *ra_, THTensor *a, THTensor *tau, THTensor *c, bool left, bool transpose); 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- /aten/src/ATen/DLConvertor.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | // this convertor will: 8 | // 1) take a Tensor object and wrap it in the DLPack tensor 9 | // 2) take a dlpack tensor and convert it to the ATen Tensor 10 | 11 | namespace at { 12 | 13 | CAFFE2_API ScalarType 
toScalarType(const DLDataType& dtype); 14 | CAFFE2_API DLManagedTensor* toDLPack(const Tensor& src); 15 | CAFFE2_API Tensor fromDLPack(const DLManagedTensor* src); 16 | CAFFE2_API DLDataType getDLDataType(const Tensor& t); 17 | CAFFE2_API DLContext getDLContext(const Tensor& tensor, const int64_t& device_id); 18 | 19 | } //namespace at 20 | -------------------------------------------------------------------------------- /aten/src/THC/THCGenerateFloatType.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #error "You must define THC_GENERIC_FILE before including THGenerateFloatType.h" 3 | #endif 4 | 5 | #define scalar_t float 6 | /* FIXME: fp64 has bad performance on some platforms; avoid using it unless 7 | we opt into it? */ 8 | #define accreal float 9 | #define Real Float 10 | #define CReal Cuda 11 | #define THC_REAL_IS_FLOAT 12 | #line 1 THC_GENERIC_FILE 13 | #include THC_GENERIC_FILE 14 | #undef scalar_t 15 | #undef accreal 16 | #undef Real 17 | #undef CReal 18 | #undef THC_REAL_IS_FLOAT 19 | 20 | #ifndef THCGenerateAllTypes 21 | #ifndef THCGenerateFloatTypes 22 | #undef THC_GENERIC_FILE 23 | #endif 24 | #endif 25 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/init_qnnpack.cpp: -------------------------------------------------------------------------------- 1 | #ifdef USE_PYTORCH_QNNPACK 2 | 3 | #include "init_qnnpack.h" 4 | #include 5 | #include 6 | #include 7 | 8 | namespace at { 9 | namespace native { 10 | 11 | void initQNNPACK() { 12 | static std::once_flag once; 13 | static enum pytorch_qnnp_status qnnpackStatus = 14 | pytorch_qnnp_status_uninitialized; 15 | std::call_once(once, []() { qnnpackStatus = pytorch_qnnp_initialize(); }); 16 | TORCH_CHECK( 17 | qnnpackStatus == pytorch_qnnp_status_success, 18 | "failed to initialize QNNPACK"); 19 | } 20 | 21 | } // namespace native 22 | } // namespace at 23 | 24 | #endif 25 | 
-------------------------------------------------------------------------------- /aten/src/ATen/native/Unfold2d.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { namespace native { 7 | 8 | using unfold2d_fn = 9 | void (*)( 10 | Tensor& finput, 11 | Tensor& input, 12 | int64_t kH, 13 | int64_t kW, 14 | int64_t dH, 15 | int64_t dW, 16 | int64_t padH, 17 | int64_t padW, 18 | int64_t n_input_plane, 19 | int64_t input_height, 20 | int64_t input_width, 21 | int64_t output_height, 22 | int64_t output_width 23 | ); 24 | 25 | DECLARE_DISPATCH(unfold2d_fn, unfolded2d_copy_stub); 26 | DECLARE_DISPATCH(unfold2d_fn, unfolded2d_acc_stub); 27 | 28 | }} // namespace at::native 29 | -------------------------------------------------------------------------------- /aten/src/THC/THCTensorCopy.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | template 6 | void THC_copyTensor(THCState* state, THCTensor* dst, THCTensor* src); 7 | 8 | template 9 | THCTensor *THCTensor_newClone(THCState *state, THCTensor *self); 10 | 11 | template 12 | THCTensor *THCTensor_newContiguous(THCState *state, THCTensor *self); 13 | 14 | template 15 | void THCTensor_freeCopyTo(THCState *state, THCTensor *self, THCTensor *dst); 16 | 17 | template 18 | void THCTensor_copyIgnoringOverlaps(THCState* state, THCTensor* dst, THCTensor* src); 19 | -------------------------------------------------------------------------------- /aten/src/ATen/native/cpu/GridSamplerKernel.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | namespace at { namespace native { 12 | 13 | using forward_2d_fn = Tensor(*)(const Tensor &, const Tensor &, int64_t, int64_t, bool); 14 | using backward_2d_fn = std::tuple(*)(const 
Tensor &, const Tensor &, const Tensor &, int64_t, int64_t, bool); 15 | DECLARE_DISPATCH(forward_2d_fn, grid_sampler_2d_cpu_kernel); 16 | DECLARE_DISPATCH(backward_2d_fn, grid_sampler_2d_backward_cpu_kernel); 17 | 18 | }} // namespace at::native 19 | -------------------------------------------------------------------------------- /aten/src/ATen/native/cuda/SparseMM.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | namespace at { namespace native { 6 | // sparse, sparse, sparse, dense, real, real -> sparse 7 | Tensor& _sspaddmm_out_only_sparse_cuda(Tensor& result, const Tensor& self, 8 | const Tensor& mat1, const Tensor& mat2, Scalar beta, Scalar alpha) { 9 | AT_ERROR("tensor.sspaddmm(...) can only be called on sparse tensors"); 10 | } 11 | Tensor& _sspaddmm_out_cuda(Tensor& result, const Tensor& self, 12 | const Tensor& mat1, const Tensor& mat2, Scalar beta, Scalar alpha) { 13 | AT_ERROR("NYI: CUDA sspaddmm is not implemented"); 14 | } 15 | }} // namespace at::native 16 | -------------------------------------------------------------------------------- /aten/src/TH/THGenerateBoolType.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #error "You must define TH_GENERIC_FILE before including THGenerateBoolType.h" 3 | #endif 4 | 5 | #define scalar_t bool 6 | #define ureal bool 7 | #define accreal int64_t 8 | #define Real Bool 9 | #define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val) 10 | #define TH_CONVERT_ACCREAL_TO_REAL(_val) (scalar_t)(_val) 11 | #define TH_REAL_IS_BOOL 12 | #line 1 TH_GENERIC_FILE 13 | #include TH_GENERIC_FILE 14 | #undef scalar_t 15 | #undef ureal 16 | #undef accreal 17 | #undef Real 18 | #undef TH_REAL_IS_BOOL 19 | #undef TH_CONVERT_REAL_TO_ACCREAL 20 | #undef TH_CONVERT_ACCREAL_TO_REAL 21 | 22 | #ifndef THGenerateManyTypes 23 | #undef TH_GENERIC_FILE 24 | #endif 25 | 
-------------------------------------------------------------------------------- /cmake/External/rccl.cmake: -------------------------------------------------------------------------------- 1 | if (NOT __NCCL_INCLUDED) 2 | set(__NCCL_INCLUDED TRUE) 3 | 4 | if (USE_SYSTEM_NCCL) 5 | # NCCL_ROOT, NCCL_LIB_DIR, NCCL_INCLUDE_DIR will be accounted in the following line. 6 | find_package(RCCL REQUIRED) 7 | if (RCCL_FOUND) 8 | message (STATUS "RCCL Found!") 9 | add_library(__caffe2_nccl INTERFACE) 10 | target_link_libraries(__caffe2_nccl INTERFACE ${PYTORCH_RCCL_LIBRARIES}) 11 | target_include_directories(__caffe2_nccl INTERFACE ${RCCL_INCLUDE_DIRS}) 12 | else() 13 | message (STATUS "RCCL NOT Found!") 14 | endif() 15 | else() 16 | message (STATUS "USE_SYSTEM_NCCL=OFF is not supported yet when using RCCL") 17 | endif() 18 | endif() 19 | -------------------------------------------------------------------------------- /aten/src/THNN/THNN.h: -------------------------------------------------------------------------------- 1 | #ifndef THNN_H 2 | #define THNN_H 3 | 4 | #include 5 | #include 6 | 7 | #define THNN_(NAME) TH_CONCAT_3(THNN_, Real, NAME) 8 | 9 | #define THIndexTensor THLongTensor 10 | #define THIndexTensor_(NAME) THLongTensor_ ## NAME 11 | 12 | #define THIntegerTensor THIntTensor 13 | #define THIntegerTensor_(NAME) THIntTensor_ ## NAME 14 | 15 | typedef int64_t THIndex_t; 16 | typedef int32_t THInteger_t; 17 | typedef void THNNState; 18 | 19 | #include 20 | #include 21 | 22 | #include 23 | #include 24 | 25 | #include 26 | #include 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /aten/src/ATen/native/cuda/DeviceSqrt.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace at { namespace native { 4 | #if defined(__HIP_PLATFORM_HCC__) 5 | // take these out when ROCm implements std:: math functions 6 | #include 7 | template 8 | static __forceinline__ 
__device__ scalar_t device_sqrt(scalar_t val); 9 | 10 | template <> 11 | __forceinline__ __device__ float device_sqrt(float val) { 12 | return ::sqrtf(val); 13 | } 14 | 15 | template <> 16 | __forceinline__ __device__ double device_sqrt(double val) { 17 | return ::sqrt(val); 18 | } 19 | #else 20 | template 21 | __forceinline__ __device__ double device_sqrt(scalar_t val) { 22 | return std::sqrt(val); 23 | } 24 | #endif 25 | }} 26 | -------------------------------------------------------------------------------- /aten/src/TH/THGenerateFloatType.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #error "You must define TH_GENERIC_FILE before including THGenerateFloatType.h" 3 | #endif 4 | 5 | #define scalar_t float 6 | #define accreal double 7 | #define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val) 8 | #define TH_CONVERT_ACCREAL_TO_REAL(_val) (scalar_t)(_val) 9 | #define Real Float 10 | #define THInf FLT_MAX 11 | #define TH_REAL_IS_FLOAT 12 | #line 1 TH_GENERIC_FILE 13 | #include TH_GENERIC_FILE 14 | #undef accreal 15 | #undef scalar_t 16 | #undef Real 17 | #undef THInf 18 | #undef TH_REAL_IS_FLOAT 19 | #undef TH_CONVERT_REAL_TO_ACCREAL 20 | #undef TH_CONVERT_ACCREAL_TO_REAL 21 | 22 | #ifndef THGenerateManyTypes 23 | #undef TH_GENERIC_FILE 24 | #endif 25 | -------------------------------------------------------------------------------- /aten/src/ATen/cudnn/Types.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | namespace at { namespace native { 6 | 7 | cudnnDataType_t getCudnnDataType(const at::Tensor& tensor) { 8 | if (tensor.scalar_type() == at::kFloat) { 9 | return CUDNN_DATA_FLOAT; 10 | } else if (tensor.scalar_type() == at::kDouble) { 11 | return CUDNN_DATA_DOUBLE; 12 | } else if (tensor.scalar_type() == at::kHalf) { 13 | return CUDNN_DATA_HALF; 14 | } 15 | std::string msg("getCudnnDataType() not supported for "); 16 | msg 
namespace at { namespace native {

// Maps an ATen tensor's scalar type to the corresponding MIOpen data type.
// MIOpen only supports float and half here; any other dtype throws.
miopenDataType_t getMiopenDataType(const at::Tensor& tensor) {
  if (tensor.scalar_type() == at::kFloat) {
    return miopenFloat;
  } else if (tensor.scalar_type() == at::kHalf) {
    return miopenHalf;
  }
  std::string msg("getMiopenDataType() not supported for ");
  msg += toString(tensor.scalar_type());
  throw std::runtime_error(msg);
}

// Packs the MIOpen version as (major << 8) + (minor << 4) + patch.
// NOTE(review): only 4 bits are reserved for minor/patch, so values >= 16
// collide with the next field — confirm this encoding is intentional.
int64_t miopen_version() {
  return (MIOPEN_VERSION_MAJOR<<8) + (MIOPEN_VERSION_MINOR<<4) + MIOPEN_VERSION_PATCH;
}

}} // namespace at::native
-------------------------------------------------------------------------------- /aten/src/ATen/test/reduce_ops_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | using namespace at; 7 | 8 | TEST(ReduceOpsTest, MaxValuesAndMinValues) { 9 | const int W = 10; 10 | const int H = 10; 11 | if (hasCUDA()) { 12 | for (const auto dtype : {kHalf, kFloat, kDouble, kShort, kInt, kLong}) { 13 | auto a = at::rand({H, W}, TensorOptions(kCUDA).dtype(at::kHalf)); 14 | ASSERT_FLOAT_EQ( 15 | a.max_values(c10::IntArrayRef{0, 1}).item(), 16 | a.max().item() 17 | ); 18 | ASSERT_FLOAT_EQ( 19 | a.min_values(c10::IntArrayRef{0, 1}).item(), 20 | a.min().item() 21 | ); 22 | } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/cmake/DownloadFP16.cmake: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | CMAKE_MINIMUM_REQUIRED(VERSION 2.8.12 FATAL_ERROR) 8 | 9 | PROJECT(fp16-download NONE) 10 | 11 | INCLUDE(ExternalProject) 12 | ExternalProject_Add(fp16 13 | GIT_REPOSITORY https://github.com/Maratyszcza/FP16.git 14 | GIT_TAG master 15 | SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/fp16" 16 | BINARY_DIR "${CONFU_DEPENDENCIES_BINARY_DIR}/fp16" 17 | CONFIGURE_COMMAND "" 18 | BUILD_COMMAND "" 19 | INSTALL_COMMAND "" 20 | TEST_COMMAND "" 21 | ) 22 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/cmake/DownloadFXdiv.cmake: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 
3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | CMAKE_MINIMUM_REQUIRED(VERSION 2.8.12 FATAL_ERROR) 8 | 9 | PROJECT(fxdiv-download NONE) 10 | 11 | INCLUDE(ExternalProject) 12 | ExternalProject_Add(fxdiv 13 | GIT_REPOSITORY https://github.com/Maratyszcza/FXdiv.git 14 | GIT_TAG master 15 | SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/fxdiv" 16 | BINARY_DIR "${CONFU_DEPENDENCIES_BINARY_DIR}/fxdiv" 17 | CONFIGURE_COMMAND "" 18 | BUILD_COMMAND "" 19 | INSTALL_COMMAND "" 20 | TEST_COMMAND "" 21 | ) 22 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/cmake/DownloadPSimd.cmake: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 
/* FakeQuantize Op for PerChannelAffine quantization scheme */
namespace at {
namespace native {
// Fake-quantizes `input` into `output` with scale `sc`, zero point `z_point`,
// and quantization bounds [quant_min, quant_max]. CUDA kernel is defined in
// the corresponding .cu file.
void fake_quantize_slice_cuda(
    Tensor& output,
    const Tensor& input,
    float sc,
    int64_t z_point,
    int64_t quant_min,
    int64_t quant_max);

// Backward of the above: writes the gradient w.r.t. `input` into `input_grad`
// given `output_grad` and the original `input`.
// NOTE(review): exact pass-through/clamp semantics live in the .cu kernel —
// confirm there before relying on them.
void fake_quantize_grad_slice_cuda(
    Tensor& input_grad,
    const Tensor& output_grad,
    const Tensor& input,
    float sc,
    int64_t z_point,
    int64_t quant_min,
    int64_t quant_max);

} // namespace native
} // namespace at
// Constructors: open a disk-backed / pipe-backed THFile. `mode` is a
// stdio-style mode string. NOTE(review): `isQuiet` presumably routes failures
// into the file's error state instead of raising — confirm in THDiskFile.c.
TH_API THFile *THDiskFile_new(const char *name, const char *mode, int isQuiet);
TH_API THFile *THPipeFile_new(const char *name, const char *mode, int isQuiet);

// Returns the name the file was opened with.
TH_API const char *THDiskFile_name(THFile *self);

// Host-endianness queries (process-wide; independent of any file handle).
TH_API int THDiskFile_isLittleEndianCPU(void);
TH_API int THDiskFile_isBigEndianCPU(void);
// Select the byte order used when reading/writing through this file.
TH_API void THDiskFile_nativeEndianEncoding(THFile *self);
TH_API void THDiskFile_littleEndianEncoding(THFile *self);
TH_API void THDiskFile_bigEndianEncoding(THFile *self);
// Set the on-disk width (in bytes) used for 'long' values.
TH_API void THDiskFile_longSize(THFile *self, int size);
// Disable buffering for this file.
TH_API void THDiskFile_noBuffer(THFile *self);
namespace at { namespace native {

// Points MIOpen at the HIP stream ATen is currently using, so MIOpen work is
// ordered with the rest of the stream's operations.
inline void setMIOpenStreamToCurrent() {
  // NB: Due to in-place HIPify, getCurrentCUDAStream actually means
  // getCurrentHIPStream
  MIOPEN_CHECK(miopenSetStream(getMiopenHandle(), at::hip::getCurrentHIPStream()));
}

// This function makes tensors which have zero stride contiguous, by
// setting the strides to 1.
inline Tensor contiguousIfZeroInStrides(const Tensor& t) {
  for (auto s : t.strides()) {
    // Any zero stride (e.g. from expand/broadcast) -> materialize a copy.
    if (s == 0) return t.contiguous();
  }
  return t;  // no zero strides: return the input unchanged, no copy
}

}}
6 | 7 | CMAKE_MINIMUM_REQUIRED(VERSION 2.8.12 FATAL_ERROR) 8 | 9 | PROJECT(cpuinfo-download NONE) 10 | 11 | INCLUDE(ExternalProject) 12 | ExternalProject_Add(cpuinfo 13 | GIT_REPOSITORY https://github.com/Maratyszcza/cpuinfo.git 14 | GIT_TAG master 15 | SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/cpuinfo" 16 | BINARY_DIR "${CONFU_DEPENDENCIES_BINARY_DIR}/cpuinfo" 17 | CONFIGURE_COMMAND "" 18 | BUILD_COMMAND "" 19 | INSTALL_COMMAND "" 20 | TEST_COMMAND "" 21 | ) 22 | -------------------------------------------------------------------------------- /aten/src/TH/THGenerateCharType.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #error "You must define TH_GENERIC_FILE before including THGenerateCharType.h" 3 | #endif 4 | 5 | #define scalar_t int8_t 6 | #define ureal uint8_t 7 | #define accreal int64_t 8 | #define Real Char 9 | #define THInf SCHAR_MAX 10 | #define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val) 11 | #define TH_CONVERT_ACCREAL_TO_REAL(_val) (scalar_t)(_val) 12 | #define TH_REAL_IS_CHAR 13 | #line 1 TH_GENERIC_FILE 14 | #include TH_GENERIC_FILE 15 | #undef scalar_t 16 | #undef ureal 17 | #undef accreal 18 | #undef Real 19 | #undef THInf 20 | #undef TH_REAL_IS_CHAR 21 | #undef TH_CONVERT_REAL_TO_ACCREAL 22 | #undef TH_CONVERT_ACCREAL_TO_REAL 23 | 24 | #ifndef THGenerateManyTypes 25 | #undef TH_GENERIC_FILE 26 | #endif 27 | -------------------------------------------------------------------------------- /aten/src/TH/THGenerateByteType.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #error "You must define TH_GENERIC_FILE before including THGenerateByteType.h" 3 | #endif 4 | 5 | #define scalar_t uint8_t 6 | #define ureal uint8_t 7 | #define accreal int64_t 8 | #define Real Byte 9 | #define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val) 10 | #define TH_CONVERT_ACCREAL_TO_REAL(_val) (scalar_t)(_val) 11 | #define THInf 
UCHAR_MAX 12 | #define TH_REAL_IS_BYTE 13 | #line 1 TH_GENERIC_FILE 14 | #include TH_GENERIC_FILE 15 | #undef scalar_t 16 | #undef ureal 17 | #undef accreal 18 | #undef Real 19 | #undef THInf 20 | #undef TH_REAL_IS_BYTE 21 | #undef TH_CONVERT_REAL_TO_ACCREAL 22 | #undef TH_CONVERT_ACCREAL_TO_REAL 23 | 24 | #ifndef THGenerateManyTypes 25 | #undef TH_GENERIC_FILE 26 | #endif 27 | -------------------------------------------------------------------------------- /aten/src/TH/THGenerateLongType.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #error "You must define TH_GENERIC_FILE before including THGenerateLongType.h" 3 | #endif 4 | 5 | #define scalar_t int64_t 6 | #define ureal uint64_t 7 | #define accreal int64_t 8 | #define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val) 9 | #define TH_CONVERT_ACCREAL_TO_REAL(_val) (scalar_t)(_val) 10 | #define Real Long 11 | #define THInf LONG_MAX 12 | #define TH_REAL_IS_LONG 13 | #line 1 TH_GENERIC_FILE 14 | #include TH_GENERIC_FILE 15 | #undef scalar_t 16 | #undef ureal 17 | #undef accreal 18 | #undef Real 19 | #undef THInf 20 | #undef TH_REAL_IS_LONG 21 | #undef TH_CONVERT_REAL_TO_ACCREAL 22 | #undef TH_CONVERT_ACCREAL_TO_REAL 23 | 24 | #ifndef THGenerateManyTypes 25 | #undef TH_GENERIC_FILE 26 | #endif 27 | -------------------------------------------------------------------------------- /aten/src/ATen/native/cuda/FillKernel.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | namespace at { namespace native { 8 | 9 | void fill_kernel_cuda(TensorIterator& iter, Scalar value) { 10 | AT_DISPATCH_ALL_TYPES_AND3(at::ScalarType::Bool, at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "fill_cuda", [&]() { 11 | auto value_converted = value.to(); 12 | gpu_kernel(iter, [value_converted]GPU_LAMBDA() -> scalar_t { 13 | return value_converted; 14 | }); 15 | 
}); 16 | } 17 | 18 | REGISTER_DISPATCH(fill_stub, &fill_kernel_cuda); 19 | 20 | } // namespace native 21 | } // namespace at 22 | -------------------------------------------------------------------------------- /aten/src/TH/THGenerateHalfType.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #error "You must define TH_GENERIC_FILE before including THGenerateHalfType.h" 3 | #endif 4 | 5 | #include 6 | #define scalar_t THHalf 7 | #define accreal float 8 | #define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val) 9 | #define TH_CONVERT_ACCREAL_TO_REAL(_val) (scalar_t)(_val) 10 | #define Real Half 11 | #define THInf TH_HALF_BITS_TO_LITERAL(TH_HALF_INF) 12 | #define TH_REAL_IS_HALF 13 | #line 1 TH_GENERIC_FILE 14 | #include TH_GENERIC_FILE 15 | #undef scalar_t 16 | #undef accreal 17 | #undef Real 18 | #undef THInf 19 | #undef TH_REAL_IS_HALF 20 | #undef TH_CONVERT_REAL_TO_ACCREAL 21 | #undef TH_CONVERT_ACCREAL_TO_REAL 22 | 23 | #ifndef THGenerateManyTypes 24 | #undef TH_GENERIC_FILE 25 | #endif 26 | -------------------------------------------------------------------------------- /aten/src/TH/THGenerateShortType.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #error "You must define TH_GENERIC_FILE before including THGenerateShortType.h" 3 | #endif 4 | 5 | #define scalar_t int16_t 6 | #define ureal uint16_t 7 | #define accreal int64_t 8 | #define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val) 9 | #define TH_CONVERT_ACCREAL_TO_REAL(_val) (scalar_t)(_val) 10 | #define Real Short 11 | #define THInf SHRT_MAX 12 | #define TH_REAL_IS_SHORT 13 | #line 1 TH_GENERIC_FILE 14 | #include TH_GENERIC_FILE 15 | #undef scalar_t 16 | #undef ureal 17 | #undef accreal 18 | #undef Real 19 | #undef THInf 20 | #undef TH_REAL_IS_SHORT 21 | #undef TH_CONVERT_REAL_TO_ACCREAL 22 | #undef TH_CONVERT_ACCREAL_TO_REAL 23 | 24 | #ifndef THGenerateManyTypes 25 | #undef 
/* FakeQuantize Op for PerChannelAffine quantization scheme */
// NOTE(review): unlike the CUDA counterpart (cuda/fake_quantize_core.h),
// this header has no `#pragma once` — consider adding one for consistency
// (re-inclusion is harmless here since these are pure declarations).
namespace at {
namespace native {
// Fake-quantizes `input` into `output` with scale `sc`, zero point `z_point`,
// and quantization bounds [quant_min, quant_max] (CPU implementation lives in
// the corresponding .cpp file).
void fake_quantize_slice(
    Tensor& output,
    const Tensor& input,
    float sc,
    int64_t z_point,
    int64_t quant_min,
    int64_t quant_max);

// Backward of the above: writes the gradient w.r.t. `input` into `input_grad`
// given `output_grad` and the original `input`.
void fake_quantize_grad_slice(
    Tensor& input_grad,
    const Tensor& output_grad,
    const Tensor& input,
    float sc,
    int64_t z_point,
    int64_t quant_min,
    int64_t quant_max);

} // namespace native
} // namespace at
namespace at {
// Forward declaration is enough: the function-pointer aliases below only
// take TensorIterator by reference.
struct TensorIterator;
}

namespace at { namespace native {

// Kernel entry-point signatures for tensor-by-tensor indexing; concrete
// implementations are registered per backend through the DispatchStub
// mechanism (DECLARE_DISPATCH here, REGISTER_DISPATCH in the kernels).
using index_fn = void(*)(TensorIterator &, IntArrayRef indexed_sizes, IntArrayRef indexed_strides);
using index_put_fn = void(*)(TensorIterator &, IntArrayRef indexed_sizes, IntArrayRef indexed_strides, bool accumulate);
using index_put_accum_fn = void(*)(Tensor &, TensorList , const Tensor &, bool unsafe);

DECLARE_DISPATCH(index_fn, index_stub);
DECLARE_DISPATCH(index_put_fn, index_put_stub);
DECLARE_DISPATCH(index_put_accum_fn, index_put_accum_stub);

}} // namespace at::native
3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | CMAKE_MINIMUM_REQUIRED(VERSION 2.8.12 FATAL_ERROR) 8 | 9 | PROJECT(pthreadpool-download NONE) 10 | 11 | INCLUDE(ExternalProject) 12 | ExternalProject_Add(pthreadpool 13 | GIT_REPOSITORY https://github.com/Maratyszcza/pthreadpool.git 14 | GIT_TAG master 15 | SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/pthreadpool" 16 | BINARY_DIR "${CONFU_DEPENDENCIES_BINARY_DIR}/pthreadpool" 17 | CONFIGURE_COMMAND "" 18 | BUILD_COMMAND "" 19 | INSTALL_COMMAND "" 20 | TEST_COMMAND "" 21 | ) 22 | -------------------------------------------------------------------------------- /aten/src/THC/generic/THCTensorMathMagma.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #define THC_GENERIC_FILE "THC/generic/THCTensorMathMagma.h" 3 | #else 4 | 5 | #if defined(THC_REAL_IS_FLOAT) || defined(THC_REAL_IS_DOUBLE) 6 | 7 | // MAGMA (i.e. CUDA implementation of LAPACK functions) 8 | THC_API void THCTensor_(gels)(THCState *state, THCTensor *rb_, THCTensor *ra_, THCTensor *b_, THCTensor *a_); 9 | THC_API void THCTensor_(geev)(THCState *state, THCTensor *re_, THCTensor *rv_, THCTensor *a_, bool eigenvectors); 10 | THC_API void THCTensor_(potri)(THCState *state, THCTensor *ra_, THCTensor *a, bool upper); 11 | THC_API void THCTensor_(geqrf)(THCState *state, THCTensor *ra_, THCTensor *rtau_, THCTensor *a_); 12 | 13 | #endif // defined(THC_REAL_IS_FLOAT) || defined(THC_REAL_IS_DOUBLE) 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /aten/src/ATen/hip/impl/HIPGuardImplMasqueradingAsCUDA.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | // THIS IS A MASSIVE HACK. 
This will BREAK you Caffe2 CUDA code if you 4 | // load ATen_hip, even if you don't ever actually use ATen_hip at runtime. 5 | // 6 | // If you ever link ATen_hip statically into the full library along 7 | // with ATen_cuda (libomnibus), the loading order of this versus the regular 8 | // ATen_cuda will be nondeterministic, and you'll nondeterministically get 9 | // one or the other. (This will be obvious because all of your code 10 | // will fail.) 11 | // 12 | // This hack can be removed once PyTorch is out-of-place HIPified, and 13 | // doesn't pretend CUDA is HIP. 14 | C10_REGISTER_GUARD_IMPL(CUDA, at::cuda::HIPGuardImplMasqueradingAsCUDA); 15 | -------------------------------------------------------------------------------- /aten/src/THC/THCTensorMathMagma.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #ifdef USE_MAGMA 10 | #include 11 | #else 12 | #include 13 | #endif 14 | 15 | #ifndef DIVUP 16 | #define DIVUP(x, y) (((x) + (y) - 1) / (y)) 17 | #endif 18 | 19 | #define NoMagma(name) "No CUDA implementation of '" #name "'. 
// AVX2 variants of TH's vector primitives.
// NOTE(review): cadd presumably computes z[i] = x[i] + c * y[i] over n
// elements — confirm against the generic THVector implementation.
TH_API void THDoubleVector_cadd_AVX2(double *z, const double *x, const double *y, const double c, const ptrdiff_t n);
TH_API void THFloatVector_cadd_AVX2(float *z, const float *x, const float *y, const float c, const ptrdiff_t n);
// Fills `data` with `size` normally-distributed samples (mean, stddev) drawn
// from `generator`.
TH_API void THFloatVector_normal_fill_AVX2(float *data,
                                           const int64_t size,
                                           at::Generator *generator,
                                           const float mean,
                                           const float stddev);
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace at { namespace detail { 9 | 10 | template 11 | inline T load(const void* data, ScalarType src_type) { 12 | return AT_DISPATCH_ALL_TYPES_AND2(at::ScalarType::Half, at::ScalarType::Bool, src_type, "load", [&]() { 13 | return at::convert(*(scalar_t*)data); 14 | }); 15 | } 16 | 17 | template 18 | inline void store(T value, void* dst, ScalarType dst_type) { 19 | AT_DISPATCH_ALL_TYPES_AND2(at::ScalarType::Half, at::ScalarType::Bool, dst_type, "store", [&]() { 20 | *(scalar_t*)dst = at::convert(value); 21 | }); 22 | } 23 | 24 | }} // namespace at::detail 25 | -------------------------------------------------------------------------------- /aten/conda/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | if [ -z "$PREFIX" ]; then 6 | PREFIX="$CONDA_PREFIX" 7 | fi 8 | 9 | # When conda-build constructs a new working copy to perform a build 10 | # in, it recursively copies *all* files and directories in the original 11 | # source directory, including any pre-existing build products (e.g., 12 | # if you previously ran cmake.) This is problematic, because if 13 | # a 'build' directory already exists, cmake will reuse build settings 14 | # rather than recompute them from scratch. We want a fresh build, so 15 | # we prophylactically remove the build directory. 16 | rm -rf build || true 17 | 18 | mkdir -p build 19 | cd build 20 | cmake -DCMAKE_INSTALL_PREFIX="$PREFIX" -DCMAKE_PREFIX_PATH="$PREFIX" -DCMAKE_BUILD_TYPE=Release $CONDA_CMAKE_ARGS .. 
21 | make install -j20 22 | -------------------------------------------------------------------------------- /aten/src/ATen/miopen/Handle.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | namespace at { namespace native { 9 | 10 | namespace { 11 | 12 | struct Handle { 13 | miopenHandle_t handle; 14 | Handle() : handle(NULL) { 15 | MIOPEN_CHECK(miopenCreate(&handle)); 16 | } 17 | ~Handle() { 18 | if (handle) { 19 | miopenDestroy(handle); 20 | } 21 | } 22 | }; 23 | 24 | std::mutex mutex; 25 | std::unordered_map handles; 26 | 27 | } // namespace 28 | 29 | 30 | miopenHandle_t getMiopenHandle() 31 | { 32 | int device; 33 | HIP_CHECK(hipGetDevice(&device)); 34 | 35 | std::lock_guard guard(mutex); 36 | return handles[device].handle; 37 | } 38 | 39 | }} // namespace at::native 40 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/src/operator-delete.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Facebook, Inc. and its affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | enum pytorch_qnnp_status pytorch_qnnp_delete_operator( 15 | pytorch_qnnp_operator_t op) { 16 | if (op == NULL) { 17 | return pytorch_qnnp_status_invalid_parameter; 18 | } 19 | 20 | free(op->indirection_buffer); 21 | free(op->packed_weights); 22 | free(op->a_sum); 23 | free(op->zero_buffer); 24 | free(op->lookup_table); 25 | free(op); 26 | return pytorch_qnnp_status_success; 27 | } 28 | -------------------------------------------------------------------------------- /aten/src/ATen/test/cuda_optional_test.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | using namespace at; 10 | 11 | // optional in cuda files 12 | TEST(OptionalTest, OptionalTestCUDA) { 13 | if (!at::cuda::is_available()) return; 14 | c10::optional trivially_destructible; 15 | c10::optional> non_trivially_destructible; 16 | ASSERT_FALSE(trivially_destructible.has_value()); 17 | ASSERT_FALSE(non_trivially_destructible.has_value()); 18 | 19 | trivially_destructible = {5}; 20 | non_trivially_destructible = std::vector{5, 10}; 21 | ASSERT_TRUE(trivially_destructible.has_value()); 22 | ASSERT_TRUE(non_trivially_destructible.has_value()); 23 | } 24 | -------------------------------------------------------------------------------- /aten/src/ATen/native/Distance.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { namespace native { 7 | 8 | using pdist_forward_fn = void(*)(Tensor&, const Tensor&, const double p); 9 | using pdist_backward_fn = void(*)(Tensor&, const Tensor&, const Tensor&, const double p, const Tensor&); 10 | using cdist_fn = void(*)(Tensor&, const Tensor&, const Tensor&, const double p); 11 | using cdist_backward_fn = void(*)(Tensor&, const Tensor&, const Tensor&, const Tensor&, const double p, const Tensor&); 12 
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "TH/generic/THTensorFill.cpp"
#else

#include <TH/generic/THTensorApply.hpp>

// Sets every element of r_ to `value`.
void THTensor_(fill)(THTensor *r_, scalar_t value)
{
  if (THTensor_(isContiguous)(r_) || THTensor_(isTransposed)(r_)) {
    // Fast path: the whole tensor is one dense chunk of memory, so a single
    // vectorized fill over the entire buffer suffices.
    TH_TENSOR_APPLY_CONTIG(scalar_t, r_, THVector_(fill)(r__data, value, r__len););
  } else {
    TH_TENSOR_APPLY(scalar_t, r_,
      if (r__stride == 1) {
        // The innermost dimension is contiguous: vector-fill that whole run,
        // then manually advance the apply macro's cursor/counter past it and
        // break out of the (macro-generated) innermost loop.
        THVector_(fill)(r__data, value, r__size);
        r__i = r__size;
        r__data += r__stride * r__size;
        break;
      } else {
        // Strided element: plain scalar store.
        *r__data = value;
      }
    );
  }
}

// Zeroes the tensor; delegates to fill so both share the same fast paths.
void THTensor_(zero)(THTensor *r_)
{
  THTensor_(fill)(r_, 0);
}

#endif
#include <ATen/core/LegacyDeviceTypeInit.h>

namespace at {

C10_DEFINE_REGISTRY(
    LegacyDeviceTypeInitRegistry,
    LegacyDeviceTypeInitInterface,
    LegacyDeviceTypeInitArgs)

// Returns the process-wide LegacyDeviceTypeInitInterface singleton.
//
// The instance is created exactly once (thread-safely, via std::call_once).
// If no concrete implementation was registered under "LegacyDeviceTypeInit",
// a default-constructed interface is substituted so callers always receive
// a valid reference, never null.
const LegacyDeviceTypeInitInterface& getLegacyDeviceTypeInit() {
  static std::unique_ptr<LegacyDeviceTypeInitInterface> legacy_device_type_init;
  static std::once_flag once;
  std::call_once(once, [] {
    legacy_device_type_init = LegacyDeviceTypeInitRegistry()->Create("LegacyDeviceTypeInit", LegacyDeviceTypeInitArgs{});
    if (!legacy_device_type_init) {
      // No registered implementation: fall back to the base interface.
      legacy_device_type_init =
          std::unique_ptr<LegacyDeviceTypeInitInterface>(new LegacyDeviceTypeInitInterface());
    }
  });
  return *legacy_device_type_init;
}

}
#include <THCUNN/THCUNN.h>
#include <THCUNN/common.h>
#include <TH/THHalf.h>

// Elementwise tanh backward:
//   gradInput = gradOutput * (1 - output^2)
// since d/dx tanh(x) = 1 - tanh(x)^2 and `output` already holds tanh(x).
template <typename T>
struct tanh_updateGradInput_functor
{
  __device__ __forceinline__ void operator()(T *gradInput,
      const T *output, const T *gradOutput) const {
    *gradInput = *gradOutput * (1.f - *output * *output);
  }
};

// half specialization: operands are widened to float, combined there, and
// narrowed back to half, avoiding direct arithmetic on half values.
template <>
struct tanh_updateGradInput_functor<half>
{
  __device__ __forceinline__ void operator()(half *gradInput,
      const half *output, const half *gradOutput) const {
    const float out = __half2float(*output);
    const float go = __half2float(*gradOutput);
    *gradInput = __float2half(go * (1.f - out * out));
  }
};

#include <THCUNN/generic/Tanh.cu>
#include <THC/THCGenerateFloatTypes.h>
-------------------------------------------------------------------------------- 1 | # CMake file to replace the string contents in Google Test and Google Mock 2 | # Usage example: 3 | # Patch the cmake file 4 | # cmake -DFILENAME=internal_utils.cmake 5 | # -DBACKUP=internal_utils.cmake.bak 6 | # -DREVERT=0 7 | # -P GoogleTestPatch.cmake 8 | # Revert the changes 9 | # cmake -DFILENAME=internal_utils.cmake 10 | # -DBACKUP=internal_utils.cmake.bak 11 | # -DREVERT=1 12 | # -P GoogleTestPatch.cmake 13 | 14 | 15 | if(REVERT) 16 | file(READ ${BACKUP} content) 17 | file(WRITE ${FILENAME} "${content}") 18 | file(REMOVE ${BACKUP}) 19 | else(REVERT) 20 | file(READ ${FILENAME} content) 21 | file(WRITE ${BACKUP} "${content}") 22 | string(REGEX REPLACE "[-/]Z[iI]" "/Z7" content "${content}") 23 | file(WRITE ${FILENAME} "${content}") 24 | endif(REVERT) 25 | -------------------------------------------------------------------------------- /aten/src/ATen/detail/HIPHooksInterface.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | namespace at { 10 | namespace detail { 11 | 12 | // See getCUDAHooks for some more commentary 13 | const HIPHooksInterface& getHIPHooks() { 14 | static std::unique_ptr hip_hooks; 15 | static std::once_flag once; 16 | std::call_once(once, [] { 17 | hip_hooks = HIPHooksRegistry()->Create("HIPHooks", HIPHooksArgs{}); 18 | if (!hip_hooks) { 19 | hip_hooks = 20 | std::unique_ptr(new HIPHooksInterface()); 21 | } 22 | }); 23 | return *hip_hooks; 24 | } 25 | } // namespace detail 26 | 27 | C10_DEFINE_REGISTRY(HIPHooksRegistry, HIPHooksInterface, HIPHooksArgs) 28 | 29 | } // namespace at 30 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/deps/clog/cmake/DownloadGoogleTest.cmake: -------------------------------------------------------------------------------- 1 | # Copyright 
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "THC/generic/THCStorage.cu"
#else

// Fills the whole storage with `value` on the GPU via thrust::fill,
// enqueued on the current THC stream so it is ordered with other work.
void THCStorage_(fill)(THCState *state, THCStorage *self, scalar_t value)
{
  THCThrustAllocator thrustAlloc(state);
  thrust::device_ptr<scalar_t> self_data(THCStorage_(data)(state, self));
  thrust::fill(
#if CUDA_VERSION >= 7000 || defined __HIP_PLATFORM_HCC__
    // Route thrust's temporary allocations through THC's allocator and run
    // on the current stream rather than the default stream.
    thrust::cuda::par(thrustAlloc).on(THCState_getCurrentStream(state)),
#endif
    self_data, self_data+self->numel(), value);
}

// Resizes the storage to `size` elements; thin wrapper over the
// type-independent implementation.
void THCStorage_(resize)(THCState *state, THCStorage *self, ptrdiff_t size)
{
  THCStorage_resize(state, self, size);
}

// Returns the device index on which the storage's memory lives.
int THCStorage_(getDevice)(THCState* state, const THCStorage* storage) {
  return THCStorage_getDevice(state, storage);
}

#endif
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

CMAKE_MINIMUM_REQUIRED(VERSION 2.8.12 FATAL_ERROR)

# Stand-alone "download only" project, run at configure time to populate the
# Google Benchmark sources for the confu-style dependency layout.
PROJECT(googlebenchmark-download NONE)

INCLUDE(ExternalProject)
# Fetch and unpack google/benchmark v1.4.1; URL_HASH pins the exact archive.
# Configure/build/install/test steps are all disabled: only the source
# download happens here, and the parent build consumes the unpacked tree.
ExternalProject_Add(googlebenchmark
	URL https://github.com/google/benchmark/archive/v1.4.1.zip
	URL_HASH SHA256=61ae07eb5d4a0b02753419eb17a82b7d322786bb36ab62bd3df331a4d47c00a7
	SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/googlebenchmark"
	BINARY_DIR "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark"
	CONFIGURE_COMMAND ""
	BUILD_COMMAND ""
	INSTALL_COMMAND ""
	TEST_COMMAND ""
)
#ifndef THC_TENSORMATH_COMPARE_CUH
#define THC_TENSORMATH_COMPARE_CUH

#include <THC/THCTensorMath.h>
#include <THC/THCGeneral.h>
#include <THC/THCTensorCopy.h>
#include <THC/THCApply.cuh>
#include <THC/THCNumerics.cuh>

// Applies the elementwise comparison/logical functor `op` to `src`, writing
// results into `self_`, which is resized to src's shape first.
// NOTE(review): template parameter names reconstructed from usage — confirm
// against the original header.
template <typename TensorTypeOut, typename TensorType, class Op>
void THC_logicalValue(THCState *state,
                      TensorTypeOut *self_,
                      TensorType *src,
                      Op op) {
  THCTensor_resize(state, self_, src->sizes(), {});

  // Pointwise kernel launch; returns false when the tensors exceed the
  // supported dimension limit, which is surfaced as an argument error.
  if (!THC_pointwiseApply2(state, self_, src, op)) {
    THArgCheck(false, 2, CUTORCH_DIM_WARNING);
  }

  THCudaCheck(cudaGetLastError());
}

#endif // THC_TENSORMATH_COMPARE_CUH
#pragma once

#include <iostream>
#include <c10/core/Scalar.h>
#include <ATen/core/Tensor.h>


namespace c10 {
CAFFE2_API std::ostream& operator<<(std::ostream& out, Backend b);
}
namespace at {

CAFFE2_API std::ostream& operator<<(std::ostream& out, const DeprecatedTypeProperties& t);
// Pretty-prints `tensor` to `stream`, wrapping at `linesize` columns.
CAFFE2_API std::ostream& print(
    std::ostream& stream,
    const Tensor& tensor,
    int64_t linesize);
// Streaming a Tensor uses the default 80-column line width.
static inline std::ostream& operator<<(std::ostream & out, const Tensor & t) {
  return print(out,t,80);
}
// Convenience overload: print the tensor to stdout.
static inline void print(const Tensor & t, int64_t linesize=80) {
  print(std::cout,t,linesize);
}

// Scalars print as double when floating point, as long otherwise.
static inline std::ostream& operator<<(std::ostream & out, Scalar s) {
  return out << (s.isFloatingPoint() ? s.toDouble() : s.toLong());
}

}
guard that enables or disables grad mode upon 13 | // construction, and sets it back to the original value upon destruction. 14 | struct CAFFE2_API AutoGradMode { 15 | AutoGradMode(bool enabled) : prev_mode(GradMode::is_enabled()) { 16 | GradMode::set_enabled(enabled); 17 | } 18 | ~AutoGradMode() { 19 | GradMode::set_enabled(prev_mode); 20 | } 21 | bool prev_mode; 22 | }; 23 | 24 | // A RAII, thread local (!) guard that stops future operations from building 25 | // gradients. 26 | struct CAFFE2_API NoGradGuard : public AutoGradMode { 27 | NoGradGuard() : AutoGradMode(/*enabled=*/false) {} 28 | }; 29 | 30 | } 31 | -------------------------------------------------------------------------------- /aten/src/ATen/native/utils/ParamUtils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { 7 | namespace native { 8 | 9 | inline std::vector expand_param_if_needed( 10 | IntArrayRef list_param, 11 | const char* param_name, 12 | int64_t expected_dim) { 13 | if (list_param.size() == 1) { 14 | return std::vector(expected_dim, list_param[0]); 15 | } else if ((int64_t)list_param.size() != expected_dim) { 16 | std::ostringstream ss; 17 | ss << "expected " << param_name << " to be a single integer value or a " 18 | << "list of " << expected_dim << " values to match the convolution " 19 | << "dimensions, but got " << param_name << "=" << list_param; 20 | AT_ERROR(ss.str()); 21 | } else { 22 | return list_param.vec(); 23 | } 24 | } 25 | 26 | } // namespace native 27 | } // namespace at 28 | -------------------------------------------------------------------------------- /aten/src/THCUNN/SharedMem.cuh: -------------------------------------------------------------------------------- 1 | // Based on the simpleTempltes CUDA example 2 | 3 | #ifndef THCUNN_SHAREDMEM_H 4 | #define THCUNN_SHAREDMEM_H 5 | 6 | template 7 | struct SharedMem { 8 | __device__ T *getPointer() 9 | { 10 | extern 
// Specializations of SharedMem: each supported element type gets its own
// uniquely named extern __shared__ array.  Distinct names are needed because
// extern __shared__ declarations with the same name would alias the same
// dynamic shared-memory allocation regardless of type.

template <>
struct SharedMem<half>
{
  __device__ half *getPointer() {
    extern __shared__ half s_half[];
    return s_half;
  }
};

template <>
struct SharedMem<float>
{
  __device__ float *getPointer() {
    extern __shared__ float s_float[];
    return s_float;
  }
};

template <>
struct SharedMem<double>
{
  __device__ double *getPointer() {
    extern __shared__ double s_double[];
    return s_double;
  }
};

#endif
#pragma once

#include <ATen/ATen.h>
#include <ATen/cuda/Exceptions.h>
#include <ATen/cudnn/cudnn-wrapper.h>
#include <ATen/cudnn/Handle.h>
#include <ATen/cuda/CUDAContext.h>

namespace at { namespace native {

// Points cuDNN at ATen's current CUDA stream so that cuDNN kernels are
// ordered with the rest of the work enqueued on that stream.
inline void setCuDNNStreamToCurrent() {
  // TODO: Should getCurrentStream be a method on Context?
  AT_CUDNN_CHECK(cudnnSetStream(getCudnnHandle(), at::cuda::getCurrentCUDAStream()));
}

// cuDNN has a buggy check for tensor being contiguous (that is, it does
// not ignore stride for dimension that is equal to 0). This function
// makes tensors which have zero stride contiguous, by setting the
// strides to 1 as cuDNN likes.
// Tensors without any zero strides are returned unchanged (no copy).
inline Tensor contiguousIfZeroInStrides(const Tensor& t) {
  for (auto s : t.strides()) {
    if (s == 0) return t.contiguous();
  }
  return t;
}

}}
#pragma once

#include <ATen/ATen.h>
#include <ATen/cuda/detail/TensorInfo.cuh>
#include <limits>

namespace at {
namespace cuda {
namespace detail {

TORCH_CUDA_API bool maybeOverlappingIndices(const at::Tensor& t);
// True when every linear index up to max_elem fits in 32-bit arithmetic
// for tensor `t`.
TORCH_CUDA_API bool canUse32BitIndexMath(const at::Tensor &t, int64_t max_elem=std::numeric_limits<int32_t>::max());

// Copies t's sizes and strides into a kernel-friendly TensorInfo value,
// narrowing each extent to IndexType.  The caller is responsible for having
// verified the narrowing is safe (e.g. via canUse32BitIndexMath).
template <typename scalar, typename IndexType>
TensorInfo<scalar, IndexType>
getTensorInfo(const at::Tensor& t) {
  IndexType sz[MAX_TENSORINFO_DIMS];
  IndexType st[MAX_TENSORINFO_DIMS];

  int dims = t.dim();
  for (int i = 0; i < dims; ++i) {
    sz[i] = t.size(i);
    st[i] = t.stride(i);
  }

  return TensorInfo<scalar, IndexType>(
      t.data_ptr<scalar>(), dims, sz, st);
}

} // detail
} // cuda
} // at
/*
 * Small size_t arithmetic helpers shared across QNNPACK.
 */

#pragma once

#include <stddef.h>
#ifdef _MSC_VER
/* windows.h may define min/max as macros, which would break the functions
 * declared below. */
#undef min
#undef max
#endif

/* Smaller of a and b. */
inline static size_t min(size_t a, size_t b) {
  if (b < a) {
    return b;
  }
  return a;
}

/* Larger of a and b. */
inline static size_t max(size_t a, size_t b) {
  if (a < b) {
    return b;
  }
  return a;
}

/* Difference-or-zero: a - b, clamped at 0 (no unsigned wrap-around). */
inline static size_t doz(size_t a, size_t b) {
  if (a < b) {
    return 0;
  }
  return a - b;
}

/* Ceiling division: smallest k with k * q >= n.  Written without the
 * (n + q - 1) trick so it cannot overflow for large n. */
inline static size_t divide_round_up(size_t n, size_t q) {
  const size_t quotient = n / q;
  if (n % q != 0) {
    return quotient + 1;
  }
  return quotient;
}

/* Rounds n up to the next multiple of q. */
inline static size_t round_up(size_t n, size_t q) {
  return divide_round_up(n, q) * q;
}
#pragma once

// STOP!!! Thinking of including this header directly? Please
// read Note [TH abstraction violation]

#include <THC/THCStorage.h>
// Should work with THStorageClass
#include <TH/THStorageFunctions.hpp>

#include <c10/core/ScalarType.h>

#include <c10/core/Allocator.h>
#include <c10/util/typeid.h>
#include <cstddef>

// Type-independent CUDA storage operations (the THCStorage_(…) generic
// wrappers forward to these).

// Allocates a new, empty storage with the given element type.
THC_API THCStorage* THCStorage_new(THCState* state, caffe2::TypeMeta);

// Presumably increments the storage's refcount — confirm against impl.
THC_API void THCStorage_retain(THCState *state, THCStorage *storage);

// Resizes the storage to `size` elements.
THC_API void THCStorage_resize(THCState *state, THCStorage *storage, ptrdiff_t size);
// Device index on which the storage's memory lives.
THC_API int THCStorage_getDevice(THCState* state, const THCStorage* storage);

// Wraps pre-allocated device memory (ownership transferred via the rvalue
// DataPtr) in a storage that uses `allocator` for future (re)allocations.
THC_API THCStorage* THCStorage_newWithDataAndAllocator(
    THCState *state, at::ScalarType scalar_type,
    at::DataPtr&& data, ptrdiff_t size,
    at::Allocator* allocator);
#pragma once

#include <ATen/ATen.h>
#include <ATen/native/DispatchStub.h>
#include <c10/core/Scalar.h>

namespace at {

struct TensorIterator;

namespace native {

// Kernel signatures for elementwise activation ops.  The TensorIterator
// carries inputs and outputs; Scalar parameters carry op hyperparameters
// (e.g. threshold/value for threshold, lambda for hardshrink).
using activation_fn = void (*)(TensorIterator&);
using activation_backward_fn = void (*)(TensorIterator&);
using threshold_fn = void (*)(TensorIterator&, Scalar, Scalar);
using hardshrink_cpu_fn = void (*)(TensorIterator&, Scalar);
using hardshrink_backward_cpu_fn = void (*)(TensorIterator&, Scalar);

// Per-device dispatch stubs; backends register concrete kernels elsewhere.
DECLARE_DISPATCH(threshold_fn, threshold_stub);
DECLARE_DISPATCH(activation_fn, GeluKernel);
DECLARE_DISPATCH(activation_backward_fn, GeluBackwardKernel);
DECLARE_DISPATCH(hardshrink_cpu_fn, hardshrink_cpu_stub);
DECLARE_DISPATCH(hardshrink_backward_cpu_fn, hardshrink_backward_cpu_stub);

} // namespace native

} // namespace at
# Try to find the Google Benchmark library and headers.
#  Benchmark_FOUND - system has benchmark lib
#  Benchmark_INCLUDE_DIRS - the benchmark include directory
#  Benchmark_LIBRARIES - libraries needed to use benchmark

find_path(Benchmark_INCLUDE_DIR
  NAMES benchmark/benchmark.h
  NO_SYSTEM_ENVIRONMENT_PATH
  DOC "The directory where benchmark includes reside"
)

find_library(Benchmark_LIBRARY
  NAMES benchmark
  NO_SYSTEM_ENVIRONMENT_PATH
  DOC "The benchmark library"
)

set(Benchmark_INCLUDE_DIRS ${Benchmark_INCLUDE_DIR})
set(Benchmark_LIBRARIES ${Benchmark_LIBRARY})

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(Benchmark
  FOUND_VAR Benchmark_FOUND
  REQUIRED_VARS Benchmark_INCLUDE_DIR Benchmark_LIBRARY
)

# Hide the cache entries created by find_path/find_library above from the
# default cmake-gui/ccmake view.  (Previously this marked Benchmark_FOUND,
# which is a result variable rather than a cache entry, so it had no effect.)
mark_as_advanced(Benchmark_INCLUDE_DIR Benchmark_LIBRARY)
#if !defined(C10_MOBILE) || defined(FEATURE_TORCH_MOBILE)

// One flag per thread: grad mode is thread-local state, so toggling it in
// one thread (e.g. via NoGradGuard) does not affect other threads.
thread_local bool GradMode_enabled = true;

// Whether gradient tracking is currently enabled on this thread.
bool GradMode::is_enabled() {
  return GradMode_enabled;
}

// Enables/disables gradient tracking on this thread only.
void GradMode::set_enabled(bool enabled) {
  GradMode_enabled = enabled;
}

#else

// Mobile builds without thread_local support: GradMode is unavailable and
// any use is a hard runtime error.
bool GradMode::is_enabled() {
  throw std::runtime_error("GradMode is not supported on mobile");
}

void GradMode::set_enabled(bool enabled) {
  throw std::runtime_error("GradMode is not supported on mobile");
}

#endif
#ifndef THC_GENERIC_FILE
#error "You must define THC_GENERIC_FILE before including THGenerateFloatTypes.h"
#endif

/* Instantiates THC_GENERIC_FILE once for each floating-point scalar type
 * (half, float, double) by chaining the per-type generator headers. */
#define THCGenerateFloatTypes

/* Numeric type ids available to the generated code while it is being
 * instantiated; undefined again below so they do not leak to includers. */
#define THCTypeIdxByte 1
#define THCTypeIdxChar 2
#define THCTypeIdxShort 3
#define THCTypeIdxInt 4
#define THCTypeIdxLong 5
#define THCTypeIdxFloat 6
#define THCTypeIdxDouble 7
#define THCTypeIdxHalf 8
#define THCTypeIdx_(T) TH_CONCAT_2(THCTypeIdx,T)

#include <THC/THCGenerateHalfType.h>
#include <THC/THCGenerateFloatType.h>
#include <THC/THCGenerateDoubleType.h>

#undef THCTypeIdxByte
#undef THCTypeIdxChar
#undef THCTypeIdxShort
#undef THCTypeIdxInt
#undef THCTypeIdxLong
#undef THCTypeIdxFloat
#undef THCTypeIdxDouble
#undef THCTypeIdxHalf
#undef THCTypeIdx_

#undef THCGenerateFloatTypes
#undef THC_GENERIC_FILE
// Wraps a raw TH storage pointer in an ATen Storage; `retain` controls
// whether the refcount is bumped for the new handle.
Storage DeprecatedTypeProperties::unsafeStorageFromTH(void * th_pointer, bool retain) const {
  return at::unsafeStorageFromTH(th_pointer, retain);
}

// Copies `src` to this type's dtype (and optionally to `to_device`),
// always producing a fresh tensor (/*copy=*/true even when src already
// matches the target options).
Tensor DeprecatedTypeProperties::copy(const Tensor & src, bool non_blocking, c10::optional<Device> to_device) const {
  if (to_device) {
    return src.to(src.options().dtype(scalarType()).device(to_device), non_blocking, /*copy=*/true);
  }
  return src.to(src.options().dtype(scalarType()), non_blocking, /*copy=*/true);
}