├── aten ├── src │ ├── ATen │ │ ├── cudnn │ │ │ ├── Exceptions.h │ │ │ ├── Handles.h │ │ │ ├── Handle.h │ │ │ ├── Types.h │ │ │ ├── README.md │ │ │ ├── cudnn-wrapper.h │ │ │ ├── Types.cpp │ │ │ └── Utils.h │ │ ├── .gitignore │ │ ├── native │ │ │ ├── LegacyBridge.cpp │ │ │ ├── quantized │ │ │ │ ├── cpu │ │ │ │ │ ├── qnnpack │ │ │ │ │ │ ├── wrappers │ │ │ │ │ │ │ ├── dummy.c │ │ │ │ │ │ │ ├── x8lut │ │ │ │ │ │ │ │ └── scalar.c │ │ │ │ │ │ │ ├── sgemm │ │ │ │ │ │ │ │ ├── 6x8-psimd.c │ │ │ │ │ │ │ │ ├── 5x8-neon.c │ │ │ │ │ │ │ │ └── 6x8-neon.c │ │ │ │ │ │ │ ├── u8lut32norm │ │ │ │ │ │ │ │ └── scalar.c │ │ │ │ │ │ │ ├── requantization │ │ │ │ │ │ │ │ ├── fp32-psimd.c │ │ │ │ │ │ │ │ ├── q31-scalar.c │ │ │ │ │ │ │ │ ├── fp32-scalar.c │ │ │ │ │ │ │ │ ├── gemmlowp-scalar.c │ │ │ │ │ │ │ │ ├── precise-psimd.c │ │ │ │ │ │ │ │ ├── precise-scalar.c │ │ │ │ │ │ │ │ ├── fp32-neon.c │ │ │ │ │ │ │ │ ├── q31-neon.c │ │ │ │ │ │ │ │ ├── gemmlowp-neon.c │ │ │ │ │ │ │ │ ├── precise-neon.c │ │ │ │ │ │ │ │ ├── q31-sse2.c │ │ │ │ │ │ │ │ ├── q31-sse4.c │ │ │ │ │ │ │ │ ├── fp32-sse2.c │ │ │ │ │ │ │ │ ├── q31-ssse3.c │ │ │ │ │ │ │ │ ├── gemmlowp-sse2.c │ │ │ │ │ │ │ │ ├── gemmlowp-sse4.c │ │ │ │ │ │ │ │ ├── precise-sse2.c │ │ │ │ │ │ │ │ ├── precise-sse4.c │ │ │ │ │ │ │ │ ├── precise-ssse3.c │ │ │ │ │ │ │ │ └── gemmlowp-ssse3.c │ │ │ │ │ │ │ ├── q8conv │ │ │ │ │ │ │ │ ├── 4x8-aarch32-neon.S │ │ │ │ │ │ │ │ ├── 8x8-aarch64-neon.S │ │ │ │ │ │ │ │ ├── 4x8-neon.c │ │ │ │ │ │ │ │ ├── 8x8-neon.c │ │ │ │ │ │ │ │ └── 4x4c2-sse2.c │ │ │ │ │ │ │ ├── q8gemm │ │ │ │ │ │ │ │ ├── 4x8-aarch32-neon.S │ │ │ │ │ │ │ │ ├── 8x8-aarch64-neon.S │ │ │ │ │ │ │ │ ├── 4x8c2-xzp-aarch32-neon.S │ │ │ │ │ │ │ │ ├── 4x8-neon.c │ │ │ │ │ │ │ │ ├── 6x4-neon.c │ │ │ │ │ │ │ │ ├── 8x8-neon.c │ │ │ │ │ │ │ │ ├── 4x8c2-xzp-neon.c │ │ │ │ │ │ │ │ ├── 4x-sumrows-neon.c │ │ │ │ │ │ │ │ ├── 2x4c8-sse2.c │ │ │ │ │ │ │ │ └── 4x4c2-sse2.c │ │ │ │ │ │ │ ├── q8dwconv │ │ │ │ │ │ │ │ ├── up8x9-aarch32-neon.S │ │ │ │ │ │ │ │ ├── 
up8x9-neon.c │ │ │ │ │ │ │ │ ├── mp8x25-neon.c │ │ │ │ │ │ │ │ ├── mp8x25-sse2.c │ │ │ │ │ │ │ │ └── up8x9-sse2.c │ │ │ │ │ │ │ ├── hgemm │ │ │ │ │ │ │ │ └── 8x8-aarch32-neonfp16arith.S │ │ │ │ │ │ │ ├── q8vadd │ │ │ │ │ │ │ │ ├── neon.c │ │ │ │ │ │ │ │ └── sse2.c │ │ │ │ │ │ │ ├── u8rmax │ │ │ │ │ │ │ │ ├── neon.c │ │ │ │ │ │ │ │ └── sse2.c │ │ │ │ │ │ │ ├── u8clamp │ │ │ │ │ │ │ │ ├── neon.c │ │ │ │ │ │ │ │ └── sse2.c │ │ │ │ │ │ │ ├── x8zip │ │ │ │ │ │ │ │ ├── x2-neon.c │ │ │ │ │ │ │ │ ├── x3-neon.c │ │ │ │ │ │ │ │ ├── x4-neon.c │ │ │ │ │ │ │ │ ├── xm-neon.c │ │ │ │ │ │ │ │ ├── x2-sse2.c │ │ │ │ │ │ │ │ ├── x3-sse2.c │ │ │ │ │ │ │ │ ├── x4-sse2.c │ │ │ │ │ │ │ │ └── xm-sse2.c │ │ │ │ │ │ │ ├── q8avgpool │ │ │ │ │ │ │ │ ├── up8x9-neon.c │ │ │ │ │ │ │ │ ├── up8xm-neon.c │ │ │ │ │ │ │ │ ├── mp8x9p8q-neon.c │ │ │ │ │ │ │ │ ├── up8x9-sse2.c │ │ │ │ │ │ │ │ ├── up8xm-sse2.c │ │ │ │ │ │ │ │ └── mp8x9p8q-sse2.c │ │ │ │ │ │ │ ├── q8gavgpool │ │ │ │ │ │ │ │ ├── up8x7-neon.c │ │ │ │ │ │ │ │ ├── up8xm-neon.c │ │ │ │ │ │ │ │ ├── mp8x7p7q-neon.c │ │ │ │ │ │ │ │ ├── up8x7-sse2.c │ │ │ │ │ │ │ │ ├── up8xm-sse2.c │ │ │ │ │ │ │ │ └── mp8x7p7q-sse2.c │ │ │ │ │ │ │ └── u8maxpool │ │ │ │ │ │ │ │ ├── sub16-neon.c │ │ │ │ │ │ │ │ ├── 16x9p8q-neon.c │ │ │ │ │ │ │ │ ├── sub16-sse2.c │ │ │ │ │ │ │ │ └── 16x9p8q-sse2.c │ │ │ │ │ │ ├── deps │ │ │ │ │ │ │ └── clog │ │ │ │ │ │ │ │ ├── confu.yaml │ │ │ │ │ │ │ │ ├── .gitignore │ │ │ │ │ │ │ │ └── cmake │ │ │ │ │ │ │ │ └── DownloadGoogleTest.cmake │ │ │ │ │ │ ├── CODE_OF_CONDUCT.md │ │ │ │ │ │ ├── .gitignore │ │ │ │ │ │ ├── confu.yaml │ │ │ │ │ │ ├── src │ │ │ │ │ │ │ ├── requantization │ │ │ │ │ │ │ │ ├── runtime-sse2.h │ │ │ │ │ │ │ │ ├── runtime-neon.h │ │ │ │ │ │ │ │ └── runtime-assembly.h │ │ │ │ │ │ │ ├── operator-delete.c │ │ │ │ │ │ │ └── qnnpack │ │ │ │ │ │ │ │ ├── x8lut.h │ │ │ │ │ │ │ │ ├── u8rmax.h │ │ │ │ │ │ │ │ ├── u8lut32norm.h │ │ │ │ │ │ │ │ ├── assembly.h │ │ │ │ │ │ │ │ └── math.h │ │ │ │ │ │ └── cmake │ │ │ │ │ │ │ ├── 
DownloadFP16.cmake │ │ │ │ │ │ │ ├── DownloadFXdiv.cmake │ │ │ │ │ │ │ ├── DownloadPSimd.cmake │ │ │ │ │ │ │ ├── DownloadCpuinfo.cmake │ │ │ │ │ │ │ ├── DownloadPThreadPool.cmake │ │ │ │ │ │ │ ├── DownloadGoogleTest.cmake │ │ │ │ │ │ │ └── DownloadGoogleBenchmark.cmake │ │ │ │ │ ├── init_qnnpack.h │ │ │ │ │ ├── init_qnnpack.cpp │ │ │ │ │ └── fake_quantize_core.h │ │ │ │ ├── Copy.h │ │ │ │ └── cuda │ │ │ │ │ └── fake_quantize_core.h │ │ │ ├── TypeProperties.h │ │ │ ├── Unfold2d.cpp │ │ │ ├── mkldnn │ │ │ │ ├── IDeepRegistration.cpp │ │ │ │ ├── TensorShape.h │ │ │ │ └── Utils.h │ │ │ ├── Sorting.h │ │ │ ├── Cross.h │ │ │ ├── Copy.h │ │ │ ├── Fill.h │ │ │ ├── cuda │ │ │ │ ├── LaunchUtils.h │ │ │ │ ├── CrossKernel.cu │ │ │ │ ├── TensorShapeCUDA.cpp │ │ │ │ ├── SparseMM.cu │ │ │ │ ├── DeviceSqrt.cuh │ │ │ │ ├── FillKernel.cu │ │ │ │ └── CUDAScalar.cu │ │ │ ├── sparse │ │ │ │ └── SparseTensorMath.h │ │ │ ├── cpu │ │ │ │ ├── TensorCompareKernel.h │ │ │ │ ├── DepthwiseConvKernel.h │ │ │ │ ├── SoftmaxKernel.h │ │ │ │ └── GridSamplerKernel.h │ │ │ ├── Pow.h │ │ │ ├── PointwiseOps.h │ │ │ ├── Lerp.h │ │ │ ├── Unfold2d.h │ │ │ ├── Indexing.h │ │ │ ├── Distance.h │ │ │ ├── utils │ │ │ │ └── ParamUtils.h │ │ │ ├── Activation.h │ │ │ └── c10_utils.h │ │ ├── stub │ │ │ └── CombinedStub.cpp │ │ ├── Dimname.h │ │ ├── core │ │ │ ├── Scalar.h │ │ │ ├── blob.cpp │ │ │ ├── typeid.h │ │ │ ├── ScalarType.h │ │ │ ├── ATenGeneral.cpp │ │ │ ├── Macros.h │ │ │ ├── UndefinedTensorImpl.h │ │ │ ├── ATenGeneral.h │ │ │ ├── Backtrace.h │ │ │ ├── README.md │ │ │ ├── TensorImpl_test.cpp │ │ │ ├── Range.cpp │ │ │ ├── OpsAlreadyMovedToC10.h │ │ │ ├── DimVector.h │ │ │ ├── EnableNamedTensor.h │ │ │ ├── LegacyTypeDispatch.cpp │ │ │ ├── Tensor.h │ │ │ ├── Range.h │ │ │ ├── Reduction.h │ │ │ ├── dispatch │ │ │ │ └── README.md │ │ │ ├── UnsafeFromTH.h │ │ │ ├── LegacyDeviceTypeInit.cpp │ │ │ ├── Formatting.h │ │ │ ├── grad_mode.h │ │ │ ├── grad_mode.cpp │ │ │ └── DeprecatedTypeProperties.cpp │ │ ├── 
Formatting.h │ │ ├── NamedTensor.h │ │ ├── Device.h │ │ ├── Layout.h │ │ ├── ArrayRef.h │ │ ├── Backend.h │ │ ├── Scalar.h │ │ ├── Storage.h │ │ ├── Backtrace.h │ │ ├── DimVector.h │ │ ├── Generator.h │ │ ├── SmallVector.h │ │ ├── TensorOptions.h │ │ ├── TensorAccessor.h │ │ ├── miopen │ │ │ ├── miopen-wrapper.h │ │ │ ├── Handle.h │ │ │ ├── Types.h │ │ │ ├── Types.cpp │ │ │ ├── Utils.h │ │ │ └── Handle.cpp │ │ ├── mkldnn │ │ │ └── Runtime.cpp │ │ ├── test │ │ │ ├── test_install │ │ │ │ ├── main.cpp │ │ │ │ └── CMakeLists.txt │ │ │ ├── cuda_cudnn_test.cpp │ │ │ ├── verify_api_visibility.cpp │ │ │ ├── reduce_ops_test.cpp │ │ │ ├── dlconvertor_test.cpp │ │ │ └── cuda_optional_test.cu │ │ ├── ScalarType.h │ │ ├── detail │ │ │ ├── CPUGuardImpl.cpp │ │ │ ├── ScalarTypeConversions.h │ │ │ └── HIPHooksInterface.cpp │ │ ├── cuda │ │ │ ├── ATenCUDAGeneral.h │ │ │ ├── PinnedMemoryAllocator.h │ │ │ ├── nvrtc_stub │ │ │ │ └── ATenNVRTC.cpp │ │ │ ├── CUDATensorMethods.cuh │ │ │ ├── CUDADevice.h │ │ │ ├── PinnedMemoryAllocator.cpp │ │ │ ├── CUDAUtils.h │ │ │ ├── CUDAConfig.h.in │ │ │ └── detail │ │ │ │ └── IndexUtils.cuh │ │ ├── div_rtn.h │ │ ├── Utils.cpp │ │ ├── ATenConfig.cmake.in │ │ ├── quantized │ │ │ ├── QTensorImpl.cpp │ │ │ └── CMakeLists.txt │ │ ├── TensorGeometry.cpp │ │ ├── Version.h │ │ ├── mkl │ │ │ ├── Limits.h │ │ │ ├── README.md │ │ │ └── Exceptions.h │ │ ├── DynamicLibrary.h │ │ ├── env.py │ │ ├── PTThreadPool.h │ │ ├── Tensor.h │ │ ├── cpu │ │ │ ├── tbb │ │ │ │ └── extra │ │ │ │ │ └── version_string.ver.in │ │ │ └── FlushDenormal.h │ │ ├── InitialTensorOptions.h │ │ ├── hip │ │ │ └── impl │ │ │ │ ├── HIPCachingAllocatorMasqueradingAsCUDA.h │ │ │ │ ├── HIPCachingAllocatorMasqueradingAsCUDA.cpp │ │ │ │ └── HIPGuardImplMasqueradingAsCUDA.cpp │ │ ├── ThreadLocalDebugInfo.cpp │ │ ├── templates │ │ │ ├── LegacyTHFunctions.h │ │ │ └── TypeDefault.cpp │ │ ├── Config.h.in │ │ ├── DLConvertor.h │ │ ├── ATen.h │ │ └── WrapDimUtilsMulti.h │ ├── THC │ │ ├── THCStream.cpp │ 
│ ├── THCTensorMathReduce.cu │ │ ├── generated │ │ │ ├── THCTensorMathCompareBool.cu │ │ │ ├── THCTensorMathCompareByte.cu │ │ │ ├── THCTensorMathCompareChar.cu │ │ │ ├── THCTensorMathCompareFloat.cu │ │ │ ├── THCTensorMathCompareHalf.cu │ │ │ ├── THCTensorMathCompareInt.cu │ │ │ ├── THCTensorMathCompareLong.cu │ │ │ ├── THCTensorMathCompareShort.cu │ │ │ ├── THCTensorMathCompareTInt.cu │ │ │ ├── THCTensorMathCompareDouble.cu │ │ │ ├── THCTensorMathCompareTBool.cu │ │ │ ├── THCTensorMathCompareTByte.cu │ │ │ ├── THCTensorMathCompareTChar.cu │ │ │ ├── THCTensorMathCompareTDouble.cu │ │ │ ├── THCTensorMathCompareTFloat.cu │ │ │ ├── THCTensorMathCompareTHalf.cu │ │ │ ├── THCTensorMathCompareTLong.cu │ │ │ ├── THCTensorMathCompareTShort.cu │ │ │ ├── THCTensorSortByte.cu │ │ │ ├── THCTensorSortChar.cu │ │ │ ├── THCTensorSortHalf.cu │ │ │ ├── THCTensorSortInt.cu │ │ │ ├── THCTensorSortLong.cu │ │ │ ├── THCTensorMaskedInt.cu │ │ │ ├── THCTensorSortDouble.cu │ │ │ ├── THCTensorSortFloat.cu │ │ │ ├── THCTensorSortShort.cu │ │ │ ├── THCTensorMaskedBool.cu │ │ │ ├── THCTensorMaskedByte.cu │ │ │ ├── THCTensorMaskedChar.cu │ │ │ ├── THCTensorMaskedDouble.cu │ │ │ ├── THCTensorMaskedFloat.cu │ │ │ ├── THCTensorMaskedHalf.cu │ │ │ ├── THCTensorMaskedLong.cu │ │ │ ├── THCTensorMaskedShort.cu │ │ │ ├── THCTensorMaskedBFloat16.cu │ │ │ ├── THCTensorMathReduceBool.cu │ │ │ ├── THCTensorMathReduceByte.cu │ │ │ ├── THCTensorMathReduceChar.cu │ │ │ ├── THCTensorMathReduceFloat.cu │ │ │ ├── THCTensorMathReduceHalf.cu │ │ │ ├── THCTensorMathReduceInt.cu │ │ │ ├── THCTensorMathReduceLong.cu │ │ │ ├── THCTensorMathReduceShort.cu │ │ │ ├── THCTensorMathPointwiseInt.cu │ │ │ ├── THCTensorMathReduceDouble.cu │ │ │ ├── THCTensorMathPointwiseBool.cu │ │ │ ├── THCTensorMathPointwiseByte.cu │ │ │ ├── THCTensorMathPointwiseChar.cu │ │ │ ├── THCTensorMathPointwiseFloat.cu │ │ │ ├── THCTensorMathPointwiseHalf.cu │ │ │ ├── THCTensorMathPointwiseLong.cu │ │ │ ├── THCTensorMathPointwiseShort.cu │ │ │ 
├── THCTensorMathReduceBFloat16.cu │ │ │ └── THCTensorMathPointwiseDouble.cu │ │ ├── generic │ │ │ ├── THCTensor.cu │ │ │ ├── THCTensorMathScan.h │ │ │ ├── THCTensorCopy.h │ │ │ ├── THCTensorTopK.h │ │ │ ├── THCTensorRandom.h │ │ │ ├── THCTensorMode.h │ │ │ ├── THCTensorScatterGather.h │ │ │ ├── THCTensorMathMagma.h │ │ │ ├── THCStorage.cu │ │ │ └── THCTensorIndex.h │ │ ├── THCSleep.h │ │ ├── THCTensor.cu │ │ ├── THCTensorMathBlas.cu │ │ ├── THCSortUtils.cu │ │ ├── THCStorageCopy.cpp │ │ ├── THCAllocator.h │ │ ├── THCStorageCopy.h │ │ ├── THCTensorCopy.h │ │ ├── THC.h │ │ ├── THCStorage.h │ │ ├── THCStorageCopy.cu │ │ ├── THCGenerateIntType.h │ │ ├── THCGenerateCharType.h │ │ ├── THCGenerateByteType.h │ │ ├── THCGenerateLongType.h │ │ ├── THCGenerateShortType.h │ │ ├── THCTensorRandom.h │ │ ├── THCSleep.cu │ │ ├── THCGenerateBoolType.h │ │ ├── THCGenerateDoubleType.h │ │ ├── THCStorage.cu │ │ ├── THCTensorMathMagma.cuh │ │ ├── THCTensorMode.cu │ │ ├── THCTensorTopK.cu │ │ ├── THCGenerateHalfType.h │ │ ├── THCGenerateBFloat16Type.h │ │ ├── THCTensor.h │ │ ├── THCGenerateFloatType.h │ │ ├── THCTensorCopy.hpp │ │ ├── THCThrustAllocator.cuh │ │ ├── THCTensorMathMagma.cu │ │ ├── THCTensorMathCompare.cuh │ │ ├── THCStorage.hpp │ │ ├── THCGeneral.hpp │ │ └── THCGenerateFloatTypes.h │ ├── TH │ │ ├── THLapack.cpp │ │ ├── THHalf.h │ │ ├── THStorage.h │ │ ├── THTensorConv.cpp │ │ ├── THTensorLapack.cpp │ │ ├── THBlas.cpp │ │ ├── generic │ │ │ ├── THTensorFill.h │ │ │ ├── THTensorLapack.h │ │ │ └── THTensorFill.cpp │ │ ├── THTensorRandom.cpp │ │ ├── README.md │ │ ├── THBlas.h │ │ ├── THLogAdd.h │ │ ├── THTensorMath.cpp │ │ ├── THTensorMoreMath.cpp │ │ ├── THTensorFill.cpp │ │ ├── THMemoryFile.h │ │ ├── THSize.h │ │ ├── THGenerateAllTypes.h │ │ ├── THGenerateFloatTypes.h │ │ ├── TH.h │ │ ├── THGenerateQTypes.h │ │ ├── THTensorEvenMoreMath.cpp │ │ ├── THSize.cpp │ │ ├── THGenerateIntTypes.h │ │ ├── THGenerateBFloat16Type.h │ │ ├── THGenerateQInt8Type.h │ │ ├── 
THGenerateQInt32Type.h │ │ ├── THGenerateQUInt8Type.h │ │ ├── THVector.h │ │ ├── THGenerateBoolType.h │ │ ├── THGenerateFloatType.h │ │ ├── THGenerateDoubleType.h │ │ ├── THDiskFile.h │ │ ├── THGenerateIntType.h │ │ ├── THGenerateCharType.h │ │ ├── THGenerateByteType.h │ │ ├── THGenerateLongType.h │ │ ├── THGenerateHalfType.h │ │ ├── THGenerateShortType.h │ │ └── vector │ │ │ └── AVX2.h │ ├── THNN │ │ ├── CMakeLists.txt │ │ └── THNN.h │ └── THCUNN │ │ ├── THCUNN.h │ │ ├── SpatialConvolutionMM.cu │ │ ├── Tanh.cu │ │ └── SharedMem.cuh ├── tools │ ├── valgrind.sup │ └── test_install.sh └── conda │ ├── meta.yaml │ └── build.sh └── cmake ├── Modules_CUDA_fix ├── upstream │ └── README.md └── FindCUDA.cmake ├── public ├── mkl.cmake ├── mkldnn.cmake └── threads.cmake ├── TorchConfigVersion.cmake.in ├── Caffe2ConfigVersion.cmake.in ├── Modules ├── FindCUB.cmake ├── Findpybind11.cmake ├── FindNuma.cmake └── FindBenchmark.cmake ├── External └── rccl.cmake ├── GoogleTestPatch.cmake └── Whitelist.cmake /aten/src/ATen/cudnn/Exceptions.h: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /aten/src/ATen/.gitignore: -------------------------------------------------------------------------------- 1 | Config.h 2 | -------------------------------------------------------------------------------- /aten/src/ATen/native/LegacyBridge.cpp: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /aten/src/ATen/stub/CombinedStub.cpp: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /aten/src/ATen/Dimname.h: -------------------------------------------------------------------------------- 1 | #include 2 | 
-------------------------------------------------------------------------------- /aten/src/ATen/core/Scalar.h: -------------------------------------------------------------------------------- 1 | #include 2 | -------------------------------------------------------------------------------- /aten/src/ATen/core/blob.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | -------------------------------------------------------------------------------- /aten/src/ATen/core/typeid.h: -------------------------------------------------------------------------------- 1 | #include 2 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/dummy.c: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /aten/src/THC/THCStream.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | -------------------------------------------------------------------------------- /aten/src/ATen/Formatting.h: -------------------------------------------------------------------------------- 1 | #include 2 | -------------------------------------------------------------------------------- /aten/src/ATen/NamedTensor.h: -------------------------------------------------------------------------------- 1 | #include 2 | -------------------------------------------------------------------------------- /aten/src/ATen/core/ScalarType.h: -------------------------------------------------------------------------------- 1 | #include 2 | -------------------------------------------------------------------------------- /aten/src/ATen/Device.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | -------------------------------------------------------------------------------- 
/aten/src/ATen/Layout.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | -------------------------------------------------------------------------------- /aten/src/ATen/core/ATenGeneral.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | -------------------------------------------------------------------------------- /aten/src/ATen/ArrayRef.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | -------------------------------------------------------------------------------- /aten/src/ATen/Backend.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | -------------------------------------------------------------------------------- /aten/src/ATen/Scalar.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | -------------------------------------------------------------------------------- /aten/src/ATen/Storage.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | -------------------------------------------------------------------------------- /aten/src/ATen/Backtrace.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | -------------------------------------------------------------------------------- /aten/src/ATen/DimVector.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | -------------------------------------------------------------------------------- /aten/src/ATen/Generator.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 
-------------------------------------------------------------------------------- /aten/src/ATen/SmallVector.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | -------------------------------------------------------------------------------- /aten/src/ATen/core/Macros.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | -------------------------------------------------------------------------------- /aten/src/ATen/cudnn/Handles.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | -------------------------------------------------------------------------------- /aten/src/ATen/TensorOptions.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | -------------------------------------------------------------------------------- /aten/src/ATen/core/UndefinedTensorImpl.h: -------------------------------------------------------------------------------- 1 | #include 2 | -------------------------------------------------------------------------------- /aten/src/ATen/TensorAccessor.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | -------------------------------------------------------------------------------- /aten/src/ATen/core/ATenGeneral.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | -------------------------------------------------------------------------------- /aten/src/ATen/miopen/miopen-wrapper.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | -------------------------------------------------------------------------------- /aten/src/ATen/core/Backtrace.h: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | -------------------------------------------------------------------------------- /aten/src/THC/THCTensorMathReduce.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareBool.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareByte.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareChar.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareFloat.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareHalf.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareInt.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareLong.cu: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareShort.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareTInt.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/TH/THLapack.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareDouble.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareTBool.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareTByte.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareTChar.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareTDouble.cu: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareTFloat.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareTHalf.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareTLong.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathCompareTShort.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /aten/src/ATen/mkldnn/Runtime.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace at { namespace native { 4 | 5 | }} // namespace at::native 6 | -------------------------------------------------------------------------------- /aten/src/TH/THHalf.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_HALF_H 2 | #define TH_HALF_H 3 | 4 | #include 5 | 6 | #define THHalf at::Half 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /aten/src/ATen/test/test_install/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() { 4 | std::cout << at::ones({3,4}, at::CPU(at::kFloat)) 
<< "\n"; 5 | } 6 | -------------------------------------------------------------------------------- /aten/src/TH/THStorage.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | // Compatability header. Use THStorageFunctions.h instead if you need this. 5 | -------------------------------------------------------------------------------- /aten/src/ATen/ScalarType.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include // for BC reasons 3 | #include 4 | #include 5 | -------------------------------------------------------------------------------- /aten/src/ATen/native/TypeProperties.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace at { namespace native { 4 | 5 | ScalarType result_type(TensorList tensors); 6 | 7 | }} 8 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/x8lut/scalar.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #include 4 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/sgemm/6x8-psimd.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #include 4 | -------------------------------------------------------------------------------- /aten/src/TH/THTensorConv.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | 7 | -------------------------------------------------------------------------------- /aten/src/TH/THTensorLapack.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/u8lut32norm/scalar.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #include 4 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/deps/clog/confu.yaml: -------------------------------------------------------------------------------- 1 | name: clog 2 | title: C-style (a-la printf) logging library 3 | license: Simplified BSD 4 | deps: 5 | - name: googletest 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/fp32-psimd.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #include 4 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/q31-scalar.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #include 4 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/fp32-scalar.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #include 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorSortByte.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorSortChar.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorSortHalf.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorSortInt.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorSortLong.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/gemmlowp-scalar.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by 
generate-wrappers.py script. Do not modify */ 2 | 3 | #include 4 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/precise-psimd.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #include 4 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/precise-scalar.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #include 4 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMaskedInt.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorSortDouble.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorSortFloat.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorSortShort.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMaskedBool.cu: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMaskedByte.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMaskedChar.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMaskedDouble.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMaskedFloat.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMaskedHalf.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMaskedLong.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMaskedShort.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 
#include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/ATen/core/README.md: -------------------------------------------------------------------------------- 1 | ATen Core 2 | --------- 3 | 4 | ATen Core is a minimal subset of ATen which is suitable for deployment 5 | on mobile. Binary size of files in this folder is an important constraint. 6 | -------------------------------------------------------------------------------- /aten/src/ATen/detail/CPUGuardImpl.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace at { 4 | namespace detail { 5 | 6 | C10_REGISTER_GUARD_IMPL(CPU, CPUGuardImpl); 7 | 8 | }} // namespace at::detail 9 | -------------------------------------------------------------------------------- /aten/src/ATen/miopen/Handle.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace at { namespace native { 6 | 7 | miopenHandle_t getMiopenHandle(); 8 | 9 | }} // namespace 10 | -------------------------------------------------------------------------------- /aten/src/ATen/native/Unfold2d.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace at { namespace native { 4 | 5 | DEFINE_DISPATCH(unfolded2d_copy_stub); 6 | DEFINE_DISPATCH(unfolded2d_acc_stub); 7 | 8 | }} 9 | -------------------------------------------------------------------------------- /aten/src/TH/THBlas.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMaskedBFloat16.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 
| #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathReduceBool.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathReduceByte.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathReduceChar.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathReduceFloat.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathReduceHalf.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathReduceInt.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathReduceLong.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | 
-------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathReduceShort.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THNN/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(ATen_CPU_SRCS ${ATen_CPU_SRCS} 2 | ${CMAKE_CURRENT_SOURCE_DIR}/init.cpp 3 | PARENT_SCOPE) 4 | INSTALL(FILES generic/THNN.h DESTINATION "${ATEN_INSTALL_INCLUDE_SUBDIR}/THNN/generic") 5 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathPointwiseInt.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathReduceDouble.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathPointwiseBool.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathPointwiseByte.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathPointwiseChar.cu: -------------------------------------------------------------------------------- 1 | #include 2 
| #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathPointwiseFloat.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathPointwiseHalf.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathPointwiseLong.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathPointwiseShort.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathReduceBFloat16.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/THC/generated/THCTensorMathPointwiseDouble.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /aten/src/ATen/core/TensorImpl_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | TEST(TensorImplTest, Caffe2Constructor) { 5 | 
caffe2::Tensor tensor(caffe2::CPU); 6 | ASSERT_EQ(tensor.strides()[0], 1); 7 | } 8 | -------------------------------------------------------------------------------- /aten/src/ATen/cuda/ATenCUDAGeneral.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | // Use TORCH_CUDA_API for exports from this folder 10 | -------------------------------------------------------------------------------- /aten/src/ATen/native/mkldnn/IDeepRegistration.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #if AT_MKLDNN_ENABLED() 4 | 5 | // needs to be included only once in library. 6 | #include 7 | 8 | #endif // AT_MKLDNN_ENALBED() 9 | -------------------------------------------------------------------------------- /aten/src/ATen/cuda/PinnedMemoryAllocator.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace at { namespace cuda { 6 | 7 | TORCH_CUDA_API at::Allocator* getPinnedMemoryAllocator(); 8 | 9 | }} // namespace at::cuda 10 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8conv/4x8-aarch32-neon.S: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) 4 | #include 5 | #endif /* defined(__arm__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8gemm/4x8-aarch32-neon.S: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__arm__) 4 | #include 5 | #endif /* defined(__arm__) */ 6 | -------------------------------------------------------------------------------- /aten/tools/valgrind.sup: -------------------------------------------------------------------------------- 1 | { 2 | 3 | Memcheck:Cond 4 | fun:index 5 | fun:expand_dynamic_string_token 6 | fun:_dl_map_object 7 | fun:map_doit 8 | fun:_dl_catch_error 9 | fun:handle_ld_preload 10 | ... 11 | } 12 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/Copy.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace at { 6 | namespace native { 7 | 8 | Tensor& quantized_copy_from_float_(Tensor& self, const Tensor& src); 9 | 10 | } 11 | } // namespace at 12 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/init_qnnpack.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef USE_PYTORCH_QNNPACK 4 | 5 | namespace at { 6 | namespace native { 7 | 8 | void initQNNPACK(); 9 | 10 | } // namespace native 11 | } // namespace at 12 | 13 | #endif 14 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8conv/8x8-aarch64-neon.S: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__aarch64__) 4 | #include 5 | #endif /* defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8dwconv/up8x9-aarch32-neon.S: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__arm__) 4 | #include 5 | #endif /* defined(__arm__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8gemm/8x8-aarch64-neon.S: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__aarch64__) 4 | #include 5 | #endif /* defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/TH/generic/THTensorFill.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #define TH_GENERIC_FILE "TH/generic/THTensorFill.h" 3 | #else 4 | 5 | TH_API void THTensor_(fill)(THTensor *r_, scalar_t value); 6 | TH_API void THTensor_(zero)(THTensor *r_); 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8gemm/4x8c2-xzp-aarch32-neon.S: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) 4 | #include 5 | #endif /* defined(__arm__) */ 6 | -------------------------------------------------------------------------------- /aten/src/TH/THTensorRandom.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/hgemm/8x8-aarch32-neonfp16arith.S: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__arm__) 4 | #include 5 | #endif /* defined(__arm__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8vadd/neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/u8rmax/neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/tools/test_install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | set -e 4 | INSTALL_ROOT=$1 5 | SRC_ROOT=$2 6 | rm -rf test_build 7 | mkdir test_build 8 | cd test_build 9 | cmake -DCMAKE_PREFIX_PATH=$INSTALL_ROOT $SRC_ROOT/src/ATen/test/test_install 10 | make 11 | ./main 12 | -------------------------------------------------------------------------------- /aten/src/ATen/cudnn/Handle.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { namespace native { 7 | 8 | TORCH_CUDA_API cudnnHandle_t getCudnnHandle(); 9 | 10 | }} // namespace at::native 11 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/sgemm/5x8-neon.c: -------------------------------------------------------------------------------- 
1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/sgemm/6x8-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/u8clamp/neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/x8zip/x2-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/x8zip/x3-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/x8zip/x4-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/x8zip/xm-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/div_rtn.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // Integer division rounding to -Infinity 4 | template 5 | static inline T div_rtn(T x, T y) { 6 | int q = x/y; 7 | int r = x%y; 8 | if ((r!=0) && ((r<0) != (y<0))) --q; 9 | return q; 10 | } 11 | 12 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8conv/4x8-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8conv/8x8-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8gemm/4x8-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8gemm/6x4-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8gemm/8x8-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/THC/generic/THCTensor.cu: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #define THC_GENERIC_FILE "THC/generic/THCTensor.cu" 3 | #else 4 | 5 | int THCTensor_(getDevice)(THCState* state, const THCTensor* tensor) { 6 | return THCTensor_getDevice(state, tensor); 7 | } 8 | 9 | #endif 10 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8dwconv/up8x9-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8avgpool/up8x9-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8avgpool/up8xm-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8dwconv/mp8x25-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8gavgpool/up8x7-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8gavgpool/up8xm-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8gemm/4x8c2-xzp-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/u8maxpool/sub16-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/THC/THCSleep.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_SPIN_INC 2 | #define THC_SPIN_INC 3 | 4 | #include 5 | #include 6 | 7 | // enqueues a kernel that spins for the specified number of cycles 8 | THC_API void THC_sleep(THCState* state, int64_t cycles); 9 | 10 | #endif 11 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8avgpool/mp8x9p8q-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8gavgpool/mp8x7p7q-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8gemm/4x-sumrows-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/fp32-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/q31-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/u8maxpool/16x9p8q-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/Utils.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | namespace at { 8 | 9 | int _crash_if_asan(int arg) { 10 | volatile char x[3]; 11 | x[arg] = 0; 12 | return x[0]; 13 | } 14 | 15 | } // at 16 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/gemmlowp-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/precise-neon.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__arm__) || defined(__aarch64__) 4 | #include 5 | #endif /* defined(__arm__) || defined(__aarch64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/cudnn/Types.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { namespace native { 7 | 8 | cudnnDataType_t getCudnnDataType(const at::Tensor& tensor); 9 | 10 | int64_t cudnn_version(); 11 | 12 | }} // namespace at::cudnn 13 | -------------------------------------------------------------------------------- /aten/src/ATen/core/Range.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | namespace at { 6 | 7 | std::ostream& operator<<(std::ostream& out, const Range& range) { 8 | out << "Range[" << range.begin << ", " << range.end << "]"; 9 | return out; 10 | } 11 | 12 | } // namespace at 13 | -------------------------------------------------------------------------------- /aten/src/ATen/miopen/Types.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { namespace native { 7 | 8 | miopenDataType_t getMiopenDataType(const at::Tensor& tensor); 9 | 10 | int64_t miopen_version(); 11 | 12 | }} // namespace at::miopen 13 | -------------------------------------------------------------------------------- /aten/src/ATen/native/mkldnn/TensorShape.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace at { 6 | namespace native { 7 | 8 | Tensor mkldnn_view(const Tensor& self, IntArrayRef size); 9 | 10 | Tensor mkldnn_clone(const Tensor& self); 11 | 12 | } // namespace native 13 | } // namespace at 14 | -------------------------------------------------------------------------------- 
/aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8vadd/sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/u8clamp/sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/u8rmax/sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/x8zip/x2-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/x8zip/x3-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/x8zip/x4-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/x8zip/xm-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | Facebook has adopted a Code of Conduct that we expect project participants to adhere to. 
4 | Please read the [full text](https://code.fb.com/codeofconduct/) 5 | so that you can understand what actions will and will not be tolerated. 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8conv/4x4c2-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8gemm/2x4c8-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8gemm/4x4c2-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/core/OpsAlreadyMovedToC10.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace c10 { 6 | struct OperatorName; 7 | } 8 | 9 | namespace at { 10 | 11 | // list of ATen ops that come from native_functions.yaml 12 | CAFFE2_API bool is_aten_op(const c10::OperatorName& opName); 13 | 14 | } 15 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8avgpool/up8x9-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8avgpool/up8xm-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8dwconv/mp8x25-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8dwconv/up8x9-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/u8maxpool/sub16-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/core/DimVector.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { 7 | 8 | constexpr size_t kDimVectorStaticSize = 5; 9 | 10 | /// A container for sizes or strides 11 | using DimVector = SmallVector; 12 | 13 | } // namespace at 14 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/deps/clog/.gitignore: -------------------------------------------------------------------------------- 1 | # Ninja files 2 | build.ninja 3 | 4 | # Build objects and artifacts 5 | deps/ 6 | build/ 7 | bin/ 8 | lib/ 9 | *.pyc 10 | *.pyo 11 | 12 | # System files 13 | .DS_Store 14 | .DS_Store? 
15 | ._* 16 | .Spotlight-V100 17 | .Trashes 18 | ehthumbs.db 19 | Thumbs.db 20 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8avgpool/mp8x9p8q-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8gavgpool/up8x7-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8gavgpool/up8xm-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/q31-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/q31-sse4.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/u8maxpool/16x9p8q-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/Sorting.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { namespace native { 7 | 8 | using topk_fn = void(*)(Tensor&, Tensor&, const Tensor&, int64_t, int64_t, bool, bool); 9 | 10 | DECLARE_DISPATCH(topk_fn, topk_stub); 11 | 12 | }} // at::native 13 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/q8gavgpool/mp8x7p7q-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/fp32-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/q31-ssse3.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/gemmlowp-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/gemmlowp-sse4.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/precise-sse2.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/precise-sse4.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/precise-ssse3.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/TH/README.md: -------------------------------------------------------------------------------- 1 | Environment variables control the disabling of certain explicit SIMD optimizations. 
2 | 3 | ``` 4 | x64 options: 5 | TH_NO_AVX2=1 # disable AVX2 codepaths 6 | TH_NO_AVX=1 # disable AVX codepaths 7 | TH_NO_SSE=1 # disable SSE codepaths 8 | 9 | ppc64le options: 10 | TH_NO_VSX=1 # disable VSX codepaths 11 | ``` 12 | -------------------------------------------------------------------------------- /aten/src/THCUNN/THCUNN.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define THCIndexTensor THCudaLongTensor 4 | #define THCIndexTensor_(NAME) THCudaLongTensor_ ## NAME 5 | typedef int64_t THCIndex_t; 6 | 7 | #define THNN_(NAME) TH_CONCAT_3(THNN_, CReal, NAME) 8 | 9 | #include 10 | #include 11 | -------------------------------------------------------------------------------- /cmake/Modules_CUDA_fix/upstream/README.md: -------------------------------------------------------------------------------- 1 | If you need to update files under this folder, we recommend you issue PRs 2 | against [the CMake mainline branch](https://gitlab.kitware.com/cmake/cmake/tree/master/Modules/FindCUDA.cmake), 3 | and then backport it here for earlier CMake compatibility. 4 | 5 | See [this](../README.md) for more details. 
6 | -------------------------------------------------------------------------------- /aten/src/ATen/ATenConfig.cmake.in: -------------------------------------------------------------------------------- 1 | # Find the TH includes and library 2 | # 3 | # ATEN_INCLUDE_DIR -- where to find the includes 4 | # ATEN_LIBRARIES -- list of libraries to link against 5 | # ATEN_FOUND -- set to 1 if found 6 | 7 | SET(ATEN_FOUND 1) 8 | SET(ATEN_INCLUDE_DIR "@ATEN_INCLUDE_DIR@") 9 | SET(ATEN_LIBRARIES "@ATEN_LIBRARIES@") 10 | -------------------------------------------------------------------------------- /aten/src/ATen/native/Cross.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { namespace native { 7 | 8 | using cross_fn = void(*)(Tensor&, const Tensor&, const Tensor&, const int64_t d); 9 | 10 | DECLARE_DISPATCH(cross_fn, cross_stub); 11 | 12 | }} // namespace at::native 13 | 14 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/wrappers/requantization/gemmlowp-ssse3.c: -------------------------------------------------------------------------------- 1 | /* Auto-generated by generate-wrappers.py script. 
Do not modify */ 2 | 3 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) 4 | #include 5 | #endif /* defined(__i386__) || defined(__i686__) || defined(__x86_64__) */ 6 | -------------------------------------------------------------------------------- /aten/src/THC/generic/THCTensorMathScan.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #define THC_GENERIC_FILE "THC/generic/THCTensorMathScan.h" 3 | #else 4 | 5 | THC_API void THCTensor_(cumsum)(THCState *state, THCTensor *self, THCTensor *src, int dim); 6 | THC_API void THCTensor_(cumprod)(THCState *state, THCTensor *self, THCTensor *src, int dim); 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /aten/src/THCUNN/SpatialConvolutionMM.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | -------------------------------------------------------------------------------- /aten/src/ATen/quantized/QTensorImpl.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace at { 4 | 5 | QTensorImpl::QTensorImpl( 6 | Storage&& storage, 7 | TensorTypeSet type_set, 8 | QuantizerPtr quantizer) 9 | : TensorImpl(std::move(storage), type_set), 10 | quantizer_(quantizer) {} 11 | 12 | } // namespace at 13 | -------------------------------------------------------------------------------- /aten/src/TH/THBlas.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_BLAS_INC 2 | #define TH_BLAS_INC 3 | 4 | #include 5 | 6 | #define THBlas_(NAME) TH_CONCAT_4(TH,Real,Blas_,NAME) 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- 
/aten/src/THC/THCTensor.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | -------------------------------------------------------------------------------- /aten/src/ATen/core/EnableNamedTensor.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | // We are working on removing the BUILD_NAMEDTENSOR flag from the codebase. 6 | // 7 | // PyTorch's codegen also uses a similar flag. You can find it in 8 | // - aten/src/ATen/env.py 9 | #ifndef BUILD_NAMEDTENSOR 10 | #define BUILD_NAMEDTENSOR 11 | #endif 12 | -------------------------------------------------------------------------------- /aten/src/ATen/cuda/nvrtc_stub/ATenNVRTC.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace at { namespace cuda { 5 | 6 | NVRTC* load_nvrtc() { 7 | auto self = new NVRTC(); 8 | #define CREATE_ASSIGN(name) self->name = name; 9 | AT_FORALL_NVRTC(CREATE_ASSIGN) 10 | return self; 11 | } 12 | 13 | }} // at::cuda 14 | -------------------------------------------------------------------------------- /aten/src/ATen/test/test_install/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.0) 2 | find_package(ATen REQUIRED) 3 | include_directories(${ATEN_INCLUDE_DIR}) 4 | 5 | # C++11 6 | if (not MSVC) 7 | set(CMAKE_CXX_FLAGS "--std=c++11 ${CMAKE_CXX_FLAGS}") 8 | endif() 9 | add_executable(main main.cpp) 10 | target_link_libraries(main ${ATEN_LIBRARIES}) 11 | -------------------------------------------------------------------------------- /aten/src/THC/THCTensorMathBlas.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 
| #include 7 | #include 8 | 9 | #include 10 | #include 11 | -------------------------------------------------------------------------------- /aten/src/ATen/TensorGeometry.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | namespace at { 7 | 8 | bool TensorGeometry::is_contiguous() const { 9 | if (numel_ == 0) { 10 | return true; 11 | } 12 | return at::geometry_is_contiguous(sizes_, strides_); 13 | } 14 | 15 | } // namespace at 16 | -------------------------------------------------------------------------------- /cmake/public/mkl.cmake: -------------------------------------------------------------------------------- 1 | find_package(MKL QUIET) 2 | 3 | if(NOT TARGET caffe2::mkl) 4 | add_library(caffe2::mkl INTERFACE IMPORTED) 5 | endif() 6 | 7 | set_property( 8 | TARGET caffe2::mkl PROPERTY INTERFACE_INCLUDE_DIRECTORIES 9 | ${MKL_INCLUDE_DIR}) 10 | set_property( 11 | TARGET caffe2::mkl PROPERTY INTERFACE_LINK_LIBRARIES 12 | ${MKL_LIBRARIES}) 13 | -------------------------------------------------------------------------------- /aten/src/ATen/cuda/CUDATensorMethods.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | namespace at { 11 | template <> 12 | inline __half* Tensor::data() const { 13 | return reinterpret_cast<__half*>(data()); 14 | } 15 | } // namespace at 16 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/.gitignore: -------------------------------------------------------------------------------- 1 | # Ninja files 2 | build.ninja 3 | 4 | # Build objects and artifacts 5 | deps/ 6 | build/ 7 | build-*/ 8 | bin/ 9 | lib/ 10 | out/ 11 | obj/ 12 | libs/ 13 | *.pyc 14 | *.pyo 15 | *.log 16 | 17 | # System files 18 | .DS_Store 19 | .DS_Store? 
20 | ._* 21 | .Spotlight-V100 22 | .Trashes 23 | ehthumbs.db 24 | Thumbs.db 25 | -------------------------------------------------------------------------------- /aten/src/ATen/native/Copy.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { 7 | 8 | struct TensorIterator; 9 | 10 | namespace native { 11 | 12 | using copy_fn = void (*)(TensorIterator&, bool non_blocking); 13 | 14 | DECLARE_DISPATCH(copy_fn, copy_stub); 15 | 16 | } // namespace native 17 | } // namespace at 18 | -------------------------------------------------------------------------------- /aten/src/ATen/Version.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace at { 4 | 5 | /// Returns a detailed string describing the configuration PyTorch. 6 | CAFFE2_API std::string show_config(); 7 | 8 | CAFFE2_API std::string get_mkl_version(); 9 | 10 | CAFFE2_API std::string get_mkldnn_version(); 11 | 12 | CAFFE2_API std::string get_openmp_version(); 13 | 14 | } // namespace at 15 | -------------------------------------------------------------------------------- /aten/src/ATen/core/LegacyTypeDispatch.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace at { 4 | 5 | // TODO: This could be bad juju if someone calls globalContext() in the 6 | // destructor of an object with static lifetime. 
7 | LegacyTypeDispatch & globalLegacyTypeDispatch() { 8 | static LegacyTypeDispatch singleton; 9 | return singleton; 10 | } 11 | 12 | } 13 | -------------------------------------------------------------------------------- /aten/src/ATen/mkl/Limits.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace at { namespace native { 6 | 7 | // Since size of MKL_LONG varies on different platforms (linux 64 bit, windows 8 | // 32 bit), we need to programmatically calculate the max. 9 | static int64_t MKL_LONG_MAX = ((1LL << (sizeof(MKL_LONG) * 8 - 2)) - 1) * 2 + 1; 10 | 11 | }} // namespace 12 | -------------------------------------------------------------------------------- /aten/src/THC/THCSortUtils.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | // Returns 2^(ceil(lg(n)) from Stanford bit twiddling hacks 4 | uint64_t nextHighestPowerOf2(uint64_t n) { 5 | n--; 6 | n |= n >> 1; 7 | n |= n >> 2; 8 | n |= n >> 4; 9 | n |= n >> 8; 10 | n |= n >> 16; 11 | #ifndef _MSC_VER 12 | n |= n >> 32; 13 | #endif 14 | n++; 15 | 16 | return n; 17 | } 18 | -------------------------------------------------------------------------------- /aten/src/ATen/test/cuda_cudnn_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace at; 9 | using namespace at::native; 10 | 11 | TEST(CUDNNTest, CUDNNTestCUDA) { 12 | if (!at::cuda::is_available()) return; 13 | manual_seed(123); 14 | } 15 | -------------------------------------------------------------------------------- /aten/src/ATen/mkl/README.md: -------------------------------------------------------------------------------- 1 | All files living in this directory are written with the assumption that MKL is available, 2 | which means that these code are not guarded by `#if 
AT_MKL_ENABLED()`. Therefore, whenever 3 | you need to use definitions from here, please guard the `#include` and 4 | definition usages with `#if AT_MKL_ENABLED()` macro, e.g. [SpectralOps.cpp](native/mkl/SpectralOps.cpp). 5 | -------------------------------------------------------------------------------- /aten/src/ATen/native/Fill.h: -------------------------------------------------------------------------------- 1 | // Functions that fill Tensors with constants. Implementations are in Fill.cpp. 2 | 3 | #pragma once 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | namespace at { namespace native { 10 | 11 | DECLARE_DISPATCH(void(*)(TensorIterator&, Scalar), fill_stub); 12 | 13 | }} // namespace at::native 14 | -------------------------------------------------------------------------------- /aten/src/TH/THLogAdd.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_LOG_ADD_INC 2 | #define TH_LOG_ADD_INC 3 | 4 | #include 5 | 6 | TH_API const double THLog2Pi; 7 | TH_API const double THLogZero; 8 | TH_API const double THLogOne; 9 | 10 | TH_API double THLogAdd(double log_a, double log_b); 11 | TH_API double THLogSub(double log_a, double log_b); 12 | TH_API double THExpMinusApprox(const double x); 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /aten/src/TH/THTensorMath.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | -------------------------------------------------------------------------------- /aten/src/ATen/cudnn/README.md: -------------------------------------------------------------------------------- 1 | All files living in this directory are written with the assumption that cuDNN is available, 2 | which means that these code are not guarded by `#if AT_CUDNN_ENABLED()`. 
Therefore, whenever 3 | you need to use definitions from here, please guard the `#include` and 4 | definition usages with `#if AT_CUDNN_ENABLED()` macro, e.g. [native/cudnn/BatchNorm.cpp](native/cudnn/BatchNorm.cpp). 5 | -------------------------------------------------------------------------------- /aten/src/ATen/native/cuda/LaunchUtils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | namespace at { 5 | namespace native { 6 | 7 | // returns 2**floor(log2(n)) 8 | static int lastPow2(unsigned int n) { 9 | n |= (n >> 1); 10 | n |= (n >> 2); 11 | n |= (n >> 4); 12 | n |= (n >> 8); 13 | n |= (n >> 16); 14 | return std::max(1, n - (n >> 1)); 15 | } 16 | 17 | } // namespace native 18 | } // namespace at 19 | -------------------------------------------------------------------------------- /aten/src/THC/THCStorageCopy.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | -------------------------------------------------------------------------------- /aten/src/TH/THTensorMoreMath.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | -------------------------------------------------------------------------------- /aten/src/ATen/native/sparse/SparseTensorMath.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { namespace native { 7 | 8 | sparse::SparseTensor& mul_out_sparse_scalar(sparse::SparseTensor& r, const sparse::SparseTensor& t, Scalar value); 9 | sparse::SparseTensor& mul_out_sparse_zerodim(sparse::SparseTensor& r, const sparse::SparseTensor& t, 
const Tensor& value); 10 | 11 | }} 12 | -------------------------------------------------------------------------------- /aten/src/TH/THTensorFill.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | -------------------------------------------------------------------------------- /cmake/public/mkldnn.cmake: -------------------------------------------------------------------------------- 1 | set(MKLDNN_USE_NATIVE_ARCH ${USE_NATIVE_ARCH}) 2 | 3 | find_package(MKLDNN QUIET) 4 | 5 | if(NOT TARGET caffe2::mkldnn) 6 | add_library(caffe2::mkldnn INTERFACE IMPORTED) 7 | endif() 8 | 9 | set_property( 10 | TARGET caffe2::mkldnn PROPERTY INTERFACE_INCLUDE_DIRECTORIES 11 | ${MKLDNN_INCLUDE_DIR}) 12 | set_property( 13 | TARGET caffe2::mkldnn PROPERTY INTERFACE_LINK_LIBRARIES 14 | ${MKLDNN_LIBRARIES}) 15 | -------------------------------------------------------------------------------- /aten/src/ATen/cuda/CUDADevice.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | namespace at { 9 | namespace cuda { 10 | 11 | inline Device getDeviceFromPtr(void* ptr) { 12 | cudaPointerAttributes attr; 13 | AT_CUDA_CHECK(cudaPointerGetAttributes(&attr, ptr)); 14 | return {DeviceType::CUDA, static_cast(attr.device)}; 15 | } 16 | 17 | }} // namespace at::cuda 18 | -------------------------------------------------------------------------------- /aten/src/ATen/native/cpu/TensorCompareKernel.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace at { namespace native { 8 | 9 | using reduce_fn = 10 | void (*)(Tensor&, Tensor&, const Tensor&, c10::optional); 11 | 12 | DECLARE_DISPATCH(reduce_fn, max_kernel); 
13 | DECLARE_DISPATCH(reduce_fn, min_kernel); 14 | 15 | }} // namespace at::native 16 | -------------------------------------------------------------------------------- /cmake/TorchConfigVersion.cmake.in: -------------------------------------------------------------------------------- 1 | set(PACKAGE_VERSION "@TORCH_VERSION@") 2 | 3 | # Check whether the requested PACKAGE_FIND_VERSION is compatible 4 | if("${PACKAGE_VERSION}" VERSION_LESS "${PACKAGE_FIND_VERSION}") 5 | set(PACKAGE_VERSION_COMPATIBLE FALSE) 6 | else() 7 | set(PACKAGE_VERSION_COMPATIBLE TRUE) 8 | if ("${PACKAGE_VERSION}" VERSION_EQUAL "${PACKAGE_FIND_VERSION}") 9 | set(PACKAGE_VERSION_EXACT TRUE) 10 | endif() 11 | endif() 12 | -------------------------------------------------------------------------------- /aten/src/TH/THMemoryFile.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_MEMORY_FILE_INC 2 | #define TH_MEMORY_FILE_INC 3 | 4 | #include 5 | #include 6 | 7 | TH_API THFile *THMemoryFile_newWithStorage(THCharStorage *storage, const char *mode); 8 | TH_API THFile *THMemoryFile_new(const char *mode); 9 | 10 | TH_API THCharStorage *THMemoryFile_storage(THFile *self); 11 | TH_API void THMemoryFile_longSize(THFile *self, int size); 12 | 13 | #endif 14 | -------------------------------------------------------------------------------- /cmake/Caffe2ConfigVersion.cmake.in: -------------------------------------------------------------------------------- 1 | set(PACKAGE_VERSION "@CAFFE2_VERSION@") 2 | 3 | # Check whether the requested PACKAGE_FIND_VERSION is compatible 4 | if("${PACKAGE_VERSION}" VERSION_LESS "${PACKAGE_FIND_VERSION}") 5 | set(PACKAGE_VERSION_COMPATIBLE FALSE) 6 | else() 7 | set(PACKAGE_VERSION_COMPATIBLE TRUE) 8 | if ("${PACKAGE_VERSION}" VERSION_EQUAL "${PACKAGE_FIND_VERSION}") 9 | set(PACKAGE_VERSION_EXACT TRUE) 10 | endif() 11 | endif() 12 | -------------------------------------------------------------------------------- 
/aten/src/ATen/DynamicLibrary.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { 7 | 8 | struct DynamicLibrary { 9 | AT_DISALLOW_COPY_AND_ASSIGN(DynamicLibrary); 10 | 11 | CAFFE2_API DynamicLibrary(const char* name); 12 | 13 | CAFFE2_API void* sym(const char* name); 14 | 15 | CAFFE2_API ~DynamicLibrary(); 16 | 17 | private: 18 | void* handle = nullptr; 19 | }; 20 | 21 | } // namespace at 22 | -------------------------------------------------------------------------------- /aten/src/THC/THCAllocator.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_ALLOCATOR_INC 2 | #define THC_ALLOCATOR_INC 3 | 4 | #include 5 | 6 | // IPC doesn't support (re)allocation 7 | 8 | class TORCH_CUDA_API THCIpcDeleter { 9 | public: 10 | THCIpcDeleter(std::shared_ptr basePtr); 11 | ~THCIpcDeleter(); 12 | static at::DataPtr makeDataPtr(std::shared_ptr basePtr, void* data); 13 | private: 14 | std::shared_ptr basePtr_; 15 | }; 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /aten/src/ATen/cuda/PinnedMemoryAllocator.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | namespace at { namespace cuda { 11 | 12 | at::Allocator* getPinnedMemoryAllocator() { 13 | auto state = globalContext().lazyInitCUDA(); 14 | return state->cudaHostAllocator; 15 | } 16 | 17 | }} // namespace at::cuda 18 | -------------------------------------------------------------------------------- /aten/src/ATen/native/cuda/CrossKernel.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | namespace at { namespace native { 7 | 8 | void cross_kernel_impl(Tensor& result, const Tensor& x1, const Tensor& x2, 
const int64_t dim) { 9 | legacy::cuda::_th_cross_kernel_out(result, x1, x2, dim); 10 | } 11 | 12 | REGISTER_DISPATCH(cross_stub, &cross_kernel_impl); 13 | 14 | }} 15 | 16 | -------------------------------------------------------------------------------- /aten/src/ATen/env.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # This file copied from tools/setup_helpers/env.py 4 | # PLEASE DO NOT ADD ANYTHING TO THIS FILE, the BUILD_NAMEDTENSOR flag is temporary. 5 | def check_env_flag(name, default=''): 6 | return os.getenv(name, default).upper() in ['ON', '1', 'YES', 'TRUE', 'Y'] 7 | 8 | 9 | def check_negative_env_flag(name, default=''): 10 | return os.getenv(name, default).upper() in ['OFF', '0', 'NO', 'FALSE', 'N'] 11 | 12 | BUILD_NAMEDTENSOR = True 13 | -------------------------------------------------------------------------------- /aten/src/THC/THCStorageCopy.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_STORAGE_COPY_INC 2 | #define THC_STORAGE_COPY_INC 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /aten/src/THC/THCTensorCopy.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_CUDA_TENSOR_COPY_INC 2 | #define TH_CUDA_TENSOR_COPY_INC 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /aten/src/ATen/PTThreadPool.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { 7 | 8 | class CAFFE2_API PTThreadPool : public c10::ThreadPool { 
9 | public: 10 | explicit PTThreadPool( 11 | int pool_size, 12 | int numa_node_id = -1) 13 | : c10::ThreadPool(pool_size, numa_node_id, [](){ 14 | c10::setThreadName("PTThreadPool"); 15 | at::init_num_threads(); 16 | }) {} 17 | }; 18 | 19 | } // namespace at 20 | -------------------------------------------------------------------------------- /aten/src/TH/THSize.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_SIZE_INC 2 | #define TH_SIZE_INC 3 | 4 | #include 5 | #include 6 | 7 | // THTensor functions that would work on a THSize if we had such a class in C++, 8 | // i.e. THTensor functions that depend only on the shape of the tensor, not the type. 9 | 10 | TH_API int THSize_isSameSizeAs(const int64_t *sizeA, int64_t dimsA, const int64_t *sizeB, int64_t dimsB); 11 | TH_API ptrdiff_t THSize_nElement(int64_t dims, int64_t *size); 12 | 13 | #endif 14 | -------------------------------------------------------------------------------- /aten/src/ATen/Tensor.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | /* 4 | * We split Tensor.h into TensorBody.h and TensorMethods.h because we want 5 | * all TensorMethods to be inlined, but they depend on the Dispatcher, 6 | * which in turn depends on many other things, which then depend back on Tensor. 7 | * 8 | * We can break this dependency chain by having the dispatcher only depend on 9 | * TensorBody.h and not TensorMethods.h. 
10 | */ 11 | #include 12 | #include 13 | -------------------------------------------------------------------------------- /aten/src/ATen/mkl/Exceptions.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace at { namespace native { 9 | 10 | static inline void MKL_DFTI_CHECK(MKL_INT status) 11 | { 12 | if (status && !DftiErrorClass(status, DFTI_NO_ERROR)) { 13 | std::ostringstream ss; 14 | ss << "MKL FFT error: " << DftiErrorMessage(status); 15 | throw std::runtime_error(ss.str()); 16 | } 17 | } 18 | 19 | }} // namespace at::native 20 | -------------------------------------------------------------------------------- /aten/src/THC/THC.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_INC 2 | #define THC_INC 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /aten/src/THC/THCStorage.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_STORAGE_INC 2 | #define THC_STORAGE_INC 3 | 4 | #include 5 | #include 6 | 7 | #define THCStorage_(NAME) TH_CONCAT_4(TH,CReal,Storage_,NAME) 8 | 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /aten/src/THC/THCStorageCopy.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | 
-------------------------------------------------------------------------------- /aten/src/ATen/core/Tensor.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | /* 4 | * We split Tensor.h into TensorBody.h and TensorMethods.h because we want 5 | * all TensorMethods to be inlined, but they depend on the Dispatcher, 6 | * which in turn depends on many other things, which then depend back on Tensor. 7 | * 8 | * We can break this dependency chain by having the dispatcher only depend on 9 | * TensorBody.h and not TensorMethods.h. 10 | */ 11 | #include 12 | #include 13 | -------------------------------------------------------------------------------- /aten/src/TH/THGenerateAllTypes.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #error "You must define TH_GENERIC_FILE before including THGenerateAllTypes.h" 3 | #endif 4 | 5 | #ifndef THGenerateManyTypes 6 | #define THAllLocalGenerateManyTypes 7 | #define THGenerateManyTypes 8 | #endif 9 | 10 | #include 11 | #include 12 | 13 | #ifdef THAllLocalGenerateManyTypes 14 | #undef THAllLocalGenerateManyTypes 15 | #undef THGenerateManyTypes 16 | #undef TH_GENERIC_FILE 17 | #endif 18 | -------------------------------------------------------------------------------- /aten/src/ATen/quantized/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | FILE(GLOB_RECURSE ATen_QUANTIZED_HEADERS "*.h") 2 | FILE(GLOB_RECURSE ATen_QUANTIZED_SRCS "*.cpp") 3 | FILE(GLOB_RECURSE ATen_QUANTIZED_TEST_SRCS "*_test.cpp") 4 | EXCLUDE(ATen_QUANTIZED_SRCS "${ATen_QUANTIZED_SRCS}" ${ATen_QUANTIZED_TEST_SRCS}) 5 | 6 | # Pass to parent 7 | set(ATen_QUANTIZED_HEADERS ${ATen_QUANTIZED_HEADERS} PARENT_SCOPE) 8 | set(ATen_QUANTIZED_SRCS ${ATen_QUANTIZED_SRCS} PARENT_SCOPE) 9 | set(ATen_QUANTIZED_TEST_SRCS ${ATen_QUANTIZED_TEST_SRCS} PARENT_SCOPE) 10 | 
-------------------------------------------------------------------------------- /aten/src/TH/THGenerateFloatTypes.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #error "You must define TH_GENERIC_FILE before including THGenerateFloatTypes.h" 3 | #endif 4 | 5 | #ifndef THGenerateManyTypes 6 | #define THFloatLocalGenerateManyTypes 7 | #define THGenerateManyTypes 8 | #endif 9 | 10 | #include 11 | #include 12 | 13 | #ifdef THFloatLocalGenerateManyTypes 14 | #undef THFloatLocalGenerateManyTypes 15 | #undef THGenerateManyTypes 16 | #undef TH_GENERIC_FILE 17 | #endif 18 | -------------------------------------------------------------------------------- /aten/src/THC/generic/THCTensorCopy.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #define THC_GENERIC_FILE "THC/generic/THCTensorCopy.h" 3 | #else 4 | 5 | THC_API void THCTensor_(copy)(THCState *state, THCTensor *self, THCTensor *src); 6 | THC_API void THCTensor_(copyIgnoringOverlaps)(THCState *state, THCTensor *self, THCTensor *src); 7 | 8 | THC_API void THCTensor_(copyAsyncCPU)(THCState *state, THCTensor *self, THTensor *src); 9 | THC_API void THTensor_(copyAsyncCuda)(THCState *state, THTensor *self, THCTensor *src); 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /aten/src/TH/TH.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_INC 2 | #define TH_INC 3 | 4 | #include 5 | 6 | #include 7 | #ifdef USE_LAPACK 8 | #include 9 | #endif 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /aten/src/THC/THCGenerateIntType.h: 
-------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #error "You must define THC_GENERIC_FILE before including THGenerateIntType.h" 3 | #endif 4 | 5 | #define scalar_t int32_t 6 | #define accreal int64_t 7 | #define Real Int 8 | #define CReal CudaInt 9 | #define THC_REAL_IS_INT 10 | #line 1 THC_GENERIC_FILE 11 | #include THC_GENERIC_FILE 12 | #undef scalar_t 13 | #undef accreal 14 | #undef Real 15 | #undef CReal 16 | #undef THC_REAL_IS_INT 17 | 18 | #ifndef THCGenerateAllTypes 19 | #undef THC_GENERIC_FILE 20 | #endif 21 | -------------------------------------------------------------------------------- /aten/src/ATen/cpu/tbb/extra/version_string.ver.in: -------------------------------------------------------------------------------- 1 | #define __TBB_VERSION_STRINGS(N) \ 2 | #N": BUILD_HOST @CMAKE_SYSTEM_NAME@" ENDL \ 3 | #N": BUILD_OS @CMAKE_SYSTEM@" ENDL \ 4 | #N": BUILD_KERNEL @CMAKE_SYSTEM_VERSION@" ENDL \ 5 | #N": BUILD_GCC @CMAKE_CXX_COMPILER_ID@" ENDL \ 6 | #N": BUILD_LIBC Unknown" ENDL \ 7 | #N": BUILD_LD Unknown" ENDL \ 8 | #N": BUILD_TARGET Unknown" ENDL \ 9 | #N": BUILD_COMMAND Unknown" ENDL 10 | 11 | #define __TBB_DATETIME "@_configure_date@" 12 | -------------------------------------------------------------------------------- /aten/src/THC/THCGenerateCharType.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #error "You must define THC_GENERIC_FILE before including THGenerateCharType.h" 3 | #endif 4 | 5 | #define scalar_t int8_t 6 | #define accreal int64_t 7 | #define Real Char 8 | #define CReal CudaChar 9 | #define THC_REAL_IS_CHAR 10 | #line 1 THC_GENERIC_FILE 11 | #include THC_GENERIC_FILE 12 | #undef scalar_t 13 | #undef accreal 14 | #undef Real 15 | #undef CReal 16 | #undef THC_REAL_IS_CHAR 17 | 18 | #ifndef THCGenerateAllTypes 19 | #undef THC_GENERIC_FILE 20 | #endif 21 | 
-------------------------------------------------------------------------------- /aten/src/ATen/InitialTensorOptions.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace at { 6 | 7 | // Represents the initial TensorOptions, before the "defaults" are ever changed. 8 | // This is designed to be used in library code, where the explicit devices, dtypes, etc. are known. 9 | // NOTE: this is not a stable API. 10 | inline TensorOptions initialTensorOptions() { 11 | return TensorOptions(kCPU).dtype(kFloat).layout(kStrided) 12 | .requires_grad(false); 13 | } 14 | 15 | } 16 | -------------------------------------------------------------------------------- /aten/src/ATen/native/Pow.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { 7 | 8 | struct TensorIterator; 9 | 10 | namespace native { 11 | 12 | using pow_tensor_tensor_fn = void (*)(TensorIterator&); 13 | using pow_tensor_scalar_fn = void (*)(TensorIterator&, Scalar); 14 | 15 | DECLARE_DISPATCH(pow_tensor_tensor_fn, pow_tensor_tensor_stub); 16 | DECLARE_DISPATCH(pow_tensor_scalar_fn, pow_tensor_scalar_stub); 17 | 18 | } // namespace native 19 | 20 | } // namespace at 21 | -------------------------------------------------------------------------------- /aten/src/THC/THCGenerateByteType.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #error "You must define THC_GENERIC_FILE before including THGenerateByteType.h" 3 | #endif 4 | 5 | #define scalar_t uint8_t 6 | #define accreal int64_t 7 | #define Real Byte 8 | #define CReal CudaByte 9 | #define THC_REAL_IS_BYTE 10 | #line 1 THC_GENERIC_FILE 11 | #include THC_GENERIC_FILE 12 | #undef scalar_t 13 | #undef accreal 14 | #undef Real 15 | #undef CReal 16 | #undef THC_REAL_IS_BYTE 17 | 18 | #ifndef THCGenerateAllTypes 19 | #undef 
THC_GENERIC_FILE 20 | #endif 21 | -------------------------------------------------------------------------------- /aten/src/THC/THCGenerateLongType.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #error "You must define THC_GENERIC_FILE before including THGenerateLongType.h" 3 | #endif 4 | 5 | #define scalar_t int64_t 6 | #define accreal int64_t 7 | #define Real Long 8 | #define CReal CudaLong 9 | #define THC_REAL_IS_LONG 10 | #line 1 THC_GENERIC_FILE 11 | #include THC_GENERIC_FILE 12 | #undef scalar_t 13 | #undef accreal 14 | #undef Real 15 | #undef CReal 16 | #undef THC_REAL_IS_LONG 17 | 18 | #ifndef THCGenerateAllTypes 19 | #undef THC_GENERIC_FILE 20 | #endif 21 | -------------------------------------------------------------------------------- /aten/src/ATen/cuda/CUDAUtils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace at { namespace cuda { 6 | 7 | // Check if every tensor in a list of tensors matches the current 8 | // device. 
9 | inline bool check_device(ArrayRef ts) { 10 | if (ts.empty()) { 11 | return true; 12 | } 13 | Device curDevice = Device(kCUDA, current_device()); 14 | for (const Tensor& t : ts) { 15 | if (t.device() != curDevice) return false; 16 | } 17 | return true; 18 | } 19 | 20 | }} // namespace at::cuda 21 | -------------------------------------------------------------------------------- /aten/src/TH/THGenerateQTypes.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #error "You must define TH_GENERIC_FILE before including THGenerateQTypes.h" 3 | #endif 4 | 5 | #ifndef THGenerateManyTypes 6 | #define THQLocalGenerateManyTypes 7 | #define THGenerateManyTypes 8 | #endif 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | #ifdef THQLocalGenerateManyTypes 15 | #undef THQLocalGenerateManyTypes 16 | #undef THGenerateManyTypes 17 | #undef TH_GENERIC_FILE 18 | #endif 19 | -------------------------------------------------------------------------------- /aten/src/TH/THTensorEvenMoreMath.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | -------------------------------------------------------------------------------- /aten/src/THC/THCGenerateShortType.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #error "You must define THC_GENERIC_FILE before including THGenerateShortType.h" 3 | #endif 4 | 5 | #define scalar_t int16_t 6 | #define accreal int64_t 7 | #define Real Short 8 | #define CReal CudaShort 9 | #define THC_REAL_IS_SHORT 10 | #line 1 THC_GENERIC_FILE 11 | #include THC_GENERIC_FILE 12 | #undef scalar_t 13 | #undef accreal 14 | #undef Real 15 | #undef CReal 16 | #undef THC_REAL_IS_SHORT 17 | 18 | #ifndef THCGenerateAllTypes 19 
| #undef THC_GENERIC_FILE 20 | #endif 21 | -------------------------------------------------------------------------------- /aten/src/ATen/core/Range.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { 7 | 8 | struct Range { 9 | Range(int64_t begin, int64_t end) 10 | : begin(begin) 11 | , end(end) {} 12 | 13 | int64_t size() const { return end - begin; } 14 | 15 | Range operator/(int64_t divisor) { 16 | return Range(begin / divisor, end / divisor); 17 | } 18 | 19 | int64_t begin; 20 | int64_t end; 21 | }; 22 | 23 | std::ostream& operator<<(std::ostream& out, const Range& range); 24 | 25 | } // namespace at 26 | -------------------------------------------------------------------------------- /aten/src/ATen/core/Reduction.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace at { 4 | namespace Reduction { 5 | 6 | // NB: Keep this in sync with Reduction class in torch/nn/_reduction.py 7 | // These constants control the reduction behavior of loss functions. 8 | // Ideally, this would be a scoped enum, but jit doesn't support that 9 | enum Reduction { 10 | None, // Do not reduce 11 | Mean, // (Possibly weighted) mean of losses 12 | Sum, // Sum losses 13 | END 14 | }; 15 | } // namespace Reduction 16 | } // namespace at 17 | -------------------------------------------------------------------------------- /cmake/Modules/FindCUB.cmake: -------------------------------------------------------------------------------- 1 | # Try to find the CUB library and headers. 
2 | # CUB_FOUND - system has CUB 3 | # CUB_INCLUDE_DIRS - the CUB include directory 4 | 5 | find_path(CUB_INCLUDE_DIR 6 | NAMES cub/cub.cuh 7 | DOC "The directory where CUB includes reside" 8 | ) 9 | 10 | set(CUB_INCLUDE_DIRS ${CUB_INCLUDE_DIR}) 11 | 12 | include(FindPackageHandleStandardArgs) 13 | find_package_handle_standard_args(CUB 14 | FOUND_VAR CUB_FOUND 15 | REQUIRED_VARS CUB_INCLUDE_DIR 16 | ) 17 | 18 | mark_as_advanced(CUB_FOUND) 19 | -------------------------------------------------------------------------------- /aten/src/ATen/hip/impl/HIPCachingAllocatorMasqueradingAsCUDA.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace c10 { namespace hip { 8 | namespace HIPCachingAllocatorMasqueradingAsCUDA { 9 | 10 | Allocator* get(); 11 | C10_HIP_API void recordStreamMasqueradingAsCUDA(void *ptr, HIPStreamMasqueradingAsCUDA stream); 12 | 13 | } // namespace HIPCachingAllocatorMasqueradingAsCUDA 14 | }} // namespace c10::hip 15 | -------------------------------------------------------------------------------- /aten/src/THC/THCTensorRandom.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_CUDA_TENSOR_RANDOM_INC 2 | #define TH_CUDA_TENSOR_RANDOM_INC 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | #include 13 | 14 | THC_API void THCRandom_getRNGState(at::Generator *gen_, THByteTensor *rng_state); 15 | THC_API void THCRandom_setRNGState(at::Generator *gen_, THByteTensor *rng_state); 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /aten/src/THC/generic/THCTensorTopK.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #define THC_GENERIC_FILE "THC/generic/THCTensorTopK.h" 3 | #else 4 | 5 | /* Returns the set of all kth smallest (or largest) elements, 
depending */ 6 | /* on `dir` */ 7 | THC_API void THCTensor_(topk)(THCState* state, 8 | THCTensor* topK, 9 | THCudaLongTensor* indices, 10 | THCTensor* input, 11 | int64_t k, int dim, int dir, int sorted); 12 | 13 | #endif // THC_GENERIC_FILE 14 | -------------------------------------------------------------------------------- /aten/src/ATen/cuda/CUDAConfig.h.in: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // Test these using #if AT_CUDNN_ENABLED(), not #ifdef, so that it's 4 | // obvious if you forgot to include Config.h 5 | // c.f. https://stackoverflow.com/questions/33759787/generating-an-error-if-checked-boolean-macro-is-not-defined 6 | // 7 | // NB: This header MUST NOT be included from other headers; it should 8 | // only be included from C++ files. 9 | 10 | #define AT_CUDNN_ENABLED() @AT_CUDNN_ENABLED@ 11 | #define AT_ROCM_ENABLED() @AT_ROCM_ENABLED@ 12 | 13 | #define NVCC_FLAGS_EXTRA "@NVCC_FLAGS_EXTRA@" 14 | -------------------------------------------------------------------------------- /aten/src/ATen/native/cpu/DepthwiseConvKernel.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | /* 7 | Depthwise 3x3 Winograd convolution operator 8 | */ 9 | 10 | namespace at { 11 | namespace native { 12 | 13 | using convolution_depthwise3x3_winograd_fn = 14 | Tensor (*)(const Tensor &, const Tensor &, const Tensor &,IntArrayRef, IntArrayRef, int64_t); 15 | 16 | DECLARE_DISPATCH(convolution_depthwise3x3_winograd_fn, convolution_depthwise3x3_winograd_stub); 17 | 18 | } // namespace native 19 | } // namespace at 20 | -------------------------------------------------------------------------------- /aten/src/ATen/test/verify_api_visibility.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifdef AT_CUDNN_ENABLED 4 | #error "AT_CUDNN_ENABLED should not be visible in public 
headers" 5 | #endif 6 | 7 | #ifdef AT_MKL_ENABLED 8 | #error "AT_MKL_ENABLED should not be visible in public headers" 9 | #endif 10 | 11 | #ifdef AT_MKLDNN_ENABLED 12 | #error "AT_MKLDNN_ENABLED should not be visible in public headers" 13 | #endif 14 | 15 | #ifdef CAFFE2_STATIC_LINK_CUDA 16 | #error "CAFFE2_STATIC_LINK_CUDA should not be visible in public headers" 17 | #endif 18 | 19 | auto main() -> int {} 20 | -------------------------------------------------------------------------------- /aten/src/THC/THCSleep.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | 4 | __global__ void spin_kernel(int64_t cycles) 5 | { 6 | // see concurrentKernels CUDA sampl 7 | int64_t start_clock = clock64(); 8 | int64_t clock_offset = 0; 9 | while (clock_offset < cycles) 10 | { 11 | clock_offset = clock64() - start_clock; 12 | } 13 | } 14 | 15 | void THC_sleep(THCState* state, int64_t cycles) 16 | { 17 | dim3 grid(1); 18 | dim3 block(1); 19 | spin_kernel<<>>(cycles); 20 | THCudaCheck(cudaGetLastError()); 21 | } 22 | -------------------------------------------------------------------------------- /aten/src/ATen/native/cuda/TensorShapeCUDA.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace at { 5 | namespace native { 6 | 7 | // this needs to be split along CPU/CUDA lines because we don't have a consistent 8 | // way of getting the allocator to use for a device (c10::GetAllocator is not 9 | // the same as at::cuda::getCUDADeviceAllocator(). 
10 | Tensor& set_cuda_(Tensor& result) { 11 | Storage storage(result.dtype(), 0, at::cuda::getCUDADeviceAllocator(), true); 12 | return result.set_(storage, 0, {0}, {}); 13 | } 14 | 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /aten/src/THC/THCGenerateBoolType.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #error "You must define THC_GENERIC_FILE before including THCGenerateBoolType.h" 3 | #endif 4 | 5 | #define scalar_t bool 6 | #define ureal bool 7 | #define accreal int64_t 8 | #define Real Bool 9 | #define CReal CudaBool 10 | #define THC_REAL_IS_BOOL 11 | #line 1 THC_GENERIC_FILE 12 | #include THC_GENERIC_FILE 13 | #undef scalar_t 14 | #undef ureal 15 | #undef accreal 16 | #undef Real 17 | #undef CReal 18 | #undef THC_REAL_IS_BOOL 19 | 20 | #ifndef THCGenerateBoolType 21 | #undef THC_GENERIC_FILE 22 | #endif 23 | -------------------------------------------------------------------------------- /aten/src/ATen/ThreadLocalDebugInfo.cpp: -------------------------------------------------------------------------------- 1 | #include "ATen/ThreadLocalDebugInfo.h" 2 | 3 | namespace at { 4 | 5 | namespace { 6 | thread_local std::shared_ptr debug_info; 7 | } 8 | 9 | std::shared_ptr getThreadLocalDebugInfo() noexcept { 10 | return debug_info; 11 | } 12 | 13 | std::shared_ptr setThreadLocalDebugInfo( 14 | std::shared_ptr info) noexcept { 15 | auto ret = std::move(debug_info); 16 | debug_info = std::move(info); 17 | return ret; 18 | } 19 | 20 | } // namespace at 21 | -------------------------------------------------------------------------------- /aten/src/ATen/cudnn/cudnn-wrapper.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #define STRINGIFY(x) #x 6 | #define STRING(x) STRINGIFY(x) 7 | 8 | #if CUDNN_MAJOR < 6 9 | #pragma message ("CuDNN v" STRING(CUDNN_MAJOR) " found, but 
need at least CuDNN v6. You can get the latest version of CuDNN from https://developer.nvidia.com/cudnn or disable CuDNN with USE_CUDNN=0") 10 | #pragma message "We strongly encourage you to move to 6.0 and above." 11 | #pragma message "This message is intended to annoy you enough to update." 12 | #endif 13 | 14 | #undef STRINGIFY 15 | #undef STRING 16 | 17 | -------------------------------------------------------------------------------- /aten/src/THC/THCGenerateDoubleType.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #error "You must define THC_GENERIC_FILE before including THGenerateDoubleType.h" 3 | #endif 4 | 5 | #define scalar_t double 6 | #define accreal double 7 | #define Real Double 8 | #define CReal CudaDouble 9 | #define THC_REAL_IS_DOUBLE 10 | #line 1 THC_GENERIC_FILE 11 | #include THC_GENERIC_FILE 12 | #undef scalar_t 13 | #undef accreal 14 | #undef Real 15 | #undef CReal 16 | #undef THC_REAL_IS_DOUBLE 17 | 18 | #ifndef THCGenerateAllTypes 19 | #ifndef THCGenerateFloatTypes 20 | #undef THC_GENERIC_FILE 21 | #endif 22 | #endif 23 | -------------------------------------------------------------------------------- /aten/src/ATen/native/cpu/SoftmaxKernel.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { 7 | namespace native { 8 | 9 | using forward_fn = void(*)(Tensor &, const Tensor &); 10 | using backward_fn = void(*)(Tensor &, const Tensor &, const Tensor&); 11 | 12 | DECLARE_DISPATCH(forward_fn, softmax_lastdim_kernel); 13 | DECLARE_DISPATCH(forward_fn, log_softmax_lastdim_kernel); 14 | DECLARE_DISPATCH(backward_fn, softmax_backward_lastdim_kernel); 15 | DECLARE_DISPATCH(backward_fn, log_softmax_backward_lastdim_kernel); 16 | 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /aten/src/ATen/templates/LegacyTHFunctions.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // ${generated_comment} 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | namespace c10 { 10 | class Scalar; 11 | } 12 | namespace at { 13 | struct Generator; 14 | class Tensor; 15 | struct Type; 16 | } // namespace at 17 | 18 | namespace at { 19 | namespace native { 20 | namespace legacy { 21 | namespace ${namespace} { 22 | 23 | ${legacy_th_declarations} 24 | 25 | } // namespace th 26 | } // namespace legacy 27 | } // namespace native 28 | } // namespace at 29 | -------------------------------------------------------------------------------- /aten/src/THC/THCStorage.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | #if CUDA_VERSION >= 7000 || defined(__HIP_PLATFORM_HCC__) 7 | #include 8 | #endif 9 | 10 | #include 11 | 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | 18 | #include 19 | #include 20 | -------------------------------------------------------------------------------- /cmake/Modules_CUDA_fix/FindCUDA.cmake: -------------------------------------------------------------------------------- 1 | # This is a wrapper of the upstream `./upstream/FindCUDA.cmake` that 2 | # automatically includes `./upstream/CMakeInitializeConfigs.cmake` before 3 | # `./upstream/FindCUDA.cmake`. The `CMakeInitializeConfigs.cmake`, which is 4 | # absent in old CMake versions, creates some necessary variables for the later 5 | # to run. 6 | # See ./README.md for details. 
7 | 8 | set(UPSTREAM_FIND_CUDA_DIR "${CMAKE_CURRENT_LIST_DIR}/upstream/") 9 | 10 | include("${UPSTREAM_FIND_CUDA_DIR}/CMakeInitializeConfigs.cmake") 11 | include("${UPSTREAM_FIND_CUDA_DIR}/FindCUDA.cmake") 12 | -------------------------------------------------------------------------------- /aten/src/ATen/core/dispatch/README.md: -------------------------------------------------------------------------------- 1 | This folder contains the c10 dispatcher. This dispatcher is a single point 2 | through which we are planning to route all kernel calls. 3 | Existing dispatch mechanisms from legacy PyTorch or caffe2 are planned to 4 | be replaced. 5 | 6 | This folder contains the following files: 7 | - Dispatcher.h: Main facade interface. Code using the dispatcher should only use this. 8 | - DispatchTable.h: Implementation of the actual dispatch mechanism. Hash table with kernels, lookup, ... 9 | - KernelFunction.h: The core interface (i.e. function pointer) for calling a kernel 10 | -------------------------------------------------------------------------------- /aten/src/THC/generic/THCTensorRandom.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #define THC_GENERIC_FILE "THC/generic/THCTensorRandom.h" 3 | #else 4 | 5 | #include "ATen/core/Generator.h" 6 | 7 | #if defined(THC_REAL_IS_FLOAT) || defined(THC_REAL_IS_DOUBLE) || defined(THC_REAL_IS_HALF) 8 | 9 | THC_API void THCTensor_(multinomialAliasSetup)(struct THCState *state, THCTensor *probs, THCudaLongTensor *J, THCTensor *q); 10 | THC_API void THCTensor_(multinomialAliasDraw)(THCState *state, THCudaLongTensor *self, THCTensor *_q, THCudaLongTensor *_J, int n_sample, at::Generator* gen_); 11 | 12 | #endif 13 | #endif 14 | -------------------------------------------------------------------------------- /aten/src/ATen/Config.h.in: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // Test these 
using #if AT_MKL_ENABLED(), not #ifdef, so that it's 4 | // obvious if you forgot to include Config.h 5 | // c.f. https://stackoverflow.com/questions/33759787/generating-an-error-if-checked-boolean-macro-is-not-defined 6 | // 7 | // DO NOT put the macros for CUDA libraries in this file; they belong in cuda/CUDAConfig.h 8 | 9 | #define AT_MKLDNN_ENABLED() @AT_MKLDNN_ENABLED@ 10 | #define AT_MKL_ENABLED() @AT_MKL_ENABLED@ 11 | #define AT_NNPACK_ENABLED() @AT_NNPACK_ENABLED@ 12 | #define CAFFE2_STATIC_LINK_CUDA() @CAFFE2_STATIC_LINK_CUDA_INT@ 13 | -------------------------------------------------------------------------------- /aten/src/ATen/native/PointwiseOps.h: -------------------------------------------------------------------------------- 1 | // Ternary and higher-order pointwise operations 2 | #pragma once 3 | 4 | #include 5 | #include 6 | 7 | namespace at { 8 | 9 | struct TensorIterator; 10 | 11 | namespace native { 12 | 13 | using pointwise_fn = void (*)(TensorIterator&, Scalar scalar); 14 | 15 | DECLARE_DISPATCH(pointwise_fn, addcmul_stub); 16 | DECLARE_DISPATCH(pointwise_fn, addcdiv_stub); 17 | DECLARE_DISPATCH(pointwise_fn, smooth_l1_backward_stub); 18 | DECLARE_DISPATCH(pointwise_fn, mse_backward_stub); 19 | 20 | } // namespace native 21 | } // namespace at 22 | -------------------------------------------------------------------------------- /aten/src/THC/generic/THCTensorMode.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #define THC_GENERIC_FILE "THC/generic/THCTensorMode.h" 3 | #else 4 | 5 | /* Returns the mode, and index of the mode, for the set of values 6 | * along a given dimension in the input tensor. 
*/ 7 | THC_API void THCTensor_(mode)(THCState *state, 8 | THCTensor *values, 9 | THCudaLongTensor *indices, 10 | THCTensor *input, 11 | int dimension, 12 | int keepdim); 13 | 14 | #endif // THC_GENERIC_FILE 15 | -------------------------------------------------------------------------------- /aten/conda/meta.yaml: -------------------------------------------------------------------------------- 1 | {% set version = "0.1.dev" %} 2 | 3 | package: 4 | name: aten 5 | version: {{ version }} 6 | 7 | source: 8 | path: .. 9 | 10 | build: 11 | number: 1 12 | skip: True # [win] 13 | script_env: 14 | - CONDA_CMAKE_ARGS 15 | 16 | requirements: 17 | build: 18 | - cmake 19 | - pyyaml 20 | - setuptools 21 | - python 22 | - mkl # [not osx] 23 | run: 24 | - mkl # [not osx] 25 | 26 | about: 27 | home: https://github.com/zdevito/ATen 28 | license: BSD 29 | summary: A TENsor library for C++11 30 | 31 | extra: 32 | recipe-maintainers: 33 | - ezyang 34 | -------------------------------------------------------------------------------- /aten/src/TH/THSize.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int THSize_isSameSizeAs(const int64_t *sizeA, int64_t dimsA, const int64_t *sizeB, int64_t dimsB) { 4 | int d; 5 | if (dimsA != dimsB) 6 | return 0; 7 | for(d = 0; d < dimsA; ++d) 8 | { 9 | if(sizeA[d] != sizeB[d]) 10 | return 0; 11 | } 12 | return 1; 13 | } 14 | 15 | ptrdiff_t THSize_nElement(int64_t dims, int64_t *size) { 16 | if(dims == 0) 17 | return 0; 18 | else 19 | { 20 | ptrdiff_t nElement = 1; 21 | int d; 22 | for(d = 0; d < dims; d++) 23 | nElement *= size[d]; 24 | return nElement; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /aten/src/THC/THCTensorMathMagma.cuh: -------------------------------------------------------------------------------- 1 | #ifndef THC_TENSOR_MATH_MAGMA_CUH 2 | #define THC_TENSOR_MATH_MAGMA_CUH 3 | 4 | #ifdef USE_MAGMA 5 | #include 6 | #else 7 | 
#include 8 | #endif 9 | 10 | #ifdef USE_MAGMA 11 | template 12 | static inline T* th_magma_malloc_pinned(size_t n) 13 | { 14 | void* ptr; 15 | if (MAGMA_SUCCESS != magma_malloc_pinned(&ptr, n * sizeof(T))) 16 | THError("$ Torch: not enough memory: you tried to allocate %dGB. Buy new RAM!", n/268435456); 17 | return reinterpret_cast(ptr); 18 | } 19 | 20 | #endif 21 | 22 | #endif // THC_TENSOR_MATH_MAGMA_CUH 23 | -------------------------------------------------------------------------------- /aten/src/ATen/hip/impl/HIPCachingAllocatorMasqueradingAsCUDA.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace c10 { namespace hip { 4 | namespace HIPCachingAllocatorMasqueradingAsCUDA { 5 | 6 | Allocator* get() { 7 | static HIPAllocatorMasqueradingAsCUDA allocator(HIPCachingAllocator::get()); 8 | return &allocator; 9 | } 10 | 11 | void recordStreamMasqueradingAsCUDA(void *ptr, HIPStreamMasqueradingAsCUDA stream) { 12 | HIPCachingAllocator::recordStream(ptr, stream.hip_stream()); 13 | } 14 | 15 | } // namespace HIPCachingAllocatorMasqueradingAsCUDA 16 | }} // namespace c10::hip 17 | -------------------------------------------------------------------------------- /aten/src/ATen/native/mkldnn/Utils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { namespace native { 7 | 8 | std::vector conv_output_size( 9 | IntArrayRef input_size, 10 | IntArrayRef kernel_size, 11 | IntArrayRef padding, 12 | IntArrayRef stride, 13 | IntArrayRef dilation); 14 | 15 | std::vector pool_output_sizes( 16 | IntArrayRef input_size, 17 | IntArrayRef kernel_size, 18 | IntArrayRef stride, 19 | IntArrayRef padding_l, 20 | IntArrayRef padding_r, 21 | IntArrayRef dilation, 22 | bool ceil_mode); 23 | }} 24 | -------------------------------------------------------------------------------- /aten/src/TH/THGenerateIntTypes.h: 
-------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #error "You must define TH_GENERIC_FILE before including THGenerateIntTypes.h" 3 | #endif 4 | 5 | #ifndef THGenerateManyTypes 6 | #define THIntLocalGenerateManyTypes 7 | #define THGenerateManyTypes 8 | #endif 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #ifdef THIntLocalGenerateManyTypes 17 | #undef THIntLocalGenerateManyTypes 18 | #undef THGenerateManyTypes 19 | #undef TH_GENERIC_FILE 20 | #endif 21 | -------------------------------------------------------------------------------- /aten/src/THC/THCTensorMode.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | 17 | #include 18 | #include 19 | -------------------------------------------------------------------------------- /aten/src/ATen/cpu/FlushDenormal.h: -------------------------------------------------------------------------------- 1 | /// Flush-To-Zero and Denormals-Are-Zero mode 2 | /// 3 | /// Flush-To-Zero (FTZ) and Denormals-Are-Zero (DAZ) are modes that bypass 4 | /// IEEE 754 methods of dealing with denormal floating-point numbers on x86-64 5 | /// and some x86 CPUs. They result in reduced precision for values near zero, 6 | /// but increased performance. 
7 | /// 8 | /// See https://software.intel.com/en-us/articles/x87-and-sse-floating-point-assists-in-ia-32-flush-to-zero-ftz-and-denormals-are-zero-daz 9 | 10 | namespace at { namespace cpu { 11 | 12 | bool set_flush_denormal(bool on); 13 | 14 | }} // namespace at::cpu 15 | -------------------------------------------------------------------------------- /aten/src/THC/THCTensorTopK.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include // for std::min 10 | 11 | #if CUDA_VERSION >= 7000 || defined __HIP_PLATFORM_HCC__ 12 | #include 13 | #endif 14 | 15 | #include 16 | 17 | #include 18 | #include 19 | 20 | -------------------------------------------------------------------------------- /cmake/Modules/Findpybind11.cmake: -------------------------------------------------------------------------------- 1 | # Try to find the pybind11 library and headers. 
2 | # pybind11_FOUND - system has pybind11 3 | # pybind11_INCLUDE_DIRS - the pybind11 include directory 4 | 5 | find_path(pybind11_INCLUDE_DIR 6 | NAMES pybind11/pybind11.h 7 | DOC "The directory where pybind11 includes reside" 8 | ) 9 | 10 | set(pybind11_INCLUDE_DIRS ${pybind11_INCLUDE_DIR}) 11 | 12 | include(FindPackageHandleStandardArgs) 13 | find_package_handle_standard_args(pybind11 14 | FOUND_VAR pybind11_FOUND 15 | REQUIRED_VARS pybind11_INCLUDE_DIR 16 | ) 17 | 18 | mark_as_advanced(pybind11_FOUND) 19 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/confu.yaml: -------------------------------------------------------------------------------- 1 | name: qnnpack 2 | title: Quantized UINT8 Functions for Mobile 3 | license: Apache 2.0 4 | deps: 5 | - name: cpuinfo 6 | url: https://github.com/pytorch/cpuinfo.git 7 | - name: fxdiv 8 | url: https://github.com/Maratyszcza/FXdiv.git 9 | - name: psimd 10 | url: https://github.com/Maratyszcza/psimd.git 11 | - name: pthreadpool 12 | url: https://github.com/Maratyszcza/pthreadpool.git 13 | - name: FP16 14 | url: https://github.com/Maratyszcza/FP16.git 15 | - name: clog 16 | dir: deps/clog 17 | - name: googletest 18 | - name: googlebenchmark 19 | -------------------------------------------------------------------------------- /aten/src/THC/THCGenerateHalfType.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #error "You must define THC_GENERIC_FILE before including THGenerateHalfType.h" 3 | #endif 4 | 5 | #include 6 | 7 | #define scalar_t THHalf 8 | #define accreal float 9 | #define Real Half 10 | 11 | #define CReal CudaHalf 12 | 13 | #define THC_REAL_IS_HALF 14 | #line 1 THC_GENERIC_FILE 15 | #include THC_GENERIC_FILE 16 | #undef scalar_t 17 | #undef accreal 18 | #undef Real 19 | 20 | #undef CReal 21 | 22 | #undef THC_REAL_IS_HALF 23 | 24 | #ifndef THCGenerateAllTypes 25 | #ifndef 
THCGenerateFloatTypes 26 | #undef THC_GENERIC_FILE 27 | #endif 28 | #endif 29 | -------------------------------------------------------------------------------- /aten/src/TH/THGenerateBFloat16Type.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #error "You must define TH_GENERIC_FILE before including THGenerateBFloat16Type.h" 3 | #endif 4 | 5 | #include 6 | #define scalar_t at::BFloat16 7 | #define accreal double 8 | #define TH_CONVERT_ACCREAL_TO_REAL(_val) (scalar_t)(_val) 9 | #define Real BFloat16 10 | #define TH_REAL_IS_BFLOAT16 11 | #line 1 TH_GENERIC_FILE 12 | #include TH_GENERIC_FILE 13 | #undef accreal 14 | #undef scalar_t 15 | #undef Real 16 | #undef TH_REAL_IS_BFLOAT16 17 | #undef TH_CONVERT_ACCREAL_TO_REAL 18 | 19 | #ifndef THGenerateManyTypes 20 | #undef TH_GENERIC_FILE 21 | #endif 22 | -------------------------------------------------------------------------------- /aten/src/TH/THGenerateQInt8Type.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #error "You must define TH_GENERIC_FILE before including THGenerateQInt8Type.h" 3 | #endif 4 | 5 | #define quantized_t c10::qint8 6 | #define scalar_t int8_t 7 | #define Real QInt8 8 | #define RealUnderlying Char 9 | #define THQUANTIZED 10 | #define THQINT8 11 | #define TH_REAL_IS_BYTE 12 | #line 1 TH_GENERIC_FILE 13 | #include TH_GENERIC_FILE 14 | #undef scalar_t 15 | #undef quantized_t 16 | #undef Real 17 | #undef RealUnderlying 18 | #undef TH_REAL_IS_BYTE 19 | #undef THQINT8 20 | #undef THQUANTIZED 21 | 22 | #ifndef THGenerateManyTypes 23 | #undef TH_GENERIC_FILE 24 | #endif 25 | -------------------------------------------------------------------------------- /aten/src/TH/THGenerateQInt32Type.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #error "You must define TH_GENERIC_FILE before including 
THGenerateQInt32Type.h" 3 | #endif 4 | 5 | #define quantized_t c10::qint32 6 | #define scalar_t int32_t 7 | #define Real QInt32 8 | #define RealUnderlying Int 9 | #define THQUANTIZED 10 | #define THQINT32 11 | #define TH_REAL_IS_BYTE 12 | #line 1 TH_GENERIC_FILE 13 | #include TH_GENERIC_FILE 14 | #undef scalar_t 15 | #undef quantized_t 16 | #undef Real 17 | #undef RealUnderlying 18 | #undef TH_REAL_IS_BYTE 19 | #undef THQINT32 20 | #undef THQUANTIZED 21 | 22 | #ifndef THGenerateManyTypes 23 | #undef TH_GENERIC_FILE 24 | #endif 25 | -------------------------------------------------------------------------------- /aten/src/TH/THGenerateQUInt8Type.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #error "You must define TH_GENERIC_FILE before including THGenerateQUInt8Type.h" 3 | #endif 4 | 5 | #define quantized_t c10::quint8 6 | #define scalar_t uint8_t 7 | #define Real QUInt8 8 | #define RealUnderlying Byte 9 | #define THQUANTIZED 10 | #define THQUINT8 11 | #define TH_REAL_IS_BYTE 12 | #line 1 TH_GENERIC_FILE 13 | #include TH_GENERIC_FILE 14 | #undef scalar_t 15 | #undef quantized_t 16 | #undef Real 17 | #undef RealUnderlying 18 | #undef TH_REAL_IS_BYTE 19 | #undef THQUINT8 20 | #undef THQUANTIZED 21 | 22 | #ifndef THGenerateManyTypes 23 | #undef TH_GENERIC_FILE 24 | #endif 25 | -------------------------------------------------------------------------------- /aten/src/TH/THVector.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_VECTOR_INC 2 | #define TH_VECTOR_INC 3 | 4 | #include 5 | #define THVector_(NAME) TH_CONCAT_4(TH,Real,Vector_,NAME) 6 | 7 | /* We are going to use dynamic dispatch, and want only to generate declarations 8 | * of the vector functions */ 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | 18 | #include 19 | #include 20 | 21 | #endif // TH_VECTOR_INC 22 | 
-------------------------------------------------------------------------------- /aten/src/THC/generic/THCTensorScatterGather.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #define THC_GENERIC_FILE "THC/generic/THCTensorScatterGather.h" 3 | #else 4 | 5 | THC_API void THCTensor_(gather)(THCState* state, THCTensor *tensor, THCTensor *src, int dim, THCudaLongTensor *index); 6 | THC_API void THCTensor_(scatter)(THCState* state, THCTensor *tensor, int dim, THCudaLongTensor *index, THCTensor *src); 7 | THC_API void THCTensor_(scatterAdd)(THCState* state, THCTensor *tensor, int dim, THCudaLongTensor *index, THCTensor *src); 8 | THC_API void THCTensor_(scatterFill)(THCState* state, THCTensor *tensor, int dim, THCudaLongTensor *index, scalar_t value); 9 | 10 | #endif 11 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/src/requantization/runtime-sse2.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Facebook, Inc. and its affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | PYTORCH_QNNP_INLINE __m128i 14 | sub_zero_point(const __m128i va, const __m128i vzp) { 15 | #if PYTORCH_QNNPACK_RUNTIME_QUANTIZATION 16 | // Run-time quantization 17 | return _mm_sub_epi16(va, vzp); 18 | #else 19 | // Design-time quantization (no-op) 20 | return va; 21 | #endif 22 | } 23 | -------------------------------------------------------------------------------- /aten/src/THC/THCGenerateBFloat16Type.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #error "You must define THC_GENERIC_FILE before including THCGenerateBFloat16Type.h" 3 | #endif 4 | #include 5 | 6 | #define scalar_t at::BFloat16 7 | #define accreal float 8 | #define Real BFloat16 9 | 10 | #define CReal CudaBFloat16 11 | 12 | #define THC_REAL_IS_BFLOAT16 13 | #line 1 THC_GENERIC_FILE 14 | #include THC_GENERIC_FILE 15 | #undef scalar_t 16 | #undef accreal 17 | #undef Real 18 | 19 | #undef CReal 20 | 21 | #undef THC_REAL_IS_BFLOAT16 22 | 23 | #ifndef THCGenerateAllTypes 24 | #ifndef THCGenerateFloatTypes 25 | #undef THC_GENERIC_FILE 26 | #endif 27 | #endif 28 | -------------------------------------------------------------------------------- /cmake/public/threads.cmake: -------------------------------------------------------------------------------- 1 | find_package(Threads REQUIRED) 2 | # For newer CMake, Threads::Threads is already defined. Otherwise, we will 3 | # provide a backward compatible wrapper for Threads::Threads. 
4 | if(THREADS_FOUND AND NOT TARGET Threads::Threads) 5 | add_library(Threads::Threads INTERFACE IMPORTED) 6 | 7 | if(THREADS_HAVE_PTHREAD_ARG) 8 | set_property(TARGET Threads::Threads 9 | PROPERTY INTERFACE_COMPILE_OPTIONS "-pthread") 10 | endif() 11 | 12 | if(CMAKE_THREAD_LIBS_INIT) 13 | set_property(TARGET Threads::Threads 14 | PROPERTY INTERFACE_LINK_LIBRARIES "${CMAKE_THREAD_LIBS_INIT}") 15 | endif() 16 | endif() 17 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/src/requantization/runtime-neon.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Facebook, Inc. and its affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | PYTORCH_QNNP_INLINE uint16x8_t 14 | sub_zero_point(const uint8x8_t va, const uint8x8_t vzp) { 15 | #if PYTORCH_QNNPACK_RUNTIME_QUANTIZATION 16 | // Run-time quantization 17 | return vsubl_u8(va, vzp); 18 | #else 19 | // Design-time quantization 20 | return vmovl_u8(va); 21 | #endif 22 | } 23 | -------------------------------------------------------------------------------- /aten/src/THC/THCTensor.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_TENSOR_INC 2 | #define THC_TENSOR_INC 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #define THCTensor_(NAME) TH_CONCAT_4(TH,CReal,Tensor_,NAME) 9 | 10 | #define THC_DESC_BUFF_LEN 64 11 | 12 | typedef struct THC_CLASS THCDescBuff 13 | { 14 | char str[THC_DESC_BUFF_LEN]; 15 | } THCDescBuff; 16 | 17 | #include 18 | #include 19 | 20 | #include 21 | #include 22 | 23 | #include 24 | #include 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /aten/src/ATen/native/Lerp.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { 7 | namespace native { 8 | 9 | using lerp_fn_scalar = void (*)( 10 | at::Tensor& ret, 11 | const at::Tensor& self, 12 | const at::Tensor& end, 13 | Scalar weight); 14 | 15 | using lerp_fn_tensor = void (*)( 16 | at::Tensor& ret, 17 | const at::Tensor& self, 18 | const at::Tensor& end, 19 | const at::Tensor& weights); 20 | 21 | DECLARE_DISPATCH(lerp_fn_scalar, lerp_kernel_scalar_weight); 22 | DECLARE_DISPATCH(lerp_fn_tensor, lerp_kernel_tensor_weight); 23 | 24 | } // namespace native 25 | } // namespace at 26 | -------------------------------------------------------------------------------- /aten/src/TH/generic/THTensorLapack.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #define TH_GENERIC_FILE "TH/generic/THTensorLapack.h" 3 | #else 4 | 5 | TH_API void THTensor_(gels)(THTensor *rb_, THTensor *ra_, THTensor *b_, THTensor *a_); 6 | TH_API void THTensor_(geev)(THTensor *re_, THTensor *rv_, THTensor *a_, bool eigenvectors); 7 | TH_API void THTensor_(potri)(THTensor *ra_, THTensor *a, bool upper); 8 | TH_API void THTensor_(geqrf)(THTensor *ra_, THTensor *rtau_, THTensor *a); 9 | TH_API void THTensor_(orgqr)(THTensor *ra_, THTensor *a, THTensor *tau); 10 | TH_API void THTensor_(ormqr)(THTensor *ra_, THTensor *a, THTensor *tau, THTensor *c, bool left, bool transpose); 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- /aten/src/ATen/DLConvertor.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | // this convertor will: 8 | // 1) take a Tensor object and wrap it in the DLPack tensor 9 | // 2) take a dlpack tensor and convert it to the ATen Tensor 10 | 11 | namespace at { 12 | 13 | CAFFE2_API ScalarType 
toScalarType(const DLDataType& dtype); 14 | CAFFE2_API DLManagedTensor* toDLPack(const Tensor& src); 15 | CAFFE2_API Tensor fromDLPack(const DLManagedTensor* src); 16 | CAFFE2_API DLDataType getDLDataType(const Tensor& t); 17 | CAFFE2_API DLContext getDLContext(const Tensor& tensor, const int64_t& device_id); 18 | 19 | } //namespace at 20 | -------------------------------------------------------------------------------- /aten/src/THC/THCGenerateFloatType.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #error "You must define THC_GENERIC_FILE before including THGenerateFloatType.h" 3 | #endif 4 | 5 | #define scalar_t float 6 | /* FIXME: fp64 has bad performance on some platforms; avoid using it unless 7 | we opt into it? */ 8 | #define accreal float 9 | #define Real Float 10 | #define CReal Cuda 11 | #define THC_REAL_IS_FLOAT 12 | #line 1 THC_GENERIC_FILE 13 | #include THC_GENERIC_FILE 14 | #undef scalar_t 15 | #undef accreal 16 | #undef Real 17 | #undef CReal 18 | #undef THC_REAL_IS_FLOAT 19 | 20 | #ifndef THCGenerateAllTypes 21 | #ifndef THCGenerateFloatTypes 22 | #undef THC_GENERIC_FILE 23 | #endif 24 | #endif 25 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/init_qnnpack.cpp: -------------------------------------------------------------------------------- 1 | #ifdef USE_PYTORCH_QNNPACK 2 | 3 | #include "init_qnnpack.h" 4 | #include 5 | #include 6 | #include 7 | 8 | namespace at { 9 | namespace native { 10 | 11 | void initQNNPACK() { 12 | static std::once_flag once; 13 | static enum pytorch_qnnp_status qnnpackStatus = 14 | pytorch_qnnp_status_uninitialized; 15 | std::call_once(once, []() { qnnpackStatus = pytorch_qnnp_initialize(); }); 16 | TORCH_CHECK( 17 | qnnpackStatus == pytorch_qnnp_status_success, 18 | "failed to initialize QNNPACK"); 19 | } 20 | 21 | } // namespace native 22 | } // namespace at 23 | 24 | #endif 25 | 
-------------------------------------------------------------------------------- /aten/src/ATen/native/Unfold2d.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { namespace native { 7 | 8 | using unfold2d_fn = 9 | void (*)( 10 | Tensor& finput, 11 | Tensor& input, 12 | int64_t kH, 13 | int64_t kW, 14 | int64_t dH, 15 | int64_t dW, 16 | int64_t padH, 17 | int64_t padW, 18 | int64_t n_input_plane, 19 | int64_t input_height, 20 | int64_t input_width, 21 | int64_t output_height, 22 | int64_t output_width 23 | ); 24 | 25 | DECLARE_DISPATCH(unfold2d_fn, unfolded2d_copy_stub); 26 | DECLARE_DISPATCH(unfold2d_fn, unfolded2d_acc_stub); 27 | 28 | }} // namespace at::native 29 | -------------------------------------------------------------------------------- /aten/src/THC/THCTensorCopy.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | template 6 | void THC_copyTensor(THCState* state, THCTensor* dst, THCTensor* src); 7 | 8 | template 9 | THCTensor *THCTensor_newClone(THCState *state, THCTensor *self); 10 | 11 | template 12 | THCTensor *THCTensor_newContiguous(THCState *state, THCTensor *self); 13 | 14 | template 15 | void THCTensor_freeCopyTo(THCState *state, THCTensor *self, THCTensor *dst); 16 | 17 | template 18 | void THCTensor_copyIgnoringOverlaps(THCState* state, THCTensor* dst, THCTensor* src); 19 | -------------------------------------------------------------------------------- /aten/src/ATen/native/cpu/GridSamplerKernel.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | namespace at { namespace native { 12 | 13 | using forward_2d_fn = Tensor(*)(const Tensor &, const Tensor &, int64_t, int64_t, bool); 14 | using backward_2d_fn = std::tuple(*)(const 
Tensor &, const Tensor &, const Tensor &, int64_t, int64_t, bool); 15 | DECLARE_DISPATCH(forward_2d_fn, grid_sampler_2d_cpu_kernel); 16 | DECLARE_DISPATCH(backward_2d_fn, grid_sampler_2d_backward_cpu_kernel); 17 | 18 | }} // namespace at::native 19 | -------------------------------------------------------------------------------- /aten/src/ATen/native/cuda/SparseMM.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | namespace at { namespace native { 6 | // sparse, sparse, sparse, dense, real, real -> sparse 7 | Tensor& _sspaddmm_out_only_sparse_cuda(Tensor& result, const Tensor& self, 8 | const Tensor& mat1, const Tensor& mat2, Scalar beta, Scalar alpha) { 9 | AT_ERROR("tensor.sspaddmm(...) can only be called on sparse tensors"); 10 | } 11 | Tensor& _sspaddmm_out_cuda(Tensor& result, const Tensor& self, 12 | const Tensor& mat1, const Tensor& mat2, Scalar beta, Scalar alpha) { 13 | AT_ERROR("NYI: CUDA sspaddmm is not implemented"); 14 | } 15 | }} // namespace at::native 16 | -------------------------------------------------------------------------------- /aten/src/TH/THGenerateBoolType.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #error "You must define TH_GENERIC_FILE before including THGenerateBoolType.h" 3 | #endif 4 | 5 | #define scalar_t bool 6 | #define ureal bool 7 | #define accreal int64_t 8 | #define Real Bool 9 | #define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val) 10 | #define TH_CONVERT_ACCREAL_TO_REAL(_val) (scalar_t)(_val) 11 | #define TH_REAL_IS_BOOL 12 | #line 1 TH_GENERIC_FILE 13 | #include TH_GENERIC_FILE 14 | #undef scalar_t 15 | #undef ureal 16 | #undef accreal 17 | #undef Real 18 | #undef TH_REAL_IS_BOOL 19 | #undef TH_CONVERT_REAL_TO_ACCREAL 20 | #undef TH_CONVERT_ACCREAL_TO_REAL 21 | 22 | #ifndef THGenerateManyTypes 23 | #undef TH_GENERIC_FILE 24 | #endif 25 | 
-------------------------------------------------------------------------------- /cmake/External/rccl.cmake: -------------------------------------------------------------------------------- 1 | if (NOT __NCCL_INCLUDED) 2 | set(__NCCL_INCLUDED TRUE) 3 | 4 | if (USE_SYSTEM_NCCL) 5 | # NCCL_ROOT, NCCL_LIB_DIR, NCCL_INCLUDE_DIR will be accounted in the following line. 6 | find_package(RCCL REQUIRED) 7 | if (RCCL_FOUND) 8 | message (STATUS "RCCL Found!") 9 | add_library(__caffe2_nccl INTERFACE) 10 | target_link_libraries(__caffe2_nccl INTERFACE ${PYTORCH_RCCL_LIBRARIES}) 11 | target_include_directories(__caffe2_nccl INTERFACE ${RCCL_INCLUDE_DIRS}) 12 | else() 13 | message (STATUS "RCCL NOT Found!") 14 | endif() 15 | else() 16 | message (STATUS "USE_SYSTEM_NCCL=OFF is not supported yet when using RCCL") 17 | endif() 18 | endif() 19 | -------------------------------------------------------------------------------- /aten/src/THNN/THNN.h: -------------------------------------------------------------------------------- 1 | #ifndef THNN_H 2 | #define THNN_H 3 | 4 | #include 5 | #include 6 | 7 | #define THNN_(NAME) TH_CONCAT_3(THNN_, Real, NAME) 8 | 9 | #define THIndexTensor THLongTensor 10 | #define THIndexTensor_(NAME) THLongTensor_ ## NAME 11 | 12 | #define THIntegerTensor THIntTensor 13 | #define THIntegerTensor_(NAME) THIntTensor_ ## NAME 14 | 15 | typedef int64_t THIndex_t; 16 | typedef int32_t THInteger_t; 17 | typedef void THNNState; 18 | 19 | #include 20 | #include 21 | 22 | #include 23 | #include 24 | 25 | #include 26 | #include 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /aten/src/ATen/native/cuda/DeviceSqrt.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace at { namespace native { 4 | #if defined(__HIP_PLATFORM_HCC__) 5 | // take these out when ROCm implements std:: math functions 6 | #include 7 | template 8 | static __forceinline__ 
__device__ scalar_t device_sqrt(scalar_t val); 9 | 10 | template <> 11 | __forceinline__ __device__ float device_sqrt(float val) { 12 | return ::sqrtf(val); 13 | } 14 | 15 | template <> 16 | __forceinline__ __device__ double device_sqrt(double val) { 17 | return ::sqrt(val); 18 | } 19 | #else 20 | template 21 | __forceinline__ __device__ double device_sqrt(scalar_t val) { 22 | return std::sqrt(val); 23 | } 24 | #endif 25 | }} 26 | -------------------------------------------------------------------------------- /aten/src/TH/THGenerateFloatType.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #error "You must define TH_GENERIC_FILE before including THGenerateFloatType.h" 3 | #endif 4 | 5 | #define scalar_t float 6 | #define accreal double 7 | #define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val) 8 | #define TH_CONVERT_ACCREAL_TO_REAL(_val) (scalar_t)(_val) 9 | #define Real Float 10 | #define THInf FLT_MAX 11 | #define TH_REAL_IS_FLOAT 12 | #line 1 TH_GENERIC_FILE 13 | #include TH_GENERIC_FILE 14 | #undef accreal 15 | #undef scalar_t 16 | #undef Real 17 | #undef THInf 18 | #undef TH_REAL_IS_FLOAT 19 | #undef TH_CONVERT_REAL_TO_ACCREAL 20 | #undef TH_CONVERT_ACCREAL_TO_REAL 21 | 22 | #ifndef THGenerateManyTypes 23 | #undef TH_GENERIC_FILE 24 | #endif 25 | -------------------------------------------------------------------------------- /aten/src/ATen/cudnn/Types.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | namespace at { namespace native { 6 | 7 | cudnnDataType_t getCudnnDataType(const at::Tensor& tensor) { 8 | if (tensor.scalar_type() == at::kFloat) { 9 | return CUDNN_DATA_FLOAT; 10 | } else if (tensor.scalar_type() == at::kDouble) { 11 | return CUDNN_DATA_DOUBLE; 12 | } else if (tensor.scalar_type() == at::kHalf) { 13 | return CUDNN_DATA_HALF; 14 | } 15 | std::string msg("getCudnnDataType() not supported for "); 16 | msg 
namespace at { namespace native {

// Maps an ATen tensor's scalar type to the corresponding MIOpen data type.
// MIOpen only supports float and half here; any other dtype throws.
miopenDataType_t getMiopenDataType(const at::Tensor& tensor) {
  if (tensor.scalar_type() == at::kFloat) {
    return miopenFloat;
  } else if (tensor.scalar_type() == at::kHalf) {
    return miopenHalf;
  }
  std::string msg("getMiopenDataType() not supported for ");
  msg += toString(tensor.scalar_type());
  throw std::runtime_error(msg);
}

// Packs the MIOpen version as (major << 8) + (minor << 4) + patch.
// NOTE(review): only 4 bits are reserved for minor/patch, so values >= 16
// collide with the next field — confirm this encoding is intentional.
int64_t miopen_version() {
  return (MIOPEN_VERSION_MAJOR<<8) + (MIOPEN_VERSION_MINOR<<4) + MIOPEN_VERSION_PATCH;
}

}} // namespace at::native
-------------------------------------------------------------------------------- /aten/src/ATen/test/reduce_ops_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | using namespace at; 7 | 8 | TEST(ReduceOpsTest, MaxValuesAndMinValues) { 9 | const int W = 10; 10 | const int H = 10; 11 | if (hasCUDA()) { 12 | for (const auto dtype : {kHalf, kFloat, kDouble, kShort, kInt, kLong}) { 13 | auto a = at::rand({H, W}, TensorOptions(kCUDA).dtype(at::kHalf)); 14 | ASSERT_FLOAT_EQ( 15 | a.max_values(c10::IntArrayRef{0, 1}).item(), 16 | a.max().item() 17 | ); 18 | ASSERT_FLOAT_EQ( 19 | a.min_values(c10::IntArrayRef{0, 1}).item(), 20 | a.min().item() 21 | ); 22 | } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/cmake/DownloadFP16.cmake: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | CMAKE_MINIMUM_REQUIRED(VERSION 2.8.12 FATAL_ERROR) 8 | 9 | PROJECT(fp16-download NONE) 10 | 11 | INCLUDE(ExternalProject) 12 | ExternalProject_Add(fp16 13 | GIT_REPOSITORY https://github.com/Maratyszcza/FP16.git 14 | GIT_TAG master 15 | SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/fp16" 16 | BINARY_DIR "${CONFU_DEPENDENCIES_BINARY_DIR}/fp16" 17 | CONFIGURE_COMMAND "" 18 | BUILD_COMMAND "" 19 | INSTALL_COMMAND "" 20 | TEST_COMMAND "" 21 | ) 22 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/cmake/DownloadFXdiv.cmake: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 
3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | CMAKE_MINIMUM_REQUIRED(VERSION 2.8.12 FATAL_ERROR) 8 | 9 | PROJECT(fxdiv-download NONE) 10 | 11 | INCLUDE(ExternalProject) 12 | ExternalProject_Add(fxdiv 13 | GIT_REPOSITORY https://github.com/Maratyszcza/FXdiv.git 14 | GIT_TAG master 15 | SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/fxdiv" 16 | BINARY_DIR "${CONFU_DEPENDENCIES_BINARY_DIR}/fxdiv" 17 | CONFIGURE_COMMAND "" 18 | BUILD_COMMAND "" 19 | INSTALL_COMMAND "" 20 | TEST_COMMAND "" 21 | ) 22 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/cmake/DownloadPSimd.cmake: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 
/* FakeQuantize Op for PerChannelAffine quantization scheme */
namespace at {
namespace native {
// Fake-quantizes `input` into `output` with scale `sc`, zero point `z_point`,
// and quantization bounds [quant_min, quant_max]. CUDA kernel is defined in
// the corresponding .cu file.
void fake_quantize_slice_cuda(
    Tensor& output,
    const Tensor& input,
    float sc,
    int64_t z_point,
    int64_t quant_min,
    int64_t quant_max);

// Backward of the above: writes the gradient w.r.t. `input` into `input_grad`
// given `output_grad` and the original `input`.
// NOTE(review): exact pass-through/clamp semantics live in the .cu kernel —
// confirm there before relying on them.
void fake_quantize_grad_slice_cuda(
    Tensor& input_grad,
    const Tensor& output_grad,
    const Tensor& input,
    float sc,
    int64_t z_point,
    int64_t quant_min,
    int64_t quant_max);

} // namespace native
} // namespace at
// Constructors: open a disk-backed / pipe-backed THFile. `mode` is a
// stdio-style mode string. NOTE(review): `isQuiet` presumably routes failures
// into the file's error state instead of raising — confirm in THDiskFile.c.
TH_API THFile *THDiskFile_new(const char *name, const char *mode, int isQuiet);
TH_API THFile *THPipeFile_new(const char *name, const char *mode, int isQuiet);

// Returns the name the file was opened with.
TH_API const char *THDiskFile_name(THFile *self);

// Host-endianness queries (process-wide; independent of any file handle).
TH_API int THDiskFile_isLittleEndianCPU(void);
TH_API int THDiskFile_isBigEndianCPU(void);
// Select the byte order used when reading/writing through this file.
TH_API void THDiskFile_nativeEndianEncoding(THFile *self);
TH_API void THDiskFile_littleEndianEncoding(THFile *self);
TH_API void THDiskFile_bigEndianEncoding(THFile *self);
// Set the on-disk width (in bytes) used for 'long' values.
TH_API void THDiskFile_longSize(THFile *self, int size);
// Disable buffering for this file.
TH_API void THDiskFile_noBuffer(THFile *self);
namespace at { namespace native {

// Points MIOpen at the HIP stream ATen is currently using, so MIOpen work is
// ordered with the rest of the stream's operations.
inline void setMIOpenStreamToCurrent() {
  // NB: Due to in-place HIPify, getCurrentCUDAStream actually means
  // getCurrentHIPStream
  MIOPEN_CHECK(miopenSetStream(getMiopenHandle(), at::hip::getCurrentHIPStream()));
}

// This function makes tensors which have zero stride contiguous, by
// setting the strides to 1.
inline Tensor contiguousIfZeroInStrides(const Tensor& t) {
  for (auto s : t.strides()) {
    // Any zero stride (e.g. from expand/broadcast) -> materialize a copy.
    if (s == 0) return t.contiguous();
  }
  return t;  // no zero strides: return the input unchanged, no copy
}

}}
6 | 7 | CMAKE_MINIMUM_REQUIRED(VERSION 2.8.12 FATAL_ERROR) 8 | 9 | PROJECT(cpuinfo-download NONE) 10 | 11 | INCLUDE(ExternalProject) 12 | ExternalProject_Add(cpuinfo 13 | GIT_REPOSITORY https://github.com/Maratyszcza/cpuinfo.git 14 | GIT_TAG master 15 | SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/cpuinfo" 16 | BINARY_DIR "${CONFU_DEPENDENCIES_BINARY_DIR}/cpuinfo" 17 | CONFIGURE_COMMAND "" 18 | BUILD_COMMAND "" 19 | INSTALL_COMMAND "" 20 | TEST_COMMAND "" 21 | ) 22 | -------------------------------------------------------------------------------- /aten/src/TH/THGenerateCharType.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #error "You must define TH_GENERIC_FILE before including THGenerateCharType.h" 3 | #endif 4 | 5 | #define scalar_t int8_t 6 | #define ureal uint8_t 7 | #define accreal int64_t 8 | #define Real Char 9 | #define THInf SCHAR_MAX 10 | #define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val) 11 | #define TH_CONVERT_ACCREAL_TO_REAL(_val) (scalar_t)(_val) 12 | #define TH_REAL_IS_CHAR 13 | #line 1 TH_GENERIC_FILE 14 | #include TH_GENERIC_FILE 15 | #undef scalar_t 16 | #undef ureal 17 | #undef accreal 18 | #undef Real 19 | #undef THInf 20 | #undef TH_REAL_IS_CHAR 21 | #undef TH_CONVERT_REAL_TO_ACCREAL 22 | #undef TH_CONVERT_ACCREAL_TO_REAL 23 | 24 | #ifndef THGenerateManyTypes 25 | #undef TH_GENERIC_FILE 26 | #endif 27 | -------------------------------------------------------------------------------- /aten/src/TH/THGenerateByteType.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #error "You must define TH_GENERIC_FILE before including THGenerateByteType.h" 3 | #endif 4 | 5 | #define scalar_t uint8_t 6 | #define ureal uint8_t 7 | #define accreal int64_t 8 | #define Real Byte 9 | #define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val) 10 | #define TH_CONVERT_ACCREAL_TO_REAL(_val) (scalar_t)(_val) 11 | #define THInf 
UCHAR_MAX 12 | #define TH_REAL_IS_BYTE 13 | #line 1 TH_GENERIC_FILE 14 | #include TH_GENERIC_FILE 15 | #undef scalar_t 16 | #undef ureal 17 | #undef accreal 18 | #undef Real 19 | #undef THInf 20 | #undef TH_REAL_IS_BYTE 21 | #undef TH_CONVERT_REAL_TO_ACCREAL 22 | #undef TH_CONVERT_ACCREAL_TO_REAL 23 | 24 | #ifndef THGenerateManyTypes 25 | #undef TH_GENERIC_FILE 26 | #endif 27 | -------------------------------------------------------------------------------- /aten/src/TH/THGenerateLongType.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #error "You must define TH_GENERIC_FILE before including THGenerateLongType.h" 3 | #endif 4 | 5 | #define scalar_t int64_t 6 | #define ureal uint64_t 7 | #define accreal int64_t 8 | #define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val) 9 | #define TH_CONVERT_ACCREAL_TO_REAL(_val) (scalar_t)(_val) 10 | #define Real Long 11 | #define THInf LONG_MAX 12 | #define TH_REAL_IS_LONG 13 | #line 1 TH_GENERIC_FILE 14 | #include TH_GENERIC_FILE 15 | #undef scalar_t 16 | #undef ureal 17 | #undef accreal 18 | #undef Real 19 | #undef THInf 20 | #undef TH_REAL_IS_LONG 21 | #undef TH_CONVERT_REAL_TO_ACCREAL 22 | #undef TH_CONVERT_ACCREAL_TO_REAL 23 | 24 | #ifndef THGenerateManyTypes 25 | #undef TH_GENERIC_FILE 26 | #endif 27 | -------------------------------------------------------------------------------- /aten/src/ATen/native/cuda/FillKernel.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | namespace at { namespace native { 8 | 9 | void fill_kernel_cuda(TensorIterator& iter, Scalar value) { 10 | AT_DISPATCH_ALL_TYPES_AND3(at::ScalarType::Bool, at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "fill_cuda", [&]() { 11 | auto value_converted = value.to(); 12 | gpu_kernel(iter, [value_converted]GPU_LAMBDA() -> scalar_t { 13 | return value_converted; 14 | }); 15 | 
}); 16 | } 17 | 18 | REGISTER_DISPATCH(fill_stub, &fill_kernel_cuda); 19 | 20 | } // namespace native 21 | } // namespace at 22 | -------------------------------------------------------------------------------- /aten/src/TH/THGenerateHalfType.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #error "You must define TH_GENERIC_FILE before including THGenerateHalfType.h" 3 | #endif 4 | 5 | #include 6 | #define scalar_t THHalf 7 | #define accreal float 8 | #define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val) 9 | #define TH_CONVERT_ACCREAL_TO_REAL(_val) (scalar_t)(_val) 10 | #define Real Half 11 | #define THInf TH_HALF_BITS_TO_LITERAL(TH_HALF_INF) 12 | #define TH_REAL_IS_HALF 13 | #line 1 TH_GENERIC_FILE 14 | #include TH_GENERIC_FILE 15 | #undef scalar_t 16 | #undef accreal 17 | #undef Real 18 | #undef THInf 19 | #undef TH_REAL_IS_HALF 20 | #undef TH_CONVERT_REAL_TO_ACCREAL 21 | #undef TH_CONVERT_ACCREAL_TO_REAL 22 | 23 | #ifndef THGenerateManyTypes 24 | #undef TH_GENERIC_FILE 25 | #endif 26 | -------------------------------------------------------------------------------- /aten/src/TH/THGenerateShortType.h: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #error "You must define TH_GENERIC_FILE before including THGenerateShortType.h" 3 | #endif 4 | 5 | #define scalar_t int16_t 6 | #define ureal uint16_t 7 | #define accreal int64_t 8 | #define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val) 9 | #define TH_CONVERT_ACCREAL_TO_REAL(_val) (scalar_t)(_val) 10 | #define Real Short 11 | #define THInf SHRT_MAX 12 | #define TH_REAL_IS_SHORT 13 | #line 1 TH_GENERIC_FILE 14 | #include TH_GENERIC_FILE 15 | #undef scalar_t 16 | #undef ureal 17 | #undef accreal 18 | #undef Real 19 | #undef THInf 20 | #undef TH_REAL_IS_SHORT 21 | #undef TH_CONVERT_REAL_TO_ACCREAL 22 | #undef TH_CONVERT_ACCREAL_TO_REAL 23 | 24 | #ifndef THGenerateManyTypes 25 | #undef 
/* FakeQuantize Op for PerChannelAffine quantization scheme */
// NOTE(review): unlike the CUDA counterpart (cuda/fake_quantize_core.h),
// this header has no `#pragma once` — consider adding one for consistency
// (re-inclusion is harmless here since these are pure declarations).
namespace at {
namespace native {
// Fake-quantizes `input` into `output` with scale `sc`, zero point `z_point`,
// and quantization bounds [quant_min, quant_max] (CPU implementation lives in
// the corresponding .cpp file).
void fake_quantize_slice(
    Tensor& output,
    const Tensor& input,
    float sc,
    int64_t z_point,
    int64_t quant_min,
    int64_t quant_max);

// Backward of the above: writes the gradient w.r.t. `input` into `input_grad`
// given `output_grad` and the original `input`.
void fake_quantize_grad_slice(
    Tensor& input_grad,
    const Tensor& output_grad,
    const Tensor& input,
    float sc,
    int64_t z_point,
    int64_t quant_min,
    int64_t quant_max);

} // namespace native
} // namespace at
namespace at {
// Forward declaration is enough: the function-pointer aliases below only
// take TensorIterator by reference.
struct TensorIterator;
}

namespace at { namespace native {

// Kernel entry-point signatures for tensor-by-tensor indexing; concrete
// implementations are registered per backend through the DispatchStub
// mechanism (DECLARE_DISPATCH here, REGISTER_DISPATCH in the kernels).
using index_fn = void(*)(TensorIterator &, IntArrayRef indexed_sizes, IntArrayRef indexed_strides);
using index_put_fn = void(*)(TensorIterator &, IntArrayRef indexed_sizes, IntArrayRef indexed_strides, bool accumulate);
using index_put_accum_fn = void(*)(Tensor &, TensorList , const Tensor &, bool unsafe);

DECLARE_DISPATCH(index_fn, index_stub);
DECLARE_DISPATCH(index_put_fn, index_put_stub);
DECLARE_DISPATCH(index_put_accum_fn, index_put_accum_stub);

}} // namespace at::native
3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | CMAKE_MINIMUM_REQUIRED(VERSION 2.8.12 FATAL_ERROR) 8 | 9 | PROJECT(pthreadpool-download NONE) 10 | 11 | INCLUDE(ExternalProject) 12 | ExternalProject_Add(pthreadpool 13 | GIT_REPOSITORY https://github.com/Maratyszcza/pthreadpool.git 14 | GIT_TAG master 15 | SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/pthreadpool" 16 | BINARY_DIR "${CONFU_DEPENDENCIES_BINARY_DIR}/pthreadpool" 17 | CONFIGURE_COMMAND "" 18 | BUILD_COMMAND "" 19 | INSTALL_COMMAND "" 20 | TEST_COMMAND "" 21 | ) 22 | -------------------------------------------------------------------------------- /aten/src/THC/generic/THCTensorMathMagma.h: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #define THC_GENERIC_FILE "THC/generic/THCTensorMathMagma.h" 3 | #else 4 | 5 | #if defined(THC_REAL_IS_FLOAT) || defined(THC_REAL_IS_DOUBLE) 6 | 7 | // MAGMA (i.e. CUDA implementation of LAPACK functions) 8 | THC_API void THCTensor_(gels)(THCState *state, THCTensor *rb_, THCTensor *ra_, THCTensor *b_, THCTensor *a_); 9 | THC_API void THCTensor_(geev)(THCState *state, THCTensor *re_, THCTensor *rv_, THCTensor *a_, bool eigenvectors); 10 | THC_API void THCTensor_(potri)(THCState *state, THCTensor *ra_, THCTensor *a, bool upper); 11 | THC_API void THCTensor_(geqrf)(THCState *state, THCTensor *ra_, THCTensor *rtau_, THCTensor *a_); 12 | 13 | #endif // defined(THC_REAL_IS_FLOAT) || defined(THC_REAL_IS_DOUBLE) 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /aten/src/ATen/hip/impl/HIPGuardImplMasqueradingAsCUDA.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | // THIS IS A MASSIVE HACK. 
This will BREAK you Caffe2 CUDA code if you 4 | // load ATen_hip, even if you don't ever actually use ATen_hip at runtime. 5 | // 6 | // If you ever link ATen_hip statically into the full library along 7 | // with ATen_cuda (libomnibus), the loading order of this versus the regular 8 | // ATen_cuda will be nondeterministic, and you'll nondeterministically get 9 | // one or the other. (This will be obvious because all of your code 10 | // will fail.) 11 | // 12 | // This hack can be removed once PyTorch is out-of-place HIPified, and 13 | // doesn't pretend CUDA is HIP. 14 | C10_REGISTER_GUARD_IMPL(CUDA, at::cuda::HIPGuardImplMasqueradingAsCUDA); 15 | -------------------------------------------------------------------------------- /aten/src/THC/THCTensorMathMagma.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #ifdef USE_MAGMA 10 | #include 11 | #else 12 | #include 13 | #endif 14 | 15 | #ifndef DIVUP 16 | #define DIVUP(x, y) (((x) + (y) - 1) / (y)) 17 | #endif 18 | 19 | #define NoMagma(name) "No CUDA implementation of '" #name "'. 
// AVX2 variants of TH's vector primitives.
// NOTE(review): cadd presumably computes z[i] = x[i] + c * y[i] over n
// elements — confirm against the generic THVector implementation.
TH_API void THDoubleVector_cadd_AVX2(double *z, const double *x, const double *y, const double c, const ptrdiff_t n);
TH_API void THFloatVector_cadd_AVX2(float *z, const float *x, const float *y, const float c, const ptrdiff_t n);
// Fills `data` with `size` normally-distributed samples (mean, stddev) drawn
// from `generator`.
TH_API void THFloatVector_normal_fill_AVX2(float *data,
                                           const int64_t size,
                                           at::Generator *generator,
                                           const float mean,
                                           const float stddev);
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace at { namespace detail { 9 | 10 | template 11 | inline T load(const void* data, ScalarType src_type) { 12 | return AT_DISPATCH_ALL_TYPES_AND2(at::ScalarType::Half, at::ScalarType::Bool, src_type, "load", [&]() { 13 | return at::convert(*(scalar_t*)data); 14 | }); 15 | } 16 | 17 | template 18 | inline void store(T value, void* dst, ScalarType dst_type) { 19 | AT_DISPATCH_ALL_TYPES_AND2(at::ScalarType::Half, at::ScalarType::Bool, dst_type, "store", [&]() { 20 | *(scalar_t*)dst = at::convert(value); 21 | }); 22 | } 23 | 24 | }} // namespace at::detail 25 | -------------------------------------------------------------------------------- /aten/conda/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | if [ -z "$PREFIX" ]; then 6 | PREFIX="$CONDA_PREFIX" 7 | fi 8 | 9 | # When conda-build constructs a new working copy to perform a build 10 | # in, it recursively copies *all* files and directories in the original 11 | # source directory, including any pre-existing build products (e.g., 12 | # if you previously ran cmake.) This is problematic, because if 13 | # a 'build' directory already exists, cmake will reuse build settings 14 | # rather than recompute them from scratch. We want a fresh build, so 15 | # we prophylactically remove the build directory. 16 | rm -rf build || true 17 | 18 | mkdir -p build 19 | cd build 20 | cmake -DCMAKE_INSTALL_PREFIX="$PREFIX" -DCMAKE_PREFIX_PATH="$PREFIX" -DCMAKE_BUILD_TYPE=Release $CONDA_CMAKE_ARGS .. 
21 | make install -j20 22 | -------------------------------------------------------------------------------- /aten/src/ATen/miopen/Handle.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | namespace at { namespace native { 9 | 10 | namespace { 11 | 12 | struct Handle { 13 | miopenHandle_t handle; 14 | Handle() : handle(NULL) { 15 | MIOPEN_CHECK(miopenCreate(&handle)); 16 | } 17 | ~Handle() { 18 | if (handle) { 19 | miopenDestroy(handle); 20 | } 21 | } 22 | }; 23 | 24 | std::mutex mutex; 25 | std::unordered_map handles; 26 | 27 | } // namespace 28 | 29 | 30 | miopenHandle_t getMiopenHandle() 31 | { 32 | int device; 33 | HIP_CHECK(hipGetDevice(&device)); 34 | 35 | std::lock_guard guard(mutex); 36 | return handles[device].handle; 37 | } 38 | 39 | }} // namespace at::native 40 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/src/operator-delete.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Facebook, Inc. and its affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | enum pytorch_qnnp_status pytorch_qnnp_delete_operator( 15 | pytorch_qnnp_operator_t op) { 16 | if (op == NULL) { 17 | return pytorch_qnnp_status_invalid_parameter; 18 | } 19 | 20 | free(op->indirection_buffer); 21 | free(op->packed_weights); 22 | free(op->a_sum); 23 | free(op->zero_buffer); 24 | free(op->lookup_table); 25 | free(op); 26 | return pytorch_qnnp_status_success; 27 | } 28 | -------------------------------------------------------------------------------- /aten/src/ATen/test/cuda_optional_test.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | using namespace at; 10 | 11 | // optional in cuda files 12 | TEST(OptionalTest, OptionalTestCUDA) { 13 | if (!at::cuda::is_available()) return; 14 | c10::optional trivially_destructible; 15 | c10::optional> non_trivially_destructible; 16 | ASSERT_FALSE(trivially_destructible.has_value()); 17 | ASSERT_FALSE(non_trivially_destructible.has_value()); 18 | 19 | trivially_destructible = {5}; 20 | non_trivially_destructible = std::vector{5, 10}; 21 | ASSERT_TRUE(trivially_destructible.has_value()); 22 | ASSERT_TRUE(non_trivially_destructible.has_value()); 23 | } 24 | -------------------------------------------------------------------------------- /aten/src/ATen/native/Distance.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { namespace native { 7 | 8 | using pdist_forward_fn = void(*)(Tensor&, const Tensor&, const double p); 9 | using pdist_backward_fn = void(*)(Tensor&, const Tensor&, const Tensor&, const double p, const Tensor&); 10 | using cdist_fn = void(*)(Tensor&, const Tensor&, const Tensor&, const double p); 11 | using cdist_backward_fn = void(*)(Tensor&, const Tensor&, const Tensor&, const Tensor&, const double p, const Tensor&); 12 
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "TH/generic/THTensorFill.cpp"
#else

#include <TH/generic/THTensorApply.hpp>

// Sets every element of r_ to `value`.
void THTensor_(fill)(THTensor *r_, scalar_t value)
{
  if (THTensor_(isContiguous)(r_) || THTensor_(isTransposed)(r_)) {
    // Fast path: the whole tensor is one dense chunk of memory, so a single
    // vectorized fill over the entire buffer suffices.
    TH_TENSOR_APPLY_CONTIG(scalar_t, r_, THVector_(fill)(r__data, value, r__len););
  } else {
    TH_TENSOR_APPLY(scalar_t, r_,
      if (r__stride == 1) {
        // The innermost dimension is contiguous: vector-fill that whole run,
        // then manually advance the apply macro's cursor/counter past it and
        // break out of the (macro-generated) innermost loop.
        THVector_(fill)(r__data, value, r__size);
        r__i = r__size;
        r__data += r__stride * r__size;
        break;
      } else {
        // Strided element: plain scalar store.
        *r__data = value;
      }
    );
  }
}

// Zeroes the tensor; delegates to fill so both share the same fast paths.
void THTensor_(zero)(THTensor *r_)
{
  THTensor_(fill)(r_, 0);
}

#endif
#include <ATen/core/LegacyDeviceTypeInit.h>

namespace at {

C10_DEFINE_REGISTRY(
    LegacyDeviceTypeInitRegistry,
    LegacyDeviceTypeInitInterface,
    LegacyDeviceTypeInitArgs)

// Returns the process-wide LegacyDeviceTypeInitInterface singleton.
//
// The instance is created exactly once (thread-safely, via std::call_once).
// If no concrete implementation was registered under "LegacyDeviceTypeInit",
// a default-constructed interface is substituted so callers always receive
// a valid reference, never null.
const LegacyDeviceTypeInitInterface& getLegacyDeviceTypeInit() {
  static std::unique_ptr<LegacyDeviceTypeInitInterface> legacy_device_type_init;
  static std::once_flag once;
  std::call_once(once, [] {
    legacy_device_type_init = LegacyDeviceTypeInitRegistry()->Create("LegacyDeviceTypeInit", LegacyDeviceTypeInitArgs{});
    if (!legacy_device_type_init) {
      // No registered implementation: fall back to the base interface.
      legacy_device_type_init =
          std::unique_ptr<LegacyDeviceTypeInitInterface>(new LegacyDeviceTypeInitInterface());
    }
  });
  return *legacy_device_type_init;
}

}
#include <THCUNN/THCUNN.h>
#include <THCUNN/common.h>
#include <TH/THHalf.h>

// Elementwise tanh backward:
//   gradInput = gradOutput * (1 - output^2)
// since d/dx tanh(x) = 1 - tanh(x)^2 and `output` already holds tanh(x).
template <typename T>
struct tanh_updateGradInput_functor
{
  __device__ __forceinline__ void operator()(T *gradInput,
      const T *output, const T *gradOutput) const {
    *gradInput = *gradOutput * (1.f - *output * *output);
  }
};

// half specialization: operands are widened to float, combined there, and
// narrowed back to half, avoiding direct arithmetic on half values.
template <>
struct tanh_updateGradInput_functor<half>
{
  __device__ __forceinline__ void operator()(half *gradInput,
      const half *output, const half *gradOutput) const {
    const float out = __half2float(*output);
    const float go = __half2float(*gradOutput);
    *gradInput = __float2half(go * (1.f - out * out));
  }
};

#include <THCUNN/generic/Tanh.cu>
#include <THC/THCGenerateFloatTypes.h>
-------------------------------------------------------------------------------- 1 | # CMake file to replace the string contents in Google Test and Google Mock 2 | # Usage example: 3 | # Patch the cmake file 4 | # cmake -DFILENAME=internal_utils.cmake 5 | # -DBACKUP=internal_utils.cmake.bak 6 | # -DREVERT=0 7 | # -P GoogleTestPatch.cmake 8 | # Revert the changes 9 | # cmake -DFILENAME=internal_utils.cmake 10 | # -DBACKUP=internal_utils.cmake.bak 11 | # -DREVERT=1 12 | # -P GoogleTestPatch.cmake 13 | 14 | 15 | if(REVERT) 16 | file(READ ${BACKUP} content) 17 | file(WRITE ${FILENAME} "${content}") 18 | file(REMOVE ${BACKUP}) 19 | else(REVERT) 20 | file(READ ${FILENAME} content) 21 | file(WRITE ${BACKUP} "${content}") 22 | string(REGEX REPLACE "[-/]Z[iI]" "/Z7" content "${content}") 23 | file(WRITE ${FILENAME} "${content}") 24 | endif(REVERT) 25 | -------------------------------------------------------------------------------- /aten/src/ATen/detail/HIPHooksInterface.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | namespace at { 10 | namespace detail { 11 | 12 | // See getCUDAHooks for some more commentary 13 | const HIPHooksInterface& getHIPHooks() { 14 | static std::unique_ptr hip_hooks; 15 | static std::once_flag once; 16 | std::call_once(once, [] { 17 | hip_hooks = HIPHooksRegistry()->Create("HIPHooks", HIPHooksArgs{}); 18 | if (!hip_hooks) { 19 | hip_hooks = 20 | std::unique_ptr(new HIPHooksInterface()); 21 | } 22 | }); 23 | return *hip_hooks; 24 | } 25 | } // namespace detail 26 | 27 | C10_DEFINE_REGISTRY(HIPHooksRegistry, HIPHooksInterface, HIPHooksArgs) 28 | 29 | } // namespace at 30 | -------------------------------------------------------------------------------- /aten/src/ATen/native/quantized/cpu/qnnpack/deps/clog/cmake/DownloadGoogleTest.cmake: -------------------------------------------------------------------------------- 1 | # Copyright 
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "THC/generic/THCStorage.cu"
#else

// Fills the whole storage with `value` on the GPU via thrust::fill,
// enqueued on the current THC stream so it is ordered with other work.
void THCStorage_(fill)(THCState *state, THCStorage *self, scalar_t value)
{
  THCThrustAllocator thrustAlloc(state);
  thrust::device_ptr<scalar_t> self_data(THCStorage_(data)(state, self));
  thrust::fill(
#if CUDA_VERSION >= 7000 || defined __HIP_PLATFORM_HCC__
    // Route thrust's temporary allocations through THC's allocator and run
    // on the current stream rather than the default stream.
    thrust::cuda::par(thrustAlloc).on(THCState_getCurrentStream(state)),
#endif
    self_data, self_data+self->numel(), value);
}

// Resizes the storage to `size` elements; thin wrapper over the
// type-independent implementation.
void THCStorage_(resize)(THCState *state, THCStorage *self, ptrdiff_t size)
{
  THCStorage_resize(state, self, size);
}

// Returns the device index on which the storage's memory lives.
int THCStorage_(getDevice)(THCState* state, const THCStorage* storage) {
  return THCStorage_getDevice(state, storage);
}

#endif
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

CMAKE_MINIMUM_REQUIRED(VERSION 2.8.12 FATAL_ERROR)

# Stand-alone "download only" project, run at configure time to populate the
# Google Benchmark sources for the confu-style dependency layout.
PROJECT(googlebenchmark-download NONE)

INCLUDE(ExternalProject)
# Fetch and unpack google/benchmark v1.4.1; URL_HASH pins the exact archive.
# Configure/build/install/test steps are all disabled: only the source
# download happens here, and the parent build consumes the unpacked tree.
ExternalProject_Add(googlebenchmark
	URL https://github.com/google/benchmark/archive/v1.4.1.zip
	URL_HASH SHA256=61ae07eb5d4a0b02753419eb17a82b7d322786bb36ab62bd3df331a4d47c00a7
	SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/googlebenchmark"
	BINARY_DIR "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark"
	CONFIGURE_COMMAND ""
	BUILD_COMMAND ""
	INSTALL_COMMAND ""
	TEST_COMMAND ""
)
#ifndef THC_TENSORMATH_COMPARE_CUH
#define THC_TENSORMATH_COMPARE_CUH

#include <THC/THCTensorMath.h>
#include <THC/THCGeneral.h>
#include <THC/THCTensorCopy.h>
#include <THC/THCApply.cuh>
#include <THC/THCNumerics.cuh>

// Applies the elementwise comparison/logical functor `op` to `src`, writing
// results into `self_`, which is resized to src's shape first.
// NOTE(review): template parameter names reconstructed from usage — confirm
// against the original header.
template <typename TensorTypeOut, typename TensorType, class Op>
void THC_logicalValue(THCState *state,
                      TensorTypeOut *self_,
                      TensorType *src,
                      Op op) {
  THCTensor_resize(state, self_, src->sizes(), {});

  // Pointwise kernel launch; returns false when the tensors exceed the
  // supported dimension limit, which is surfaced as an argument error.
  if (!THC_pointwiseApply2(state, self_, src, op)) {
    THArgCheck(false, 2, CUTORCH_DIM_WARNING);
  }

  THCudaCheck(cudaGetLastError());
}

#endif // THC_TENSORMATH_COMPARE_CUH
#pragma once

#include <iostream>
#include <c10/core/Scalar.h>
#include <ATen/core/Tensor.h>


namespace c10 {
CAFFE2_API std::ostream& operator<<(std::ostream& out, Backend b);
}
namespace at {

CAFFE2_API std::ostream& operator<<(std::ostream& out, const DeprecatedTypeProperties& t);
// Pretty-prints `tensor` to `stream`, wrapping at `linesize` columns.
CAFFE2_API std::ostream& print(
    std::ostream& stream,
    const Tensor& tensor,
    int64_t linesize);
// Streaming a Tensor uses the default 80-column line width.
static inline std::ostream& operator<<(std::ostream & out, const Tensor & t) {
  return print(out,t,80);
}
// Convenience overload: print the tensor to stdout.
static inline void print(const Tensor & t, int64_t linesize=80) {
  print(std::cout,t,linesize);
}

// Scalars print as double when floating point, as long otherwise.
static inline std::ostream& operator<<(std::ostream & out, Scalar s) {
  return out << (s.isFloatingPoint() ? s.toDouble() : s.toLong());
}

}
guard that enables or disables grad mode upon 13 | // construction, and sets it back to the original value upon destruction. 14 | struct CAFFE2_API AutoGradMode { 15 | AutoGradMode(bool enabled) : prev_mode(GradMode::is_enabled()) { 16 | GradMode::set_enabled(enabled); 17 | } 18 | ~AutoGradMode() { 19 | GradMode::set_enabled(prev_mode); 20 | } 21 | bool prev_mode; 22 | }; 23 | 24 | // A RAII, thread local (!) guard that stops future operations from building 25 | // gradients. 26 | struct CAFFE2_API NoGradGuard : public AutoGradMode { 27 | NoGradGuard() : AutoGradMode(/*enabled=*/false) {} 28 | }; 29 | 30 | } 31 | -------------------------------------------------------------------------------- /aten/src/ATen/native/utils/ParamUtils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace at { 7 | namespace native { 8 | 9 | inline std::vector expand_param_if_needed( 10 | IntArrayRef list_param, 11 | const char* param_name, 12 | int64_t expected_dim) { 13 | if (list_param.size() == 1) { 14 | return std::vector(expected_dim, list_param[0]); 15 | } else if ((int64_t)list_param.size() != expected_dim) { 16 | std::ostringstream ss; 17 | ss << "expected " << param_name << " to be a single integer value or a " 18 | << "list of " << expected_dim << " values to match the convolution " 19 | << "dimensions, but got " << param_name << "=" << list_param; 20 | AT_ERROR(ss.str()); 21 | } else { 22 | return list_param.vec(); 23 | } 24 | } 25 | 26 | } // namespace native 27 | } // namespace at 28 | -------------------------------------------------------------------------------- /aten/src/THCUNN/SharedMem.cuh: -------------------------------------------------------------------------------- 1 | // Based on the simpleTempltes CUDA example 2 | 3 | #ifndef THCUNN_SHAREDMEM_H 4 | #define THCUNN_SHAREDMEM_H 5 | 6 | template 7 | struct SharedMem { 8 | __device__ T *getPointer() 9 | { 10 | extern 
// Specializations of SharedMem: each supported element type gets its own
// uniquely named extern __shared__ array.  Distinct names are needed because
// extern __shared__ declarations with the same name would alias the same
// dynamic shared-memory allocation regardless of type.

template <>
struct SharedMem<half>
{
  __device__ half *getPointer() {
    extern __shared__ half s_half[];
    return s_half;
  }
};

template <>
struct SharedMem<float>
{
  __device__ float *getPointer() {
    extern __shared__ float s_float[];
    return s_float;
  }
};

template <>
struct SharedMem<double>
{
  __device__ double *getPointer() {
    extern __shared__ double s_double[];
    return s_double;
  }
};

#endif
#pragma once

#include <ATen/ATen.h>
#include <ATen/cuda/Exceptions.h>
#include <ATen/cudnn/cudnn-wrapper.h>
#include <ATen/cudnn/Handle.h>
#include <ATen/cuda/CUDAContext.h>

namespace at { namespace native {

// Points cuDNN at ATen's current CUDA stream so that cuDNN kernels are
// ordered with the rest of the work enqueued on that stream.
inline void setCuDNNStreamToCurrent() {
  // TODO: Should getCurrentStream be a method on Context?
  AT_CUDNN_CHECK(cudnnSetStream(getCudnnHandle(), at::cuda::getCurrentCUDAStream()));
}

// cuDNN has a buggy check for tensor being contiguous (that is, it does
// not ignore stride for dimension that is equal to 0). This function
// makes tensors which have zero stride contiguous, by setting the
// strides to 1 as cuDNN likes.
// Tensors without any zero strides are returned unchanged (no copy).
inline Tensor contiguousIfZeroInStrides(const Tensor& t) {
  for (auto s : t.strides()) {
    if (s == 0) return t.contiguous();
  }
  return t;
}

}}
#pragma once

#include <ATen/ATen.h>
#include <ATen/cuda/detail/TensorInfo.cuh>
#include <limits>

namespace at {
namespace cuda {
namespace detail {

TORCH_CUDA_API bool maybeOverlappingIndices(const at::Tensor& t);
// True when every linear index up to max_elem fits in 32-bit arithmetic
// for tensor `t`.
TORCH_CUDA_API bool canUse32BitIndexMath(const at::Tensor &t, int64_t max_elem=std::numeric_limits<int32_t>::max());

// Copies t's sizes and strides into a kernel-friendly TensorInfo value,
// narrowing each extent to IndexType.  The caller is responsible for having
// verified the narrowing is safe (e.g. via canUse32BitIndexMath).
template <typename scalar, typename IndexType>
TensorInfo<scalar, IndexType>
getTensorInfo(const at::Tensor& t) {
  IndexType sz[MAX_TENSORINFO_DIMS];
  IndexType st[MAX_TENSORINFO_DIMS];

  int dims = t.dim();
  for (int i = 0; i < dims; ++i) {
    sz[i] = t.size(i);
    st[i] = t.stride(i);
  }

  return TensorInfo<scalar, IndexType>(
      t.data_ptr<scalar>(), dims, sz, st);
}

} // detail
} // cuda
} // at
/*
 * Small size_t arithmetic helpers shared across QNNPACK.
 */

#pragma once

#include <stddef.h>
#ifdef _MSC_VER
/* windows.h may define min/max as macros, which would break the functions
 * declared below. */
#undef min
#undef max
#endif

/* Smaller of a and b. */
inline static size_t min(size_t a, size_t b) {
  if (b < a) {
    return b;
  }
  return a;
}

/* Larger of a and b. */
inline static size_t max(size_t a, size_t b) {
  if (a < b) {
    return b;
  }
  return a;
}

/* Difference-or-zero: a - b, clamped at 0 (no unsigned wrap-around). */
inline static size_t doz(size_t a, size_t b) {
  if (a < b) {
    return 0;
  }
  return a - b;
}

/* Ceiling division: smallest k with k * q >= n.  Written without the
 * (n + q - 1) trick so it cannot overflow for large n. */
inline static size_t divide_round_up(size_t n, size_t q) {
  const size_t quotient = n / q;
  if (n % q != 0) {
    return quotient + 1;
  }
  return quotient;
}

/* Rounds n up to the next multiple of q. */
inline static size_t round_up(size_t n, size_t q) {
  return divide_round_up(n, q) * q;
}
#pragma once

// STOP!!! Thinking of including this header directly? Please
// read Note [TH abstraction violation]

#include <THC/THCStorage.h>
// Should work with THStorageClass
#include <TH/THStorageFunctions.hpp>

#include <c10/core/ScalarType.h>

#include <c10/core/Allocator.h>
#include <c10/util/typeid.h>
#include <cstddef>

// Type-independent CUDA storage operations (the THCStorage_(…) generic
// wrappers forward to these).

// Allocates a new, empty storage with the given element type.
THC_API THCStorage* THCStorage_new(THCState* state, caffe2::TypeMeta);

// Presumably increments the storage's refcount — confirm against impl.
THC_API void THCStorage_retain(THCState *state, THCStorage *storage);

// Resizes the storage to `size` elements.
THC_API void THCStorage_resize(THCState *state, THCStorage *storage, ptrdiff_t size);
// Device index on which the storage's memory lives.
THC_API int THCStorage_getDevice(THCState* state, const THCStorage* storage);

// Wraps pre-allocated device memory (ownership transferred via the rvalue
// DataPtr) in a storage that uses `allocator` for future (re)allocations.
THC_API THCStorage* THCStorage_newWithDataAndAllocator(
    THCState *state, at::ScalarType scalar_type,
    at::DataPtr&& data, ptrdiff_t size,
    at::Allocator* allocator);
#pragma once

#include <ATen/ATen.h>
#include <ATen/native/DispatchStub.h>
#include <c10/core/Scalar.h>

namespace at {

struct TensorIterator;

namespace native {

// Kernel signatures for elementwise activation ops.  The TensorIterator
// carries inputs and outputs; Scalar parameters carry op hyperparameters
// (e.g. threshold/value for threshold, lambda for hardshrink).
using activation_fn = void (*)(TensorIterator&);
using activation_backward_fn = void (*)(TensorIterator&);
using threshold_fn = void (*)(TensorIterator&, Scalar, Scalar);
using hardshrink_cpu_fn = void (*)(TensorIterator&, Scalar);
using hardshrink_backward_cpu_fn = void (*)(TensorIterator&, Scalar);

// Per-device dispatch stubs; backends register concrete kernels elsewhere.
DECLARE_DISPATCH(threshold_fn, threshold_stub);
DECLARE_DISPATCH(activation_fn, GeluKernel);
DECLARE_DISPATCH(activation_backward_fn, GeluBackwardKernel);
DECLARE_DISPATCH(hardshrink_cpu_fn, hardshrink_cpu_stub);
DECLARE_DISPATCH(hardshrink_backward_cpu_fn, hardshrink_backward_cpu_stub);

} // namespace native

} // namespace at
# Try to find the Google Benchmark library and headers.
#  Benchmark_FOUND - system has benchmark lib
#  Benchmark_INCLUDE_DIRS - the benchmark include directory
#  Benchmark_LIBRARIES - libraries needed to use benchmark

find_path(Benchmark_INCLUDE_DIR
  NAMES benchmark/benchmark.h
  NO_SYSTEM_ENVIRONMENT_PATH
  DOC "The directory where benchmark includes reside"
)

find_library(Benchmark_LIBRARY
  NAMES benchmark
  NO_SYSTEM_ENVIRONMENT_PATH
  DOC "The benchmark library"
)

set(Benchmark_INCLUDE_DIRS ${Benchmark_INCLUDE_DIR})
set(Benchmark_LIBRARIES ${Benchmark_LIBRARY})

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(Benchmark
  FOUND_VAR Benchmark_FOUND
  REQUIRED_VARS Benchmark_INCLUDE_DIR Benchmark_LIBRARY
)

# Hide the cache entries created by find_path/find_library above from the
# default cmake-gui/ccmake view.  (Previously this marked Benchmark_FOUND,
# which is a result variable rather than a cache entry, so it had no effect.)
mark_as_advanced(Benchmark_INCLUDE_DIR Benchmark_LIBRARY)
#if !defined(C10_MOBILE) || defined(FEATURE_TORCH_MOBILE)

// One flag per thread: grad mode is thread-local state, so toggling it in
// one thread (e.g. via NoGradGuard) does not affect other threads.
thread_local bool GradMode_enabled = true;

// Whether gradient tracking is currently enabled on this thread.
bool GradMode::is_enabled() {
  return GradMode_enabled;
}

// Enables/disables gradient tracking on this thread only.
void GradMode::set_enabled(bool enabled) {
  GradMode_enabled = enabled;
}

#else

// Mobile builds without thread_local support: GradMode is unavailable and
// any use is a hard runtime error.
bool GradMode::is_enabled() {
  throw std::runtime_error("GradMode is not supported on mobile");
}

void GradMode::set_enabled(bool enabled) {
  throw std::runtime_error("GradMode is not supported on mobile");
}

#endif
#ifndef THC_GENERIC_FILE
#error "You must define THC_GENERIC_FILE before including THGenerateFloatTypes.h"
#endif

/* Instantiates THC_GENERIC_FILE once for each floating-point scalar type
 * (half, float, double) by chaining the per-type generator headers. */
#define THCGenerateFloatTypes

/* Numeric type ids available to the generated code while it is being
 * instantiated; undefined again below so they do not leak to includers. */
#define THCTypeIdxByte 1
#define THCTypeIdxChar 2
#define THCTypeIdxShort 3
#define THCTypeIdxInt 4
#define THCTypeIdxLong 5
#define THCTypeIdxFloat 6
#define THCTypeIdxDouble 7
#define THCTypeIdxHalf 8
#define THCTypeIdx_(T) TH_CONCAT_2(THCTypeIdx,T)

#include <THC/THCGenerateHalfType.h>
#include <THC/THCGenerateFloatType.h>
#include <THC/THCGenerateDoubleType.h>

#undef THCTypeIdxByte
#undef THCTypeIdxChar
#undef THCTypeIdxShort
#undef THCTypeIdxInt
#undef THCTypeIdxLong
#undef THCTypeIdxFloat
#undef THCTypeIdxDouble
#undef THCTypeIdxHalf
#undef THCTypeIdx_

#undef THCGenerateFloatTypes
#undef THC_GENERIC_FILE
// Wraps a raw TH storage pointer in an ATen Storage; `retain` controls
// whether the refcount is bumped for the new handle.
Storage DeprecatedTypeProperties::unsafeStorageFromTH(void * th_pointer, bool retain) const {
  return at::unsafeStorageFromTH(th_pointer, retain);
}

// Copies `src` to this type's dtype (and optionally to `to_device`),
// always producing a fresh tensor (/*copy=*/true even when src already
// matches the target options).
Tensor DeprecatedTypeProperties::copy(const Tensor & src, bool non_blocking, c10::optional<Device> to_device) const {
  if (to_device) {
    return src.to(src.options().dtype(scalarType()).device(to_device), non_blocking, /*copy=*/true);
  }
  return src.to(src.options().dtype(scalarType()), non_blocking, /*copy=*/true);
}