├── .gitignore
├── thpp
│   ├── cuda
│   │   ├── detail
│   │   │   ├── TensorDefs.cpp
│   │   │   ├── StorageDefs.cpp
│   │   │   ├── Storage.h
│   │   │   └── Tensor.h
│   │   ├── CudaIOBuf.h
│   │   ├── CudaIOBuf.cpp
│   │   ├── State.cpp
│   │   ├── Storage.cpp
│   │   ├── test
│   │   │   ├── StorageTest.cpp
│   │   │   ├── TensorSerializationTest.cpp
│   │   │   └── TensorTest.cpp
│   │   ├── State.h
│   │   ├── Storage.h
│   │   ├── Tensor.h
│   │   ├── Tensor-inl.h
│   │   └── Storage-inl.h
│   ├── ForwardDeclarations.h
│   ├── detail
│   │   ├── TensorDefs.cpp
│   │   ├── StorageDefs.cpp
│   │   ├── TensorDefsGeneric.h
│   │   ├── StorageDefsGeneric.h
│   │   ├── Storage.h
│   │   ├── Tensor.h
│   │   ├── StorageGeneric.h
│   │   └── TensorGeneric.h
│   ├── test
│   │   ├── CommonTestLib.h
│   │   ├── CMakeLists.txt
│   │   ├── CommonTestLib-inl.h
│   │   ├── StorageTest.cpp
│   │   ├── TensorTest.cpp
│   │   └── TensorSerializationTest.cpp
│   ├── cmake
│   │   ├── FindGlog.cmake
│   │   ├── FindFolly.cmake
│   │   ├── MultiLevelIncludes.cmake
│   │   └── FindThrift.cmake
│   ├── if
│   │   └── Tensor.thrift
│   ├── StorageBase-inl.h
│   ├── build.sh
│   ├── StorageSerialization.cpp
│   ├── build_fbthrift_folly.sh
│   ├── Storage.cpp
│   ├── TensorPtr-inl.h
│   ├── StorageBase.h
│   ├── CMakeLists.txt
│   ├── TensorPtr.h
│   ├── TensorSerialization.cpp
│   ├── Tensor.h
│   ├── Storage.h
│   ├── Tensor-inl.h
│   ├── Storage-inl.h
│   ├── TensorBase.h
│   └── TensorBase-inl.h
├── README.md
├── LICENSE
├── PATENTS
├── CONTRIBUTING.md
└── INSTALL.md

/.gitignore:
--------------------------------------------------------------------------------
1 | build
2 | gtest-*
3 | 
4 | 
--------------------------------------------------------------------------------
/thpp/cuda/detail/TensorDefs.cpp:
--------------------------------------------------------------------------------
1 | /**
2 |  * Copyright 2015 Facebook
3 |  * @author Tudor Bosman (tudorb@fb.com)
4 |  */
5 | 
6 | #include <thpp/cuda/detail/Tensor.h>
7 | 
8 | namespace thpp { namespace detail {
9 | 
10 | constexpr const char* TensorOps<CudaTensor<float>>::kLuaTypeName;
11 | 
12 | }} // namespaces
13 | 
--------------------------------------------------------------------------------
/thpp/cuda/detail/StorageDefs.cpp:
--------------------------------------------------------------------------------
1 | /**
2 |  * Copyright 2015 Facebook
3 |  * @author Tudor Bosman (tudorb@fb.com)
4 |  */
5 | 
6 | #include <thpp/cuda/detail/Storage.h>
7 | 
8 | namespace thpp { namespace detail {
9 | 
10 | constexpr const char* StorageOps<CudaStorage<float>>::kLuaTypeName;
11 | 
12 | }} // namespaces
13 | 
--------------------------------------------------------------------------------
/thpp/ForwardDeclarations.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | namespace thpp {
4 | template <class T> class Tensor;
5 | template <class Tensor> class TensorPtr;
6 | template <class T> class Storage;
7 | template <class T> class IsTensor;
8 | template <class T> class IsTensorPtr;
9 | template <class T> class IsStorage;
10 | }
11 | 
--------------------------------------------------------------------------------
/thpp/detail/TensorDefs.cpp:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright (c) 2014, Facebook, Inc.
3 |  * All rights reserved.
4 |  *
5 |  * This source code is licensed under the BSD-style license found in the
6 |  * LICENSE file in the root directory of this source tree. An additional grant
7 |  * of patent rights can be found in the PATENTS file in the same directory.
8 |  *
9 |  */
10 | 
11 | #include <thpp/Tensor.h>
12 | 
13 | #define THPP_INCLUDE_TENSOR_DEFS
14 | #include "thpp/detail/TensorDefsGeneric.h"
15 | #include <TH/THGenerateAllTypes.h>
16 | #undef THPP_INCLUDE_TENSOR_DEFS
17 | 
--------------------------------------------------------------------------------
/thpp/detail/StorageDefs.cpp:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright (c) 2014, Facebook, Inc.
3 |  * All rights reserved.
4 |  *
5 |  * This source code is licensed under the BSD-style license found in the
6 |  * LICENSE file in the root directory of this source tree. An additional grant
7 |  * of patent rights can be found in the PATENTS file in the same directory.
8 |  *
9 |  */
10 | 
11 | #include <thpp/Storage.h>
12 | 
13 | #define THPP_INCLUDE_STORAGE_DEFS
14 | #include "thpp/detail/StorageDefsGeneric.h"
15 | #include <TH/THGenerateAllTypes.h>
16 | #undef THPP_INCLUDE_STORAGE_DEFS
17 | 
--------------------------------------------------------------------------------
/thpp/test/CommonTestLib.h:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright (c) 2014, Facebook, Inc.
3 |  * All rights reserved.
4 |  *
5 |  * This source code is licensed under the BSD-style license found in the
6 |  * LICENSE file in the root directory of this source tree. An additional grant
7 |  * of patent rights can be found in the PATENTS file in the same directory.
8 |  *
9 |  */
10 | 
11 | #pragma once
12 | 
13 | #include <gtest/gtest.h>
14 | 
15 | #include <thpp/Storage.h>
16 | #include <thpp/Tensor.h>
17 | 
18 | namespace thpp { namespace test {
19 | 
20 | template <class T>
21 | void testUniqueMove();
22 | 
23 | template <class T>
24 | void testTensorPtr();
25 | 
26 | }} // namespaces
27 | 
28 | #include <thpp/test/CommonTestLib-inl.h>
29 | 
--------------------------------------------------------------------------------
/thpp/cmake/FindGlog.cmake:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2014, Facebook, Inc.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the BSD-style license found in the
5 | # LICENSE file in the root directory of this source tree. An additional grant
6 | # of patent rights can be found in the PATENTS file in the same directory.
7 | #
8 | # GLOG_FOUND
9 | # GLOG_INCLUDE_DIR
10 | # GLOG_LIBRARIES
11 | 
12 | CMAKE_MINIMUM_REQUIRED(VERSION 2.8.7 FATAL_ERROR)
13 | 
14 | INCLUDE(FindPackageHandleStandardArgs)
15 | 
16 | FIND_LIBRARY(GLOG_LIBRARY glog)
17 | FIND_PATH(GLOG_INCLUDE_DIR "glog/logging.h")
18 | 
19 | SET(GLOG_LIBRARIES ${GLOG_LIBRARY})
20 | 
21 | FIND_PACKAGE_HANDLE_STANDARD_ARGS(
22 |   Glog
23 |   REQUIRED_VARS GLOG_INCLUDE_DIR GLOG_LIBRARY)
24 | 
--------------------------------------------------------------------------------
/thpp/cmake/FindFolly.cmake:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2014, Facebook, Inc.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the BSD-style license found in the
5 | # LICENSE file in the root directory of this source tree. An additional grant
6 | # of patent rights can be found in the PATENTS file in the same directory.
7 | #
8 | # - Try to find folly
9 | # This will define
10 | # FOLLY_FOUND
11 | # FOLLY_INCLUDE_DIR
12 | # FOLLY_LIBRARIES
13 | 
14 | CMAKE_MINIMUM_REQUIRED(VERSION 2.8.7 FATAL_ERROR)
15 | 
16 | INCLUDE(FindPackageHandleStandardArgs)
17 | 
18 | FIND_LIBRARY(FOLLY_LIBRARY folly)
19 | FIND_PATH(FOLLY_INCLUDE_DIR "folly/String.h")
20 | 
21 | SET(FOLLY_LIBRARIES ${FOLLY_LIBRARY})
22 | 
23 | FIND_PACKAGE_HANDLE_STANDARD_ARGS(Folly
24 |   REQUIRED_VARS FOLLY_INCLUDE_DIR FOLLY_LIBRARIES)
25 | 
--------------------------------------------------------------------------------
/thpp/detail/TensorDefsGeneric.h:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright (c) 2014, Facebook, Inc.
3 |  * All rights reserved.
4 |  *
5 |  * This source code is licensed under the BSD-style license found in the
6 |  * LICENSE file in the root directory of this source tree. An additional grant
7 |  * of patent rights can be found in the PATENTS file in the same directory.
8 |  *
9 |  */
10 | 
11 | // override-include-guard
12 | 
13 | #ifndef THPP_INCLUDE_TENSOR_DEFS
14 | #error This file may only be included from TensorDefs.cpp
15 | #endif
16 | 
17 | #ifndef TH_GENERIC_FILE
18 | #define TH_GENERIC_FILE "thpp/detail/TensorDefsGeneric.h"
19 | #else
20 | 
21 | namespace thpp { namespace detail {
22 | 
23 | constexpr const char* TensorOps<Tensor<real>>::kLuaTypeName;
24 | 
25 | }} // namespaces
26 | 
27 | #endif /* TH_GENERIC_FILE */
28 | 
--------------------------------------------------------------------------------
/thpp/detail/StorageDefsGeneric.h:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright (c) 2014, Facebook, Inc.
3 |  * All rights reserved.
4 |  *
5 |  * This source code is licensed under the BSD-style license found in the
6 |  * LICENSE file in the root directory of this source tree. An additional grant
7 |  * of patent rights can be found in the PATENTS file in the same directory.
8 |  *
9 |  */
10 | 
11 | // override-include-guard
12 | 
13 | #ifndef THPP_INCLUDE_STORAGE_DEFS
14 | #error This file may only be included from StorageDefs.cpp
15 | #endif
16 | 
17 | #ifndef TH_GENERIC_FILE
18 | #define TH_GENERIC_FILE "thpp/detail/StorageDefsGeneric.h"
19 | #else
20 | 
21 | namespace thpp { namespace detail {
22 | 
23 | constexpr const char* StorageOps<Storage<real>>::kLuaTypeName;
24 | 
25 | }} // namespaces
26 | 
27 | #endif /* TH_GENERIC_FILE */
28 | 
--------------------------------------------------------------------------------
/thpp/if/Tensor.thrift:
--------------------------------------------------------------------------------
1 | // Copyright 2014 Facebook
2 | 
3 | namespace cpp2 thpp
4 | 
5 | typedef binary (cpp2.type = "folly::IOBuf") IOBuf
6 | 
7 | enum ThriftTensorDataType {
8 |   BYTE = 1,
9 |   INT32 = 2,
10 |   INT64 = 3,
11 |   FLOAT = 4,   // IEEE-754 "binary32"
12 |   DOUBLE = 5,  // IEEE-754 "binary64"
13 | }
14 | 
15 | enum ThriftTensorEndianness {
16 |   LITTLE = 1,
17 |   BIG = 2,
18 | 
19 |   // Native is never used on the wire, just as argument to serialization /
20 |   // deserialization functions
21 |   NATIVE = 3,
22 | }
23 | 
24 | struct ThriftTensor {
25 |   1: required ThriftTensorDataType dataType,
26 |   2: required ThriftTensorEndianness endianness,
27 |   3: required list<i64> sizes,
28 |   4: IOBuf data,
29 | }
30 | 
31 | struct ThriftStorage {
32 |   1: required ThriftTensorDataType dataType,
33 |   2: required ThriftTensorEndianness endianness,
34 |   3: IOBuf data,
35 | }
36 | 
--------------------------------------------------------------------------------
/thpp/detail/Storage.h:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright (c) 2014, Facebook, Inc.
3 |  * All rights reserved.
4 |  *
5 |  * This source code is licensed under the BSD-style license found in the
6 |  * LICENSE file in the root directory of this source tree. An additional grant
7 |  * of patent rights can be found in the PATENTS file in the same directory.
8 |  *
9 |  */
10 | 
11 | #ifndef THPP_DETAIL_STORAGE_H_
12 | #define THPP_DETAIL_STORAGE_H_
13 | 
14 | #include <TH/TH.h>
15 | #ifndef NO_FOLLY
16 | #include <folly/Range.h>
17 | #endif
18 | 
19 | namespace thpp {
20 | 
21 | #ifndef NO_FOLLY
22 | using folly::Range;
23 | #endif
24 | 
25 | template <class T> class Storage;
26 | 
27 | namespace detail {
28 | template <class T> struct StorageOps;
29 | } // namespace detail
30 | 
31 | #include <thpp/detail/StorageGeneric.h>
32 | #include <TH/THGenerateAllTypes.h>
33 | 
34 | } // namespaces
35 | 
36 | #endif /* THPP_DETAIL_STORAGE_H_ */
37 | 
--------------------------------------------------------------------------------
/thpp/detail/Tensor.h:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright (c) 2014, Facebook, Inc.
3 |  * All rights reserved.
4 |  *
5 |  * This source code is licensed under the BSD-style license found in the
6 |  * LICENSE file in the root directory of this source tree. An additional grant
7 |  * of patent rights can be found in the PATENTS file in the same directory.
8 |  *
9 |  */
10 | 
11 | #ifndef THPP_DETAIL_TENSOR_H_
12 | #define THPP_DETAIL_TENSOR_H_
13 | 
14 | #include <TH/TH.h>
15 | #include <thpp/detail/Storage.h>
16 | #ifndef NO_FOLLY
17 | #include <folly/Range.h>
18 | #include <folly/io/IOBuf.h>
19 | #endif
20 | 
21 | namespace thpp {
22 | 
23 | template <class T> class Tensor;
24 | 
25 | namespace detail {
26 | template <class T> struct TensorOps;
27 | } // namespace detail
28 | 
29 | #include <thpp/detail/TensorGeneric.h>
30 | #include <TH/THGenerateAllTypes.h>
31 | 
32 | } // namespaces
33 | 
34 | #endif /* THPP_DETAIL_TENSOR_H_ */
35 | 
--------------------------------------------------------------------------------
/thpp/test/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2014, Facebook, Inc.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the BSD-style license found in the
5 | # LICENSE file in the root directory of this source tree. An additional grant
6 | # of patent rights can be found in the PATENTS file in the same directory.
7 | #
8 | 
9 | ADD_EXECUTABLE(storage_test StorageTest.cpp)
10 | TARGET_LINK_LIBRARIES(storage_test thpp gtest gtest_main)
11 | ADD_TEST(storage_test storage_test)
12 | 
13 | ADD_EXECUTABLE(tensor_test TensorTest.cpp)
14 | TARGET_LINK_LIBRARIES(tensor_test thpp gtest gtest_main)
15 | ADD_TEST(tensor_test tensor_test)
16 | 
17 | IF(NOT NO_THRIFT AND NOT NO_FOLLY)
18 |   ADD_EXECUTABLE(tensor_serialization_test TensorSerializationTest.cpp)
19 |   TARGET_LINK_LIBRARIES(tensor_serialization_test thpp gtest gtest_main)
20 |   ADD_TEST(tensor_serialization_test tensor_serialization_test)
21 | ENDIF()
22 | 
--------------------------------------------------------------------------------
/thpp/cuda/CudaIOBuf.h:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright 2016 Facebook, Inc.
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  *   http://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include <folly/io/IOBuf.h>
20 | 
21 | namespace thpp {
22 | 
23 | // Create an IOBuf of the given capacity. The memory is allocated on the
24 | // requested CUDA device. (-1 = current device)
25 | // Just like IOBuf::CREATE, the buffer is created empty (the initial length
26 | // is 0). Use IOBuf::append() to increase the length.
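//
// A minimal usage sketch, mirroring the pattern in
// thpp/cuda/test/StorageTest.cpp later in this dump (n is assumed to be the
// desired element count):
//
//   auto buf = createCudaIOBuf(n * sizeof(float));  // length starts at 0
//   buf.append(n * sizeof(float));                  // mark all n floats used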
27 | folly::IOBuf createCudaIOBuf(uint64_t capacity, int device = -1);
28 | 
29 | } // namespaces
30 | 
--------------------------------------------------------------------------------
/thpp/StorageBase-inl.h:
--------------------------------------------------------------------------------
1 | /**
2 |  * Copyright 2015 Facebook
3 |  * @author Tudor Bosman (tudorb@fb.com)
4 |  */
5 | 
6 | #ifndef THPP_STORAGEBASE_H_
7 | #error This file may only be included from thpp/StorageBase.h
8 | #endif
9 | 
10 | #ifndef UNLIKELY
11 | #define UNLIKELY(x) (x)
12 | #endif
13 | 
14 | namespace thpp {
15 | 
16 | template <class T, class Derived>
17 | void StorageBase<T, Derived>::up() {
18 |   if (t_) Ops::_retain(t_);
19 | }
20 | 
21 | template <class T, class Derived>
22 | void StorageBase<T, Derived>::down() {
23 |   if (t_) Ops::_free(t_);
24 | }
25 | 
26 | template <class T, class Derived>
27 | void StorageBase<T, Derived>::check(size_t index) const {
28 |   if (UNLIKELY(index >= size())) {
29 |     throw std::out_of_range("Storage index out of range");
30 |   }
31 | }
32 | 
33 | template <class T, class Derived>
34 | auto StorageBase<T, Derived>::moveAsTH() -> THType* {
35 |   using std::swap;
36 |   THType* out = nullptr;
37 |   swap(out, t_);
38 |   return out;
39 | }
40 | 
41 | template <class T, class Derived>
42 | void StorageBase<T, Derived>::resizeUninitialized(size_t n) {
43 |   if (n == 0) {
44 |     down();
45 |     t_ = nullptr;
46 |     return;
47 |   }
48 | 
49 |   if (t_) {
50 |     Ops::_resize(t_, n);
51 |   } else {
52 |     t_ = Ops::_newWithSize(n);
53 |   }
54 | }
55 | 
56 | } // namespaces
57 | 
--------------------------------------------------------------------------------
/thpp/cuda/CudaIOBuf.cpp:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright 2016 Facebook, Inc.
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  *   http://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <thpp/cuda/CudaIOBuf.h>
18 | #include <thpp/cuda/State.h>
19 | 
20 | namespace thpp {
21 | 
22 | namespace {
23 | 
24 | void freeCudaIOBuf(void* ptr, void* /*userData*/) {
25 |   cuda::check(cudaFree(ptr));
26 | }
27 | 
28 | } // namespace
29 | 
30 | folly::IOBuf createCudaIOBuf(uint64_t capacity, int device) {
31 |   cuda::DeviceGuard guard;
32 |   if (device != -1) {
33 |     cuda::setDevice(device);
34 |   }
35 | 
36 |   void* ptr;
37 |   cuda::check(cudaMalloc(&ptr, capacity));
38 | 
39 |   return folly::IOBuf(folly::IOBuf::TAKE_OWNERSHIP,
40 |                       ptr, capacity, 0 /* initial length */,
41 |                       freeCudaIOBuf);
42 | }
43 | 
44 | } // namespaces
45 | 
--------------------------------------------------------------------------------
/thpp/cuda/State.cpp:
--------------------------------------------------------------------------------
1 | /**
2 |  * Copyright 2015 Facebook
3 |  * @author Tudor Bosman (tudorb@fb.com)
4 |  */
5 | 
6 | #include <thpp/cuda/State.h>
7 | #include <glog/logging.h>
8 | 
9 | namespace thpp {
10 | 
11 | namespace {
12 | 
13 | class THCStateHolder {
14 |  public:
15 |   THCStateHolder();
16 |   ~THCStateHolder();
17 | 
18 |   THCState* state() { return &state_; }
19 | 
20 |  private:
21 |   void* tmp_;
22 |   THCState state_;
23 | };
24 | 
25 | THCStateHolder::THCStateHolder() {
26 |   memset(&state_, 0, sizeof(THCState));
27 |   THCudaInit(&state_);
28 |   // TODO(tudorb): There must be a better way of doing this.
We need to check 29 | // that we're not unloading the driver (running during process exit); we 30 | // do that by checking at destruction time if freeing fails with a 31 | // specific error. 32 | CHECK_EQ(cudaMalloc(&tmp_, 1), cudaSuccess); 33 | } 34 | 35 | THCStateHolder::~THCStateHolder() { 36 | cudaError_t err = cudaFree(tmp_); 37 | if (err == cudaSuccess) { 38 | THCudaShutdown(&state_); 39 | } else { 40 | CHECK_EQ(err, cudaErrorCudartUnloading); 41 | } 42 | } 43 | 44 | folly::ThreadLocal gDefaultTHCState; 45 | 46 | } // namespace 47 | 48 | namespace detail { 49 | folly::ThreadLocal gCurrentTHCState; 50 | } // namespace detail 51 | 52 | void setDefaultTHCState() { 53 | setTHCState(gDefaultTHCState->state()); 54 | } 55 | } // namespaces 56 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TH++: A C++ tensor library 2 | 3 | TH++ is a C++ tensor library, implemented as a wrapper around the 4 | [TH library](https://github.com/torch/torch7/tree/master/lib/TH) (the low-level 5 | tensor library in [Torch](http://torch.ch/)). There is unfortunately little 6 | documentation about TH, but the interface mimics the Lua 7 | [Tensor](https://github.com/torch/torch7/blob/master/doc/tensor.md) interface. 8 | 9 | The core of the library is the `Tensor` class template, where `T` is a 10 | numeric type (usually floating point, `float` or `double`). A tensor is 11 | a multi-dimensional array, usually in C (row-major) order, but many 12 | operations (transpose, slice, etc) are performed by permuting indexes and 13 | changing offsets, so the data is no longer contiguous / in row-major order. 14 | Read the [numpy.ndarray 15 | documentation](http://docs.scipy.org/doc/numpy/reference/arrays.ndarray.html) 16 | for more details about the strided indexing scheme. 17 | 18 | Tensors may also share memory with other tensors; operations that manipulate 19 | metadata (select, slice, transpose, etc) will make the destination tensor 20 | share memory with the source. To ensure you have a unique copy, call 21 | `force(Tensor::UNIQUE)` on the tensor. Similarly, to ensure you have 22 | a contiguous C (row-major) tensor, call `force(Tensor::CONTIGUOUS)`, which 23 | may also create a unique copy. 24 | 25 | Please see the header file `` for more details. 26 | -------------------------------------------------------------------------------- /thpp/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # 3 | # Copyright (c) 2014, Facebook, Inc. 4 | # All rights reserved. 5 | # 6 | # This source code is licensed under the BSD-style license found in the 7 | # LICENSE file in the root directory of this source tree. An additional grant 8 | # of patent rights can be found in the PATENTS file in the same directory. 9 | # 10 | # 11 | set -o pipefail 12 | echo "If you don't have folly or thrift installed, try doing" 13 | echo " THPP_NOFB=1 ./build.sh" 14 | 15 | if [ ! -z "$THPP_NOFB" ]; then 16 | FB="-DNO_THRIFT=ON -DNO_FOLLY=ON" 17 | fi 18 | 19 | if [[ ! -r ./Tensor.h ]]; then 20 | echo "Please run from the thpp subdirectory." 
>&2 21 | exit 1 22 | fi 23 | 24 | if [[ "$OSTYPE" == "linux-gnu" ]]; then 25 | SHA="sha1sum" 26 | elif [[ "$OSTYPE" == "darwin"* ]]; then 27 | SHA="shasum" 28 | fi 29 | 30 | rm -rf googletest-release-1.7.0 googletest-release-1.7.0.zip 31 | curl -JLOk https://github.com/google/googletest/archive/release-1.7.0.zip 32 | if [[ $($SHA -b googletest-release-1.7.0.zip | cut -d' ' -f1) != \ 33 | 'f89bc9f55477df2fde082481e2d709bfafdb057b' ]]; then 34 | echo "Invalid googletest-release-1.7.0.zip file" >&2 35 | exit 1 36 | fi 37 | unzip googletest-release-1.7.0.zip 38 | 39 | # Build in a separate directory 40 | mkdir -p build 41 | cd build 42 | 43 | export CMAKE_LIBRARY_PATH=$(dirname $(which th))/../lib 44 | 45 | # Configure 46 | cmake $FB .. 47 | 48 | # Make 49 | make 50 | 51 | # Run tests 52 | ctest 53 | 54 | # Install 55 | make install 56 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD License 2 | 3 | For TH++ software 4 | 5 | Copyright (c) 2014, Facebook, Inc. All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without modification, 8 | are permitted provided that the following conditions are met: 9 | 10 | * Redistributions of source code must retain the above copyright notice, this 11 | list of conditions and the following disclaimer. 12 | 13 | * Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | * Neither the name Facebook nor the names of its contributors may be used to 18 | endorse or promote products derived from this software without specific 19 | prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 22 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 28 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /thpp/StorageSerialization.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. An additional grant 7 | * of patent rights can be found in the PATENTS file in the same directory. 
8 | * 9 | */ 10 | 11 | #include 12 | 13 | //////////////////////////////////////////////////////////////////////////////// 14 | #if !defined(NO_THRIFT) && !defined(NO_FOLLY) 15 | //////////////////////////////////////////////////////////////////////////////// 16 | 17 | namespace thpp { 18 | namespace detail { 19 | 20 | void serialize( 21 | ThriftStorage& out, 22 | folly::IOBuf&& data, 23 | ThriftTensorDataType dtype, 24 | ThriftTensorEndianness endianness, 25 | SharingMode sharing) { 26 | DCHECK(!data.isChained()); 27 | if (endianness == ThriftTensorEndianness::NATIVE) { 28 | endianness = gMachineEndianness; 29 | } else { 30 | CHECK(endianness == gMachineEndianness) 31 | << "Non-native endianness not yet implemented"; 32 | } 33 | 34 | out.dataType = dtype; 35 | out.endianness = endianness; 36 | detail::applySharingMode(data, sharing); 37 | out.data = std::move(data); 38 | } 39 | 40 | template folly::IOBuf deserialize(const ThriftStorage& in, 41 | ThriftTensorDataType dtype); 42 | 43 | }} // namespaces 44 | 45 | //////////////////////////////////////////////////////////////////////////////// 46 | #endif // !NO_THRIFT && !NO_FOLLY 47 | //////////////////////////////////////////////////////////////////////////////// 48 | -------------------------------------------------------------------------------- /thpp/cuda/Storage.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 Facebook, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | namespace thpp { 20 | 21 | namespace detail { 22 | 23 | CudaIOBufAllocator::CudaIOBufAllocator(folly::IOBuf&& iob) 24 | : iob_(std::move(iob)) { } 25 | 26 | cudaError_t CudaIOBufAllocator::malloc( 27 | void* /*ctx*/, 28 | void** /*ptr*/, 29 | size_t /*size*/, 30 | cudaStream_t /*stream*/) { 31 | LOG(FATAL) << "CudaIOBufAllocator::malloc should never be called"; 32 | return cudaSuccess; // not reached 33 | } 34 | 35 | cudaError_t CudaIOBufAllocator::realloc( 36 | void* /*ctx*/, 37 | void** /*ptr*/, 38 | size_t /*oldSize*/, 39 | size_t /*newSize*/, 40 | cudaStream_t /*stream*/) { 41 | LOG(FATAL) << "CudaIOBufAllocator::realloc should never be called"; 42 | return cudaSuccess; // not reached 43 | } 44 | 45 | cudaError_t CudaIOBufAllocator::free(void* /*stat*/, void* ptr) { 46 | CHECK_EQ(ptr, iob_.writableData()); 47 | delete this; 48 | return cudaSuccess; 49 | } 50 | 51 | } // namespace detail 52 | 53 | } // namespaces 54 | -------------------------------------------------------------------------------- /thpp/test/CommonTestLib-inl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. An additional grant 7 | * of patent rights can be found in the PATENTS file in the same directory. 
8 | * 9 | */ 10 | 11 | #pragma once 12 | 13 | namespace thpp { namespace test { 14 | 15 | template 16 | void testUniqueMove() { 17 | auto a = T({2}); 18 | EXPECT_TRUE(a.isUnique()); 19 | EXPECT_EQ(2, a.size()); 20 | 21 | auto b = a; 22 | EXPECT_FALSE(a.isUnique()); 23 | EXPECT_EQ(2, a.size()); 24 | EXPECT_FALSE(b.isUnique()); 25 | EXPECT_EQ(2, b.size()); 26 | 27 | auto c = std::move(a); 28 | EXPECT_TRUE(a.isUnique()); 29 | EXPECT_EQ(0, a.size()); 30 | EXPECT_FALSE(b.isUnique()); 31 | EXPECT_EQ(2, b.size()); 32 | EXPECT_FALSE(c.isUnique()); 33 | EXPECT_EQ(2, c.size()); 34 | 35 | b.clear(); 36 | EXPECT_TRUE(a.isUnique()); 37 | EXPECT_EQ(0, a.size()); 38 | EXPECT_TRUE(b.isUnique()); 39 | EXPECT_EQ(0, b.size()); 40 | EXPECT_TRUE(c.isUnique()); 41 | EXPECT_EQ(2, c.size()); 42 | } 43 | 44 | template 45 | void testTensorPtr() { 46 | auto p = T::makePtr({2}); 47 | auto& x = *p; 48 | x.fill(1); 49 | EXPECT_EQ(2, x.sumall()); 50 | 51 | auto q = p; 52 | auto& y = *q; 53 | y.resize({4}); 54 | y.fill(2); 55 | 56 | EXPECT_EQ(8, x.sumall()); 57 | EXPECT_EQ(8, y.sumall()); 58 | 59 | EXPECT_TRUE(&p != &q); 60 | 61 | auto z = x; 62 | z.resize({6}); 63 | z.fill(3); 64 | 65 | EXPECT_EQ(12, y.sumall()); 66 | EXPECT_EQ(18, z.sumall()); 67 | } 68 | 69 | 70 | }} // namespaces 71 | -------------------------------------------------------------------------------- /thpp/cuda/test/StorageTest.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015 Facebook 3 | * @author Tudor Bosman (tudorb@fb.com) 4 | */ 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | namespace thpp { namespace cuda { namespace test { 14 | 15 | namespace { 16 | 17 | void testStorage(CudaStorage& storage) { 18 | auto byteSize = storage.size() * sizeof(float); 19 | 20 | EXPECT_EQ(cudaSuccess, cudaMemset(storage.data(), 42, byteSize)); 21 | char buf[byteSize]; 22 | memset(buf, 0, byteSize); 23 | 24 | EXPECT_EQ(cudaSuccess, cudaMemcpy(buf, storage.data(), byteSize, 25 | cudaMemcpyDeviceToHost)); 26 | for (size_t i = 0; i < byteSize; ++i) { 27 | EXPECT_EQ(42, buf[i]); 28 | } 29 | } 30 | 31 | } // namespace 32 | 33 | TEST(Storage, Simple) { 34 | constexpr size_t n = 100; 35 | CudaStorage storage; 36 | storage.resizeUninitialized(n); 37 | EXPECT_EQ(n, storage.size()); 38 | testStorage(storage); 39 | } 40 | 41 | TEST(Storage, CudaIOBufEmpty) { 42 | constexpr size_t n = 100; 43 | CudaStorage storage(createCudaIOBuf(n * sizeof(float)), 44 | SHARE_IOBUF_MANAGED, 45 | false /* resizable */); 46 | EXPECT_EQ(0, storage.size()); // buffer is created empty! 47 | } 48 | 49 | TEST(Storage, CudaIOBuf) { 50 | constexpr size_t n = 100; 51 | auto buf = createCudaIOBuf(n * sizeof(float)); 52 | buf.append(n * sizeof(float)); 53 | CudaStorage storage(std::move(buf), SHARE_IOBUF_MANAGED, false); 54 | EXPECT_EQ(n, storage.size()); 55 | testStorage(storage); 56 | } 57 | 58 | }}} // namespaces 59 | -------------------------------------------------------------------------------- /thpp/build_fbthrift_folly.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # 3 | # Copyright (c) 2014, Facebook, Inc. 4 | # All rights reserved. 5 | # 6 | # This source code is licensed under the BSD-style license found in the 7 | # LICENSE file in the root directory of this source tree. An additional grant 8 | # of patent rights can be found in the PATENTS file in the same directory. 
9 | # 10 | # 11 | set -o pipefail 12 | 13 | sudo rm -rf /usr/local/include/folly 14 | sudo rm -rf /usr/local/include/wangle 15 | 16 | sudo rm -rf /usr/local/lib/libfolly* 17 | sudo rm -rf /usr/local/lib/libwangle* 18 | 19 | 20 | cd /tmp 21 | 22 | dir=$(mktemp --tmpdir -d follythrift-build.XXXXXX) 23 | 24 | cd $dir 25 | 26 | #git clone -b v0.54.0 --depth 1 https://github.com/facebook/folly.git 27 | git clone -b v0.30.0 --depth 1 https://github.com/facebook/fbthrift.git 28 | git clone -b v0.12.0 --depth 1 https://github.com/facebook/wangle 29 | 30 | git clone https://github.com/facebook/folly.git 31 | cd folly 32 | git reset --hard 0fdbb61ecd5679f0cd2bf13f867e9b72212ec371 33 | cd .. 34 | # git clone --depth 1 https://github.com/facebook/fbthrift.git 35 | # git clone --depth 1 https://github.com/facebook/wangle 36 | 37 | echo 38 | echo Building folly 39 | echo 40 | 41 | cd $dir/folly/folly 42 | autoreconf -ivf 43 | ./configure 44 | make 45 | sudo make install 46 | sudo ldconfig # reload the lib paths after freshly installed folly. fbthrift needs it. 47 | 48 | 49 | echo 50 | echo Building wangle 51 | echo 52 | 53 | cd $dir/wangle/wangle 54 | 55 | cmake . 56 | make 57 | sudo make install 58 | sudo ldconfig # reload the lib paths after freshly installed wangle. fbthrift needs it. 59 | 60 | echo 61 | echo Building fbthrift 62 | echo 63 | 64 | cd $dir/fbthrift/thrift 65 | autoreconf -ivf 66 | ./configure 67 | make 68 | sudo make install 69 | -------------------------------------------------------------------------------- /thpp/cuda/detail/Storage.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015 Facebook 3 | * @author Tudor Bosman (tudorb@fb.com) 4 | */ 5 | 6 | #ifndef THPP_CUDA_DETAIL_STORAGE_H_ 7 | #define THPP_CUDA_DETAIL_STORAGE_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | namespace thpp { 14 | 15 | template class CudaStorage; 16 | 17 | namespace detail { 18 | 19 | // Only float is currently supported. 
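// (THC's THCudaStorage holds float elements, so CudaStorage<float> is the
// only specialization that can be backed by it; other element types would
// need their own THC storage types and matching StorageOps specializations.)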
20 | template <> struct StorageOps<CudaStorage<float>> {
21 |   typedef THCudaStorage type;
22 | 
23 |   static THCudaStorage* _newWithSize(long size) {
24 |     return THCudaStorage_newWithSize(getTHCState(), size);
25 |   }
26 |   static THCudaStorage* _newWithData(float* data, long size) {
27 |     return THCudaStorage_newWithData(getTHCState(), data, size);
28 |   }
29 |   static void _setFlag(THCudaStorage* storage, const char flag) {
30 |     THCudaStorage_setFlag(getTHCState(), storage, flag);
31 |   }
32 |   static void _clearFlag(THCudaStorage* storage, const char flag) {
33 |     THCudaStorage_clearFlag(getTHCState(), storage, flag);
34 |   }
35 |   static void _retain(THCudaStorage* storage) {
36 |     THCudaStorage_retain(getTHCState(), storage);
37 |   }
38 |   static void _free(THCudaStorage* storage) {
39 |     THCudaStorage_free(getTHCState(), storage);
40 |   }
41 |   static void _resize(THCudaStorage* storage, long size) {
42 |     THCudaStorage_resize(getTHCState(), storage, size);
43 |   }
44 |   static THCudaStorage* _newWithDataAndAllocator(
45 |       float* data, long size,
46 |       THCDeviceAllocator* allocator, void* allocatorContext) {
47 |     return THCudaStorage_newWithDataAndAllocator(
48 |         getTHCState(), data, size, allocator, allocatorContext);
49 |   }
50 | 
51 |   static constexpr const char* kLuaTypeName = "torch.CudaStorage";
52 | };
53 | 
54 | } // namespace detail
55 | 
56 | } // namespaces
57 | 
58 | #endif /* THPP_CUDA_DETAIL_STORAGE_H_ */
59 | 
--------------------------------------------------------------------------------
/thpp/cuda/State.h:
--------------------------------------------------------------------------------
1 | /**
2 |  * Copyright 2015 Facebook
3 |  * @author Tudor Bosman (tudorb@fb.com)
4 |  */
5 | 
6 | #ifndef THPP_CUDA_STATE_H_
7 | #define THPP_CUDA_STATE_H_
8 | 
9 | #include <THC/THC.h>
10 | #include <folly/Conv.h>
11 | #include <folly/ThreadLocal.h>
12 | 
13 | namespace thpp {
14 | 
15 | // TODO(tudorb): Nice C++ interface around this.
16 | 
17 | // By default, we use one THCState per thread, created on first use.
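// (getTHCState() below lazily installs that default via setDefaultTHCState()
// the first time it is called on a thread.)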
18 | //
19 | // You may associate the current thread with a different THCState object if
20 | // you wish (for example, so that Lua code using cutorch will use the same
21 | // state)
22 | 
23 | namespace detail {
24 | extern folly::ThreadLocal<THCState*> gCurrentTHCState;
25 | } // namespace
26 | 
27 | inline void setTHCState(THCState* state) {
28 |   DCHECK(state);
29 |   *detail::gCurrentTHCState = state;
30 | }
31 | 
32 | void setDefaultTHCState();
33 | 
34 | inline THCState* getTHCState() {
35 |   auto& state = *detail::gCurrentTHCState;
36 |   if (!state) {
37 |     setDefaultTHCState();
38 |     DCHECK(state);
39 |   }
40 |   return state;
41 | }
42 | 
43 | namespace cuda {
44 | 
45 | inline void check(cudaError_t err) {
46 |   if (err != cudaSuccess) {
47 |     throw std::runtime_error(folly::to<std::string>("CUDA error ", err));
48 |   }
49 | }
50 | 
51 | inline int getDevice() {
52 |   int device;
53 |   check(cudaGetDevice(&device));
54 |   return device;
55 | }
56 | 
57 | inline void setDevice(int dev) {
58 |   check(cudaSetDevice(dev));
59 | }
60 | 
61 | class DeviceGuard {
62 |  public:
63 |   explicit DeviceGuard() : device_(getDevice()) { }
64 |   ~DeviceGuard() { setDevice(device_); }
65 | 
66 |  private:
67 |   DeviceGuard(const DeviceGuard&) = delete;
68 |   DeviceGuard(DeviceGuard&& other) = delete;
69 |   DeviceGuard& operator=(const DeviceGuard&) = delete;
70 |   DeviceGuard& operator=(DeviceGuard&& other) = delete;
71 | 
72 |   int device_;
73 | };
74 | 
75 | } // namespace cuda
76 | 
77 | } // namespaces
78 | 
79 | #endif /* THPP_CUDA_STATE_H_ */
80 | 
--------------------------------------------------------------------------------
/PATENTS:
--------------------------------------------------------------------------------
1 | Additional Grant of Patent Rights Version 2
2 | 
3 | "Software" means the TH++ software distributed by Facebook, Inc.
4 | 
5 | Facebook, Inc. ("Facebook") hereby grants to each recipient of the Software
6 | ("you") a perpetual, worldwide, royalty-free, non-exclusive, irrevocable
7 | (subject to the termination provision below) license under any Necessary
8 | Claims, to make, have made, use, sell, offer to sell, import, and otherwise
9 | transfer the Software. For avoidance of doubt, no license is granted under
10 | Facebook’s rights in any patent claims that are infringed by (i) modifications
11 | to the Software made by you or any third party or (ii) the Software in
12 | combination with any software or other technology.
13 | 
14 | The license granted hereunder will terminate, automatically and without notice,
15 | if you (or any of your subsidiaries, corporate affiliates or agents) initiate
16 | directly or indirectly, or take a direct financial interest in, any Patent
17 | Assertion: (i) against Facebook or any of its subsidiaries or corporate
18 | affiliates, (ii) against any party if such Patent Assertion arises in whole or
19 | in part from any software, technology, product or service of Facebook or any of
20 | its subsidiaries or corporate affiliates, or (iii) against any party relating
21 | to the Software. Notwithstanding the foregoing, if Facebook or any of its
22 | subsidiaries or corporate affiliates files a lawsuit alleging patent
23 | infringement against you in the first instance, and you respond by filing a
24 | patent infringement counterclaim in that lawsuit against that party that is
25 | unrelated to the Software, the license granted hereunder will not terminate
26 | under section (i) of this paragraph due to such counterclaim.
27 | 
28 | A "Necessary Claim" is a claim of a patent owned by Facebook that is
29 | necessarily infringed by the Software standing alone.
30 | 
31 | A "Patent Assertion" is any lawsuit or other action alleging direct, indirect,
32 | or contributory infringement or inducement to infringe any patent, including a
33 | cross-claim or counterclaim.
--------------------------------------------------------------------------------
/thpp/detail/StorageGeneric.h:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright (c) 2014, Facebook, Inc.
3 |  * All rights reserved.
4 |  *
5 |  * This source code is licensed under the BSD-style license found in the
6 |  * LICENSE file in the root directory of this source tree. An additional grant
7 |  * of patent rights can be found in the PATENTS file in the same directory.
8 |  *
9 |  */
10 | 
11 | #ifndef TH_GENERIC_FILE
12 | #define TH_GENERIC_FILE "thpp/detail/StorageGeneric.h"
13 | #else
14 | 
15 | #ifndef NO_FOLLY
16 | typedef Range<const real*> TH_CONCAT_2(Real, Range);
17 | typedef Range<real*> TH_CONCAT_3(Mutable, Real, Range);
18 | #endif
19 | typedef Storage<real> TH_CONCAT_2(Real, Storage);
20 | 
21 | namespace detail {
22 | template <> struct StorageOps<Storage<real>> {
23 |   typedef THStorage type;
24 | 
25 |   static THStorage* _newWithSize(long size) {
26 |     return THStorage_(newWithSize)(size);
27 |   }
28 |   static THStorage* _newWithData(real* data, long size) {
29 |     return THStorage_(newWithData)(data, size);
30 |   }
31 |   static THStorage* _newWithDataAndAllocator(real* data, long size,
32 |                                              THAllocator* allocator,
33 |                                              void* allocatorContext) {
34 |     return THStorage_(newWithDataAndAllocator)(data, size,
35 |                                                allocator, allocatorContext);
36 |   }
37 |   static void _setFlag(THStorage* storage, const char flag) {
38 |     return THStorage_(setFlag)(storage, flag);
39 |   }
40 |   static void _clearFlag(THStorage* storage, const char flag) {
41 |     return THStorage_(clearFlag)(storage, flag);
42 |   }
43 |   static void _retain(THStorage* storage) {
44 |     return THStorage_(retain)(storage);
45 |   }
46 |   static void _free(THStorage* storage) {
47 |     return THStorage_(free)(storage);
48 |   }
49 |   static void _resize(THStorage* storage, long size) {
50 |     return THStorage_(resize)(storage, size);
51 |   }
52 | 
53 | #define S1(X) #X
54 | #define S(X) S1(X)
55 |   static constexpr const char* kLuaTypeName = "torch."
56 |     S(TH_CONCAT_2(Real, Storage));
57 | #undef S
58 | #undef S1
59 | };
60 | } // namespace detail
61 | 
62 | #endif
63 | 
--------------------------------------------------------------------------------
/thpp/cmake/MultiLevelIncludes.cmake:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2014, Facebook, Inc.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the BSD-style license found in the
5 | # LICENSE file in the root directory of this source tree. An additional grant
6 | # of patent rights can be found in the PATENTS file in the same directory.
7 | 
8 | # Some projects are installed individually as part of a larger tree, but
9 | # include paths always reference the full include path in the tree. This
10 | # module makes it easier to do so.
11 | # 12 | # Suppose you have a source tree fblualib/thrift/submodule, which is built at 13 | # the submodule level (so you have fblualib/thrift/submodule/CMakeLists.txt) 14 | # Files inside submodule include each other (and files from other sibling 15 | # directories) with the full path: 16 | # 17 | # #include 18 | # #include 19 | # #include 20 | # #include 21 | # 22 | # MLI_SET_DEPTH(2) at the root of your CMakeLists.txt would set "../.." 23 | # as the include path (so fblualib is a subdirectory of that), making 24 | # the includes work. Also, it will set MLI_INCLUDE_OUTPUT_DIR and 25 | # MLI_INCLUDE_RELATIVE_OUTPUT_DIR to directories inside the build tree 26 | # where any generators should output header files so they can be found 27 | # via #include. (we recreate the lowest 2 levels of the hierarchy underneath 28 | # ${CMAKE_BINARY_DIR}) 29 | CMAKE_MINIMUM_REQUIRED(VERSION 2.8.7 FATAL_ERROR) 30 | 31 | FUNCTION(MLI_SET_DEPTH level) 32 | SET(dirs) 33 | SET(dir ${CMAKE_SOURCE_DIR}) 34 | SET(relinc) 35 | FOREACH(i RANGE 1 ${level}) 36 | GET_FILENAME_COMPONENT(bn ${dir} NAME) 37 | GET_FILENAME_COMPONENT(dir ${dir} PATH) 38 | LIST(APPEND dirs ${bn}) 39 | SET(relinc "${relinc}/..") 40 | ENDFOREACH() 41 | LIST(REVERSE dirs) 42 | STRING(REPLACE ";" "/" relpath "${dirs}") 43 | SET(MLI_INCLUDE_OUTPUT_DIR 44 | "${CMAKE_BINARY_DIR}/${relpath}" 45 | PARENT_SCOPE) 46 | SET(MLI_INCLUDE_RELATIVE_OUTPUT_DIR 47 | "${relpath}" 48 | PARENT_SCOPE) 49 | INCLUDE_DIRECTORIES( 50 | "${CMAKE_SOURCE_DIR}/${relinc}" 51 | "${CMAKE_BINARY_DIR}") 52 | ENDFUNCTION() 53 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to TH++ 2 | We want to make contributing to this project as easy and transparent as 3 | possible. 4 | 5 | ## Our Development Process 6 | This project is developed internally at Facebook inside a private repository. 7 | Changes are periodically pushed to the open-source branch. Pull requests are 8 | integrated manually into our private repository first, and they then get 9 | propagated to the public repository with the next push. 10 | 11 | ## Pull Requests 12 | We actively welcome your pull requests. 13 | 1. Fork the repo and create your branch from `master`. 14 | 2. If you've added code that should be tested, add tests 15 | 3. If you've changed APIs, update the documentation. 16 | 4. Ensure the test suite passes. 17 | 5. Make sure your code lints. 18 | 6. If you haven't already, complete the Contributor License Agreement ("CLA"). 19 | 20 | ## Contributor License Agreement ("CLA") 21 | In order to accept your pull request, we need you to submit a CLA. You only need 22 | to do this once to work on any of Facebook's open source projects. 23 | 24 | Complete your CLA here: 25 | 26 | ## Issues 27 | We use GitHub issues to track public bugs. Please ensure your description is 28 | clear and has sufficient instructions to be able to reproduce the issue. 29 | 30 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe 31 | disclosure of security bugs. In those cases, please go through the process 32 | outlined on that page and do not file a public issue. 33 | 34 | ## Coding Style 35 | 36 | ### C++ 37 | * 2 spaces for indentation rather than tabs 38 | * 80 character line length 39 | * Name classes LikeThis, functions and methods likeThis, data members 40 | likeThis_. 
41 | * Most naming and formatting recommendations from 42 | [Google's C++ Coding Style Guide]( 43 | http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml) apply (but 44 | not the restrictions; exceptions and templates are fine.) 45 | * Feel free to use [boost](http://www.boost.org/), 46 | [folly](https://github.com/facebook/folly) and 47 | [fbthrift](https://github.com/facebook/fbthrift) 48 | 49 | ## License 50 | By contributing to TH++, you agree that your contributions will be licensed 51 | under its BSD license. 52 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | # TH++: installation 2 | 3 | TH++ requires a Linux x86_64 system with a recent version of gcc (4.8+) that 4 | supports C++11. We confirmed this installation procedure on Ubuntu 13.10 and 5 | 14.04 LTS, but other recent versions of Linux should work as well with some 6 | changes. 7 | 8 | tl;dr - Install [Torch](http://torch.ch/). Then run `THPP_NOFB=1 build.sh`. For 9 | full functionality, also install [folly](https://github.com/facebook/folly) and 10 | [fbthrift](https://github.com/facebook/fbthrift), and instead run `build.sh`. 11 | 12 | 1. Install [Torch](http://torch.ch/). The Torch home page has simple scripts 13 | to automate installing Torch on Ubuntu. **NOTE** that, even though you might 14 | already have Torch installed, you should reinstall, as older versions do not 15 | install LuaJIT with Lua 5.2 compatibility. To check, run 16 | `luajit -e ';;'` -- if you get an error ("unexpected symbol near ';'"), 17 | then you need to reinstall. 18 | 2. **(optional)** Install [folly](https://github.com/facebook/folly). The folly 19 | [README](https://github.com/facebook/folly/blob/master/README) lists the 20 | packages (all from the standard Ubuntu distribution) that you need installed 21 | on your system before compiling folly. 22 | 3. **(optional)** Install [fbthrift](https://github.com/facebook/fbthrift). 23 | fbthrift depends on folly, and fbthrift's 24 | [README](https://github.com/facebook/fbthrift/blob/master/README.md) lists 25 | additional required packages (again, from the standard Ubuntu distribution). 26 | 4. Compile and build TH++. This is a standard cmake project; see 27 | `cd thpp; ./build.sh`, or use cmake directly. If you didn't install folly or 28 | fbthrift, you will have to exclude parts that use them from the build. This 29 | can be achieved with `NO_FOLLY` and `NO_THRIFT` cmake options. 30 | 5. Just like most cmake projects, TH++ builds in a separate build directory; if 31 | anything goes wrong during the build and you want to start over, just delete 32 | the `build` directory and run `build.sh` again. `THPP_NOFB=1 build.sh` 33 | installs without folly and thrift. Some functions will be broken, but most 34 | of the usability is there. 35 | 6. Confirm installation; if you used the default installation options, you 36 | should have `/usr/local/include/thpp/Tensor.h` and 37 | `/usr/local/lib/libthpp.so`. 38 | -------------------------------------------------------------------------------- /thpp/test/StorageTest.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
An additional grant
7 |  * of patent rights can be found in the PATENTS file in the same directory.
8 |  *
9 |  */
10 | 
11 | #include <thpp/Storage.h>
12 | 
13 | #include <gtest/gtest.h>
14 | #include <folly/io/IOBuf.h>
15 | 
16 | namespace thpp {
17 | namespace test {
18 | 
19 | typedef Storage<float> FloatStorage;
20 | TEST(Storage, Simple) {
21 |   FloatStorage s({2, 3, 4});
22 |   EXPECT_EQ(3, s.size());
23 |   EXPECT_EQ(2, s.at(0));
24 |   EXPECT_EQ(3, s.at(1));
25 |   EXPECT_EQ(4, s.at(2));
26 | }
27 | 
28 | void* g_ptr = nullptr;
29 | int g_context = 0;
30 | int g_nMalloc = 0;
31 | int g_nFree = 0;
32 | 
33 | struct TestContext {
34 |   int nMalloc, nFree;
35 |   TestContext() : nMalloc(0), nFree(0) {}
36 | };
37 | 
38 | void* test_malloc(void* ctx, long size) {
39 |   auto myCtx = (TestContext*) ctx;
40 |   myCtx->nMalloc++;
41 |   return malloc(4*size);
42 | }
43 | void* test_realloc(void* /*ctx*/, void* /*ptr*/, long /*size*/) {
44 |   ADD_FAILURE() << "realloc should not be called";
45 |   return nullptr;
46 | }
47 | void test_free(void* ctx, void* /*ptr*/) {
48 |   auto myCtx = (TestContext*) ctx;
49 |   myCtx->nFree++;
50 | }
51 | 
52 | TEST(Storage, CustomAllocator) {
53 |   THAllocator testAlloc = {
54 |     &test_malloc, &test_realloc, &test_free
55 |   };
56 | 
57 |   // 1. delete the storage first, then the IO buf
58 |   auto ctx = TestContext();
59 |   auto thStorage = THFloatStorage_newWithAllocator(42, &testAlloc, &ctx);
60 |   EXPECT_EQ(ctx.nMalloc, 1);
61 |   {
62 |     auto storage = FloatStorage(thStorage);
63 |     g_ptr = thStorage->data;
64 |     auto buf = storage.getIOBuf();
65 |     THFloatStorage_free(thStorage);
66 |     EXPECT_EQ(ctx.nFree, 0);
67 |   }
68 |   EXPECT_EQ(ctx.nMalloc, 1);
69 |   EXPECT_EQ(ctx.nFree, 1);
70 | 
71 |   // 2. delete the IO buf first, then the storage
72 |   ctx = TestContext();
73 |   thStorage = THFloatStorage_newWithAllocator(42, &testAlloc, &ctx);
74 |   EXPECT_EQ(ctx.nMalloc, 1);
75 |   {
76 |     auto storage = FloatStorage(thStorage);
77 |     g_ptr = thStorage->data;
78 |     auto buf = storage.getIOBuf();
79 |   }
80 |   EXPECT_EQ(ctx.nFree, 0);
81 |   THFloatStorage_free(thStorage);
82 |   EXPECT_EQ(ctx.nFree, 1);
83 |   EXPECT_EQ(ctx.nMalloc, 1);
84 | 
85 | }
86 | 
87 | }} // namespaces
88 | 
--------------------------------------------------------------------------------
/thpp/cuda/test/TensorSerializationTest.cpp:
--------------------------------------------------------------------------------
1 | /**
2 |  * Copyright 2015 Facebook
3 |  * @author Tudor Bosman (tudorb@fb.com)
4 |  */
5 | 
6 | #include <thpp/cuda/Tensor.h>
7 | 
8 | #include <gtest/gtest.h>
9 | 
10 | #include <thpp/if/gen-cpp2/Tensor_types.h>
11 | #include <vector>
12 | 
13 | namespace thpp { namespace test {
14 | 
15 | Tensor<float> createTensor(std::vector<long> sizes,
16 |                            std::vector<long> strides = {}) {
17 |   Tensor<float> tensor(LongRange(sizes.data(), sizes.size()),
18 |                        LongRange(strides.data(), strides.size()));
19 | 
20 |   std::vector<long> counter;
21 |   counter.resize(sizes.size());
22 |   int idx = counter.size();
23 |   float val = 0;
24 |   while (idx >= 0) {
25 |     if (idx == counter.size()) {
26 |       Tensor<float> t(tensor);
27 |       for (int i = counter.size() - 1; i >= 0; --i) {
28 |         t.select(i, counter[i]);
29 |       }
30 |       t.front() = val++;
31 |       --idx;
32 |       continue;
33 |     }
34 |     if (++counter[idx] == sizes[idx]) {
35 |       counter[idx] = 0;
36 |       --idx;
37 |     } else {
38 |       idx = counter.size();
39 |     }
40 |   }
41 | 
42 |   return tensor;
43 | }
44 | 
45 | void runTest(std::vector<long> sizes,
46 |              std::vector<long> strides = {}) {
47 |   Tensor<float> src = createTensor(sizes, strides);
48 |   CudaTensor<float> srcCuda;
49 |   srcCuda.resize(src.sizes(), src.strides());
50 |   srcCuda.copy(src);
51 | 
52 |   ThriftTensor serialized;
53 |   srcCuda.serialize(serialized);
54 | 
55 | 
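// Force a unique, contiguous (row-major) copy of the CPU source before
// comparing: the deserialized tensor comes back compact, so the strides and
// raw bytes below are only expected to match against a contiguous source.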
src.force(Tensor::CONTIGUOUS); 56 | 57 | CudaTensor deserializedCuda(std::move(serialized)); 58 | auto deserialized = deserializedCuda.toCPU(); 59 | 60 | EXPECT_TRUE(src.sizes() == deserialized->sizes()); 61 | EXPECT_TRUE(src.strides() == deserialized->strides()); 62 | EXPECT_EQ(0, memcmp(src.data(), deserialized->data(), 63 | sizeof(float) * src.size())); 64 | } 65 | 66 | TEST(SerializationTest, Simple) { 67 | runTest({1}); 68 | runTest({2}); 69 | runTest({2}, {1}); 70 | runTest({2}, {2}); 71 | runTest({2}, {200}); 72 | runTest({20, 10}); 73 | runTest({20, 10}, {10, 1}); 74 | runTest({20, 10}, {40, 4}); 75 | runTest({20, 10}, {400, 4}); 76 | runTest({20, 10}, {0, 1}); 77 | runTest({20, 10}, {0, 0}); 78 | runTest({20, 30, 10}); 79 | runTest({20, 30, 10}, {300, 10, 1}); 80 | runTest({20, 30, 10}, {10, 200, 1}); 81 | runTest({20, 30, 10}, {1, 20, 600}); 82 | runTest({20, 30}, {8192 * 30, 8192}); 83 | } 84 | 85 | }} // namespaces 86 | -------------------------------------------------------------------------------- /thpp/cuda/test/TensorTest.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015 Facebook 3 | * @author Tudor Bosman (tudorb@fb.com) 4 | */ 5 | 6 | #include 7 | 8 | #include 9 | 10 | #include 11 | 12 | namespace thpp { namespace test { 13 | 14 | class TensorTest : public testing::Test { 15 | protected: 16 | void SetUp() override; 17 | 18 | static CudaFloatTensor create(long bias); 19 | 20 | static void check2DTensor(const CudaFloatTensor& a, 21 | float a11, float a12, float a21, float a22); 22 | 23 | CudaFloatTensor a; 24 | CudaFloatTensor b; 25 | }; 26 | 27 | void TensorTest::SetUp() { 28 | a = create(1); 29 | b = create(51); 30 | } 31 | 32 | CudaFloatTensor TensorTest::create(long bias) { 33 | FloatTensor t({10, 20, 30}); 34 | EXPECT_EQ(10 * 20 * 30, t.size()); 35 | for (long i = 0; i < t.size(0); ++i) { 36 | for (long j = 0; j < t.size(1); ++j) { 37 | for (long k = 0; k < t.size(2); ++k) { 38 | t[i][j][k].front() = 39 | ((i + bias) * 10000) + 40 | ((j + bias) * 100) + 41 | (k + bias); 42 | } 43 | } 44 | } 45 | 46 | return CudaFloatTensor(t); 47 | } 48 | 49 | void TensorTest::check2DTensor(const CudaFloatTensor& ca, float a11, float a12, 50 | float a21, float a22) { 51 | auto a = ca.toCPU(); 52 | EXPECT_EQ(2, a->ndims()); 53 | EXPECT_LE(2, a->size(0)); 54 | EXPECT_LE(2, a->size(1)); 55 | EXPECT_EQ(a11, (*a)[0][0].front()); 56 | EXPECT_EQ(a12, (*a)[0][1].front()); 57 | EXPECT_EQ(a21, (*a)[1][0].front()); 58 | EXPECT_EQ(a22, (*a)[1][1].front()); 59 | } 60 | 61 | TEST_F(TensorTest, Simple) { 62 | auto sub = a[5]; 63 | EXPECT_EQ(20 * 30, sub.size()); 64 | check2DTensor(sub, 60101, 60102, 60201, 60202); 65 | 66 | auto sub2 = a[{-1,5}]; 67 | EXPECT_EQ(10 * 30, sub2.size()); 68 | check2DTensor(sub2, 10601, 10602, 20601, 20602); 69 | } 70 | 71 | TEST_F(TensorTest, Add) { 72 | auto sub = (b + a)[5]; 73 | EXPECT_EQ(20 * 30, sub.size()); 74 | check2DTensor(sub, 625252, 625254, 625452, 625454); 75 | } 76 | 77 | TEST_F(TensorTest, Sub) { 78 | auto sub = (b - a)[5]; 79 | EXPECT_EQ(20 * 30, sub.size()); 80 | check2DTensor(sub, 505050, 505050, 505050, 505050); 81 | } 82 | 83 | TEST_F(TensorTest, At) { 84 | EXPECT_EQ(20304, a.at({1, 2, 3})); 85 | a.transpose(0, 1); 86 | EXPECT_EQ(40608, a.at({5, 3, 7})); 87 | a.transpose(1, 0); 88 | a.transpose(); 89 | EXPECT_EQ(50403, a.at({2, 3, 4})); 90 | } 91 | 92 | TEST_F(TensorTest, UniqueMove) { 93 | testUniqueMove>(); 94 | } 95 | 96 | TEST_F(TensorTest, TensorPtr) { 97 | testTensorPtr>(); 98 | } 
99 | 100 | }} // namespaces 101 | -------------------------------------------------------------------------------- /thpp/cmake/FindThrift.cmake: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2014, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. An additional grant 6 | # of patent rights can be found in the PATENTS file in the same directory. 7 | # 8 | # THRIFT_FOUND 9 | # THRIFT_INCLUDE_DIR 10 | # THRIFT_LIBRARIES 11 | # 12 | # ADD_THRIFT2 and INSTALL_THRIFT2_HEADERS assume that you are using 13 | # MultiLevelIncludes. 14 | 15 | CMAKE_MINIMUM_REQUIRED(VERSION 2.8.7 FATAL_ERROR) 16 | 17 | INCLUDE(FindPackageHandleStandardArgs) 18 | INCLUDE(MultiLevelIncludes) 19 | 20 | FIND_LIBRARY(THRIFT_LIBRARY thrift) 21 | FIND_LIBRARY(THRIFT_CPP2_LIBRARY thriftcpp2) 22 | FIND_PATH(THRIFT_INCLUDE_DIR "thrift/lib/cpp2/Thrift.h") 23 | 24 | SET(THRIFT_LIBRARIES ${THRIFT_LIBRARY} ${THRIFT_CPP2_LIBRARY}) 25 | 26 | # Add a Thrift2 file. 27 | # Add the source files to src_var in parent scope. 28 | # Does not support services (yet). 29 | FUNCTION(ADD_THRIFT2 src_var fn) 30 | GET_FILENAME_COMPONENT(dir ${fn} PATH) 31 | # NAME_WE = name without extension. Because this is the one place where 32 | # cmake developers have decided that verbosity is bad. 33 | GET_FILENAME_COMPONENT(bnwe ${fn} NAME_WE) 34 | SET(absroot "${MLI_INCLUDE_OUTPUT_DIR}/${dir}") 35 | SET(abspath "${absroot}/gen-cpp2") 36 | FILE(MAKE_DIRECTORY "${absroot}") 37 | SET(h_files "${abspath}/${bnwe}_data.h" 38 | "${abspath}/${bnwe}_types.h" 39 | "${abspath}/${bnwe}_types.tcc" 40 | "${abspath}/${bnwe}_constants.h") 41 | SET(src_files "${abspath}/${bnwe}_data.cpp" 42 | "${abspath}/${bnwe}_types.cpp" 43 | "${abspath}/${bnwe}_constants.cpp") 44 | GET_DIRECTORY_PROPERTY(includes INCLUDE_DIRECTORIES) 45 | FOREACH(d ${includes}) 46 | SET(include_args ${include_args} "-I" ${d}) 47 | ENDFOREACH() 48 | ADD_CUSTOM_COMMAND( 49 | OUTPUT ${h_files} ${src_files} 50 | COMMAND python ARGS 51 | -mthrift_compiler.main 52 | --gen cpp2:include_prefix 53 | ${include_args} 54 | "${CMAKE_CURRENT_SOURCE_DIR}/${fn}" 55 | 56 | DEPENDS "${fn}" 57 | WORKING_DIRECTORY "${absroot}") 58 | 59 | SET(${src_var} ${${src_var}} ${src_files} PARENT_SCOPE) 60 | ENDFUNCTION() 61 | 62 | # Install all Thrift2 headers from a directory 63 | # Does not support services (yet). 64 | FUNCTION(INSTALL_THRIFT2_HEADERS dir dest) 65 | SET(relpath "${dir}/gen-cpp2") 66 | SET(abspath "${MLI_INCLUDE_OUTPUT_DIR}/${relpath}") 67 | INSTALL(DIRECTORY "${abspath}/" 68 | DESTINATION "${dest}/${MLI_INCLUDE_RELATIVE_OUTPUT_DIR}/${relpath}" 69 | FILES_MATCHING 70 | PATTERN "*.h" 71 | PATTERN "*.tcc") 72 | 73 | SET(relpath "${dir}") 74 | SET(abspath "${CMAKE_CURRENT_SOURCE_DIR}/${relpath}") 75 | INSTALL(DIRECTORY "${abspath}/" 76 | DESTINATION "${dest}/${MLI_INCLUDE_RELATIVE_OUTPUT_DIR}/${relpath}" 77 | FILES_MATCHING 78 | PATTERN "*.thrift") 79 | ENDFUNCTION() 80 | 81 | FIND_PACKAGE_HANDLE_STANDARD_ARGS( 82 | Thrift 83 | REQUIRED_ARGS 84 | THRIFT_INCLUDE_DIR 85 | THRIFT_LIBRARY 86 | THRIFT_CPP2_LIBRARY) 87 | -------------------------------------------------------------------------------- /thpp/Storage.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014, Facebook, Inc. 3 | * All rights reserved. 
4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. An additional grant 7 | * of patent rights can be found in the PATENTS file in the same directory. 8 | * 9 | */ 10 | 11 | #include 12 | 13 | //////////////////////////////////////////////////////////////////////////////// 14 | #ifndef NO_FOLLY 15 | //////////////////////////////////////////////////////////////////////////////// 16 | 17 | namespace thpp { 18 | namespace detail { 19 | 20 | IOBufAllocator::IOBufAllocator(folly::IOBuf&& iob) 21 | : iob_(std::move(iob)), 22 | maxLength_(iob_.isSharedOne() ? iob_.length() : 23 | std::numeric_limits::max()) { 24 | DCHECK(!iob_.isChained()); 25 | } 26 | 27 | void* IOBufAllocator::malloc(long /*size*/) { 28 | CHECK(false) << "IOBufAllocator::malloc should never be called"; 29 | } 30 | 31 | void* IOBufAllocator::realloc(void* ptr, long size) { 32 | CHECK_EQ(ptr, iob_.writableData()); 33 | if (size <= iob_.length()) { 34 | iob_.trimEnd(iob_.length() - size); 35 | } else { 36 | auto extra = size - iob_.length(); 37 | // If we're still using the original buffer (which was shared), we 38 | // may only use up to the original buffer length; the rest of the buffer 39 | // might be filled with something else (other fields if decoding Thrift, 40 | // etc). 41 | if (size > maxLength_ || extra > iob_.tailroom()) { 42 | iob_.unshareOne(); 43 | maxLength_ = std::numeric_limits::max(); 44 | } 45 | if (extra > iob_.tailroom()) { 46 | iob_.reserve(0, extra); 47 | } 48 | iob_.append(extra); 49 | } 50 | return iob_.writableData(); 51 | } 52 | 53 | void IOBufAllocator::free(void* ptr) { 54 | CHECK_EQ(ptr, iob_.writableData()); 55 | delete this; 56 | } 57 | 58 | bool IOBufAllocator::isUnique(const void* ptr) const { 59 | CHECK_EQ(ptr, iob_.data()); 60 | return !iob_.isSharedOne(); 61 | } 62 | 63 | void applySharingMode(folly::IOBuf& iob, SharingMode sharing) { 64 | DCHECK(!iob.isChained()); 65 | switch (sharing) { 66 | case SHARE_NONE: 67 | iob.unshareOne(); 68 | break; 69 | case SHARE_IOBUF_MANAGED: 70 | iob.makeManagedOne(); 71 | break; 72 | case SHARE_ALL: 73 | break; 74 | } 75 | } 76 | 77 | THAllocFreeFuncData::THAllocFreeFuncData(THAllocator* allocator, void* context): 78 | allocator(allocator), context(context) {} 79 | 80 | void THAllocFreeFunc(void* buf, void* userData) { 81 | auto thData = (THAllocFreeFuncData*) userData; 82 | thData->allocator->free(thData->context, buf); 83 | delete thData; 84 | } 85 | 86 | THAllocator ioBufTHAllocator = { 87 | &THAllocatorWrapper::malloc, 88 | &THAllocatorWrapper::realloc, 89 | &THAllocatorWrapper::free, 90 | }; 91 | 92 | THAllocator ioBufTHAllocatorNoRealloc = { 93 | &THAllocatorWrapper::malloc, 94 | nullptr, 95 | &THAllocatorWrapper::free, 96 | }; 97 | 98 | } // namespace detail 99 | 100 | } // namespaces 101 | 102 | //////////////////////////////////////////////////////////////////////////////// 103 | #endif // !NO_FOLLY 104 | //////////////////////////////////////////////////////////////////////////////// 105 | -------------------------------------------------------------------------------- /thpp/TensorPtr-inl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Facebook, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | namespace thpp { 20 | 21 | template 22 | TensorPtr::TensorPtr() noexcept : hasTensor_(false) { 23 | } 24 | 25 | template 26 | TensorPtr::TensorPtr(THType* th) noexcept 27 | : hasTensor_(th) { 28 | if (hasTensor_) { 29 | construct(th, true); 30 | } 31 | } 32 | 33 | template 34 | TensorPtr::TensorPtr(TensorPtr&& other) noexcept 35 | : hasTensor_(other.hasTensor_) { 36 | if (hasTensor_) { 37 | construct(other.tensor_.mut(), false); 38 | other.hasTensor_ = false; 39 | } 40 | } 41 | 42 | template 43 | TensorPtr::TensorPtr(const TensorPtr& other) noexcept 44 | : hasTensor_(other.hasTensor_) { 45 | if (hasTensor_) { 46 | construct(other.tensor_.mut(), true); 47 | } 48 | } 49 | 50 | template 51 | template 52 | TensorPtr::TensorPtr(detail::MakeTensorPtr, Args&&... args) 53 | : hasTensor_(true), 54 | tensor_(std::forward(args)...) { 55 | } 56 | 57 | template 58 | TensorPtr::~TensorPtr() { 59 | destroy(); 60 | } 61 | 62 | template 63 | void TensorPtr::destroy() noexcept { 64 | if (hasTensor_) { 65 | tensor_.~Tensor(); 66 | hasTensor_ = false; 67 | } 68 | } 69 | 70 | template 71 | void TensorPtr::construct(THType* th, bool incRef) 72 | noexcept { 73 | DCHECK(hasTensor_); 74 | new (&tensor_) Tensor(detail::SetTH(), th, incRef); 75 | } 76 | 77 | template 78 | auto TensorPtr::operator=(TensorPtr&& other) noexcept -> TensorPtr& { 79 | if (this != &other) { 80 | destroy(); 81 | if (other.hasTensor_) { 82 | hasTensor_ = true; 83 | construct(other.tensor_.mut(), false); 84 | other.hasTensor_ = false; 85 | } 86 | } 87 | return *this; 88 | } 89 | 90 | template 91 | auto TensorPtr::operator=(const TensorPtr& other) noexcept 92 | -> TensorPtr& { 93 | if (this != &other) { 94 | destroy(); 95 | if (other.hasTensor_) { 96 | hasTensor_ = true; 97 | construct(other.tensor_.mut(), true); 98 | } 99 | } 100 | return *this; 101 | } 102 | 103 | template 104 | Tensor* TensorPtr::get() const noexcept { 105 | return hasTensor_ ? &tensor_ : nullptr; 106 | } 107 | 108 | template 109 | auto TensorPtr::moveAsTH() noexcept -> THType* { 110 | auto p = hasTensor_ ? 
tensor_.mut() : nullptr; 111 | hasTensor_ = false; 112 | return p; 113 | } 114 | 115 | template 116 | bool TensorPtr::operator==(const TensorPtr& other) const noexcept { 117 | return (hasTensor_ == other.hasTensor_ && 118 | (!hasTensor_ || tensor_.mut() == other.tensor_.mut())); 119 | } 120 | 121 | 122 | } // namespaces 123 | -------------------------------------------------------------------------------- /thpp/StorageBase.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015 Facebook 3 | * @author Tudor Bosman (tudorb@fb.com) 4 | */ 5 | 6 | #ifndef THPP_STORAGEBASE_H_ 7 | #define THPP_STORAGEBASE_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #ifndef NO_FOLLY 13 | #include 14 | #endif 15 | // in order to get default values for template args 16 | #include 17 | 18 | namespace thpp { 19 | 20 | template class TensorBase; 21 | 22 | namespace detail { 23 | template struct StorageOps; 24 | } // namespace detail 25 | 26 | template 27 | class StorageBase { 28 | template 29 | friend class TensorBase; 30 | protected: 31 | typedef detail::StorageOps Ops; 32 | 33 | public: 34 | typedef T value_type; 35 | typedef T& reference; 36 | typedef const T& const_reference; 37 | typedef T* pointer; 38 | typedef const T* const_pointer; 39 | typedef T* iterator; 40 | typedef const T* const_iterator; 41 | typedef size_t size_type; 42 | typedef ptrdiff_t difference_type; 43 | typedef typename Ops::type THType; 44 | 45 | T* data() { return t_ ? t_->data : nullptr; } 46 | const T* data() const { return t_ ? t_->data : nullptr; } 47 | iterator begin() { return data(); } 48 | const_iterator begin() const { return data(); } 49 | const_iterator cbegin() const { return data(); } 50 | iterator end() { return t_ ? (t_->data + t_->size) : nullptr; } 51 | const_iterator end() const { return t_ ? (t_->data + t_->size) : nullptr; } 52 | const_iterator cend() const { return end(); } 53 | 54 | T& operator[](size_t index) { return data()[index]; } 55 | const T& operator[](size_t index) const { return data()[index]; } 56 | T& at(size_t index) { check(index); return operator[](index); } 57 | const T& at(size_t index) const { check(index); return operator[](index); } 58 | 59 | size_t size() const { return t_ ? t_->size : 0; } 60 | 61 | static constexpr const char* kLuaTypeName = Ops::kLuaTypeName; 62 | 63 | // Get a pointer to the underlying TH object; *this releases ownership 64 | // of that object. 
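  // A minimal usage sketch (illustrative, not part of the original source;
  // assumes a float Storage and the matching TH C API):
  //
  //   thpp::Storage<float> s({1.0f, 2.0f, 3.0f});
  //   THFloatStorage* th = s.moveAsTH();  // s no longer owns the TH object
  //   // ... pass th to code that steals the reference, or release it:
  //   THFloatStorage_free(th);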
65 | THType* moveAsTH(); 66 | 67 | void resizeUninitialized(size_t n); 68 | 69 | protected: 70 | StorageBase() { } // leave t_ uninitialized 71 | explicit StorageBase(THType* t) : t_(t) { } 72 | 73 | THType* th() { return t_; } 74 | const THType* th() const { return t_; } 75 | 76 | void up(); 77 | void down(); 78 | void check(size_t index) const; 79 | 80 | // NOTE: May not have any other fields, as we reinterpret_cast 81 | // liberally between Ops::type* and Storage* 82 | THType* t_; 83 | 84 | private: 85 | inline Derived* D() { return static_cast(this); } 86 | inline const Derived* D() const { return static_cast(this); } 87 | }; 88 | 89 | template 90 | constexpr const char* StorageBase::kLuaTypeName; 91 | 92 | // Define IsStorage to be used in template specializations 93 | 94 | template 95 | struct IsStorage : public std::false_type { }; 96 | 97 | template 98 | struct IsStorage< 99 | T, 100 | typename std::enable_if< 101 | std::is_base_of< 102 | StorageBase, 103 | T>::value>::type> 104 | : public std::true_type { }; 105 | 106 | } // namespaces 107 | 108 | #include 109 | 110 | #endif /* THPP_STORAGEBASE_H_ */ 111 | -------------------------------------------------------------------------------- /thpp/cuda/Storage.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015 Facebook 3 | * @author Tudor Bosman (tudorb@fb.com) 4 | */ 5 | 6 | #ifndef THPP_CUDA_STORAGE_H_ 7 | #define THPP_CUDA_STORAGE_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | namespace thpp { 15 | 16 | template class CudaTensor; 17 | 18 | template 19 | class CudaStorage : public StorageBase> { 20 | typedef StorageBase> Base; 21 | typedef typename Base::Ops Ops; 22 | friend Base; // Yay C++11 23 | public: 24 | typedef typename Base::THType THType; 25 | CudaStorage(); 26 | 27 | explicit CudaStorage(THType* t); 28 | 29 | explicit CudaStorage(const Storage& cpuStorage); 30 | 31 | // Deserialize from Thrift. Throws if wrong type. 32 | explicit CudaStorage(const ThriftStorage& thriftStorage, 33 | SharingMode sharing = SHARE_IOBUF_MANAGED); 34 | 35 | // Note that, despite being default (for consistency with the Storage 36 | // constructors), resizable == true is not yet implemented. 37 | explicit CudaStorage(folly::IOBuf&& iob, 38 | SharingMode sharing = SHARE_IOBUF_MANAGED, 39 | bool resizable = true); 40 | explicit CudaStorage(const folly::IOBuf& iob, 41 | SharingMode sharing = SHARE_IOBUF_MANAGED, 42 | bool resizable = true) 43 | : CudaStorage(folly::IOBuf(iob), sharing, resizable) { } 44 | 45 | ~CudaStorage(); 46 | 47 | CudaStorage(CudaStorage&& other) noexcept; 48 | CudaStorage(const CudaStorage& other); 49 | CudaStorage& operator=(CudaStorage&& other); 50 | CudaStorage& operator=(const CudaStorage& other); 51 | 52 | // Serialize to Thrift. 53 | void serialize(ThriftStorage& out, 54 | ThriftTensorEndianness endianness = 55 | ThriftTensorEndianness::NATIVE, 56 | bool mayShare = true) const; 57 | 58 | Storage toCPU() const; 59 | 60 | T read(size_t offset) const; 61 | void read(size_t offset, T* dest, size_t n) const; 62 | void write(size_t offset, T value); 63 | void write(size_t offset, const T* src, size_t n); 64 | 65 | bool isUnique() const { return isUnique(this->t_); } 66 | // No CUDA support for custom allocators. 
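  // Uniqueness therefore reduces to a plain refcount check on the TH object;
  // the CPU Storage, by contrast, must also consult its allocator (see
  // IOBufAllocator::isUnique in Storage.cpp).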
67 | static bool isUnique(const THType* th) { 68 | return !th || th->refcount == 1; 69 | } 70 | 71 | void setFromIOBuf(folly::IOBuf&& iob, SharingMode sharing, bool resizable); 72 | 73 | private: 74 | template friend class CudaTensor; 75 | }; 76 | 77 | /** 78 | * Wrap a THCAllocator-like object with a C++ interface into THCAllocator. 79 | */ 80 | template 81 | class THCAllocatorWrapper { 82 | public: 83 | static THCDeviceAllocator thcAllocator; 84 | private: 85 | static cudaError_t malloc(void* ctx, void** ptr, 86 | size_t size, cudaStream_t stream) { 87 | return static_cast(ctx)->malloc(ctx, ptr, size, stream); 88 | } 89 | static cudaError_t realloc(void* ctx, void** ptr, 90 | size_t oldSize, size_t newSize, 91 | cudaStream_t stream) { 92 | return static_cast(ctx)->realloc(ctx, ptr, oldSize, newSize, stream); 93 | } 94 | static cudaError_t free(void* ctx, void* ptr) { 95 | return static_cast(ctx)->free(ctx, ptr); 96 | } 97 | }; 98 | 99 | } // namespaces 100 | 101 | #include 102 | 103 | #endif /* THPP_CUDA_STORAGE_H_ */ 104 | -------------------------------------------------------------------------------- /thpp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2014, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. An additional grant 6 | # of patent rights can be found in the PATENTS file in the same directory. 7 | # 8 | 9 | CMAKE_MINIMUM_REQUIRED(VERSION 2.8.7 FATAL_ERROR) 10 | 11 | OPTION(NO_THRIFT "enabling this will exclude all code that depends on Thrift from the build" OFF) 12 | OPTION(NO_FOLLY "enabling this will exclude all code that depends on Folly from the build" OFF) 13 | OPTION(NO_TESTS "enabling this will disable building tests" OFF) 14 | 15 | # Torch messes this up 16 | SET(SAVED_CMAKE_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX}) 17 | 18 | # Use our own cmake scripts 19 | SET(CMAKE_MODULE_PATH 20 | "${CMAKE_CURRENT_SOURCE_DIR}/cmake" 21 | "${CMAKE_MODULE_PATH}") 22 | 23 | INCLUDE(MultiLevelIncludes) 24 | MLI_SET_DEPTH(1) 25 | 26 | FIND_PACKAGE(Torch REQUIRED) 27 | 28 | IF(NOT NO_FOLLY) 29 | FIND_PACKAGE(Folly REQUIRED) 30 | INCLUDE_DIRECTORIES(${FOLLY_INCLUDE_DIR}) 31 | ELSE() 32 | ADD_DEFINITIONS(-DNO_FOLLY) 33 | ENDIF() 34 | 35 | IF(NOT NO_THRIFT) 36 | IF(NOT FOLLY_FOUND) 37 | MESSAGE(SEND_ERROR "Thrift serialization requires folly") 38 | ENDIF() 39 | FIND_PACKAGE(Thrift REQUIRED) 40 | INCLUDE_DIRECTORIES(${THRIFT_INCLUDE_DIR}) 41 | ELSE() 42 | ADD_DEFINITIONS(-DNO_THRIFT) 43 | ENDIF() 44 | 45 | SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++11") 46 | 47 | SET(src 48 | Storage.cpp 49 | StorageSerialization.cpp 50 | detail/StorageDefs.cpp 51 | TensorSerialization.cpp 52 | detail/TensorDefs.cpp 53 | ) 54 | 55 | SET(h 56 | Storage.h 57 | Storage-inl.h 58 | Tensor.h 59 | Tensor-inl.h 60 | StorageBase.h 61 | StorageBase-inl.h 62 | TensorBase.h 63 | TensorBase-inl.h 64 | TensorPtr.h 65 | TensorPtr-inl.h 66 | ) 67 | 68 | SET(h_detail 69 | detail/Storage.h 70 | detail/StorageDefsGeneric.h 71 | detail/StorageGeneric.h 72 | detail/Tensor.h 73 | detail/TensorDefsGeneric.h 74 | detail/TensorGeneric.h 75 | ) 76 | 77 | IF(THRIFT_FOUND) 78 | ADD_THRIFT2(src "if/Tensor.thrift") 79 | INSTALL_THRIFT2_HEADERS("if" "include") 80 | ENDIF() 81 | 82 | ADD_LIBRARY(thpp SHARED ${src}) 83 | 84 | if (UNIX AND NOT APPLE) 85 | # Let's check if --no-as-needed is supported by the linker.
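  # (CHECK_C_COMPILER_FLAG builds and links a tiny test program, and flags in
  # CMAKE_REQUIRED_FLAGS end up on that link line as well, so this doubles as
  # a linker-flag probe.)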
86 | INCLUDE(CheckCCompilerFlag) 87 | SET(_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) 88 | SET(CMAKE_REQUIRED_FLAGS "-Wl,--no-as-needed") 89 | CHECK_C_COMPILER_FLAG("" HAS_NO_AS_NEEDED) 90 | SET(CMAKE_REQUIRED_FLAGS ${_CMAKE_REQUIRED_FLAGS}) 91 | IF(NOT HAS_NO_AS_NEEDED) 92 | MESSAGE(SEND_ERROR "Your linker doesn't support a required --no-as-needed flag") 93 | ENDIF() 94 | 95 | # This is a template library, so some dependent libraries are only used from 96 | # header files, so we'd like to force them to be linked. 97 | SET(CMAKE_SHARED_LINKER_FLAGS "-Wl,--no-as-needed") 98 | endif (UNIX AND NOT APPLE) 99 | 100 | TARGET_LINK_LIBRARIES(thpp TH) 101 | IF(FOLLY_FOUND) 102 | TARGET_LINK_LIBRARIES(thpp ${FOLLY_LIBRARIES}) 103 | ENDIF() 104 | IF(THRIFT_FOUND) 105 | TARGET_LINK_LIBRARIES(thpp ${THRIFT_LIBRARIES}) 106 | ENDIF() 107 | 108 | 109 | IF(NOT NO_TESTS) 110 | ENABLE_TESTING() 111 | FIND_PACKAGE(Glog REQUIRED) 112 | INCLUDE_DIRECTORIES(${GLOG_INCLUDE_DIR}) 113 | TARGET_LINK_LIBRARIES(thpp ${GLOG_LIBRARIES}) 114 | ADD_SUBDIRECTORY("googletest-release-1.7.0") 115 | INCLUDE_DIRECTORIES( 116 | "${CMAKE_CURRENT_SOURCE_DIR}/googletest-release-1.7.0/include" 117 | ) 118 | ADD_SUBDIRECTORY("test") 119 | ENDIF() 120 | 121 | 122 | # SET(CMAKE_INSTALL_PREFIX ${SAVED_CMAKE_INSTALL_PREFIX}) 123 | 124 | INSTALL(TARGETS thpp 125 | RUNTIME DESTINATION bin 126 | ARCHIVE DESTINATION lib 127 | LIBRARY DESTINATION lib) 128 | 129 | INSTALL(FILES ${h} 130 | DESTINATION "include/${MLI_INCLUDE_RELATIVE_OUTPUT_DIR}") 131 | INSTALL(FILES ${h_detail} 132 | DESTINATION "include/${MLI_INCLUDE_RELATIVE_OUTPUT_DIR}/detail") 133 | -------------------------------------------------------------------------------- /thpp/cuda/Tensor.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015 Facebook 3 | * @author Tudor Bosman (tudorb@fb.com) 4 | */ 5 | 6 | #ifndef THPP_CUDA_TENSOR_H_ 7 | #define THPP_CUDA_TENSOR_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | namespace thpp { 14 | 15 | template 16 | class CudaTensor : public TensorBase, CudaTensor> { 17 | typedef TensorBase, CudaTensor> Base; 18 | typedef typename Base::Ops Ops; 19 | friend class TensorPtr; 20 | public: 21 | typedef typename Base::StorageType StorageType; 22 | typedef typename Base::offset_type offset_type; 23 | typedef typename Base::size_type size_type; 24 | typedef typename Base::THType THType; 25 | typedef typename Base::Ptr Ptr; 26 | 27 | // Default constructor; construct an empty, zero-dimensional Tensor. 28 | CudaTensor(); 29 | 30 | CudaTensor(StorageType storage, offset_type storageOffset, 31 | LongStorage sizes, LongStorage strides = LongStorage()); 32 | 33 | // Constructors from a list of sizes and a list of strides. 34 | // If specified, the list of strides must have the same size as the 35 | // list of sizes. 36 | explicit CudaTensor(LongStorage sizes, LongStorage strides = LongStorage()); 37 | explicit CudaTensor(LongRange sizes, LongRange strides = LongRange()); 38 | explicit CudaTensor(const std::vector& sizes, 39 | const std::vector& strides = 40 | std::vector()); 41 | explicit CudaTensor( 42 | std::initializer_list sizes, 43 | std::initializer_list strides = 44 | std::initializer_list()); 45 | 46 | explicit CudaTensor(const Tensor& cpuTensor); 47 | 48 | // Deserialize from Thrift. Throws if wrong type. 
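  // A usage sketch (illustrative; "wire" is a placeholder for a ThriftTensor
  // received from thpp serialization, and the GPU copy goes through a
  // temporary CPU tensor, see Tensor-inl.h):
  //
  //   ThriftTensor wire = ...;      // e.g. decoded from an RPC payload
  //   CudaTensor<float> gpu(wire);  // throws if wire's dtype isn't float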
49 | explicit CudaTensor(const ThriftTensor& thriftTensor, 50 | SharingMode sharing = SHARE_IOBUF_MANAGED); 51 | 52 | // Do not alias other, create separate object (with separate metadata); 53 | // might still share data with other, unless UNIQUE requested in 54 | // cloneMode. 55 | explicit CudaTensor(const THType* other, unsigned cloneMode = 0); 56 | 57 | // Move/copy constructors. Enforce requested mode. 58 | /* implicit */ CudaTensor(const CudaTensor& other, unsigned cloneMode = 0); 59 | /* implicit */ /* may throw */ CudaTensor( 60 | CudaTensor&& other, unsigned cloneMode = 0); 61 | 62 | // Move/copy assignment operators. Will share memory with "other". 63 | CudaTensor& operator=(const CudaTensor& other); 64 | /* noexcept override */ CudaTensor& operator=(CudaTensor&& other); 65 | 66 | T at(offset_type idx) const { return at({idx}); } 67 | T at(std::initializer_list indices) const; 68 | 69 | // Copy from another tensor 70 | void copy(const CudaTensor& src); 71 | 72 | template 73 | void copy(const Tensor& src); 74 | 75 | // Copy to another (non-CUDA) tensor 76 | template 77 | void copyTo(Tensor& dest) const; 78 | 79 | // 80 | std::pair max(int dim) const; 81 | 82 | // 83 | std::pair min(int dim) const; 84 | 85 | // Return the CUDA device that this tensor is based on 86 | int getDevice() const; 87 | 88 | // Serialize to Thrift. Won't ever share CUDA memory. 89 | void serialize(ThriftTensor& out, 90 | ThriftTensorEndianness endianness = 91 | ThriftTensorEndianness::NATIVE, 92 | SharingMode sharing = SHARE_IOBUF_MANAGED) const; 93 | 94 | // Copy to CPU 95 | typename Tensor::Ptr toCPU() const; 96 | 97 | // Copy to given CUDA device, unless already there. 98 | Ptr toDevice(int device) const; 99 | 100 | private: 101 | CudaTensor(detail::SetTH, THType* t, bool incRef); 102 | }; 103 | 104 | template 105 | void copyTensor(Tensor& dest, const CudaTensor& src) { 106 | src.copyTo(dest); 107 | } 108 | 109 | template 110 | void copyTensor(CudaTensor& dest, const Tensor& src) { 111 | dest.copy(src); 112 | } 113 | 114 | template 115 | void copyTensor(CudaTensor& dest, const CudaTensor& src) { 116 | dest.copy(src); 117 | } 118 | 119 | } // namespaces 120 | 121 | #include 122 | 123 | #endif /* THPP_CUDA_TENSOR_H_ */ 124 | -------------------------------------------------------------------------------- /thpp/TensorPtr.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Facebook, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #ifndef NO_FOLLY 20 | #include 21 | #endif 22 | 23 | namespace thpp { 24 | 25 | namespace detail { 26 | struct MakeTensorPtr {}; 27 | struct SetTH {}; 28 | } // namespace 29 | 30 | /** 31 | * A shared pointer to a tensor. 32 | * 33 | * thpp::Tensor objects share data, but not metadata. 
That is, when copying 34 | * a thpp::Tensor object, the new object initially shares data with the 35 | * original tensor, but the metadata (sizes, strides) is separate. 36 | * 37 | * TensorPtr allows you to share (reference counted) access to a tensor 38 | * (including metadata). [1] 39 | * 40 | * [1] TensorPtr is very similar to std::shared_ptr, except 41 | * that TensorPtr uses Torch's TH*Tensor internal reference counting mechanism. 42 | */ 43 | template 44 | class TensorPtr { 45 | template 46 | friend TensorPtr makeTensorPtr(Args&&... args); 47 | 48 | template 49 | explicit TensorPtr(detail::MakeTensorPtr, Args&&... args); 50 | public: 51 | typedef Tensor element_type; 52 | typedef typename Tensor::THType THType; 53 | static constexpr const char* kLuaTypeName = Tensor::kLuaTypeName; 54 | 55 | // Create an empty TensorPtr 56 | TensorPtr() noexcept; 57 | 58 | // Create a TensorPtr from a given THFloatTensor / THLongTensor / etc 59 | // raw pointer. Increments the reference count. 60 | explicit TensorPtr(THType* th) noexcept; 61 | 62 | // Move and copy constructors 63 | TensorPtr(TensorPtr&& other) noexcept; 64 | TensorPtr(const TensorPtr& other) noexcept; 65 | 66 | ~TensorPtr(); 67 | 68 | // Move and copy assignment operators 69 | TensorPtr& operator=(TensorPtr&& other) noexcept; 70 | TensorPtr& operator=(const TensorPtr& other) noexcept; 71 | 72 | // Dereference 73 | Tensor& operator*() const noexcept { return *get(); } 74 | Tensor* operator->() const noexcept { return get(); } 75 | Tensor* get() const noexcept; 76 | 77 | // True iff non-empty 78 | explicit operator bool() const noexcept { return hasTensor_; } 79 | 80 | // Return a pointer to the underlying THFloatTensor / THLongTensor etc. 81 | // Does not change the reference count! This is similar to 82 | // get()->asTH(), except that it works (and returns nullptr) if the pointer 83 | // is empty. 84 | THType* th() const noexcept; 85 | 86 | // Steal the reference to the underlying THFloatTensor / THLongTensor etc. 87 | // The TensorPtr is empty at the end of this operation, but the reference 88 | // count is not changed. It is your responsibility to call 89 | // THFloatTensor_(free) (or equivalent for other types) on the returned value 90 | // (or pass it to code that steals the reference, such as luaT_pushudata). 91 | THType* moveAsTH() noexcept; 92 | 93 | // Do two TensorPtr objects point to the same tensor? 94 | bool operator==(const TensorPtr& other) const noexcept; 95 | 96 | private: 97 | void destroy() noexcept; 98 | void construct(THType* th, bool incRef) noexcept; 99 | 100 | // Not using folly::Optional, so we don't accidentally call Tensor's 101 | // (copy / move) (constructor / assignment operator), which has value, 102 | // not reference, semantics. 103 | bool hasTensor_; 104 | union { 105 | mutable Tensor tensor_; 106 | }; 107 | 108 | template 109 | friend bool operator==(const U& y, const TensorPtr& x) { 110 | return x == y; 111 | } 112 | template 113 | friend bool operator!=(const U& y, const TensorPtr& x) { 114 | return !static_cast(x == y); 115 | } 116 | template 117 | friend bool operator!=(const TensorPtr& y, const U& x) { 118 | return !static_cast(y == x); 119 | } 120 | }; 121 | 122 | template 123 | inline TensorPtr makeTensorPtr(Args&&... 
args) { 124 | return TensorPtr( 125 | detail::MakeTensorPtr(), std::forward(args)...); 126 | } 127 | 128 | } // namespaces 129 | 130 | #include 131 | -------------------------------------------------------------------------------- /thpp/test/TensorTest.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. An additional grant 7 | * of patent rights can be found in the PATENTS file in the same directory. 8 | * 9 | */ 10 | 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | #include 17 | 18 | namespace thpp { 19 | namespace test { 20 | 21 | class TensorTest : public testing::Test { 22 | protected: 23 | void SetUp() override; 24 | 25 | static LongTensor create(long bias); 26 | 27 | static void check2DTensor(const LongTensor& a, long a11, long a12, long a21, 28 | long a22); 29 | 30 | LongTensor a; 31 | LongTensor b; 32 | }; 33 | 34 | void TensorTest::SetUp() { 35 | a = create(1); 36 | b = create(51); 37 | } 38 | 39 | LongTensor TensorTest::create(long bias) { 40 | LongTensor t({10, 20, 30}); 41 | EXPECT_EQ(10 * 20 * 30, t.size()); 42 | for (long i = 0; i < t.size(0); ++i) { 43 | for (long j = 0; j < t.size(1); ++j) { 44 | for (long k = 0; k < t.size(2); ++k) { 45 | t[i][j][k].front() = 46 | ((i + bias) * 10000) + 47 | ((j + bias) * 100) + 48 | (k + bias); 49 | } 50 | } 51 | } 52 | return t; 53 | } 54 | 55 | void TensorTest::check2DTensor(const LongTensor& a, long a11, long a12, 56 | long a21, long a22) { 57 | EXPECT_EQ(2, a.ndims()); 58 | EXPECT_LE(2, a.size(0)); 59 | EXPECT_LE(2, a.size(1)); 60 | EXPECT_EQ(a11, a[0][0].front()); 61 | EXPECT_EQ(a12, a[0][1].front()); 62 | EXPECT_EQ(a21, a[1][0].front()); 63 | EXPECT_EQ(a22, a[1][1].front()); 64 | } 65 | 66 | TEST_F(TensorTest, Simple) { 67 | auto sub = a[5]; 68 | EXPECT_EQ(20 * 30, sub.size()); 69 | check2DTensor(sub, 60101, 60102, 60201, 60202); 70 | 71 | auto sub2 = a[{-1,5}]; 72 | EXPECT_EQ(10 * 30, sub2.size()); 73 | check2DTensor(sub2, 10601, 10602, 20601, 20602); 74 | } 75 | 76 | TEST_F(TensorTest, Add) { 77 | auto sub = (b + a)[5]; 78 | EXPECT_EQ(20 * 30, sub.size()); 79 | check2DTensor(sub, 625252, 625254, 625452, 625454); 80 | } 81 | 82 | TEST_F(TensorTest, Sub) { 83 | auto sub = (b - a)[5]; 84 | EXPECT_EQ(20 * 30, sub.size()); 85 | check2DTensor(sub, 505050, 505050, 505050, 505050); 86 | } 87 | 88 | TEST_F(TensorTest, At) { 89 | EXPECT_EQ(20304, a.at({1, 2, 3})); 90 | a.transpose(0, 1); 91 | EXPECT_EQ(40608, a.at({5, 3, 7})); 92 | a.transpose(1, 0); 93 | a.transpose(); 94 | EXPECT_EQ(50403, a.at({2, 3, 4})); 95 | } 96 | 97 | TEST_F(TensorTest, NonFloatEqual) { 98 | EXPECT_TRUE(a.isExactlyEqual(a)); 99 | EXPECT_TRUE(a.isApproximatelyEqual(a)); 100 | EXPECT_FALSE(a.isExactlyEqual(b)); 101 | EXPECT_FALSE(a.isApproximatelyEqual(b)); 102 | } 103 | 104 | TEST_F(TensorTest, FloatEqual) { 105 | auto x = Tensor{1}; 106 | auto y = Tensor{1}; 107 | x.at({0}) = 1.0f; 108 | y.at({0}) = 1.0f; 109 | 110 | EXPECT_TRUE(x.isExactlyEqual(y)); 111 | EXPECT_TRUE(x.isApproximatelyEqual(y)); 112 | 113 | y.at({0}) = 1.000001f; 114 | EXPECT_FALSE(x.isExactlyEqual(y)); 115 | EXPECT_TRUE(x.isApproximatelyEqual(y)); 116 | } 117 | 118 | TEST_F(TensorTest, EqualMoreDimensionsThanSize) { 119 | auto x = Tensor{{2, 2, 2}}; 120 | for (long k = 0; k < x.size(0); ++k) { 121 | for (long j = 0; j < x.size(1); ++j) { 122 
| for (long i = 0; i < x.size(2); ++i) { 123 | x.at({k, j, i}) = k * 3 + j * 2 + i; 124 | } 125 | } 126 | } 127 | 128 | auto y = Tensor{{2, 2, 2}}; 129 | for (long k = 0; k < x.size(0); ++k) { 130 | for (long j = 0; j < x.size(1); ++j) { 131 | for (long i = 0; i < x.size(2); ++i) { 132 | y.at({k, j, i}) = k * 3 + j * 2 + i; 133 | } 134 | } 135 | } 136 | 137 | auto z = Tensor{{2, 2, 2}}; 138 | for (long k = 0; k < x.size(0); ++k) { 139 | for (long j = 0; j < x.size(1); ++j) { 140 | for (long i = 0; i < x.size(2); ++i) { 141 | z.at({k, j, i}) = k * 4 + j * 3 + i * 2; 142 | } 143 | } 144 | } 145 | 146 | EXPECT_TRUE(x.isApproximatelyEqual(y)); 147 | EXPECT_FALSE(x.isApproximatelyEqual(z)); 148 | EXPECT_TRUE(x.isExactlyEqual(y)); 149 | EXPECT_FALSE(x.isExactlyEqual(z)); 150 | } 151 | 152 | TEST_F(TensorTest, EqualMismatch) { 153 | auto x = Tensor{{1, 1, 1}}; 154 | auto y = Tensor{{1, 1, 1, 1}}; 155 | 156 | EXPECT_THROW(x.isApproximatelyEqual(y), std::invalid_argument); 157 | EXPECT_THROW(x.isExactlyEqual(y), std::invalid_argument); 158 | 159 | auto z = Tensor{{1, 2, 3}}; 160 | auto w = Tensor{{1, 2, 4}}; 161 | 162 | EXPECT_THROW(z.isApproximatelyEqual(w), std::invalid_argument); 163 | EXPECT_THROW(z.isExactlyEqual(w), std::invalid_argument); 164 | } 165 | 166 | TEST_F(TensorTest, Str) { 167 | auto x = Tensor{{2,3,4}}; 168 | 169 | EXPECT_EQ("torch.FloatTensor(2x3x4)", x.str()); 170 | } 171 | 172 | TEST_F(TensorTest, NoAliasing) { 173 | auto x = Tensor{2}; 174 | x.fill(1); 175 | EXPECT_EQ(2, x.sumall()); 176 | 177 | auto y = x; 178 | y.resize({4}); 179 | y.fill(2); 180 | 181 | EXPECT_EQ(4, x.sumall()); 182 | EXPECT_EQ(8, y.sumall()); 183 | } 184 | 185 | TEST_F(TensorTest, UniqueMove) { 186 | testUniqueMove>(); 187 | } 188 | 189 | TEST_F(TensorTest, TensorPtr) { 190 | testTensorPtr>(); 191 | } 192 | 193 | }} // namespaces 194 | -------------------------------------------------------------------------------- /thpp/TensorSerialization.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. An additional grant 7 | * of patent rights can be found in the PATENTS file in the same directory. 
8 | * 9 | */ 10 | 11 | #include 12 | #ifndef NO_FOLLY 13 | #include 14 | #include 15 | #endif 16 | 17 | //////////////////////////////////////////////////////////////////////////////// 18 | #if !defined(NO_THRIFT) && !defined(NO_FOLLY) 19 | //////////////////////////////////////////////////////////////////////////////// 20 | 21 | namespace thpp { 22 | namespace detail { 23 | 24 | namespace { 25 | 26 | std::unique_ptr partialCloneOne(const folly::IOBuf& buf, 27 | uint64_t offset, 28 | uint64_t length) { 29 | DCHECK_LE(offset + length, buf.length()); 30 | auto cloned = buf.cloneOne(); 31 | cloned->trimStart(offset); 32 | cloned->trimEnd(cloned->length() - length); 33 | return cloned; 34 | } 35 | 36 | } // namespace 37 | 38 | void serialize( 39 | ThriftTensor& out, 40 | LongRange sizes, 41 | LongRange strides, 42 | folly::IOBuf&& data, 43 | ThriftTensorDataType dtype, 44 | size_t elementSize, 45 | ThriftTensorEndianness endianness, 46 | SharingMode sharing) { 47 | DCHECK(!data.isChained()); 48 | if (endianness == ThriftTensorEndianness::NATIVE) { 49 | endianness = gMachineEndianness; 50 | } else { 51 | CHECK(endianness == gMachineEndianness) 52 | << "Non-native endianness not yet implemented"; 53 | } 54 | 55 | int ndims = sizes.size(); 56 | uint64_t dataSize = 1; 57 | uint64_t contiguousSize = 1; 58 | int firstContiguousDim = ndims - 1; 59 | 60 | if (!strides.empty()) { 61 | DCHECK_EQ(strides.size(), ndims); 62 | while (firstContiguousDim >= 0) { 63 | if (strides[firstContiguousDim] != contiguousSize) { 64 | break; 65 | } 66 | contiguousSize *= sizes[firstContiguousDim]; 67 | --firstContiguousDim; 68 | } 69 | ++firstContiguousDim; 70 | dataSize = contiguousSize; 71 | for (int i = 0; i < firstContiguousDim; ++i) { 72 | dataSize *= sizes[i]; 73 | } 74 | } else { 75 | for (auto s : sizes) { 76 | dataSize *= s; 77 | } 78 | contiguousSize = dataSize; 79 | firstContiguousDim = 0; 80 | } 81 | 82 | // Dimensions from firstContiguousDim till the last form a contiguous range 83 | // of contiguousSize elements; we'll copy / clone that in one go rather 84 | // than iterating through all elements. 85 | 86 | // We want bytes. 87 | dataSize *= elementSize; 88 | contiguousSize *= elementSize; 89 | 90 | DCHECK_LE(contiguousSize, dataSize); 91 | 92 | out.dataType = dtype; 93 | out.endianness = endianness; 94 | out.sizes.assign(sizes.begin(), sizes.end()); 95 | 96 | if (ndims == 0) { 97 | // Empty tensor, nothing to do. 98 | out.data = folly::IOBuf(); 99 | data = folly::IOBuf(); 100 | return; 101 | } 102 | 103 | if (firstContiguousDim == 0) { 104 | // We're done. 105 | DCHECK_GE(data.length(), dataSize); 106 | data.trimEnd(data.length() - dataSize); 107 | detail::applySharingMode(data, sharing); 108 | out.data = std::move(data); 109 | return; 110 | } 111 | 112 | // We have to do this the hard way... 113 | folly::IOBufQueue outQueue; 114 | 115 | // If the contiguous chunk size is >= kMinCloneSize, we clone rather 116 | // than copying 117 | static constexpr uint64_t kMinCloneSize = 4 << 10; 118 | 119 | // Don't allocate huge contiguous buffers. 120 | // jemalloc defers to mmap() for buffers of 4MiB or more. 
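  // (2 << 20 bytes == 2 MiB, which keeps each block comfortably below that
  // threshold.)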
121 | static constexpr uint64_t kMaxBlockSize = 2 << 20; 122 | folly::io::QueueAppender appender(&outQueue, 123 | std::min(dataSize, kMaxBlockSize)); 124 | 125 | std::vector counter; 126 | counter.resize(firstContiguousDim); 127 | int idx = firstContiguousDim; 128 | const uint8_t* src = data.data(); 129 | bool mayShare = false; 130 | switch (sharing) { 131 | case SHARE_NONE: 132 | break; 133 | case SHARE_IOBUF_MANAGED: 134 | mayShare = data.isManagedOne(); 135 | break; 136 | case SHARE_ALL: 137 | mayShare = true; 138 | break; 139 | }; 140 | while (idx >= 0) { 141 | if (idx == firstContiguousDim) { 142 | if (mayShare && contiguousSize >= kMinCloneSize) { 143 | appender.insert(partialCloneOne(data, src - data.data(), 144 | contiguousSize)); 145 | } else { 146 | appender.push(src, contiguousSize); 147 | } 148 | --idx; 149 | continue; 150 | } 151 | src += strides[idx] * elementSize; 152 | if (++counter[idx] == sizes[idx]) { 153 | src -= sizes[idx] * strides[idx] * elementSize; 154 | counter[idx] = 0; 155 | --idx; 156 | } else { 157 | idx = firstContiguousDim; 158 | } 159 | } 160 | 161 | outQueue.move()->cloneInto(out.data); 162 | } 163 | 164 | 165 | template folly::IOBuf deserialize(const ThriftTensor& in, 166 | ThriftTensorDataType dtype); 167 | 168 | }} // namespaces 169 | 170 | //////////////////////////////////////////////////////////////////////////////// 171 | #endif // !NO_THRIFT && !NO_FOLLY 172 | //////////////////////////////////////////////////////////////////////////////// 173 | -------------------------------------------------------------------------------- /thpp/cuda/Tensor-inl.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015 Facebook 3 | * @author Tudor Bosman (tudorb@fb.com) 4 | */ 5 | 6 | #ifndef THPP_CUDA_TENSOR_H_ 7 | #error This file may only be included from thpp/cuda/Tensor.h 8 | #endif 9 | 10 | #include 11 | 12 | namespace thpp { 13 | 14 | template 15 | CudaTensor::CudaTensor() : Base(Ops::_new()) { } 16 | 17 | template 18 | CudaTensor::CudaTensor(StorageType storage, offset_type storageOffset, 19 | LongStorage sizes, LongStorage strides) 20 | : CudaTensor() { 21 | Ops::_setStorage(this->t_, storage.th(), storageOffset, sizes.th(), 22 | strides.th()); 23 | } 24 | 25 | template 26 | CudaTensor::CudaTensor(LongStorage sizes, LongStorage strides) 27 | : CudaTensor() { 28 | Ops::_setStorage(this->t_, nullptr, 0, sizes.th(), strides.th()); 29 | } 30 | 31 | template 32 | CudaTensor::CudaTensor(LongRange sizes, LongRange strides) 33 | : CudaTensor(LongStorage::wrap(detail::makeMutable(sizes)), 34 | LongStorage::wrap(detail::makeMutable(strides))) { } 35 | 36 | template 37 | CudaTensor::CudaTensor(std::initializer_list sizes, 38 | std::initializer_list strides) 39 | : CudaTensor(LongStorage(sizes.begin(), sizes.end()), 40 | LongStorage(strides.begin(), strides.end())) { } 41 | 42 | template 43 | CudaTensor::CudaTensor(const std::vector& sizes, 44 | const std::vector& strides) 45 | : CudaTensor(LongStorage(sizes.begin(), sizes.end()), 46 | LongStorage(strides.begin(), strides.end())) { } 47 | 48 | template 49 | CudaTensor::CudaTensor(const Tensor& cpuTensor) 50 | : CudaTensor(cpuTensor.sizes()) { 51 | copy(cpuTensor); 52 | } 53 | 54 | // The CPU tensor is temporary, it may always share memory with Thrift 55 | template 56 | CudaTensor::CudaTensor( 57 | const ThriftTensor& thriftTensor, 58 | SharingMode /*sharing*/) 59 | : CudaTensor(Tensor(thriftTensor, SHARE_ALL)) {} 60 | 61 | template 62 | 
CudaTensor::CudaTensor(detail::SetTH, THType* t, bool incRef) 63 | : Base(t) { 64 | DCHECK(t); 65 | if (incRef) { 66 | Ops::_retain(this->t_); 67 | } 68 | } 69 | 70 | template 71 | CudaTensor::CudaTensor(const THType* other, unsigned cloneMode) 72 | : Base(Base::cloneTH(other, cloneMode)) { } 73 | 74 | template 75 | CudaTensor::CudaTensor(const CudaTensor& other, unsigned cloneMode) 76 | : CudaTensor(other.t_, cloneMode) { } 77 | 78 | template 79 | CudaTensor::CudaTensor(CudaTensor&& other, unsigned cloneMode) 80 | : CudaTensor(other, cloneMode) { 81 | other.clear(); 82 | } 83 | 84 | template 85 | auto CudaTensor::operator=(const CudaTensor& other) -> CudaTensor& { 86 | if (&other != this) { 87 | Ops::_set(this->t_, other.mut()); 88 | } 89 | return *this; 90 | } 91 | 92 | template 93 | auto CudaTensor::operator=(CudaTensor&& other) -> CudaTensor& { 94 | if (&other != this) { 95 | *this = other; 96 | other.clear(); 97 | } 98 | return *this; 99 | } 100 | 101 | template 102 | T CudaTensor::at(std::initializer_list indices) const { 103 | auto offset = this->storageOffset() + this->offsetOf(std::move(indices)); 104 | typename Base::StorageBuffer buf; 105 | return this->storageRef(&buf).read(offset); 106 | } 107 | 108 | template 109 | void CudaTensor::copy(const CudaTensor& src) { 110 | Ops::_copy(this->t_, src.mut()); 111 | } 112 | 113 | template 114 | template 115 | void CudaTensor::copy(const Tensor& src) { 116 | Ops::_copyFrom(this->t_, src.mut()); 117 | } 118 | 119 | template 120 | template 121 | void CudaTensor::copyTo(Tensor& dest) const { 122 | Ops::_copyTo(dest.mut(), this->mut()); 123 | } 124 | 125 | template 126 | typename Tensor::Ptr CudaTensor::toCPU() const { 127 | auto cpuTensor = Tensor::makePtr(this->sizes()); 128 | copyTo(*cpuTensor); 129 | return cpuTensor; 130 | } 131 | 132 | template 133 | auto CudaTensor::toDevice(int device) const -> Ptr { 134 | int currentDevice = getDevice(); 135 | if (currentDevice == -1 || currentDevice == device) { 136 | return this->copyPtr(); 137 | } 138 | cuda::DeviceGuard guard; 139 | cuda::setDevice(device); 140 | auto result = CudaTensor::makePtr(); 141 | result->resizeAs(*this); 142 | result->copy(*this); 143 | return result; 144 | } 145 | 146 | #define TENSOR_ARGM_OP(name) \ 147 | template \ 148 | auto CudaTensor::name(int dim) const \ 149 | -> std::pair { \ 150 | std::pair dest; \ 151 | Ops::_ ## name(dest.first.t_, dest.second.t_, this->mut(), dim); \ 152 | return dest; \ 153 | } 154 | TENSOR_ARGM_OP(min) 155 | TENSOR_ARGM_OP(max) 156 | #undef TENSOR_ARGM_OP 157 | 158 | template 159 | int CudaTensor::getDevice() const { 160 | return Ops::_getDevice(this->mut()); 161 | } 162 | 163 | namespace detail { 164 | void cudaTensorSerialize( 165 | ThriftTensor& out, 166 | LongRange sizes, 167 | LongRange strides, 168 | const void* data, 169 | ThriftTensorDataType dtype, 170 | size_t elementSize, 171 | ThriftTensorEndianness endianness); 172 | } // namespace detail 173 | 174 | template 175 | void CudaTensor::serialize( 176 | ThriftTensor& out, 177 | ThriftTensorEndianness endianness, 178 | SharingMode /*sharing*/) const { 179 | toCPU()->serialize(out, endianness, SHARE_ALL); 180 | } 181 | 182 | } // namespaces 183 | -------------------------------------------------------------------------------- /thpp/cuda/Storage-inl.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015 Facebook 3 | * @author Tudor Bosman (tudorb@fb.com) 4 | */ 5 | 6 | #ifndef THPP_CUDA_STORAGE_H_ 7 | #error This file may only 
be included from thpp/cuda/Storage.h 8 | #endif 9 | 10 | namespace thpp { 11 | 12 | template 13 | CudaStorage::CudaStorage() : Base(nullptr) { } 14 | 15 | template 16 | CudaStorage::CudaStorage(THType* t) : Base(t) { 17 | this->up(); 18 | } 19 | 20 | template 21 | CudaStorage::~CudaStorage() { 22 | this->down(); 23 | } 24 | 25 | template 26 | CudaStorage::CudaStorage(CudaStorage&& other) noexcept : Base(other.t_) { 27 | other.t_ = nullptr; 28 | } 29 | 30 | template 31 | CudaStorage::CudaStorage(const CudaStorage& other) : CudaStorage(other.t_) { 32 | } 33 | 34 | template 35 | CudaStorage::CudaStorage(const Storage& cpuStorage) : CudaStorage() { 36 | if (cpuStorage.data()) { 37 | resizeUninitialized(cpuStorage.size()); 38 | this->write(0, cpuStorage.data(), cpuStorage.size()); // copy host -> device 39 | } 40 | } 41 | 42 | template 43 | void CudaStorage::read(size_t offset, T* dest, size_t n) const { 44 | DCHECK_LE(offset + n, this->size()); 45 | cuda::check(cudaMemcpy(dest, this->data() + offset, n * sizeof(T), 46 | cudaMemcpyDeviceToHost)); 47 | } 48 | 49 | template 50 | T CudaStorage::read(size_t offset) const { 51 | T result; 52 | this->read(offset, &result, 1); 53 | return result; 54 | } 55 | 56 | template 57 | void CudaStorage::write(size_t offset, const T* src, size_t n) { 58 | DCHECK_LE(offset + n, this->size()); 59 | cuda::check(cudaMemcpy(this->data() + offset, src, n * sizeof(T), 60 | cudaMemcpyHostToDevice)); 61 | } 62 | 63 | template 64 | void CudaStorage::write(size_t offset, T value) { 65 | this->write(offset, &value, 1); 66 | } 67 | 68 | template 69 | CudaStorage& CudaStorage::operator=(CudaStorage&& other) { 70 | if (&other != this) { 71 | this->down(); 72 | this->t_ = other.t_; 73 | other.t_ = nullptr; 74 | } 75 | return *this; 76 | } 77 | 78 | template 79 | CudaStorage& CudaStorage::operator=(const CudaStorage& other) { 80 | if (&other != this) { 81 | this->down(); 82 | this->t_ = other.t_; 83 | this->up(); 84 | } 85 | return *this; 86 | } 87 | 88 | template 89 | CudaStorage::CudaStorage( 90 | const ThriftStorage& thriftStorage, 91 | SharingMode /*sharing*/) 92 | : CudaStorage(Storage(thriftStorage, SHARE_ALL)) {} 93 | 94 | template 95 | CudaStorage::CudaStorage(folly::IOBuf&& iob, 96 | SharingMode sharing, 97 | bool resizable) 98 | : Base(nullptr) { 99 | setFromIOBuf(std::move(iob), sharing, resizable); 100 | } 101 | 102 | namespace detail { 103 | void cudaStorageSerialize(ThriftStorage& out, 104 | const void* src, size_t size, 105 | ThriftTensorDataType dtype, 106 | ThriftTensorEndianness endianness); 107 | } // namespace detail 108 | 109 | template 110 | void CudaStorage::serialize( 111 | ThriftStorage& out, 112 | ThriftTensorEndianness endianness, 113 | bool /*mayShare*/) const { 114 | toCPU().serialize(out, endianness, true); 115 | } 116 | 117 | template 118 | Storage CudaStorage::toCPU() const { 119 | Storage cpuStorage; 120 | if (this->data()) { 121 | cpuStorage.resizeUninitialized(this->size()); 122 | this->read(0, cpuStorage.data(), this->size()); // copy device -> host 123 | } 124 | return cpuStorage; 125 | } 126 | 127 | namespace detail { 128 | 129 | class CudaIOBufAllocator { 130 | public: 131 | explicit CudaIOBufAllocator(folly::IOBuf&& iob); 132 | 133 | cudaError_t malloc(void* ctx, void** ptr, size_t size, cudaStream_t); 134 | cudaError_t realloc(void* ctx, void** ptr, 135 | size_t oldSize, size_t newSize, cudaStream_t); 136 | cudaError_t free(void* ctx, void* ptr); 137 | 138 | private: 139 | folly::IOBuf iob_; 140 | }; 141 | 142 | } // namespace detail 143 | 144 | template 145 | void
CudaStorage::setFromIOBuf(folly::IOBuf&& iob, SharingMode sharing, 146 | bool resizable) { 147 | if (iob.isChained()) { 148 | throw std::invalid_argument("IOBuf may not be chained"); 149 | } 150 | size_t len = iob.length(); 151 | if (len % sizeof(T) != 0) { 152 | throw std::invalid_argument("IOBuf size must be multiple of data size"); 153 | } 154 | len /= sizeof(T); 155 | 156 | switch (sharing) { 157 | case SHARE_NONE: 158 | throw std::invalid_argument("SHARE_NONE not supported"); 159 | case SHARE_IOBUF_MANAGED: 160 | if (!iob.isManagedOne()) { 161 | throw std::invalid_argument("SHARE_IOBUF_MANAGED requires managed IOBuf"); 162 | } 163 | break; 164 | case SHARE_ALL: 165 | break; 166 | } 167 | 168 | if (resizable) { 169 | throw std::invalid_argument("NYI: Resizable IOBuf CUDA storage"); 170 | } 171 | 172 | // Ensure properly aligned 173 | if ((reinterpret_cast(iob.data()) % alignof(T)) != 0) { 174 | throw std::invalid_argument("IOBuf is not properly aligned"); 175 | } 176 | 177 | T* p = reinterpret_cast(iob.writableData()); 178 | 179 | cudaPointerAttributes attr; 180 | cuda::check(cudaPointerGetAttributes(&attr, p)); 181 | if (attr.memoryType != cudaMemoryTypeDevice) { 182 | throw std::invalid_argument("IOBuf does not point to CUDA memory"); 183 | } 184 | 185 | this->t_ = Ops::_newWithDataAndAllocator( 186 | p, len, 187 | &THCAllocatorWrapper::thcAllocator, 188 | new detail::CudaIOBufAllocator(std::move(iob))); 189 | Ops::_clearFlag(this->t_, TH_STORAGE_RESIZABLE); 190 | } 191 | 192 | template 193 | THCDeviceAllocator THCAllocatorWrapper::thcAllocator = { 194 | &THCAllocatorWrapper::malloc, 195 | &THCAllocatorWrapper::realloc, 196 | &THCAllocatorWrapper::free, 197 | NULL, 198 | NULL, 199 | }; 200 | 201 | } // namespaces 202 | -------------------------------------------------------------------------------- /thpp/Tensor.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. An additional grant 7 | * of patent rights can be found in the PATENTS file in the same directory. 8 | * 9 | */ 10 | 11 | #ifndef THPP_TENSOR_H_ 12 | #define THPP_TENSOR_H_ 13 | 14 | #ifdef THPP_COMPAT_TENSOR_H_ 15 | #error "thpp-compatibility/ is a wrapper of legacy thpp with ATen's updated TH. You should NOT include thpp/ and thpp-compatibility/ in the same binary" 16 | #endif 17 | 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | #ifndef NO_THRIFT 24 | #include 25 | #endif 26 | #ifndef NO_FOLLY 27 | #include 28 | #endif 29 | 30 | namespace thpp { 31 | 32 | /** 33 | * A Tensor wraps a pointer to a THTensor, and as such it has reference-counted 34 | * pointer semantics. 35 | * 36 | * Tensors may also share memory with other tensors. Operations that 37 | * manipulate metadata (select, transpose, etc) will make source and 38 | * destination tensors share memory. To ensure you have a unique copy, use 39 | * force(UNIQUE) (or set UNIQUE in the optional cloneMode argument to the copy 40 | * and move constructors). 41 | * 42 | * After metadata manipulation, the resulting tensor might not be stored 43 | * in the usual row-major order in memory. If you need a contiguous 44 | * representation, use force(CONTIGUOUS) (or set CONTIGUOUS in the optional 45 | * argument to the copy and move constructors). 
Note that this may break 46 | * the memory sharing (it will likely create a UNIQUE copy as well). 47 | */ 48 | template class CudaTensor; 49 | template 50 | class Tensor : public TensorBase, Tensor> { 51 | typedef TensorBase, Tensor> Base; 52 | typedef typename Base::Ops Ops; 53 | template friend class Tensor; 54 | template friend class CudaTensor; 55 | friend class TensorPtr; 56 | 57 | public: 58 | typedef typename Base::StorageType StorageType; 59 | typedef typename Base::offset_type offset_type; 60 | typedef typename Base::size_type size_type; 61 | typedef typename Base::THType THType; 62 | 63 | // Default constructor; construct an empty, zero-dimensional Tensor. 64 | Tensor(); 65 | 66 | Tensor(StorageType storage, offset_type storageOffset, 67 | LongStorage sizes, LongStorage strides = LongStorage()); 68 | 69 | #ifndef NO_FOLLY 70 | Tensor(StorageType storage, offset_type storageOffset, 71 | LongRange sizes, LongRange strides = LongRange()); 72 | #endif 73 | 74 | Tensor(StorageType storage, offset_type storageOffset, 75 | std::initializer_list sizes, 76 | std::initializer_list strides = 77 | std::initializer_list()); 78 | 79 | // Constructors from a list of sizes and a list of strides. 80 | // If specified, the list of strides must have the same size as the 81 | // list of sizes. 82 | explicit Tensor(LongStorage sizes, LongStorage strides = LongStorage()); 83 | #ifndef NO_FOLLY 84 | explicit Tensor(LongRange sizes, LongRange strides = LongRange()); 85 | #endif 86 | explicit Tensor(const std::vector& sizes, 87 | const std::vector& strides = 88 | std::vector()); 89 | explicit Tensor( 90 | std::initializer_list sizes, 91 | std::initializer_list strides = 92 | std::initializer_list()); 93 | 94 | #if !defined(NO_THRIFT) && !defined(NO_FOLLY) 95 | // Deserialize from Thrift. Throws if wrong type. 96 | explicit Tensor(const ThriftTensor& thriftTensor, 97 | SharingMode sharing = SHARE_IOBUF_MANAGED); 98 | #endif 99 | 100 | // Do not alias other, create separate object (with separate metadata); 101 | // might still share data with other, unless UNIQUE requested in 102 | // cloneMode. 103 | explicit Tensor(const THType* other, unsigned cloneMode = 0); 104 | 105 | // Move/copy constructors. Enforce requested mode. 106 | /* implicit */ Tensor(const Tensor& other, unsigned cloneMode = 0); 107 | /* implicit */ /* may throw */ Tensor(Tensor&& other, unsigned cloneMode = 0); 108 | 109 | // Move/copy assignment operators. Will share memory with "other". 110 | Tensor& operator=(const Tensor& other); 111 | /* noexcept override */ Tensor& operator=(Tensor&& other); 112 | 113 | #if !defined(NO_THRIFT) && !defined(NO_FOLLY) 114 | // Serialize to Thrift. Note that, if sharing is not SHARE_NONE, the 115 | // resulting ThriftTensor may share memory with *this, so changes in out.data 116 | // may be reflected in *this. 
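  // Round-trip sketch (illustrative; SHARE_NONE on the deserializing
  // constructor requests a private copy, so "back" will not alias "out"):
  //
  //   Tensor<float> t({2, 3});
  //   ThriftTensor out;
  //   t.serialize(out);                     // out.data may alias t's memory
  //   Tensor<float> back(out, SHARE_NONE);  // decode into a private copy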
117 | void serialize(ThriftTensor& out, 118 | ThriftTensorEndianness endianness = 119 | ThriftTensorEndianness::NATIVE, 120 | SharingMode sharing = SHARE_IOBUF_MANAGED) const; 121 | #endif 122 | 123 | // Copy from another tensor 124 | template 125 | void copy(const Tensor& src); 126 | 127 | // Operator to return the first element at the given index 128 | T& at(offset_type idx) { return at({idx}); } 129 | const T& at(offset_type idx) const { return at({idx}); } 130 | 131 | T& at(std::initializer_list indices) { 132 | return this->data()[this->offsetOf(std::move(indices))]; 133 | } 134 | 135 | const T& at(std::initializer_list indices) const { 136 | return const_cast(this)->at(std::move(indices)); 137 | } 138 | 139 | // 140 | std::pair> max(int dim) const; 141 | 142 | // 143 | std::pair> min(int dim) const; 144 | 145 | private: 146 | Tensor(detail::SetTH, THType* t, bool incRef); 147 | 148 | #if !defined(NO_THRIFT) && !defined(NO_FOLLY) 149 | static THType* deserializeTH(const ThriftTensor& thriftTensor, 150 | SharingMode sharing); 151 | #endif 152 | }; 153 | 154 | template 155 | void copyTensor(Tensor& dest, const Tensor& src) { 156 | dest.copy(src); 157 | } 158 | 159 | } // namespaces 160 | 161 | #include 162 | 163 | #endif /* THPP_TENSOR_H_ */ 164 | -------------------------------------------------------------------------------- /thpp/Storage.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. An additional grant 7 | * of patent rights can be found in the PATENTS file in the same directory. 8 | * 9 | */ 10 | 11 | #ifndef THPP_STORAGE_H_ 12 | #define THPP_STORAGE_H_ 13 | 14 | #ifndef DCHECK 15 | #include 16 | #define DCHECK(x) assert(x) 17 | #endif 18 | 19 | #include 20 | #include 21 | #ifndef NO_THRIFT 22 | #include 23 | #endif 24 | #ifndef NO_FOLLY 25 | #include 26 | #include 27 | #include 28 | #endif 29 | #include 30 | #include 31 | 32 | namespace thpp { 33 | 34 | #ifndef NO_FOLLY 35 | using folly::Range; 36 | #endif 37 | 38 | /** 39 | * Wrapper around TH's Storage type, which is a length-aware, 40 | * reference-counted, heap-allocated array. 41 | */ 42 | template class Tensor; 43 | template class CudaTensor; 44 | 45 | #ifndef NO_FOLLY 46 | enum SharingMode { 47 | // Do not share memory with the given IOBuf. 
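  // (A private copy of the data is made if the buffer is shared.)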
48 | SHARE_NONE, 49 | 50 | // Share memory managed by IOBuf (no additional bookkeeping required) 51 | SHARE_IOBUF_MANAGED, 52 | 53 | // Share all memory, including external buffers (which might require you to 54 | // guarantee that such external buffers remain allocated until all IOBuf 55 | // and Storage objects that refer to them are gone) 56 | SHARE_ALL, 57 | }; 58 | #endif 59 | 60 | template 61 | class Storage : public StorageBase> { 62 | typedef StorageBase> Base; 63 | typedef typename Base::Ops Ops; 64 | friend Base; // Yay C++11 65 | friend class Tensor; 66 | public: 67 | typedef typename Base::THType THType; 68 | Storage(); 69 | 70 | explicit Storage(std::initializer_list data); 71 | template Storage(It begin, It end); 72 | Storage(size_t n, T value); 73 | 74 | explicit Storage(THType* t); 75 | 76 | //////////////////////////////////////////////////////////////////////////////// 77 | #ifndef NO_FOLLY 78 | //////////////////////////////////////////////////////////////////////////////// 79 | 80 | explicit Storage(Range range) 81 | : Storage(range.begin(), range.end()) { } 82 | 83 | 84 | // Create a Storage object containing the data from an IOBuf. 85 | // If sharing is not SHARE_NONE, then the Storage object will share memory 86 | // with the IOBuf, at least until either is resized. 87 | explicit Storage(folly::IOBuf&& iob, 88 | SharingMode sharing = SHARE_IOBUF_MANAGED, 89 | bool resizable = true); 90 | explicit Storage(const folly::IOBuf& iob, 91 | SharingMode sharing = SHARE_IOBUF_MANAGED, 92 | bool resizable = true) 93 | : Storage(folly::IOBuf(iob), sharing, resizable) { } 94 | 95 | #if !defined(NO_THRIFT) && !defined(NO_FOLLY) 96 | // Deserialize from Thrift. Throws if wrong type. 97 | explicit Storage(const ThriftStorage& thriftStorage, 98 | SharingMode sharing = SHARE_IOBUF_MANAGED); 99 | #endif 100 | 101 | // Takes ownership of a range allocated with malloc() (NOT new or new[]!) 102 | static Storage takeOwnership(Range data); 103 | 104 | // Wrap a range of memory. The range must stay allocated until all Storage 105 | // objects that refer to it are gone. 106 | static Storage wrap(Range data); 107 | 108 | // Wrap a range of memory and use a custom allocator for reallocations. 109 | // You probably don't need this. 110 | static Storage wrapWithAllocator(Range data, 111 | THAllocator* allocator, 112 | void* allocatorContext); 113 | 114 | //////////////////////////////////////////////////////////////////////////////// 115 | #endif // !NO_FOLLY 116 | //////////////////////////////////////////////////////////////////////////////// 117 | 118 | static Storage wrapWithAllocator(T* data, size_t size, 119 | THAllocator* allocator, 120 | void* allocatorContext); 121 | 122 | // Use a custom allocator. The allocator is managed by the caller. 123 | static Storage withAllocator(THAllocator* allocator, 124 | void* allocatorContext); 125 | 126 | ~Storage(); 127 | 128 | Storage(Storage&& other) noexcept; 129 | Storage(const Storage& other); 130 | Storage& operator=(Storage&& other); 131 | Storage& operator=(const Storage& other); 132 | 133 | void resize(size_t n, T value = 0); 134 | 135 | template void assign(It begin, It end); 136 | void assign(size_t n, T value); 137 | 138 | //////////////////////////////////////////////////////////////////////////////// 139 | #ifndef NO_FOLLY 140 | //////////////////////////////////////////////////////////////////////////////// 141 | 142 | // Create an IOBuf that wraps the memory currently allocated to this 143 | // storage.
The memory won't be freed until all references to it 144 | // are gone, either from IOBufs or from Storage objects. Note that 145 | // if this Storage is resized, it might not share memory with the 146 | // returned IOBuf any more. 147 | folly::IOBuf getIOBuf(); 148 | 149 | #if !defined(NO_THRIFT) && !defined(NO_FOLLY) 150 | // Serialize to Thrift. 151 | void serialize(ThriftStorage& out, 152 | ThriftTensorEndianness endianness = 153 | ThriftTensorEndianness::NATIVE, 154 | SharingMode sharing = SHARE_IOBUF_MANAGED) const; 155 | #endif 156 | 157 | // This is obvious, except on Cuda, where it isn't. 158 | T read(size_t offset) const { 159 | DCHECK_LT(offset, this->size()); 160 | return this->data()[offset]; 161 | } 162 | 163 | void read(size_t offset, T* dest, size_t n) const { 164 | DCHECK_LE(offset + n, this->size()); 165 | memcpy(dest, this->data() + offset, n * sizeof(T)); 166 | } 167 | 168 | void write(size_t offset, T value) { 169 | DCHECK_LT(offset, this->size()); 170 | this->data()[offset] = value; 171 | } 172 | 173 | void write(size_t offset, const T* src, size_t n) { 174 | DCHECK_LE(offset + n, this->size()); 175 | memcpy(this->data() + offset, src, n * sizeof(T)); 176 | } 177 | 178 | //////////////////////////////////////////////////////////////////////////////// 179 | #endif // !NO_FOLLY 180 | //////////////////////////////////////////////////////////////////////////////// 181 | 182 | bool isUnique() const { return isUnique(this->t_); } 183 | static bool isUnique(const THType* th); 184 | 185 | private: 186 | template friend class Tensor; 187 | template friend class CudaTensor; 188 | 189 | #ifndef NO_FOLLY 190 | void setFromIOBuf(folly::IOBuf&& iob, SharingMode sharing, bool resizable); 191 | #endif 192 | }; 193 | 194 | /** 195 | * Wrap a THAllocator-like object with a C++ interface into THAllocator. 196 | */ 197 | template 198 | class THAllocatorWrapper { 199 | public: 200 | static void* malloc(void* ctx, long size) { 201 | return static_cast(ctx)->malloc(size); 202 | } 203 | static void* realloc(void* ctx, void* ptr, long size) { 204 | return static_cast(ctx)->realloc(ptr, size); 205 | } 206 | static void free(void* ctx, void* ptr) { 207 | return static_cast(ctx)->free(ptr); 208 | } 209 | }; 210 | 211 | } // namespaces 212 | 213 | #include 214 | 215 | #endif /* THPP_STORAGE_H_ */ 216 | -------------------------------------------------------------------------------- /thpp/Tensor-inl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. An additional grant 7 | * of patent rights can be found in the PATENTS file in the same directory. 
8 | * 9 | */ 10 | 11 | #ifndef THPP_TENSOR_H_ 12 | #error This file may only be included from thpp/Tensor.h 13 | #endif 14 | 15 | namespace thpp { 16 | 17 | //////////////////////////////////////////////////////////////////////////////// 18 | #if !defined(NO_THRIFT) && !defined(NO_FOLLY) 19 | //////////////////////////////////////////////////////////////////////////////// 20 | namespace detail { 21 | 22 | void serialize( 23 | ThriftTensor& out, 24 | LongRange sizes, 25 | LongRange strides, 26 | folly::IOBuf&& data, 27 | ThriftTensorDataType dtype, 28 | size_t elementSize, 29 | ThriftTensorEndianness endianness, 30 | SharingMode sharing); 31 | 32 | template 33 | folly::IOBuf deserialize(const ThriftObj& in, 34 | ThriftTensorDataType dtype); 35 | } // namespace detail 36 | //////////////////////////////////////////////////////////////////////////////// 37 | #endif // !NO_THRIFT && !NO_FOLLY 38 | //////////////////////////////////////////////////////////////////////////////// 39 | 40 | template 41 | Tensor::Tensor() : Base(Ops::_new()) { } 42 | 43 | template 44 | Tensor::Tensor(StorageType storage, offset_type storageOffset, 45 | LongStorage sizes, LongStorage strides) : Tensor() { 46 | Ops::_setStorage(this->t_, storage.th(), storageOffset, sizes.th(), 47 | strides.th()); 48 | } 49 | 50 | #ifndef NO_FOLLY 51 | template 52 | Tensor::Tensor(StorageType storage, offset_type storageOffset, 53 | LongRange sizes, LongRange strides) 54 | : Tensor(std::move(storage), storageOffset, 55 | LongStorage::wrap(detail::makeMutable(sizes)), 56 | LongStorage::wrap(detail::makeMutable(strides))) { } 57 | #endif 58 | 59 | template 60 | Tensor::Tensor(StorageType storage, offset_type storageOffset, 61 | std::initializer_list sizes, 62 | std::initializer_list strides) 63 | : Tensor(std::move(storage), storageOffset, 64 | LongStorage(sizes.begin(), sizes.end()), 65 | LongStorage(strides.begin(), strides.end())) { } 66 | 67 | 68 | template 69 | Tensor::Tensor(LongStorage sizes, LongStorage strides) : Tensor() { 70 | Ops::_setStorage(this->t_, nullptr, 0, sizes.th(), strides.th()); 71 | } 72 | 73 | #ifndef NO_FOLLY 74 | template 75 | Tensor::Tensor(LongRange sizes, LongRange strides) 76 | : Tensor(LongStorage::wrap(detail::makeMutable(sizes)), 77 | LongStorage::wrap(detail::makeMutable(strides))) { } 78 | #endif 79 | 80 | template 81 | Tensor::Tensor(std::initializer_list sizes, 82 | std::initializer_list strides) 83 | : Tensor(LongStorage(sizes.begin(), sizes.end()), 84 | LongStorage(strides.begin(), strides.end())) { } 85 | 86 | template 87 | Tensor::Tensor(const std::vector& sizes, 88 | const std::vector& strides) 89 | : Tensor(LongStorage(sizes.begin(), sizes.end()), 90 | LongStorage(strides.begin(), strides.end())) { } 91 | 92 | //////////////////////////////////////////////////////////////////////////////// 93 | #if !defined(NO_THRIFT) && !defined(NO_FOLLY) 94 | //////////////////////////////////////////////////////////////////////////////// 95 | template 96 | auto Tensor::deserializeTH(const ThriftTensor& thriftTensor, 97 | SharingMode sharing) -> THType* { 98 | Storage data(detail::deserialize(thriftTensor, detail::dataType()), 99 | sharing); 100 | 101 | LongStorage s(LongStorage::wrap(detail::makeMutable(LongRange( 102 | thriftTensor.sizes.data(), thriftTensor.sizes.size())))); 103 | 104 | return Ops::_newWithStorage(data.th(), 0, s.th(), nullptr); 105 | } 106 | 107 | template 108 | Tensor::Tensor(const ThriftTensor& thriftTensor, 109 | SharingMode sharing) 110 | : Base(deserializeTH(thriftTensor, 
sharing)) { 111 | DCHECK_EQ(this->storage().size(), this->size()); 112 | } 113 | //////////////////////////////////////////////////////////////////////////////// 114 | #endif // !NO_THRIFT && !NO_FOLLY 115 | //////////////////////////////////////////////////////////////////////////////// 116 | 117 | template 118 | Tensor::Tensor(detail::SetTH, THType* t, bool incRef) 119 | : Base(t) { 120 | DCHECK(t); 121 | if (incRef) { 122 | Ops::_retain(this->t_); 123 | } 124 | } 125 | 126 | template 127 | Tensor::Tensor(const THType* other, unsigned cloneMode) 128 | : Base(Base::cloneTH(other, cloneMode)) { } 129 | 130 | template 131 | Tensor::Tensor(const Tensor& other, unsigned cloneMode) 132 | : Tensor(other.t_, cloneMode) { } 133 | 134 | template 135 | Tensor::Tensor(Tensor&& other, unsigned cloneMode) 136 | : Tensor(other, cloneMode) { 137 | other.clear(); 138 | } 139 | 140 | template 141 | auto Tensor::operator=(const Tensor& other) -> Tensor& { 142 | if (&other != this) { 143 | Ops::_set(this->t_, other.mut()); 144 | } 145 | return *this; 146 | } 147 | 148 | template 149 | auto Tensor::operator=(Tensor&& other) -> Tensor& { 150 | if (&other != this) { 151 | *this = other; 152 | other.clear(); 153 | } 154 | return *this; 155 | } 156 | 157 | template 158 | template 159 | void Tensor::copy(const Tensor& src) { 160 | Ops::_copyT(this->t_, src.mut()); 161 | } 162 | 163 | #if !defined(NO_THRIFT) && !defined(NO_FOLLY) 164 | template 165 | void Tensor::serialize(ThriftTensor& out, 166 | ThriftTensorEndianness endianness, 167 | SharingMode sharing) const { 168 | auto buf = Storage(Ops::_storage(this->mut())).getIOBuf(); 169 | buf.trimStart(Ops::_storageOffset(this->mut()) * sizeof(T)); 170 | detail::serialize( 171 | out, 172 | this->sizes(), 173 | this->strides(), 174 | std::move(buf), 175 | detail::dataType(), 176 | sizeof(T), 177 | endianness, 178 | sharing); 179 | } 180 | #endif 181 | 182 | 183 | // These must be defined here, as LongTensor and ByteTensor must be 184 | // complete types 185 | 186 | template 187 | void TensorBase::maskedFill( 188 | const ByteTensor& mask, T value) { 189 | Ops::_maskedFill(t_, mask.mut(), value); 190 | } 191 | 192 | template 193 | void TensorBase::maskedCopy( 194 | const ByteTensor& mask, const TensorBase& src) { 195 | Ops::_maskedCopy(t_, mask.mut(), src.mut()); 196 | } 197 | 198 | template 199 | auto TensorBase::maskedSelect( 200 | const ByteTensor& mask) const -> Derived { 201 | Derived r; 202 | Ops::_maskedSelect(&r.t_, this->mut(), mask.mut()); 203 | return r; 204 | } 205 | 206 | template 207 | auto TensorBase::indexSelect( 208 | int dim, const LongTensor& index) const -> Derived { 209 | Derived r; 210 | Ops::_indexSelect(&r.t_, this->mut(), dim, index.mut()); 211 | return r; 212 | } 213 | 214 | template 215 | void TensorBase::indexFill( 216 | int dim, const LongTensor& index, T val) { 217 | Ops::_indexFill(t_, dim, index.mut(), val); 218 | } 219 | 220 | #define TENSOR_ARGM_OP(name) \ 221 | template \ 222 | auto Tensor::name(int dim) const -> std::pair { \ 223 | std::pair dest; \ 224 | Ops::_ ## name(dest.first.t_, dest.second.t_, this->mut(), dim); \ 225 | return dest; \ 226 | } 227 | TENSOR_ARGM_OP(min) 228 | TENSOR_ARGM_OP(max) 229 | #undef TENSOR_ARGM_OP 230 | 231 | } // namespaces 232 | -------------------------------------------------------------------------------- /thpp/detail/TensorGeneric.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014, Facebook, Inc. 3 | * All rights reserved. 
4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. An additional grant 7 | * of patent rights can be found in the PATENTS file in the same directory. 8 | * 9 | */ 10 | 11 | #ifndef TH_GENERIC_FILE 12 | #define TH_GENERIC_FILE "thpp/detail/TensorGeneric.h" 13 | #else 14 | 15 | typedef Tensor TH_CONCAT_2(Real, Tensor); 16 | 17 | namespace detail { 18 | template <> struct TensorOps> { 19 | typedef real value_type; 20 | typedef accreal accurate_type; 21 | typedef THTensor type; 22 | typedef Tensor ArgTensorType; 23 | 24 | static real* _data(THTensor* t) { 25 | return THTensor_(data)(t); 26 | } 27 | static THStorage* _storage(const THTensor* t) { 28 | return THTensor_(storage)(t); 29 | } 30 | static long _storageOffset(const THTensor* t) { 31 | return THTensor_(storageOffset)(t); 32 | } 33 | static THTensor* _new() { 34 | return THTensor_(new)(); 35 | } 36 | static THTensor* _newWithTensor(THTensor* other) { 37 | return THTensor_(newWithTensor)(other); 38 | } 39 | static THTensor* _newWithStorage(THStorage* storage, 40 | long storageOffset, 41 | THLongStorage* size, 42 | THLongStorage* stride) { 43 | return THTensor_(newWithStorage)(storage, storageOffset, size, stride); 44 | } 45 | static THTensor* _newClone(THTensor* self) { 46 | return THTensor_(newClone)(self); 47 | } 48 | static THTensor* _newContiguous(THTensor* self) { 49 | return THTensor_(newContiguous)(self); 50 | } 51 | static void _resize(THTensor* self, THLongStorage* size, 52 | THLongStorage* stride) { 53 | THTensor_(resize)(self, size, stride); 54 | } 55 | static void _resizeAs(THTensor* self, THTensor* src) { 56 | THTensor_(resizeAs)(self, src); 57 | } 58 | static void _set(THTensor* self, THTensor* src) { 59 | THTensor_(set)(self, src); 60 | } 61 | static void _setStorage(THTensor* self, THStorage* storage, 62 | long offset, THLongStorage* size, 63 | THLongStorage* stride) { 64 | THTensor_(setStorage)(self, storage, offset, size, stride); 65 | } 66 | static void _setStorage1d(THTensor* self, THStorage* storage, 67 | long offset, long size0, long stride0) { 68 | THTensor_(setStorage1d)(self, storage, offset, size0, stride0); 69 | } 70 | static void _narrow(THTensor* self, THTensor* src, int dim, 71 | long firstIndex, long size) { 72 | THTensor_(narrow)(self, src, dim, firstIndex, size); 73 | } 74 | static void _select(THTensor* self, THTensor* src, int dim, long index) { 75 | THTensor_(select)(self, src, dim, index); 76 | } 77 | static void _transpose(THTensor* self, THTensor* src, int dim1, int dim2) { 78 | THTensor_(transpose)(self, src, dim1, dim2); 79 | } 80 | static void _squeeze(THTensor* self, THTensor* src) { 81 | THTensor_(squeeze)(self, src); 82 | } 83 | static void _squeeze1d(THTensor* self, THTensor* src, int dim) { 84 | THTensor_(squeeze1d)(self, src, dim); 85 | } 86 | static int _isContiguous(const THTensor* self) { 87 | return THTensor_(isContiguous)(self); 88 | } 89 | static long _nElement(const THTensor* self) { 90 | return THTensor_(nElement)(self); 91 | } 92 | static void _retain(THTensor* self) { 93 | return THTensor_(retain)(self); 94 | } 95 | static void _free(THTensor* self) { 96 | return THTensor_(free)(self); 97 | } 98 | 99 | // THTensorCopy.h 100 | static void _copy(THTensor* self, THTensor* src) { 101 | return THTensor_(copy)(self, src); 102 | } 103 | 104 | template 105 | static void _copyT(THTensor* self, T* src); 106 | 107 | // THTensorMath.h 108 | static void _fill(THTensor* r, real value) { 109 | 
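/* forwards to the TH C API for this element type */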
THTensor_(fill)(r, value); 110 | } 111 | static void _zero(THTensor* r) { 112 | THTensor_(zero)(r); 113 | } 114 | static void _maskedFill(THTensor* tensor, THByteTensor* mask, real value) { 115 | THTensor_(maskedFill)(tensor, mask, value); 116 | } 117 | static void _maskedCopy(THTensor* tensor, THByteTensor* mask, THTensor* src) { 118 | THTensor_(maskedCopy)(tensor, mask, src); 119 | } 120 | static void _maskedSelect(THTensor* tensor, THTensor* src, 121 | THByteTensor* mask) { 122 | THTensor_(maskedSelect)(tensor, src, mask); 123 | } 124 | static void _indexSelect(THTensor* tensor, THTensor* src, int dim, 125 | THLongTensor* index) { 126 | THTensor_(indexSelect)(tensor, src, dim, index); 127 | } 128 | static void _indexCopy(THTensor* tensor, int dim, THLongTensor* index, 129 | THTensor* src) { 130 | THTensor_(indexCopy)(tensor, dim, index, src); 131 | } 132 | static void _indexFill(THTensor* tensor, int dim, THLongTensor* index, 133 | real val) { 134 | THTensor_(indexFill)(tensor, dim, index, val); 135 | } 136 | static accreal _dot(THTensor* t, THTensor* src) { 137 | return THTensor_(dot)(t, src); 138 | } 139 | static real _minall(THTensor* t) { 140 | return THTensor_(minall)(t); 141 | } 142 | static real _maxall(THTensor* t) { 143 | return THTensor_(maxall)(t); 144 | } 145 | static accreal _sumall(THTensor* t) { 146 | return THTensor_(sumall)(t); 147 | } 148 | static accreal _prodall(THTensor* t) { 149 | return THTensor_(prodall)(t); 150 | } 151 | static void _add(THTensor* r, THTensor* t, real value) { 152 | return THTensor_(add)(r, t, value); 153 | } 154 | static void _mul(THTensor* r, THTensor* t, real value) { 155 | return THTensor_(mul)(r, t, value); 156 | } 157 | static void _div(THTensor* r, THTensor* t, real value) { 158 | return THTensor_(div)(r, t, value); 159 | } 160 | static void _cadd(THTensor* r, THTensor* t, real value, THTensor* src) { 161 | return THTensor_(cadd)(r, t, value, src); 162 | } 163 | static void _cmul(THTensor* r, THTensor* t, THTensor* src) { 164 | return THTensor_(cmul)(r, t, src); 165 | } 166 | static void _cdiv(THTensor* r, THTensor* t, THTensor* src) { 167 | return THTensor_(cdiv)(r, t, src); 168 | } 169 | static void _addcmul(THTensor* r, THTensor* t, real value, THTensor* src1, 170 | THTensor* src2) { 171 | return THTensor_(addcmul)(r, t, value, src1, src2); 172 | } 173 | static void _addcdiv(THTensor* r, THTensor* t, real value, THTensor* src1, 174 | THTensor* src2) { 175 | return THTensor_(addcdiv)(r, t, value, src1, src2); 176 | } 177 | static void _addmv(THTensor* r, real beta, THTensor* t, real alpha, 178 | THTensor* mat, THTensor* vec) { 179 | return THTensor_(addmv)(r, beta, t, alpha, mat, vec); 180 | } 181 | static void _addmm(THTensor* r, real beta, THTensor* t, real alpha, 182 | THTensor* m1, THTensor* m2) { 183 | return THTensor_(addmm)(r, beta, t, alpha, m1, m2); 184 | } 185 | static void _addr(THTensor* r, real beta, THTensor* t, real alpha, 186 | THTensor* vec1, THTensor* vec2) { 187 | return THTensor_(addr)(r, beta, t, alpha, vec1, vec2); 188 | } 189 | static void _max(THTensor* values, THLongTensor* indices, 190 | THTensor* t, int dim) { 191 | return THTensor_(max)(values, indices, t, dim); 192 | } 193 | static void _min(THTensor* values, THLongTensor* indices, 194 | THTensor* t, int dim) { 195 | return THTensor_(min)(values, indices, t, dim); 196 | } 197 | static void _sum(THTensor* r, THTensor* t, int dim) { 198 | return THTensor_(sum)(r, t, dim); 199 | } 200 | static void _prod(THTensor* r, THTensor* t, int dim) { 201 | return 
THTensor_(prod)(r, t, dim); 202 | } 203 | static void _cumsum(THTensor* r, THTensor* t, int dim) { 204 | return THTensor_(cumsum)(r, t, dim); 205 | } 206 | static void _cumprod(THTensor* r, THTensor* t, int dim) { 207 | return THTensor_(cumprod)(r, t, dim); 208 | } 209 | static void _sign(THTensor* r, THTensor* t) { 210 | return THTensor_(sign)(r, t); 211 | } 212 | 213 | #define S1(X) #X 214 | #define S(X) S1(X) 215 | static constexpr const char* kLuaTypeName = "torch." 216 | S(TH_CONCAT_2(Real, Tensor)); 217 | #undef S 218 | #undef S1 219 | }; 220 | 221 | #define S(TYPE) \ 222 | template <> inline void TensorOps>::_copyT( \ 223 | THTensor* self, TH##TYPE##Tensor* src) { \ 224 | return THTensor_(copy##TYPE)(self, src); \ 225 | } 226 | 227 | S(Byte) 228 | S(Char) 229 | S(Short) 230 | S(Int) 231 | S(Long) 232 | S(Float) 233 | S(Double) 234 | 235 | #undef S 236 | 237 | } // namespace detail 238 | 239 | #endif 240 | -------------------------------------------------------------------------------- /thpp/test/TensorSerializationTest.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. An additional grant 7 | * of patent rights can be found in the PATENTS file in the same directory. 8 | * 9 | */ 10 | 11 | #include 12 | 13 | #include 14 | 15 | #include 16 | #include 17 | 18 | #include 19 | #include 20 | 21 | namespace thpp { 22 | namespace test { 23 | 24 | Tensor createTensor(std::vector sizes, 25 | std::vector strides = {}) { 26 | Tensor tensor(LongRange(sizes.data(), sizes.size()), 27 | LongRange(strides.data(), strides.size())); 28 | 29 | std::vector counter; 30 | counter.resize(sizes.size()); 31 | int idx = counter.size(); 32 | float val = 0; 33 | while (idx >= 0) { 34 | if (idx == counter.size()) { 35 | Tensor t(tensor); 36 | for (int i = counter.size() - 1; i >= 0; --i) { 37 | t.select(i, counter[i]); 38 | } 39 | t.front() = val++; 40 | --idx; 41 | continue; 42 | } 43 | if (++counter[idx] == sizes[idx]) { 44 | counter[idx] = 0; 45 | --idx; 46 | } else { 47 | idx = counter.size(); 48 | } 49 | } 50 | 51 | return tensor; 52 | } 53 | 54 | void runTest(std::vector sizes, 55 | std::vector strides = {}) { 56 | Tensor src = createTensor(sizes, strides); 57 | 58 | ThriftTensor serialized; 59 | src.serialize(serialized); 60 | 61 | src.force(Tensor::CONTIGUOUS); 62 | Tensor deserialized(std::move(serialized)); 63 | EXPECT_TRUE(src.sizes() == deserialized.sizes()); 64 | EXPECT_TRUE(src.strides() == deserialized.strides()); 65 | EXPECT_EQ(0, memcmp(src.data(), deserialized.data(), 66 | sizeof(float) * src.size())); 67 | } 68 | 69 | TEST(SerializationTest, Simple) { 70 | runTest({1}); 71 | runTest({2}); 72 | runTest({2}, {1}); 73 | runTest({2}, {2}); 74 | runTest({2}, {200}); 75 | runTest({20, 10}); 76 | runTest({20, 10}, {10, 1}); 77 | runTest({20, 10}, {40, 4}); 78 | runTest({20, 10}, {400, 4}); 79 | runTest({20, 10}, {0, 1}); 80 | runTest({20, 10}, {0, 0}); 81 | runTest({20, 30, 10}); 82 | runTest({20, 30, 10}, {300, 10, 1}); 83 | runTest({20, 30, 10}, {10, 200, 1}); 84 | runTest({20, 30, 10}, {1, 20, 600}); 85 | runTest({20, 30}, {8192 * 30, 8192}); 86 | } 87 | 88 | TEST(SerializationTest, SmallerThanStorage) { 89 | Tensor t({10L}); 90 | for (long i = 0; i < 10L; ++i) { 91 | t.at(i) = i; 92 | } 93 | t.resize(LongStorage{5L}); 94 | 95 | ThriftTensor out; 
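// Serialization sees only the resized extent (5 elements), even though
// the underlying storage still holds all 10.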
96 | t.serialize(out); 97 | 98 | Tensor t1(std::move(out)); 99 | EXPECT_EQ(1, t1.ndims()); 100 | EXPECT_EQ(5, t1.size()); 101 | for (long i = 0; i < t1.size(); ++i) { 102 | EXPECT_EQ(i, t1.at(i)); 103 | } 104 | } 105 | 106 | TEST(SerializationTest, StorageOffset) { 107 | Tensor t({10L}); 108 | for (long i = 0; i < 10L; ++i) { 109 | t.at(i) = i; 110 | } 111 | 112 | t.narrow(0, 1, 5); 113 | EXPECT_EQ(5, t.size()); 114 | for (long i = 0; i < t.size(); ++i) { 115 | EXPECT_EQ(i + 1, t.at(i)); 116 | } 117 | 118 | ThriftTensor out; 119 | t.serialize(out); 120 | 121 | Tensor t1(std::move(out)); 122 | EXPECT_EQ(1, t1.ndims()); 123 | EXPECT_EQ(5, t1.size()); 124 | for (long i = 0; i < t1.size(); ++i) { 125 | EXPECT_EQ(i + 1, t1.at(i)); 126 | } 127 | } 128 | 129 | TEST(SerializationTest, Empty0d) { 130 | Tensor t; 131 | EXPECT_EQ(0, t.ndims()); 132 | EXPECT_EQ(0, t.size()); 133 | 134 | ThriftTensor out; 135 | t.serialize(out); 136 | 137 | Tensor t1(std::move(out)); 138 | EXPECT_EQ(0, t1.ndims()); 139 | EXPECT_EQ(0, t1.size()); 140 | } 141 | 142 | constexpr ThriftTensorEndianness nativeEndianness = 143 | #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 144 | ThriftTensorEndianness::LITTLE; 145 | #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 146 | ThriftTensorEndianness::BIG; 147 | #else 148 | # error Weird endianness! 149 | #endif 150 | 151 | TEST(SerializationTest, IOBufStorage) { 152 | ThriftTensor serialized; 153 | serialized.dataType = ThriftTensorDataType::FLOAT; 154 | serialized.endianness = nativeEndianness; 155 | constexpr size_t n = 10; 156 | serialized.sizes.push_back(n); 157 | serialized.data = folly::IOBuf(folly::IOBuf::CREATE, n * sizeof(float)); 158 | folly::TypedIOBuf buf(&serialized.data); 159 | buf.append(n); 160 | for (int i = 0; i < n; ++i) { 161 | buf[i] = float(i); 162 | } 163 | const void* ptr = serialized.data.data(); 164 | 165 | Tensor deserialized(std::move(serialized)); 166 | EXPECT_TRUE(deserialized.data() == ptr); // actually sharing memory 167 | EXPECT_EQ(1, deserialized.sizes().size()); 168 | EXPECT_EQ(n, deserialized.sizes()[0]); 169 | EXPECT_EQ(1, deserialized.strides().size()); 170 | EXPECT_EQ(1, deserialized.strides()[0]); 171 | for (int i = 0; i < n; ++i) { 172 | EXPECT_EQ(float(i), deserialized[i].front()); 173 | } 174 | 175 | // Now resize to something big enough that it won't share memory any more 176 | deserialized.resize(LongStorage({1 << 20})); 177 | EXPECT_FALSE(deserialized.data() == ptr); 178 | for (int i = 0; i < n; ++i) { 179 | EXPECT_EQ(float(i), deserialized[i].front()); 180 | } 181 | } 182 | 183 | TEST(SerializationTest, ThriftStorageShare) { 184 | Storage storage(size_t(1000), long(42)); 185 | ThriftStorage serialized; 186 | storage.serialize(serialized); 187 | auto ptr = storage.data(); 188 | EXPECT_TRUE(static_cast(serialized.data.data()) == ptr); 189 | 190 | Storage deserialized(serialized); 191 | EXPECT_EQ(storage.size(), deserialized.size()); 192 | EXPECT_TRUE(deserialized.data() == ptr); // shares memory 193 | } 194 | 195 | TEST(SerializationTest, ThriftStorageNoShare1) { 196 | Storage storage(size_t(1000), long(42)); 197 | ThriftStorage serialized; 198 | storage.serialize(serialized, ThriftTensorEndianness::NATIVE, SHARE_NONE); 199 | auto ptr = storage.data(); 200 | EXPECT_FALSE(static_cast(serialized.data.data()) == ptr); 201 | 202 | Storage deserialized(serialized); 203 | EXPECT_EQ(storage.size(), deserialized.size()); 204 | EXPECT_FALSE(deserialized.data() == storage.data()); // doesn't share 205 | } 206 | 207 | TEST(SerializationTest, 
ThriftStorageNoShare2) { 208 | Storage storage(size_t(1000), long(42)); 209 | ThriftStorage serialized; 210 | storage.serialize(serialized); 211 | auto ptr = storage.data(); 212 | EXPECT_TRUE(static_cast(serialized.data.data()) == ptr); 213 | 214 | Storage deserialized(serialized, SHARE_NONE); 215 | EXPECT_EQ(storage.size(), deserialized.size()); 216 | EXPECT_FALSE(deserialized.data() == storage.data()); // doesn't share 217 | } 218 | 219 | TEST(SerializationTest, ThriftStorageRefs) { 220 | folly::IOBuf buf2; 221 | Storage s1({1000L}); 222 | folly::IOBuf buf1 = s1.getIOBuf(); 223 | buf2 = s1.getIOBuf(); 224 | } 225 | 226 | TEST(SerializationTest, IOBufUnique) { 227 | folly::IOBuf buf(folly::IOBuf::CREATE, sizeof(int)); 228 | *reinterpret_cast(buf.writableData()) = 42; 229 | buf.append(sizeof(int)); 230 | 231 | // This situation may arise when deserializing: two Storage objects 232 | // constructed from the same IOBuf. 233 | Tensor t1(Storage(buf), 0, {1L}); 234 | Tensor t2(Storage(buf), 0, {1L}); 235 | EXPECT_EQ(42, t1.at(0)); 236 | EXPECT_EQ(42, t2.at(0)); 237 | 238 | // The two tensors are shared 239 | EXPECT_FALSE(t1.isUnique()); 240 | EXPECT_FALSE(t2.isUnique()); 241 | 242 | // And they indeed share memory. 243 | t1.at(0) = 43; 244 | EXPECT_EQ(43, t2.at(0)); 245 | 246 | t2 = Tensor(); 247 | 248 | // Still marked as shared; "buf" is still in scope and bumping the refcount. 249 | EXPECT_FALSE(t1.isUnique()); 250 | 251 | // But no longer shared any more after killing buf. 252 | buf = folly::IOBuf(); 253 | EXPECT_TRUE(t1.isUnique()); 254 | } 255 | 256 | TEST(SerializationTest, Alignment) { 257 | // large enough so the IOBuf data is out of line 258 | constexpr long size = 4096; 259 | constexpr size_t maxOffset = alignof(long); 260 | for (size_t offset = 0; offset < maxOffset; ++offset) { 261 | ThriftTensor serialized; 262 | { 263 | Tensor t1 {size}; 264 | for (long i = 0; i < size; ++i) { 265 | t1.at({i}) = i; 266 | } 267 | t1.serialize(serialized); 268 | } 269 | 270 | serialized.data.reserve(0, maxOffset); 271 | serialized.data.advance(offset); 272 | 273 | Tensor t2(serialized); 274 | 275 | auto ptr = reinterpret_cast(t2.data()); 276 | EXPECT_EQ(0, ptr % alignof(long)); 277 | 278 | EXPECT_EQ(1, t2.ndims()); 279 | EXPECT_EQ(size, t2.size(0)); 280 | for (long i = 0; i < size; ++i) { 281 | EXPECT_EQ(i, t2.at({i})); 282 | } 283 | } 284 | } 285 | 286 | TEST(SerializationTest, BigTensorNarrow) { 287 | auto t = thpp::Tensor({32, 256, 6, 6}); 288 | t.zero(); 289 | 290 | auto t2 = t; 291 | t2.narrow(1, 128, 128); 292 | t2.fill(1); 293 | EXPECT_EQ(32 * 128 * 6 * 6, t.sumall()); // other elements are 0 294 | EXPECT_EQ(32 * 128 * 6 * 6, t2.sumall()); 295 | 296 | ThriftTensor serialized; 297 | t2.serialize(serialized); 298 | 299 | auto t3 = thpp::Tensor(serialized); 300 | EXPECT_EQ(32 * 128 * 6 * 6, t3.sumall()); 301 | } 302 | 303 | }} // namespaces 304 | -------------------------------------------------------------------------------- /thpp/cuda/detail/Tensor.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015 Facebook 3 | * @author Tudor Bosman (tudorb@fb.com) 4 | */ 5 | 6 | #ifndef THPP_CUDA_DETAIL_TENSOR_H_ 7 | #define THPP_CUDA_DETAIL_TENSOR_H_ 8 | 9 | #include 10 | #include 11 | 12 | namespace thpp { 13 | 14 | template class CudaTensor; 15 | 16 | typedef CudaTensor CudaFloatTensor; 17 | 18 | namespace detail { 19 | 20 | template struct TensorOps; 21 | 22 | template <> struct TensorOps> { 23 | typedef float value_type; 24 | typedef float 
accurate_type; 25 | typedef THCudaTensor type; 26 | typedef CudaTensor ArgTensorType; 27 | 28 | static float* _data(THCudaTensor* t) { 29 | return THCudaTensor_data(getTHCState(), t); 30 | } 31 | static THCudaStorage* _storage(THCudaTensor* t) { 32 | return THCudaTensor_storage(getTHCState(), t); 33 | } 34 | static long _storageOffset(THCudaTensor* t) { 35 | return THCudaTensor_storageOffset(getTHCState(), t); 36 | } 37 | static THCudaTensor* _new() { 38 | return THCudaTensor_new(getTHCState()); 39 | } 40 | static THCudaTensor* _newWithTensor(THCudaTensor* other) { 41 | return THCudaTensor_newWithTensor(getTHCState(), other); 42 | } 43 | static THCudaTensor* _newWithStorage(THCudaStorage* storage, 44 | long storageOffset, 45 | THLongStorage* size, 46 | THLongStorage* stride) { 47 | return THCudaTensor_newWithStorage( 48 | getTHCState(), storage, storageOffset, size, stride); 49 | } 50 | static THCudaTensor* _newClone(THCudaTensor* self) { 51 | return THCudaTensor_newClone(getTHCState(), self); 52 | } 53 | static THCudaTensor* _newContiguous(THCudaTensor* self) { 54 | return THCudaTensor_newContiguous(getTHCState(), self); 55 | } 56 | static void _resize(THCudaTensor* self, THLongStorage* size, 57 | THLongStorage* stride) { 58 | THCudaTensor_resize(getTHCState(), self, size, stride); 59 | } 60 | static void _resizeAs(THCudaTensor* self, THCudaTensor* src) { 61 | THCudaTensor_resizeAs(getTHCState(), self, src); 62 | } 63 | static void _set(THCudaTensor* self, THCudaTensor* src) { 64 | THCudaTensor_set(getTHCState(), self, src); 65 | } 66 | static void _setStorage(THCudaTensor* self, THCudaStorage* storage, 67 | long offset, THLongStorage* size, 68 | THLongStorage* stride) { 69 | THCudaTensor_setStorage(getTHCState(), self, storage, offset, size, stride); 70 | } 71 | static void _setStorage1d(THCudaTensor* self, THCudaStorage* storage, 72 | long offset, long size0, long stride0) { 73 | THCudaTensor_setStorage1d( 74 | getTHCState(), self, storage, offset, size0, stride0); 75 | } 76 | static void _narrow(THCudaTensor* self, THCudaTensor* src, int dim, 77 | long firstIndex, long size) { 78 | THCudaTensor_narrow(getTHCState(), self, src, dim, firstIndex, size); 79 | } 80 | static void _select(THCudaTensor* self, THCudaTensor* src, int dim, 81 | long index) { 82 | THCudaTensor_select(getTHCState(), self, src, dim, index); 83 | } 84 | static void _transpose(THCudaTensor* self, THCudaTensor* src, int dim1, 85 | int dim2) { 86 | THCudaTensor_transpose(getTHCState(), self, src, dim1, dim2); 87 | } 88 | static void _squeeze(THCudaTensor* self, THCudaTensor* src) { 89 | THCudaTensor_squeeze(getTHCState(), self, src); 90 | } 91 | static void _squeeze1d(THCudaTensor* self, THCudaTensor* src, int dim) { 92 | THCudaTensor_squeeze1d(getTHCState(), self, src, dim); 93 | } 94 | static int _isContiguous(const THCudaTensor* self) { 95 | return THCudaTensor_isContiguous(getTHCState(), self); 96 | } 97 | static long _nElement(const THCudaTensor* self) { 98 | return THCudaTensor_nElement(getTHCState(), self); 99 | } 100 | static void _retain(THCudaTensor* self) { 101 | return THCudaTensor_retain(getTHCState(), self); 102 | } 103 | static void _free(THCudaTensor* self) { 104 | return THCudaTensor_free(getTHCState(), self); 105 | } 106 | 107 | static void _copy(THCudaTensor* self, THCudaTensor* src) { 108 | THCudaTensor_copy(getTHCState(), self, src); 109 | } 110 | 111 | // THCudaTensorCopy.h 112 | template 113 | static void _copyFrom(THCudaTensor* self, T* src); 114 | template 115 | static void _copyTo(T* dest, 
THCudaTensor* src); 116 | 117 | // THCudaTensorMath.h 118 | static void _fill(THCudaTensor* r, float value) { 119 | THCudaTensor_fill(getTHCState(), r, value); 120 | } 121 | static void _zero(THCudaTensor* r) { 122 | THCudaTensor_zero(getTHCState(), r); 123 | } 124 | // Two overloads each: with data on device (as THCudaTensor) or on host 125 | // (as THByteTensor) 126 | static void _maskedFill(THCudaTensor* tensor, THByteTensor* mask, 127 | float value) { 128 | THCudaTensor_maskedFillByte(getTHCState(), tensor, mask, value); 129 | } 130 | static void _maskedFill(THCudaTensor* tensor, THCudaByteTensor* mask, 131 | float value) { 132 | THCudaTensor_maskedFill(getTHCState(), tensor, mask, value); 133 | } 134 | static void _maskedCopy(THCudaTensor* tensor, THByteTensor* mask, 135 | THCudaTensor* src) { 136 | THCudaTensor_maskedCopyByte(getTHCState(), tensor, mask, src); 137 | } 138 | static void _maskedCopy(THCudaTensor* tensor, THCudaByteTensor* mask, 139 | THCudaTensor* src) { 140 | THCudaTensor_maskedCopy(getTHCState(), tensor, mask, src); 141 | } 142 | static void _maskedSelect(THCudaTensor* tensor, THCudaTensor* src, 143 | THByteTensor* mask) { 144 | THCudaTensor_maskedSelectByte(getTHCState(), tensor, src, mask); 145 | } 146 | static void _maskedSelect(THCudaTensor* tensor, THCudaTensor* src, 147 | THCudaByteTensor* mask) { 148 | THCudaTensor_maskedSelect(getTHCState(), tensor, src, mask); 149 | } 150 | static void _indexSelect(THCudaTensor* tensor, THCudaTensor* src, int dim, 151 | THLongTensor* index) { 152 | THCudaTensor_indexSelect_long(getTHCState(), tensor, src, dim, index); 153 | } 154 | static void _indexCopy(THCudaTensor* tensor, int dim, THLongTensor* index, 155 | THCudaTensor* src) { 156 | THCudaTensor_indexCopy_long(getTHCState(), tensor, dim, index, src); 157 | } 158 | static void _indexFill(THCudaTensor* tensor, int dim, THLongTensor* index, 159 | float val) { 160 | THCudaTensor_indexFill_long(getTHCState(), tensor, dim, index, val); 161 | } 162 | static float _dot(THCudaTensor* t, THCudaTensor* src) { 163 | return THCudaTensor_dot(getTHCState(), t, src); 164 | } 165 | static float _minall(THCudaTensor* t) { 166 | return THCudaTensor_minall(getTHCState(), t); 167 | } 168 | static float _maxall(THCudaTensor* t) { 169 | return THCudaTensor_maxall(getTHCState(), t); 170 | } 171 | static float _sumall(THCudaTensor* t) { 172 | return THCudaTensor_sumall(getTHCState(), t); 173 | } 174 | static float _prodall(THCudaTensor* t) { 175 | return THCudaTensor_prodall(getTHCState(), t); 176 | } 177 | static void _add(THCudaTensor* r, THCudaTensor* t, float value) { 178 | return THCudaTensor_add(getTHCState(), r, t, value); 179 | } 180 | static void _mul(THCudaTensor* r, THCudaTensor* t, float value) { 181 | return THCudaTensor_mul(getTHCState(), r, t, value); 182 | } 183 | static void _div(THCudaTensor* r, THCudaTensor* t, float value) { 184 | return THCudaTensor_div(getTHCState(), r, t, value); 185 | } 186 | static void _cadd(THCudaTensor* r, THCudaTensor* t, float value, 187 | THCudaTensor* src) { 188 | return THCudaTensor_cadd(getTHCState(), r, t, value, src); 189 | } 190 | static void _cmul(THCudaTensor* r, THCudaTensor* t, THCudaTensor* src) { 191 | return THCudaTensor_cmul(getTHCState(), r, t, src); 192 | } 193 | static void _cdiv(THCudaTensor* r, THCudaTensor* t, THCudaTensor* src) { 194 | return THCudaTensor_cdiv(getTHCState(), r, t, src); 195 | } 196 | static void _addcmul(THCudaTensor* r, THCudaTensor* t, float value, 197 | THCudaTensor* src1, THCudaTensor* src2) { 198 | return 
THCudaTensor_addcmul(getTHCState(), r, t, value, src1, src2); 199 | } 200 | static void _addcdiv(THCudaTensor* r, THCudaTensor* t, float value, 201 | THCudaTensor* src1, THCudaTensor* src2) { 202 | return THCudaTensor_addcdiv(getTHCState(), r, t, value, src1, src2); 203 | } 204 | static void _addmv(THCudaTensor* r, float beta, THCudaTensor* t, float alpha, 205 | THCudaTensor* mat, THCudaTensor* vec) { 206 | return THCudaTensor_addmv(getTHCState(), r, beta, t, alpha, mat, vec); 207 | } 208 | static void _addmm(THCudaTensor* r, float beta, THCudaTensor* t, float alpha, 209 | THCudaTensor* m1, THCudaTensor* m2) { 210 | return THCudaTensor_addmm(getTHCState(), r, beta, t, alpha, m1, m2); 211 | } 212 | static void _addr(THCudaTensor* r, float beta, THCudaTensor* t, float alpha, 213 | THCudaTensor* vec1, THCudaTensor* vec2) { 214 | return THCudaTensor_addr(getTHCState(), r, beta, t, alpha, vec1, vec2); 215 | } 216 | static void _max(THCudaTensor* values, THCudaLongTensor* indices, 217 | THCudaTensor* t, int dim) { 218 | return THCudaTensor_max(getTHCState(), values, indices, t, dim); 219 | } 220 | static void _min(THCudaTensor* values, THCudaLongTensor* indices, 221 | THCudaTensor* t, int dim) { 222 | return THCudaTensor_min(getTHCState(), values, indices, t, dim); 223 | } 224 | static void _sum(THCudaTensor* r, THCudaTensor* t, int dim) { 225 | return THCudaTensor_sum(getTHCState(), r, t, dim); 226 | } 227 | static void _prod(THCudaTensor* r, THCudaTensor* t, int dim) { 228 | return THCudaTensor_prod(getTHCState(), r, t, dim); 229 | } 230 | static void _cumsum(THCudaTensor* r, THCudaTensor* t, int dim) { 231 | return THCudaTensor_cumsum(getTHCState(), r, t, dim); 232 | } 233 | static void _cumprod(THCudaTensor* r, THCudaTensor* t, int dim) { 234 | return THCudaTensor_cumprod(getTHCState(), r, t, dim); 235 | } 236 | static void _sign(THCudaTensor* r, THCudaTensor* t) { 237 | return THCudaTensor_sign(getTHCState(), r, t); 238 | } 239 | 240 | // CUDA-specific 241 | static int _getDevice(THCudaTensor* self) { 242 | return THCudaTensor_getDevice(getTHCState(), self); 243 | } 244 | static constexpr const char* kLuaTypeName = "torch.CudaTensor"; 245 | }; 246 | 247 | #define S(TYPE) \ 248 | template <> inline void TensorOps>::_copyFrom< \ 249 | TH##TYPE##Tensor>(THCudaTensor* self, TH##TYPE##Tensor* src) { \ 250 | return THCudaTensor_copy##TYPE(getTHCState(), self, src); \ 251 | } \ 252 | template <> inline void TensorOps>::_copyTo< \ 253 | TH##TYPE##Tensor>(TH##TYPE##Tensor* dest, THCudaTensor* src) { \ 254 | return TH##TYPE##Tensor_copyCudaFloat(getTHCState(), dest, src); \ 255 | } 256 | 257 | S(Byte) 258 | S(Char) 259 | S(Short) 260 | S(Int) 261 | S(Long) 262 | S(Float) 263 | S(Double) 264 | 265 | #undef S 266 | 267 | } // namespace detail 268 | 269 | } // namespaces 270 | 271 | #endif /* THPP_CUDA_DETAIL_TENSOR_H_ */ 272 | -------------------------------------------------------------------------------- /thpp/Storage-inl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. An additional grant 7 | * of patent rights can be found in the PATENTS file in the same directory. 
8 | * 9 | */ 10 | 11 | #include 12 | #include 13 | 14 | #ifndef NO_FOLLY 15 | #include 16 | #include 17 | #include 18 | #endif 19 | 20 | #ifndef THPP_STORAGE_H_ 21 | #error This file may only be included from thpp/Storage.h 22 | #endif 23 | 24 | 25 | namespace thpp { 26 | 27 | namespace detail { 28 | 29 | #ifndef NO_FOLLY 30 | void applySharingMode(folly::IOBuf& iob, SharingMode sharing); 31 | #endif 32 | 33 | //////////////////////////////////////////////////////////////////////////////// 34 | #if !defined(NO_THRIFT) && !defined(NO_FOLLY) 35 | //////////////////////////////////////////////////////////////////////////////// 36 | 37 | // Endianness of current machine. 38 | constexpr ThriftTensorEndianness gMachineEndianness = 39 | #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 40 | ThriftTensorEndianness::LITTLE; 41 | #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 42 | ThriftTensorEndianness::BIG; 43 | #else 44 | # error Weird endianness! 45 | #endif 46 | 47 | template struct DataType; 48 | 49 | #define X(TYPE, DTYPE, SIZE) \ 50 | template <> struct DataType { \ 51 | static_assert(sizeof(TYPE) == SIZE, \ 52 | "Invalid size for " #TYPE); \ 53 | static constexpr ThriftTensorDataType value = \ 54 | ThriftTensorDataType::DTYPE; \ 55 | static constexpr size_t size = SIZE; \ 56 | }; 57 | 58 | X(unsigned char, BYTE, 1) 59 | X(int32_t, INT32, 4) 60 | X(int64_t, INT64, 8) 61 | X(float, FLOAT, 4) 62 | X(double, DOUBLE, 8) 63 | 64 | #undef X 65 | 66 | template 67 | constexpr ThriftTensorDataType dataType() { 68 | return DataType::value; 69 | } 70 | 71 | void serialize(ThriftStorage& out, 72 | folly::IOBuf&& data, 73 | ThriftTensorDataType dtype, 74 | ThriftTensorEndianness endianness, 75 | SharingMode sharing); 76 | 77 | template 78 | folly::IOBuf deserialize(const ThriftObj& in, 79 | ThriftTensorDataType dtype) { 80 | if (dtype != in.dataType) { 81 | throw std::invalid_argument(folly::sformat( 82 | "Invalid Thrift tensor data type {}, expected {}", 83 | int(in.dataType), int(dtype))); 84 | } 85 | if (in.endianness != gMachineEndianness) { 86 | throw std::invalid_argument(folly::sformat( 87 | "Non-native endianness not yet implemented: {}, expected {}", 88 | int(in.endianness), int(gMachineEndianness))); 89 | } 90 | 91 | return in.data; 92 | } 93 | 94 | //////////////////////////////////////////////////////////////////////////////// 95 | #endif // !NO_THRIFT && !NO_FOLLY 96 | //////////////////////////////////////////////////////////////////////////////// 97 | 98 | extern THAllocator ioBufTHAllocator; 99 | extern THAllocator ioBufTHAllocatorNoRealloc; 100 | 101 | } // namespace detail 102 | 103 | template 104 | Storage::Storage() : Base(nullptr) { } 105 | 106 | template 107 | Storage::Storage(std::initializer_list data) 108 | : Storage(data.begin(), data.end()) { } 109 | 110 | template 111 | template 112 | Storage::Storage(It begin, It end) { 113 | // Do not use newWithSize, as it leaks memory on exception. 
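// Allocate with malloc() and hand the buffer to TH only after the copy
// succeeds; the unique_ptr frees it if anything throws.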
114 | auto n = std::distance(begin, end); 115 | if (n == 0) { 116 | this->t_ = nullptr; 117 | return; 118 | } 119 | auto data = std::unique_ptr({ 120 | static_cast(malloc(n * sizeof(T))), 121 | free}); 122 | if (!data) throw std::bad_alloc(); 123 | std::copy(begin, end, data.get()); 124 | this->t_ = Ops::_newWithData(data.get(), n); 125 | data.release(); 126 | } 127 | 128 | template 129 | Storage::Storage(size_t n, T value) { 130 | if (n == 0) { 131 | this->t_ = nullptr; 132 | return; 133 | } 134 | auto data = std::unique_ptr({ 135 | static_cast(malloc(n * sizeof(T))), 136 | free}); 137 | if (!data) throw std::bad_alloc(); 138 | std::fill_n(data.get(), n, value); 139 | this->t_ = Ops::_newWithData(data.get(), n); 140 | data.release(); 141 | } 142 | 143 | template 144 | Storage::Storage(THType* t) : Base(t) { 145 | this->up(); 146 | } 147 | 148 | //////////////////////////////////////////////////////////////////////////////// 149 | #ifndef NO_FOLLY 150 | //////////////////////////////////////////////////////////////////////////////// 151 | 152 | template 153 | Storage Storage::takeOwnership(Range data) { 154 | Storage s; 155 | if (!data.empty()) { 156 | s.t_ = Ops::_newWithData(data.data(), data.size()); 157 | } 158 | return s; 159 | } 160 | 161 | template 162 | Storage Storage::wrap(Range data) { 163 | Storage s; 164 | if (!data.empty()) { 165 | s.t_ = Ops::_newWithData(data.data(), data.size()); 166 | Ops::_clearFlag(s.t_, TH_STORAGE_RESIZABLE | TH_STORAGE_FREEMEM); 167 | } 168 | return s; 169 | } 170 | 171 | template 172 | Storage Storage::wrapWithAllocator(Range data, 173 | THAllocator* allocator, 174 | void* allocatorContext) { 175 | Storage s; 176 | s.t_ = Ops::_newWithDataAndAllocator( 177 | data.data(), data.size(), allocator, allocatorContext); 178 | return s; 179 | } 180 | 181 | //////////////////////////////////////////////////////////////////////////////// 182 | #endif // !NO_FOLLY 183 | //////////////////////////////////////////////////////////////////////////////// 184 | 185 | // Fallback without folly 186 | template 187 | Storage Storage::wrapWithAllocator(T* data, size_t size, 188 | THAllocator* allocator, 189 | void* allocatorContext) { 190 | Storage s; 191 | s.t_ = Ops::_newWithDataAndAllocator(data, size, allocator, allocatorContext); 192 | return s; 193 | } 194 | 195 | template 196 | Storage Storage::withAllocator(THAllocator* allocator, 197 | void* allocatorContext) { 198 | Storage s; 199 | s.t_ = Ops::_newWithDataAndAllocator( 200 | nullptr, 0, allocator, allocatorContext); 201 | return s; 202 | } 203 | 204 | 205 | template 206 | Storage::~Storage() { 207 | this->down(); 208 | } 209 | 210 | template 211 | Storage::Storage(Storage&& other) noexcept : Base(other.t_) { 212 | other.t_ = nullptr; 213 | } 214 | 215 | template 216 | Storage::Storage(const Storage& other) : Storage(other.t_) { } 217 | 218 | template 219 | Storage& Storage::operator=(Storage&& other) { 220 | if (&other != this) { 221 | this->down(); 222 | this->t_ = other.t_; 223 | other.t_ = nullptr; 224 | } 225 | return *this; 226 | } 227 | 228 | template 229 | Storage& Storage::operator=(const Storage& other) { 230 | if (&other != this) { 231 | this->down(); 232 | this->t_ = other.t_; 233 | this->up(); 234 | } 235 | return *this; 236 | } 237 | 238 | template 239 | void Storage::resize(size_t n, T value) { 240 | size_t oldSize = this->size(); 241 | this->resizeUninitialized(n); 242 | 243 | if (n > oldSize) { 244 | std::fill(this->data() + oldSize, this->data() + n, value); 245 | } 246 | } 247 | 248 | template 
249 | template 250 | void Storage::assign(It begin, It end) { 251 | auto n = std::distance(begin, end); 252 | this->resizeUninitialized(n); 253 | std::copy(begin, end, this->data()); 254 | } 255 | 256 | template 257 | void Storage::assign(size_t n, T value) { 258 | this->resizeUninitialized(n); 259 | std::fill_n(this->data(), n, value); 260 | } 261 | 262 | //////////////////////////////////////////////////////////////////////////////// 263 | #ifndef NO_FOLLY 264 | //////////////////////////////////////////////////////////////////////////////// 265 | 266 | namespace detail { 267 | 268 | /** 269 | * What follows is some ugly acrobatics to allow IOBuf and THStorage to 270 | * share memory. 271 | * 272 | * If we want to create a THStorage object that wraps an IOBuf, 273 | * we'll use a custom allocator that keeps a reference to the IOBuf and 274 | * calls appropriate methods on the IOBuf. We're relying on the slightly 275 | * unsafe (and undocumented) behavior that THStorage will only call the 276 | * "free" method of the allocator once at the end of its lifetime. 277 | * 278 | * If we want to create an IOBuf that wraps a THStorage, we reduce it to 279 | * the case above by converting its memory to an IOBuf. 280 | */ 281 | 282 | class IOBufAllocator { 283 | public: 284 | explicit IOBufAllocator(folly::IOBuf&& iob); 285 | 286 | void* malloc(long size); 287 | void* realloc(void* ptr, long size); 288 | void free(void* ptr); 289 | bool isUnique(const void* ptr) const; 290 | 291 | folly::IOBuf clone() { 292 | folly::IOBuf buf; 293 | iob_.cloneInto(buf); 294 | return buf; 295 | } 296 | 297 | private: 298 | folly::IOBuf iob_; 299 | uint64_t maxLength_; 300 | }; 301 | 302 | struct THAllocFreeFuncData { 303 | THAllocator* allocator; 304 | void* context; 305 | 306 | THAllocFreeFuncData(THAllocator* allocator, void* context); 307 | }; 308 | 309 | void THAllocFreeFunc(void* buf, void* userData); 310 | 311 | } // namespace detail 312 | 313 | template 314 | folly::IOBuf Storage::getIOBuf() { 315 | if (!this->t_) return folly::IOBuf(); 316 | 317 | auto iobTHAllocator = 318 | &detail::ioBufTHAllocator; 319 | auto iobTHAllocatorNoRealloc = 320 | &detail::ioBufTHAllocatorNoRealloc; 321 | 322 | auto len = this->size() * sizeof(T); 323 | auto curAllocator = this->t_->allocator; 324 | if (curAllocator == &THDefaultAllocator) { 325 | // Switch to using IOBuf allocator. 326 | // We know that memory from the default allocator was allocated with 327 | // malloc, just like IOBuf, so we know how to free it. 328 | this->t_->allocator = iobTHAllocator; 329 | this->t_->allocatorContext = new detail::IOBufAllocator(folly::IOBuf( 330 | folly::IOBuf::TAKE_OWNERSHIP, this->data(), len, len)); 331 | } else if (curAllocator == iobTHAllocator || 332 | curAllocator == iobTHAllocatorNoRealloc) { 333 | // do nothing 334 | } else { 335 | // The storage was allocated with an unknown allocator (neither default 336 | // nor IOBuf), so we must remember the previous allocator and allocator 337 | // context and call that allocator's free method when necessary. 
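// THAllocFreeFunc (registered below as the IOBuf's free callback) forwards
// the deallocation to that saved allocator.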
338 | 339 | auto freeFuncData = new detail::THAllocFreeFuncData( 340 | this->t_->allocator, this->t_->allocatorContext); 341 | 342 | this->t_->allocator = iobTHAllocatorNoRealloc; 343 | this->t_->allocatorContext = new detail::IOBufAllocator(folly::IOBuf( 344 | folly::IOBuf::TAKE_OWNERSHIP, this->data(), len, len, 345 | detail::THAllocFreeFunc, freeFuncData)); 346 | } 347 | 348 | 349 | auto allocator = static_cast( 350 | this->t_->allocatorContext); 351 | return allocator->clone(); 352 | } 353 | 354 | template 355 | Storage::Storage(folly::IOBuf&& iob, SharingMode sharing, 356 | bool resizable) : Base(nullptr) { 357 | setFromIOBuf(std::move(iob), sharing, resizable); 358 | } 359 | 360 | #if !defined(NO_THRIFT) && !defined(NO_FOLLY) 361 | template 362 | Storage::Storage(const ThriftStorage& in, SharingMode sharing) 363 | : Base(nullptr) { 364 | setFromIOBuf(detail::deserialize(in, detail::dataType()), sharing, true); 365 | } 366 | #endif 367 | 368 | template 369 | void Storage::setFromIOBuf(folly::IOBuf&& iob, SharingMode sharing, 370 | bool resizable) { 371 | size_t len = iob.computeChainDataLength(); 372 | if (len % sizeof(T) != 0) { 373 | throw std::invalid_argument("IOBuf size must be multiple of data size"); 374 | } 375 | len /= sizeof(T); 376 | 377 | iob.coalesce(); 378 | detail::applySharingMode(iob, sharing); 379 | 380 | // Ensure properly aligned, make a copy otherwise. coalesce() 381 | // and/or applySharingMode() might have already done that for us, 382 | // in which case we're likely already aligned. 383 | if ((reinterpret_cast(iob.data()) % alignof(T)) != 0) { 384 | iob = folly::IOBuf(folly::IOBuf::COPY_BUFFER, iob.data(), iob.length()); 385 | } 386 | 387 | T* p = reinterpret_cast(iob.writableData()); 388 | this->t_ = Ops::_newWithDataAndAllocator( 389 | p, len, 390 | &detail::ioBufTHAllocator, 391 | new detail::IOBufAllocator(std::move(iob))); 392 | 393 | if (!resizable) { 394 | Ops::_clearFlag(this->t_, TH_STORAGE_RESIZABLE); 395 | } 396 | } 397 | 398 | #if !defined(NO_THRIFT) && !defined(NO_FOLLY) 399 | template 400 | void Storage::serialize(ThriftStorage& out, 401 | ThriftTensorEndianness endianness, 402 | SharingMode sharing) const { 403 | detail::serialize(out, const_cast(this)->getIOBuf(), 404 | detail::dataType(), endianness, sharing); 405 | } 406 | #endif 407 | 408 | //////////////////////////////////////////////////////////////////////////////// 409 | #endif // !NO_FOLLY 410 | //////////////////////////////////////////////////////////////////////////////// 411 | 412 | template 413 | bool Storage::isUnique(const THType* th) { 414 | if (!th) { 415 | return true; 416 | } 417 | if (th->refcount != 1) { 418 | return false; 419 | } 420 | // Even if the refcount is 1, this might share memory with other 421 | // resources from the outside world. Not possible with the default allocator. 422 | if (th->allocator == &THDefaultAllocator) { 423 | return true; 424 | } 425 | 426 | #ifndef NO_FOLLY 427 | // Check all our supported allocators. Currently one. 428 | auto iobTHAllocator = &detail::ioBufTHAllocator; 429 | if (th->allocator == iobTHAllocator) { 430 | return static_cast(th->allocatorContext)-> 431 | isUnique(th->data); 432 | } 433 | #endif 434 | 435 | // Unknown allocator. Be on the safe side. 
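// Treat the storage as potentially shared.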
436 |   return false;
437 | }
438 |
439 | }  // namespaces
440 |
--------------------------------------------------------------------------------
/thpp/TensorBase.h:
--------------------------------------------------------------------------------
1 | /**
2 |  * Copyright 2015 Facebook
3 |  * @author Tudor Bosman (tudorb@fb.com)
4 |  */
5 |
6 | #ifndef THPP_TENSORBASE_H_
7 | #define THPP_TENSORBASE_H_
8 |
9 | #include
10 | #include
11 | #include
12 | #ifndef NO_FOLLY
13 | #include
14 | #endif
15 | // in order to get default values for template args
16 | #include
17 | #include
18 | #include
19 |
20 | namespace thpp {
21 |
22 | template <class T> class Tensor;
23 |
24 | namespace detail {
25 | template <class T> struct TensorOps;
26 | }  // namespace detail
27 |
28 | template <class T, class StorageT, class Derived>
29 | class TensorBase {
30 |   friend class TensorPtr;
31 |  protected:
32 |   typedef detail::TensorOps<Derived> Ops;
33 |
34 |  public:
35 |   typedef TensorPtr<Derived> Ptr;
36 |   typedef typename Ops::type THType;
37 |   typedef StorageT StorageType;
38 |   typedef T value_type;
39 |   typedef typename Ops::accurate_type accurate_type;
40 |   typedef long size_type;
41 |   typedef long offset_type;
42 |   typedef std::true_type IsRelocatable;
43 |
44 |   template <class... Args>
45 |   static Ptr makePtr(Args&&... args) {
46 |     return makeTensorPtr(std::forward<Args>(args)...);
47 |   }
48 |
49 |   // Ayiee. Uniform initialization and perfect forwarding don't play well
50 |   // together. Explicit specialization.
51 |   static Ptr makePtr(std::initializer_list<size_type> sizes,
52 |                      std::initializer_list<size_type> strides =
53 |                          std::initializer_list<size_type>()) {
54 |     return makeTensorPtr(std::move(sizes), std::move(strides));
55 |   }
56 |
57 |   Ptr copyPtr() const {
58 |     return makePtr(*D());
59 |   }
60 |
61 |   THType* asTH() { return t_; }
62 |   const THType* asTH() const { return t_; }
63 |
64 |   // Tensor mode. Bitwise OR of:
65 |   // UNIQUE: this tensor is unique and does not share storage with any
66 |   //         other tensor.
67 |   // CONTIGUOUS: this tensor is contiguous in row-major (that is, C) order
68 |   enum Mode : unsigned {
69 |     UNIQUE = 1U << 0,
70 |     CONTIGUOUS = 1U << 1,
71 |   };
72 |
73 |   static constexpr const char* kLuaTypeName = Ops::kLuaTypeName;
74 |
75 |   // Force the tensor to have a certain mode. May copy data.
76 |   void force(unsigned mode);
77 |
78 |   // Return current mode.
79 |   unsigned mode() const {
80 |     return mode(t_);
81 |   }
82 |   static unsigned mode(const THType* th) {
83 |     return (isUnique(th) ? UNIQUE : 0) | (isContiguous(th) ? CONTIGUOUS : 0);
84 |   }
85 |
86 |   // Is this tensor unique?
87 |   bool isUnique() const {
88 |     return isUnique(t_);
89 |   }
90 |   static bool isUnique(const THType* th);
91 |
92 |   // Is this tensor contiguous?
93 |   bool isContiguous() const {
94 |     return isContiguous(t_);
95 |   }
96 |   static bool isContiguous(const THType* th);
97 |
98 |   /// Compares two tensors for exact equality
99 |   bool isExactlyEqual(const TensorBase& other) const;
100 |
101 |   /// Compares two tensors for approximate equality. For integral
102 |   /// types, forwards to isExactlyEqual; for floating point types,
103 |   /// uses the given relativeError to determine equality.
104 |   bool isApproximatelyEqual(const TensorBase& other,
105 |                             float relativeError = 0.0001f) const;
106 |
107 |   // Return number of elements.
108 |   size_type size() const;
109 |
110 |   // Return number of dimensions.
111 |   int ndims() const { return t_->nDimension; }
112 |
113 | #ifndef NO_FOLLY
114 |   // Return list of sizes.
115 |   LongRange sizes() const;
116 |
117 |   // Return list of strides.
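  // (measured in elements, not bytes)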
118 | LongRange strides() const; 119 | #endif 120 | 121 | // Return a storage of sizes. 122 | LongStorage sizesTH() const; 123 | 124 | // Return a storage of strides. 125 | LongStorage stridesTH() const; 126 | 127 | #ifndef NO_FOLLY 128 | // Return size along dimension dim. 129 | size_type size(int dim) const { return sizes().at(dim); } 130 | 131 | // Return stride along dimension dim. 132 | size_type stride(int dim) const { return strides().at(dim); } 133 | #else 134 | size_type size(int dim) const { return sizesTH()[dim]; } 135 | size_type stride(int dim) const { return stridesTH()[dim]; } 136 | #endif 137 | 138 | // Narrow the tensor along a given dimension; the dimension dim is narrowed 139 | // to [firstIndex, firstIndex + size) 140 | void narrow(const TensorBase& src, int dim, offset_type firstIndex, 141 | size_type size); 142 | void narrow(int dim, offset_type firstIndex, size_type size) { 143 | narrow(*this, dim, firstIndex, size); 144 | } 145 | 146 | // Select one slice of the tensor along a given dimension. The tensor's 147 | // dimensionality is reduced by 1. 148 | void select(const TensorBase& src, int dim, offset_type index); 149 | void select(int dim, offset_type index) { select(*this, dim, index); } 150 | 151 | // Transpose two dimensions. 152 | void transpose(const TensorBase& src, int dim1, int dim2); 153 | void transpose(int dim1, int dim2) { transpose(*this, dim1, dim2); } 154 | 155 | // Full transpose (reverse the order of axes) 156 | void transpose(const TensorBase& src) { *this = src; transpose(); } 157 | void transpose(); 158 | 159 | // Unfold dimension dim along two dimensions: slices of size 'size' (with 160 | // given step between slices) are unfolded among a new dimension that is 161 | // added. 162 | // See http://torch5.sourceforge.net/manual/torch/index-6-8-3.html 163 | void unfold(const TensorBase& src, int dim, size_type size, size_type step); 164 | void unfold(int dim, size_type size, size_type step) { 165 | unfold(*this, dim, size, step); 166 | } 167 | 168 | // Squeeze: remove all 1-sized dimensions. 169 | void squeeze(const TensorBase& src); 170 | void squeeze() { squeeze(*this); } 171 | 172 | // Squeeze: remove one dimension if it is 1-sized. 173 | void squeeze(const TensorBase& src, int dim); 174 | void squeeze(int dim) { squeeze(*this, dim); } 175 | 176 | void resize( 177 | std::initializer_list newSizes, 178 | std::initializer_list newStrides = 179 | std::initializer_list()); 180 | void resize(LongStorage newSizes, LongStorage newStrides = LongStorage()); 181 | #ifndef NO_FOLLY 182 | void resize(LongRange newSizes, LongRange newStrides = LongRange()); 183 | #endif 184 | void resizeAs(const TensorBase& src); 185 | 186 | StorageType storage(); 187 | const StorageType storage() const { 188 | return const_cast(this)->storage(); 189 | } 190 | 191 | typedef typename std::aligned_storage< 192 | sizeof(StorageT), alignof(StorageT)>::type StorageBuffer; 193 | // Hack. You must provide an appropriately-sized buffer. Return a reference 194 | // to a storage object *that does not increment the reference count*, 195 | // so may point into nothingness if this tensor is resized or destroyed. 196 | // You have been warned. 197 | StorageType& storageRef(StorageBuffer* buf); 198 | const StorageType& storageRef(StorageBuffer* buf) const { 199 | return const_cast(this)->storageRef(buf); 200 | } 201 | 202 | offset_type storageOffset() const; 203 | 204 | // Fill with one value. 205 | void fill(T value); 206 | 207 | // Fill with zeros. 
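  // (semantically the same as fill(0); TH may use a faster path)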
208 | void zero(); 209 | 210 | // Given a ByteTensor of the exact same dimensionality as *this, whose 211 | // values are 0 or 1, set elements of *this to value iff the corresponding 212 | // elements in mask are 1. 213 | void maskedFill(const Tensor& mask, T value); 214 | 215 | // Copy corresponding elements of src to *this iff the corresponding elements 216 | // in mask are 1 217 | void maskedCopy(const Tensor& mask, const TensorBase& src); 218 | 219 | // Select elements from *this iff the corresponding elements in mask 220 | // are 1. Returns a 1d tensor with one entry for each selected element. 221 | Derived maskedSelect(const Tensor& mask) const; 222 | 223 | // Select along dimension dim, copying only indices from index. 224 | // Returns a tensor with matching dimensionality, but only index.size() 225 | // elements along dimension dim. 226 | Derived indexSelect(int dim, const Tensor& index) const; 227 | 228 | // Fill along dimension dim, setting entries corresponding to indices 229 | // from index to val. 230 | void indexFill(int dim, const Tensor& index, T val); 231 | 232 | // Dot product 233 | accurate_type dot(const TensorBase& other) const; 234 | 235 | // Minimum value among all elements 236 | T minall() const; 237 | 238 | // Maximum value among all elements 239 | T maxall() const; 240 | 241 | // Sum of all elements 242 | accurate_type sumall() const; 243 | 244 | // Product of all elements 245 | accurate_type prodall() const; 246 | 247 | // Add a value to each element in the tensor 248 | void add(const TensorBase& src, T value); 249 | void add(T value) { add(*this, value); } 250 | 251 | // Multiply each element in the tensor by a value 252 | void mul(const TensorBase& src, T value); 253 | void mul(T value) { mul(*this, value); } 254 | 255 | // Divide each element in the tensor by a value 256 | void div(const TensorBase& src, T value); 257 | void div(T value) { div(*this, value); } 258 | 259 | // *this = a + value * b 260 | void cadd(const TensorBase& a, T value, const TensorBase& b); 261 | void cadd(T value, const TensorBase& b) { cadd(*this, value, b); } 262 | 263 | // *this = a .* b 264 | void cmul(const TensorBase& a, const TensorBase& b); 265 | void cmul(const TensorBase& b) { cmul(*this, b); } 266 | 267 | // *this = a ./ b 268 | void cdiv(const TensorBase& a, const TensorBase& b); 269 | void cdiv(const TensorBase& b) { cdiv(*this, b); } 270 | 271 | // *this = a + value * (b .* c) 272 | void addcmul(const TensorBase& a, T value, const TensorBase& b, 273 | const TensorBase& c); 274 | void addcmul(T value, const TensorBase& b, const TensorBase& c) { 275 | addcmul(*this, value, b, c); 276 | } 277 | 278 | // *this = a + value * (b ./ c) 279 | void addcdiv(const TensorBase& a, T value, const TensorBase& b, 280 | const TensorBase& c); 281 | void addcdiv(T value, const TensorBase& b, const TensorBase& c) { 282 | addcdiv(*this, value, b, c); 283 | } 284 | 285 | // *this = beta * t + alpha * mat * vec 286 | void addmv(T beta, const TensorBase& t, T alpha, const TensorBase& mat, 287 | const TensorBase& vec); 288 | void addmv(T beta, T alpha, const TensorBase& mat, const TensorBase& vec) { 289 | addmv(beta, *this, alpha, mat, vec); 290 | } 291 | 292 | // *this = beta * t + alpha * (m1 X m2) 293 | void addmm(T beta, const TensorBase& t, T alpha, const TensorBase& m1, 294 | const TensorBase& m2); 295 | void addmm(T beta, T alpha, const TensorBase& m1, const TensorBase& m2) { 296 | addmm(beta, *this, alpha, m1, m2); 297 | } 298 | 299 | // outer product 300 | // *this = beta * m + alpha * 
(v1 (X) v2) 301 | void addr(T beta, const TensorBase& m, T alpha, const TensorBase& v1, 302 | const TensorBase& v2); 303 | void addr(T beta, T alpha, const TensorBase& v1, const TensorBase& v2) { 304 | addr(beta, *this, alpha, v1, v2); 305 | } 306 | 307 | // number of elements, same as size() 308 | size_type numel() const { return size(); } 309 | 310 | // The following functions perform operations along one dimension. 311 | // The returned tensors will have the same shape as *this except that they 312 | // have a size of 1 along dimension dim. (That is, they're not squeezed) 313 | 314 | // sum 315 | Derived sum(int dim) const; 316 | 317 | // product 318 | Derived prod(int dim) const; 319 | 320 | // cumulative sum 321 | Derived cumsum(int dim) const; 322 | 323 | // cumulative product 324 | Derived cumprod(int dim) const; 325 | 326 | // Element-wise sign 327 | Derived sign() const; 328 | 329 | // TODO(tudorb): TH doesn't distinguish between a 1-element 1-dimensional 330 | // array (aka 1-element vector) and a scalar. 331 | bool isScalar() const; 332 | 333 | // Access the underlying data 334 | T* data(); 335 | const T* data() const; 336 | 337 | // First element 338 | const T& front() const { return *data(); } 339 | T& front() { return *data(); } 340 | 341 | // Index along the first dimension 342 | Derived operator[](offset_type index) const; 343 | 344 | // Index along dimensions 0, 1, ..., indices.size() - 1. 345 | // Pass -1 as an index to keep that dimension unchanged. 346 | // 347 | // Example: given a 5-dimensional tensor foo, 348 | // foo[-1,2,-1,2,1] returns a 2-dimensional tensor corresponding 349 | // to the hyperplane that has d1=2, d3=2, d4=1 in foo. 350 | Derived operator[](std::initializer_list indices) const; 351 | 352 | // Clear the tensor. 353 | void clear(); 354 | 355 | std::string str() const; 356 | 357 | #if !defined(NO_THRIFT) && !defined(NO_FOLLY) 358 | // const version of serialize() that won't share, but will always copy 359 | void serializeUnshared(ThriftTensor& out, 360 | ThriftTensorEndianness endianness = 361 | ThriftTensorEndianness::NATIVE) const { 362 | const_cast(this)->D()->serialize(out, endianness, false); 363 | } 364 | #endif 365 | 366 | protected: 367 | size_t offsetOf(std::initializer_list indices) const; 368 | 369 | static THType* cloneTH(const THType* other, unsigned cloneMode); 370 | 371 | explicit TensorBase(THType* t); 372 | ~TensorBase(); 373 | THType* mut() const { return mut(t_); } 374 | static THType* mut(const THType* th) { return const_cast(th); } 375 | 376 | THType* t_; 377 | 378 | private: 379 | inline Derived* D() { return static_cast(this); } 380 | inline const Derived* D() const { return static_cast(this); } 381 | }; 382 | 383 | template 384 | constexpr const char* TensorBase::kLuaTypeName; 385 | 386 | // Unary - 387 | template 388 | Derived operator-(const TensorBase& a); 389 | 390 | // Binary operators. We don't define multiplication and division as they're 391 | // ambiguous: do you mean pointwise? matrix multiplication? inner product? 392 | // outer product? 
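//
// A minimal usage sketch of the overloads declared below (assuming a
// concrete Tensor<float> instantiation):
//
//   Tensor<float> a, b;      // sized and filled elsewhere
//   auto c = a + b;          // new tensor, via cadd(a, 1, b)
//   c -= b;                  // in place, via cadd(-1, b)
//   auto d = c * 2.0f;       // scalar multiply/divide are unambiguous,
//   d /= 4.0f;               // so * and / by a scalar are provided
//
// For the ambiguous tensor-tensor products, use the named member functions
// instead: cmul() (pointwise), addmm() (matrix product), dot() (inner
// product), or addr() (outer product).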
393 | template 394 | Derived operator+(const TensorBase& a, 395 | const TensorBase& b); 396 | template 397 | Derived& operator+=(TensorBase& a, 398 | const TensorBase& b); 399 | 400 | template 401 | Derived operator-(const TensorBase& a, 402 | const TensorBase& b); 403 | template 404 | Derived& operator-=(TensorBase& a, 405 | const TensorBase& b); 406 | 407 | // Multiplication / division by scalar 408 | template 409 | Derived operator*(const TensorBase& a, T b); 410 | template 411 | Derived operator*(T a, const TensorBase& b) { 412 | return b * a; 413 | } 414 | template 415 | Derived& operator*=(TensorBase& a, T b); 416 | 417 | template 418 | Derived operator/(const TensorBase& a, T b); 419 | template 420 | Derived& operator/=(TensorBase& a, T b); 421 | 422 | template 423 | std::ostream& operator<<(std::ostream& s, 424 | const TensorBase& t) { 425 | return s << t.str(); 426 | } 427 | 428 | #ifndef NO_FOLLY 429 | namespace detail { 430 | template 431 | Range makeMutable(Range r) { 432 | return Range(const_cast(r.begin()), const_cast(r.end())); 433 | } 434 | } // namespace detail 435 | #endif 436 | 437 | // Define IsTensor to be used in template specializations 438 | 439 | template 440 | struct IsTensor : public std::false_type { }; 441 | 442 | template 443 | struct IsTensor< 444 | T, 445 | typename std::enable_if< 446 | std::is_base_of< 447 | TensorBase, 450 | T>::value>::type> 451 | : public std::true_type { }; 452 | 453 | template 454 | struct IsTensorPtr : public std::false_type { }; 455 | 456 | template 457 | struct IsTensorPtr< 458 | TensorPtr, 459 | typename std::enable_if::value>::type> 460 | : public std::true_type { }; 461 | 462 | } // namespaces 463 | 464 | #include 465 | 466 | #endif /* THPP_TENSORBASE_H_ */ 467 | -------------------------------------------------------------------------------- /thpp/TensorBase-inl.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015 Facebook 3 | * @author Tudor Bosman (tudorb@fb.com) 4 | */ 5 | 6 | #ifndef THPP_TENSORBASE_H_ 7 | #error This file may only be included from thpp/TensorBase.h 8 | #endif 9 | 10 | #include 11 | #include 12 | #ifndef NO_FOLLY 13 | #include 14 | #include 15 | #else 16 | #define UNLIKELY(x) (x) 17 | #endif 18 | 19 | namespace thpp { 20 | 21 | namespace detail { 22 | 23 | template 24 | inline Derived& D(TensorBase& v) { 25 | return static_cast(v); 26 | } 27 | 28 | template 29 | inline const Derived& D(const TensorBase& v) { 30 | return static_cast(v); 31 | } 32 | 33 | } // namespace detail 34 | 35 | template 36 | TensorBase::TensorBase(THType* t) : t_(t) { 37 | DCHECK(t_); 38 | } 39 | 40 | template 41 | TensorBase::~TensorBase() { 42 | DCHECK(t_); 43 | Ops::_free(t_); 44 | #ifndef NDEBUG 45 | t_ = nullptr; 46 | #endif 47 | } 48 | 49 | template 50 | void TensorBase::force(unsigned newMode) { 51 | if ((mode() & newMode) == newMode) 52 | return; 53 | 54 | *D() = Derived(std::move(*D()), newMode); 55 | } 56 | 57 | //////////////////////////////////////////////////////////////////////////////// 58 | #ifndef NO_FOLLY 59 | //////////////////////////////////////////////////////////////////////////////// 60 | template 61 | LongRange TensorBase::sizes() const { 62 | return LongRange(t_->size, t_->nDimension); 63 | } 64 | 65 | template 66 | LongRange TensorBase::strides() const { 67 | return LongRange(t_->stride, t_->nDimension); 68 | } 69 | //////////////////////////////////////////////////////////////////////////////// 70 | #endif 71 | 
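// For reference, the CONTIGUOUS mode bit ("contiguous in row-major order")
// requires stride[i] == size[i+1] * size[i+2] * ... for every dimension,
// with 1-sized dimensions excepted. A hypothetical stand-alone check,
// mirroring what TH's isContiguous verifies:
//
//   bool rowMajorContiguous(LongRange sizes, LongRange strides) {
//     long expected = 1;
//     for (long i = long(sizes.size()) - 1; i >= 0; --i) {
//       if (sizes[i] != 1 && strides[i] != expected) return false;
//       expected *= sizes[i];
//     }
//     return true;
//   }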
//////////////////////////////////////////////////////////////////////////////// 72 | 73 | template 74 | LongStorage TensorBase::sizesTH() const { 75 | return LongStorage(t_->size, t_->size + t_->nDimension); 76 | } 77 | 78 | template 79 | LongStorage TensorBase::stridesTH() const { 80 | return LongStorage(t_->stride, t_->stride + t_->nDimension); 81 | } 82 | 83 | template 84 | bool TensorBase::isUnique(const THType* th) { 85 | return StorageType::isUnique(th->storage); 86 | } 87 | 88 | template 89 | bool TensorBase::isContiguous(const THType* th) { 90 | return Ops::_isContiguous(th); 91 | } 92 | 93 | template 94 | bool TensorBase::isExactlyEqual( 95 | const TensorBase& other) const { 96 | if (ndims() != other.ndims()) { 97 | throw std::invalid_argument("isExactlyEqual: dimension mismatch"); 98 | } 99 | 100 | for (int i = 0; i < ndims(); ++i) { 101 | if (size(i) != other.size(i)) { 102 | throw std::invalid_argument("isExactlyEqual: size mismatch"); 103 | } 104 | } 105 | 106 | if (ndims() == 1) { 107 | for (int i = 0; i < size(0); ++i) { 108 | if (D()->at({i}) != other.D()->at({i})) { 109 | return false; 110 | } 111 | } 112 | } else { 113 | for (int i = 0; i < size(0); ++i) { 114 | if (!(*D())[i].isExactlyEqual(other[i])) { 115 | return false; 116 | } 117 | } 118 | } 119 | 120 | return true; 121 | } 122 | 123 | template 124 | bool TensorBase::isApproximatelyEqual( 125 | const TensorBase& other, 126 | float relativeError) const { 127 | if (!std::is_floating_point::value) { 128 | return isExactlyEqual(other); 129 | } 130 | 131 | if (ndims() != other.ndims()) { 132 | throw std::invalid_argument("isApproximatelyEqual: dimension mismatch"); 133 | } 134 | 135 | for (int i = 0; i < ndims(); ++i) { 136 | if (size(i) != other.size(i)) { 137 | throw std::invalid_argument("isApproximatelyEqual: size mismatch"); 138 | } 139 | } 140 | 141 | if (ndims() == 1) { 142 | const auto adjRelativeError = 0.5f * relativeError; 143 | 144 | for (int i = 0; i < size(0); ++i) { 145 | const auto a = D()->at({i}); 146 | const auto b = other.D()->at({i}); 147 | 148 | // Handle special cases 149 | if (a == b || (std::isnan(a) && std::isnan(b))) { 150 | continue; 151 | } else if (!std::isfinite(a) && !std::isfinite(b)) { 152 | if (std::signbit(a) == std::signbit(b)) { 153 | continue; 154 | } else { 155 | return false; 156 | } 157 | } 158 | 159 | // Compare the difference against the mean values 160 | if (std::abs(a - b) > adjRelativeError * (std::abs(a) + std::abs(b))) { 161 | return false; 162 | } 163 | } 164 | } else { 165 | for (int i = 0; i < size(0); ++i) { 166 | if (!(*D())[i].isApproximatelyEqual(other[i], relativeError)) { 167 | return false; 168 | } 169 | } 170 | } 171 | 172 | return true; 173 | } 174 | 175 | template 176 | long TensorBase::size() const { 177 | return Ops::_nElement(t_); 178 | } 179 | 180 | template 181 | void TensorBase::narrow( 182 | const TensorBase& src, int dim, long firstIndex, long size) { 183 | Ops::_narrow(t_, src.mut(), dim, firstIndex, size); 184 | } 185 | 186 | template 187 | void TensorBase::select( 188 | const TensorBase& src, int dim, long index) { 189 | if (src.ndims() == 1) { 190 | if (UNLIKELY(dim != 0)) { 191 | throw std::invalid_argument("invalid dimension for vector select"); 192 | } 193 | if (UNLIKELY(index < 0 || index >= src.size(0))) { 194 | throw std::invalid_argument("invalid index for vector select"); 195 | } 196 | auto s = src.mut(); 197 | Ops::_setStorage1d(t_, s->storage, 198 | s->storageOffset + index * s->stride[0], 199 | 1, 1); 200 | } else { 201 | 
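// General case: defer to TH's select, which narrows dimension `dim` to the
// given index and then removes it (the 1-d path above is special-cased
// because TH tensors cannot be 0-dimensional).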
Ops::_select(t_, src.mut(), dim, index); 202 | } 203 | } 204 | 205 | template 206 | void TensorBase::transpose( 207 | const TensorBase& src, int dim1, int dim2) { 208 | Ops::_transpose(t_, src.mut(), dim1, dim2); 209 | } 210 | 211 | template 212 | void TensorBase::transpose() { 213 | std::reverse(t_->stride, t_->stride + t_->nDimension); 214 | std::reverse(t_->size, t_->size + t_->nDimension); 215 | } 216 | 217 | template 218 | void TensorBase::unfold( 219 | const TensorBase& src, int dim, long size, long step) { 220 | Ops::_unfold(t_, src.mut(), dim, size, step); 221 | } 222 | 223 | template 224 | void TensorBase::squeeze(const TensorBase& src) { 225 | Ops::_squeeze(t_, src.mut()); 226 | } 227 | 228 | template 229 | void TensorBase::squeeze(const TensorBase& src, int dim) { 230 | Ops::_squeeze1d(t_, src.mut(), dim); 231 | } 232 | 233 | template 234 | void TensorBase::resize( 235 | std::initializer_list newSizes, 236 | std::initializer_list newStrides) { 237 | resize(LongStorage(newSizes.begin(), newSizes.end()), 238 | LongStorage(newStrides.begin(), newStrides.end())); 239 | } 240 | 241 | template 242 | void TensorBase::resize( 243 | LongStorage sizes, LongStorage strides) { 244 | Ops::_resize(t_, sizes.th(), strides.th()); 245 | } 246 | 247 | #ifndef NO_FOLLY 248 | template 249 | void TensorBase::resize( 250 | LongRange sizes, LongRange strides) { 251 | resize(LongStorage::wrap(detail::makeMutable(sizes)), 252 | LongStorage::wrap(detail::makeMutable(strides))); 253 | } 254 | #endif 255 | 256 | template 257 | void TensorBase::resizeAs(const TensorBase& src) { 258 | Ops::_resizeAs(t_, src.mut()); 259 | } 260 | 261 | template 262 | bool TensorBase::isScalar() const { 263 | return ndims() == 1 && size(0) == 1 && stride(0) == 1; 264 | } 265 | 266 | template 267 | T* TensorBase::data() { 268 | return Ops::_data(t_); 269 | } 270 | 271 | template 272 | const T* TensorBase::data() const { 273 | return Ops::_data(t_); 274 | } 275 | 276 | template 277 | auto TensorBase::storage() -> StorageType { 278 | return StorageType(Ops::_storage(t_)); 279 | } 280 | 281 | template 282 | auto TensorBase::storageRef(StorageBuffer* buf) 283 | -> StorageType& { 284 | auto pbuf = reinterpret_cast(buf); 285 | *pbuf = Ops::_storage(t_); 286 | // This relies on the fact that StorageT doesn't contain any members 287 | // other than the pointer to the appropriate THStorage 288 | return *reinterpret_cast(pbuf); 289 | } 290 | 291 | template 292 | long TensorBase::storageOffset() const { 293 | return Ops::_storageOffset(t_); 294 | } 295 | 296 | template 297 | Derived TensorBase::operator[](long index) const { 298 | Derived nt(*D()); 299 | nt.select(0, index); 300 | return nt; 301 | } 302 | 303 | template 304 | Derived TensorBase::operator[]( 305 | std::initializer_list indexes) const { 306 | Derived nt(*D()); 307 | int dim = 0; 308 | for (long index : indexes) { 309 | if (index == -1) { 310 | ++dim; 311 | } else { 312 | nt.select(dim, index); 313 | } 314 | } 315 | return nt; 316 | } 317 | 318 | template 319 | size_t TensorBase::offsetOf( 320 | std::initializer_list indexes) const { 321 | if (indexes.size() != ndims()) { 322 | throw std::invalid_argument("must provide ndims() indices"); 323 | } 324 | 325 | size_t offset = 0; 326 | auto dim = 0; 327 | for (auto it = indexes.begin(); it != indexes.end(); ++it) { 328 | const auto idx = *it; 329 | if (idx >= size(dim)) { 330 | throw std::invalid_argument("index out of range"); 331 | } 332 | 333 | offset += idx * stride(dim++); 334 | } 335 | 336 | return offset; 337 | } 338 | 339
| template 340 | void TensorBase::fill(T value) { 341 | Ops::_fill(t_, value); 342 | } 343 | 344 | template 345 | void TensorBase::zero() { 346 | Ops::_zero(t_); 347 | } 348 | 349 | template 350 | auto TensorBase::dot( 351 | const TensorBase& other) const -> accurate_type { 352 | return Ops::_dot(t_, other.t_); 353 | } 354 | 355 | #define TENSOR_REDUCE_OP(ret, name) \ 356 | template \ 357 | auto TensorBase::name() const -> ret { \ 358 | return Ops::_ ## name(mut()); \ 359 | } 360 | TENSOR_REDUCE_OP(T, minall) 361 | TENSOR_REDUCE_OP(T, maxall) 362 | TENSOR_REDUCE_OP(accurate_type, sumall) 363 | #undef TENSOR_REDUCE_OP 364 | 365 | #define TENSOR_ST_OP(name) \ 366 | template \ 367 | void TensorBase::name( \ 368 | const TensorBase& src, T value) { \ 369 | Ops::_ ## name(t_, src.mut(), value); \ 370 | } 371 | TENSOR_ST_OP(add) 372 | TENSOR_ST_OP(mul) 373 | TENSOR_ST_OP(div) 374 | #undef TENSOR_ST_OP 375 | 376 | #define TENSOR_TST_OP(name) \ 377 | template \ 378 | void TensorBase::name( \ 379 | const TensorBase& a, T value, const TensorBase& b) { \ 380 | Ops::_ ## name(t_, a.mut(), value, b.mut()); \ 381 | } 382 | TENSOR_TST_OP(cadd) 383 | #undef TENSOR_TST_OP 384 | 385 | #define TENSOR_TT_OP(name) \ 386 | template \ 387 | void TensorBase::name( \ 388 | const TensorBase& a, const TensorBase& b) { \ 389 | Ops::_ ## name(t_, a.mut(), b.mut()); \ 390 | } 391 | TENSOR_TT_OP(cmul) 392 | TENSOR_TT_OP(cdiv) 393 | #undef TENSOR_TT_OP 394 | 395 | #define TENSOR_TSTT_OP(name) \ 396 | template \ 397 | void TensorBase::name( \ 398 | const TensorBase& a, T value, const TensorBase& b, \ 399 | const TensorBase& c) { \ 400 | Ops::_ ## name(t_, a.mut(), value, b.mut(), c.mut()); \ 401 | } 402 | TENSOR_TSTT_OP(addcmul) 403 | TENSOR_TSTT_OP(addcdiv) 404 | #undef TENSOR_TSTT_OP 405 | 406 | #define TENSOR_STSTT_OP(name) \ 407 | template \ 408 | void TensorBase::name(T val1, const TensorBase& a, \ 409 | T val2, const TensorBase& b, const TensorBase& c) { \ 410 | Ops::_ ## name(t_, val1, a.mut(), val2, b.mut(), c.mut()); \ 411 | } 412 | TENSOR_STSTT_OP(addmv) 413 | TENSOR_STSTT_OP(addmm) 414 | TENSOR_STSTT_OP(addr) 415 | #undef TENSOR_STSTT_OP 416 | 417 | #define TENSOR_DIM_OP(name) \ 418 | template \ 419 | auto TensorBase::name(int dim) const -> Derived { \ 420 | Derived dest; \ 421 | Ops::_ ## name(dest.t_, mut(), dim); \ 422 | return dest; \ 423 | } 424 | TENSOR_DIM_OP(sum) 425 | TENSOR_DIM_OP(prod) 426 | TENSOR_DIM_OP(cumsum) 427 | #undef TENSOR_DIM_OP 428 | 429 | template 430 | auto TensorBase::sign() const -> Derived { 431 | Derived dest; 432 | Ops::_sign(dest.t_, mut()); 433 | return dest; 434 | } 435 | 436 | template 437 | auto TensorBase::cloneTH(const THType* other, 438 | unsigned cloneMode) -> THType* { 439 | if ((cloneMode & UNIQUE) || 440 | ((cloneMode & CONTIGUOUS) && !isContiguous(other))) { 441 | return Ops::_newClone(mut(other)); 442 | } 443 | 444 | return Ops::_newWithTensor(mut(other)); 445 | } 446 | 447 | template 448 | void TensorBase::clear() { 449 | Ops::_setStorage(t_, nullptr, 0, nullptr, nullptr); 450 | } 451 | 452 | template 453 | Derived operator-(const TensorBase& a) { 454 | Derived r; 455 | r.mul(a, -1); 456 | return r; 457 | } 458 | 459 | template 460 | Derived operator+(const TensorBase& a, 461 | const TensorBase& b) { 462 | Derived r; 463 | r.cadd(a, 1, b); 464 | return r; 465 | } 466 | 467 | template 468 | Derived& operator+=(TensorBase& a, 469 | const TensorBase& b) { 470 | a.cadd(1, b); 471 | return detail::D(a); 472 | } 473 | 474 | template 475 | Derived operator-(const TensorBase& 
a, 476 | const TensorBase& b) { 477 | Derived r; 478 | r.cadd(a, -1, b); 479 | return r; 480 | } 481 | 482 | template 483 | Derived& operator-=(TensorBase& a, 484 | const TensorBase& b) { 485 | a.cadd(-1, b); 486 | return detail::D(a); 487 | } 488 | 489 | template 490 | Derived operator*(const TensorBase& a, T b) { 491 | Derived r; 492 | r.mul(a, b); 493 | return r; 494 | } 495 | 496 | template 497 | Derived& operator*=(TensorBase& a, T b) { 498 | a.mul(b); 499 | return detail::D(a); 500 | } 501 | 502 | template 503 | Derived operator/(const TensorBase& a, T b) { 504 | Derived r; 505 | r.div(a, b); 506 | return r; 507 | } 508 | 509 | template 510 | Derived& operator/=(TensorBase& a, T b) { 511 | a.div(b); 512 | return detail::D(a); 513 | } 514 | 515 | template 516 | std::string TensorBase::str() const { 517 | std::string out; 518 | auto sz = sizesTH(); 519 | out.reserve(20 + 4 * sz.size()); 520 | #ifndef NO_FOLLY 521 | folly::toAppend(kLuaTypeName, "(", &out); 522 | #else 523 | out += kLuaTypeName; 524 | out += "("; 525 | #endif 526 | 527 | bool first = true; 528 | for (long s : sz) { 529 | if (!first) { 530 | out += "x"; 531 | } 532 | first = false; 533 | #ifndef NO_FOLLY 534 | folly::toAppend(s, &out); 535 | #else 536 | out += std::to_string(s); 537 | #endif 538 | } 539 | 540 | out += ")"; 541 | return out; 542 | } 543 | 544 | } // namespaces 545 | --------------------------------------------------------------------------------
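A minimal end-to-end sketch of the TensorBase API above. This program is illustrative only and not part of the repository; it assumes the concrete thpp::Tensor<float> instantiation from thpp/Tensor.h, whose size-list constructor is taken to mirror the makePtr(initializer_list) overload shown earlier.

  #include <iostream>

  #include <thpp/Tensor.h>

  int main() {
    thpp::Tensor<float> a{3, 4};          // 3x4 tensor, sizes given as a list
    a.fill(1.0f);
    thpp::Tensor<float> b{3, 4};
    b.fill(2.0f);

    auto c = a + b;                       // operator+ -> cadd(a, 1, b); every element is 3
    c.mul(10.0f);                         // in-place scalar multiply; every element is 30

    std::cout << c << "\n";               // prints str(), e.g. "torch.FloatTensor(3x4)"
    std::cout << c.sumall() << "\n";      // 12 elements * 30 = 360

    auto row = c[0];                      // select along dimension 0; shares storage with c
    row.force(thpp::Tensor<float>::UNIQUE);  // force a private copy of the data
    return 0;
  }

Note that c[0] is a view: until force(UNIQUE) is called, writing through row would also modify c, which is exactly the sharing behavior the UNIQUE mode bit and force() are there to control.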