├── .coveragerc
├── .gitattributes
├── .gitignore
├── .vscode
    └── settings.json
├── LICENSE
├── MANIFEST.in
├── README.md
├── pyproject.toml
├── requirements.txt
├── setup.py
├── src
    ├── CPUComplexCopy.h
    ├── CPUComplexType.h
    ├── CPUComplexTypeImpl.h
    ├── ComplexTensorApply.h
    ├── ComplexTypeInfo.h
    ├── General.h
    ├── SIMD
    │   ├── AVX.h
    │   ├── AVX2.h
    │   ├── Default.h
    │   ├── DefaultImpl.h
    │   └── SIMD.h
    ├── THTensorApply.h
    ├── Utils.h
    └── module.cpp
├── test.py
├── tests
    ├── __init__.py
    └── test_tensor.py
├── torch_complex
    └── __init__.py
└── tox.ini


/.coveragerc:
--------------------------------------------------------------------------------
 1 | [run]
 2 | source =
 3 |     torch_complex
 4 |     tests
 5 | branch = True
 6 | omit =
 7 |     torch_complex/cli.py
 8 | 
 9 | [report]
10 | exclude_lines =
11 |     no cov
12 |     no qa
13 |     noqa
14 |     pragma: no cover
15 |     if __name__ == .__main__.:
16 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # File created using '.gitignore Generator' for Visual Studio Code: https://bit.ly/vscode-gig
  2 | 
  3 | # Created by https://www.gitignore.io/api/macos,visualstudiocode,c,c++,python
  4 | 
  5 | ### C ###
  6 | # Prerequisites
  7 | *.d
  8 | 
  9 | # Object files
 10 | *.o
 11 | *.ko
 12 | *.obj
 13 | *.elf
 14 | 
 15 | # Linker output
 16 | *.ilk
 17 | *.map
 18 | *.exp
 19 | 
 20 | # Precompiled Headers
 21 | *.gch
 22 | *.pch
 23 | 
 24 | # Libraries
 25 | *.lib
 26 | *.a
 27 | *.la
 28 | *.lo
 29 | 
 30 | # Shared objects (inc. Windows DLLs)
 31 | *.dll
 32 | *.so
 33 | *.so.*
 34 | *.dylib
 35 | 
 36 | torch_complex/*.dll
 37 | torch_complex/*.so
 38 | torch_complex/*.so.*
 39 | torch_complex/*.dylib
 40 | 
 41 | # Executables
 42 | *.exe
 43 | *.out
 44 | *.app
 45 | *.i*86
 46 | *.x86_64
 47 | *.hex
 48 | 
 49 | # Debug files
 50 | *.dSYM/
 51 | *.su
 52 | *.idb
 53 | *.pdb
 54 | 
 55 | # Kernel Module Compile Results
 56 | *.mod*
 57 | *.cmd
 58 | .tmp_versions/
 59 | modules.order
 60 | Module.symvers
 61 | Mkfile.old
 62 | dkms.conf
 63 | 
 64 | ### C++ ###
 65 | # Prerequisites
 66 | 
 67 | # Compiled Object files
 68 | *.slo
 69 | 
 70 | # Precompiled Headers
 71 | 
 72 | # Compiled Dynamic libraries
 73 | 
 74 | # Fortran module files
 75 | *.mod
 76 | *.smod
 77 | 
 78 | # Compiled Static libraries
 79 | *.lai
 80 | 
 81 | # Executables
 82 | 
 83 | ### macOS ###
 84 | # General
 85 | .DS_Store
 86 | .AppleDouble
 87 | .LSOverride
 88 | 
 89 | # Icon must end with two \r
 90 | Icon
 91 | 
 92 | # Thumbnails
 93 | ._*
 94 | 
 95 | # Files that might appear in the root of a volume
 96 | .DocumentRevisions-V100
 97 | .fseventsd
 98 | .Spotlight-V100
 99 | .TemporaryItems
100 | .Trashes
101 | .VolumeIcon.icns
102 | .com.apple.timemachine.donotpresent
103 | 
104 | # Directories potentially created on remote AFP share
105 | .AppleDB
106 | .AppleDesktop
107 | Network Trash Folder
108 | Temporary Items
109 | .apdisk
110 | 
111 | ### Python ###
112 | # Byte-compiled / optimized / DLL files
113 | __pycache__/
114 | *.py[cod]
115 | *$py.class
116 | 
117 | # C extensions
118 | 
119 | # Distribution / packaging
120 | .Python
121 | build/
122 | develop-eggs/
123 | dist/
124 | downloads/
125 | eggs/
126 | .eggs/
127 | lib/
128 | lib64/
129 | parts/
130 | sdist/
131 | var/
132 | wheels/
133 | *.egg-info/
134 | .installed.cfg
135 | *.egg
136 | MANIFEST
137 | 
138 | # PyInstaller
139 | #  Usually these files are written by a python script from a template
140 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
141 | *.manifest
142 | *.spec
143 | 
144 | # Installer logs
145 | pip-log.txt
146 | pip-delete-this-directory.txt
147 | 
148 | # Unit test / coverage reports
149 | htmlcov/
150 | .tox/
151 | .coverage
152 | .coverage.*
153 | .cache
154 | nosetests.xml
155 | coverage.xml
156 | *.cover
157 | .hypothesis/
158 | .pytest_cache/
159 | 
160 | # Translations
161 | *.mo
162 | *.pot
163 | 
164 | # Django stuff:
165 | *.log
166 | local_settings.py
167 | db.sqlite3
168 | 
169 | # Flask stuff:
170 | instance/
171 | .webassets-cache
172 | 
173 | # Scrapy stuff:
174 | .scrapy
175 | 
176 | # Sphinx documentation
177 | docs/_build/
178 | 
179 | # PyBuilder
180 | target/
181 | 
182 | # Jupyter Notebook
183 | .ipynb_checkpoints
184 | 
185 | # IPython
186 | profile_default/
187 | ipython_config.py
188 | 
189 | # pyenv
190 | .python-version
191 | 
192 | # celery beat schedule file
193 | celerybeat-schedule
194 | 
195 | # SageMath parsed files
196 | *.sage.py
197 | 
198 | # Environments
199 | .env
200 | .venv
201 | env/
202 | venv/
203 | ENV/
204 | env.bak/
205 | venv.bak/
206 | 
207 | # Spyder project settings
208 | .spyderproject
209 | .spyproject
210 | 
211 | # Rope project settings
212 | .ropeproject
213 | 
214 | # mkdocs documentation
215 | /site
216 | 
217 | # mypy
218 | .mypy_cache/
219 | .dmypy.json
220 | dmypy.json
221 | 
222 | ### Python Patch ###
223 | .venv/
224 | 
225 | ### Python.VirtualEnv Stack ###
226 | # Virtualenv
227 | # http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
228 | [Bb]in
229 | [Ii]nclude
230 | [Ll]ib
231 | [Ll]ib64
232 | [Ll]ocal
233 | [Ss]cripts
234 | pyvenv.cfg
235 | pip-selfcheck.json
236 | 
237 | ### VisualStudioCode ###
238 | .vscode/*
239 | !.vscode/settings.json
240 | !.vscode/tasks.json
241 | !.vscode/launch.json
242 | !.vscode/extensions.json
243 | 
244 | 
245 | # End of https://www.gitignore.io/api/macos,visualstudiocode,c,c++,python
246 | 
247 | # Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option)
248 | 
249 | 


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "python.venvPath": "/Users/roger/.virtualenvs",
 3 |     "python.pythonPath": "/Users/roger/.virtualenvs/torch/bin/python",
 4 |     "files.associations": {
 5 |         "forward_list": "cpp",
 6 |         "list": "cpp",
 7 |         "string": "cpp",
 8 |         "valarray": "cpp",
 9 |         "vector": "cpp",
10 |         "__bit_reference": "cpp",
11 |         "__string": "cpp",
12 |         "algorithm": "cpp",
13 |         "string_view": "cpp",
14 |         "__config": "cpp",
15 |         "__nullptr": "cpp",
16 |         "cstddef": "cpp",
17 |         "exception": "cpp",
18 |         "initializer_list": "cpp",
19 |         "new": "cpp",
20 |         "stdexcept": "cpp",
21 |         "type_traits": "cpp",
22 |         "typeinfo": "cpp",
23 |         "variant": "cpp"
24 |     }
25 | }


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2018 U.N. Owen
 2 | 
 3 | MIT License
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE
2 | include README.md
3 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # PyTorch-Complex
 2 | 
 3 | Complex-valued tensor support for [PyTorch](https://github.com/pytorch/pytorch). (Work in progress)
 4 | 
 5 | **Warning**: This package is at very early stage, do not use it.
 6 | 
 7 | ## Usage
 8 | 
 9 | **Warning**: this package requires a fresh build of PyTorch
10 | revision 6cb593b88cb0c411690b4957850058329526d87b.  Other
11 | revisions may work, but you will void the warranty.
12 | 
13 | To use this commit, `git clone` pytorch and checkout to this commit, then build pytorch from soruce.
14 | After you build PyTorch successfully, you will be able to build this plugin just like a normal Python package:
15 | 
16 | ```sh
17 | python setup.py install
18 | python setup.py build
19 | python setup.py test
20 | ```
21 | 
22 | ```python
23 | from torch_complex import torch
24 | ```
25 | 
26 | or
27 | 
28 | ```python
29 | import torch_complex.torch as torch
30 | ```
31 | 
32 | then the complex tensor support will be in `torch` module. Use it just like the other tensor types.
33 | 
34 | ## Contribution
35 | 
36 | Please read [Pytorch/#755](https://github.com/pytorch/pytorch/issues/755) first.
37 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [metadata]
 2 | name = 'pytorch-complex'
 3 | version = '0.0.1'
 4 | description = 'Complex tensor support for PyTorch'
 5 | author = 'Roger Luo'
 6 | author_email = 'rogerluo.rl18@gmail.com'
 7 | license = 'MIT'
 8 | url = 'https://github.com/Roger-luo/pytorch-complex'
 9 | 
10 | [requires]
11 | python_version = ['2.7', '3.5', '3.6', 'pypy', 'pypy3']
12 | 
13 | [build-system]
14 | requires = ['setuptools', 'wheel']
15 | 
16 | [tool.hatch.commands]
17 | prerelease = 'hatch build'
18 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | -e .
2 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
  1 | from io import open
  2 | 
  3 | import platform
  4 | import os, shutil, torch
  5 | from setuptools import setup, find_packages
  6 | import distutils.command.clean
  7 | from torch.utils.cpp_extension import CppExtension
  8 | 
  9 | class clean(distutils.command.clean.clean):
 10 | 
 11 |     def run(self):
 12 |         import glob
 13 |         import re
 14 |         with open('.gitignore', 'r') as f:
 15 |             ignores = f.read()
 16 |             pat = re.compile(r'^#( BEGIN NOT-CLEAN-FILES )?')
 17 |             for wildcard in filter(None, ignores.split('\n')):                    
 18 | 
 19 |                 match = pat.match(wildcard)
 20 |                 if match:
 21 |                     if match.group(1):
 22 |                         # Marker is found and stop reading .gitignore.
 23 |                         break
 24 |                     # Ignore lines which begin with '#'.
 25 |                 else:
 26 |                     for filename in glob.glob(wildcard):
 27 |                         # skip vscode
 28 |                         vscode_pat = re.compile(r'.vscode/.*')
 29 |                         if re.match(vscode_pat, filename):
 30 |                             continue
 31 | 
 32 |                         try:
 33 |                             os.remove(filename)
 34 |                         except OSError:
 35 |                             shutil.rmtree(filename, ignore_errors=True)
 36 | 
 37 |         # It's an old-style class in Python 2.7...
 38 |         distutils.command.clean.clean.run(self)
 39 | 
 40 | 
 41 | with open('torch_complex/__init__.py', 'r') as f:
 42 |     for line in f:
 43 |         if line.startswith('__version__'):
 44 |             version = line.strip().split('=')[1].strip(' \'"')
 45 |             break
 46 |     else:
 47 |         version = '0.0.1'
 48 | 
 49 | with open('README.md', 'r', encoding='utf-8') as f:
 50 |     readme = f.read()
 51 | 
 52 | REQUIRES = []
 53 | 
 54 | cmdclass = {
 55 |     "build_ext": torch.utils.cpp_extension.BuildExtension,
 56 |     'clean': clean,
 57 | }
 58 | 
 59 | if platform.system() == 'Darwin':
 60 |     extra_compile_args = ["-g", "-stdlib=libc++", "-std=c++11"]
 61 | else:
 62 |     extra_compile_args = ["-g"]
 63 | 
 64 | ext_modules = [
 65 |     CppExtension(
 66 |         "torch_complex.cpp",
 67 |         ["src/module.cpp"],
 68 |         extra_compile_args=extra_compile_args,
 69 |     )
 70 | ]
 71 | 
 72 | setup(
 73 |     name='torch-complex',
 74 |     version=version,
 75 |     description='',
 76 |     long_description=readme,
 77 |     author='Roger Luo',
 78 |     author_email='rogerluo.rl18@gmail.com',
 79 |     maintainer='Roger Luo',
 80 |     maintainer_email='rogerluo.rl18@gmail.com',
 81 |     url='https://github.com/_/torch-complex',
 82 |     license='MIT',
 83 | 
 84 |     keywords=[
 85 |         '',
 86 |     ],
 87 | 
 88 |     classifiers=[
 89 |         'Development Status :: 4 - Beta',
 90 |         'Intended Audience :: Developers',
 91 |         'License :: OSI Approved :: MIT License',
 92 |         'Natural Language :: English',
 93 |         'Operating System :: OS Independent',
 94 |         'Programming Language :: Python :: 2.7',
 95 |         'Programming Language :: Python :: 3.5',
 96 |         'Programming Language :: Python :: 3.6',
 97 |         'Programming Language :: Python :: Implementation :: CPython',
 98 |     ],
 99 | 
100 |     install_requires=REQUIRES,
101 |     tests_require=['coverage', 'pytest'],
102 | 
103 |     packages=find_packages(),
104 |     ext_modules=ext_modules,
105 |     cmdclass=cmdclass,
106 | )
107 | 


--------------------------------------------------------------------------------
/src/CPUComplexCopy.h:
--------------------------------------------------------------------------------
  1 | #ifndef CPU_COMPLEX_COPY_H
  2 | #define CPU_COMPLEX_COPY_H
  3 | 
  4 | #include "General.h"
  5 | #include "ComplexTypeInfo.h"
  6 | #include "CPUComplexType.h"
  7 | 
  8 | namespace at {
  9 | 
 10 | template <typename DST, typename SRC>
 11 | struct CPUCopy;
 12 | 
 13 | // template arguments is mixed with C macros, e.g
 14 | // TH_TENSOR_APPLY2(CPUTypeInfo<double, double>, ...)
 15 | // will not be correct...
 16 | 
 17 | // template <typename Type>
 18 | // struct CPUCopy<Type, Type> {
 19 | 
 20 | //     inline static void eval(TensorImpl *dst, TensorImpl *src) {
 21 | //         CPUTypeInfo<Type>::scalar_t *dst_data = NULL;
 22 | 
 23 | //         TH_TENSOR_APPLY2(
 24 | //             CPUTypeInfo<Type>::scalar_t, dst,
 25 | //             CPUTypeInfo<Type>::scalar_t, src,
 26 | //             *dst_data = static_cast<CPUTypeInfo<Type>::scalar_t>(static_cast<inter_copy_type_t<CPUTypeInfo<Type>::scalar_t>>(*src_data));
 27 | //         )
 28 | //     }
 29 | // };
 30 | 
 31 | // Copy from THTensorCopy
 32 | // 
 33 | // C and C++ have a lovely set of implicit conversion rules, where casting
 34 | // signed integral values to unsigned integral values is always valid
 35 | // (it basically treats the value as if using modulo arithmetic), however
 36 | // converting negative floating point values to unsigned integral types
 37 | // is UB! This means that: (double)-1 -> (int64_t)-1 -> (uint8_t)255 is
 38 | // guaranteed to look like this, but we have (double)-1 -> (uint8_t)<ANYTHING>
 39 | // because it's UB. This also makes UBSan really angry.
 40 | //
 41 | // I think those rules are stupid and we really shouldn't conform to them.
 42 | // The structs below ensure that for all unsigned types we use (currently
 43 | // only uint8_t), we will do an intermediate convertion via int64_t,
 44 | // to ensure that any negative values are wrapped around correctly.
 45 | //
 46 | // Note that conversions from doubles to signed integral types that can't
 47 | // represent a particular value after truncating the fracitonal part are UB as well,
 48 | // but fixing them is not as simple as adding an int64_t intermediate, beacuse the
 49 | // int64_t -> <smaller signed type> conversion is UB for those large values anyway.
 50 | // I guess in that case we just have to live with that, but it's definitely less
 51 | // surprising than the thing above.
 52 | //
 53 | // For the curious:
 54 | //   https://en.cppreference.com/w/cpp/language/implicit_conversion
 55 | //   The relevant paragraph is "Floating–integral conversions".
 56 | 
 57 | template<typename T>
 58 | struct inter_copy_type {
 59 |   using type = T;
 60 | };
 61 | 
 62 | template<>
 63 | struct inter_copy_type<uint8_t> {
 64 |   using type = int64_t;
 65 | };
 66 | 
 67 | template<typename T>
 68 | using inter_copy_type_t = typename inter_copy_type<T>::type;
 69 | 
 70 | 
 71 | template <typename DST, typename SRC>
 72 | struct CPUCopy {
 73 |     inline static void eval(TensorImpl *dst, TensorImpl *src) {
 74 |         TH_TENSOR_APPLY2(
 75 |             DST, dst,
 76 |             SRC, src,
 77 |             *dst_data = static_cast<DST>(static_cast<inter_copy_type_t<SRC>>(*src_data));
 78 |         )
 79 |     }
 80 | };
 81 | 
 82 | // copy from complex to real
 83 | template <typename DST, typename SRC>
 84 | struct CPUCopy<DST, std::complex<SRC>> {
 85 |     inline static void eval(TensorImpl *dst, TensorImpl *src) {
 86 |         TH_TENSOR_APPLY2(
 87 |             DST, dst,
 88 |             std::complex<SRC>, src,
 89 |             *dst_data = static_cast<DST>(static_cast<inter_copy_type_t<SRC>>((*src_data).real()));
 90 |         )
 91 |     }
 92 | };
 93 | 
 94 | 
 95 | template <typename PT>
 96 | Tensor & CPUComplexType<PT>::s_copy_(Tensor & dst, const Tensor & src, bool non_blocking) const {
 97 |     checked_tensor_unwrap(dst, "dst", 0, false, Backend::CPU, CPUComplexTypeInfo<PT>::scalar_type);
 98 | 
 99 |     switch (src.type().ID()) {
100 |         case TypeID::CPUByte:
101 |             CPUCopy<std::complex<PT>, int8_t>::eval(dst.unsafeGetTensorImpl(), src.unsafeGetTensorImpl());
102 |             break;
103 |         case TypeID::CPUChar:
104 |             CPUCopy<std::complex<PT>, int8_t>::eval(dst.unsafeGetTensorImpl(), src.unsafeGetTensorImpl());
105 |             break;
106 |         case TypeID::CPUDouble:
107 |             std::cout << "double is copied to complex" << std::endl;
108 |             CPUCopy<std::complex<PT>, double>::eval(dst.unsafeGetTensorImpl(), src.unsafeGetTensorImpl());
109 |             break;
110 |         case TypeID::CPUFloat:
111 |             CPUCopy<std::complex<PT>, float>::eval(dst.unsafeGetTensorImpl(), src.unsafeGetTensorImpl());
112 |             break;
113 |         case TypeID::CPUComplexFloat:
114 |             CPUCopy<std::complex<PT>, std::complex<float>>::eval(dst.unsafeGetTensorImpl(), src.unsafeGetTensorImpl());
115 |             break;
116 |         case TypeID::CPUComplexDouble:
117 |             CPUCopy<std::complex<PT>, std::complex<double>>::eval(dst.unsafeGetTensorImpl(), src.unsafeGetTensorImpl());
118 |             break;
119 |         case TypeID::CPUInt:
120 |             CPUCopy<std::complex<PT>, int32_t>::eval(dst.unsafeGetTensorImpl(), src.unsafeGetTensorImpl());
121 |             break;
122 |         case TypeID::CPULong:
123 |             CPUCopy<std::complex<PT>, int64_t>::eval(dst.unsafeGetTensorImpl(), src.unsafeGetTensorImpl());
124 |             break;
125 |         case TypeID::CPUShort:
126 |             CPUCopy<std::complex<PT>, int16_t>::eval(dst.unsafeGetTensorImpl(), src.unsafeGetTensorImpl());
127 |             break;
128 |         case TypeID::CPUHalf:
129 |             std::cout << "copy half" << std::endl;
130 |             break;
131 |         default:
132 |             at::_s_copy_from(src, dst, non_blocking);
133 |             return dst;
134 |     }
135 | 
136 |     dst.unsafeGetTensorImpl()->maybe_zero_dim(src.dim() == 0);
137 |     return dst;
138 | }
139 | 
140 | template <typename PT>
141 | Tensor CPUComplexType<PT>::_s_copy_from(const Tensor & src, const Tensor & dst, bool non_blocking) const {
142 |     // This handles the copy from other types
143 | 
144 |     switch (dst.type().ID()) {
145 |         case TypeID::CPUByte:
146 |             CPUCopy<int8_t, std::complex<PT>>::eval(dst.unsafeGetTensorImpl(), src.unsafeGetTensorImpl());
147 |             break;
148 |         case TypeID::CPUChar:
149 |             CPUCopy<int8_t, std::complex<PT>>::eval(dst.unsafeGetTensorImpl(), src.unsafeGetTensorImpl());
150 |             break;
151 |         case TypeID::CPUDouble:
152 |             CPUCopy<double, std::complex<PT>>::eval(dst.unsafeGetTensorImpl(), src.unsafeGetTensorImpl());
153 |             break;
154 |         case TypeID::CPUFloat:
155 |             CPUCopy<float, std::complex<PT>>::eval(dst.unsafeGetTensorImpl(), src.unsafeGetTensorImpl());
156 |             break;
157 |         case TypeID::CPUInt:
158 |             CPUCopy<int32_t, std::complex<PT>>::eval(dst.unsafeGetTensorImpl(), src.unsafeGetTensorImpl());
159 |             break;
160 |         case TypeID::CPULong:
161 |             CPUCopy<int64_t, std::complex<PT>>::eval(dst.unsafeGetTensorImpl(), src.unsafeGetTensorImpl());
162 |             break;
163 |         case TypeID::CPUShort:
164 |             CPUCopy<int16_t, std::complex<PT>>::eval(dst.unsafeGetTensorImpl(), src.unsafeGetTensorImpl());
165 |             break;
166 |         default:
167 |             AT_ERROR("copy does not support ", src.type().toString(), " to ", dst.type().toString(), " copy (copy_from case).");
168 |     }
169 |     dst.unsafeGetTensorImpl()->maybe_zero_dim(src.dim() == 0);
170 |     return dst;
171 | }
172 | 
173 | } // at
174 | 
175 | #endif // CPU_COMPLEX_COPY_H
176 | 


--------------------------------------------------------------------------------
/src/CPUComplexType.h:
--------------------------------------------------------------------------------
 1 | #ifndef CPUComplexType_H
 2 | #define CPUComplexType_H
 3 | 
 4 | #include "General.h"
 5 | #include "ComplexTypeInfo.h"
 6 | 
 7 | namespace at {
 8 | 
 9 | template <typename PrecisionType>
10 | struct CPUComplexType: public at::CPUTypeDefault {
11 | 
12 |     CPUComplexType()
13 |     : CPUTypeDefault(CPUTensorId(), /*is_variable=*/false, /*is_undefined=*/false) {}
14 | 
15 |     virtual ScalarType scalarType() const override;
16 |     virtual caffe2::TypeMeta typeMeta() const override;
17 |     Backend backend() const override;
18 |     const char * toString() const override;
19 |     size_t elementSizeInBytes() const override;
20 |     TypeID ID() const override;
21 |     Tensor & s_copy_(Tensor & self, const Tensor & src, bool non_blocking) const override;
22 |     Tensor _s_copy_from(const Tensor & self, const Tensor & dst, bool non_blocking) const override;
23 | 
24 |     // Tensor & resize_(Tensor & self, IntList size) const override;
25 | 
26 |     /*
27 |     Tensor _th_tensor(Storage storage, int64_t storageOffset, IntList sizes, IntList strides) const override;
28 |     Tensor _th_tensor(IntList sizes, IntList strides) const override;
29 |     */
30 |     Tensor empty(IntList size, const TensorOptions & options) const override;
31 |     /*
32 |     Tensor tensor() const override;
33 |     */
34 | 
35 |     Tensor & set_(Tensor & self, Storage source, int64_t storage_offset, IntList size, IntList stride) const override;
36 |     // Tensor & set_(Tensor & self, Storage source) const override;
37 |     // Tensor & set_(Tensor & self, const Tensor & source) const override;
38 |     // Tensor & set_(Tensor & self) const override;
39 | 
40 |     Tensor & cat_out(Tensor & self, TensorList tensors, int64_t dim) const override;
41 |     Tensor cat(TensorList tensors, int64_t dim) const override;
42 | 
43 |     Tensor & fill_(Tensor & self, Scalar value) const override;
44 |     Tensor & fill_(Tensor & self, const Tensor & value) const override;
45 | 
46 |     Tensor & zero_(Tensor & self) const override;
47 |     Tensor & native_zero_(Tensor & self) const override;
48 |     void* data_ptr(const Tensor & self) const override;
49 |     Scalar _local_scalar_dense(const Tensor & self) const override;
50 | 
51 |     // LinearAlgebra
52 |     Tensor & mv_out(Tensor & result, const Tensor & self, const Tensor & vec) const override;
53 |     Tensor mv(const Tensor & self, const Tensor & vec) const override;
54 |     Tensor & mm_out(Tensor & result, const Tensor & self, const Tensor & mat2) const override;
55 |     Tensor mm(const Tensor & self, const Tensor & mat2) const override;
56 | };
57 | 
58 | } // namespace at
59 | 
60 | #include "CPUComplexTypeImpl.h"
61 | #include "CPUComplexCopy.h"
62 | 
63 | #endif // CPUComplexType_H
64 | 


--------------------------------------------------------------------------------
/src/CPUComplexTypeImpl.h:
--------------------------------------------------------------------------------
  1 | #include "CPUComplexType.h"
  2 | #include "Utils.h"
  3 | #include "ComplexTensorApply.h"
  4 | #include "SIMD/SIMD.h"
  5 | 
  6 | namespace at {
  7 | 
  8 | template <typename PT>
  9 | ScalarType CPUComplexType<PT>::scalarType() const {
 10 |     return CPUComplexTypeInfo<PT>::scalar_type;
 11 | }
 12 | 
 13 | template <typename PT>
 14 | caffe2::TypeMeta CPUComplexType<PT>::typeMeta() const {
 15 |     return scalarTypeToTypeMeta(CPUComplexTypeInfo<PT>::scalar_type);
 16 | }
 17 | 
 18 | template <typename PT>
 19 | Backend CPUComplexType<PT>::backend() const {
 20 |     return Backend::CPU;
 21 | }
 22 | 
 23 | template <typename PT>
 24 | TypeID CPUComplexType<PT>::ID() const {
 25 |     return CPUComplexTypeInfo<PT>::type_id;
 26 | }
 27 | 
 28 | template <typename PT>
 29 | size_t CPUComplexType<PT>::elementSizeInBytes() const {
 30 |     return 2 * sizeof(PT);
 31 | }
 32 | 
 33 | #if 0
 34 | template <typename PT>
 35 | Tensor CPUComplexType<PT>::_th_tensor(Storage storage, int64_t storageOffset, IntList sizes, IntList strides) const {
 36 |     // DeviceGuard omitted
 37 | 
 38 |     // checks
 39 |     if (strides.data()) {AT_CHECK(sizes.size() == strides.size(), "number of sizes and strides must match");}
 40 |     auto storage_ = checked_storage(storage, "storage", 1, DeviceType::CPU, at::scalarTypeToDataType(CPUComplexTypeInfo<PT>::scalar_type));
 41 | 
 42 |     // make tensor
 43 |     auto self = c10::make_intrusive<TensorImpl, UndefinedTensorImpl>(
 44 |         /* storage */ std::move(storage_),
 45 |         /* tensor type id */ at::CPUTensorId(),
 46 |         /* is_variable */ false);
 47 | 
 48 |     /* storageOffset */
 49 |     if(storageOffset < 0)
 50 |         THError("Tensor: invalid storage offset");
 51 |     self->set_storage_offset(storageOffset);
 52 | 
 53 |     // set size
 54 |     self->set_sizes_and_strides(sizes, strides);
 55 |     return Tensor(self);
 56 | }
 57 | 
 58 | template <typename PT>
 59 | Tensor CPUComplexType<PT>::_th_tensor(IntList sizes, IntList strides) const {
 60 |     // DeviceGuard omitted
 61 |     int64_t numel = 1;
 62 |     for (auto s : sizes) {
 63 |         numel *= s;
 64 |     }
 65 | 
 66 |     Storage s{c10::make_intrusive<StorageImpl>(
 67 |         scalarTypeToTypeMeta(CPUComplexTypeInfo<PT>::scalar_type),
 68 |         numel,
 69 |         getCPUAllocator(),
 70 |         /* resizable */ true)};
 71 | 
 72 |     return tensor(s, 0, sizes, strides);
 73 | }
 74 | #endif
 75 | 
 76 | template <typename PT>
 77 | Tensor CPUComplexType<PT>::empty(IntList size, const TensorOptions & options) const {
 78 |     const DeviceGuard device_guard(options.device());
 79 |     return at::native::empty_cpu(/* actuals */ size, options);
 80 | }
 81 | 
 82 | #if 0
 83 | template <typename PT>
 84 | Tensor CPUComplexType<PT>::tensor() const {
 85 |     Storage s{c10::make_intrusive<StorageImpl>(
 86 |         scalarTypeToTypeMeta(CPUComplexTypeInfo<PT>::scalar_type),
 87 |         0,
 88 |         getCPUAllocator(),
 89 |         /* resizable */ true)};
 90 | 
 91 |     // make tensor
 92 |     Tensor t{c10::make_intrusive<TensorImpl, UndefinedTensorImpl>(
 93 |         /* storage */ std::move(s),
 94 |         /* tensor type id */ at::CPUTensorId(),
 95 |         /* is_variable */ false)};
 96 | 
 97 |     return t;
 98 | }
 99 | #endif
100 | 
101 | template <typename PT>
102 | Tensor & CPUComplexType<PT>::set_(Tensor & self, Storage source, int64_t storage_offset, IntList sizes, IntList strides) const {
103 |     // DeviceGuard omitted
104 |     auto self_ = checked_tensor_unwrap(self,"self",1, false, Backend::CPU, CPUComplexTypeInfo<PT>::scalar_type);
105 |     auto source_ = checked_storage(source,"source",2, DeviceType::CPU, at::scalarTypeToDataType(CPUComplexTypeInfo<PT>::scalar_type));
106 | 
107 |     StorageImpl *storage_ptr = source.unsafeGetStorageImpl();
108 |     StorageImpl *self_storage_ptr = self_->storage().unsafeGetStorageImpl();
109 | 
110 |     if (self_storage_ptr != storage_ptr)
111 |     {
112 |         if (!self_storage_ptr) {
113 |             AT_ERROR("Tensor: invalid null storage");
114 |         }
115 | 
116 |         // steal storage
117 |         self_->set_storage(at::Storage(c10::intrusive_ptr<THStorage>::reclaim(storage_ptr)));
118 |     }
119 | 
120 |     if (storage_offset < 0)
121 |         AT_ERROR("Tensor: invalid storage offset");
122 | 
123 |     self_->set_storage_offset(storage_offset);
124 |     // set size
125 |     self_->set_sizes_and_strides(sizes, strides);
126 |     self_->maybe_zero_dim(false);
127 |     return self;
128 | }
129 | 
130 | template <typename PT>
131 | Tensor & CPUComplexType<PT>::cat_out(Tensor & self, TensorList tensors, int64_t dim) const {
132 |     const OptionalDeviceGuard device_guard(device_of(self));
133 |     // auto self_ = checked_tensor_unwrap(self, "self", 1, false, Backend::CPU, CPUComplexTypeInfo<PT>::scalar_type);
134 |     // auto tensors_ = checked_tensor_unwrap(tensors, "tensors", 1, Backend::CPU, CPUComplexTypeInfo<PT>::scalar_type);
135 | 
136 |     AT_ERROR("catArray is not implemented, it's in THTensorMoreMath.cpp");
137 | };
138 | 
139 | template <typename PT>
140 | Tensor CPUComplexType<PT>::cat(TensorList tensors, int64_t dim) const {
141 |     AT_ERROR("cat not implemented");
142 | };
143 | 
144 | /* NOTE: This C macro here mainly because ISO C++03 14.2/4 
145 |  * 
146 |  * When the name of a member template specialization appears after . or -> in a postfix-expression,
147 |  * or after nested-name-specifier in a qualified-id, and the postfix-expression or qualified-id 
148 |  * explicitly depends on a template-parameter (14.6.2), the member template name must be prefixed 
149 |  * by the keyword template. Otherwise the name is assumed to name a non-template.
150 |  * 
151 |  * We have TENSOR->data inside the TH_TENSOR_APPLY macro without template, but our implementation via
152 |  * C++ templates for generic complex number requires a template keyword for data.
153 |  * 
154 |  * This is just a workaround, when everything moves to ATen/native, we can use the new protocals.
155 |  */
156 | #define IMPLEMENT_FILL(PrecisionType) \
157 | template <> \
158 | Tensor & CPUComplexType<PrecisionType>::fill_(Tensor & self, Scalar value) const { \
159 |     const OptionalDeviceGuard device_guard(device_of(self)); \
160 |     auto self_ = checked_tensor_unwrap(self,"self",1, false, Backend::CPU, CPUComplexTypeInfo<PrecisionType>::scalar_type); \
161 |     auto value_ = value.to<std::complex<PrecisionType>>(); \
162 | \
163 |     if(self_->is_contiguous() || is_transposed(self_)) { \
164 |         TH_TENSOR_APPLY_CONTIG(std::complex<PrecisionType>, self_, simd::Default<std::complex<PrecisionType>>::fill(self__data, value_, self__len); ); \
165 |     } else { \
166 |         TH_TENSOR_APPLY(std::complex<PrecisionType>, self_, \
167 |             if (self__stride == 1) { \
168 |                 simd::Default<std::complex<PrecisionType>>::fill(self__data, value_, self__size); \
169 | 	            self__i = self__size; \
170 | 	            self__data += self__stride * self__size; \
171 | 	            break; \
172 |             } else { \
173 |                 *self__data = value_; \
174 |             } \
175 |         ); \
176 |     } \
177 | \
178 |     return self; \
179 | }
180 | 
181 | IMPLEMENT_FILL(double)
182 | IMPLEMENT_FILL(float)
183 | 
184 | template <typename PT>
185 | Tensor &CPUComplexType<PT>::fill_(Tensor &self, const Tensor & value) const {
186 |     const OptionalDeviceGuard device_guard(device_of(self));
187 |     if (value.dim() == 0) {
188 |         return static_cast<const TypeExtendedInterface*>(this)->fill_(self, value.item());
189 |     }
190 |     AT_ERROR("fill_ only supports a 0-dimensional value tensor, but got tensor "
191 |         "with ", value.dim(), " dimension(s).");
192 | }
193 | 
194 | template <typename PT>
195 | Tensor & CPUComplexType<PT>::zero_(Tensor & self) const {
196 |     return fill_(self, Scalar(0.0));
197 | }
198 | 
199 | template <typename PT>
200 | Tensor &CPUComplexType<PT>::native_zero_(Tensor & self) const {
201 |     return fill_(self, Scalar(0.0));
202 | }
203 | 
204 | template <typename PT>
205 | void *CPUComplexType<PT>::data_ptr(const Tensor & self) const {
206 |     auto self_ = checked_tensor_unwrap(self,"self",1, false, Backend::CPU, CPUComplexTypeInfo<PT>::scalar_type);
207 |     return self_->template data<std::complex<PT>>();
208 | }
209 | 
210 | template <typename PT>
211 | Scalar CPUComplexType<PT>::_local_scalar_dense(const Tensor & self) const {
212 |     const OptionalDeviceGuard device_guard(device_of(self));
213 |     const auto& self_ty = *this;
214 |     (void)self_ty;
215 |     return at::native::_local_scalar_dense_cpu(/* actuals */ self);
216 | }
217 | 
218 | template <>
219 | inline const char * CPUComplexType<float>::toString() const {
220 |     return "CPUComplexTensor<float>";
221 | }
222 | 
223 | template <>
224 | inline const char * CPUComplexType<double>::toString() const {
225 |     return "CPUComplexType<double>";
226 | }
227 | 
228 | // Linear Algebra
229 | template <typename PT>
230 | Tensor & CPUComplexType<PT>::mv_out(Tensor & result, const Tensor & self, const Tensor & vec) const {
231 |     AT_ERROR("mv_out not implemented");
232 | }
233 | 
234 | template <typename PT>
235 | Tensor CPUComplexType<PT>::mv(const Tensor & self, const Tensor & vec) const {
236 |     AT_ERROR("mv not implemented");
237 | }
238 | 
239 | template <typename PT>
240 | Tensor CPUComplexType<PT>::mm(const Tensor &self, const Tensor &mat2) const {
241 |     AT_ERROR("mm not implemented");
242 | }
243 | 
244 | template <typename PT>
245 | Tensor & CPUComplexType<PT>::mm_out(Tensor & result, const Tensor & self, const Tensor & mat2) const {
246 |     AT_ERROR("mm_out not implemented");
247 | }
248 | 
249 | } // at
250 | 


--------------------------------------------------------------------------------
/src/ComplexTensorApply.h:
--------------------------------------------------------------------------------
  1 | #include "ComplexTypeInfo.h"
  2 | 
  3 | #ifndef NAN
  4 |   #define NAN (nan(NULL))
  5 | #endif
  6 | 
  7 | #ifdef _OPENMP
  8 | #include <omp.h>
  9 | #endif
 10 | 
 11 | #define HYPER_TH_OMP_OVERHEAD_THRESHOLD 2000
 12 | #define ORDIN_TH_OMP_OVERHEAD_THRESHOLD 20000
 13 | #define UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD 50000
 14 | #define TH_OMP_OVERHEAD_THRESHOLD 100000
 15 | 
 16 | #ifdef _OPENMP
 17 | 
 18 | #ifndef _WIN32
 19 | #define PRAGMA(P) _Pragma(#P)
 20 | #else
 21 | #define PRAGMA(P) __pragma(P)
 22 | #endif
 23 | 
 24 | #define TH_TENSOR_APPLY_CONTIG(TYPE, TENSOR, CODE) \
 25 | { \
 26 |   int inOmp = omp_in_parallel(); \
 27 |   ptrdiff_t TH_TENSOR_size = TENSOR->numel(); \
 28 |   PRAGMA(omp parallel if ((TH_TENSOR_size > TH_OMP_OVERHEAD_THRESHOLD) && (!inOmp))) \
 29 |   { \
 30 |     size_t num_threads = omp_get_num_threads(); \
 31 |     size_t tid = omp_get_thread_num(); \
 32 |     ptrdiff_t TH_TENSOR_offset = tid * (TH_TENSOR_size / num_threads); \
 33 |     ptrdiff_t TH_TENSOR_end = tid == num_threads - 1 ? TH_TENSOR_size : \
 34 |       TH_TENSOR_offset + TH_TENSOR_size / num_threads; \
 35 |     ptrdiff_t TENSOR##_len = TH_TENSOR_end - TH_TENSOR_offset; \
 36 |     TYPE *TENSOR##_data = TENSOR->template data<TYPE>() + TH_TENSOR_offset; \
 37 |     CODE \
 38 |   } \
 39 | }
 40 | #else
 41 | #define TH_TENSOR_APPLY_CONTIG(TYPE, TENSOR, CODE) \
 42 | { \
 43 |   TYPE *TENSOR##_data = TENSOR->template data<TYPE>(); \
 44 |   ptrdiff_t TENSOR##_len = TENSOR->numel(); \
 45 |   CODE \
 46 | }
 47 | #endif
 48 | 
 49 | #ifdef _OPENMP
 50 | #define TH_TENSOR_APPLY2_CONTIG(TYPE1, TENSOR1, TYPE2, TENSOR2, CODE) \
 51 | { \
 52 |   int inOmp = omp_in_parallel(); \
 53 |   ptrdiff_t TH_TENSOR_size = TENSOR->numel(); \
 54 |   PRAGMA(omp parallel if ((TH_TENSOR_size > TH_OMP_OVERHEAD_THRESHOLD) && (!inOmp))) \
 55 |   { \
 56 |     size_t num_threads = omp_get_num_threads(); \
 57 |     size_t tid = omp_get_thread_num(); \
 58 |     ptrdiff_t TH_TENSOR_offset = tid * (TH_TENSOR_size / num_threads); \
 59 |     ptrdiff_t TH_TENSOR_end = tid == num_threads - 1 ? TH_TENSOR_size : \
 60 |       TH_TENSOR_offset + TH_TENSOR_size / num_threads; \
 61 |     ptrdiff_t TENSOR1##_len = TH_TENSOR_end - TH_TENSOR_offset; \
 62 |     TYPE1 *TENSOR1##_data = TENSOR1->template data<TYPE1>() + TH_TENSOR_offset; \
 63 |     TYPE2 *TENSOR2##_data = TENSOR2->template data<TYPE2>() + TH_TENSOR_offset; \
 64 |     CODE \
 65 |   } \
 66 | }
 67 | #else
 68 | #define TH_TENSOR_APPLY2_CONTIG(TYPE1, TENSOR1, TYPE2, TENSOR2, CODE) \
 69 | { \
 70 |   TYPE1 *TENSOR1##_data = TENSOR1->template data<TYPE1>(); \
 71 |   TYPE2 *TENSOR2##_data = TENSOR2->template data<TYPE2>(); \
 72 |   ptrdiff_t TENSOR1##_len = TENSOR1->numel(); \
 73 |   CODE \
 74 | }
 75 | #endif
 76 | 
 77 | #ifdef _OPENMP
 78 | #define TH_TENSOR_APPLY3_CONTIG(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, CODE) \
 79 | { \
 80 |   int inOmp = omp_in_parallel(); \
 81 |   ptrdiff_t TH_TENSOR_size = TENSOR1->numel(); \
 82 |   PRAGMA(omp parallel if ((TH_TENSOR_size > TH_OMP_OVERHEAD_THRESHOLD) && (!inOmp))) \
 83 |   { \
 84 |     size_t num_threads = omp_get_num_threads(); \
 85 |     size_t tid = omp_get_thread_num(); \
 86 |     ptrdiff_t TH_TENSOR_offset = tid * (TH_TENSOR_size / num_threads); \
 87 |     ptrdiff_t TH_TENSOR_end = tid == num_threads - 1 ? TH_TENSOR_size : \
 88 |       TH_TENSOR_offset + TH_TENSOR_size / num_threads; \
 89 |     ptrdiff_t TENSOR1##_len = TH_TENSOR_end - TH_TENSOR_offset; \
 90 |     TYPE1 *TENSOR1##_data = TENSOR1->template data<TYPE1>() + TH_TENSOR_offset; \
 91 |     TYPE2 *TENSOR2##_data = TENSOR2->template data<TYPE2>() + TH_TENSOR_offset; \
 92 |     TYPE3 *TENSOR3##_data = TENSOR3->template data<TYPE3>() + TH_TENSOR_offset; \
 93 |     CODE \
 94 |   } \
 95 | }
 96 | #else
 97 | #define TH_TENSOR_APPLY3_CONTIG(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, CODE) \
 98 | { \
 99 |   TYPE1 *TENSOR1##_data = TENSOR1->template data<TYPE1>(); \
100 |   TYPE2 *TENSOR2##_data = TENSOR2->template data<TYPE2>(); \
101 |   TYPE3 *TENSOR3##_data = TENSOR3->template data<TYPE3>(); \
102 |   ptrdiff_t TENSOR1##_len = TENSOR1->numel(); \
103 |   CODE \
104 | }
105 | #endif
106 | 


--------------------------------------------------------------------------------
/src/ComplexTypeInfo.h:
--------------------------------------------------------------------------------
 1 | #ifndef COMPLEX_TYPE_INFO_H
 2 | #define COMPLEX_TYPE_INFO_H
 3 | 
 4 | #include "General.h"
 5 | 
 6 | namespace at {
 7 | 
 8 | template <typename T, Backend device>
 9 | struct TypeInfo;
10 | 
11 | template <>
12 | struct TypeInfo<float, Backend::CPU> {
13 |     using scalar_t = float;
14 |     using precision_t = float;
15 | 
16 |     static const auto scalar_type = ScalarType::Float;
17 |     static const auto type_id = TypeID::CPUFloat;
18 | };
19 | 
20 | template <>
21 | struct TypeInfo<double, Backend::CPU> {
22 |     using scalar_t = double;
23 |     using precision_t = double;
24 | 
25 |     static const auto scalar_type = ScalarType::Double;
26 |     static const auto type_id = TypeID::CPUDouble;
27 | };
28 | 
29 | template <>
30 | struct TypeInfo<std::complex<float>, Backend::CPU> {
31 |     using scalar_t = std::complex<float>;
32 |     using precision_t = float;
33 | 
34 |     static const auto scalar_type = ScalarType::ComplexFloat;
35 |     static const auto type_id = TypeID::CPUComplexFloat;
36 | };
37 | 
38 | template <>
39 | struct TypeInfo<std::complex<double>, Backend::CPU> {
40 |     using scalar_t = std::complex<double>;
41 |     using precision_t = double;
42 | 
43 |     static const auto scalar_type = ScalarType::ComplexDouble;
44 |     static const auto type_id = TypeID::CPUComplexDouble;
45 | };
46 | 
47 | 
48 | template <typename T>
49 | using CPUTypeInfo = TypeInfo<T, Backend::CPU>;
50 | 
51 | template <typename T, Backend device>
52 | using ComplexTypeInfo = TypeInfo<std::complex<T>, device>;
53 | 
54 | template <typename PrecisionType>
55 | using CPUComplexTypeInfo = ComplexTypeInfo<PrecisionType, Backend::CPU>;
56 | 
57 | } // at
58 | 
59 | #endif // COMPLEX_TYPE_INFO_H


--------------------------------------------------------------------------------
/src/General.h:
--------------------------------------------------------------------------------
 1 | #ifndef GENERAL_H
 2 | #define GENERAL_H
 3 | 
 4 | #include <ATen/detail/ComplexHooksInterface.h>
 5 | #include <ATen/Type.h>
 6 | #include <ATen/CPUFloatType.h>
 7 | 
 8 | #include <TH/THTensor.hpp>
 9 | // #include "THTensorApply.h"
10 | #include <TH/THTensorApply.h>
11 | 
12 | #include <c10/core/TensorImpl.h>
13 | #include <ATen/CPUGenerator.h>
14 | #include <c10/core/Allocator.h>
15 | #include <c10/core/DeviceGuard.h>
16 | #include <ATen/NativeFunctions.h>
17 | #include <c10/core/UndefinedTensorImpl.h>
18 | #include <c10/util/intrusive_ptr.h>
19 | #include <ATen/Utils.h>
20 | #include <ATen/WrapDimUtils.h>
21 | #include <c10/util/Half.h>
22 | #include <c10/util/Optional.h>
23 | 
24 | #include <cstddef>
25 | #include <functional>
26 | #include <memory>
27 | #include <utility>
28 | 
29 | #include "ATen/Config.h"
30 | 
31 | #endif // GENERAL_H
32 | 


--------------------------------------------------------------------------------
/src/SIMD/AVX.h:
--------------------------------------------------------------------------------
 1 | #ifndef AVX_H
 2 | #define AVX_H
 3 | 
 4 | #if defined(__AVX__)
 5 | #ifndef _MSC_VER
 6 | #include <x86intrin.h>
 7 | #else
 8 | #include <intrin.h>
 9 | #endif
10 | 
11 | #include <complex>
12 | 
13 | namespace simd {
14 | 
15 | template <typename T>
16 | struct AVX {
17 |     static inline void copy(T *y, const T *x, const ptrdiff_t n);
18 |     static inline void fill(T *z, const T c, const ptrdiff_t n);
19 |     static inline void cdiv(T *z, const T *x, const T *y, const ptrdiff_t n);
20 |     static inline void divs(T *z, const T *x, const T c, const ptrdiff_t n);
21 |     static inline void cmul(T *z, const T *x, const T *y, const ptrdiff_t n);
22 |     static inline void muls(T *z, const T *x, const T c, const ptrdiff_t n);
23 |     static inline void cadd(T *z, const T *x, const T c, const ptrdiff_t n);
24 | };
25 | 
26 | } // simd
27 | 
28 | #endif // defined(__AVX__)
29 | #endif // AVX_H


--------------------------------------------------------------------------------
/src/SIMD/AVX2.h:
--------------------------------------------------------------------------------
 1 | #ifndef AVX2_H
 2 | #define AVX2_H
 3 | 
 4 | #if defined(__AVX2__)
 5 | #ifndef _MSC_VER
 6 | #include <x86intrin.h>
 7 | #else
 8 | #include <intrin.h>
 9 | #endif
10 | 
11 | #include <complex>
12 | 
13 | namespace simd {
14 | 
15 | template <typename T>
16 | struct AVX2 {
17 |     static inline void copy(T *y, const T *x, const ptrdiff_t n);
18 |     static inline void fill(T *z, const T c, const ptrdiff_t n);
19 |     static inline void cdiv(T *z, const T *x, const T *y, const ptrdiff_t n);
20 |     static inline void divs(T *z, const T *x, const T c, const ptrdiff_t n);
21 |     static inline void cmul(T *z, const T *x, const T *y, const ptrdiff_t n);
22 |     static inline void muls(T *z, const T *x, const T c, const ptrdiff_t n);
23 |     static inline void cadd(T *z, const T *x, const T c, const ptrdiff_t n);
24 | };
25 | 
26 | } // simd
27 | 
28 | #endif // defined(__AVX__)
29 | #endif // AVX2_H


--------------------------------------------------------------------------------
/src/SIMD/Default.h:
--------------------------------------------------------------------------------
 1 | #ifndef DEFAULT_H
 2 | #define DEFAULT_H
 3 | 
 4 | #include <complex>
 5 | 
 6 | namespace simd {
 7 | 
 8 | template <typename T>
 9 | struct Default {
10 |     static inline void copy(T *y, const T *x, const ptrdiff_t n);
11 |     static inline void fill(T *z, const T c, const ptrdiff_t n);
12 |     static inline void cdiv(T *z, const T *x, const T *y, const ptrdiff_t n);
13 |     static inline void divs(T *z, const T *x, const T c, const ptrdiff_t n);
14 |     static inline void cmul(T *z, const T *x, const T *y, const ptrdiff_t n);
15 |     static inline void muls(T *y, const T *x, const T c, const ptrdiff_t n);
16 |     static inline void cadd(T *z, const T *x, const T *y, const T c, const ptrdiff_t n);
17 |     static inline void adds(T *y, const T *x, const T c, const ptrdiff_t n);
18 | };
19 | 
20 | } // simd
21 | 
22 | #include "DefaultImpl.h"
23 | 
24 | #endif // DEFAULT_H
25 | 


--------------------------------------------------------------------------------
/src/SIMD/DefaultImpl.h:
--------------------------------------------------------------------------------
  1 | #include "Default.h"
  2 | 
  3 | namespace simd {
  4 | 
  5 | template <typename T>
  6 | inline void Default<T>::copy(T *y, const T *x, const ptrdiff_t n) {
  7 |     ptrdiff_t i = 0;
  8 | 
  9 |     for(; i <n-4; i+=4)
 10 |     {
 11 |         x[i] = y[i];
 12 |         x[i+1] = y[i+1];
 13 |         x[i+2] = y[i+2];
 14 |         x[i+3] = y[i+3];
 15 |     }
 16 | 
 17 |     for(; i < n; i++)
 18 |         x[i] = y[i];
 19 | }
 20 | 
 21 | template <typename T>
 22 | inline void Default<T>::fill(T *x, const T c, const ptrdiff_t n) {
 23 |     ptrdiff_t i = 0;
 24 | 
 25 |     for(; i <n-4; i+=4)
 26 |     {
 27 |         x[i] = c;
 28 |         x[i+1] = c;
 29 |         x[i+2] = c;
 30 |         x[i+3] = c;
 31 |     }
 32 | 
 33 |     for(; i < n; i++)
 34 |         x[i] = c;
 35 | }
 36 | 
 37 | template <typename T>
 38 | inline void Default<T>::cadd(T *z, const T *x, const T *y, const T c, const ptrdiff_t n) {
 39 |     ptrdiff_t i = 0;
 40 | 
 41 |     for(; i<n-4; i+=4)
 42 |     {
 43 |         z[i] = x[i] + c * y[i];
 44 |         z[i+1] = x[i+1] + c * y[i+1];
 45 |         z[i+2] = x[i+2] + c * y[i+2];
 46 |         z[i+3] = x[i+3] + c * y[i+3];
 47 |     }
 48 | 
 49 |     for(; i<n; i++)
 50 |         z[i] = x[i] + c * y[i];
 51 | }
 52 | 
 53 | template <typename T>
 54 | inline void Default<T>::adds(T *y, const T *x, const T c, const ptrdiff_t n) {
 55 |     ptrdiff_t i = 0;
 56 | 
 57 |     for(; i<n-4; i+=4)
 58 |     {
 59 |         y[i] = x[i] + c;
 60 |         y[i+1] = x[i+1] + c;
 61 |         y[i+2] = x[i+2] + c;
 62 |         y[i+3] = x[i+3] + c;
 63 |     }
 64 | 
 65 |     for(; i<n; i++)
 66 |         y[i] = x[i] + c;
 67 | }
 68 | 
 69 | template <typename T>
 70 | inline void Default<T>::cmul(T *z, const T *x, const T*y, const ptrdiff_t n) {
 71 |     ptrdiff_t i = 0;
 72 | 
 73 |     for(; i <n-4; i+=4)
 74 |     {
 75 |         z[i] = x[i] * y[i];
 76 |         z[i+1] = x[i+1] * y[i+1];
 77 |         z[i+2] = x[i+2] * y[i+2];
 78 |         z[i+3] = x[i+3] * y[i+3];
 79 |     }
 80 | 
 81 |     for(; i < n; i++)
 82 |         z[i] = x[i] * y[i];
 83 | }
 84 | 
 85 | template <typename T>
 86 | inline void Default<T>::muls(T *y, const T *x, const T c, const ptrdiff_t n)
 87 | {
 88 |     ptrdiff_t i = 0;
 89 | 
 90 |     for(; i <n-4; i+=4)
 91 |     {
 92 |         y[i] = x[i] * c;
 93 |         y[i+1] = x[i+1] * c;
 94 |         y[i+2] = x[i+2] * c;
 95 |         y[i+3] = x[i+3] * c;
 96 |     }
 97 | 
 98 |     for(; i < n; i++)
 99 |         y[i] = x[i] * c;
100 | }
101 | 
102 | template <typename T>
103 | inline void Default<T>::cdiv(T *z, const T *x, const T *y, const ptrdiff_t n)
104 | {
105 |     ptrdiff_t i = 0;
106 | 
107 |     for(; i<n-4; i+=4)
108 |     {
109 |         z[i] = x[i] / y[i];
110 |         z[i+1] = x[i+1] / y[i+1];
111 |         z[i+2] = x[i+2] / y[i+2];
112 |         z[i+3] = x[i+3] / y[i+3];
113 |     }
114 | 
115 |     for(; i < n; i++)
116 |         z[i] = x[i] / y[i];
117 | }
118 | 
119 | template <typename T>
120 | inline void Default<T>::divs(T *y, const T *x, const T c, const ptrdiff_t n)
121 | {
122 |     ptrdiff_t i = 0;
123 | 
124 |     for(; i<n-4; i+=4)
125 |     {
126 |         y[i] = x[i] / c;
127 |         y[i+1] = x[i+1] / c;
128 |         y[i+2] = x[i+2] / c;
129 |         y[i+3] = x[i+3] / c;
130 |     }
131 | 
132 |     for(; i < n; i++)
133 |         y[i] = x[i] / c;
134 | }
135 | 
136 | } // simd


--------------------------------------------------------------------------------
/src/SIMD/SIMD.h:
--------------------------------------------------------------------------------
1 | #ifndef SIMD_H
2 | #define SIMD_H
3 | 
4 | #include "Default.h"
5 | #include "AVX.h"
6 | #include "AVX2.h"
7 | 
8 | #endif // SIMD_H
9 | 


--------------------------------------------------------------------------------
/src/THTensorApply.h:
--------------------------------------------------------------------------------
  1 | #ifndef TH_TENSOR_APPLY_INC
  2 | #define TH_TENSOR_APPLY_INC
  3 | 
  4 | /*
  5 |  * The basic strategy for apply is as follows:
  6 |  *
  7 |  * 1. Starting with the outermost index, loop until we reach a dimension where the
  8 |  * data is no longer contiguous, i.e. the stride at that dimension is not equal to
  9 |  * the size of the tensor defined by the outer dimensions. Let's call this outer
 10 |  * (contiguous) tensor A. Note that if the Tensor is contiguous, then A is equal
 11 |  * to the entire Tensor. Let's call the inner tensor B.
 12 |  *
 13 |  * 2. We loop through the indices in B, starting at its outermost dimension. For
 14 |  * example, if B is a 2x2 matrix, then we do:
 15 |  *
 16 |  * B[0][0]
 17 |  * B[0][1]
 18 |  * B[1][0]
 19 |  * B[1][1]
 20 |  *
 21 |  * We set the offset into the underlying storage as (storageOffset + stride_B * index_B),
 22 |  * i.e. basically we compute the offset into the storage as we would normally for a
 23 |  * Tensor. But because we are guaranteed the subsequent data is contiguous in memory, we
 24 |  * can simply loop for sizeof(A) iterations and perform the operation, without having to
 25 |  * follow the order described by the strides of A.
 26 |  *
 27 |  * 3. As an optimization, we merge dimensions of A that are contiguous in memory. For
 28 |  * example, if A is a 3x3x3x3 tensor narrowed from a 3x3x4x3 tensor, then the first two
 29 |  * dimensions can be merged for the purposes of APPLY, reducing the number of nested
 30 |  * loops.
 31 |  */
 32 | 
 33 | #define __TH_TENSOR_APPLYX_PREAMBLE(TYPE, TENSOR, DIM, ALLOW_CONTIGUOUS) \
 34 |   TYPE *TENSOR##_data = NULL; \
 35 |   int64_t *TENSOR##_counter = NULL, *TENSOR##_sizes = NULL, *TENSOR##_strides = NULL, *TENSOR##_dimOffset = NULL; \
 36 |   int64_t TENSOR##_stride = 0, TENSOR##_size = 0, TENSOR##_dim = 0, TENSOR##_i, TENSOR##_n; \
 37 |   int TENSOR##_contiguous = ALLOW_CONTIGUOUS && DIM < 0; \
 38 |   TENSOR##_n = 1; \
 39 |   for(TENSOR##_i = 0; TENSOR##_i < TENSOR->dim(); TENSOR##_i++) \
 40 |     TENSOR##_n *= TENSOR->size(TENSOR##_i); \
 41 | \
 42 |   if(TENSOR->is_empty()) \
 43 |     TH_TENSOR_APPLY_hasFinished = 1; \
 44 |   else \
 45 |   { \
 46 |     TENSOR##_data = THTensor_getStoragePtr(TENSOR)->template data<TYPE>()+TENSOR->storage_offset(); \
 47 |     TENSOR##_size = 1; \
 48 |     TENSOR##_stride = 1; \
 49 |     for(TENSOR##_i = THTensor_nDimensionLegacyAll(TENSOR)-1; TENSOR##_i >= 0; TENSOR##_i--) { \
 50 |       if(THTensor_sizeLegacyNoScalars(TENSOR, TENSOR##_i) != 1) { \
 51 |         if(THTensor_strideLegacyNoScalars(TENSOR, TENSOR##_i) == TENSOR##_size && TENSOR##_i != DIM) \
 52 |           TENSOR##_size *= THTensor_sizeLegacyNoScalars(TENSOR, TENSOR##_i); \
 53 |         else{ \
 54 |           TENSOR##_contiguous = 0; \
 55 |           break; \
 56 |         } \
 57 |       } \
 58 |     } \
 59 |     if (!TENSOR##_contiguous) { \
 60 |       /* Find the dimension of contiguous sections */ \
 61 |       TENSOR##_dim = 1; \
 62 |       for(TENSOR##_i = THTensor_nDimensionLegacyAll(TENSOR)-2; TENSOR##_i >= 0; TENSOR##_i--) \
 63 |       { \
 64 |         if(TENSOR->stride(TENSOR##_i) != TENSOR->stride(TENSOR##_i+1) * TENSOR->size(TENSOR##_i+1) || TENSOR##_i == DIM || TENSOR##_i+1 == DIM) \
 65 |           TENSOR##_dim++; \
 66 |       } \
 67 |       /* Allocate an array of 3*dim elements, where dim is the number of contiguous sections */ \
 68 |       TENSOR##_counter = (int64_t*)THAlloc(sizeof(int64_t)*(3*TENSOR##_dim)); \
 69 |       TENSOR##_sizes = TENSOR##_counter + TENSOR##_dim; \
 70 |       TENSOR##_strides = TENSOR##_counter + 2*TENSOR##_dim; \
 71 |       TH_TENSOR_dim_index = TENSOR##_dim-1; \
 72 |       TENSOR##_dimOffset = (DIM == THTensor_nDimensionLegacyAll(TENSOR)-1) ? &TENSOR##_i : &TENSOR##_counter[DIM]; \
 73 |       TENSOR##_sizes[TH_TENSOR_dim_index] = THTensor_sizeLegacyNoScalars(TENSOR, THTensor_nDimensionLegacyAll(TENSOR)-1); \
 74 |       TENSOR##_strides[TH_TENSOR_dim_index] = THTensor_strideLegacyNoScalars(TENSOR, THTensor_nDimensionLegacyAll(TENSOR)-1); \
 75 |       /* TENSOR##_counter tracks where we are in the storage. The offset into the */ \
 76 |       /* storage is given by storage_offset + (i * j), where i is the stride */ \
 77 |       /* vector and j is tensor_counter vector. This sets the starting position for the loop. */ \
 78 |       for(TENSOR##_i = TENSOR##_dim-1; TENSOR##_i >= 0; --TENSOR##_i) { \
 79 |         TENSOR##_counter[TENSOR##_i] = 0; \
 80 |       } \
 81 |       for(TENSOR##_i = THTensor_nDimensionLegacyAll(TENSOR)-2; TENSOR##_i >= 0; --TENSOR##_i) { \
 82 |         if (TENSOR->stride(TENSOR##_i) == TENSOR->stride(TENSOR##_i+1) * TENSOR->size(TENSOR##_i+1) && TENSOR##_i != DIM && TENSOR##_i+1 != DIM) { \
 83 |           TENSOR##_sizes[TH_TENSOR_dim_index] = TENSOR->size(TENSOR##_i) * TENSOR##_sizes[TH_TENSOR_dim_index]; \
 84 |           if (DIM != THTensor_nDimensionLegacyAll(TENSOR)-1 && TENSOR##_i < DIM) \
 85 |             TENSOR##_dimOffset--; \
 86 |         } else { \
 87 |           --TH_TENSOR_dim_index; \
 88 |           TENSOR##_sizes[TH_TENSOR_dim_index] = TENSOR->size(TENSOR##_i); \
 89 |           TENSOR##_strides[TH_TENSOR_dim_index] = TENSOR->stride(TENSOR##_i); \
 90 |         } \
 91 |       } \
 92 |       /* Size of the inner most section */ \
 93 |       TENSOR##_size = TENSOR##_sizes[TENSOR##_dim-1]; \
 94 |       /* Stride of the inner most section */ \
 95 |       TENSOR##_stride = TENSOR##_strides[TENSOR##_dim-1]; \
 96 |     } \
 97 |     else{\
 98 |       TENSOR##_dim = 1;\
 99 |       TENSOR##_counter = (int64_t*)THAlloc(sizeof(int64_t)*3);\
100 |       TENSOR##_sizes = TENSOR##_counter + 1;\
101 |       TENSOR##_strides = TENSOR##_counter + 2;\
102 |       TENSOR##_sizes[0] = TENSOR##_n;\
103 |       TENSOR##_strides[0] = 1;\
104 |       TENSOR##_size = TENSOR##_sizes[0];\
105 |       TENSOR##_stride = TENSOR##_strides[0];\
106 |     }\
107 |   } \
108 |   TENSOR##_i = 0;
109 | 
110 | #define  __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR, ALWAYS_UPDATE) \
111 |   if(TENSOR##_i == TENSOR##_size || ALWAYS_UPDATE) \
112 |   { \
113 |     if(TENSOR##_contiguous) \
114 |       break; \
115 | \
116 |     if(TENSOR##_dim == 1) \
117 |        break; \
118 | \
119 |     /* Reset pointer to beginning of loop */ \
120 |     TENSOR##_data -= TENSOR##_size*TENSOR##_stride; \
121 |     for(TENSOR##_i = TENSOR##_dim-2; TENSOR##_i >= 0; TENSOR##_i--) \
122 |     { \
123 |       TENSOR##_counter[TENSOR##_i]++; \
124 |       /* Jump ahread by the stride of this dimension */ \
125 |       TENSOR##_data += TENSOR##_strides[TENSOR##_i]; \
126 | \
127 |       if(TENSOR##_counter[TENSOR##_i]  == TENSOR##_sizes[TENSOR##_i]) \
128 |       { \
129 |         if(TENSOR##_i == 0) \
130 |         { \
131 |           TH_TENSOR_APPLY_hasFinished = 1; \
132 |           break; \
133 |         } \
134 |           else \
135 |         { \
136 |           /* Reset the pointer to the beginning of the chunk defined by this dimension */ \
137 |           TENSOR##_data -= TENSOR##_counter[TENSOR##_i]*TENSOR##_strides[TENSOR##_i]; \
138 |           TENSOR##_counter[TENSOR##_i] = 0; \
139 |         } \
140 |       } \
141 |       else \
142 |         break; \
143 |     } \
144 |     TENSOR##_i = 0; \
145 |   } \
146 | 
147 | #define TH_TENSOR_APPLY3_D(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, DIM, CODE) \
148 | { \
149 |   int TH_TENSOR_APPLY_hasFinished = 0; \
150 |   int64_t TH_TENSOR_dim_index = 0; \
151 |   __TH_TENSOR_APPLYX_PREAMBLE(TYPE1, TENSOR1, DIM, 1) \
152 |   __TH_TENSOR_APPLYX_PREAMBLE(TYPE2, TENSOR2, DIM, 1) \
153 |   __TH_TENSOR_APPLYX_PREAMBLE(TYPE3, TENSOR3, DIM, 1) \
154 |                                                                         \
155 |   int elements_equal = 1;                                               \
156 |   if(TENSOR1##_n != TENSOR2##_n) {                                      \
157 |     elements_equal = 0;                                                 \
158 |   }                                                                     \
159 |   else if(TENSOR1##_n != TENSOR3##_n) {                                 \
160 |     elements_equal = 0;                                                 \
161 |   }                                                                     \
162 |   if (elements_equal == 0) {                                            \
163 |     AT_ERROR("inconsistent tensor size, expected ",                     \
164 |             #TENSOR1, " ", TENSOR1->sizes(), ", ",                      \
165 |             #TENSOR2, " ", TENSOR2->sizes(), " and ",                   \
166 |             #TENSOR3, " ", TENSOR3->sizes(), " to have the same "       \
167 |             "number of elements, but got ", TENSOR1##_n, ", ",          \
168 |             TENSOR2##_n, " and ", TENSOR3##_n, " elements respectively"); \
169 |   }                                                                     \
170 |                                                                         \
171 |   while(!TH_TENSOR_APPLY_hasFinished) \
172 |   { \
173 |     /* Loop through the inner most region of the Tensor */ \
174 |     for(; TENSOR1##_i < TENSOR1##_size && TENSOR2##_i < TENSOR2##_size && TENSOR3##_i < TENSOR3##_size; TENSOR1##_i++, TENSOR2##_i++, TENSOR3##_i++, TENSOR1##_data += TENSOR1##_stride, TENSOR2##_data += TENSOR2##_stride, TENSOR3##_data += TENSOR3##_stride) /* 0 et pas TENSOR##_dim! */ \
175 |     { \
176 |       CODE \
177 |     } \
178 |     __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR1, 0) \
179 |     __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR2, 0) \
180 |     __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR3, 0) \
181 |   } \
182 |   if(TENSOR1##_counter != NULL) \
183 |     THFree(TENSOR1##_counter); \
184 |   if(TENSOR2##_counter != NULL) \
185 |     THFree(TENSOR2##_counter); \
186 |   if(TENSOR3##_counter != NULL) \
187 |     THFree(TENSOR3##_counter); \
188 | }
189 | 
190 | #define TH_TENSOR_APPLY3(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, CODE) \
191 |   TH_TENSOR_APPLY3_D(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, -1, CODE)
192 | 
193 | #define TH_TENSOR_APPLY2_D(TYPE1, TENSOR1, TYPE2, TENSOR2, DIM, CODE) \
194 | { \
195 |   int TH_TENSOR_APPLY_hasFinished = 0; \
196 |   int64_t TH_TENSOR_dim_index = 0; \
197 |   __TH_TENSOR_APPLYX_PREAMBLE(TYPE1, TENSOR1, DIM, 1) \
198 |   __TH_TENSOR_APPLYX_PREAMBLE(TYPE2, TENSOR2, DIM, 1) \
199 | \
200 |     if(TENSOR1##_n != TENSOR2##_n) {                                    \
201 |       AT_ERROR("inconsistent tensor size, expected ",                   \
202 |       #TENSOR1, " ", TENSOR1->sizes(), " and ",                         \
203 |       #TENSOR2, " ", TENSOR2->sizes(),                                  \
204 |       " to have the same number of elements, but got ",                 \
205 |       TENSOR1##_n, " and ", TENSOR2##_n, " elements respectively");     \
206 |     }                                                                   \
207 |   while(!TH_TENSOR_APPLY_hasFinished) \
208 |   { \
209 |     /* Loop through the inner most region of the Tensor */ \
210 |     for(; TENSOR1##_i < TENSOR1##_size && TENSOR2##_i < TENSOR2##_size; TENSOR1##_i++, TENSOR2##_i++, TENSOR1##_data += TENSOR1##_stride, TENSOR2##_data += TENSOR2##_stride) /* 0 et pas TENSOR##_dim! */ \
211 |     { \
212 |       CODE \
213 |     } \
214 |     __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR1, 0) \
215 |     __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR2, 0) \
216 |   } \
217 |   if(TENSOR1##_counter != NULL) \
218 |     THFree(TENSOR1##_counter); \
219 |   if(TENSOR2##_counter != NULL) \
220 |     THFree(TENSOR2##_counter); \
221 | }
222 | 
223 | #define TH_TENSOR_APPLY2(TYPE1, TENSOR1, TYPE2, TENSOR2, CODE) \
224 |   TH_TENSOR_APPLY2_D(TYPE1, TENSOR1, TYPE2, TENSOR2, -1, CODE)
225 | 
226 | #define TH_TENSOR_APPLY_D(TYPE, TENSOR, DIM, CODE) \
227 | { \
228 |   int TH_TENSOR_APPLY_hasFinished = 0; \
229 |   int64_t TH_TENSOR_dim_index = 0; \
230 |   __TH_TENSOR_APPLYX_PREAMBLE(TYPE, TENSOR, DIM, 0) \
231 | \
232 |   while(!TH_TENSOR_APPLY_hasFinished) \
233 |   { \
234 |     /* Loop through the inner most region of the Tensor */ \
235 |     for(; TENSOR##_i < TENSOR##_size; TENSOR##_i++, TENSOR##_data += TENSOR##_stride) /* 0 et pas TENSOR##_dim! */ \
236 |     { \
237 |       CODE \
238 |     } \
239 |     __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR, 1) \
240 |   } \
241 |   THFree(TENSOR##_counter); \
242 | }
243 | 
244 | #define TH_TENSOR_APPLY(TYPE, TENSOR, CODE) \
245 |   TH_TENSOR_APPLY_D(TYPE, TENSOR, -1, CODE)
246 | 
247 | 
248 | #ifdef _OPENMP
249 | 
250 | #ifndef _WIN32
251 | #define PRAGMA(P) _Pragma(#P)
252 | #else
253 | #define PRAGMA(P) __pragma(P)
254 | #endif
255 | 
256 | #include <omp.h>
257 | 
258 | /*
259 |  * Calcuate the memory offset of an element in a tensor. The strategy is below:
260 |  *
261 |  * 1. convert the line index(the index of the element) to the indexs(coordinates) in the tensor.
262 |  *    It can hinted by a classical problem: Getting each individual digit from a whole integer(Decimal base).
263 |  *    A N-digit decimal base number could be view as a N-dimension tensor and the sizes of the tensor are 10.
264 |  *    So the value the  whole interger is the line index. And the digits could be viewed as the indexes in
265 |  *    different dimentions.
266 |  *
267 |  * 2. convert the indexs(coordinates) in the tensor to the memory offset.
268 |  *
269 |  *  You can get the detailes in the for-statement iterations.
270 |  *
271 |  * The macro is only used in the first element in each thread. For the rest, the memory offset could update
272 |  * according to info of the tensor in order to get better performance. So we should also record the each
273 |  * indexs in coresponding dimension of first element.
274 |  * The recorded info is stored in the TENSOR##_counter_tmp.
275 |  *
276 |  */
277 | #define __TH_TENSOR_APPLYX_CAL_MEMORY_OFFSET(TENSOR) \
278 |   int64_t *TENSOR##_counter_tmp = (int64_t*)THAlloc(sizeof(int64_t) * TENSOR##_dim);                 \
279 |   ptrdiff_t TENSOR##_memory_offset = 0;                                                              \
280 |   ptrdiff_t TENSOR##_quot = line_index_start;                                                        \
281 |   for (TENSOR##_i = TENSOR##_dim-1; TENSOR##_i>=0; --TENSOR##_i) {                                   \
282 |     TENSOR##_counter_tmp[TENSOR##_i] = TENSOR##_quot%TENSOR##_sizes[TENSOR##_i];                     \
283 |     TENSOR##_quot /= TENSOR##_sizes[TENSOR##_i];                                                     \
284 |     TENSOR##_memory_offset += TENSOR##_counter_tmp[TENSOR##_i] * TENSOR##_strides[TENSOR##_i];         \
285 |   }
286 | 
287 | /*
288 |  * The macro update the indexes in each dimension of the elements except for the first one allocated in
289 |  * each thread.
290 |  * For a tensor, if the index of some dimension reaches the size of the corresponding dimension. It will carry and clear.
291 |  * If the index of next high dimension does do, the index of next high dimension should carry and clear, too.
292 |  *
293 |  * The momery offset calculatation is a little confusing. If current index carries, the current index is set to 0. So
294 |  * the offset should decrease by size*stride of the last dimension. Then the index next high dimension increases by 1. So
295 |  * the offset should increase by stride of next high dimension.
296 |  */
297 | #define __TH_TENSOR_APPLYX_UPDATE_COUNTERS_OMP(TENSOR) \
298 |   if(TENSOR##_i == TENSOR##_size && TENSOR##_dim > 1){ /*reaches the edge*/ \
299 |     int TENSOR##_carry_coord = 1;                      /*set carry flag to true*/ \
300 |     TENSOR##_start = 0;                                /*the current index be cleared to 0*/\
301 |     TENSOR##_data -= TENSOR##_size * TENSOR##_stride;  /*the momery offset reset to the first one in current dimension  */\
302 |     for(TENSOR##_i = TENSOR##_dim - 2; (TENSOR##_i >= 0) && (TENSOR##_carry_coord); TENSOR##_i--){ \
303 |       TENSOR##_counter_tmp[TENSOR##_i]++;             /*the index of next high dimension update*/ \
304 |       TENSOR##_data += TENSOR##_strides[TENSOR##_i];   /*memory offset increase by stride of next high dimension*/\
305 |       if(TENSOR##_counter_tmp[TENSOR##_i] == TENSOR##_sizes[TENSOR##_i]){ /*The next high dimension also carry, continue
306 |         to clear and carry*/\
307 |         TENSOR##_data -= TENSOR##_sizes[TENSOR##_i] * TENSOR##_strides[TENSOR##_i]; \
308 |         TENSOR##_counter_tmp[TENSOR##_i] = 0; \
309 |       } else { \
310 |         TENSOR##_carry_coord = 0; \
311 |       } \
312 |     } \
313 |   } else { \
314 |     TENSOR##_start = TENSOR##_i;                               \
315 |   }
316 | 
317 | 
318 | #define TH_TENSOR_APPLY_REDUCTION_OMP(TYPE, TENSOR, OPERATION, CODE, OMP_THRESHOLD) \
319 | {\
320 |   int TENSOR##Contg = THTensor_(isContiguous)(TENSOR);                      \
321 |   ptrdiff_t TENSOR##Size = THTensor_(nElement)(TENSOR);                     \
322 |   if(TENSOR##Contg){                                                        \
323 |     ptrdiff_t iter = 0;                                                     \
324 |     TYPE *rp = THTensor_getStoragePtr(TENSOR)->template data<TYPE>()+TENSOR->storage_offset();         \
325 |     PRAGMA( omp parallel for if (TENSOR##Size > OMP_THRESHOLD * 10) firstprivate(rp) reduction(OPERATION) ) \
326 |     for (iter = 0; iter < TENSOR##Size; iter++) { \
327 |       TYPE *TENSOR##_data = rp+iter;                    \
328 |       CODE                                         \
329 |     }                                              \
330 |   } else {                                         \
331 |     int TH_TENSOR_APPLY_hasFinished = 0;           \
332 |     int64_t TH_TENSOR_dim_index = 0;               \
333 |     __TH_TENSOR_APPLYX_PREAMBLE(TYPE, TENSOR, -1, 1);\
334 |     if (0 == TH_TENSOR_APPLY_hasFinished) {          \
335 |       PRAGMA(omp parallel if (TENSOR##Size > OMP_THRESHOLD) firstprivate(TENSOR##_data, TENSOR##_sizes, TENSOR##_strides, TENSOR##_dim, TENSOR##_stride, TENSOR##_size, TENSOR##_i) reduction(OPERATION))\
336 |       {\
337 |         size_t num_threads = omp_get_num_threads();\
338 |         size_t tid = omp_get_thread_num();\
339 |         size_t line_seg_length_avg = TENSOR##Size/num_threads;                                                     \
340 |         ptrdiff_t line_index_start = tid * line_seg_length_avg;                                            \
341 |         ptrdiff_t line_seg_length = (tid == num_threads - 1)? (TENSOR##Size - line_index_start):line_seg_length_avg;  \
342 |         __TH_TENSOR_APPLYX_CAL_MEMORY_OFFSET(TENSOR);\
343 |         TENSOR##_data += TENSOR##_memory_offset;\
344 |         ptrdiff_t count = 0;\
345 |         ptrdiff_t TENSOR##_start = TENSOR##_counter_tmp[TENSOR##_dim - 1];\
346 |         while(count < line_seg_length){\
347 |           for(TENSOR##_i=TENSOR##_start; (count < line_seg_length)&&(TENSOR##_i < TENSOR##_size); ++TENSOR##_i, ++count){\
348 |             CODE\
349 |             TENSOR##_data += TENSOR##_stride;\
350 |           }\
351 |           if(count < line_seg_length){\
352 |             __TH_TENSOR_APPLYX_UPDATE_COUNTERS_OMP(TENSOR);\
353 |           }\
354 |         }\
355 |         if(TENSOR##_counter_tmp != NULL) \
356 |           THFree(TENSOR##_counter_tmp); \
357 |       }\
358 |     }\
359 |     if(TENSOR##_counter != NULL)\
360 |       THFree(TENSOR##_counter);\
361 |   }\
362 | }
363 | 
364 | #define TH_TENSOR_APPLY2_OMP(SIZE, CONTIG1, CONTIG2, TYPE1, TENSOR1, TYPE2, TENSOR2, CODE, OMP_THRESHOLD) \
365 | {                                                                                              \
366 |   /* for advanced searching index*/                                                            \
367 |   if( CONTIG1 && CONTIG2 ){                                                                    \
368 |     TYPE1 *rp = THTensor_getStoragePtr(TENSOR1)->template data<TYPE1>()+TENSOR1->storage_offset();                        \
369 |     TYPE2 *tp = THTensor_getStoragePtr(TENSOR2)->template data<TYPE2>()+TENSOR2->storage_offset();                        \
370 |     ptrdiff_t iter = 0;                                                                        \
371 |     if(tp != (TYPE2*)rp) {                                                                             \
372 |       PRAGMA(ivdep) \
373 |       PRAGMA( omp parallel for if (SIZE > OMP_THRESHOLD * 10) firstprivate(rp, tp)) \
374 |       for (iter = 0; iter < SIZE; iter++) {                             \
375 |         TYPE2 *TENSOR2##_data = tp+iter;                                \
376 |         TYPE1 *TENSOR1##_data = rp+iter;                                \
377 |         CODE                                                            \
378 |       }\
379 |     } else {\
380 |       PRAGMA(simd) \
381 |       PRAGMA( omp parallel for if (SIZE > OMP_THRESHOLD * 10) firstprivate(rp, tp) )  \
382 |       for (iter = 0; iter < SIZE; iter++) {\
383 |         TYPE2* TENSOR2##_data = tp+iter;\
384 |         TYPE1* TENSOR1##_data = rp+iter;\
385 |         CODE                                \
386 |       }\
387 |     }\
388 |   } else {                               \
389 |     /* The following strategy is not easy to understand.
390 |      * 1. Collapse the dimension of the tensors in order to decrease the number of nested loops.
391 |      * 2. Calculate the numbers of elements allocated in each thread and the line index of the first one.
392 |      * 3. Calculate the memory offset of the first element and the indexes in each dimension of the
393 |      *    first one.
394 |      * 4. iterate all elements in each thread. update the indexes in each dimension of the rest.
395 |     */                                                                                             \
396 |     int TH_TENSOR_APPLY_hasFinished = 0; \
397 |     int64_t TH_TENSOR_dim_index = 0;     \
398 |     /*step 1*/                           \
399 |     __TH_TENSOR_APPLYX_PREAMBLE(TYPE2, TENSOR2, -1, 1) \
400 |     __TH_TENSOR_APPLYX_PREAMBLE(TYPE1, TENSOR1, -1, 1) \
401 |     if (0 == TH_TENSOR_APPLY_hasFinished) {            \
402 |       PRAGMA(omp parallel if (SIZE > OMP_THRESHOLD) firstprivate(TENSOR2##_data, TENSOR2##_sizes, TENSOR2##_strides, TENSOR2##_dim, TENSOR2##_stride, TENSOR2##_size, TENSOR2##_i, TENSOR1##_data, TENSOR1##_sizes, TENSOR1##_strides, TENSOR1##_dim, TENSOR1##_stride, TENSOR1##_size, TENSOR1##_i)) \
403 |       {                                   \
404 |         /*step 2*/                                                                 \
405 |         size_t num_threads = omp_get_num_threads();                                                        \
406 |         size_t tid = omp_get_thread_num();                                                                 \
407 |         size_t line_seg_length_avg = SIZE/num_threads;                                                     \
408 |         ptrdiff_t line_index_start = tid * line_seg_length_avg;                                            \
409 |         ptrdiff_t line_seg_length = (tid == num_threads - 1)? (SIZE - line_index_start):line_seg_length_avg;  \
410 |         /* step 3*/                                                                                        \
411 |         __TH_TENSOR_APPLYX_CAL_MEMORY_OFFSET(TENSOR2);                                                            \
412 |         __TH_TENSOR_APPLYX_CAL_MEMORY_OFFSET(TENSOR1);                                                            \
413 |         TENSOR2##_data += TENSOR2##_memory_offset;                                              \
414 |         TENSOR1##_data += TENSOR1##_memory_offset;                                              \
415 |         ptrdiff_t count = 0;                                                                               \
416 |         ptrdiff_t TENSOR2##_start =  TENSOR2##_counter_tmp[TENSOR2##_dim-1];                               \
417 |         ptrdiff_t TENSOR1##_start =  TENSOR1##_counter_tmp[TENSOR1##_dim-1];                               \
418 |         /* step 4*/                                                                                        \
419 |         while (count < line_seg_length) {                                                                     \
420 |           for(TENSOR2##_i=TENSOR2##_start, TENSOR1##_i = TENSOR1##_start; ((count < line_seg_length) && (TENSOR2##_i < TENSOR2##_size) && (TENSOR1##_i < TENSOR1##_size)); ++TENSOR2##_i, ++TENSOR1##_i, ++count){ \
421 |             CODE                                                                                               \
422 |             TENSOR2##_data += TENSOR2##_stride;                                                                \
423 |             TENSOR1##_data += TENSOR1##_stride;                                                                \
424 |           }                                                                                                    \
425 |           if (count < line_seg_length){                                                                           \
426 |             __TH_TENSOR_APPLYX_UPDATE_COUNTERS_OMP(TENSOR2);                                                   \
427 |             __TH_TENSOR_APPLYX_UPDATE_COUNTERS_OMP(TENSOR1);                                                   \
428 |           }                                                                                                    \
429 |         }                                                                                                      \
430 |         if(TENSOR1##_counter_tmp != NULL) \
431 |           THFree(TENSOR1##_counter_tmp); \
432 |         if(TENSOR2##_counter_tmp != NULL) \
433 |           THFree(TENSOR2##_counter_tmp); \
434 |       } \
435 |     }                                                                                                        \
436 |     if(TENSOR2##_counter != NULL) \
437 |       THFree(TENSOR2##_counter); \
438 |     if(TENSOR1##_counter != NULL) \
439 |       THFree(TENSOR1##_counter);\
440 |   }\
441 | }
442 | 
443 | #define TH_TENSOR_APPLY3_OMP(SIZE, CONTIG1, CONTIG2, CONTIG3, TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, CODE, OMP_THRESHOLD) \
444 | {                                                                             \
445 |   /* for adveanced searching index*/                                                                    \
446 |   if(CONTIG1 && CONTIG2 && CONTIG3){                                                                    \
447 |     TYPE1 *rp = THTensor_getStoragePtr(TENSOR1)->template data<TYPE1>()+TENSOR1->storage_offset();                                 \
448 |     TYPE2 *tp = THTensor_getStoragePtr(TENSOR2)->template data<TYPE2>()+TENSOR2->storage_offset();                                 \
449 |     TYPE3 *srcp = THTensor_getStoragePtr(TENSOR3)->template data<TYPE3>()+TENSOR3->storage_offset();                               \
450 |     ptrdiff_t iter = 0;\
451 |     if(tp != (TYPE2*)rp) {                                                                             \
452 |       PRAGMA(ivdep) \
453 |       PRAGMA( omp parallel for if (SIZE > OMP_THRESHOLD * 10) )  \
454 |       for (iter = 0; iter < SIZE; iter++) {\
455 |         TYPE1 *TENSOR1##_data = rp+iter;\
456 |         TYPE2 *TENSOR2##_data = tp+iter; \
457 |         TYPE3 *TENSOR3##_data = srcp+iter;\
458 |         CODE                                \
459 |       } \
460 |     } else {\
461 |       PRAGMA(simd) \
462 |       PRAGMA( omp parallel for if (SIZE > OMP_THRESHOLD * 10) )  \
463 |       for (iter = 0; iter < SIZE; iter++) {\
464 |         TYPE1 *TENSOR1##_data = rp+iter;\
465 |         TYPE2 *TENSOR2##_data = tp+iter; \
466 |         TYPE3 *TENSOR3##_data = srcp+iter;\
467 |         CODE                                \
468 |       } \
469 |     }\
470 |   } else{              \
471 |     int TH_TENSOR_APPLY_hasFinished = 0;\
472 |     int64_t TH_TENSOR_dim_index = 0;\
473 |     __TH_TENSOR_APPLYX_PREAMBLE(TYPE1, TENSOR1, -1, 1) \
474 |     __TH_TENSOR_APPLYX_PREAMBLE(TYPE2, TENSOR2, -1, 1) \
475 |     __TH_TENSOR_APPLYX_PREAMBLE(TYPE3, TENSOR3, -1, 1) \
476 |     if (0 == TH_TENSOR_APPLY_hasFinished) {            \
477 |       PRAGMA(omp parallel if (SIZE > OMP_THRESHOLD) firstprivate(TENSOR1##_data, TENSOR1##_sizes, TENSOR1##_strides, TENSOR1##_dim, TENSOR1##_stride, TENSOR1##_size, TENSOR1##_i, TENSOR2##_data, TENSOR2##_sizes, TENSOR2##_strides, TENSOR2##_dim, TENSOR2##_stride, TENSOR2##_size, TENSOR2##_i, TENSOR3##_data, TENSOR3##_sizes, TENSOR3##_strides, TENSOR3##_dim, TENSOR3##_stride, TENSOR3##_size, TENSOR3##_i))\
478 |       {\
479 |         size_t num_threads = omp_get_num_threads();\
480 |         size_t tid = omp_get_thread_num();\
481 |         size_t line_seg_length_avg = SIZE/num_threads;                                                     \
482 |         ptrdiff_t line_index_start = tid * line_seg_length_avg;                                            \
483 |         ptrdiff_t line_seg_length = (tid == num_threads - 1)? (SIZE - line_index_start):line_seg_length_avg;  \
484 |         __TH_TENSOR_APPLYX_CAL_MEMORY_OFFSET(TENSOR1);\
485 |         __TH_TENSOR_APPLYX_CAL_MEMORY_OFFSET(TENSOR2);\
486 |         __TH_TENSOR_APPLYX_CAL_MEMORY_OFFSET(TENSOR3);\
487 |         TENSOR1##_data += TENSOR1##_memory_offset;\
488 |         TENSOR2##_data += TENSOR2##_memory_offset;\
489 |         TENSOR3##_data += TENSOR3##_memory_offset;\
490 |         ptrdiff_t count = 0;\
491 |         ptrdiff_t TENSOR1##_start = TENSOR1##_counter_tmp[TENSOR1##_dim - 1];\
492 |         ptrdiff_t TENSOR2##_start = TENSOR2##_counter_tmp[TENSOR2##_dim - 1];\
493 |         ptrdiff_t TENSOR3##_start = TENSOR3##_counter_tmp[TENSOR3##_dim - 1];\
494 |         while(count < line_seg_length){\
495 |           for(TENSOR1##_i=TENSOR1##_start, TENSOR2##_i=TENSOR2##_start,TENSOR3##_i=TENSOR3##_start; (count<line_seg_length)&&(TENSOR1##_i<TENSOR1##_size)&&(TENSOR2##_i<TENSOR2##_size)&&(TENSOR3##_i<TENSOR3##_size); ++TENSOR1##_i,++TENSOR2##_i,++TENSOR3##_i,++count){\
496 |             CODE\
497 |             TENSOR1##_data += TENSOR1##_stride;\
498 |             TENSOR2##_data += TENSOR2##_stride;\
499 |             TENSOR3##_data += TENSOR3##_stride;\
500 |           }\
501 |           if(count < line_seg_length){\
502 |             __TH_TENSOR_APPLYX_UPDATE_COUNTERS_OMP(TENSOR1);\
503 |             __TH_TENSOR_APPLYX_UPDATE_COUNTERS_OMP(TENSOR2);\
504 |             __TH_TENSOR_APPLYX_UPDATE_COUNTERS_OMP(TENSOR3);\
505 |           }\
506 |         }\
507 |         if(TENSOR1##_counter_tmp != NULL) \
508 |           THFree(TENSOR1##_counter_tmp); \
509 |         if(TENSOR2##_counter_tmp != NULL) \
510 |           THFree(TENSOR2##_counter_tmp); \
511 |         if(TENSOR3##_counter_tmp != NULL) \
512 |           THFree(TENSOR3##_counter_tmp);\
513 |       }\
514 |     }\
515 |     if(TENSOR1##_counter != NULL)\
516 |       THFree(TENSOR1##_counter);\
517 |     if(TENSOR2##_counter != NULL)\
518 |       THFree(TENSOR2##_counter);\
519 |     if(TENSOR3##_counter != NULL)\
520 |       THFree(TENSOR3##_counter);\
521 |   }\
522 | }
523 | 
524 | #endif
525 | #endif
526 | 


--------------------------------------------------------------------------------
/src/Utils.h:
--------------------------------------------------------------------------------
 1 | #ifndef UTILS_H
 2 | #define UTILS_H
 3 | 
 4 | #include "General.h"
 5 | 
 6 | namespace at {
 7 | 
 8 | 
 9 | std::vector<int64_t> calculate_contiguous_stride(IntList sizes) {
10 |     std::vector<int64_t> strides(sizes.size());
11 |     int ndim = sizes.size();
12 | 
13 |     for (int d = ndim - 1; d >= 0; d--)
14 |     {
15 |         if (d == ndim - 1) {
16 |             strides[d] = 1;
17 |         }
18 |         else {
19 |             strides[d] = std::max<int64_t>(sizes[d+1], 1) * strides[d+1];
20 |         }
21 |     }
22 |     return strides;
23 | }
24 | 
25 | // Maybe someone wants to move this in Tensor/TensorImpl?
26 | bool is_transposed(const TensorImpl *self) {
27 |     int64_t max_stride = 1;
28 |     int64_t size_max_stride = 1;
29 |     int64_t z = 1;
30 |     int d;
31 |     for (d = 0; d < self->dim(); ++d) {
32 |       if (self->stride(d) == 0 && self->size(d) != 1)
33 |         return false;
34 |       if (self->stride(d) > max_stride) {
35 |         max_stride = self->stride(d);
36 |         size_max_stride = self->size(d);
37 |       }
38 |       z *= self->size(d);
39 |     }
40 |     if (z == max_stride * size_max_stride) {
41 |       return true;
42 |     }
43 |     return false;
44 | }
45 | 
46 | }
47 | 
48 | #endif // UTILS_H


--------------------------------------------------------------------------------
/src/module.cpp:
--------------------------------------------------------------------------------
 1 | #include <torch/extension.h>
 2 | #include "CPUComplexType.h"
 3 | 
 4 | namespace at {
 5 | 
 6 | struct ComplexHooks : public at::ComplexHooksInterface {
 7 |     ComplexHooks(ComplexHooksArgs) {};
 8 |     void registerComplexTypes(Context* context) const override {
 9 |         context->registerType(Backend::CPU, CPUComplexTypeInfo<float>::scalar_type, new CPUComplexType<float>());
10 |         context->registerType(Backend::CPU, CPUComplexTypeInfo<double>::scalar_type, new CPUComplexType<double>());
11 |     }
12 | };
13 | 
14 | 
15 | REGISTER_COMPLEX_HOOKS(ComplexHooks);
16 | 
17 | }
18 | 
19 | // create the extension module
20 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
21 | //    m.def("cpptest", &cpptest, "cpp test");
22 | }
23 | 


--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | from torch_complex import torch
2 | 
3 | a = torch.ones(2, 2, dtype=torch.complex128)
4 | # b = torch.ones(2, 2, dtype=torch.complex128)
5 | 
6 | # torch.matmul(a, b)
7 | 2j * a
8 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Roger-luo/pytorch-complex/9c52991228cbacb1519e22d54ede164d9ef63f3a/tests/__init__.py


--------------------------------------------------------------------------------
/tests/test_tensor.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from torch_complex import torch
 3 | 
 4 | class TestComplexTensor(unittest.TestCase):
 5 | 
 6 |     def test_empty(self):
 7 |         torch.empty(2, 2, dtype=torch.complex64)
 8 |         torch.empty(2, 2, dtype=torch.complex128)
 9 | 
10 |     def test_indexing(self):
11 |         t = torch.empty(2, 2, dtype=torch.complex128)
12 |         t[1]
13 |         t[1, 1]
14 | 
15 |     def test_fill(self):
16 |         t = torch.empty(2, 2, dtype=torch.complex128)
17 |         t.fill_(1.0)
18 |         t.fill_(1.0 + 2.0j)
19 |     
20 |     def test_scalar_binary_op(self):
21 |         a = torch.ones(2, 2, dtype=torch.complex128)
22 |         2 * a
23 |         2 / a
24 |         2 - a
25 |         2 + a
26 | 
27 |     def test_blas(self):
28 |         pass
29 | 
30 |     def test_rand(self):
31 |         pass
32 | 
33 | if __name__ == '__main__':
34 |     unittest.main()
35 | 


--------------------------------------------------------------------------------
/torch_complex/__init__.py:
--------------------------------------------------------------------------------
 1 | __version__ = '0.0.1'
 2 | 
 3 | import torch
 4 | import torch_complex.cpp
 5 | # import importlib
 6 | 
 7 | # # pretend to be pytorch
 8 | # __globals__ = globals()
 9 | # for each in dir(torch):
10 | #     __globals__[each] = getattr(torch, each)
11 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
 1 | [tox]
 2 | envlist =
 3 |     py27,
 4 |     py35,
 5 |     py36,
 6 |     pypy,
 7 |     pypy3,
 8 | 
 9 | [testenv]
10 | passenv = *
11 | deps =
12 |     coverage
13 |     pytest
14 | commands =
15 |     python setup.py --quiet clean develop
16 |     coverage run --parallel-mode -m pytest
17 |     coverage combine --append
18 |     coverage report -m
19 | 


--------------------------------------------------------------------------------