├── install ├── __init__.py └── versioning.py ├── tfopgen ├── __init__.py ├── version.py ├── bin │ └── tfopgen ├── examples │ ├── simple.yml │ └── complex_phase.yml ├── templates │ ├── cuda_source.j2 │ ├── main_header.j2 │ ├── Makefile.j2 │ ├── cpp_header.j2 │ ├── cpp_source.j2 │ ├── test_source.j2 │ └── cuda_header.j2 ├── util.py └── create_op.py ├── setup.cfg ├── MANIFEST.in ├── CHANGELOG.rst ├── LICENSE.txt ├── .travis.yml ├── setup.py ├── .gitignore └── README.rst /install/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tfopgen/__init__.py: -------------------------------------------------------------------------------- 1 | from tfopgen.create_op import run 2 | import tfopgen.version -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal = 1 3 | 4 | [metadata] 5 | license_file = LICENSE.txt -------------------------------------------------------------------------------- /tfopgen/version.py: -------------------------------------------------------------------------------- 1 | # Do not edit this file, pipeline versioning is governed by git tags 2 | __version__="0.2.6" 3 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include tfopgen templates/*.j2 2 | recursive-include tfopgen examples/*.yml 3 | include LICENSE.txt -------------------------------------------------------------------------------- /tfopgen/bin/tfopgen: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | import tfopgen 5 | 6 | tfopgen.run(sys.argv[1:]) 7 | -------------------------------------------------------------------------------- /CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | CHANGELOG 2 | ========= 3 | 4 | 0.2.1 5 | ----- 6 | 7 | - Fix python3 unicode handling in versioning code 8 | 9 | 0.2.0 10 | ----- 11 | 12 | Initial Release -------------------------------------------------------------------------------- /tfopgen/examples/simple.yml: -------------------------------------------------------------------------------- 1 | --- 2 | project: tensorflow 3 | library: custom 4 | name: Custom 5 | inputs: 6 | - ["input: int32", [100, null]] 7 | outputs: 8 | - ["output: int32", [10, 10]] 9 | type_attrs: 10 | - "FT: {float, double} = DT_FLOAT" 11 | other_attrs: [] 12 | doc: "How now brown cow" 13 | 14 | -------------------------------------------------------------------------------- /tfopgen/examples/complex_phase.yml: -------------------------------------------------------------------------------- 1 | --- 2 | project: astronomy 3 | library: fourier 4 | name: ComplexPhase 5 | type_attrs: 6 | - "FT: {float, double} = DT_FLOAT" 7 | - "CT: {complex64, complex128} = DT_COMPLEX64" 8 | inputs: 9 | - ["uvw: FT", [null, null, 3]] # (ntime, nbl, 3) 10 | - ["frequency: FT", [null]] # (nchan, ) 11 | - ["lm: FT", [null, 2]] # (nsrc, 2) 12 | outputs: 13 | - ["complex_phase: CT", [null, null, null, null]] 14 | doc: > 15 | Given tensors 16 | (1) of (U, V, W) baseline coordinates with shape (ntime, nbl, 3) 17 | (2) of (L, M) sky coordinates with shape (nsrc, 2) 18 | (3) of frequencies, 19 | 
compute the complex phase with shape (nsrc, ntime, nbl, nchan) -------------------------------------------------------------------------------- /tfopgen/templates/cuda_source.j2: -------------------------------------------------------------------------------- 1 | #if GOOGLE_CUDA 2 | 3 | #include "{{cuda_header_file}}" 4 | 5 | {{project_namespace_start}} 6 | {{op_namespace_start}} 7 | 8 | {% for perm in op_tf_type_perms %} 9 | {% set template_specialise = ['GPUDevice'] + perm -%} 10 | // Register a GPU kernel for {{op_name}} 11 | // handling permutation {{perm}} 12 | REGISTER_KERNEL_BUILDER( 13 | Name("{{op_name}}") 14 | {% for t in perm -%} 15 | .TypeConstraint<{{t}}>("{{op_type_attrs[loop.index0].name}}") 16 | {% endfor -%} 17 | .Device(tensorflow::DEVICE_GPU), 18 | {{op_name}}<{{template_specialise | join(", ")}}>); 19 | {% endfor %} 20 | 21 | 22 | {{op_namespace_stop}} 23 | {{project_namespace_stop}} 24 | 25 | #endif // #if GOOGLE_CUDA -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Simon Perkins 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: trusty 2 | language: python 3 | 4 | # Need a modern C++11 compiler 5 | addons: 6 | apt: 7 | sources: 8 | - ubuntu-toolchain-r-test 9 | packages: 10 | - g++-5 11 | - gcc-5 12 | 13 | python: 14 | - "2.7" 15 | - "3.5" 16 | - "3.6" 17 | 18 | install: 19 | - export CXX="g++-5" CC="gcc-5" 20 | - pip install tensorflow 21 | - pip install . 
22 | 23 | # command to run tests 24 | script: 25 | # Create a test directory and change to it 26 | - mkdir -p $TRAVIS_BUILD_DIR/test 27 | 28 | # Test the simple custom operator 29 | - cd $TRAVIS_BUILD_DIR/test 30 | - tfopgen $TRAVIS_BUILD_DIR/tfopgen/examples/simple.yml 31 | - export LD_LIBRARY_PATH=$TRAVIS_BUILD_DIR/test/custom:$LD_LIBRARY_PATH 32 | - cd $TRAVIS_BUILD_DIR/test/custom && make && python test_custom.py 33 | 34 | # Test the complex phase operator 35 | - cd $TRAVIS_BUILD_DIR/test 36 | - tfopgen $TRAVIS_BUILD_DIR/tfopgen/examples/complex_phase.yml 37 | - export LD_LIBRARY_PATH=$TRAVIS_BUILD_DIR/test/fourier:$LD_LIBRARY_PATH 38 | - cd $TRAVIS_BUILD_DIR/test/fourier && make && python test_complex_phase.py 39 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from setuptools import setup, find_packages 4 | 5 | from install.versioning import maintain_version 6 | 7 | def readme(filename): 8 | return open(filename).read() 9 | 10 | setup(name='tfopgen', 11 | description='Generates tensorflow custom operator boilerplate', 12 | long_description=readme('README.rst'), 13 | url='https://github.com/sjperkins/tfopgen', 14 | classifiers=[ 15 | "Development Status :: 3 - Alpha", 16 | "Intended Audience :: Developers", 17 | "License :: OSI Approved :: MIT License", 18 | "Operating System :: OS Independent", 19 | "Programming Language :: Python", 20 | "Topic :: Software Development :: Libraries :: Python Modules", 21 | ], 22 | author='Simon Perkins', 23 | author_email='simon.perkins@gmail.com', 24 | license='MIT', 25 | packages=find_packages(), 26 | include_package_data=True, 27 | install_requires=[ 28 | 'jinja2 >= 2.8.0', 29 | 'numpy >= 1.12.0', 30 | 'ruamel.yaml >= 0.13.7' 31 | ], 32 | scripts=[os.path.join('tfopgen', 'bin', 'tfopgen')], 33 | version=maintain_version(os.path.join('tfopgen', 'version.py')), 34 | zip_safe=True) 35 | -------------------------------------------------------------------------------- /tfopgen/templates/main_header.j2: -------------------------------------------------------------------------------- 1 | #ifndef {{main_header_guard}} 2 | #define {{main_header_guard}} 3 | 4 | // {{project}} namespace start and stop defines 5 | #define {{project_namespace_start}} namespace {{project}} { 6 | #define {{project_namespace_stop}} } 7 | 8 | // {{py_op_name}} namespace start and stop defines 9 | #define {{op_namespace_start}} namespace {{py_op_name}} { 10 | #define {{op_namespace_stop}} } 11 | 12 | {{project_namespace_start}} 13 | {{op_namespace_start}} 14 | 15 | // General definition of the {{op_name}} op, which will be specialised in: 16 | // - {{cpp_header_file}} for CPUs 17 | // - {{cuda_header_file}} for CUDA devices 18 | // Concrete template instantiations of this class are provided in: 19 | // - {{cpp_source_file}} for CPUs 20 | // - {{cuda_source_file}} for CUDA devices 21 | {% set class_template = op_type_attrs | map(attribute="name") | list -%} 22 | {% set class_template = ["Device"] + class_template -%} 23 | {% set class_template = class_template | format_list("typename %s") | join(", ") -%} 24 | {% set class_template = "template <" + class_template + ">" -%} 25 | 26 | {{ class_template }} 27 | class {{op_name}} {}; 28 | 29 | {{op_namespace_stop}} 30 | {{project_namespace_stop}} 31 | 32 | #endif // #ifndef {{main_header_guard}} -------------------------------------------------------------------------------- /install/versioning.py:
-------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | 4 | # Versioning code here, based on 5 | # http://blogs.nopcode.org/brainstorm/2013/05/20/pragmatic-python-versioning-via-setuptools-and-git-tags/ 6 | def maintain_version(version_file): 7 | """ 8 | Get the version from git tags, and write it to version.py. 9 | When git is not available (PyPI package), use the version in version.py. 10 | """ 11 | 12 | try: 13 | version_git = (subprocess 14 | .check_output(['git', 'describe', '--tags']) 15 | .rstrip() 16 | .decode('utf-8')) 17 | except (OSError, subprocess.CalledProcessError): 18 | with open(version_file, 'r') as fh: 19 | # Text mode reads already yield str on python 3, so no .decode() here 20 | version_git = (fh.read() 21 | .strip() 22 | .split('=')[-1] 23 | .replace('"','')) 24 | 25 | version_msg = ("# Do not edit this file, " 26 | "pipeline versioning is governed by git tags") 27 | 28 | with open(version_file, 'w') as fh: 29 | components = [version_msg, os.linesep, 30 | '__version__="', version_git, '"'] 31 | fh.write(''.join(components)) 32 | 33 | return version_git -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/python 3 | 4 | ### Python ### 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | env/ 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *,cover 51 | .hypothesis/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # dotenv 84 | .env 85 | 86 | # virtualenv 87 | .venv/ 88 | venv/ 89 | ENV/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # End of https://www.gitignore.io/api/python 98 | -------------------------------------------------------------------------------- /tfopgen/templates/Makefile.j2: -------------------------------------------------------------------------------- 1 | # Tensorflow includes and defines 2 | TF_CFLAGS = $(shell python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))') 3 | TF_LFLAGS = $(shell python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))') 4 | TF_CUDA = $(shell python -c 'import tensorflow as tf; print(int(tf.test.is_built_with_cuda()))') 5 | 6 | TF_FLAGS=-D_MWAITXINTRIN_H_INCLUDED -D_FORCE_INLINES -D_GLIBCXX_USE_CXX11_ABI=0 7 | 8 | # Dependencies 9 | DEPDIR:=.d 10 | $(shell mkdir -p $(DEPDIR) >/dev/null) 11 | DEPFLAGS=-MT $@ -MMD -MP -MF $(DEPDIR)/$*.d 12 | 13 | # Define our sources, compiling CUDA code if it's enabled 14 | ifeq ($(TF_CUDA), 1) 15 | SOURCES=$(wildcard *.cpp *.cu) 16 | else 17 | SOURCES=$(wildcard *.cpp) 18 | endif 19 | 20 | # Define objects and shared_library 21 | OBJECTS=$(addsuffix .o, $(basename $(SOURCES))) 22 | LIBRARY={{shared_library}} 23 | 24 | # Compiler flags 25 | CPPFLAGS =-std=c++11 $(TF_CFLAGS) -fPIC -fopenmp \ 26 | -O2 -march=native -mtune=native 27 | NVCCFLAGS =-std=c++11 -DGOOGLE_CUDA=$(TF_CUDA) $(TF_CFLAGS) $(INCLUDES) \ 28 | -x cu --compiler-options "-fPIC" --gpu-architecture=sm_30 -lineinfo 29 | 30 | LDFLAGS = -fPIC -fopenmp $(TF_LFLAGS) 31 | 32 | ifeq ($(TF_CUDA), 1) 33 | LDFLAGS := $(LDFLAGS) -L /usr/local/cuda/lib64 34 | LDFLAGS := $(LDFLAGS) -lcuda -lcudart 35 | endif 36 | 37 | # Compiler directives 38 | COMPILE.cpp = g++ $(DEPFLAGS) $(CPPFLAGS) -c 39 | COMPILE.nvcc = nvcc --compiler-options " $(DEPFLAGS)" $(NVCCFLAGS) -c 40 | 41 | all : $(LIBRARY) 42 | 43 | %.o : %.cpp 44 | $(COMPILE.cpp) $< 45 | 46 | %.o : %.cu 47 | $(COMPILE.nvcc) $< 48 | 49 | clean : 50 | rm -f $(OBJECTS) $(LIBRARY) 51 | 52 | $(LIBRARY) : $(OBJECTS) 53 | g++ -shared $(OBJECTS) -o $(LIBRARY) $(LDFLAGS) 54 | 55 | $(DEPDIR)/%.d: ; 56 | .PRECIOUS: $(DEPDIR)/%.d 57 | 58 | -include $(patsubst %,$(DEPDIR)/%.d,$(basename $(SOURCES))) 59 | -------------------------------------------------------------------------------- /tfopgen/templates/cpp_header.j2: -------------------------------------------------------------------------------- 1 | #ifndef {{cpp_header_guard}} 2 | #define {{cpp_header_guard}} 3 | 4 | #include "{{main_header_file}}" 5 | 6 | // Required in order for Eigen::ThreadPoolDevice to be an actual type 7 | #define EIGEN_USE_THREADS 8 | 9 | #include "tensorflow/core/framework/op.h" 10 | #include
"tensorflow/core/framework/op_kernel.h" 11 | 12 | {{project_namespace_start}} 13 | {{op_namespace_start}} 14 | 15 | // For simpler partial specialisation 16 | typedef Eigen::ThreadPoolDevice CPUDevice; 17 | 18 | {% set op_templates = op_type_attrs | map(attribute="name") | list -%} 19 | {% set template_specialise = ['CPUDevice'] + op_templates -%} 20 | {% set template_specialise = template_specialise | join(", ") -%} 21 | {% set class_template = op_templates | format_list("typename %s") -%} 22 | 23 | // Specialise the {{op_name}} op for CPUs 24 | template <{{ class_template | join(", ") }}> 25 | class {{op_name}}<{{ template_specialise }}> : public tensorflow::OpKernel 26 | { 27 | public: 28 | explicit {{op_name}}(tensorflow::OpKernelConstruction * context) : 29 | tensorflow::OpKernel(context) {} 30 | 31 | void Compute(tensorflow::OpKernelContext * context) override 32 | { 33 | namespace tf = tensorflow; 34 | 35 | // Create reference to input Tensorflow tensors 36 | {% for item in op_inputs -%} 37 | const auto & in_{{ item.name }} = context->input({{loop.index0}}); 38 | {% endfor %} 39 | 40 | // Extract Eigen tensors 41 | {% for item in op_inputs -%} 42 | auto {{ item.name }} = in_{{ item.name }}.tensor<{{item.tf_type}}, {{ item.shape | length }}>(); 43 | {% endfor %} 44 | 45 | // Allocate output tensors 46 | {% for item in op_outputs -%} 47 | // Allocate space for output tensor '{{item.name}}' 48 | tf::Tensor * {{ item.name }}_ptr = nullptr; 49 | tf::TensorShape {{ item.name }}_shape = tf::TensorShape({ {{ item.shape | join(", ") | replace("None", "1") }} }); 50 | OP_REQUIRES_OK(context, context->allocate_output( 51 | {{loop.index0}}, {{ item.name }}_shape, &{{ item.name }}_ptr)); 52 | {% endfor %} 53 | } 54 | }; 55 | 56 | {{op_namespace_stop}} 57 | {{project_namespace_stop}} 58 | 59 | #endif // #ifndef {{cpp_header_guard}} -------------------------------------------------------------------------------- /tfopgen/templates/cpp_source.j2: -------------------------------------------------------------------------------- 1 | #include "{{cpp_header_file}}" 2 | 3 | #include "tensorflow/core/framework/shape_inference.h" 4 | 5 | {{project_namespace_start}} 6 | {{op_namespace_start}} 7 | 8 | using tensorflow::shape_inference::InferenceContext; 9 | using tensorflow::shape_inference::ShapeHandle; 10 | using tensorflow::shape_inference::DimensionHandle; 11 | using tensorflow::Status; 12 | 13 | auto shape_function = [](InferenceContext* c) { 14 | // Dummies for tests 15 | ShapeHandle input; 16 | DimensionHandle d; 17 | 18 | {% for item in op_inputs -%} 19 | // TODO. 
Check shape and dimension sizes for '{{item.name}}' 20 | ShapeHandle in_{{item.name}} = c->input({{loop.index0}}); 21 | // Assert '{{item.name}}' number of dimensions 22 | TF_RETURN_WITH_CONTEXT_IF_ERROR(c->WithRank(in_{{item.name}}, {{ item.shape | length }}, &input), 23 | "{{item.name}} must have shape {{item.shape|list}} but is " + 24 | c->DebugString(in_{{item.name}})); 25 | {% for dim in item.shape -%} 26 | {% if dim is not none -%} 27 | // Assert '{{item.name}}' dimension '{{loop.index0}}' size 28 | TF_RETURN_WITH_CONTEXT_IF_ERROR(c->WithValue(c->Dim(in_{{item.name}}, {{loop.index0}}), {{dim}}, &d), 29 | "{{item.name}} must have shape {{item.shape|list}} but is " + 30 | c->DebugString(in_{{item.name}})); 31 | {% endif -%} 32 | {% endfor %} 33 | {% endfor %} 34 | 35 | // TODO: Supply proper shapes for output variables here, 36 | // usually derived from input shapes 37 | // ShapeHandle output_1 = c->MakeShape({ 38 | // c->Dim(input_1, 0), // input_1 dimension 0 39 | // c->Dim(input_2, 1)}); // input_2 dimension 1 40 | 41 | {% for item in op_outputs -%} 42 | ShapeHandle out_{{item.name}} = c->MakeShape({ {{ item.shape | join(", ") | replace("None", "1")}} }); 43 | {% endfor %} 44 | {% for item in op_outputs -%} 45 | c->set_output({{loop.index0}}, out_{{item.name}}); 46 | {% endfor %} 47 | 48 | // printf("output shape %s\\n", c->DebugString(out).c_str()); 49 | 50 | return Status::OK(); 51 | }; 52 | 53 | // Register the {{op_name}} operator. 54 | REGISTER_OP("{{op_name}}") 55 | {% for item in op_inputs -%} 56 | .Input("{{item.name}}: {{item.type}}") 57 | {% endfor %} 58 | {%- for item in op_outputs -%} 59 | .Output("{{item.name}}: {{item.type}}") 60 | {% endfor %} 61 | {%- for item in op_type_attrs -%} 62 | .Attr("{{item.original}}") 63 | {% endfor -%} 64 | {%- for item in op_other_attrs -%} 65 | .Attr("{{item}}") 66 | {% endfor -%} 67 | .Doc(R"doc({{op_doc}})doc") 68 | .SetShapeFn(shape_function); 69 | 70 | {% for perm in op_tf_type_perms %} 71 | {% set template_specialise = ['CPUDevice'] + perm -%} 72 | // Register a CPU kernel for {{op_name}} 73 | // handling permutation {{perm}} 74 | REGISTER_KERNEL_BUILDER( 75 | Name("{{op_name}}") 76 | {% for t in perm -%} 77 | .TypeConstraint<{{t}}>("{{op_type_attrs[loop.index0].name}}") 78 | {% endfor -%} 79 | .Device(tensorflow::DEVICE_CPU), 80 | {{op_name}}<{{template_specialise | join(", ")}}>); 81 | {% endfor %} 82 | 83 | 84 | {{op_namespace_stop}} 85 | {{project_namespace_stop}} -------------------------------------------------------------------------------- /tfopgen/templates/test_source.j2: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | from tensorflow.python.client import device_lib 6 | 7 | class Test{{op_name}}(unittest.TestCase): 8 | """ Tests the {{op_name}} operator """ 9 | 10 | def setUp(self): 11 | # Load the custom operation library 12 | self.{{library}} = tf.load_op_library('{{shared_library}}') 13 | # Obtain a list of GPU device specifications ['/gpu:0', '/gpu:1', ...]
14 | self.gpu_devs = [d.name for d in device_lib.list_local_devices() 15 | if d.device_type == 'GPU'] 16 | 17 | def test_{{py_op_name}}(self): 18 | """ Test the {{op_name}} operator """ 19 | {%- set base_permute_types = op_type_attrs | map(attribute="name") | list %} 20 | {%- set permute_types = base_permute_types | join(", ") | trim %} 21 | 22 | {%- if op_np_type_perms | length > 0 %} 23 | # List of type constraints for testing this operator 24 | {% set comma = last_joiner(",") -%} 25 | type_permutations = [ 26 | {%- for perm in op_np_type_perms %} 27 | {{ perm | replace("'", "") | indent(4, True) }}{{comma(loop.last)}} 28 | {%- endfor -%} 29 | ] 30 | 31 | # Run test with the type combinations above 32 | for {{ permute_types }} in type_permutations: 33 | self._impl_test_{{py_op_name}}({{ permute_types }}) 34 | {%- else %} 35 | self._impl_test_{{py_op_name}}() 36 | {%- endif %} 37 | 38 | {% set permute_types = ["self"] + base_permute_types -%} 39 | {% set permute_types = permute_types | join(", ") -%} 40 | def _impl_test_{{py_op_name}}({{permute_types}}): 41 | """ Implementation of the {{op_name}} operator test """ 42 | 43 | # Create input variables 44 | {% for op in op_inputs -%} 45 | {{op.name}} = np.random.random(size={{op.shape | replace('None', '1')}}).astype({{op.np_type}}) 46 | {% endfor %} 47 | 48 | {% set arg_names = op_inputs | map(attribute="name") | format_list("'%s'") -%} 49 | {% set arg_names = "arg_names = [" + arg_names | join(", ") + "]" -%} 50 | {%- set np_args = op_inputs | map(attribute="name") | join(", ") -%} 51 | {%- set np_args = "np_args = [" + np_args + "]" -%} 52 | 53 | # Argument list 54 | {{ np_args | wordwrap(79-16) | indent(12)}} 55 | # Argument string name list 56 | {{ arg_names | wordwrap(79-16) | indent(12) }} 57 | # Construct tensorflow variables 58 | tf_args = [tf.Variable(v, name=n) for v, n in zip(np_args, arg_names)] 59 | 60 | def _pin_op(device, *tf_args): 61 | """ Pin operation to device """ 62 | with tf.device(device): 63 | return self.{{library}}.{{py_op_name}}(*tf_args) 64 | 65 | # Pin operation to CPU 66 | cpu_op = _pin_op('/cpu:0', *tf_args) 67 | 68 | # Run the op on all GPUs 69 | gpu_ops = [_pin_op(d, *tf_args) for d in self.gpu_devs] 70 | 71 | # Initialise variables 72 | init_op = tf.global_variables_initializer() 73 | 74 | with tf.Session() as S: 75 | S.run(init_op) 76 | S.run(cpu_op) 77 | S.run(gpu_ops) 78 | 79 | if __name__ == "__main__": 80 | unittest.main() 81 | -------------------------------------------------------------------------------- /tfopgen/util.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from collections import namedtuple 3 | import itertools 4 | import os 5 | import re 6 | 7 | import jinja2 8 | import ruamel.yaml 9 | 10 | FIRST_CAP_RE = re.compile('(.)([A-Z][a-z]+)') 11 | ALL_CAP_RE = re.compile('([a-z0-9])([A-Z])') 12 | 13 | 14 | def camel_to_snake_case(name): 15 | """ Convert CamelCase op names to snake case """ 16 | s1 = FIRST_CAP_RE.sub(r'\1_\2', name) 17 | return ALL_CAP_RE.sub(r'\1_\2', s1).lower() 18 | 19 | 20 | def header_guard(library, header_name): 21 | """ Derive a C++ header guard from the library and header name """ 22 | guard_str = header_name.replace('.', '_') 23 | return ''.join([library, '_', guard_str]).upper() 24 | 25 | 26 | def strip_and_split(s, sep): 27 | """ Split s on sep and strip the resulting elements """ 28 | return (c.strip() for c in s.split(sep)) 29 | 30 | 31 | def parse_args(args): 32 | """ Parse and return arguments """ 33 |
# NOTE: the configuration file must be supplied on the command line; 34 | # no default configuration file is shipped with the package 35 | 36 | parser = argparse.ArgumentParser("Tensorflow Custom Operator Generator") 37 | parser.add_argument('config', help="Configuration File") 38 | return parser.parse_args(args) 39 | 40 | 41 | def load_config(config_file): 42 | """ Load the configuration file """ 43 | with open(config_file, "r") as f: 44 | return ruamel.yaml.load(f, ruamel.yaml.RoundTripLoader) 45 | 46 | 47 | def jinja_env_factory(template_path): 48 | """ Creates a jinja environment, loading templates from template_path """ 49 | jinja_loader = jinja2.FileSystemLoader(template_path) 50 | jinja_env = jinja2.Environment(loader=jinja_loader, 51 | trim_blocks=False, lstrip_blocks=False) 52 | 53 | class LastJoiner(object): 54 | """ 55 | Opposite of jinja2.utils.Joiner. 56 | Won't emit separator on the last iteration. 57 | """ 58 | 59 | def __init__(self, sep=u', '): 60 | self.sep = sep 61 | 62 | def __call__(self, loop_last): 63 | if loop_last: 64 | return u'' 65 | 66 | return self.sep 67 | 68 | jinja_env.globals['last_joiner'] = LastJoiner 69 | 70 | # Create a filter for formatting a list 71 | jinja_env.filters['format_list'] = lambda l, p: [p % s for s in l] 72 | 73 | return jinja_env 74 | 75 | # Create types for Inputs/Outputs 76 | InOut = namedtuple("InOut", ["name", "type", 77 | "tf_type", "np_type", "shape"]) 78 | # Create types for Typed Attributes 79 | Attr = namedtuple("Attr", ["original", "name", "types", 80 | "tf_types", "np_types", "default"]) 81 | 82 | 83 | def parse_inout(s, shape): 84 | """ Parse .Input() and .Output() directives """ 85 | var, type_ = tuple(c.strip() for c in s.split(":")) 86 | 87 | if "*" in type_: 88 | raise ValueError("Failed to parse '{}'. " 89 | "List lengths are not yet supported".format(s)) 90 | 91 | from tensorflow.python.framework.dtypes import ( 92 | _STRING_TO_TF, 93 | _TYPE_TO_STRING, 94 | _TF_TO_NP) 95 | 96 | TF_TYPES = _TYPE_TO_STRING.values() 97 | tf_type = "tensorflow::" + type_ if type_ in TF_TYPES else type_ 98 | np_type = ("np." + _TF_TO_NP[_STRING_TO_TF[type_]].__name__ 99 | if type_ in _STRING_TO_TF else type_) 100 | 101 | # Set a default shape for variable if None exists 102 | shape = (1024, ) if shape is None else shape 103 | 104 | return InOut(var, type_, tf_type, np_type, shape) 105 | 106 | 107 | def parse_attr_type(s): 108 | """ 109 | Parse type attribute directives. For example 110 | "FT: {float, double} = DT_FLOAT" 111 | """ 112 | 113 | # Separate s into "FT" and "{float, double} = DT_FLOAT" 114 | var, types = tuple(strip_and_split(s, ":")) 115 | 116 | # Separate types into "{float, double}", "DT_FLOAT" 117 | split = types.split("=") 118 | default = split[1].strip() if len(split) > 1 else None 119 | types = split[0].strip() 120 | 121 | # Handle the multiple types case 122 | if types.startswith("{") and types.endswith("}"): 123 | types = tuple(c.strip() for c in types[1:-1].split(",")) 124 | else: 125 | types = (types,) 126 | 127 | from tensorflow.python.framework.dtypes import ( 128 | _STRING_TO_TF, 129 | _TYPE_TO_STRING, 130 | _TF_TO_NP) 131 | 132 | TF_TYPES = _TYPE_TO_STRING.values() 133 | tf_types = tuple("tensorflow::" + t if t in TF_TYPES else t for t in types) 134 | np_types = ["np." + _TF_TO_NP[_STRING_TO_TF[t]].__name__ 135 | if t in _STRING_TO_TF 136 | else "np."
+ t for t in types] 137 | 138 | return Attr(s, var, types, tf_types, np_types, default) 139 | -------------------------------------------------------------------------------- /tfopgen/templates/cuda_header.j2: -------------------------------------------------------------------------------- 1 | #if GOOGLE_CUDA 2 | 3 | #ifndef {{cuda_header_guard}} 4 | #define {{cuda_header_guard}} 5 | 6 | #include "{{main_header_file}}" 7 | 8 | // Required in order for Eigen::GpuDevice to be an actual type 9 | #define EIGEN_USE_GPU 10 | 11 | #include "tensorflow/core/framework/op.h" 12 | #include "tensorflow/core/framework/op_kernel.h" 13 | 14 | {{project_namespace_start}} 15 | {{op_namespace_start}} 16 | 17 | // For simpler partial specialisation 18 | typedef Eigen::GpuDevice GPUDevice; 19 | 20 | {% set op_templates = op_type_attrs | map(attribute="name") | list -%} 21 | 22 | {% if op_templates | length > 0 -%} 23 | {# Templated Case -#} 24 | 25 | {% set kernel_template = op_templates | format_list("typename %s") | join(", ") -%} 26 | {% set ltr = "<" + op_templates | join(", ") + ">"-%} 27 | {% set kernel_template = "template <" + kernel_template + "> " -%} 28 | // LaunchTraits struct defining 29 | // kernel block sizes for type permutations 30 | {{kernel_template}}struct LaunchTraits {}; 31 | 32 | {% for perm in op_tf_type_perms -%} 33 | {% set lt_template_specialise = perm | join(", ") -%} 34 | // Specialise for {{lt_template_specialise}} 35 | {% set lt_template_specialise = "<" + lt_template_specialise + ">" -%} 36 | // Should really be .cu file as this is a concrete type 37 | // but this works because this header is included only once 38 | template <> struct LaunchTraits{{lt_template_specialise}} 39 | { 40 | static constexpr int BLOCKDIMX = 1024; 41 | static constexpr int BLOCKDIMY = 1; 42 | static constexpr int BLOCKDIMZ = 1; 43 | }; 44 | {% endfor %} 45 | 46 | {% else -%} 47 | {# Untemplated Case -#} 48 | 49 | {% set kernel_template = "" %} 50 | {% set ltr = "" -%} 51 | {% set lt_template_specialise = "" %} 52 | // Should really be .cu file but this works because header is included once 53 | struct LaunchTraits 54 | { 55 | static constexpr int BLOCKDIMX = 1024; 56 | static constexpr int BLOCKDIMY = 1; 57 | static constexpr int BLOCKDIMZ = 1; 58 | }; 59 | 60 | {% endif -%} 61 | 62 | 63 | // CUDA kernel outline 64 | {{ kernel_template }} 65 | __global__ void {{kernel_name}}( 66 | {% for item in op_inputs -%} 67 | const {{ item.tf_type }} * in_{{ item.name }}, 68 | {% endfor %} 69 | {%- for item in op_outputs -%} 70 | {{ item.tf_type }} * out_{{ item.name }} 71 | {%- if loop.last %}){% else %},{%- endif %} 72 | {% endfor %} 73 | { 74 | // Shared memory usage unnecessary, but demonstrates use of 75 | // constant Trait members to create kernel shared memory.
76 | using LTr = LaunchTraits{{ltr}}; 77 | __shared__ int buffer[LTr::BLOCKDIMX]; 78 | 79 | int i = blockIdx.x*blockDim.x + threadIdx.x; 80 | 81 | if(i >= LTr::BLOCKDIMX) 82 | { return; } 83 | 84 | // Set shared buffer to thread index 85 | buffer[i] = i; 86 | } 87 | 88 | {% set template_specialise = ['GPUDevice'] + op_templates -%} 89 | {% set template_specialise = template_specialise | join(", ") -%} 90 | {% set class_template = op_templates | format_list("typename %s") -%} 91 | 92 | // Specialise the {{op_name}} op for GPUs 93 | template <{{ class_template | join(", ")}}> 94 | class {{op_name}}<{{ template_specialise }}> : public tensorflow::OpKernel 95 | { 96 | public: 97 | explicit {{op_name}}(tensorflow::OpKernelConstruction * context) : 98 | tensorflow::OpKernel(context) {} 99 | 100 | void Compute(tensorflow::OpKernelContext * context) override 101 | { 102 | namespace tf = tensorflow; 103 | 104 | // Create variables for input tensors 105 | {% for item in op_inputs -%} 106 | const auto & in_{{ item.name }} = context->input({{loop.index0}}); 107 | {% endfor %} 108 | 109 | // Allocate output tensors 110 | {% for item in op_outputs -%} 111 | // Allocate space for output tensor '{{item.name}}' 112 | tf::Tensor * {{ item.name }}_ptr = nullptr; 113 | tf::TensorShape {{ item.name}}_shape = tf::TensorShape({ {{ item.shape | join(", ") | replace("None", "1") }} }); 114 | OP_REQUIRES_OK(context, context->allocate_output( 115 | {{loop.index0}}, {{item.name}}_shape, &{{item.name}}_ptr)); 116 | {% endfor %} 117 | 118 | using LTr = LaunchTraits{{ltr}}; 119 | 120 | // Set up our CUDA thread block and grid 121 | dim3 block(LTr::BLOCKDIMX); 122 | dim3 grid(1); 123 | 124 | // Get pointers to flattened tensor data buffers 125 | {% for item in op_inputs -%} 126 | const auto fin_{{ item.name }} = in_{{ item.name }}.flat<{{item.tf_type}}>().data(); 127 | {% endfor %} 128 | {%- for item in op_outputs -%} 129 | auto fout_{{ item.name }} = {{ item.name }}_ptr->flat<{{item.tf_type}}>().data(); 130 | {% endfor %} 131 | 132 | // Get the GPU device 133 | const auto & device = context->eigen_device<GPUDevice>(); 134 | 135 | // Call the {{kernel_name}} CUDA kernel on the device's stream 136 | {{kernel_name}}{{ltr}} 137 | <<<grid, block, 0, device.stream()>>>( 138 | {% for item in op_inputs -%} 139 | fin_{{ item.name }}, 140 | {% endfor %} 141 | {%- for item in op_outputs -%} 142 | fout_{{ item.name }} 143 | {%- if loop.last %});{% else %},{%- endif %} 144 | {% endfor %} 145 | } 146 | }; 147 | 148 | {{op_namespace_stop}} 149 | {{project_namespace_stop}} 150 | 151 | #endif // #ifndef {{cuda_header_guard}} 152 | 153 | #endif // #if GOOGLE_CUDA -------------------------------------------------------------------------------- /tfopgen/create_op.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import os 3 | 4 | from tfopgen.util import (parse_args, load_config, parse_inout, 5 | parse_attr_type, camel_to_snake_case, 6 | header_guard, jinja_env_factory) 7 | 8 | 9 | def make_template_kwargs(op_name, py_op_name, 10 | project, library, op_inputs, op_outputs, 11 | op_type_attrs, op_other_attrs, op_doc): 12 | """ 13 | Creates a dictionary suitable for rendering the jinja2 templates in this package 14 | """ 15 | 16 | NB = '_namespace_start' 17 | NE = '_namespace_stop' 18 | 19 | type_constraints = [tuple(t for t in a.np_types) for a in op_type_attrs] 20 | 21 | # Permute the type constraints 22 | op_tf_type_perms = itertools.product(*(a.tf_types for a in op_type_attrs)) 23 | op_tf_type_perms = [list(p) for p in op_tf_type_perms] 24 | 25
| op_np_type_perms = itertools.product(*(a.np_types for a in op_type_attrs)) 26 | op_np_type_perms = [list(p) for p in op_np_type_perms] 27 | 28 | # Create dictionary with variables required for creating the templates 29 | template_kwargs = { 30 | # Names 31 | 'op_name': op_name, 32 | 'py_op_name': py_op_name, 33 | 'project': project, 34 | 'library': library, 35 | 'kernel_name': ''.join([library, '_', py_op_name]), 36 | 37 | # Operator inputs, outputs, attributes and documentation 38 | 'op_inputs': op_inputs, 39 | 'op_outputs': op_outputs, 40 | 'op_type_attrs': op_type_attrs, 41 | 'op_other_attrs': op_other_attrs, 42 | 'op_tf_type_perms': op_tf_type_perms, 43 | 'op_np_type_perms': op_np_type_perms, 44 | 'type_constraints': type_constraints, 45 | 'op_doc': op_doc, 46 | 47 | # Filenames 48 | 'main_header_file': ''.join([py_op_name, '_op.h']), 49 | 'cpp_header_file': ''.join([py_op_name, '_op_cpu.h']), 50 | 'cpp_source_file': ''.join([py_op_name, '_op_cpu.cpp']), 51 | 'cuda_header_file': ''.join([py_op_name, '_op_gpu.cuh']), 52 | 'cuda_source_file': ''.join([py_op_name, '_op_gpu.cu']), 53 | 'python_test_file': ''.join(['test_', py_op_name, '.py']), 54 | 'makefile': 'Makefile', 55 | 'shared_library': ''.join([library, '.so']), 56 | 57 | # C++ namespace 58 | 'project_namespace_start': ''.join([project, NB]).upper(), 59 | 'project_namespace_stop': ''.join([project, NE]).upper(), 60 | 'op_namespace_start': ''.join([project, '_', py_op_name, NB]).upper(), 61 | 'op_namespace_stop': ''.join([project, '_', py_op_name, NE]).upper(), 62 | } 63 | 64 | template_kwargs.update({ 65 | # C++ header guards 66 | 'main_header_guard': header_guard(library, template_kwargs['main_header_file']), 67 | 'cpp_header_guard': header_guard(library, template_kwargs['cpp_header_file']), 68 | 'cuda_header_guard': header_guard(library, template_kwargs['cuda_header_file']), 69 | }) 70 | 71 | return template_kwargs 72 | 73 | 74 | def run(args): 75 | """ 76 | Runs the operator generator 77 | 78 | Arguments: 79 | args: list 80 | List of command line arguments stripped of the program name. 81 | sys.argv[1:] is appropriate in most cases. 
82 | """ 83 | args = parse_args(args) 84 | cfg = load_config(args.config) 85 | 86 | try: 87 | op_name = cfg['name'] 88 | library = cfg['library'] 89 | project = cfg['project'] 90 | except KeyError as e: 91 | raise ValueError("Key '{}' was not present in '{}'" 92 | .format(e.args[0], args.config)) 93 | 94 | op_inputs = cfg.get('inputs', []) 95 | op_outputs = cfg.get('outputs', []) 96 | op_type_attrs = cfg.get('type_attrs', []) 97 | op_other_attrs = cfg.get('other_attrs', []) 98 | op_doc = cfg.get('doc', "Documentation") 99 | 100 | # Parse input ops 101 | op_inputs = [parse_inout(i, s) for i, s in op_inputs] 102 | 103 | # Parse output ops 104 | op_outputs = [parse_inout(o, s) for o, s in op_outputs] 105 | 106 | # Parse type constrained attrs 107 | op_type_attrs = [parse_attr_type(a) for a in op_type_attrs] 108 | 109 | # Snake case python version of the operator 110 | py_op_name = camel_to_snake_case(op_name) 111 | 112 | template_path = os.path.join(os.path.dirname(__file__), 'templates') 113 | jinja_env = jinja_env_factory(template_path) 114 | 115 | # Create library directory if it does not exist 116 | if not os.path.exists(library): 117 | os.makedirs(library) 118 | 119 | # Create dictionary for rendering jinja2 templates 120 | kwargs = make_template_kwargs(op_name, py_op_name, 121 | project, library, op_inputs, op_outputs, 122 | op_type_attrs, op_other_attrs, op_doc) 123 | 124 | def render(template, output): 125 | """ Hook to render template file to output """ 126 | with open(os.path.join(library, kwargs[output]), 'w') as f: 127 | header_template = jinja_env.get_template(template) 128 | f.write(header_template.render(**kwargs)) 129 | 130 | render('main_header.j2', 'main_header_file') 131 | render('cpp_header.j2', 'cpp_header_file') 132 | render('cpp_source.j2', 'cpp_source_file') 133 | render('cuda_header.j2', 'cuda_header_file') 134 | render('cuda_source.j2', 'cuda_source_file') 135 | render('test_source.j2', 'python_test_file') 136 | render('Makefile.j2', 'makefile') 137 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Tensorflow Custom Operator Code Outline Generator 2 | ================================================= 3 | 4 | .. image:: https://travis-ci.org/sjperkins/tfopgen.svg?branch=master 5 | :target: https://travis-ci.org/sjperkins/tfopgen 6 | 7 | Writing a tensorflow operator requires a fair amount of 8 | boilerplate C++ and CUDA code. This script generates code for the CPU 9 | and GPU version of a tensorflow operator. More specifically, given 10 | tensorflow ``inputs``, ``outputs`` and ``attribute``\ s, it generates: 11 | 12 | - C++ Header file that defines the operator class, templated on Device. 13 | - C++ Header file that defines the CPU implementation of the operator. 14 | - C++ Source file with Shape Function, REGISTER\_OP and 15 | REGISTER\_KERNEL\_BUILDER constructs. 16 | - Cuda Header that defines the GPU implementation of the operator, 17 | including a CUDA kernel. 18 | - Cuda Source file with GPU REGISTER\_KERNEL\_BUILDER registrations for the 19 | operator. 20 | - python unit test case, which constructs random input data, and calls 21 | the operator. 22 | - Makefile for compiling the operator into a shared library, using g++ 23 | and nvcc. 24 | 25 | Requirements 26 | ------------ 27 | 28 | A tensorflow installation, required for building the operator. 29 | 30 | .. code:: bash 31 | 32 | pip install tensorflow 33 | 34 | 35 | Installation 36 | ------------ 37 | 38 | .. code:: bash 39 | 40 | pip install tfopgen 41 | 42 | Usage 43 | ----- 44 | 45 | The user should provide a YAML configuration file defining the operator: 46 | 47 | - inputs and optionally, their shapes. 48 | - outputs and optionally, their shapes. 49 | - polymorphic type attributes. 50 | - other attributes. 51 | - documentation. 52 | 53 | For example, we can define the outline for a ``ComplexPhase`` operator in the ``complex_phase.yml`` file. 54 | 55 | .. code:: yaml 56 | 57 | --- 58 | project: astronomy 59 | library: fourier 60 | name: ComplexPhase 61 | type_attrs: 62 | - "FT: {float, double} = DT_FLOAT" 63 | - "CT: {complex64, complex128} = DT_COMPLEX64" 64 | inputs: 65 | - ["uvw: FT", [null, null, 3]] # (ntime, nbl, 3) 66 | - ["frequency: FT", [null]] # (nchan, ) 67 | - ["lm: FT", [null, 2]] # (nsrc, 2) 68 | outputs: 69 | - ["complex_phase: CT", [null, null, null, null]] 70 | doc: > 71 | Given tensors 72 | (1) of (U, V, W) baseline coordinates with shape (ntime, nbl, 3) 73 | (2) of (L, M) sky coordinates with shape (nsrc, 2) 74 | (3) of frequencies, 75 | compute the complex phase with shape (nsrc, ntime, nbl, nchan) 76 | 77 | We can then run: 78 | 79 | .. code:: bash 80 | 81 | $ tfopgen complex_phase.yml 82 | 83 | to create the following directory structure and files: 84 | 85 | .. code:: bash 86 | 87 | $ tree fourier/ 88 | fourier/ 89 | ├── complex_phase_op_cpu.cpp 90 | ├── complex_phase_op_cpu.h 91 | ├── complex_phase_op_gpu.cu 92 | ├── complex_phase_op_gpu.cuh 93 | ├── complex_phase_op.h 94 | ├── Makefile 95 | └── test_complex_phase.py 96 | 97 | The ``project`` and ``library`` options specify C++ namespaces within 98 | which the operator is created. Additionally, the Makefile will create a 99 | ``fourier.so`` shared library that can be loaded with ``tf.load_op_library('fourier.so')``. 100 |
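The generated test case shows how the operator is invoked once the shared
library has been built. A minimal sketch along the same lines (the tensor
sizes below are arbitrary illustrative values, and the generated ``Compute``
methods only allocate the outputs, so the results are placeholders until the
kernels are implemented):

.. code:: python

    import numpy as np
    import tensorflow as tf

    # Load the shared library produced by the Makefile
    fourier = tf.load_op_library('fourier.so')

    # Random inputs matching the configured shapes:
    # uvw (ntime, nbl, 3), frequency (nchan,), lm (nsrc, 2)
    ntime, nbl, nchan, nsrc = 10, 5, 16, 4
    uvw = np.random.random((ntime, nbl, 3)).astype(np.float32)
    frequency = np.random.random((nchan,)).astype(np.float32)
    lm = np.random.random((nsrc, 2)).astype(np.float32)

    # The python op name is the snake case version of the operator name
    complex_phase = fourier.complex_phase(uvw, frequency, lm)

    with tf.Session() as S:
        print(S.run(complex_phase))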
101 | Any polymorphic type attributes should be supplied. The generator will 102 | template the operators on these attributes. It will also generate 103 | concrete permutations of REGISTER\_KERNEL\_BUILDER for both the CPU and 104 | GPU op using the actual types supplied in the type attributes (float, 105 | double, complex64 and complex128) below: 106 | 107 | .. code:: yaml 108 | 109 | type_attrs: 110 | - "FT: {float, double} = DT_FLOAT" 111 | - "CT: {complex64, complex128} = DT_COMPLEX64" 112 | 113 |
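The permutations are produced with ``itertools.product`` over the
per-attribute type lists, mirroring ``make_template_kwargs`` in
``create_op.py``, so the two attributes above expand into four ``(FT, CT)``
kernel registrations. A minimal sketch of that expansion:

.. code:: python

    import itertools

    # Type lists parsed from the FT and CT attributes above
    ft_types = ['float', 'double']
    ct_types = ['complex64', 'complex128']

    # Each permutation becomes a CPU and a GPU REGISTER_KERNEL_BUILDER
    for ft, ct in itertools.product(ft_types, ct_types):
        print(ft, ct)

    # float complex64
    # float complex128
    # double complex64
    # double complex128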
114 | The operator inputs and their optional shapes should be specified as a 115 | list containing a string defining the ``.Input`` directive, and a list 116 | describing the shape of the input tensor. A ``null`` value in the shape 117 | will be translated into a python ``None``. If concrete dimensions are specified, 118 | corresponding checks will be generated in the Shape Function associated with the 119 | operator. 120 | 121 | .. code:: yaml 122 | 123 | inputs: 124 | - ["uvw: FT", [null, null, 3]] # (ntime, nbl, 3) 125 | - ["frequency: FT", [null]] # (nchan, ) 126 | - ["lm: FT", [null, 2]] # (nsrc, 2) 127 | 128 | The operator outputs should be similarly defined. 129 | 130 | .. code:: yaml 131 | 132 | outputs: 133 | - ["complex_phase: CT", [null, null, null, null]] 134 | 135 | Given these inputs and outputs, CPU and GPU operators are created with 136 | named variables corresponding to the inputs and outputs. Additionally, a 137 | CUDA kernel with the given inputs and outputs is created, as well as a 138 | shape function checking the rank and dimensions of the supplied inputs. 139 | 140 | 141 | Other attributes may be specified (and will be output in the 142 | REGISTER\_OP directive), but are not catered for automatically by the 143 | generator code as the range of attribute behaviour is complex. 144 | 145 | .. code:: yaml 146 | 147 | other_attrs: 148 | - "iterations: int32 >= 2" 149 | 150 | Finally, operator documentation may also be supplied. 151 | 152 | .. code:: yaml 153 | 154 | doc: > 155 | Given tensors 156 | (1) of (U, V, W) baseline coordinates with shape (ntime, nbl, 3) 157 | (2) of (L, M) sky coordinates with shape (nsrc, 2) 158 | (3) of frequencies, 159 | compute the complex phase with shape (nsrc, ntime, nbl, nchan) --------------------------------------------------------------------------------
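The ``tfopgen`` script is a thin wrapper around ``tfopgen.run``, so the
generator can equally be driven from python, for example from another build
script (the configuration filename below is assumed to be in the current
directory):

.. code:: python

    import tfopgen

    # Equivalent to running `tfopgen complex_phase.yml` on the command line;
    # writes the generated files into the `fourier` subdirectory
    tfopgen.run(['complex_phase.yml'])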