├── .gitignore
├── 4build
    ├── README
    ├── cmake
    │   ├── CUDA.cmake
    │   ├── FindCython.cmake
    │   ├── FindNumpy.cmake
    │   └── UseCython.cmake
    ├── download_and_build_openblas.py
    └── gen_py.py
├── 4dev
    ├── README
    ├── lintfiles.txt
    ├── pylintrc
    ├── run_pylint.py
    ├── style_check.py
    └── valgrind-python.supp
├── CMakeLists.txt
├── LICENSE
├── README.md
├── cgt
    ├── __init__.py
    ├── api.py
    ├── api_autogen.py
    ├── compilation.py
    ├── core.py
    ├── display.py
    ├── distributions.py
    ├── img_ops.py
    ├── nn.py
    ├── nn_ops
    │   ├── __init__.py
    │   ├── cross_channel_lrn.py
    │   ├── cudnn_ops.py
    │   ├── im2col.py
    │   └── max_pool_2d.py
    ├── numeric_diff.py
    ├── tests
    │   ├── __init__.py
    │   ├── _test_assert.py
    │   ├── _test_cycgt.py
    │   ├── _test_eg.py
    │   ├── _test_flatvec.py
    │   ├── _test_shapecheck.py
    │   ├── _test_tuples.py
    │   ├── test_affine.py
    │   ├── test_array_wrapper.py
    │   ├── test_conv.py
    │   ├── test_devices.py
    │   ├── test_einsum.py
    │   ├── test_examples.py
    │   ├── test_imgproc.py
    │   ├── test_inc_subtensor.py
    │   ├── test_informative_errors.py
    │   ├── test_input_conversions.py
    │   ├── test_linreg.py
    │   ├── test_multi_output.py
    │   ├── test_optimizers.py
    │   ├── test_par_interp.py
    │   ├── test_scalars.py
    │   └── test_stack.py
    └── utils.py
├── cgtrc.example
├── cgtrc_spec.ini
├── doc
    ├── Makefile
    ├── README
    ├── _static
    │   └── my_theme.css
    ├── build_and_view.sh
    ├── conf.py
    ├── index.rst
    ├── notebook_sphinxext1.py
    ├── spelling_wordlist.txt
    ├── sphinx_preview.py
    ├── tutorial-notes.txt
    └── upload.sh
├── examples
    ├── README
    ├── alice
    │   └── input.txt
    ├── bench
    │   ├── cgt_gru.py
    │   ├── gru.py
    │   ├── seq_model.py
    │   └── theano_gru.py
    ├── broken
    │   ├── caffe2cgt.py
    │   ├── internals_tour.ipynb
    │   └── mnist_torchstyle.py
    ├── cgt_theano_feedforward_comparison.py
    ├── demo_char_rnn.py
    ├── demo_cifar.py
    ├── demo_mnist.py
    ├── demo_neural_turing_machine.py
    ├── demo_variational_autoencoder.py
    ├── example_utils.py
    ├── param_collection.py
    └── tutorial.ipynb
├── include
    ├── IRC.h
    ├── cgt_common.h
    ├── cgt_cuda.h
    ├── cudnn_support.h
    ├── execution.h
    ├── im2col.h
    ├── lrn.cuh
    └── pooling.h
├── scripts
    └── cgt-clear-cache
├── src
    ├── cgt_common.cpp
    ├── cuda_setup.c
    ├── cycgt.pyx
    ├── execution.cpp
    └── util
    │   └── ThreadPool.h
└── thirdparty
    ├── __init__.py
    ├── configobj.py
    ├── tabulate.py
    └── validate.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | 
 2 | # IDE junk
 3 | .settings
 4 | .cproject
 5 | ._*
 6 | .\#*
 7 | \#*
 8 | *.pyc
 9 | .project
10 | .pydevproject
11 | *~
12 | *.orig
13 | *~
14 | 
15 | 
16 | # Build generated stuff
17 | doc/build
18 | doc/tex-images
19 | build
20 | *.swp
21 | openblas.tar.gz
22 | 
23 | # Tmp stuff
24 | junk
25 | doc/primer
26 | doc/images
27 | _build
28 | _junk
29 | .coverage
30 | htmlcov
31 | cgt.sublime-project
32 | cgt.sublime-workspace
33 | 
34 | 
35 | doc2
36 | notes
37 | notes2
38 | build2
39 | .idea
40 | *-checkpoint.ipynb
41 | docjunk
42 | downloads/*
43 | doc/tex-src


--------------------------------------------------------------------------------
/4build/README:
--------------------------------------------------------------------------------
1 | Files for building CGT


--------------------------------------------------------------------------------
/4build/cmake/CUDA.cmake:
--------------------------------------------------------------------------------
 1 | ################################################################################################
 2 | # Short command for cuDNN detection. Believe it soon will be a part of CUDA toolkit distribution.
 3 | # That's why not FindcuDNN.cmake file, but just the macro
 4 | # Usage:
 5 | #   detect_cuDNN()
 6 | function(detect_cuDNN)
 7 |   set(CUDNN_ROOT "" CACHE PATH "CUDNN root folder")
 8 | 
 9 |   find_path(CUDNN_INCLUDE cudnn.h
10 |             PATHS ${CUDNN_ROOT} $ENV{CUDNN_ROOT} ${CUDA_TOOLKIT_INCLUDE}
11 |             DOC "Path to cuDNN include directory." )
12 | 
13 |   get_filename_component(__libpath_hist ${CUDA_CUDART_LIBRARY} PATH)
14 |   find_library(CUDNN_LIBRARY NAMES libcudnn.so # libcudnn_static.a
15 |                              PATHS ${CUDNN_ROOT} $ENV{CUDNN_ROOT} ${CUDNN_INCLUDE} ${__libpath_hist}
16 |                              DOC "Path to cuDNN library.")
17 | 
18 |   if(CUDNN_INCLUDE AND CUDNN_LIBRARY)
19 |     set(HAVE_CUDNN  TRUE PARENT_SCOPE)
20 |     set(CUDNN_FOUND TRUE PARENT_SCOPE)
21 | 
22 |     mark_as_advanced(CUDNN_INCLUDE CUDNN_LIBRARY CUDNN_ROOT)
23 |     message(STATUS "Found cuDNN (include: ${CUDNN_INCLUDE}, library: ${CUDNN_LIBRARY})")
24 |   endif()
25 | endfunction()
26 | 
27 | 
28 | 


--------------------------------------------------------------------------------
/4build/cmake/FindCython.cmake:
--------------------------------------------------------------------------------
 1 | # Find the Cython compiler.
 2 | #
 3 | # This code sets the following variables:
 4 | #
 5 | #  CYTHON_EXECUTABLE
 6 | #
 7 | # See also UseCython.cmake
 8 | 
 9 | #=============================================================================
10 | # Copyright 2011 Kitware, Inc.
11 | #
12 | # Licensed under the Apache License, Version 2.0 (the "License");
13 | # you may not use this file except in compliance with the License.
14 | # You may obtain a copy of the License at
15 | #
16 | #     http://www.apache.org/licenses/LICENSE-2.0
17 | #
18 | # Unless required by applicable law or agreed to in writing, software
19 | # distributed under the License is distributed on an "AS IS" BASIS,
20 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21 | # See the License for the specific language governing permissions and
22 | # limitations under the License.
23 | #=============================================================================
24 | 
25 | # Use the Cython executable that lives next to the Python executable
26 | # if it is a local installation.
27 | find_package( PythonInterp )
28 | if( PYTHONINTERP_FOUND )
29 |   get_filename_component( _python_path ${PYTHON_EXECUTABLE} PATH )
30 |   find_program( CYTHON_EXECUTABLE
31 |     NAMES cython cython.bat
32 |     HINTS ${_python_path}
33 |     )
34 | else()
35 |   find_program( CYTHON_EXECUTABLE
36 |     NAMES cython cython.bat cython3
37 |     )
38 | endif()
39 | 
40 | 
41 | include( FindPackageHandleStandardArgs )
42 | FIND_PACKAGE_HANDLE_STANDARD_ARGS( Cython REQUIRED_VARS CYTHON_EXECUTABLE )
43 | 
44 | mark_as_advanced( CYTHON_EXECUTABLE )
45 | 
46 | 


--------------------------------------------------------------------------------
/4build/cmake/FindNumpy.cmake:
--------------------------------------------------------------------------------
 1 | # - Find the NumPy libraries
 2 | # This module finds if NumPy is installed, and sets the following variables
 3 | # indicating where it is.
 4 | #
 5 | # TODO: Update to provide the libraries and paths for linking npymath lib.
 6 | #
 7 | #  NUMPY_FOUND               - was NumPy found
 8 | #  NUMPY_VERSION             - the version of NumPy found as a string
 9 | #  NUMPY_VERSION_MAJOR       - the major version number of NumPy
10 | #  NUMPY_VERSION_MINOR       - the minor version number of NumPy
11 | #  NUMPY_VERSION_PATCH       - the patch version number of NumPy
12 | #  NUMPY_VERSION_DECIMAL     - e.g. version 1.6.1 is 10601
13 | #  NUMPY_INCLUDE_DIR         - path to the NumPy include files
14 | 
15 | unset(NUMPY_VERSION)
16 | unset(NUMPY_INCLUDE_DIR)
17 | 
18 | if(PYTHONINTERP_FOUND)
19 |   execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
20 |     "import numpy as n; print(n.__version__); print(n.get_include());"
21 |     RESULT_VARIABLE __result
22 |     OUTPUT_VARIABLE __output
23 |     OUTPUT_STRIP_TRAILING_WHITESPACE)
24 | 
25 |   if(__result MATCHES 0)
26 |     string(REGEX REPLACE ";" "\\\\;" __values ${__output})
27 |     string(REGEX REPLACE "\r?\n" ";"    __values ${__values})
28 |     list(GET __values 0 NUMPY_VERSION)
29 |     list(GET __values 1 NUMPY_INCLUDE_DIR)
30 | 
31 |     string(REGEX MATCH "^([0-9])+\\.([0-9])+\\.([0-9])+" __ver_check "${NUMPY_VERSION}")
32 |     if(NOT "${__ver_check}" STREQUAL "")
33 |       set(NUMPY_VERSION_MAJOR ${CMAKE_MATCH_1})
34 |       set(NUMPY_VERSION_MINOR ${CMAKE_MATCH_2})
35 |       set(NUMPY_VERSION_PATCH ${CMAKE_MATCH_3})
36 |       math(EXPR NUMPY_VERSION_DECIMAL
37 |         "(${NUMPY_VERSION_MAJOR} * 10000) + (${NUMPY_VERSION_MINOR} * 100) + ${NUMPY_VERSION_PATCH}")
38 |       string(REGEX REPLACE "\\\\" "/"  NUMPY_INCLUDE_DIR ${NUMPY_INCLUDE_DIR})
39 |     else()
40 |      unset(NUMPY_VERSION)
41 |      unset(NUMPY_INCLUDE_DIR)
42 |      message(STATUS "Requested NumPy version and include path, but got instead:\n${__output}\n")
43 |     endif()
44 |   endif()
45 | else()
46 |   message(STATUS "To find NumPy Python interpretator is required to be found.")
47 | endif()
48 | 
49 | include(FindPackageHandleStandardArgs)
50 | find_package_handle_standard_args(NumPy REQUIRED_VARS NUMPY_INCLUDE_DIR NUMPY_VERSION
51 |                                         VERSION_VAR   NUMPY_VERSION)
52 | 
53 | if(NUMPY_FOUND)
54 |   message(STATUS "NumPy ver. ${NUMPY_VERSION} found (include: ${NUMPY_INCLUDE_DIR})")
55 | endif()
56 | 
57 | 


--------------------------------------------------------------------------------
/4build/download_and_build_openblas.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import subprocess,sys,os,shutil,os.path as osp
 3 | import urllib
 4 | import multiprocessing
 5 | 
 6 | unpack_dir = sys.argv[1]
 7 | max_openblas_threads = int(sys.argv[2]) if len(sys.argv) > 2 else multiprocessing.cpu_count()
 8 | 
 9 | def call_and_print(cmd):
10 |     print "\x1b[32m%s\x1b[0m"%cmd
11 |     subprocess.check_call(cmd,shell=True)
12 | 
13 | fname = "openblas.tar.gz"
14 | url = "https://github.com/xianyi/OpenBLAS/archive/v0.2.14.tar.gz"
15 | 
16 | if osp.exists(fname):
17 |     print "already downloaded openblas.tar.gz"
18 | else:
19 |     print "will download openblas and unpack to %s"%unpack_dir
20 |     urllib.urlretrieve(url, fname+".part")
21 |     shutil.move("{fname}.part".format(fname=fname),"{fname}".format(fname=fname))
22 | call_and_print("mkdir -p {unpack_dir} && tar -xf {fname} --directory {unpack_dir}  --strip-components=1".format(
23 |     fname=fname,unpack_dir=unpack_dir))
24 | os.chdir(unpack_dir)
25 | print "Compiling OpenBLAS...this will take a minute or so"
26 | call_and_print("make -j ONLY_CBLAS=1 NO_LAPACK=1 NO_LAPACKE=1 USE_OPENMP=0 NUM_THREADS=%i"%max_openblas_threads)
27 | 


--------------------------------------------------------------------------------
/4build/gen_py.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | from cgt.core import UNARY_INFO, BINARY_INFO
 3 | import cgt, os, os.path as osp
 4 | fh = sys.stdout
 5 | 
 6 | os.chdir(osp.dirname(osp.dirname(osp.realpath(cgt.__file__))))
 7 | 
 8 | with open("cgt/api_autogen.py","w") as fh:
 9 |     fh.write("# This file was autogenerated by gen_py.py. Do not edit.")
10 |     fh.write("\nfrom . import core\n")
11 | 
12 |     for (shortname,info) in sorted(UNARY_INFO.iteritems(), key = lambda x:x[0]):    
13 |         fh.write(
14 | """
15 | def {npname}(x):
16 |     "Applies function {npname} elementwise to argument x"
17 |     return core.Result(core.ElwiseUnary("{shortname}"), [x])
18 |     """.format(shortname=shortname,npname=info.short.lower()))
19 | 
20 |     for (infixname,info) in sorted(BINARY_INFO.iteritems(), key = lambda x:x[1].short):    
21 |         fh.write(
22 | """
23 | def {npname}(x, y):
24 |     "Applies function {npname} elementwise to arguments x,y"
25 |     return core.elwise_binary("{infixname}", x,y)
26 |     """.format(infixname = infixname, npname=info.short))
27 |     
28 | 


--------------------------------------------------------------------------------
/4dev/README:
--------------------------------------------------------------------------------
1 | Files for development


--------------------------------------------------------------------------------
/4dev/lintfiles.txt:
--------------------------------------------------------------------------------
1 | + cgt/*.py


--------------------------------------------------------------------------------
/4dev/pylintrc:
--------------------------------------------------------------------------------
  1 | [MASTER]
  2 | 
  3 | # Specify a configuration file.
  4 | #rcfile=
  5 | 
  6 | # Python code to execute, usually for sys.path manipulation such as
  7 | # pygtk.require().
  8 | #init-hook=
  9 | 
 10 | # Profiled execution.
 11 | profile=no
 12 | 
 13 | # Add files or directories to the blacklist. They should be base names, not
 14 | # paths.
 15 | ignore=CVS
 16 | 
 17 | # Pickle collected data for later comparisons.
 18 | persistent=yes
 19 | 
 20 | # List of plugins (as comma separated values of python modules names) to load,
 21 | # usually to register additional checkers.
 22 | load-plugins=
 23 | 
 24 | 
 25 | [MESSAGES CONTROL]
 26 | 
 27 | # Enable the message, report, category or checker with the given id(s). You can
 28 | # either give multiple identifier separated by comma (,) or put this option
 29 | # multiple time. See also the "--disable" option for examples.
 30 | #enable=
 31 | 
 32 | # Disable the message, report, category or checker with the given id(s). You
 33 | # can either give multiple identifiers separated by comma (,) or put this
 34 | # option multiple times (only on the command line, not in the configuration
 35 | # file where it should appear only once).You can also use "--disable=all" to
 36 | # disable everything first and then reenable specific checks. For example, if
 37 | # you want to run only the similarities checker, you can use "--disable=all
 38 | # --enable=similarities". If you want to run only the classes checker, but have
 39 | # no Warning level messages displayed, use"--disable=all --enable=classes
 40 | # --disable=W"
 41 | disable=C,R,W0221,I,W0614,W0201,W0142,W0141,W0401,W0760,W59,W0123,W0603,W0621,W0622
 42 | 
 43 | 
 44 | [REPORTS]
 45 | 
 46 | # Set the output format. Available formats are text, parseable, colorized, msvs
 47 | # (visual studio) and html. You can also give a reporter class, eg
 48 | # mypackage.mymodule.MyReporterClass.
 49 | output-format=text
 50 | 
 51 | # Include message's id in output
 52 | #include-ids=yes
 53 | 
 54 | # Include symbolic ids of messages in output
 55 | #symbols=no
 56 | 
 57 | # Put messages in a separate file for each module / package specified on the
 58 | # command line instead of printing them on stdout. Reports (if any) will be
 59 | # written in a file name "pylint_global.[txt|html]".
 60 | files-output=no
 61 | 
 62 | # Tells whether to display a full report or only the messages
 63 | reports=yes
 64 | 
 65 | # Python expression which should return a note less than 10 (10 is the highest
 66 | # note). You have access to the variables errors warning, statement which
 67 | # respectively contain the number of errors / warnings messages and the total
 68 | # number of statements analyzed. This is used by the global evaluation report
 69 | # (RP0004).
 70 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
 71 | 
 72 | # Add a comment according to your evaluation note. This is used by the global
 73 | # evaluation report (RP0004).
 74 | comment=no
 75 | 
 76 | 
 77 | [FORMAT]
 78 | 
 79 | # Maximum number of characters on a single line.
 80 | max-line-length=80
 81 | 
 82 | # Maximum number of lines in a module
 83 | max-module-lines=1000
 84 | 
 85 | # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
 86 | # tab).
 87 | indent-string='    '
 88 | 
 89 | # Regexp for a line that is allowed to be longer than the limit.
 90 | ignore-long-lines=^\s*(# )?<?https?://\S+>?$
 91 | 
 92 | [BASIC]
 93 | 
 94 | # Required attributes for module, separated by a comma
 95 | required-attributes=
 96 | 
 97 | # List of builtins function names that should not be used, separated by a comma
 98 | bad-functions=map,filter,apply,input
 99 | 
100 | # Regular expression which should only match correct module names
101 | module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
102 | 
103 | # Regular expression which should only match correct module level names
104 | const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$
105 | 
106 | # Regular expression which should only match correct class names
107 | class-rgx=[A-Z_][a-zA-Z0-9]+$
108 | 
109 | # Regular expression which should only match correct function names
110 | function-rgx=[a-z_][a-z0-9_]{2,30}$
111 | 
112 | # Regular expression which should only match correct method names
113 | method-rgx=[a-z_][a-z0-9_]{2,30}$
114 | 
115 | # Regular expression which should only match correct instance attribute names
116 | attr-rgx=[a-z_][a-z0-9_]{2,30}$
117 | 
118 | # Regular expression which should only match correct argument names
119 | argument-rgx=[a-z_][a-z0-9_]{2,30}$
120 | 
121 | # Regular expression which should only match correct variable names
122 | variable-rgx=[a-z_][a-z0-9_]{2,30}$
123 | 
124 | # Regular expression which should only match correct list comprehension /
125 | # generator expression variable names
126 | inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$
127 | 
128 | # Good variable names which should always be accepted, separated by a comma
129 | good-names=i,j,k,ex,Run,_
130 | 
131 | # Bad variable names which should always be refused, separated by a comma
132 | bad-names=foo,bar,baz,toto,tutu,tata
133 | 
134 | # Regular expression which should only match functions or classes name which do
135 | # not require a docstring
136 | no-docstring-rgx=__.*__
137 | 
138 | 
139 | [SIMILARITIES]
140 | 
141 | # Minimum lines number of a similarity.
142 | min-similarity-lines=4
143 | 
144 | # Ignore comments when computing similarities.
145 | ignore-comments=yes
146 | 
147 | # Ignore docstrings when computing similarities.
148 | ignore-docstrings=yes
149 | 
150 | # Ignore imports when computing similarities.
151 | ignore-imports=no
152 | 
153 | 
154 | [MISCELLANEOUS]
155 | 
156 | # List of note tags to take in consideration, separated by a comma.
157 | notes=FIXME,XXX,TODO
158 | 
159 | 
160 | [TYPECHECK]
161 | 
162 | # Tells whether missing members accessed in mixin class should be ignored. A
163 | # mixin class is detected if its name ends with "mixin" (case insensitive).
164 | ignore-mixin-members=yes
165 | 
166 | # List of classes names for which member attributes should not be checked
167 | # (useful for classes with attributes dynamically set).
168 | ignored-classes=SQLObject
169 | 
170 | # When zope mode is activated, add a predefined set of Zope acquired attributes
171 | # to generated-members.
172 | zope=no
173 | 
174 | # List of members which are set dynamically and missed by pylint inference
175 | # system, and so shouldn't trigger E0201 when accessed. Python regular
176 | # expressions are accepted.
177 | generated-members=REQUEST,acl_users,aq_parent
178 | 
179 | ignored-modules=numpy,numpy.linalg,theano,numpy.random,scipy
180 | 
181 | 
182 | [VARIABLES]
183 | 
184 | # Tells whether we should check for unused import in __init__ files.
185 | init-import=no
186 | 
187 | # A regular expression matching the beginning of the name of dummy variables
188 | # (i.e. not used).
189 | dummy-variables-rgx=_|dummy
190 | 
191 | # List of additional names supposed to be defined in builtins. Remember that
192 | # you should avoid to define new builtins when possible.
193 | additional-builtins=
194 | 
195 | 
196 | [IMPORTS]
197 | 
198 | # Deprecated modules which should not be used, separated by a comma
199 | deprecated-modules=regsub,string,TERMIOS,Bastion,rexec
200 | 
201 | # Create a graph of every (i.e. internal and external) dependencies in the
202 | # given file (report RP0402 must not be disabled)
203 | import-graph=
204 | 
205 | # Create a graph of external dependencies in the given file (report RP0402 must
206 | # not be disabled)
207 | ext-import-graph=
208 | 
209 | # Create a graph of internal dependencies in the given file (report RP0402 must
210 | # not be disabled)
211 | int-import-graph=
212 | 
213 | 
214 | [DESIGN]
215 | 
216 | # Maximum number of arguments for function / method
217 | max-args=5
218 | 
219 | # Argument names that match this expression will be ignored. Default to name
220 | # with leading underscore
221 | ignored-argument-names=_.*
222 | 
223 | # Maximum number of locals for function / method body
224 | max-locals=15
225 | 
226 | # Maximum number of return / yield for function / method body
227 | max-returns=6
228 | 
229 | # Maximum number of branch for function / method body
230 | max-branchs=12
231 | 
232 | # Maximum number of statements in function / method body
233 | max-statements=50
234 | 
235 | # Maximum number of parents for a class (see R0901).
236 | max-parents=7
237 | 
238 | # Maximum number of attributes for a class (see R0902).
239 | max-attributes=7
240 | 
241 | # Minimum number of public methods for a class (see R0903).
242 | min-public-methods=2
243 | 
244 | # Maximum number of public methods for a class (see R0904).
245 | max-public-methods=20
246 | 
247 | 
248 | [CLASSES]
249 | 
250 | # List of interface methods to ignore, separated by a comma. This is used for
251 | # instance to not check methods defines in Zope's Interface base class.
252 | ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by
253 | 
254 | # List of method names used to declare (i.e. assign) instance attributes.
255 | defining-attr-methods=__init__,__new__,setUp
256 | 
257 | # List of valid names for the first argument in a class method.
258 | valid-classmethod-first-arg=cls
259 | 
260 | # List of valid names for the first argument in a metaclass class method.
261 | valid-metaclass-classmethod-first-arg=mcs
262 | 
263 | 
264 | [EXCEPTIONS]
265 | 
266 | # Exceptions that will emit a warning when being caught. Defaults to
267 | # "Exception"
268 | overgeneral-exceptions=Exception
269 | 


--------------------------------------------------------------------------------
/4dev/run_pylint.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import argparse
 3 | parser = argparse.ArgumentParser()
 4 | parser.add_argument("--files",nargs="+")
 5 | parser.add_argument("--patfile", type=argparse.FileType("r"))
 6 | args = parser.parse_args()
 7 | 
 8 | 
 9 | import subprocess, os, os.path as osp, cgt
10 | from glob import glob
11 | 
12 | os.chdir(osp.dirname(osp.dirname(osp.realpath(cgt.__file__))))
13 | 
14 | if args.files is None and args.patfile is None: args.patfile=open("4dev/lintfiles.txt","r")
15 | 
16 | def cap(cmd):
17 |     "call and print"
18 |     print "\x1b[32m%s\x1b[0m"%cmd
19 |     subprocess.call(cmd,shell=True)
20 | 
21 | def filelist_from_patterns(pats, rootdir=None):
22 |     if rootdir is None: rootdir = "."
23 |     # filelist = []
24 |     fileset = set([])
25 |     lines = [line.strip() for line in pats]
26 |     for line in lines:
27 |         pat  = line[2:]
28 |         newfiles = glob(osp.join(rootdir,pat))
29 |         if line.startswith("+"):
30 |             fileset.update(newfiles)
31 |         elif line.startswith("-"):
32 |             fileset.difference_update(newfiles)
33 |         else:
34 |             raise ValueError("line must start with + or -")
35 |     filelist = list(fileset)
36 |     return filelist
37 | 
38 | assert args.files is not None or args.patfile is not None
39 | if args.files is not None:
40 |     filelist = args.files
41 | elif args.patfile is not None:
42 |     filelist = filelist_from_patterns(args.patfile.readlines())
43 | else:
44 |     raise Exception("unreachable")
45 | 
46 | rcfile = "4dev/pylintrc"
47 | lint = "pylint"
48 | if filelist is not None:
49 |     for fname in filelist:
50 |         result = cap("%s -f colorized --rcfile %s -r n %s"%(lint, rcfile, fname))
51 | else:
52 |     result = cap("%s -f colorized  --rcfile %s -r n  *.py"%(lint,rcfile))
53 | 
54 | 


--------------------------------------------------------------------------------
/4dev/style_check.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import cgt
 3 | for (name,val) in cgt.__dict__.iteritems():
 4 |     if not name.startswith("_"):
 5 |         if not val.__doc__:
 6 |             print "API function %s requires docstring!"%name 
 7 | 
 8 | 
 9 | for (name,val) in cgt.core.__dict__.iteritems():
10 |     if isinstance(val, type) and issubclass(val, cgt.core.Op):
11 |         if val.get_native_compile_info == cgt.core.Op.get_native_compile_info:
12 |             print "Op %s is missing 'get_native_compile_info'!"%name
13 | 
14 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | cmake_minimum_required(VERSION 2.8.11)
  2 | 
  3 | if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR})
  4 |   message(FATAL_ERROR "In-source builds not allowed. Please make a new directory (called a build directory) and run CMake from there.\nBut first clean the build files that just got created in the source directory:\nrm -rf CMakeFiles CMakeCache.txt")
  5 | endif()
  6 | 
  7 | project(cgt)
  8 | 
  9 | # http://cmake.3232098.n2.nabble.com/Default-value-for-CMAKE-BUILD-TYPE-td7550756.html
 10 | if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
 11 |   message(STATUS "Setting build type to 'Release' as none was specified.")
 12 |   set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE)
 13 |   # Set the possible values of build type for cmake-gui
 14 |   set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release"
 15 |     "MinSizeRel" "RelWithDebInfo")
 16 | endif()
 17 | 
 18 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib
 19 |     CACHE PATH "Output directory for static libraries.")
 20 | 
 21 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib
 22 |     CACHE PATH "Output directory for shared libraries.")
 23 | 
 24 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin
 25 |     CACHE PATH "Output directory for executables and DLL's.")
 26 | 
 27 | # set(CMAKE_C_STANDARD 99)
 28 | list( APPEND CMAKE_C_FLAGS "-std=c99")
 29 | if(APPLE)
 30 |   list( APPEND CMAKE_CXX_FLAGS "-std=c++11 -stdlib=libc++")
 31 | else()
 32 |   list( APPEND CMAKE_CXX_FLAGS "-std=c++11")
 33 | endif()
 34 | 
 35 | list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/4build/cmake)
 36 | 
 37 | include(${CMAKE_SOURCE_DIR}/4build/cmake/UseCython.cmake)
 38 | include(${CMAKE_SOURCE_DIR}/4build/cmake/CUDA.cmake)
 39 | 
 40 | find_package(PythonLibs 2.7 REQUIRED)
 41 | find_package(Numpy REQUIRED)
 42 | 
 43 | if (APPLE)
 44 |   set(CMAKE_MACOSX_RPATH 0)
 45 | endif()
 46 | 
 47 | 
 48 | option(CGT_ENABLE_CUDA OFF "Enable CUDA support")
 49 | option(CGT_ENABLE_CUDNN ON "Enable CUDNN support")
 50 | option(CGT_DEVEL_MODE OFF "Build with more compiler errors and warnings")
 51 | SET(CGT_MAX_CPUS "16" CACHE STRING "Max CPUs that might ever be used")
 52 | 
 53 | message(
 54 |     "\n"
 55 |     "*****************************\n"
 56 |     "OPTIONS: \n"
 57 |     "   CGT_ENABLE_CUDA:    ${CGT_ENABLE_CUDA}\n"
 58 |     "   CGT_ENABLE_CUDNN:    ${CGT_ENABLE_CUDNN}\n"
 59 |     # "   CGT_DEVEL_MODE:     ${CGT_DEVEL_MODE}\n"
 60 |     "   CMAKE_BUILD_TYPE:   ${CMAKE_BUILD_TYPE}\n"
 61 |     "*****************************\n"
 62 |     )
 63 | 
 64 | 
 65 | if (CGT_DEVEL_MODE)
 66 |   set(CMAKE_C_FLAGS "-Wall -Wextra -Wno-unused-function -Wno-sign-compare")
 67 | endif(CGT_DEVEL_MODE)
 68 | 
 69 | set(CGT_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/include)
 70 | message("include dir ${CGT_INCLUDE_DIR}")
 71 | 
 72 | add_library(cgt SHARED src/cgt_common.cpp include/cgt_common.h)
 73 | if (CGT_ENABLE_CUDA)
 74 |   find_package(CUDA REQUIRED)
 75 |   add_definitions(-DCGT_ENABLE_CUDA)  
 76 | endif()
 77 | 
 78 | if (CGT_ENABLE_CUDNN)
 79 |     detect_cuDNN()
 80 |     if(NOT CUDNN_FOUND AND NOT CUDNN_ROOT)
 81 |       message(FATAL_ERROR "Couldn't find CUDNN. Set CUDNN_ROOT")
 82 |     endif()
 83 | endif()
 84 | 
 85 | target_link_libraries(cgt ${CUDA_CUDART_LIBRARY})
 86 | target_include_directories(cgt PRIVATE ${CUDA_INCLUDE_DIRS} ${CGT_INCLUDE_DIR})
 87 | 
 88 | 
 89 | cython_add_module(cycgt ${CMAKE_SOURCE_DIR}/src/cycgt.pyx 
 90 |   ${CMAKE_SOURCE_DIR}/include/execution.h
 91 |   ${CMAKE_SOURCE_DIR}/src/execution.cpp
 92 | )
 93 | set_source_files_properties(${CMAKE_SOURCE_DIR}/src/cycgt.pyx PROPERTIES CYTHON_IS_CXX TRUE)
 94 | target_include_directories(cycgt PRIVATE ${NUMPY_INCLUDE_DIR} ${CGT_INCLUDE_DIR})
 95 | target_link_libraries(cycgt cgt)
 96 | 
 97 | 
 98 | 
 99 | # add_custom_command(
100 | #   COMMAND ./do_autogen.py ${CMAKE_BINARY_DIR}
101 | #   OUTPUT ${CMAKE_BINARY_DIR}/cgtcorefuns.c ${CMAKE_BINARY_DIR}/cgtcorefunscuda.cu
102 | #   WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
103 | #   # DEPENDS cgt.py
104 | # )
105 | # add_custom_target(gencfiles ALL DEPENDS cgtcorefuns.c)
106 | 
107 | set(OPENBLAS_LIBRARY ${CMAKE_BINARY_DIR}/OpenBLAS/libopenblas.a)
108 | 
109 | add_custom_command(
110 |   COMMAND 4build/download_and_build_openblas.py ${CMAKE_BINARY_DIR}/OpenBLAS ${CGT_MAX_CPUS}
111 |   OUTPUT ${OPENBLAS_LIBRARY}
112 |   WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
113 | )
114 | add_custom_target(openblas ALL DEPENDS ${OPENBLAS_LIBRARY})
115 | 
116 | # add_library(cgtcorefuns SHARED cgtcorefuns.c)
117 | # add_dependencies(cgtcorefuns openblas) # WHY IS THIS NECESSARY?
118 | # target_link_libraries(cgtcorefuns ${OPENBLAS_LIBRARY})
119 | # target_include_directories(cgtcorefuns PRIVATE ${CMAKE_BINARY_DIR}/OpenBLAS ${CGT_INCLUDE_DIR})
120 | # set_target_properties(cgtcorefuns PROPERTIES SUFFIX .so)
121 | 
122 | # if (CGT_ENABLE_CUDA)
123 | #   # http://cuda-insight-toolkit.googlecode.com/svn-history/r46/trunk/Examples/Architecture1/src/CMakeLists.txt
124 | #   find_package(CUDA REQUIRED)
125 | #   cuda_include_directories(${CUDA_INCLUDE_DIRS} ${CGT_INCLUDE_DIR})
126 | #   cuda_add_library(cgtcorefunscuda SHARED ${CMAKE_BINARY_DIR}/cgtcorefunscuda.cu)
127 | #   target_link_libraries(cgtcorefunscuda ${CUDA_LIBRARIES})
128 | #   set_target_properties(cgtcorefunscuda PROPERTIES SUFFIX .so)
129 | # endif()
130 | 
131 | # cuda_add_library(cudadummy SHARED dummy.cu)
132 | 
133 | message("link flags ${CMAKE_SHARED_LINKER_FLAGS}")
134 | 
135 | get_property(CGT_LIBRARY_PATH TARGET cgt PROPERTY LOCATION)
136 | file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/build_info.txt 
137 |   "CUDA_ROOT := ${CUDA_TOOLKIT_ROOT_DIR}\n"
138 |   "CGT_INCLUDE_DIR := ${CGT_INCLUDE_DIR}\n"
139 |   "C_COMPILER := ${CMAKE_C_COMPILER}\n"
140 |   "CGT_LIBRARY_PATH := ${CGT_LIBRARY_PATH}\n"
141 |   "CUDA_LIBRARIES := ${CUDA_LIBRARIES}\n"
142 |   "CGT_ENABLE_CUDA := ${CGT_ENABLE_CUDA}\n"
143 |   "CGT_ENABLE_CUDNN := ${CGT_ENABLE_CUDNN}\n"
144 |   "CUDNN_ROOT := ${CUDNN_ROOT}\n"
145 | )
146 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | CGT is licensed under the MIT License.
 2 | 
 3 | > Copyright (c) 2015: Contributors
 4 | >
 5 | > Permission is hereby granted, free of charge, to any person obtaining
 6 | > a copy of this software and associated documentation files (the
 7 | > "Software"), to deal in the Software without restriction, including
 8 | > without limitation the rights to use, copy, modify, merge, publish,
 9 | > distribute, sublicense, and/or sell copies of the Software, and to
10 | > permit persons to whom the Software is furnished to do so, subject to
11 | > the following conditions:
12 | >
13 | > The above copyright notice and this permission notice shall be
14 | > included in all copies or substantial portions of the Software.
15 | >
16 | > THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | > EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | > MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 | > NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20 | > LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 | > OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 | > WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 | Computation Graph Toolkit (CGT) is a library for evaluation and differentiation of functions of multidimensional arrays.
3 | 
4 | Full documentation can be found at [http://rll.berkeley.edu/cgt](http://rll.berkeley.edu/cgt)
5 | 
6 | [Release announcement](http://joschu.github.io/index.html#Announcing CGT)


--------------------------------------------------------------------------------
/cgt/__init__.py:
--------------------------------------------------------------------------------
 1 | from .api import *
 2 | from .display import print_tree, print_expr, print_text, as_dot
 3 | from .compilation import function, numeric_eval, profiler
 4 | from .core import (grad, get_config, update_config, simplify, reset_config, 
 5 |     Device, scoped_update_config, infer_shape, count_nodes)
 6 | try: 
 7 |     import cycgt
 8 | except ImportError:
 9 |     import warnings
10 |     msg = """
11 |     Could not import the compiled extension module cycgt
12 |     Only pure python mode is available. If you have compiled "
13 |     this extension (via 'make'), you may need to add build/lib 
14 |     to your PYTHONPATH. Try 'import cycgt' to debug the problem further."""
15 |     warnings.warn(msg, UserWarning)
16 |     del warnings
17 | 
18 | 
19 | floatX = "f4"
20 | complexX = "c8"
21 | 
22 | # Get rid of names we don't want to export
23 | del np
24 | del cgt
25 | del operator
26 | del sys
27 | 
28 | get_config()


--------------------------------------------------------------------------------
/cgt/api_autogen.py:
--------------------------------------------------------------------------------
  1 | # This file was autogenerated by gen_py.py. Do not edit.
  2 | from . import core
  3 | 
  4 | def abs(x):
  5 |     "Applies function abs elementwise to argument x"
  6 |     return core.Result(core.ElwiseUnary("abs"), [x])
  7 |     
  8 | def ceil(x):
  9 |     "Applies function ceil elementwise to argument x"
 10 |     return core.Result(core.ElwiseUnary("ceil"), [x])
 11 |     
 12 | def conj(x):
 13 |     "Applies function conj elementwise to argument x"
 14 |     return core.Result(core.ElwiseUnary("conj"), [x])
 15 |     
 16 | def cos(x):
 17 |     "Applies function cos elementwise to argument x"
 18 |     return core.Result(core.ElwiseUnary("cos"), [x])
 19 |     
 20 | def exp(x):
 21 |     "Applies function exp elementwise to argument x"
 22 |     return core.Result(core.ElwiseUnary("exp"), [x])
 23 |     
 24 | def iceil(x):
 25 |     "Applies function iceil elementwise to argument x"
 26 |     return core.Result(core.ElwiseUnary("iceil"), [x])
 27 |     
 28 | def ifloor(x):
 29 |     "Applies function ifloor elementwise to argument x"
 30 |     return core.Result(core.ElwiseUnary("ifloor"), [x])
 31 |     
 32 | def log(x):
 33 |     "Applies function log elementwise to argument x"
 34 |     return core.Result(core.ElwiseUnary("log"), [x])
 35 |     
 36 | def negative(x):
 37 |     "Applies function negative elementwise to argument x"
 38 |     return core.Result(core.ElwiseUnary("neg"), [x])
 39 |     
 40 | def sigmoid(x):
 41 |     "Applies function sigmoid elementwise to argument x"
 42 |     return core.Result(core.ElwiseUnary("sigmoid"), [x])
 43 |     
 44 | def sign(x):
 45 |     "Applies function sign elementwise to argument x"
 46 |     return core.Result(core.ElwiseUnary("sign"), [x])
 47 |     
 48 | def sin(x):
 49 |     "Applies function sin elementwise to argument x"
 50 |     return core.Result(core.ElwiseUnary("sin"), [x])
 51 |     
 52 | def sqrt(x):
 53 |     "Applies function sqrt elementwise to argument x"
 54 |     return core.Result(core.ElwiseUnary("sqrt"), [x])
 55 |     
 56 | def square(x):
 57 |     "Applies function square elementwise to argument x"
 58 |     return core.Result(core.ElwiseUnary("square"), [x])
 59 |     
 60 | def tanh(x):
 61 |     "Applies function tanh elementwise to argument x"
 62 |     return core.Result(core.ElwiseUnary("tanh"), [x])
 63 |     
 64 | def add(x, y):
 65 |     "Applies function add elementwise to arguments x,y"
 66 |     return core.elwise_binary("+", x,y)
 67 |     
 68 | def divide(x, y):
 69 |     "Applies function divide elementwise to arguments x,y"
 70 |     return core.elwise_binary("/", x,y)
 71 |     
 72 | def equal(x, y):
 73 |     "Applies function equal elementwise to arguments x,y"
 74 |     return core.elwise_binary("==", x,y)
 75 |     
 76 | def greater(x, y):
 77 |     "Applies function greater elementwise to arguments x,y"
 78 |     return core.elwise_binary(">", x,y)
 79 |     
 80 | def greater_equal(x, y):
 81 |     "Applies function greater_equal elementwise to arguments x,y"
 82 |     return core.elwise_binary(">=", x,y)
 83 |     
 84 | def less(x, y):
 85 |     "Applies function less elementwise to arguments x,y"
 86 |     return core.elwise_binary("<", x,y)
 87 |     
 88 | def less_equal(x, y):
 89 |     "Applies function less_equal elementwise to arguments x,y"
 90 |     return core.elwise_binary("<=", x,y)
 91 |     
 92 | def multiply(x, y):
 93 |     "Applies function multiply elementwise to arguments x,y"
 94 |     return core.elwise_binary("*", x,y)
 95 |     
 96 | def not_equal(x, y):
 97 |     "Applies function not_equal elementwise to arguments x,y"
 98 |     return core.elwise_binary("!=", x,y)
 99 |     
100 | def power(x, y):
101 |     "Applies function power elementwise to arguments x,y"
102 |     return core.elwise_binary("**", x,y)
103 |     
104 | def subtract(x, y):
105 |     "Applies function subtract elementwise to arguments x,y"
106 |     return core.elwise_binary("-", x,y)
107 |     


--------------------------------------------------------------------------------
/cgt/display.py:
--------------------------------------------------------------------------------
  1 | from . import core, utils
  2 | import sys
  3 | 
  4 | # ================================================================
  5 | # Printing 
  6 | # ================================================================
  7 | 
  8 | def print_tree(outputs, o=sys.stdout, nodefn=None):
  9 |     """
 10 |     Print out a representation of the computation graph as a tree
 11 |     nodefn is called after printing the result for every node, as
 12 |     nodefn(node, o)
 13 |     So you can print more attributes of the node
 14 |     """
 15 |     if isinstance(outputs, core.Node):
 16 |         outputs = [outputs]
 17 |     node2name = {}
 18 |     expands = []
 19 |     for node in outputs:
 20 |         _print_tree(node, 0, node2name, expands, o, nodefn)
 21 |     assert expands == []
 22 |     return node2name
 23 | 
 24 | def _print_tree(node, depth, node2name, expands, o, nodefn):
 25 |     o.write("| "*depth)
 26 |     if node in node2name: 
 27 |         varname = node2name[node]
 28 |         new = False
 29 |     else:
 30 |         varname = _node_name(node) + "@%i"%len(node2name)
 31 |         node2name[node] = varname
 32 |         new = True
 33 | 
 34 |     color = utils.Color.GREEN if node.is_input() else utils.Color.RED
 35 |     utils.colorprint(color, varname, o)
 36 | 
 37 |     if new:
 38 |         if nodefn is not None: nodefn(node, o)
 39 |         o.write("\n")
 40 |         for p in node.parents:
 41 |             _print_tree(p, depth+1, node2name, expands, o, nodefn)
 42 |     else:
 43 |         if not node.is_input(): o.write(" (see above)")
 44 |         o.write("\n")
 45 | 
 46 | def print_expr(x, o=sys.stdout):
 47 |     """
 48 |     Returns a string that represents a computation graph
 49 |     """
 50 |     node2s = {}
 51 |     o.write(_get_expr(x, node2s))
 52 |     o.write("\n")
 53 | 
 54 | def _get_expr(node, node2s):
 55 |     if node in node2s:
 56 |         return node2s[node]
 57 |     else:
 58 |         if node.is_input():
 59 |             name = node2s[node] = _node_name(node) or "@%i"%len(node2s)
 60 |             return name
 61 |         else:
 62 |             parent_exprs = [_get_expr(parent, node2s) 
 63 |                 for parent in node.parents]
 64 |             return node.op.get_expr(parent_exprs)
 65 | 
 66 | def print_text(outputs, o=sys.stdout):
 67 |     """
 68 |     Print computation graph in single-statement assignment form,
 69 |     inspired by LLVM IR. (needs work)
 70 |     """
 71 |     if isinstance(outputs, core.Node):
 72 |         outputs = [outputs]
 73 |     node2name = {}
 74 |     for node in core.topsorted(outputs):
 75 |         thisname = node2name[node] = _node_name(node) + "@%i"%len(node2name)
 76 |         if node.is_argument():
 77 |             o.write("%s <- argument\n"%thisname)
 78 |         else:
 79 |             o.write("%s <- %s %s\n"%(thisname, str(node.op), " ".join(node2name[parent]
 80 |                 for parent in node.parents)))
 81 | 
 82 | def as_dot(nodes):
 83 |     """
 84 |     Returns graphviz Digraph object that contains the nodes of the computation graph
 85 |     with names assigned.
 86 |     """
 87 |     if isinstance(nodes, core.Node):
 88 |         nodes = [nodes]
 89 |     from graphviz import Digraph
 90 |     g = Digraph()
 91 |     for n in core.topsorted(nodes):
 92 |         g.node(str(id(n)), _node_name(n))
 93 |         for (i,p) in enumerate(n.parents):
 94 |             g.edge(str(id(n)), str(id(p)),taillabel=str(i))
 95 |     return g
 96 | 
 97 | def _node_name(node):
 98 |     if node.is_input():
 99 |         return node.name
100 |     else:
101 |         return str(node.op)
102 | 


--------------------------------------------------------------------------------
/cgt/distributions.py:
--------------------------------------------------------------------------------
 1 | import cgt
 2 | from . import core
 3 | 
 4 | class Distribution(object):
 5 |     def lik(self, x, p):
 6 |         raise NotImplementedError
 7 |     def loglik(self, x, p):
 8 |         raise NotImplementedError
 9 |     def crossent(self, p, q):
10 |         raise NotImplementedError
11 |     def kl(self, p, q):
12 |         raise NotImplementedError
13 |     def sample(self, p):
14 |         raise NotImplementedError
15 | 
16 | class _Bernoulli(Distribution):
17 |     def sample(self, p, shape=None):
18 |         p = core.as_node(p)
19 |         shape = shape or cgt.shape(p)
20 |         return cgt.rand(*shape) <= p
21 | 
22 | bernoulli = _Bernoulli()
23 | 
24 | class _Categorical(Distribution):
25 |     def crossent(self, p, q):
26 |         assert p.ndim==2 and q.ndim==2
27 |         return -(p*cgt.log(q)).sum(axis=1)
28 |     def loglik(self, labels, p):
29 |         return cgt.log(p[cgt.arange(cgt.size(labels,0)),labels])
30 | categorical = _Categorical()
31 | 
32 | class _DiagonalGaussian(Distribution):
33 |     pass
34 | 
35 | class Product(Distribution):    
36 |     r"""
37 |     Factored distribution obtained by taking the product of several component distributions
38 |     E.g. suppose we have p0(x), p1(y), p2(z),
39 |     then p3 := ProductDistribution(p1,p2,p3) is a distribution satisfying
40 |     p3(x,y,z) = p0(x)p1(y)p2(z)
41 |     """
42 |     pass
43 | 


--------------------------------------------------------------------------------
/cgt/img_ops.py:
--------------------------------------------------------------------------------
  1 | import cgt
  2 | from cgt.core import Op, Result, TensorType, size, shape, ceil_divide
  3 | import ctypes
  4 | 
  5 | # Maybe we shouldn't have special CuDNN ops, we should just have the same
  6 | # convolution interface with various implementations
  7 | # see nice blog post http://benanne.github.io/2014/12/09/theano-metaopt.html
  8 | 
  9 | def cudnn_conv_closure(*ints):
 10 |     return (ctypes.c_int*len(ints))(*ints)
 11 | 
 12 | class CudnnConvForward(Op):
 13 |     def __init__(self, ph, pw, sv, sh):
 14 |         "pad_height, pad_width, stride_vertical, stride_horizontal"
 15 |         self.ph = ph
 16 |         self.pw = pw
 17 |         self.sv = sv
 18 |         self.sh = sh
 19 | 
 20 |     def cuda_code(self, _inputs, funcname):
 21 |         return """
 22 | void %(funcname)s(void* cldata, cgt_array** io) {
 23 |     CudaPerformConvForward(io[0], io[1], io[2], io[3], (conv_closure*)cldata, stream, handle)
 24 | }
 25 | """%dict(funcname=funcname)
 26 |     def cuda_includes(self): 
 27 |         return ["cudnn_conv.cuh"]
 28 |     def impl_data(self):        
 29 |         return (self.__class__.__name__,), cudnn_conv_closure(self.ph, self.pw, self.sv, self.sh)
 30 |     def shp_apply(self, inputs):
 31 |         X,W,_b = inputs
 32 |         h = ceil_divide(size(X,2)  - size(W, 2) + 1, self.sv)
 33 |         w = ceil_divide(size(X,3)  - size(W, 3) + 1, self.sh)
 34 |         return [size(X,0), size(W,0), h, w]
 35 |     def typ_apply(self, _inputs):
 36 |         return TensorType(cgt.floatX, 4)
 37 |     def pullback(self, inputs, output, gout):
 38 |         X,W,b = inputs
 39 |         # pass in an extra first argument to make output shape computation simpler
 40 |         return [Result(CudnnConvBackwardData(self.ph, self.pw, self.sv, self.sh),   [X,   gout, W]), 
 41 |                 Result(CudnnConvBackwardFilter(self.ph, self.pw, self.sv, self.sh), [W,   gout, X]), 
 42 |                 Result(CudnnConvBackwardBias(self.ph, self.pw, self.sv, self.sh),   [b,   gout])]
 43 | 
 44 | class CudnnConvBackwardData(Op):
 45 |     def __init__(self, ph, pw, sv, sh):
 46 |         self.ph = ph
 47 |         self.pw = pw
 48 |         self.sv = sv
 49 |         self.sh = sh    
 50 |     def cuda_code(self, _inputs, funcname):
 51 |         return """
 52 | void %(funcname)s(void* cldata, cgt_array** io) {
 53 |     CudaPerformConvBackwardData(io[1], io[2], io[3], (conv_closure*)cldata, stream, handle);
 54 | }
 55 | """%dict(funcname=funcname)
 56 |     def cuda_includes(self): 
 57 |         return ["cudnn_conv.hpp"]
 58 |     def shp_apply(self, inputs):
 59 |         return shape(inputs[0])
 60 |     def typ_apply(self, _inputs):
 61 |         return TensorType(cgt.floatX, 4)
 62 | 
 63 | class CudnnConvBackwardFilter(Op):
 64 |     def __init__(self, ph, pw, sv, sh):
 65 |         self.ph = ph
 66 |         self.pw = pw
 67 |         self.sv = sv
 68 |         self.sh = sh        
 69 |     def cuda_code(self, _inputs, funcname):
 70 |         return """
 71 | void %(funcname)s(void* cldata, cgt_array** io) {
 72 |     CudaPerformConvBackwardFilter(io[1], io[2], io[3],  (conv_closure*)cldata, stream, handle);
 73 | }
 74 | """%dict(funcname=funcname)
 75 |     def cuda_includes(self): 
 76 |         return ["cudnn_conv.hpp"]
 77 |     def impl_data(self):        
 78 |         return (self.__class__.__name__,), cudnn_conv_closure(self.ph, self.pw, self.sv, self.sh)
 79 |     def shp_apply(self, inputs):
 80 |         return shape(inputs[0])
 81 |     def typ_apply(self, _inputs):
 82 |         return TensorType(cgt.floatX, 4)
 83 | 
 84 | class CudnnConvBackwardBias(Op):
 85 |     def __init__(self, ph, pw, sv, sh):
 86 |         self.ph = ph
 87 |         self.pw = pw
 88 |         self.sv = sv
 89 |         self.sh = sh    
 90 |     def cuda_code(self, _inputs, funcname):
 91 |         return """
 92 | void %(funcname)s(void* cldata, cgt_array** io) {
 93 |     CudaPerformConvBackwardBias(io[1], io[2], io[3],  (conv_closure*)cldata, stream, handle);
 94 | }
 95 | """%dict(funcname=funcname)
 96 |     def cuda_includes(self): 
 97 |         return ["cudnn_conv.hpp"]
 98 |     def shp_apply(self, inputs):
 99 |         return shape(inputs[0])
100 |     def typ_apply(self, _inputs):
101 |         return TensorType(cgt.floatX, 4)
102 | 
103 | # def pool(x_ncuv, rows_in, cols_in, poolshp, pool_type='max'):
104 | #     if rows_in % poolshp[0] != 0 or cols_in % poolshp[0] != 0:
105 | #         row_residue = rows_in%poolshp[0]
106 | #         col_residue = cols_in%poolshp[1]
107 | #         warn("image shape not divisible by pool size. cropping %i/%i on top, %i/%i on left"%(row_residue,rows_in,col_residue,cols_in))
108 | #         x_ncuv = x_ncuv[:,:,:rows_in - row_residue, :cols_in - col_residue]
109 | #     x_ncpaqb = x_ncuv.reshape( (x_ncuv.shape[0], x_ncuv.shape[1], rows_in // poolshp[0], poolshp[0], cols_in // poolshp[1], poolshp[1]) )
110 | #     x_ncpqab = x_ncpaqb.transpose([0,1,2,4,3,5])
111 | #     x_ncpq_ab = cgt.reshape(x_ncpqab, shape(x_ncpqab)[:4] + [size(x_ncpqab,4)*size(x_ncpqab,5)])
112 | #     if pool_type == 'max':
113 | #         x_ncpq = x_ncpq_ab.max(axis=4)
114 | #     elif pool_type == 'mean':
115 | #         x_ncpq = x_ncpq_ab.mean(axis=4)
116 | #     elif pool_type == '2norm':
117 | #         x_ncpq = cgt.sqrt(cgt.square(x_ncpq_ab).sum(axis=4)) #pylint: disable=E1111
118 | #     elif pool_type == 'softmax':
119 | #         x_ncpq = cgt.log(cgt.exp(x_ncpq_ab).sum(axis=4)) #pylint: disable=E1111
120 | #     assert x_ncpq.ndim==4
121 | #     return x_ncpq
122 | 
123 | 
124 | class Pool(Op):
125 |     def __init__(self, kind, stride, kernel, pad):
126 |         self.kind = kind
127 |         self.stride = stride
128 |         self.kernel = kernel
129 |         self.pad = pad
130 |     def get_diff(self, _):
131 |         return [True]
132 |     def get_name(self):
133 |         return "%spool"%self.kind
134 |     def get_numeric_py(self):
135 |         raise cgt.core.Todo
136 |     def pullback(self, inputs, output, goutput):
137 |         raise cgt.core.Todo
138 |     def shp_apply(self, inputs):        
139 |         x = inputs[0]
140 |         assert x.ndim == 4
141 |         return [size(x,0), size(x,1), (size(x,2)-self.pad[0]-self.kernel[0]+1)//self.stride[0],  
142 |                                       (size(x,3)-self.pad[1]-self.kernel[1]+1)//self.stride[0]] 
143 |                                       # XXX round up or down?
144 |     def typ_apply(self, inputs):
145 |         return inputs[0].typ
146 | 
147 | def pool(kind, x, stride, kernel, pad):
148 |     return Result(Pool(kind,stride,kernel,pad), [x])
149 | 
150 | def lrn(x, alpha, beta, local_size):
151 |     s = Result(CudaLRNScaling(alpha, local_size), [x])
152 |     return s/cgt.power(s, -beta)
153 | 
154 | # XXX needs params
155 | class CudaLRNScaling(Op):
156 |     def __init__(self, alpha, local_size):
157 |         self.alpha = alpha
158 |         self.local_size = local_size
159 |     def cuda_code(self, _inputs, funcname):
160 |         return """
161 | void %(funcname)s(void* cldata, cgt_array** io) {
162 |   int block, thread, size;
163 |   size = num_img * height * width;
164 |   FindConfiguration(size, block, thread);
165 |   cgt_array* bottom=io[0], *scale=io[1];
166 |   ing num_img = bottom->shape[0], channel = bottom->shape[1],
167 |     height = bottom->shape[2], width=bottom->shape[2];
168 |   LRNFillScale<<<block, thread, 0, stream>>>(
169 |       size, bottom->data, num_img, channel, height, width, cl->local_size,
170 |       cl->alpha / cl->local_size, scale->data);
171 | }"""%dict(funcname=funcname)
172 |     def cuda_headers(self):
173 |         return ["cgt_cuda.h","lrn.cuh"]
174 |     def shp_apply(self, inputs):
175 |         return shape(inputs[0])
176 |     def typ_apply(self, _inputs):
177 |         return TensorType(cgt.floatX, 4)
178 | 


--------------------------------------------------------------------------------
/cgt/nn_ops/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/joschu/cgt/90b15ab041fc2137e62b96e8612ccee605f71ceb/cgt/nn_ops/__init__.py


--------------------------------------------------------------------------------
/cgt/nn_ops/cross_channel_lrn.py:
--------------------------------------------------------------------------------
  1 | import ctypes
  2 | import cgt
  3 | from cgt import core
  4 | from collections import namedtuple
  5 | 
  6 | LRNInfo = namedtuple("LRNInfo",["localsize","alpha","beta"])
  7 | 
  8 | def make_closure(info):
  9 |     return [
 10 |         ("localsize",ctypes.c_int,info.localsize),
 11 |         ("alpha",ctypes.c_double,info.alpha),
 12 |         ("beta",ctypes.c_double,info.beta)
 13 |     ]
 14 | 
 15 | class CrossChannelLRNForward(core.Op):
 16 |     available_impls = ("native_gpu",)    
 17 |     def __init__(self, info):
 18 |         assert isinstance(info, LRNInfo)
 19 |         self.info = info
 20 | 
 21 |     def get_native_compile_info(self, input_types, devtype):
 22 |         assert devtype == "gpu"
 23 |         d = dict(cdtype=core.np2c[input_types[0].dtype])
 24 |         cuda_code = r"""
 25 |             #include "cgt_cuda.h"
 26 |             #include "lrn.cuh"
 27 |             void launchker_$function(int num_img, int channels, int height, int width, int localsize, double alpha, double beta, %(cdtype)s* Xdata, %(cdtype)s* topdata, %(cdtype)s* scaledata) {
 28 |                 int size = num_img * height * width;
 29 |                 int nblocks, nthreads;
 30 |                 cgt_get_bt(size, nblocks, nthreads);
 31 |                 LRNFillScale<%(cdtype)s><<<nblocks, nthreads, 0>>>(
 32 |                     size, Xdata, num_img, channels, height, width, localsize, alpha / localsize, scaledata);
 33 |                 CUDA_CHECK_ERROR("LRNFillScale");
 34 | 
 35 |                 size = num_img * channels * width * height;
 36 |                 cgt_get_bt(size, nblocks, nthreads);
 37 |                 LRNComputeOutput<%(cdtype)s><<<nblocks, nthreads, 0>>>(size, Xdata, scaledata, -beta, topdata);
 38 |                 CUDA_CHECK_ERROR("LRNComputeOutput");
 39 |             }"""%d
 40 |         code = r"""
 41 |             extern void launchker_$function(int num_img, int channels, int height, int width, int localsize, double alpha, double beta, %(cdtype)s* Xdata, %(cdtype)s* topdata, %(cdtype)s* scaledata);
 42 |             CGT_EXPORT_C void $function($closure* cldata, cgtArray** reads, cgtTuple* write) {
 43 |                 cgtArray* X = reads[0];
 44 |                 int num_img = X->shape()[0],
 45 |                     channels = X->shape()[1],
 46 |                     height = X->shape()[2],
 47 |                     width = X->shape()[3];
 48 |                 cgtArray* top = (cgtArray*)write->getitem(0);                
 49 |                 cgtArray* scale = (cgtArray*)write->getitem(1);
 50 |                 launchker_$function(num_img, channels, height, width, cldata->localsize, cldata->alpha, cldata->beta, (%(cdtype)s*)X->data(), (%(cdtype)s*)top->data(), (%(cdtype)s*)scale->data());
 51 | 
 52 |             }"""%d
 53 |         return core.NativeCompileInfo(code, closure_triples = make_closure(self.info),
 54 |             link_flags="-lcudart", gpu_deref_mask=(True,), 
 55 |             extra_srcs=[core.SrcFile("cuda",cuda_code)])
 56 |     def shp_apply(self, inputs):
 57 |         return (inputs[0].shape,inputs[0].shape)
 58 |     def typ_apply(self, input_types):
 59 |         return core.TupleType(input_types[0], input_types[0])
 60 |     def pullback(self, inputs, output, gout):
 61 |         top, scaling = cgt.core.unpack(output)
 62 |         gtop, _ = gout
 63 |         return [core.Result(CrossChannelLRNBackward(self.info), [inputs[0], top, scaling, gtop])]
 64 | 
 65 | class CrossChannelLRNBackward(core.Op):
 66 |     available_impls = ("native_gpu",)    
 67 |     def __init__(self, info):
 68 |         self.info = info
 69 |     def get_native_compile_info(self, input_types, devtype):
 70 |         assert devtype == "gpu"   
 71 |         d = dict(cdtype=core.np2c[input_types[0].dtype])     
 72 |         cuda_code=r"""
 73 |             #include "cgt_cuda.h"
 74 |             #include "lrn.cuh"
 75 |             void launchker_$function(int num_img, int channels, int height, int width, int localsize, double alpha, double beta, %(cdtype)s* Xdata, 
 76 |                 %(cdtype)s* topdata, %(cdtype)s* scalingdata, %(cdtype)s* topdiffdata, %(cdtype)s* bottomdiffdata) {
 77 |                 int nblocks, nthreads;
 78 |                 int size = num_img * width * height;
 79 |                 cgt_get_bt(size, nblocks, nthreads);
 80 |                 LRNComputeDiff<%(cdtype)s><<<nblocks, nthreads, 0>>>(size, (%(cdtype)s*)Xdata, (%(cdtype)s*)topdata, 
 81 |                     (%(cdtype)s*)scalingdata, (%(cdtype)s*)topdiffdata,  num_img, channels, height, width, localsize, 
 82 |                     -beta, 2. * alpha * beta / localsize, (%(cdtype)s*)bottomdiffdata);
 83 |                 CUDA_CHECK_ERROR("CrossChannelLRNBackward");
 84 |             }
 85 |         """%d
 86 |         code = """        
 87 |             void launchker_$function(int num_img, int channels, int height, int width, int localsize, double alpha, double beta, %(cdtype)s* Xdata, 
 88 |                 %(cdtype)s* topdata, %(cdtype)s* scaledata, %(cdtype)s* topdiffdata, %(cdtype)s* bottomdiffdata);
 89 |             CGT_EXPORT_C void $function($closure* cldata, cgtArray** reads, cgtArray* bottom_diff) {
 90 |             cgtArray *X=reads[0], *top=reads[1], *scaling=reads[2], *top_diff=reads[3];
 91 |             int num_img = X->shape()[0],
 92 |                 channels = X->shape()[1],
 93 |                 height = X->shape()[2],
 94 |                 width = X->shape()[3];
 95 |             launchker_$function(num_img, channels, height, width, cldata->localsize, cldata->alpha, cldata->beta, (%(cdtype)s*)X->data(), 
 96 |                 (%(cdtype)s*)top->data(), (%(cdtype)s*)scaling->data(), (%(cdtype)s*)top_diff->data(), (%(cdtype)s*)bottom_diff->data());            
 97 |             }"""%d
 98 |         return core.NativeCompileInfo(code, closure_triples = make_closure(self.info),
 99 |             link_flags="-lcudart", gpu_deref_mask=(True,True,True,True), 
100 |             extra_srcs=[core.SrcFile("cuda",cuda_code)])
101 |     def shp_apply(self, inputs):
102 |         return cgt.shape(inputs[0])
103 |     def typ_apply(self, _inputs):
104 |         return core.TensorType(cgt.floatX, 4)
105 | 
106 | def cross_channel_lrn(X, localsize, alpha, beta):
107 |     assert X.ndim == 4
108 |     return core.Result(CrossChannelLRNForward(LRNInfo(localsize,alpha,beta)), [X])[0]
109 | 
110 | 
111 |     # print q[:-1].sum(), s[:-1].sum()
112 | 
113 | 
114 | 


--------------------------------------------------------------------------------
/cgt/nn_ops/cudnn_ops.py:
--------------------------------------------------------------------------------
  1 | import ctypes
  2 | import cgt
  3 | from cgt import core
  4 | from collections import namedtuple
  5 | 
  6 | def cudnn_conv_closure(*ints):
  7 |     return (ctypes.c_int*len(ints))(*ints)
  8 | 
  9 | def make_closure(ph, pw, sv, sh):
 10 |     return [
 11 |         ("ph",ctypes.c_int,ph),
 12 |         ("pw",ctypes.c_int,pw),
 13 |         ("sv",ctypes.c_int,sv),
 14 |         ("sh",ctypes.c_int,sh),
 15 |         ("handle",ctypes.c_void_p,0),
 16 |         ("stream",ctypes.c_void_p,0),
 17 |     ]
 18 | 
 19 | class CudnnConvForward(core.Op):
 20 |     available_impls = ("native_gpu",)    
 21 |     def __init__(self, ph, pw, sv, sh):
 22 |         "pad_height, pad_width, stride_vertical, stride_horizontal"
 23 |         self.ph = ph
 24 |         self.pw = pw
 25 |         self.sv = sv
 26 |         self.sh = sh
 27 | 
 28 |     def get_native_compile_info(self, _input_types, devtype):
 29 |         assert devtype=="gpu"
 30 |         code = """
 31 |             CGT_EXPORT_C void $setup(conv_closure* closure) {setup_cudnn(closure);}
 32 |             CGT_EXPORT_C void $teardown(conv_closure* closure) {teardown_cudnn(closure);}
 33 |             CGT_EXPORT_C void $function(conv_closure* closure, cgtArray** reads, cgtArray* write) {
 34 |                 if (!closure->handle) setup_cudnn(closure);
 35 |                 performConvForward(closure, reads[0], reads[1], reads[2], write);
 36 |             }"""
 37 |         return core.NativeCompileInfo(code, closure_triples = make_closure(self.ph, self.pw, self.sv, self.sh),
 38 |             includes=["cudnn_support.h"], link_flags="-lcudnn -lcudart")
 39 |     def shp_apply(self, inputs):
 40 |         X,W,_b = inputs
 41 |         h = cgt.ceil_divide(cgt.size(X,2) + self.ph*2 - cgt.size(W, 2) + 1, self.sv)
 42 |         w = cgt.ceil_divide(cgt.size(X,3) + self.pw*2 - cgt.size(W, 3) + 1, self.sh)
 43 |         return [cgt.size(X,0), cgt.size(W,0), h, w]
 44 |     def typ_apply(self, _inputs):
 45 |         return core.TensorType(cgt.floatX, 4)
 46 |     def pullback(self, inputs, _output, gout):
 47 |         X,W,b = inputs
 48 |         # pass in an extra first argument to make output shape computation simpler
 49 |         return [core.Result(CudnnConvBackwardData(self.ph, self.pw, self.sv, self.sh),   [X,   gout, W]), 
 50 |                 core.Result(CudnnConvBackwardFilter(self.ph, self.pw, self.sv, self.sh), [W,   gout, X]), 
 51 |                 core.Result(CudnnConvBackwardBias(self.ph, self.pw, self.sv, self.sh),   [b,   gout])]
 52 | 
 53 | class CudnnConvBackwardData(core.Op):
 54 |     available_impls = ("native_gpu",)    
 55 |     def __init__(self, ph, pw, sv, sh):
 56 |         self.ph = ph
 57 |         self.pw = pw
 58 |         self.sv = sv
 59 |         self.sh = sh    
 60 |     def get_native_compile_info(self, input_types, devtype):
 61 |         assert devtype=="gpu"
 62 |         code="""
 63 |             CGT_EXPORT_C void $setup(conv_closure* closure) {setup_cudnn(closure);}
 64 |             CGT_EXPORT_C void $teardown(conv_closure* closure) {teardown_cudnn(closure);}
 65 |             CGT_EXPORT_C void $function(conv_closure* closure, cgtArray** reads, cgtArray* write) {
 66 |                 if (!closure->handle) setup_cudnn(closure);
 67 |                 performConvBackwardData(closure, reads[1], reads[2], write);
 68 |             }"""
 69 |         return core.NativeCompileInfo(code, closure_triples = make_closure(self.ph, self.pw, self.sv, self.sh),
 70 |             includes=["cudnn_support.h"], link_flags="-lcudnn -lcudart")
 71 |     def shp_apply(self, inputs):
 72 |         return cgt.shape(inputs[0])
 73 |     def typ_apply(self, _inputs):
 74 |         return core.TensorType(cgt.floatX, 4)
 75 | 
 76 | class CudnnConvBackwardFilter(core.Op):
 77 |     available_impls = ("native_gpu",)    
 78 |     def __init__(self, ph, pw, sv, sh):
 79 |         self.ph = ph
 80 |         self.pw = pw
 81 |         self.sv = sv
 82 |         self.sh = sh        
 83 |     def get_native_compile_info(self, input_types, devtype):
 84 |         assert devtype=="gpu"
 85 |         code = """
 86 |             CGT_EXPORT_C void $setup(conv_closure* closure) {setup_cudnn(closure);}
 87 |             CGT_EXPORT_C void $teardown(conv_closure* closure) {teardown_cudnn(closure);}
 88 |             CGT_EXPORT_C void $function(conv_closure* closure, cgtArray** reads, cgtArray* write) {
 89 |                 if (!closure->handle) setup_cudnn(closure);
 90 |                 performConvBackwardFilter(closure, reads[1], reads[2], write);
 91 |             }"""
 92 |         return core.NativeCompileInfo(code, closure_triples = make_closure(self.ph, self.pw, self.sv, self.sh),
 93 |             includes=["cudnn_support.h"], link_flags="-lcudnn -lcudart")
 94 |     def shp_apply(self, inputs):
 95 |         return cgt.shape(inputs[0])
 96 |     def typ_apply(self, _inputs):
 97 |         return core.TensorType(cgt.floatX, 4)
 98 | 
 99 | class CudnnConvBackwardBias(core.Op):
100 |     available_impls = ("native_gpu",)
101 |     def __init__(self, ph, pw, sv, sh):
102 |         self.ph = ph
103 |         self.pw = pw
104 |         self.sv = sv
105 |         self.sh = sh    
106 |     def get_native_compile_info(self, input_types, devtype):
107 |         assert devtype == "gpu"
108 |         code = """
109 |             CGT_EXPORT_C void $setup(conv_closure* closure) {setup_cudnn(closure);}
110 |             CGT_EXPORT_C void $teardown(conv_closure* closure) {teardown_cudnn(closure);}
111 |             CGT_EXPORT_C void $function(conv_closure* closure, cgtArray** reads, cgtArray* write) {
112 |                 if (!closure->handle) setup_cudnn(closure);
113 |                 performConvBackwardBias(closure, reads[1], write);
114 |             }"""
115 |         return core.NativeCompileInfo(code, closure_triples = make_closure(self.ph, self.pw, self.sv, self.sh),
116 |             includes=["cudnn_support.h"], link_flags="-lcudnn -lcudart")            
117 |     def shp_apply(self, inputs):
118 |         return cgt.shape(inputs[0])
119 |     def typ_apply(self, _inputs):
120 |         return core.TensorType(cgt.floatX, 4)
121 | 
122 | PoolInfo = namedtuple("PoolInfo", ["kernel_h", "kernel_w", "pad_h", "pad_w", "stride_h", "stride_w"])
123 | 
124 | def poolinfo2closure(info):
125 |     return [
126 |         ("kernel_h", ctypes.c_int, info.kernel_h),
127 |         ("kernel_w", ctypes.c_int, info.kernel_w),
128 |         ("pad_h", ctypes.c_int, info.pad_h),
129 |         ("pad_w", ctypes.c_int, info.pad_w),
130 |         ("stride_h", ctypes.c_int, info.stride_h),
131 |         ("stride_w", ctypes.c_int, info.stride_w),
132 |         ("handle",ctypes.c_void_p,0),
133 |         ("stream",ctypes.c_void_p,0),
134 |     ]    
135 | 
136 | class CudnnPoolForward(core.Op):
137 |     available_impls = ("native_gpu",)    
138 |     def __init__(self, info):
139 |         self.info = info
140 | 
141 |     def get_native_compile_info(self, _input_types, devtype):
142 |         assert devtype == "gpu"
143 |         code = """
144 |             CGT_EXPORT_C void $setup(pooling_closure* closure) {setup_cudnn(closure);}
145 |             CGT_EXPORT_C void $teardown(pooling_closure* closure) {teardown_cudnn(closure);}
146 |             CGT_EXPORT_C void $function(pooling_closure* closure, cgtArray** reads, cgtArray* write) {
147 |                 if (!closure->handle) setup_cudnn(closure);
148 |                 performPoolingForward(closure, reads[0], write);
149 |             }"""
150 |         return core.NativeCompileInfo(code, closure_triples = poolinfo2closure(self.info),
151 |             includes=["cudnn_support.h"], link_flags="-lcudnn -lcudart")
152 |     def shp_apply(self, inputs):
153 |         info = self.info
154 |         batch_size, channels, height, width = cgt.shape(inputs[0])
155 |         pooled_height =  cgt.ceil_divide(height + 2*info.pad_h - info.kernel_h, info.stride_h)
156 |         pooled_width = cgt.ceil_divide(width + 2*info.pad_w - info.kernel_w, info.stride_w)
157 |         outshape = [batch_size ,  channels, pooled_height, pooled_width]
158 |         return outshape
159 |     def typ_apply(self, input_types):
160 |         return input_types[0]
161 |     def pullback(self, inputs, output, gout):
162 |         return [core.Result(CudnnPoolBackward(self.info),   [inputs[0], output, gout])]
163 | 
164 | 
165 | class CudnnPoolBackward(core.Op):
166 |     available_impls = ("native_gpu",)    
167 |     def __init__(self, info):
168 |         self.info = info
169 | 
170 |     def get_native_compile_info(self, _input_types, devtype):
171 |         assert devtype == "gpu"
172 |         code = """
173 |             CGT_EXPORT_C void $setup(pooling_closure* closure) {setup_cudnn(closure);}
174 |             CGT_EXPORT_C void $teardown(pooling_closure* closure) {teardown_cudnn(closure);}
175 |             CGT_EXPORT_C void $function(pooling_closure* closure, cgtArray** reads, cgtArray* write) {
176 |                 if (!closure->handle) setup_cudnn(closure);
177 |                 performPoolingBackward(closure, reads[0], reads[1], reads[2], write);
178 |             }"""
179 |         return core.NativeCompileInfo(code, closure_triples = poolinfo2closure(self.info),
180 |             includes=["cudnn_support.h"], link_flags="-lcudnn -lcudart")
181 |     def shp_apply(self, inputs):
182 |         return cgt.shape(inputs[0])
183 |     def typ_apply(self, input_types):
184 |         return input_types[0]
185 |     def pullback(self, inputs, output, gout):
186 |         raise NotImplementedError
187 | 
188 | 
189 | 
190 | 


--------------------------------------------------------------------------------
/cgt/nn_ops/im2col.py:
--------------------------------------------------------------------------------
 1 | import cgt
 2 | from cgt import core
 3 | import ctypes
 4 | from collections import namedtuple
 5 | 
 6 | Im2ColInfo = namedtuple("Im2ColInfo", ["kernel_h", "kernel_w", "pad_h", "pad_w", "stride_h", "stride_w"])
 7 | 
 8 | def im2col(x, kernelshape, pad, stride):
 9 |     assert stride[0] > 0 and stride[1] > 0
10 |     assert kernelshape[0] > 0 and kernelshape[1] > 0
11 |     kernelshape, pad, stride = map(tuple, (kernelshape, pad, stride))
12 |     return core.Result(Im2Col(Im2ColInfo(*(kernelshape+pad+stride))), [x])
13 | 
14 | def info2closure(info):
15 |     return [
16 |         ("kernel_h", ctypes.c_int, info.kernel_h),
17 |         ("kernel_w", ctypes.c_int, info.kernel_w),
18 |         ("pad_h", ctypes.c_int, info.pad_h),
19 |         ("pad_w", ctypes.c_int, info.pad_w),
20 |         ("stride_h", ctypes.c_int, info.stride_h),
21 |         ("stride_w", ctypes.c_int, info.stride_w),
22 |     ]    
23 | 
24 | 
25 | class Im2Col(core.Op):
26 |     available_impls = ("native_cpu",)        
27 |     def __init__(self, info):
28 |         assert info.stride_h>0 and info.stride_w>0
29 |         self.info = info
30 |     def get_diff(self, _):
31 |         return [True]
32 |     def get_py_impl(self):
33 |         raise core.MethodNotDefined
34 |     def pullback(self, (x,), _y, gy):
35 |         return [core.Result(Col2Im(self.info), [gy] + cgt.shape(x))]
36 |     def shp_apply(self, inputs):
37 |         info = self.info
38 |         batch_size, channels, height, width = cgt.shape(inputs[0])
39 |         height_out = (height + 2 * info.pad_h - info.kernel_h) // info.stride_h + 1
40 |         width_out = (width + 2 * info.pad_w - info.kernel_w) // info.stride_w + 1
41 |         return [batch_size ,  height_out,  width_out, channels * info.kernel_w * info.kernel_h]
42 |     def typ_apply(self, inputs):
43 |         assert inputs[0].ndim == 4
44 |         return core.TensorType(inputs[0].dtype, 4)
45 |     def get_native_compile_info(self, input_types, devtype):
46 |         assert devtype == "cpu"
47 |         d = dict(cdtype=core.np2c[input_types[0].dtype])
48 |         d.update(self.info._asdict())
49 |         code = r"""
50 |             CGT_EXPORT_C void $function($closure* cl, cgtArray** reads, cgtArray* write) {
51 |                 cgtArray* im = reads[0];
52 |                 const long* imshape = im->shape();
53 |                 int batchsize = imshape[0],
54 |                     channels = imshape[1],
55 |                     height = imshape[2],
56 |                     width = imshape[3];
57 |                 for (int i=0; i < batchsize; ++i) {
58 |                     im2col_cpu<%(cdtype)s, %(kernel_h)s,%(kernel_w)s,%(pad_h)s,%(pad_w)s,%(stride_h)s,%(stride_w)s>
59 |                     ((%(cdtype)s*)im->data() + im->stride(0)*i, channels, height, width, (%(cdtype)s*)write->data() + write->stride(0)*i);
60 |                 }
61 |             }"""%d
62 |         return core.NativeCompileInfo(code, includes=["im2col.h"], closure_triples=info2closure(self.info))
63 | 
64 | class Col2Im(core.Op):
65 |     available_impls = ("native_cpu",)            
66 |     def __init__(self, info):
67 |         self.info = info
68 |     def get_diff(self, _):
69 |         return [True]
70 |     def get_py_impl(self):
71 |         raise core.MethodNotDefined
72 |     def shp_apply(self, inputs):
73 |         return inputs[1:]
74 |     def typ_apply(self, inputs):
75 |         return core.TensorType(inputs[0].dtype, 4)
76 |     def get_closure(self, _inputs):
77 |         return info2closure(self.info)
78 |     def get_native_compile_info(self, input_types, devtype):
79 |         d = dict(cdtype=core.np2c[input_types[0].dtype])
80 |         d.update(self.info._asdict())
81 |         code = r"""
82 |             CGT_EXPORT_C void $function($closure* cl, cgtArray** reads, cgtArray* write) {
83 |                 cgtArray* col = reads[0];
84 |                 long batchsize = reads[1]->at<long>(0),
85 |                        channels  = reads[2]->at<long>(0),
86 |                        height    = reads[3]->at<long>(0),
87 |                        width     = reads[4]->at<long>(0);
88 |                 for (int i=0; i < batchsize; ++i) {
89 |                     col2im_cpu<%(cdtype)s, %(kernel_h)s,%(kernel_w)s,%(pad_h)s,%(pad_w)s,%(stride_h)s,%(stride_w)s>
90 |                     ((%(cdtype)s*)col->data() + col->stride(0)*i, channels, height, width,(%(cdtype)s*)write->data() + write->stride(0)*i);
91 |                 }
92 |             }"""%d
93 |         return core.NativeCompileInfo(code, includes=["im2col.h"], closure_triples=info2closure(self.info))
94 | 
95 | 


--------------------------------------------------------------------------------
/cgt/nn_ops/max_pool_2d.py:
--------------------------------------------------------------------------------
 1 | import cgt
 2 | from cgt import core
 3 | import ctypes
 4 | from collections import namedtuple
 5 | import numpy as np
 6 | 
 7 | 
 8 | # <Copied from im2col.py>
 9 | PoolInfo = namedtuple("PoolInfo", ["kernel_h", "kernel_w", "pad_h", "pad_w", "stride_h", "stride_w"])
10 | 
11 | def info2closure(info):
12 |     return [
13 |         ("kernel_h", ctypes.c_int, info.kernel_h),
14 |         ("kernel_w", ctypes.c_int, info.kernel_w),
15 |         ("pad_h", ctypes.c_int, info.pad_h),
16 |         ("pad_w", ctypes.c_int, info.pad_w),
17 |         ("stride_h", ctypes.c_int, info.stride_h),
18 |         ("stride_w", ctypes.c_int, info.stride_w),
19 |     ]    
20 | # </Copied>
21 | 
22 | class MaxPool(core.Op):
23 |     available_impls = ("native_cpu",)    
24 |     def __init__(self, info):
25 |         assert info.stride_h>0 and info.stride_w>0        
26 |         self.info = info
27 |     def get_diff(self, _):
28 |         return [True]
29 |     def get_py_impl(self):
30 |         raise core.MethodNotDefined
31 |     def pullback(self, (x,), y, gy):
32 |         pool,mask = core.unpack(y)
33 |         gpool,_gmask = gy
34 |         return [core.Result(MaxPoolPullback(self.info), [x,pool,mask,gpool])]
35 |     def shp_apply(self, inputs):
36 |         # pooled_height_ = static_cast<int>(ceil(static_cast<float>(height_ + 2 * pad_h_ - kernel_h_) / stride_h_)) + 1;
37 |         # pooled_width_ = static_cast<int>(ceil(static_cast<float>(width_ + 2 * pad_w_ - kernel_w_) / stride_w_)) + 1;
38 |         info = self.info
39 |         batch_size, channels, height, width = cgt.shape(inputs[0])
40 |         pooled_height =  cgt.ceil_divide(height + 2*info.pad_h - info.kernel_h, info.stride_h)
41 |         pooled_width = cgt.ceil_divide(width + 2*info.pad_w - info.kernel_w, info.stride_w)
42 |         outshape = [batch_size ,  channels, pooled_height, pooled_width]
43 |         return (outshape, outshape)
44 |     def typ_apply(self, inputs):
45 |         return core.TupleType(core.TensorType(inputs[0].dtype, 4), core.TensorType('i4', 4))
46 |     def get_closure(self, _inputs):
47 |         return info2closure(self.info)
48 |     def get_native_compile_info(self, input_types, devtype):
49 |         code = r"""
50 | CGT_EXPORT_C void $function(conv_closure* cl, cgtArray** reads, cgtTuple* write) {
51 |     max_pool<%(cdtype)s>(cl, reads[0], static_cast<cgtArray*>(write->getitem(0)), static_cast<cgtArray*>(write->getitem(1)));
52 | }"""%dict(cdtype=core.np2c[input_types[0].dtype])
53 |         return core.NativeCompileInfo(code, closure_triples=info2closure(self.info), includes=["pooling.h"])
54 | 
55 | class MaxPoolPullback(core.Op):
56 |     available_impls = ("native_cpu",)
57 |     def __init__(self, info):
58 |         self.info = info
59 |     def get_py_impl(self):
60 |         raise core.MethodNotDefined
61 |     def shp_apply(self, inputs):
62 |         return cgt.shape(inputs[0])
63 |     def typ_apply(self, inputs):
64 |         return core.TensorType(inputs[0].dtype, 4)
65 |     def get_closure(self, _inputs):
66 |         return info2closure(self.info)
67 |     def get_native_compile_info(self, input_types, devtype):
68 |         code = r"""
69 | CGT_EXPORT_C void $function(conv_closure* cl, cgtArray** reads, cgtArray* write) {
70 |     max_pool_pullback<%(cdtype)s>(reads[0], reads[1], reads[2], reads[3], write);
71 | }"""%dict(cdtype=core.np2c[input_types[0].dtype])
72 |         return core.NativeCompileInfo(code, closure_triples=info2closure(self.info), includes=["pooling.h"])
73 | 
74 | 


--------------------------------------------------------------------------------
/cgt/numeric_diff.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | def numeric_grad(f,x,eps=1e-9,method="central"):
 4 |     if method == "central":
 5 |         xpert = x.copy()
 6 |         out = np.zeros_like(x)
 7 |         for i in xrange(x.size):
 8 |             xpert.flat[i] = x.flat[i] + eps
 9 |             yplus = f(xpert)
10 |             xpert.flat[i] = x.flat[i] - eps
11 |             yminus = f(xpert)
12 |             xpert.flat[i] = x.flat[i]
13 |             out.flat[i] = (yplus - yminus) / (2*eps)
14 |             if (i+1)%1000 == 0: print "%i/%i components done"%(i+1,x.size)
15 |         return out
16 |     else:
17 |         raise NotImplementedError("invalid method %s"%method)
18 | 
19 | def numeric_grad_multi(f, xs, eps=1e-9,method="central"):
20 |     out = []
21 |     for i in xrange(len(xs)):
22 |         li = list(xs)        
23 |         def f1(x):
24 |             li[i] = x
25 |             return f(*li)
26 |         out.append(numeric_grad(f1, xs[i], eps, method=method))
27 |     return out
28 | 
29 | 


--------------------------------------------------------------------------------
/cgt/tests/__init__.py:
--------------------------------------------------------------------------------
 1 | import cgt
 2 | from cgt.numeric_diff import numeric_grad, numeric_grad_multi
 3 | from functools import wraps
 4 | import itertools
 5 | import numpy as np
 6 | 
 7 | 
 8 | def gradcheck_model(cost, params, extravars=(), extravals=(), atol=1e-8, eps=1e-9):
 9 |     precision = cgt.get_precision()
10 |     if precision == "single":
11 |         cgt.utils.warn("You're doing a gradient check with %s precision. Use double or better yet quad for best results"%(precision))
12 |     assert all(param.is_input() for param in params)
13 |     assert len(extravars) == len(extravals)
14 | 
15 |     # Convert to Argument nodes
16 |     param_args = [cgt.core.Argument(typ=s.typ,name=s.name)if s.is_data() else s for s in params]
17 | 
18 |     # Get new cost in terms o farguments
19 |     cost = cgt.core.clone(cost, replace=dict(zip(params,param_args)))
20 | 
21 |     grads = cgt.grad(cost, param_args)
22 |     paramvals = [param.op.get_value() for param in params]
23 |     fcost = cgt.function(param_args, cost, givens=zip(extravars,extravals))
24 |     fgrad = cgt.function(param_args, grads,givens=zip(extravars,extravals))
25 | 
26 |     angrads = fgrad(*paramvals)
27 |     nugrads = numeric_grad_multi(fcost, paramvals, eps=eps)
28 | 
29 |     for (angrad,nugrad) in zip(angrads,nugrads):
30 |         assert np.allclose(angrad,nugrad,atol=atol)
31 | 
32 | 
33 | def across_configs(*args, **kwargs):
34 |     """
35 |     Decorator for Nose test. Generates tests for all combinations of configuration options.
36 | 
37 |     Examples:
38 | 
39 |     Generates tests for all assignments of backends and precision.
40 |       @across_configs
41 |       def test_adagrad(): ...
42 | 
43 |     Generates tests for python/single and python/double:
44 |       @across_configs(backends=("python",), precisions=("single", "double"))
45 |       def test_adagrad(): ...
46 |     """
47 | 
48 |     # If one function arg is passed, then apply this decorator with no parameters
49 |     if len(args) == 1 and not kwargs and hasattr(args[0], "__call__"):
50 |         return across_configs()(args[0])
51 | 
52 |     assert not args
53 |     backends = kwargs.get("backends", ("python", "native"))
54 |     precisions = kwargs.get("precisions", ("single", "double"))
55 |     devtypes = kwargs.get("devtypes",("cpu",))
56 |     pass_settings = kwargs.get("pass_settings", False)
57 | 
58 |     def decorator(check_func):
59 |         @wraps(check_func)
60 |         def check_func_with_config(backend, precision, devtype):
61 |             with cgt.scoped_update_config(backend=backend, precision=precision, default_device=cgt.core.Device(devtype=devtype)):
62 |                 if pass_settings:
63 |                     check_func(backend=backend, precision=precision)
64 |                 else:
65 |                     check_func()
66 | 
67 |         @wraps(check_func_with_config)
68 |         def wrapper():
69 |             for backend, precision, devtype in itertools.product(backends, precisions, devtypes):
70 |                 yield check_func_with_config, backend, precision, devtype
71 |         return wrapper
72 | 
73 |     return decorator
74 | 


--------------------------------------------------------------------------------
/cgt/tests/_test_assert.py:
--------------------------------------------------------------------------------
 1 | import cgt
 2 | import unittest
 3 | 
 4 | def myfunc(x):
 5 |     print "x",x
 6 | 
 7 | class AssertTestCase(unittest.TestCase):
 8 |     def runTest(self):
 9 |         if cgt.get_config()["backend"] != "python":
10 |             cgt.utils.warn("Skipping test -- only works for backend=python")
11 |             return
12 |         x = cgt.scalar()
13 |         with cgt.debug_context() as dbg:
14 |             cgt.assert_(cgt.equal(x, 1),"yoyoyo")
15 |             cgt.dbg_call(myfunc, x)
16 |             print "dbg",dbg.nodes
17 |             # cgt.assert_(cgt.equal(x, 2))
18 | 
19 |         f = cgt.make_function([x],[x],dbg=dbg)
20 |         f(1)
21 |         with self.assertRaises(AssertionError):
22 |             f(2)
23 | 
24 | 
25 | if __name__ == "__main__":
26 |     AssertTestCase().runTest()
27 | 


--------------------------------------------------------------------------------
/cgt/tests/_test_cycgt.py:
--------------------------------------------------------------------------------
 1 | import cycgt
 2 | import cgt
 3 | import numpy as np
 4 | import unittest
 5 | 
 6 | 
 7 | class CyCgtTestCase(unittest.TestCase):
 8 |     def test_cycgt(self):
 9 |         x = cgt.vector('x')
10 |         y = cgt.vector('y')
11 |         z = y/x
12 |         cs = cycgt.CallSequence([x,y],[z], list(cgt.topsorted([z])))
13 | 
14 |         xshp = (4,)
15 |         yshp = (4,)
16 |         zshp = (4,)
17 | 
18 |         xval = np.random.randn(*xshp).astype('float32')
19 |         yval = np.random.randn(*yshp).astype('float32')
20 |         zval = np.random.randn(*zshp).astype('float32')
21 | 
22 |         cs.set_shapes([xshp,yshp,zshp])
23 |         cs.set_inputs([xval,yval])
24 |         cs.execute()
25 |         print xval, yval
26 |         print xval * yval
27 |         np.testing.assert_allclose(yval/xval , cs.get_outputs_numpy()[0])
28 | 
29 | 
30 | if __name__ == "__main__":
31 |     unittest.main()
32 | 


--------------------------------------------------------------------------------
/cgt/tests/_test_eg.py:
--------------------------------------------------------------------------------
 1 | import cgt
 2 | import unittest
 3 | import numpy as np 
 4 | import pprint
 5 | class EgTestCase(unittest.TestCase):
 6 |     def runTest(self):
 7 |         cgt.set_precision('double')
 8 |         x = cgt.vector()
 9 |         y = cgt.square(x)
10 |         eg = cgt.execution.compilation_pipeline([x],[y+y],[])
11 |         pprint.pprint(eg.to_json())
12 |         import cycgt
13 |         interp = cycgt.cInterpreter(eg)
14 |         print interp(np.array([3,4,5,6],'f8'))
15 | 
16 | if __name__ == "__main__":
17 |     EgTestCase().runTest()


--------------------------------------------------------------------------------
/cgt/tests/_test_flatvec.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import cgt
 3 | from cgt import nn, core
 4 | 
 5 | def test_flatvec():
 6 |     cgt.reset_config
 7 |     cgt.set_precision('double')
 8 |     cgt.core.update_config(backend="python") # XXX
 9 | 
10 |     N = 10
11 |     K = 3
12 | 
13 |     Xval = np.random.randn(N,K)
14 |     wval = np.random.randn(K)
15 |     bval = np.random.randn()
16 |     yval = np.random.randn(N)
17 | 
18 |     X_nk = cgt.shared(Xval, "X")
19 |     y_n = cgt.shared(yval, "y")
20 |     w_k = cgt.shared(wval, "w")
21 |     b = cgt.shared(bval, name="b")
22 | 
23 |     ypred = cgt.dot(X_nk, w_k) + b
24 | 
25 |     err = cgt.sum(cgt.square(ypred - y_n))
26 |     g = cgt.grad(err, [w_k, b])
27 |     g = core.simplify(g)
28 | 
29 |     pars = [w_k, b]
30 |     flatx = nn.setup_contiguous_storage(pars)
31 |     f = cgt.function([], [err,cgt.flatcat(g)])
32 | 


--------------------------------------------------------------------------------
/cgt/tests/_test_shapecheck.py:
--------------------------------------------------------------------------------
1 | import cgt
2 | # X = cgt.matrix(fixed_shape=(10,3))
3 | y = cgt.vector(fixed_shape=(3,))
4 | w = cgt.vector(fixed_shape=(5,))
5 | # z = X.dot(y)
6 | y+w
7 | # cgt.print_tree(cgt.core.simplify(cgt.shape(z)))


--------------------------------------------------------------------------------
/cgt/tests/_test_tuples.py:
--------------------------------------------------------------------------------
 1 | import cgt, numpy as np
 2 | import unittest
 3 | 
 4 | class TupleTestCase(unittest.TestCase):
 5 |     def runTest(self):
 6 |         f1 = cgt.function1([], ())
 7 |         assert f1() == ()
 8 | 
 9 |         x = cgt.vector()
10 |         xval = np.random.randn(1)
11 |         f2 = cgt.function([x], [(x,x),(x,),()])
12 |         ytrue = [(xval,xval),(xval,),()]
13 |         y = f2(xval)
14 |         assert y==ytrue
15 | if __name__ == "__main__":
16 |     TupleTestCase().runTest()
17 | 
18 | 


--------------------------------------------------------------------------------
/cgt/tests/test_array_wrapper.py:
--------------------------------------------------------------------------------
 1 | import cgt, numpy as np
 2 | from cgt.tests import across_configs
 3 | 
 4 | 
 5 | @across_configs
 6 | def test_array_wrapper():
 7 |     xval = np.zeros(10)
 8 |     x = cgt.shared(xval)
 9 |     f = cgt.function([],[],updates=[(x,x+1)])
10 |     f()
11 |     g = cgt.function([],x.sum())
12 |     assert np.allclose(x.op.get_value(), xval+1)
13 |     xval2 = np.arange(10)
14 |     x.op.set_value(xval2)
15 |     print x.op.get_value()
16 |     assert np.allclose(x.op.get_value(), xval2)
17 |     assert g() == xval2.sum()
18 |     f()
19 |     assert np.allclose(x.op.get_value(), xval2+1)
20 |     assert g() == (xval2+1).sum()
21 | 
22 | 
23 | if __name__ == "__main__":
24 |     import nose
25 |     nose.runmodule()


--------------------------------------------------------------------------------
/cgt/tests/test_conv.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import cgt
 3 | from cgt import nn
 4 | from cgt.tests import across_configs
 5 | from nose.plugins.skip import SkipTest
 6 | 
 7 | @across_configs()
 8 | def test_conv():
 9 |     try:
10 |         import scipy.signal
11 |     except ImportError:
12 |         raise SkipTest("skipping because we don't have ndimage")
13 | 
14 |     np.random.seed(0)
15 |     x = np.random.randn(2,2,5,17)
16 |     filt = np.random.randn(3,2,4,7)
17 | 
18 |     filtrows = filt.shape[2]
19 |     filtcols = filt.shape[3]
20 | 
21 |     batchsize = x.shape[0]
22 |     outchans = filt.shape[0]
23 | 
24 |     out = np.zeros((batchsize,outchans,x.shape[2]+filtrows-1,x.shape[3]+filtcols-1))
25 |     for b in xrange(x.shape[0]):
26 |         for inchan in xrange(x.shape[1]):
27 |             for outchan in xrange(outchans):
28 |                 out[b,outchan] += scipy.signal.convolve2d(x[b,inchan],filt[outchan,inchan][::-1,::-1],mode='full')
29 | 
30 |     f = cgt.function([], nn.conv2d(cgt.constant(x), cgt.constant(filt), kernelshape=(filtrows,filtcols), pad=(filtrows-1, filtcols-1)))
31 |     out1 = f()
32 |     # out1 = cgt.numeric_eval1(nn.conv2d(cgt.constant(x), cgt.constant(f), kersize=(filtrows,filtcols)), {})
33 |     np.testing.assert_allclose(out, out1, atol={"single":1e-3,"double":1e-6}[cgt.get_precision()])
34 | 
35 | if __name__ == "__main__":
36 |     import nose
37 |     nose.runmodule()


--------------------------------------------------------------------------------
/cgt/tests/test_devices.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import cgt
 3 | from cgt.tests import across_configs
 4 | from nose.plugins.skip import SkipTest
 5 | 
 6 | @across_configs(backends=("native",))
 7 | def test_devices():
 8 |     N = 10
 9 |     K = 3
10 | 
11 |     compile_info = cgt.compilation.get_compile_info()
12 |     cuda_enabled = compile_info["CGT_ENABLE_CUDA"]
13 |     if not cuda_enabled:
14 |         raise SkipTest("cuda disabled")
15 | 
16 |     Xval = np.random.randn(N,K).astype(cgt.floatX)
17 |     wval = np.random.randn(K).astype(cgt.floatX)
18 |     bval = np.asarray(np.random.randn()).astype(cgt.floatX)
19 |     yval = np.random.randn(N).astype(cgt.floatX)
20 | 
21 |     with cgt.scoped_update_config(default_device=cgt.Device(devtype="gpu")):
22 | 
23 |         X_nk = cgt.shared(Xval, "X", device=cgt.Device(devtype='gpu'))
24 |         y_n = cgt.shared(yval, "y")
25 |         w_k = cgt.shared(wval, "w")
26 |         b = cgt.shared(bval, name="b")
27 | 
28 |         print "bval",bval
29 | 
30 |         ypred = cgt.dot(cgt.square(X_nk), w_k) + b
31 | 
32 |         err = cgt.sum(cgt.sin(ypred - y_n))
33 |         g = cgt.grad(err, [w_k, b])
34 |         outputs = [err]+g
35 |         f = cgt.function([], [err]+g)
36 |         results = f()
37 |         print results
38 |         assert np.allclose(results[0] , np.sin(np.square(Xval).dot(wval)+bval-yval).sum())
39 | 
40 | 
41 | if __name__ == "__main__":
42 |     import nose
43 |     nose.runmodule()
44 | 


--------------------------------------------------------------------------------
/cgt/tests/test_einsum.py:
--------------------------------------------------------------------------------
 1 | import cgt, numpy as np,numpy.random as nr
 2 | from cgt.tests import across_configs
 3 | 
 4 | 
 5 | @across_configs
 6 | def test_einsum():
 7 |     x = cgt.tensor3()
 8 |     y = cgt.tensor3()
 9 | 
10 |     sizes = {'i':2,'j':3,'k':5,'l':7}
11 |     xaxes = 'ijk'
12 |     yaxes = 'ikl'
13 |     zaxes = 'ijl'
14 |     for i in xrange(10):
15 |         xperm = xaxes
16 |         (yperm,zperm) = permaxes = [[chars[i] for i in np.random.permutation(3)] for chars in [yaxes,zaxes]]
17 |         desc = "%s,%s->%s"%tuple("".join(chars) for chars in [xperm] + permaxes)
18 |         z = cgt.einsum(desc, x, y)
19 |         xval = nr.randn(*(sizes[c] for c in xperm))
20 |         yval = nr.randn(*(sizes[c] for c in yperm))
21 |         np.testing.assert_allclose(
22 |             cgt.numeric_eval(z, {x : xval, y : yval}),
23 |             np.einsum(desc, xval, yval),
24 |             atol={"single":1e-3,"double":1e-6}[cgt.get_precision()])
25 | 
26 | 
27 | if __name__ == "__main__":
28 |     import nose
29 |     nose.runmodule()
30 | 


--------------------------------------------------------------------------------
/cgt/tests/test_examples.py:
--------------------------------------------------------------------------------
 1 | import subprocess, os.path as osp
 2 | from nose.plugins.skip import SkipTest
 3 | 
 4 | thisdir = osp.dirname(__file__)
 5 | 
 6 | def run_example(cmd, filedepends=None):
 7 |     if filedepends and not osp.exists(filedepends):
 8 |         raise SkipTest(cmd)
 9 |     else:
10 |         subprocess.check_call(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
11 | def skip_example(cmd):
12 |     raise SkipTest(cmd)
13 | 
14 | def test_examples():
15 |     mnist = "%s/../../downloads/mnist.npz"%thisdir
16 |     cifar = "%s/../../downloads/cifar10.npz"%thisdir
17 |     yield run_example, "CGT_FLAGS=backend=python python %s/../../examples/demo_mnist.py --unittest"%thisdir, mnist
18 |     yield run_example, "CGT_FLAGS=backend=native python %s/../../examples/demo_mnist.py --unittest"%thisdir,mnist
19 |     yield run_example, "python %s/../../examples/cgt_theano_feedforward_comparison.py --unittest"%thisdir,mnist
20 |     yield run_example, "CGT_FLAGS=backend=native python %s/../../examples/demo_cifar.py --unittest"%thisdir,cifar
21 |     yield run_example, "cd %s/../../examples/ && CGT_FLAGS=backend=native python demo_char_rnn.py --unittest"%thisdir
22 |     yield run_example, "CGT_FLAGS=backend=native python %s/../../examples/demo_neural_turing_machine.py --unittest"%thisdir
23 |     runipycmd = "runipy %s/../../examples/tutorial.ipynb"%thisdir
24 |     try:
25 |         import graphviz
26 |         yield run_example, runipycmd
27 |     except ImportError:
28 |         yield skip_example, runipycmd
29 | 
30 | if __name__ == "__main__":
31 |     import nose
32 |     nose.runmodule()


--------------------------------------------------------------------------------
/cgt/tests/test_imgproc.py:
--------------------------------------------------------------------------------
  1 | import numpy as np, numpy.random as nr
  2 | from cgt.numeric_diff import numeric_grad_multi
  3 | import cgt
  4 | from cgt.nn import max_pool_2d, im2col, cross_channel_lrn, cudnn_ops
  5 | from cgt.compilation import get_compile_info
  6 | from cgt import utils
  7 | from cgt.tests import across_configs, gradcheck_model
  8 | from nose.plugins.skip import SkipTest
  9 | 
 10 | @across_configs(precisions=("double",), backends=("native",))
 11 | def test_cudnn():
 12 |     compile_info = get_compile_info()
 13 |     if not (compile_info["CGT_ENABLE_CUDNN"] and compile_info["CGT_ENABLE_CUDA"]):
 14 |         raise SkipTest("CUDNN not enabled. Skipping this test")
 15 | 
 16 |     Xval = nr.randn(2,3,19,18)
 17 |     Wval = nr.randn(5,3,3,3)
 18 |     bval = nr.randn(1,5,1,1)
 19 | 
 20 |     X = cgt.tensor4("X", fixed_shape=Xval.shape)
 21 |     W = cgt.tensor4("W", fixed_shape=Wval.shape)
 22 |     b = cgt.tensor4("b", fixed_shape=bval.shape)
 23 | 
 24 | 
 25 |     Y = cgt.core.Result(cudnn_ops.CudnnConvForward(1,1,1,1),[X, W, b])
 26 | 
 27 |     Y2 = nr.randn(*cgt.core.infer_shape(Y))
 28 | 
 29 |     fY = cgt.function([X,W,b],Y)
 30 |     Yval = fY(Xval,Wval,bval)
 31 |     cost = (Y*Y2).sum()
 32 |     fcost = cgt.function([X,W,b],cost)
 33 |     fgrad = cgt.function([X,W,b],cgt.grad(cost, [X,W,b]))
 34 |     angrads = fgrad(Xval,Wval,bval)
 35 |     nugrads = numeric_grad_multi(fcost, [Xval, Wval, bval],eps=1e-3)
 36 |     for (nugrad,angrad) in zip(nugrads,angrads):
 37 |         assert np.allclose(nugrad, angrad, rtol=9e-3, atol=1e-7) 
 38 |         # precision issue: https://groups.google.com/forum/?utm_medium=email&utm_source=footer#!msg/cgt-users/l59nwLF9BzM/aDxcHU5pCgAJ
 39 | 
 40 | @across_configs(precisions=("double",), backends=("native",))
 41 | def test_pool(**kwargs):
 42 |     np.random.seed(0)
 43 |     x = cgt.tensor4("x", fixed_shape=(2,3,5,7))
 44 |     y = max_pool_2d(x, (4,4),(0,0),(1,1))
 45 |     xval = np.random.randn(2,3,5,7)
 46 |     hval = np.random.randn(*cgt.infer_shape(y))
 47 |     h = cgt.constant(hval)
 48 | 
 49 |     cost = (y*h).sum()
 50 | 
 51 |     fcost = cgt.function([x], cost)
 52 |     fgrad = cgt.function([x], cgt.grad(cost, [x])[0])
 53 | 
 54 |     from cgt.numeric_diff import numeric_grad
 55 |     gnum = numeric_grad(fcost, xval)
 56 |     gana = fgrad(xval)
 57 |     assert np.allclose(gnum,gana)
 58 | 
 59 | @across_configs(precisions=("quad",), backends=("native",))
 60 | def test_im2col():
 61 |     for settings in [ ((4,4),(0,0),(1,1)), ((3,3),(1,1),(2,2)), ((3,3),(1,1),(3,3)) ]:
 62 |         xval = np.arange(2*1*28*28).reshape(2,1,28,28).astype(cgt.floatX)
 63 |         x = cgt.tensor4("x", fixed_shape=xval.shape)
 64 |         y = im2col(x, *settings)
 65 |         h = cgt.constant(np.random.randn(*cgt.infer_shape(y)))
 66 |         cost = (y*h).sum()
 67 | 
 68 |         fcost = cgt.function([x],cost)
 69 |         fgrad = cgt.function([x], cgt.grad(cost, [x])[0])
 70 | 
 71 |         from cgt.numeric_diff import numeric_grad
 72 |         gnum = numeric_grad(fcost, xval,eps=1e-5)
 73 |         gana = fgrad(xval)
 74 |         assert np.allclose(gnum, gana)
 75 |         # fy = cgt.function([x],y)
 76 |         # yval = fy(xval)
 77 |         # assert np.allclose(yval[0,0,0] , xval[0,:,0:4,0:4].flatten())
 78 | 
 79 | @across_configs(precisions=("double",), backends=("native",))
 80 | def test_lrn():
 81 |     if not get_compile_info()["CGT_ENABLE_CUDA"]:
 82 |         raise SkipTest("Skipping because CUDA disabled")
 83 | 
 84 |     nr.seed(0)
 85 |     Xval = nr.randn(4,8,16,16)
 86 |     X = cgt.shared(Xval, name="X", fixed_shape_mask="all")
 87 |     # X = cgt.tensor4(name='X')
 88 |     y = cross_channel_lrn(X, localsize=4, alpha=.1, beta=.5)
 89 |     f = cgt.function([],y)
 90 |     print f().sum()
 91 |     print f().sum()
 92 |     print f().sum()
 93 |     assert np.isfinite(f().sum())
 94 |     # print f(Xval).sum()
 95 |     a = nr.rand(*cgt.infer_shape(y))
 96 |     loss = (y*a).sum()
 97 |     gradcheck_model(loss, [X],eps=1e-5)
 98 | 
 99 | 
100 | if __name__ == "__main__":
101 |     import nose
102 |     nose.runmodule()
103 | 


--------------------------------------------------------------------------------
/cgt/tests/test_inc_subtensor.py:
--------------------------------------------------------------------------------
 1 | import cgt
 2 | import numpy as np
 3 | from cgt.tests import across_configs
 4 | 
 5 | @across_configs(backends=("python","native"), precisions=("single","double"))
 6 | def test_incsubtensor0():
 7 |     # First let's test fancy slice along zeroth dimension
 8 | 
 9 |     W = cgt.shared(np.zeros((5,3)), name="W")
10 |     inc = cgt.matrix() # we'll increment W by this matrix
11 |     incval = np.arange(9).reshape(3,3)
12 |     
13 | 
14 |     inds = cgt.vector(dtype='i8')
15 |     updates = {W : cgt.inc_subtensor(W, inds, inc)}
16 |     f = cgt.function([inds,inc],[],updates=updates)
17 |     f([1,2,4],incval)
18 | 
19 |     assert np.allclose(W.op.get_value(), 
20 |         np.array(
21 |         [[ 0.,  0.,  0.],
22 |          [ 0.,  1.,  2.],
23 |          [ 3.,  4.,  5.],
24 |          [ 0.,  0.,  0.],
25 |          [ 6.,  7.,  8.]]))
26 | 
27 | 
28 |     # Now let's test non-fancy slice along zeroth dimension
29 | 
30 | @across_configs(backends=("python","native"), precisions=("single","double"))
31 | def test_incsubtensor1():
32 |     W = cgt.shared(np.zeros((5,3)), name="W")
33 |     inc = cgt.matrix() # we'll increment W by this matrix
34 |     incval = np.arange(9).reshape(3,3)
35 | 
36 |     start = cgt.scalar(dtype='i8')
37 |     stop = cgt.scalar(dtype='i8')
38 |     updates = {W : cgt.inc_subtensor(W, slice(start, stop), inc)}
39 |     f = cgt.function([start,stop,inc],[],updates=updates)
40 |     f(0,3,incval)
41 |     assert np.allclose(W.op.get_value(), 
42 |         np.array(
43 |         [
44 |          [ 0.,  1.,  2.],
45 |          [ 3.,  4.,  5.],
46 |          [ 6.,  7.,  8.],
47 |          [ 0.,  0.,  0.],
48 |          [ 0.,  0.,  0.],
49 |          ]))
50 | 
51 |     # Now let's test the last kind of slice, where we have int arrays on each dimension
52 | 
53 | @across_configs(backends=("python","native"), precisions=("single","double"))
54 | def test_incsubtensor2():
55 |     W = cgt.shared(np.zeros((5,3)), name="W")
56 |     i0 = cgt.vector(dtype='i8')
57 |     i1 = cgt.vector(dtype='i8')
58 |     inc = cgt.vector()
59 | 
60 |     updates2 = {W : cgt.inc_subtensor(W, (i0,i1), inc)}
61 |     f2 = cgt.function([i0,i1,inc],[],updates=updates2)
62 |     f2([0,1,2,2],[0,1,2,2],[1,2,3,4])
63 |     assert np.allclose(W.op.get_value(), 
64 |         np.array(
65 |         [
66 |          [ 1.,  0.,  0.],
67 |          [ 0.,  2.,  0.],
68 |          [ 0.,  0.,  7.],
69 |          [ 0.,  0.,  0.],
70 |          [ 0.,  0.,  0.],
71 |          ]))
72 | 
73 | 
74 | 
75 | 


--------------------------------------------------------------------------------
/cgt/tests/test_informative_errors.py:
--------------------------------------------------------------------------------
 1 | import cgt, numpy as np
 2 | from nose.tools import raises
 3 | from StringIO import StringIO
 4 | import sys
 5 | 
 6 | class CaptureStderr(object):
 7 |     def __init__(self):
 8 |         self.origstderr = sys.stderr
 9 |     def __enter__(self):
10 |         self.s = StringIO()
11 |         sys.stderr = self.s
12 |         return self.s
13 |     def __exit__(self, *args):
14 |         self.stderr = self.origstderr
15 | 
16 | @raises(RuntimeError)
17 | def test_shape_err():
18 |     with CaptureStderr():
19 |         with cgt.scoped_update_config(debug=True, backend="python"):
20 |             x = cgt.vector()
21 |             y = cgt.vector()
22 |             f = cgt.function([x,y],x+y)
23 |             f(np.zeros(3),np.zeros(4))
24 | 
25 | if __name__ == "__main__":
26 |     import nose
27 |     nose.runmodule()
28 | 


--------------------------------------------------------------------------------
/cgt/tests/test_input_conversions.py:
--------------------------------------------------------------------------------
 1 | from cgt.tests import across_configs
 2 | import cgt, numpy as np
 3 | 
 4 | @across_configs
 5 | def test_noncontiguous_matrix():
 6 | 
 7 |     x = np.arange(1,7).reshape(2,3).astype(cgt.floatX)
 8 |     result = np.log(x.sum(axis=0)).sum()
 9 | 
10 | 
11 |     xvar = cgt.matrix()
12 |     f = cgt.function([xvar],cgt.log(xvar.sum(axis=0)).sum())
13 | 
14 | 
15 |     assert np.allclose( f(np.asarray(x, order='C')), result)
16 |     assert np.allclose( f(np.asarray(x, order='C', dtype='int64')), result)
17 |     assert np.allclose( f(np.asarray(x, order='F')), result)
18 | 
19 |     X = np.zeros((4,6))
20 |     X[::2,::2] = x
21 |     assert np.allclose( f(X[::2,::2]), result)
22 | 
23 | @across_configs
24 | def test_scalar_input():
25 |     x = cgt.scalar()
26 |     f = cgt.function([x], x**2)
27 |     xval = 2
28 |     yval = 4
29 |     assert np.allclose(f(2), 4)
30 |     assert np.allclose(f(2.0), 4)    
31 |     assert np.allclose(f(np.array(2)), 4)      
32 |     assert np.allclose(f(np.array(2.0)), 4)    
33 |     assert np.allclose(f(np.array([2])[0]), 4)        
34 |     assert np.allclose(f(np.array([2.0])[0]), 4)        
35 | 


--------------------------------------------------------------------------------
/cgt/tests/test_linreg.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import cgt
 3 | from cgt.tests import across_configs
 4 | 
 5 | @across_configs
 6 | def test_linreg():
 7 |     N = 10
 8 |     K = 3
 9 | 
10 |     Xval = np.random.randn(N,K)
11 |     wval = np.random.randn(K)
12 |     bval = np.random.randn()
13 |     yval = np.random.randn(N)
14 | 
15 |     X_nk = cgt.matrix("X")
16 |     y_n = cgt.vector("y")
17 |     w_k = cgt.vector("w")
18 |     b = cgt.scalar(name="b")
19 | 
20 |     ypred = cgt.dot(X_nk, w_k) + b
21 | 
22 |     err = cgt.sum(cgt.square(ypred - y_n))
23 |     g = cgt.grad(err, [w_k, b])
24 | 
25 |     g_simple,an,_ = cgt.core.simplify_and_analyze(g)
26 | 
27 | 
28 |     print "Loss function:"
29 |     cgt.print_tree([err])
30 |     print "Gradient:"
31 |     cgt.print_tree(g)
32 | 
33 |     print "Gradient simplified"
34 |     cgt.print_tree(g_simple, nodefn=lambda node,o: o.write(" " + an["node2hash"][node][:5]))
35 | 
36 |     print "-------"
37 | 
38 |     d = {X_nk : Xval, w_k : wval, b : bval, y_n : yval}
39 | 
40 |     np.testing.assert_allclose(cgt.numeric_eval(err,d), np.linalg.norm(Xval.dot(wval) + bval - yval)**2,
41 |         atol={"single":1e-3,"double":1e-6}[cgt.get_precision()])
42 |     np.testing.assert_allclose(cgt.numeric_eval(g[0],d), 2 * Xval.T.dot(Xval.dot(wval) + bval - yval),
43 |         atol={"single":1e-3,"double":1e-6}[cgt.get_precision()])
44 |     np.testing.assert_allclose(cgt.numeric_eval(g[1],d), 2 *  np.sum(Xval.dot(wval) + bval - yval, 0),
45 |         atol={"single":1e-3,"double":1e-6}[cgt.get_precision()])
46 |     # add_log_entry("linreg", collect(values(d)), collect(keys(d)), [err], [g])
47 | 
48 | if __name__ == "__main__":
49 |     import nose
50 |     nose.runmodule()
51 | 


--------------------------------------------------------------------------------
/cgt/tests/test_multi_output.py:
--------------------------------------------------------------------------------
 1 | import cgt, numpy as np
 2 | from cgt import core
 3 | from cgt.tests import across_configs
 4 | 
 5 | class SinCos(core.Op):
 6 |     return_type = "byval"
 7 |     available_impls = ("python",)
 8 |     def typ_apply(self, inputs):
 9 |         assert inputs[0].dtype == 'f4'
10 |         d = inputs[0].ndim
11 |         return core.TupleType(core.TensorType(cgt.floatX, d), core.TensorType(cgt.floatX, d))
12 |     def shp_apply(self, inputs):
13 |         return (cgt.shape(inputs[0]), cgt.shape(inputs[0]))
14 |     def get_py_func(self, inputs):
15 |         def f(reads):
16 |             x = reads[0]
17 |             return (np.sin(x), np.cos(x))
18 |         return f
19 |     # c_extra_link_flags = "-lm"
20 |     # c_extra_includes = ["math.h"]
21 | #     def get_c_impl(self, inputs):
22 | #         code = """
23 | # void CGT_FUNCNAME(void* cldata, cgt_array** io) {
24 | #     float* x = io[0]->data();
25 | #     float* y = io[1]->data();
26 | #     float* z = io[2]->data();
27 | #     y[0] = sinf(x[0]);
28 | #     z[0] = cosf(x[0]);
29 | # }"""
30 | #         return CImpl(code, includes=["math.h"], link_flags="-lm")
31 | 
32 | class SinCos2(core.Op):
33 |     return_type = "byref"
34 |     available_impls = ("python","native_cpu")    
35 |     def typ_apply(self, inputs):
36 |         ndim = inputs[0].ndim
37 |         return core.TupleType(core.TensorType(cgt.floatX, ndim), core.TensorType(cgt.floatX, ndim))
38 |     def shp_apply(self, inputs):
39 |         return (cgt.shape(inputs[0]), cgt.shape(inputs[0]))
40 |     def get_py_func(self, inputs):
41 |         def f(reads, write):
42 |             x = reads[0]
43 |             write[0][...] = np.sin(x)
44 |             write[1][...] = np.cos(x)
45 |         return f
46 |     def get_native_compile_info(self, input_types,devtype):
47 |         assert devtype=="cpu"
48 |         code = """
49 | CGT_EXPORT_C void $function(void* cldata, cgtArray** reads, cgtTuple* write) {
50 |     float* x = static_cast<float*>(reads[0]->data());
51 |     float* y = static_cast<float*>(static_cast<cgtArray*>(write->getitem(0))->data());
52 |     float* z = static_cast<float*>(static_cast<cgtArray*>(write->getitem(1))->data());
53 |     for (int i=0; i < reads[0]->size(); ++i) {
54 |         y[i] = sinf(x[i]);
55 |         z[i] = cosf(x[i]);    
56 |     }
57 | }"""
58 |         return core.NativeCompileInfo(code, includes=["math.h"], link_flags="-lm")
59 | 
60 | @across_configs(precisions=("single",))
61 | def test_multi_output():
62 |     for x in (cgt.scalar('x'), cgt.vector('x'), cgt.matrix('x')):
63 |         for cls in (SinCos, SinCos2):
64 |             y,z = core.unpack(core.Result(cls(), [x]))
65 |             xnum = np.ones((3,)*x.ndim, cgt.floatX)
66 |             correct = (np.sin(xnum),np.cos(xnum))
67 |             yznum = cgt.numeric_eval([y,z], {x:xnum})
68 |             np.testing.assert_allclose(yznum, correct)
69 |             f = cgt.function([x],[y,z])
70 |             np.testing.assert_allclose(f(xnum), correct)
71 | 
72 | if __name__ == "__main__":
73 |     import nose
74 |     nose.runmodule()
75 | 


--------------------------------------------------------------------------------
/cgt/tests/test_optimizers.py:
--------------------------------------------------------------------------------
  1 | import cgt
  2 | import cgt.nn as nn
  3 | from cgt.tests import across_configs
  4 | import numpy as np
  5 | 
  6 | # Torch values obtained via this script: https://gist.github.com/ebenolson/931e879ed38f257253d2
  7 | 
  8 | torch_values = {
  9 |     'sgd': [0.81707280688755,0.6648326359915,0.5386151140949],
 10 |     'momentum': [0.6848486952183,0.44803321781003,0.27431190123502],
 11 |     # TORCH:
 12 |     # 'nesterov_momentum': [0.67466543592725,0.44108468114241,0.2769002108997],
 13 |     # OURS:
 14 |     'nesterov_momentum' : [0.6848486661911011, 0.4480332136154175, 0.2743118703365326], 
 15 |     # Different because we're using
 16 |     # version from http://arxiv.org/abs/1212.0901v2, which is returning "lookahead parameters"
 17 |     'adagrad': [0.55373120047759,0.55373120041518,0.55373120039438],
 18 |     'rmsprop': [0.83205403985348,0.83205322744821,0.83205295664444],
 19 |     'adadelta': [0.95453237704725,0.9545237471374,0.95452214847397],
 20 |     'adam': [0.90034973381771,0.90034969365796,0.90034968027137],
 21 | }
 22 | scales = [0.1, 0.2, 0.3]
 23 | 
 24 | 
 25 | def f(X, scale):
 26 |     return (scale*X**2).sum()
 27 | 
 28 | @across_configs
 29 | def test_sgd():
 30 |     results = []
 31 |     for scale in scales:
 32 |         A = cgt.shared(1.0)
 33 |         B = cgt.shared(1.0)
 34 |         updates = nn.sgd(f(A, scale) + f(B, scale), [A, B], learning_rate=0.1)
 35 |         do_update = cgt.function([], [], updates=updates)
 36 |         for _ in range(10):
 37 |             do_update()
 38 | 
 39 |         assert np.allclose(A.op.get_value(), B.op.get_value())
 40 |         results.append(A.op.get_value().copy())
 41 | 
 42 |     assert np.allclose(results, torch_values['sgd'])
 43 | 
 44 | 
 45 | @across_configs
 46 | def test_momentum():
 47 |     results = []
 48 |     for scale in scales:
 49 |         A = cgt.shared(1.0)
 50 |         B = cgt.shared(1.0)
 51 |         updates = nn.momentum(f(A, scale) + f(B, scale), [A, B], learning_rate=0.1, mu=0.5)
 52 |         do_update = cgt.function([], [], updates=updates)
 53 |         for _ in range(10):
 54 |             do_update()
 55 | 
 56 |         assert np.allclose(A.op.get_value(), B.op.get_value())
 57 |         results.append(A.op.get_value().copy())
 58 | 
 59 |     assert np.allclose(results, torch_values['momentum'])
 60 | 
 61 | 
 62 | @across_configs
 63 | def test_nesterov_momentum():
 64 |     results = []
 65 |     for scale in scales:
 66 |         A = cgt.shared(1.0)
 67 |         B = cgt.shared(1.0)
 68 |         updates = nn.momentum(f(A, scale) + f(B, scale), [A, B], learning_rate=0.1, mu=0.5)
 69 |         do_update = cgt.function([], [], updates=updates)
 70 |         for _ in range(10):
 71 |             do_update()
 72 |         assert np.allclose(A.op.get_value(), B.op.get_value())
 73 |         results.append(A.op.get_value().copy())
 74 | 
 75 |     assert np.allclose(results, torch_values['nesterov_momentum'])
 76 | 
 77 | 
 78 | @across_configs
 79 | def test_adagrad():
 80 |     results = []
 81 |     for scale in scales:
 82 |         A = cgt.shared(1.0)
 83 |         B = cgt.shared(1.0)
 84 |         updates = nn.adagrad(f(A, scale) + f(B, scale), [A, B], learning_rate=0.1)
 85 |         do_update = cgt.function([], [], updates=updates)
 86 |         for _ in range(10):
 87 |             do_update()
 88 | 
 89 |         assert np.allclose(A.op.get_value(), B.op.get_value())
 90 |         results.append(A.op.get_value().copy())
 91 | 
 92 |     assert np.allclose(results, torch_values['adagrad'])
 93 | 
 94 | 
 95 | @across_configs
 96 | def test_rmsprop():
 97 |     results = []
 98 |     for scale in scales:
 99 |         A = cgt.shared(1.0)
100 |         B = cgt.shared(1.0)
101 |         updates = nn.rmsprop(f(A, scale) + f(B, scale), [A, B], learning_rate=0.01)
102 |         do_update = cgt.function([], [], updates=updates)
103 |         for _ in range(10):
104 |             do_update()
105 | 
106 |         assert np.allclose(A.op.get_value(), B.op.get_value())
107 |         results.append(A.op.get_value().copy())
108 | 
109 |     assert np.allclose(results, torch_values['rmsprop'])
110 | 
111 | 
112 | @across_configs
113 | def test_adadelta():
114 |     results = []
115 |     for scale in scales:
116 |         A = cgt.shared(1.0)
117 |         B = cgt.shared(1.0)
118 |         updates = nn.adadelta(f(A, scale) + f(B, scale), [A, B])
119 |         do_update = cgt.function([], [], updates=updates)
120 |         for _ in range(10):
121 |             do_update()
122 | 
123 |         assert np.allclose(A.op.get_value(), B.op.get_value())
124 |         results.append(A.op.get_value().copy())
125 | 
126 |     assert np.allclose(results, torch_values['adadelta'])
127 | 
128 | if __name__ == "__main__":
129 |     import nose
130 |     nose.runmodule()
131 | 


--------------------------------------------------------------------------------
/cgt/tests/test_par_interp.py:
--------------------------------------------------------------------------------
 1 | import cgt
 2 | from cgt.core import Op
 3 | from cgt.tests import across_configs
 4 | import time
 5 | import numpy as np
 6 | from numpy.random import randn, seed
 7 | 
 8 | # NOTE observe differences clearly if add a time.sleep to Mul21
 9 | 
10 | 
11 | class SleepFor(Op):
12 |     return_type="byval"
13 |     available_impls=("native_cpu",)
14 |     def get_native_compile_info(self, _, __):
15 |         code=r"""
16 |             CGT_EXPORT_C cgtArray* $function(void* cldata, cgtArray** reads) {
17 |                 float t = reads[1]->at<float>(0);
18 |                 usleep(t * 1000000);
19 |                 return reads[0];
20 |             }"""
21 |         return cgt.core.NativeCompileInfo(code,includes=["unistd.h"])
22 |     def typ_apply(self, input_types):
23 |         assert input_types[1].dtype == cgt.floatX
24 |         return input_types[0]
25 |     def shp_apply(self, inputs):
26 |         return cgt.shape(inputs[0])
27 | 
28 | 
29 | def sleepfor(x, t):
30 |     return cgt.core.Result(SleepFor(), [x, t])
31 | 
32 | @across_configs(backends=("native",))
33 | def test_sleeps():
34 |     with cgt.scoped_update_config(parallel=True):
35 |         x = cgt.scalar('x')
36 |         y1 = sleepfor(x, .1)
37 |         y2 = sleepfor(x, .1)
38 | 
39 |         z=y1+y2
40 |         fpar = cgt.function([x],z)
41 |         
42 |         tstart = time.time()
43 |         fpar(0)
44 |         elapsed = time.time() - tstart
45 |         assert elapsed < .11
46 | 
47 | 
48 | @across_configs(backends=("native",))
49 | def test_matmuls():
50 |     with cgt.scoped_update_config(parallel=True):
51 | 
52 |         m = 8
53 |         d = 1000
54 | 
55 |         # build graph
56 | 
57 |         X = cgt.matrix("X")
58 |         Y = cgt.matrix("Y")
59 |         loss=0
60 |         for k in xrange(m):
61 |             # loss = loss+cgt.sin(X*Y+k).sum()
62 |             loss = loss+(X.dot(Y+k)).sum()
63 | 
64 |             f = cgt.function([X,Y], loss)
65 | 
66 |         # test things out!
67 | 
68 |         seed(0)
69 | 
70 |         X_val = randn(d, d)
71 |         Y_val = randn(d, d)
72 |         vals = [X_val, Y_val]
73 | 
74 |         tic = time.time()
75 |         out = f(*vals)
76 |         toc = time.time()
77 | 
78 |         print toc-tic
79 |     
80 | 
81 | @across_configs(backends=("native",))
82 | def test_update():
83 |     with cgt.scoped_update_config(parallel=True):
84 |         xval = np.array(1.5)
85 |         x = cgt.shared(xval)
86 |         f = cgt.function([], x.sum(), updates=[(x,x+1)])
87 |         before = x.op.get_value().copy()
88 |         f()
89 |         after = x.op.get_value()
90 |         assert np.allclose(after , before+1)
91 | 
92 | 
93 | if __name__ == "__main__":
94 |     import nose
95 |     nose.runmodule()
96 | 


--------------------------------------------------------------------------------
/cgt/tests/test_scalars.py:
--------------------------------------------------------------------------------
 1 | import cgt, numpy as np, numpy.random as nr, itertools as it
 2 | from cgt import core, utils
 3 | from cgt.numeric_diff import numeric_grad
 4 | from cgt.tests import across_configs
 5 | 
 6 | DISPLAY=False
 7 | 
 8 | @across_configs
 9 | def test_scalars():
10 |     np.random.seed(0)
11 |     x = cgt.scalar('x')
12 |     y = cgt.scalar('y')
13 |     z = cgt.scalar('z')
14 |     vars = [x,y,z] #pylint: disable=W0622
15 |     vals = nr.rand(len(vars))+1
16 | 
17 |     PROB2RESULT = {}
18 | 
19 |     for ((key,_), cls) in it.chain(
20 |             it.izip(core.UNARY_INFO.items(),it.repeat(core.ElwiseUnary)),
21 |             it.izip(core.BINARY_INFO.items(),it.repeat(core.ElwiseBinary))
22 |             ):
23 |         if key == "conj":
24 |             print "skipping conj"
25 |             continue
26 |         utils.colorprint(utils.Color.YELLOW, "Testing %s\n"%key)
27 |         if cls == core.ElwiseUnary:
28 |             n_in = 1
29 |             op = cls(key)
30 |         else:
31 |             n_in = 2
32 |             op = cls(key, (True,True))
33 |         inputvars = vars[0:n_in]
34 |         inputvals = vals[0:n_in]
35 |         out = core.Result(op, inputvars)
36 |         f = cgt.function(inputvars, out)
37 |         try:
38 |             grads = cgt.grad(out, inputvars)
39 |         except core.NonDifferentiable:
40 |             print "nondiff"
41 |             continue
42 |         if DISPLAY:
43 |             print "Function:"
44 |             cgt.print_tree(out)
45 |             print "Gradient original:"
46 |             cgt.print_tree(grads)
47 |             print "Gradient simplified:"
48 |         grads_simple = core.simplify(grads)
49 |         if DISPLAY: cgt.print_tree(grads_simple)
50 |         gradf = cgt.function(inputvars, grads)
51 |         eps = {"single":1e-4,"double":1e-9}[cgt.get_precision()]
52 |         nugrad = numeric_grad(lambda li: f(*li), inputvals,eps=eps) #pylint: disable=W0640
53 |         cgtgrad = gradf(*inputvals)
54 |         np.testing.assert_almost_equal(nugrad,cgtgrad,decimal={"single":3,"double":6}[cgt.get_precision()])
55 | 
56 |         grad_count = core.count_nodes(grads_simple)
57 |         PROB2RESULT[key] = {}
58 |         PROB2RESULT[key]["grad"] = grad_count
59 | 
60 |     if DISPLAY:
61 |         from thirdparty.tabulate import tabulate
62 |         print tabulate([[key,val["grad"]] for (key,val) in PROB2RESULT.iteritems()],headers=["funcname","gradcount"])    
63 | 
64 | if __name__ == "__main__":
65 |     import nose
66 |     nose.runmodule()
67 | 


--------------------------------------------------------------------------------
/cgt/tests/test_stack.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import cgt
 3 | from cgt.tests import across_configs
 4 | 
 5 | @across_configs
 6 | def test_stack():
 7 |     x = cgt.scalar()
 8 |     y = cgt.scalar()
 9 |     z = cgt.scalar()
10 |     s0 = cgt.stack([x, y, z], axis=0)
11 |     assert cgt.numeric_eval(s0, {x: 1, y: 2, z: 3}).shape == (3,)
12 | 
13 |     x = cgt.vector()
14 |     y = cgt.vector()
15 |     z = cgt.vector()
16 |     v0 = cgt.stack([x, y, z], axis=0)
17 |     assert cgt.numeric_eval(v0, {x: np.zeros(2), y: np.zeros(2), z: np.zeros(2)}).shape == (3, 2)
18 |     v1 = cgt.stack([x, y, z], axis=1)
19 |     assert cgt.numeric_eval(v1, {x: np.zeros(2), y: np.ones(2), z: np.zeros(2)}).shape == (2, 3)
20 | 
21 |     x = cgt.matrix()
22 |     y = cgt.matrix()
23 |     z = cgt.matrix()
24 |     m0 = cgt.stack([x, y, z], axis=0)
25 |     assert cgt.numeric_eval(m0, {x: np.zeros((2, 4)), y: np.zeros((2, 4)), z: np.zeros((2, 4))}).shape == (3, 2, 4)
26 |     m1 = cgt.stack([x, y, z], axis=1)
27 |     assert cgt.numeric_eval(m1, {x: np.zeros((2, 4)), y: np.zeros((2, 4)), z: np.zeros((2, 4))}).shape == (2, 3, 4)
28 |     m2 = cgt.stack([x, y, z], axis=2)
29 |     assert cgt.numeric_eval(m2, {x: np.zeros((2, 4)), y: np.zeros((2, 4)), z: np.zeros((2, 4))}).shape == (2, 4, 3)
30 | 
31 | if __name__ == "__main__":
32 |     import nose
33 |     nose.runmodule()
34 | 


--------------------------------------------------------------------------------
/cgt/utils.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import numpy as np
 3 | import hashlib
 4 | import time
 5 | 
 6 | # ================================================================
 7 | # Utils
 8 | # ================================================================
 9 | 
10 | class Color: #pylint: disable=W0232
11 |     GRAY=30
12 |     RED=31
13 |     GREEN=32
14 |     YELLOW=33
15 |     BLUE=34
16 |     MAGENTA=35
17 |     CYAN=36
18 |     WHITE=37
19 |     CRIMSON=38    
20 | 
21 | 
22 | def colorize(num, string, bold=False, highlight = False):
23 |     assert isinstance(num, int)
24 |     attr = []
25 |     if highlight: num += 10
26 |     attr.append(str(num))
27 |     if bold: attr.append('1')
28 |     return '\x1b[%sm%s\x1b[0m' % (';'.join(attr), string)
29 | 
30 | def colorprint(colorcode, text, o=sys.stdout):
31 |     o.write(colorize(colorcode, text))
32 | 
33 | def warn(msg):
34 |     print colorize(Color.YELLOW, msg)
35 | 
36 | def error(msg):
37 |     print colorize(Color.RED, msg)
38 | 
39 | def is_singleton(x):
40 |     return isinstance(x, np.ndarray) and np.prod(x.shape)==1
41 | 
42 | def safezip(x,y):
43 |     assert len(x) == len(y)
44 |     return zip(x,y)
45 | 
46 | def safezip3(x,y,z):
47 |     assert len(x) == len(y) == len(z)
48 |     return zip(x,y,z)
49 | 
50 | 
51 | def allsame(xs):
52 |     out = True
53 |     if len(xs)>0:
54 |         x0 = xs[0]
55 |         for x in xs[1:]:
56 |             out &= x==x0
57 |     return out
58 | 
59 | def invert_perm(x):
60 |     return list(np.argsort(x))
61 | 
62 | def _hash_seq(args):
63 |     hashobj = hashlib.md5()
64 |     for a in args: hashobj.update(a)
65 |     return hashobj.hexdigest()
66 | 
67 | def hash_seq1(*args):
68 |     return _hash_seq(args)
69 | 
70 | MESSAGE_DEPTH = 0
71 | class Message(object):
72 |     def __init__(self, msg):
73 |         self.msg = msg
74 |     def __enter__(self):
75 |         global MESSAGE_DEPTH #pylint: disable=W0603
76 |         print colorize(Color.MAGENTA, '\t'*MESSAGE_DEPTH + '=: ' + self.msg)
77 |         self.tstart = time.time()
78 |         MESSAGE_DEPTH += 1
79 |     def __exit__(self, etype, *args):
80 |         global MESSAGE_DEPTH #pylint: disable=W0603
81 |         MESSAGE_DEPTH -= 1
82 |         maybe_exc = "" if etype is None else " (with exception)"
83 |         print colorize(Color.MAGENTA, '\t'*MESSAGE_DEPTH + "done%s in %.3f seconds"%(maybe_exc, time.time() - self.tstart))
84 | 


--------------------------------------------------------------------------------
/cgtrc.example:
--------------------------------------------------------------------------------
 1 | # see cgtrc_spec.ini for explanation
 2 | 
 3 | debug = False
 4 | precision = single
 5 | backend = python
 6 | cache_dir = ~/.cgt_cache
 7 | enable_inplace_opt = True
 8 | enable_simplification = True
 9 | parallel = False
10 | num_threads = default=4
11 | 
12 | force_python_impl = False
13 | debug_cpp = False
14 | verbose = False


--------------------------------------------------------------------------------
/cgtrc_spec.ini:
--------------------------------------------------------------------------------
 1 | # DEVELOPERS: when you edit this file, please also edit cgtrc.example
 2 | 
 3 | # User options
 4 | # ----------------------
 5 | 
 6 | # At the cost of some overhead,
 7 | # store information in the computation graph that helps with debugging
 8 | debug = boolean(default=False)
 9 | 
10 | # single or double precision:
11 | precision = string(default=single) 
12 | 
13 | # backend=python means using a pure python module to execute the graph, and using python implementations of ops whenever they exist
14 | # backend=native means using the compiled execution graph interpreter, and using the native (c++) implementation of ops
15 | backend = option("python","native",default="python") # "native" means using compiled implementations and 
16 | 
17 | # Where to put generated files
18 | cache_dir = string(default="~/.cgt_cache")
19 | 
20 | # Enable in-place optimizations.
21 | enable_inplace_opt = boolean(default=True)
22 | 
23 | # Enable simplifications of the graph, e.g. arithmetic simplifications like x*1=x
24 | enable_simplification = boolean(default=True)
25 | 
26 | # Use parallel execution graph interpreter
27 | parallel = boolean(default=False)
28 | 
29 | # Number of 
30 | num_threads = integer(default=4)
31 | 
32 | # Developer Options
33 | # -----------------
34 | 
35 | # Force native backend to use python
36 | force_python_impl = boolean(default=False)
37 | 
38 | # Compile C++ files with debug flags
39 | debug_cpp = boolean(default=False) # use debug flags when compiling c++
40 | 
41 | # Print lots of diagnostic information
42 | # (we'll break this down at some point)
43 | verbose = boolean(default=False)


--------------------------------------------------------------------------------
/doc/Makefile:
--------------------------------------------------------------------------------
  1 | # Makefile for Sphinx documentation
  2 | #
  3 | 
  4 | # You can set these variables from the command line.
  5 | SPHINXOPTS    =
  6 | SPHINXBUILD   = sphinx-build
  7 | PAPER         =
  8 | BUILDDIR      = _build
  9 | 
 10 | # User-friendly check for sphinx-build
 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
 13 | endif
 14 | 
 15 | # Internal variables.
 16 | PAPEROPT_a4     = -D latex_paper_size=a4
 17 | PAPEROPT_letter = -D latex_paper_size=letter
 18 | ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 19 | # the i18n builder cannot share the environment and doctrees with the others
 20 | I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 21 | 
 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
 23 | 
 24 | help:
 25 | 	@echo "Please use \`make <target>' where <target> is one of"
 26 | 	@echo "  html       to make standalone HTML files"
 27 | 	@echo "  dirhtml    to make HTML files named index.html in directories"
 28 | 	@echo "  singlehtml to make a single large HTML file"
 29 | 	@echo "  pickle     to make pickle files"
 30 | 	@echo "  json       to make JSON files"
 31 | 	@echo "  htmlhelp   to make HTML files and a HTML help project"
 32 | 	@echo "  qthelp     to make HTML files and a qthelp project"
 33 | 	@echo "  devhelp    to make HTML files and a Devhelp project"
 34 | 	@echo "  epub       to make an epub"
 35 | 	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
 36 | 	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
 37 | 	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
 38 | 	@echo "  text       to make text files"
 39 | 	@echo "  man        to make manual pages"
 40 | 	@echo "  texinfo    to make Texinfo files"
 41 | 	@echo "  info       to make Texinfo files and run them through makeinfo"
 42 | 	@echo "  gettext    to make PO message catalogs"
 43 | 	@echo "  changes    to make an overview of all changed/added/deprecated items"
 44 | 	@echo "  xml        to make Docutils-native XML files"
 45 | 	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
 46 | 	@echo "  linkcheck  to check all external links for integrity"
 47 | 	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
 48 | 
 49 | clean:
 50 | 	rm -rf $(BUILDDIR)/*
 51 | 
 52 | html:
 53 | 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 54 | 	@echo
 55 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
 56 | 
 57 | spelling:
 58 | 	$(SPHINXBUILD) -b spelling $(ALLSPHINXOPTS) $(BUILDDIR)/spelling
 59 | 	@echo
 60 | 	@echo "Spelling check done"
 61 | 
 62 | dirhtml:
 63 | 	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
 64 | 	@echo
 65 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
 66 | 
 67 | singlehtml:
 68 | 	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
 69 | 	@echo
 70 | 	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
 71 | 
 72 | pickle:
 73 | 	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
 74 | 	@echo
 75 | 	@echo "Build finished; now you can process the pickle files."
 76 | 
 77 | json:
 78 | 	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
 79 | 	@echo
 80 | 	@echo "Build finished; now you can process the JSON files."
 81 | 
 82 | htmlhelp:
 83 | 	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
 84 | 	@echo
 85 | 	@echo "Build finished; now you can run HTML Help Workshop with the" \
 86 | 	      ".hhp project file in $(BUILDDIR)/htmlhelp."
 87 | 
 88 | qthelp:
 89 | 	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
 90 | 	@echo
 91 | 	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
 92 | 	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
 93 | 	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/CGT.qhcp"
 94 | 	@echo "To view the help file:"
 95 | 	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/CGT.qhc"
 96 | 
 97 | devhelp:
 98 | 	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
 99 | 	@echo
100 | 	@echo "Build finished."
101 | 	@echo "To view the help file:"
102 | 	@echo "# mkdir -p $$HOME/.local/share/devhelp/CGT"
103 | 	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/CGT"
104 | 	@echo "# devhelp"
105 | 
106 | epub:
107 | 	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
108 | 	@echo
109 | 	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
110 | 
111 | latex:
112 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
113 | 	@echo
114 | 	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
115 | 	@echo "Run \`make' in that directory to run these through (pdf)latex" \
116 | 	      "(use \`make latexpdf' here to do that automatically)."
117 | 
118 | latexpdf:
119 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
120 | 	@echo "Running LaTeX files through pdflatex..."
121 | 	$(MAKE) -C $(BUILDDIR)/latex all-pdf
122 | 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
123 | 
124 | latexpdfja:
125 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
126 | 	@echo "Running LaTeX files through platex and dvipdfmx..."
127 | 	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
128 | 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
129 | 
130 | text:
131 | 	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
132 | 	@echo
133 | 	@echo "Build finished. The text files are in $(BUILDDIR)/text."
134 | 
135 | man:
136 | 	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
137 | 	@echo
138 | 	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
139 | 
140 | texinfo:
141 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
142 | 	@echo
143 | 	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
144 | 	@echo "Run \`make' in that directory to run these through makeinfo" \
145 | 	      "(use \`make info' here to do that automatically)."
146 | 
147 | info:
148 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
149 | 	@echo "Running Texinfo files through makeinfo..."
150 | 	make -C $(BUILDDIR)/texinfo info
151 | 	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
152 | 
153 | gettext:
154 | 	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
155 | 	@echo
156 | 	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
157 | 
158 | changes:
159 | 	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
160 | 	@echo
161 | 	@echo "The overview file is in $(BUILDDIR)/changes."
162 | 
163 | linkcheck:
164 | 	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
165 | 	@echo
166 | 	@echo "Link check complete; look for any errors in the above output " \
167 | 	      "or in $(BUILDDIR)/linkcheck/output.txt."
168 | 
169 | doctest:
170 | 	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
171 | 	@echo "Testing of doctests in the sources finished, look at the " \
172 | 	      "results in $(BUILDDIR)/doctest/output.txt."
173 | 
174 | xml:
175 | 	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
176 | 	@echo
177 | 	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
178 | 
179 | pseudoxml:
180 | 	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
181 | 	@echo
182 | 	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
183 | 


--------------------------------------------------------------------------------
/doc/README:
--------------------------------------------------------------------------------
1 | BUILDING THE DOCS
2 | 
3 |     pip install -r requirements.txt
4 |     make html
5 | 
6 | then open ``_build/html/index.html``


--------------------------------------------------------------------------------
/doc/_static/my_theme.css:
--------------------------------------------------------------------------------
1 | html_style = 'css/my_theme.css'


--------------------------------------------------------------------------------
/doc/build_and_view.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | set -e
 3 | 
 4 | export PATH=/Users/joschu/Src/anaconda/bin:$PATH # In case this is being run by sublime
 5 | make html
 6 | 
 7 | index=_build/html/index.html
 8 | if [ `uname` = Linux ]
 9 | then
10 |     google-chrome $index
11 | else
12 |     open -a Google\ Chrome  $index
13 | fi


--------------------------------------------------------------------------------
/doc/conf.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #
  3 | # CGT documentation build configuration file, created by
  4 | # sphinx-quickstart on Mon May 11 15:24:24 2015.
  5 | #
  6 | # This file is execfile()d with the current directory set to its
  7 | # containing dir.
  8 | #
  9 | # Note that not all possible configuration values are present in this
 10 | # autogenerated file.
 11 | #
 12 | # All configuration values have a default; values that are commented out
 13 | # serve to show the default.
 14 | 
 15 | import sys
 16 | import os
 17 | 
 18 | 
 19 | import sphinx_bootstrap_theme
 20 | html_theme = 'bootstrap'
 21 | html_theme_path = sphinx_bootstrap_theme.get_html_theme_path()
 22 | 
 23 | # import cloud_sptheme
 24 | # html_theme = 'cloud'
 25 | # html_theme_path = [cloud_sptheme.get_theme_dir()]
 26 | 
 27 | 
 28 | # If extensions (or modules to document with autodoc) are in another directory,
 29 | # add these directories to sys.path here. If the directory is relative to the
 30 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 31 | #sys.path.insert(0, os.path.abspath('.'))
 32 | 
 33 | # -- General configuration ------------------------------------------------
 34 | 
 35 | # If your documentation needs a minimal Sphinx version, state it here.
 36 | #needs_sphinx = '1.0'
 37 | 
 38 | # Add any Sphinx extension module names here, as strings. They can be
 39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 40 | # ones.
 41 | extensions = ["notebook_sphinxext1","sphinx.ext.mathjax","sphinxcontrib.spelling","sphinx.ext.autodoc"]
 42 | 
 43 | spelling_lang='en_US'
 44 | spelling_word_list_filename='spelling_wordlist.txt'
 45 | spelling_show_suggestions=False
 46 | 
 47 | 
 48 | # Add any paths that contain templates here, relative to this directory.
 49 | templates_path = ['_templates']
 50 | 
 51 | # The suffix of source filenames.
 52 | source_suffix = '.rst'
 53 | 
 54 | # The encoding of source files.
 55 | #source_encoding = 'utf-8-sig'
 56 | 
 57 | # The master toctree document.
 58 | master_doc = 'index'
 59 | 
 60 | # General information about the project.
 61 | project = u'CGT'
 62 | copyright = u'2015, John Schulman et al.'
 63 | 
 64 | # The version info for the project you're documenting, acts as replacement for
 65 | # |version| and |release|, also used in various other places throughout the
 66 | # built documents.
 67 | #
 68 | # The short X.Y version.
 69 | version = '0.1'
 70 | # The full version, including alpha/beta/rc tags.
 71 | release = '0.1'
 72 | 
 73 | # The language for content autogenerated by Sphinx. Refer to documentation
 74 | # for a list of supported languages.
 75 | #language = None
 76 | 
 77 | # There are two options for replacing |today|: either, you set today to some
 78 | # non-false value, then it is used:
 79 | #today = ''
 80 | # Else, today_fmt is used as the format for a strftime call.
 81 | #today_fmt = '%B %d, %Y'
 82 | 
 83 | # List of patterns, relative to source directory, that match files and
 84 | # directories to ignore when looking for source files.
 85 | exclude_patterns = ['_build']
 86 | 
 87 | # The reST default role (used for this markup: `text`) to use for all
 88 | # documents.
 89 | #default_role = None
 90 | 
 91 | # If true, '()' will be appended to :func: etc. cross-reference text.
 92 | #add_function_parentheses = True
 93 | 
 94 | # If true, the current module name will be prepended to all description
 95 | # unit titles (such as .. function::).
 96 | #add_module_names = True
 97 | 
 98 | # If true, sectionauthor and moduleauthor directives will be shown in the
 99 | # output. They are ignored by default.
100 | show_authors = False
101 | 
102 | # The name of the Pygments (syntax highlighting) style to use.
103 | pygments_style = 'sphinx'
104 | 
105 | # A list of ignored prefixes for module index sorting.
106 | #modindex_common_prefix = []
107 | 
108 | # If true, keep warnings as "system message" paragraphs in the built documents.
109 | #keep_warnings = False
110 | 
111 | 
112 | # -- Options for HTML output ----------------------------------------------
113 | 
114 | # The theme to use for HTML and HTML Help pages.  See the documentation for
115 | # a list of builtin themes.
116 | # html_theme = html_theme # or redcloud
117 | 
118 | # Theme options are theme-specific and customize the look and feel of a theme
119 | # further.  For a list of options available for each theme, see the
120 | # documentation.
121 | html_theme_options = {
122 |     'navbar_links': [
123 |         ("GitHub", "https://github.com/joschu/cgt", True),
124 |     ],    
125 | 
126 | }
127 | 
128 | # Add any paths that contain custom themes here, relative to this directory.
129 | 
130 | # The name for this set of Sphinx documents.  If None, it defaults to
131 | # "<project> v<release> documentation".
132 | #html_title = None
133 | 
134 | # A shorter title for the navigation bar.  Default is the same as html_title.
135 | #html_short_title = None
136 | 
137 | # The name of an image file (relative to this directory) to place at the top
138 | # of the sidebar.
139 | # html_logo = "cgt.png"
140 | 
141 | # The name of an image file (within the static path) to use as favicon of the
142 | # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
143 | # pixels large.
144 | #html_favicon = None
145 | 
146 | # Add any paths that contain custom static files (such as style sheets) here,
147 | # relative to this directory. They are copied after the builtin static files,
148 | # so a file named "default.css" will overwrite the builtin "default.css".
149 | html_static_path = ['_static']
150 | 
151 | # Add any extra paths that contain custom files (such as robots.txt or
152 | # .htaccess) here, relative to this directory. These files are copied
153 | # directly to the root of the documentation.
154 | #html_extra_path = []
155 | 
156 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
157 | # using the given strftime format.
158 | #html_last_updated_fmt = '%b %d, %Y'
159 | 
160 | # If true, SmartyPants will be used to convert quotes and dashes to
161 | # typographically correct entities.
162 | #html_use_smartypants = True
163 | 
164 | # Custom sidebar templates, maps document names to template names.
165 | #html_sidebars = {}
166 | 
167 | # Additional templates that should be rendered to pages, maps page names to
168 | # template names.
169 | #html_additional_pages = {}
170 | 
171 | # If false, no module index is generated.
172 | #html_domain_indices = True
173 | 
174 | # If false, no index is generated.
175 | html_use_index = True
176 | 
177 | # If true, the index is split into individual pages for each letter.
178 | #html_split_index = False
179 | 
180 | # If true, links to the reST sources are added to the pages.
181 | html_show_sourcelink = False
182 | 
183 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
184 | html_show_sphinx = False
185 | 
186 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
187 | html_show_copyright = False
188 | 
189 | # If true, an OpenSearch description file will be output, and all pages will
190 | # contain a <link> tag referring to it.  The value of this option must be the
191 | # base URL from which the finished HTML is served.
192 | #html_use_opensearch = ''
193 | 
194 | # This is the file name suffix for HTML files (e.g. ".xhtml").
195 | #html_file_suffix = None
196 | 
197 | # Output file base name for HTML help builder.
198 | htmlhelp_basename = 'CGTdoc'
199 | 
200 | 
201 | # -- Options for LaTeX output ---------------------------------------------
202 | 
203 | latex_elements = {
204 | # The paper size ('letterpaper' or 'a4paper').
205 | #'papersize': 'letterpaper',
206 | 
207 | # The font size ('10pt', '11pt' or '12pt').
208 | #'pointsize': '10pt',
209 | 
210 | # Additional stuff for the LaTeX preamble.
211 | #'preamble': '',
212 | }
213 | 
214 | # Grouping the document tree into LaTeX files. List of tuples
215 | # (source start file, target name, title,
216 | #  author, documentclass [howto, manual, or own class]).
217 | latex_documents = [
218 |   ('index', 'CGT.tex', u'CGT Documentation',
219 |    u'John Schulman et al.', 'manual'),
220 | ]
221 | 
222 | # The name of an image file (relative to this directory) to place at the top of
223 | # the title page.
224 | #latex_logo = None
225 | 
226 | # For "manual" documents, if this is true, then toplevel headings are parts,
227 | # not chapters.
228 | #latex_use_parts = False
229 | 
230 | # If true, show page references after internal links.
231 | #latex_show_pagerefs = False
232 | 
233 | # If true, show URL addresses after external links.
234 | #latex_show_urls = False
235 | 
236 | # Documents to append as an appendix to all manuals.
237 | #latex_appendices = []
238 | 
239 | # If false, no module index is generated.
240 | #latex_domain_indices = True
241 | 
242 | 
243 | # -- Options for manual page output ---------------------------------------
244 | 
245 | # One entry per manual page. List of tuples
246 | # (source start file, name, description, authors, manual section).
247 | man_pages = [
248 |     ('index', 'cgt', u'CGT Documentation',
249 |      [u'John Schulman et al.'], 1)
250 | ]
251 | 
252 | # If true, show URL addresses after external links.
253 | #man_show_urls = False
254 | 
255 | 
256 | # -- Options for Texinfo output -------------------------------------------
257 | 
258 | # Grouping the document tree into Texinfo files. List of tuples
259 | # (source start file, target name, title, author,
260 | #  dir menu entry, description, category)
261 | texinfo_documents = [
262 |   ('index', 'CGT', u'CGT Documentation',
263 |    u'John Schulman et al.', 'CGT', 'One line description of project.',
264 |    'Miscellaneous'),
265 | ]
266 | 
267 | # Documents to append as an appendix to all manuals.
268 | #texinfo_appendices = []
269 | 
270 | # If false, no module index is generated.
271 | #texinfo_domain_indices = True
272 | 
273 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
274 | #texinfo_show_urls = 'footnote'
275 | 
276 | # If true, do not generate a @detailmenu in the "Top" node's menu.
277 | #texinfo_no_detailmenu = False
278 | 
279 | 
280 | latex_elements['preamble'] = """
281 | 
282 | """
283 | 
284 | 
285 | 
286 | 


--------------------------------------------------------------------------------
/doc/notebook_sphinxext1.py:
--------------------------------------------------------------------------------
  1 | import os, shutil, string, glob
  2 | from sphinx.util.compat import Directive
  3 | from docutils import nodes
  4 | from docutils.parsers.rst import directives
  5 | from IPython.nbconvert import html, python
  6 | from runipy.notebook_runner import NotebookRunner
  7 | 
  8 | class NotebookDirective(Directive):
  9 |     """Insert an evaluated notebook into a document
 10 | 
 11 |     This uses runipy and nbconvert to transform a path to an unevaluated notebook
 12 |     into html suitable for embedding in a Sphinx document.
 13 |     """
 14 |     required_arguments = 1
 15 |     optional_arguments = 1
 16 |     option_spec = {'skip_exceptions' : directives.flag}
 17 | 
 18 |     def run(self):
 19 |         # check if raw html is supported
 20 |         if not self.state.document.settings.raw_enabled:
 21 |             raise self.warning('"%s" directive disabled.' % self.name)
 22 | 
 23 |         # get path to notebook
 24 |         source_dir = os.path.dirname(
 25 |             os.path.abspath(self.state.document.current_source))
 26 |         nb_basename = os.path.basename(self.arguments[0])
 27 |         rst_file = self.state_machine.document.attributes['source']
 28 |         rst_dir = os.path.abspath(os.path.dirname(rst_file))
 29 |         nb_abs_path = self.arguments[0]#os.path.join(rst_dir, nb_basename)
 30 | 
 31 |         # Move files around.
 32 |         rel_dir = os.path.relpath(rst_dir, setup.confdir)
 33 |         rel_path = os.path.join(rel_dir, nb_basename)
 34 |         dest_dir = setup.app.builder.outdir
 35 |         dest_path = os.path.join(dest_dir, nb_basename)
 36 | 
 37 |         print dest_path, nb_abs_path
 38 | 
 39 |         if not os.path.exists(dest_dir):
 40 |             os.makedirs(dest_dir)
 41 | 
 42 |         # Copy unevaluated script
 43 |         try:
 44 |             shutil.copyfile(nb_abs_path, dest_path)
 45 |         except IOError:
 46 |             raise RuntimeError("Unable to copy notebook to build destination.")
 47 | 
 48 |         dest_path_eval = string.replace(dest_path, '.ipynb', '_evaluated.ipynb')
 49 |         dest_path_script = string.replace(dest_path, '.ipynb', '.py')
 50 |         rel_path_eval = string.replace(nb_basename, '.ipynb', '_evaluated.ipynb')
 51 |         rel_path_script = string.replace(nb_basename, '.ipynb', '.py')
 52 | 
 53 |         # Create python script vesion
 54 |         unevaluated_text = nb_to_html(nb_abs_path)
 55 |         script_text = nb_to_python(nb_abs_path)
 56 |         f = open(dest_path_script, 'w')
 57 |         f.write(script_text.encode('utf8'))
 58 |         f.close()
 59 | 
 60 |         skip_exceptions = 'skip_exceptions' in self.options
 61 | 
 62 |         # try:
 63 |         evaluated_text = evaluate_notebook(nb_abs_path, dest_path_eval,
 64 |                                            skip_exceptions=skip_exceptions)
 65 |         # except:
 66 |         #     # bail
 67 |         #     return []
 68 | 
 69 |         # Create link to notebook and script files
 70 |         link_rst = "(" + \
 71 |                    formatted_link(nb_basename) + "; " + \
 72 |                    formatted_link(rel_path_eval) + "; " + \
 73 |                    formatted_link(rel_path_script) + \
 74 |                    ")"
 75 | 
 76 |         self.state_machine.insert_input([link_rst], rst_file)
 77 | 
 78 |         # create notebook node
 79 |         attributes = {'format': 'html', 'source': 'nb_path'}
 80 |         nb_node = notebook_node('', evaluated_text, **attributes)
 81 |         (nb_node.source, nb_node.line) = \
 82 |             self.state_machine.get_source_and_line(self.lineno)
 83 | 
 84 |         # add dependency
 85 |         self.state.document.settings.record_dependencies.add(nb_abs_path)
 86 | 
 87 |         # clean up png files left behind by notebooks.
 88 |         png_files = glob.glob("*.png")
 89 |         fits_files = glob.glob("*.fits")
 90 |         h5_files = glob.glob("*.h5")
 91 |         for file in png_files:
 92 |             os.remove(file)
 93 | 
 94 |         return [nb_node]
 95 | 
 96 | 
 97 | 
 98 | class notebook_node(nodes.raw):
 99 |     pass
100 | 
101 | def nb_to_python(nb_path):
102 |     """convert notebook to python script"""
103 |     exporter = python.PythonExporter()
104 |     output, resources = exporter.from_filename(nb_path)
105 |     return output
106 | 
107 | def nb_to_html(nb_path):
108 |     """convert notebook to html"""
109 |     exporter = html.HTMLExporter(template_file='full')
110 |     output, resources = exporter.from_filename(nb_path)
111 |     header = output.split('<head>', 1)[1].split('</head>',1)[0]
112 |     body = output.split('<body>', 1)[1].split('</body>',1)[0]
113 | 
114 |     # http://imgur.com/eR9bMRH
115 |     header = header.replace('<style', '<style scoped="scoped"')
116 |     header = header.replace('body {\n  overflow: visible;\n  padding: 8px;\n}\n', '')
117 |     # Filter out styles that conflict with the sphinx theme.
118 |     filter_strings = [
119 |         'navbar',
120 |         'body{',
121 |         'alert{',
122 |         'uneditable-input{',
123 |         'collapse{',
124 |     ]
125 |     filter_strings.extend(['h%s{' % (i+1) for i in range(6)])
126 | 
127 |     header_lines = filter(
128 |         lambda x: not any([s in x for s in filter_strings]), header.split('\n'))
129 |     header = '\n'.join(header_lines)
130 | 
131 |     # concatenate raw html lines
132 |     lines = ['<div class="ipynotebook">']
133 |     lines.append(header)
134 |     lines.append("""<style>
135 |         div.output_text {
136 |             margin:10px;
137 |         }
138 |         </style>""")
139 |     import re
140 |     # bad way to do it
141 |     body = re.sub(r'<div class="prompt input_prompt">.*</div>','',body)
142 |     body = re.sub(r'<div class="prompt output_prompt">.*</div>','',body)
143 | 
144 |     lines.append(body)
145 |     lines.append('</div>')
146 |     return '\n'.join(lines)
147 | 
148 | def evaluate_notebook(nb_path, dest_path=None, skip_exceptions=False):
149 |     # Create evaluated version and save it to the dest path.
150 |     # Always use --pylab so figures appear inline
151 |     # perhaps this is questionable?
152 |     import subprocess
153 |     if not os.path.exists(dest_path) or os.path.getmtime(nb_path) > os.path.getmtime(dest_path):
154 |         subprocess.check_call("runipy %s %s"%(nb_path,dest_path),shell=True)
155 |     # nb_runner = NotebookRunner(nb_in=nb_path, pylab=True)
156 |     # nb_runner.run_notebook(skip_exceptions=skip_exceptions)
157 |     # if dest_path is None:
158 |     #     dest_path = 'temp_evaluated.ipynb'
159 |     # nb_runner.save_notebook(dest_path)
160 |     ret = nb_to_html(dest_path)
161 |     if dest_path is 'temp_evaluated.ipynb':
162 |         os.remove(dest_path)
163 |     return ret
164 | 
165 | def formatted_link(path):
166 |     return "`%s <%s>`__" % (os.path.basename(path), path)
167 | 
168 | def visit_notebook_node(self, node):
169 |     self.visit_raw(node)
170 | 
171 | def depart_notebook_node(self, node):
172 |     self.depart_raw(node)
173 | 
174 | def setup(app):
175 |     setup.app = app
176 |     setup.config = app.config
177 |     setup.confdir = app.confdir
178 | 
179 |     app.add_node(notebook_node,
180 |                  html=(visit_notebook_node, depart_notebook_node))
181 | 
182 |     app.add_directive('notebook', NotebookDirective)
183 | 


--------------------------------------------------------------------------------
/doc/spelling_wordlist.txt:
--------------------------------------------------------------------------------
 1 | Theano
 2 | codebase
 3 | reimplement
 4 | backend
 5 | multithreading
 6 | cmake
 7 | preinstalled
 8 | reimagines
 9 | multithreaded
10 | subexpression
11 | optimizations
12 | arrayobject
13 | dlopen
14 | dlsym
15 | autotuning
16 | upcasting
17 | numpy


--------------------------------------------------------------------------------
/doc/sphinx_preview.py:
--------------------------------------------------------------------------------
 1 | import sublime, sublime_plugin
 2 | import subprocess, os
 3 | 
 4 | class SphinxPreviewCommand(sublime_plugin.TextCommand):
 5 |     def run(self, edit, **kwargs):
 6 |         if self.view.file_name():
 7 |             folder_name, file_name = os.path.split(self.view.file_name())
 8 | 
 9 |         command = './build_and_view.sh'
10 |         p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=folder_name, shell=True)
11 |         result, err = p.communicate()
12 |         print(result,err)
13 |         # self.view.set_status('p4',str(result+err))
14 |         # sublime.set_timeout(self.clear,2000)


--------------------------------------------------------------------------------
/doc/tutorial-notes.txt:
--------------------------------------------------------------------------------
1 | https://github.com/Newmu/Theano-Tutorials
2 | 
3 | http://deeplearning.net/tutorial/
4 | 
5 | https://github.com/torch/tutorials


--------------------------------------------------------------------------------
/doc/upload.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | rsync -azvu --delete --progress  _build/html/ pabbeel@rll.berkeley.edu:/var/www/cgt
3 | 


--------------------------------------------------------------------------------
/examples/README:
--------------------------------------------------------------------------------
1 | Here you can find examples of using CGT.
2 | All of the scripts can be run without arguments.
3 | (If one fails, please notify us or open an issue.)
4 | 
5 | For best performance, set backend=native, via .cgtrc or CGT_FLAGS=backend=native
6 | Also try parallel=True, but that only helps sometimes.


--------------------------------------------------------------------------------
/examples/bench/cgt_gru.py:
--------------------------------------------------------------------------------
 1 | import cgt
 2 | from gru import GRUCell
 3 | import time
 4 | from cgt.utils import Message
 5 | import numpy as np
 6 | 
 7 | if __name__ == "__main__":
 8 |     import argparse
 9 |     parser = argparse.ArgumentParser()
10 |     parser.add_argument("--horizon",type=int)
11 |     args = parser.parse_args()
12 |     horizon = args.horizon
13 |     assert horizon is not None    
14 |     size=128
15 |     batchsize=64
16 |     cell = GRUCell([size],size)
17 |     X = cgt.tensor3()
18 |     init = cgt.matrix()
19 | 
20 |     prev_h = init
21 |     for i in xrange(horizon):
22 |         prev_h = cell(X[i], prev_h)
23 |     loss = prev_h.sum()
24 | 
25 |     with Message("compiling"):
26 |         f = cgt.function([X, init],cgt.grad(loss, cell.params()))
27 |     with Message("running"):
28 |         xval = np.zeros((horizon,batchsize,size),cgt.floatX)
29 |         initval = np.zeros((batchsize, size), cgt.floatX)
30 |         for i in xrange(100): 
31 |             f(xval, initval)
32 | 
33 | 
34 | # # No speedup -- why?
35 | # with Message("split loss. compiling"):
36 | #     from cgt import nn
37 | #     m = cgt.nn.Module([X, init], [loss])
38 | #     split_loss = 0
39 | #     X1 = cgt.tensor3()
40 | #     init1 = cgt.matrix()
41 | #     for start in xrange(0, batchsize, batchsize//4):
42 | #         sli = slice(start, start+batchsize//4)
43 | #         split_loss += m([X1[:, sli], init1[sli]])[0]
44 | #     f = cgt.function([X1, init1],cgt.grad(split_loss, cell.params()))
45 | # with Message("running"):
46 | #     for i in xrange(100): 
47 | #         f(xval,initval)
48 | 


--------------------------------------------------------------------------------
/examples/bench/gru.py:
--------------------------------------------------------------------------------
 1 | import cgt
 2 | import numpy as np
 3 | 
 4 | def normc(x):
 5 |     assert x.ndim == 2
 6 |     return x/norms(x,0)[None,:]
 7 | def randnf(*shp):
 8 |     return np.random.randn(*shp).astype(cgt.floatX)
 9 | def norms(x,ax):
10 |     return np.sqrt(np.square(x).sum(axis=ax))
11 | 
12 | 
13 | class GRUCell(object):
14 |     """
15 |     Gated Recurrent Unit. E.g., see
16 |     Chung, Junyoung, et al. "Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling." arXiv preprint arXiv:1412.3555 (2014).
17 |     """    
18 |     def __init__(self,input_sizes,mem_size,name_prefix=""):
19 | 
20 |         Wiz_vals = [normc(randnf(input_size,mem_size)) for input_size in input_sizes]
21 |         self.Wizs = [cgt.shared(Wiz_val,name=name_prefix+"Wiz") for Wiz_val in Wiz_vals]
22 |         Wmz_val = normc(randnf(mem_size,mem_size))
23 |         self.Wmz = cgt.shared(Wmz_val,name=name_prefix+"Wmz")
24 |         bz = np.zeros((1,mem_size),cgt.floatX)
25 |         self.bz = cgt.shared(bz,name=name_prefix+"bz")
26 | 
27 |         Wir_vals = [normc(randnf(input_size,mem_size)) for input_size in input_sizes]
28 |         self.Wirs = [cgt.shared(Wir_val,name=name_prefix+"Wir") for Wir_val in Wir_vals]
29 |         Wmr_val = normc(randnf(mem_size,mem_size))
30 |         self.Wmr = cgt.shared(Wmr_val,name=name_prefix+"Wmr")
31 |         br = np.zeros((1,mem_size),cgt.floatX)
32 |         self.br = cgt.shared(br,name=name_prefix+"br")
33 | 
34 |         Wim_vals = [normc(randnf(input_size,mem_size)) for input_size in input_sizes]
35 |         self.Wims = [cgt.shared(Wim_val,name=name_prefix+"Wim") for Wim_val in Wim_vals]
36 |         Wmm_val = normc(np.eye(mem_size,dtype=cgt.floatX))
37 |         self.Wmm = cgt.shared(Wmm_val,name=name_prefix+"Wmm")
38 |         bm = np.zeros((1,mem_size),cgt.floatX)
39 |         self.bm = cgt.shared(bm,name=name_prefix+"bm")
40 | 
41 |     def __call__(self,M,*inputs):
42 |         assert len(inputs) == len(self.Wizs)
43 |         n = M.shape[0]
44 |         summands = [Xi.dot(Wiz) for (Xi,Wiz) in zip(inputs,self.Wizs)] + [M.dot(self.Wmz),cgt.repeat(self.bz,n, axis=0)]
45 |         z = cgt.sigmoid(cgt.add_multi(summands))
46 | 
47 |         summands = [Xi.dot(Wir) for (Xi,Wir) in zip(inputs,self.Wirs)] + [M.dot(self.Wmr),cgt.repeat(self.br,n, axis=0)]
48 |         r = cgt.sigmoid(cgt.add_multi(summands))
49 | 
50 |         summands = [Xi.dot(Wim) for (Xi,Wim) in zip(inputs,self.Wims)] + [(r*M).dot(self.Wmm),cgt.repeat(self.bm,n, axis=0)]
51 |         Mtarg = cgt.tanh(cgt.add_multi(summands)) #pylint: disable=E1111
52 | 
53 |         Mnew = (1-z)*M + z*Mtarg
54 |         return Mnew
55 | 
56 |     def params(self):
57 |         out = []
58 |         out.extend(self.Wizs)
59 |         out.append(self.Wmz)
60 |         out.append(self.bz)        
61 |         out.extend(self.Wirs)
62 |         out.append(self.Wmr)
63 |         out.append(self.br)        
64 |         out.extend(self.Wims)
65 |         out.append(self.Wmm)
66 |         out.append(self.bm)        
67 |         return out
68 | 
69 | 


--------------------------------------------------------------------------------
/examples/bench/seq_model.py:
--------------------------------------------------------------------------------
 1 | import gru,cgt, numpy as np
 2 | import sys
 3 | from time import time
 4 | 
 5 | elapsed = []
 6 | horizons = 2**np.arange(2, 10)
 7 | 
 8 | for horizon in horizons:
 9 |     print "HORIZON",horizon
10 |     tstart = time()
11 | 
12 |     batch_size = 6
13 |     dim_x = 16
14 |     mem_size = 10
15 | 
16 |     X_tnk = cgt.tensor3("X")
17 | 
18 |     cell = gru.GRUCell([dim_x], mem_size)
19 | 
20 |     Minit_nk = cgt.zeros((X_tnk.shape[0], X_tnk.shape[1]),cgt.floatX)
21 |     M = Minit_nk
22 | 
23 |     for t in xrange(horizon):
24 |         M = cell(M, X_tnk[t])
25 | 
26 |     # cgt.print_tree(M)
27 |     print "simplifying..."
28 |     M_simp = cgt.simplify([M])
29 |     print "done"
30 |     # cgt.print_tree(M_simp)
31 |     print "fn before:",cgt.count_nodes(M)
32 |     print "fn after:",cgt.count_nodes(M_simp)
33 | 
34 |     gs = cgt.grad(cgt.sum(M), cell.params())
35 |     print "grad before", cgt.count_nodes(gs)
36 |     g_simp = cgt.simplify(gs)
37 |     print "grad after",cgt.count_nodes(g_simp)
38 | 
39 |     # M = cgt.simplify(M)
40 |     elapsed.append(time()-tstart)
41 | 
42 | import matplotlib.pyplot as plt
43 | plt.plot(horizons,elapsed,'x-')
44 | plt.show()
45 | 
46 | 


--------------------------------------------------------------------------------
/examples/bench/theano_gru.py:
--------------------------------------------------------------------------------
 1 | import theano, theano.tensor as TT
 2 | from cgt.utils import Message
 3 | import time
 4 | import numpy as np
 5 | 
 6 | def normc(x):
 7 |     assert x.ndim == 2
 8 |     return x/norms(x,0)[None,:]
 9 | def randnf(*shp):
10 |     return np.random.randn(*shp).astype(theano.config.floatX)
11 | def norms(x,ax):
12 |     return np.sqrt(np.square(x).sum(axis=ax))
13 | 
14 | class GRUCell(object):
15 |     """
16 |     Gated Recurrent Unit. E.g., see
17 |     Chung, Junyoung, et al. "Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling." arXiv preprint arXiv:1412.3555 (2014).
18 |     """    
19 |     def __init__(self,input_sizes,mem_size,name_prefix=""):
20 | 
21 |         Wiz_vals = [normc(randnf(input_size,mem_size)) for input_size in input_sizes]
22 |         self.Wizs = [theano.shared(Wiz_val,name=name_prefix+"Wiz") for Wiz_val in Wiz_vals]
23 |         Wmz_val = normc(randnf(mem_size,mem_size))
24 |         self.Wmz = theano.shared(Wmz_val,name=name_prefix+"Wmz")
25 |         bz = np.zeros((1,mem_size),theano.config.floatX)
26 |         self.bz = theano.shared(bz,name=name_prefix+"bz")
27 |         self.bz.type.broadcastable = (True,False)        
28 | 
29 |         Wir_vals = [normc(randnf(input_size,mem_size)) for input_size in input_sizes]
30 |         self.Wirs = [theano.shared(Wir_val,name=name_prefix+"Wir") for Wir_val in Wir_vals]
31 |         Wmr_val = normc(randnf(mem_size,mem_size))
32 |         self.Wmr = theano.shared(Wmr_val,name=name_prefix+"Wmr")
33 |         br = np.zeros((1,mem_size),theano.config.floatX)
34 |         self.br = theano.shared(br,name=name_prefix+"br")
35 |         self.br.type.broadcastable = (True,False)
36 | 
37 |         Wim_vals = [normc(randnf(input_size,mem_size)) for input_size in input_sizes]
38 |         self.Wims = [theano.shared(Wim_val,name=name_prefix+"Wim") for Wim_val in Wim_vals]
39 |         Wmm_val = normc(np.eye(mem_size,dtype=theano.config.floatX))
40 |         self.Wmm = theano.shared(Wmm_val,name=name_prefix+"Wmm")
41 |         bm = np.zeros((1,mem_size),theano.config.floatX)
42 |         self.bm = theano.shared(bm,name=name_prefix+"bm")
43 |         self.bm.type.broadcastable = (True,False)
44 | 
45 |     def __call__(self,M,*inputs):
46 |         assert len(inputs) == len(self.Wizs)
47 |         summands = [Xi.dot(Wiz) for (Xi,Wiz) in zip(inputs,self.Wizs)] + [M.dot(self.Wmz),self.bz]
48 |         z = TT.nnet.sigmoid(TT.add(*summands))
49 | 
50 |         summands = [Xi.dot(Wir) for (Xi,Wir) in zip(inputs,self.Wirs)] + [M.dot(self.Wmr),self.br]
51 |         r = TT.nnet.sigmoid(TT.add(*summands))
52 | 
53 |         summands = [Xi.dot(Wim) for (Xi,Wim) in zip(inputs,self.Wims)] + [(r*M).dot(self.Wmm),self.bm]
54 |         Mtarg = TT.tanh(TT.add(*summands)) #pylint: disable=E1111
55 | 
56 |         Mnew = (1-z)*M + z*Mtarg
57 |         return Mnew
58 | 
59 |     def params(self):
60 |         out = []
61 |         out.extend(self.Wizs)
62 |         out.append(self.Wmz)
63 |         out.append(self.bz)        
64 |         out.extend(self.Wirs)
65 |         out.append(self.Wmr)
66 |         out.append(self.br)        
67 |         out.extend(self.Wims)
68 |         out.append(self.Wmm)
69 |         out.append(self.bm)        
70 |         return out
71 | 
72 | if __name__ == "__main__":
73 |     import argparse
74 |     parser = argparse.ArgumentParser()
75 |     parser.add_argument("--horizon",type=int)
76 |     args = parser.parse_args()
77 |     horizon =args.horizon
78 |     assert horizon is not None
79 |     size=128
80 |     batchsize=64
81 |     cell = GRUCell([size],size)
82 |     X = TT.tensor3()
83 |     init = TT.zeros((batchsize, size),theano.config.floatX)
84 | 
85 |     prev_h = init
86 |     for i in xrange(horizon):
87 |         prev_h = cell(X[i], prev_h)
88 | 
89 |     with Message("compiling"):
90 |         f = theano.function([X],theano.grad(prev_h.sum(), cell.params()))
91 |     with Message("running"):
92 |         x = np.zeros((horizon,batchsize,size),theano.config.floatX)
93 |         for i in xrange(100): 
94 |             f(x)


--------------------------------------------------------------------------------
/examples/broken/caffe2cgt.py:
--------------------------------------------------------------------------------
  1 | from google.protobuf import text_format
  2 | from caffe_pb2 import *
  3 | import os.path as osp
  4 | import cgt
  5 | from cgt import nn
  6 | from cgt.core import infer_shape
  7 | import numpy as np
  8 | infile = "/Users/joschu/Src/caffe/examples/mnist/lenet.prototxt"
  9 | # infile = "/Users/joschu/Src/caffe/models/bvlc_googlenet/train_val.prototxt"
 10 | 
 11 | with open(osp.expanduser(infile),"r") as fh:
 12 |     text = fh.read()
 13 | net = NetParameter()
 14 | text_format.Merge(text, net)
 15 | 
 16 | 
 17 | name2node = {}
 18 | 
 19 | cgt.set_precision('single')
 20 | 
 21 | if net.input: #pylint: disable=E1101
 22 |     assert len(net.input) == 1 #pylint: disable=E1101
 23 |     name2node[net.input[0]] = cgt.tensor(ndim=4,dtype=cgt.floatX, fixed_shape=tuple(net.input_dim))
 24 | 
 25 | 
 26 | # XXX super inefficient
 27 | 
 28 | for layer in net.layer: #pylint: disable=E1101
 29 |     if layer.phase==TRAIN:
 30 |         print "loading layer %s type=%s in=%s out=%s"%(layer.name, layer.type, layer.bottom, layer.top)
 31 |         output = None
 32 |         inputs = [name2node[name] for name in layer.bottom]
 33 |         if layer.type == "Data":
 34 |             tp = layer.transform_param
 35 |             crop_size = tp.crop_size
 36 |             chans = len(tp.mean_value)
 37 |             dp = layer.data_param
 38 |             batch_size = dp.batch_size
 39 |             output = [cgt.tensor(dtype=cgt.floatX,ndim=4,name=layer.name, fixed_shape=(batch_size,chans,crop_size,crop_size)),
 40 |                       cgt.tensor(dtype='i8',ndim=2,name=layer.name, fixed_shape=(batch_size, 1))]
 41 |         elif layer.type == "Convolution":
 42 |             X = inputs[0]
 43 |             param = layer.convolution_param
 44 |             kh,kw = (param.kernel_size, param.kernel_size) if param.HasField("kernel_size")\
 45 |                 else (param.kernel_h, param.kernel_w)
 46 |             nchanin = infer_shape(X)[0]
 47 |             Wshape = (param.num_output, nchanin, kh, kw)
 48 |             Wname = layer.param[0].name or layer.name+":W"
 49 |             Wval = np.empty(Wshape, dtype=cgt.floatX)
 50 |             W = name2node[Wname] = cgt.shared(Wval, name=Wname, fixed_shape_mask="all")
 51 |             bshape = (1, param.num_output, 1, 1)
 52 |             bname = layer.param[1].name or layer.name+":b"
 53 |             bval = np.empty(bshape, dtype=cgt.floatX)
 54 |             b = name2node[bname] = cgt.shared(bval, name=bname, fixed_shape_mask="all")
 55 |             sh,sw = (param.stride, param.stride) if param.HasField("stride")\
 56 |                 else (param.stride_h, param.stride_w)
 57 |             output = [cgt.broadcast("+",nn.conv2d(X, W, subsample=(sh,sw)), b, "xxxx,1x11")]
 58 |         elif layer.type == "Pooling":
 59 |             param = layer.pooling_param
 60 |             X = inputs[0]
 61 |             pool_type = {param.MAX : "max", param.AVE : "mean"}[param.pool]
 62 |             height_in,width_in = infer_shape(X)[2:4]
 63 |             kernel = (param.kernel_size, param.kernel_size) if param.HasField("kernel_size")\
 64 |                 else (param.kernel_h, param.kernel_w)
 65 |             stride = (param.stride, param.stride) if param.HasField("stride")\
 66 |                 else (param.stride_h, param.stride_w)
 67 |             pad = (param.pad, param.pad) if param.HasField("pad")\
 68 |                 else (param.pad_h, param.pad_w)
 69 |             output = [nn.pool(pool_type, X, stride, kernel, pad)]
 70 |         elif layer.type == "InnerProduct":
 71 |             X = inputs[0]
 72 |             if X.ndim == 4:
 73 |                 X = cgt.reshape(X, [X.shape[0], X.shape[1]*X.shape[2]*X.shape[3]] )
 74 |             param = layer.inner_product_param
 75 |             nchanin = infer_shape(X)[1]
 76 |             Wshape = (param.num_output, nchanin)
 77 |             Wname = layer.param[0].name or layer.name+":W"
 78 |             Wval = np.empty(Wshape, dtype=cgt.floatX)
 79 |             W = name2node[Wname] = cgt.shared(Wval, name=Wname, fixed_shape_mask="all")
 80 |             bshape = (1, param.num_output)
 81 |             bname = layer.param[1].name or layer.name+":b"
 82 |             bval = np.empty(bshape, dtype=cgt.floatX)
 83 |             b = name2node[bname] = cgt.shared(bval, name=bname, fixed_shape_mask="all")
 84 |             yname = layer.top[0]
 85 |             output = [cgt.broadcast("+",X.dot(W), b, "xx,1x")          ]
 86 |         elif layer.type == "ReLU":
 87 |             output = [nn.rectify(inputs[0])]
 88 |         elif layer.type == "Softmax":
 89 |             output = [nn.softmax(inputs[0])]
 90 |         elif layer.type == "LRN":
 91 |             # XXX needs params
 92 |             param = layer.lrn_param
 93 |             output = [nn.lrn(inputs[0], param.alpha,param.beta, param.local_size)]
 94 |         elif layer.type == "Concat":
 95 |             param = layer.concat_param
 96 |             output = [cgt.concatenate(inputs, param.concat_dim)            ]
 97 |         elif layer.type == "Dropout":
 98 |             output = [nn.dropout(inputs[0])]
 99 |         elif layer.type == "SoftmaxWithLoss":
100 |             output = [nn.loglik_softmax(inputs[0], inputs[1])]
101 |         elif layer.type == "Accuracy":
102 |             output = [nn.zero_one_loss(inputs[0], inputs[1])]
103 |         else:
104 |             cgt.error("unrecognized layer type %s"%layer.type)
105 | 
106 |         assert output is not None
107 | 
108 |         # assert isinstance(output, cgt.Node)
109 |         for i in xrange(len(layer.top)): name2node[layer.top[i]] = output[i]
110 |         print "stored", layer.top[0]
111 |         if layer.type != "Data":
112 |             print "shape",layer.type, infer_shape(name2node[layer.bottom[0]]), infer_shape(name2node[layer.top[0]])
113 | 
114 | 
115 | 
116 | 
117 | 


--------------------------------------------------------------------------------
/examples/broken/internals_tour.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "Let's look at the least square problem we constructed above"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": null,
 13 |    "metadata": {
 14 |     "collapsed": false
 15 |    },
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "import cgt\n",
 19 |     "cgt.modify_config(backend=\"python\")\n",
 20 |     "X_nk = cgt.matrix(\"X\")\n",
 21 |     "y_n = cgt.vector(\"y\")\n",
 22 |     "w_k = cgt.vector(\"w\")\n",
 23 |     "b = cgt.scalar(\"b\")\n",
 24 |     "ypred_n = X_nk.dot(w_k) + b\n",
 25 |     "loss = cgt.sum(cgt.square(ypred_n - y_n))"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "markdown",
 30 |    "metadata": {},
 31 |    "source": [
 32 |     "Internally, CGT represents the loss function, as well as other expressions, using a directed acyclic graph called the **expression graph**. \n",
 33 |     "In the expression graph, each node corresponds to an intermediate result and the operation that was performed to obtain it.\n",
 34 |     "\n",
 35 |     "The graph is made up of two kinds of nodes: `Input` and `Result`. \n",
 36 |     "`Input` nodes correspond to values that are set externally, while `Result` node correspond to intermediate values in the computation, computed from zero-or-more preceding nodes."
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "markdown",
 41 |    "metadata": {},
 42 |    "source": [
 43 |     "Below is a representation of the expression graph above.\n",
 44 |     "(TODO: we should show DAG plot here instead of the tree, maybe using dot for layout)"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "code",
 49 |    "execution_count": null,
 50 |    "metadata": {
 51 |     "collapsed": false
 52 |    },
 53 |    "outputs": [],
 54 |    "source": [
 55 |     "cgt.display.print_text(loss);"
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "markdown",
 60 |    "metadata": {},
 61 |    "source": [
 62 |     "\n",
 63 |     "\n",
 64 |     "\n"
 65 |    ]
 66 |   },
 67 |   {
 68 |    "cell_type": "code",
 69 |    "execution_count": null,
 70 |    "metadata": {
 71 |     "collapsed": false
 72 |    },
 73 |    "outputs": [],
 74 |    "source": [
 75 |     "# We can inspect the python objects involved.\n",
 76 |     "print loss\n",
 77 |     "\n",
 78 |     "print \"loss:\",loss, loss.parents\n",
 79 |     "print \"loss.parents[0]:\",loss.parents[0], loss.parents[0].parents"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "code",
 84 |    "execution_count": null,
 85 |    "metadata": {
 86 |     "collapsed": false
 87 |    },
 88 |    "outputs": [],
 89 |    "source": [
 90 |     "grads = cgt.grad(loss, [w_k,b])\n",
 91 |     "cgt.display.print_text(grads)"
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "code",
 96 |    "execution_count": null,
 97 |    "metadata": {
 98 |     "collapsed": false
 99 |    },
100 |    "outputs": [],
101 |    "source": [
102 |     "grads = cgt.simplify(grads)\n",
103 |     "cgt.display.print_text(grads);"
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "code",
108 |    "execution_count": null,
109 |    "metadata": {
110 |     "collapsed": false,
111 |     "scrolled": false
112 |    },
113 |    "outputs": [],
114 |    "source": [
115 |     "inputs = [X_nk, y_n, w_k, b]\n",
116 |     "outputs = [loss]\n",
117 |     "interpreter = cgt.execution.run_compilation_pipeline(inputs, outputs,[],[])\n",
118 |     "eg = interpreter.eg\n",
119 |     "import pprint\n",
120 |     "pprint.pprint(eg.to_json())"
121 |    ]
122 |   }
123 |  ],
124 |  "metadata": {
125 |   "kernelspec": {
126 |    "display_name": "Python 2",
127 |    "language": "python",
128 |    "name": "python2"
129 |   },
130 |   "language_info": {
131 |    "codemirror_mode": {
132 |     "name": "ipython",
133 |     "version": 2
134 |    },
135 |    "file_extension": ".py",
136 |    "mimetype": "text/x-python",
137 |    "name": "python",
138 |    "nbconvert_exporter": "python",
139 |    "pygments_lexer": "ipython2",
140 |    "version": "2.7.10"
141 |   }
142 |  },
143 |  "nbformat": 4,
144 |  "nbformat_minor": 0
145 | }
146 | 


--------------------------------------------------------------------------------
/examples/broken/mnist_torchstyle.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import matplotlib.pyplot as plt
  3 | from sklearn.datasets import fetch_mldata
  4 | 
  5 | mnist = fetch_mldata('MNIST original', data_home='~/cgt/data')  # XXX
  6 | 
  7 | print(mnist.data.shape)
  8 | print(mnist.target.shape)
  9 | 
 10 | np.unique(mnist.target)
 11 | 
 12 | #plt.imshow(mnist.data[1, :].reshape(28, 28))
 13 | #plt.show()
 14 | 
 15 | # do some preprocessing
 16 | 
 17 | X = mnist.data
 18 | y = mnist.target
 19 | X = X.astype('float64')
 20 | X = X / 255
 21 | 
 22 | # train-test split (as [Joachims, 2006])
 23 | # TODO can define own validation split...
 24 | 
 25 | n_train = 60000
 26 | X_train = X[:n_train, :]
 27 | X_test = X[n_train:, :]
 28 | y_train = y[:n_train]
 29 | y_test = y[n_train:]
 30 | 
 31 | # construct the network
 32 | 
 33 | import nn
 34 | import cgt
 35 | from opt import sgd_update
 36 | 
 37 | N_LAYERS = 2
 38 | hid_size = X.shape[1]  # 28 * 28
 39 | out_size = 10
 40 | 
 41 | inps = [cgt.matrix(dtype=cgt.floatX)]
 42 | 
 43 | param_list = []
 44 | for k in xrange(N_LAYERS):
 45 |     tmp = nn.Affine(hid_size, hid_size)#(inps[k])
 46 |     param_list.extend([tmp.weight, tmp.bias])
 47 |     inps.append(cgt.tanh(tmp(inps[k])))
 48 | 
 49 | tmp = nn.Affine(hid_size, out_size)
 50 | param_list.extend([tmp.weight, tmp.bias])
 51 | logprobs = nn.logsoftmax(tmp(inps[-1]))
 52 | 
 53 | #dnn = nn.Module(inps[0:1], [logprobs])
 54 | #params = dnn.get_parameters()
 55 | # XXX think should just make this part of get_parameters
 56 | theta = nn.setup_contiguous_storage(param_list)
 57 | # XXX initialize
 58 | theta[:] = np.random.uniform(-0.08, 0.08, theta.shape)
 59 | 
 60 | # XXX taken from other demo, move
 61 | def ind2onehot(inds, n_cls):
 62 |     out = np.zeros(list(inds.shape)+[n_cls,], cgt.floatX)
 63 |     for k in xrange(inds.shape[0]):
 64 |         out[k, inds[k].astype('int32')] = 1
 65 |     #out.flat[np.arange(inds.size)*n_cls + inds.ravel()] = 1
 66 |     return out
 67 | 
 68 | b_size = 25
 69 | 
 70 | def make_loss_and_grad(net):
 71 |     X_b = inps[0] #cgt.matrix(dtype=cgt.floatX)
 72 |     y_onehot = cgt.matrix(dtype='i4')
 73 |     outputs = [logprobs]
 74 | 
 75 |     loss = nn.crossent(outputs[0], y_onehot) / b_size
 76 |     #gradloss = cgt.grad(loss, params)
 77 |     gradloss = cgt.grad(loss, param_list)
 78 | 
 79 |     # XXX use flatcat function
 80 |     grad = cgt.concatenate([x.flatten() for x in gradloss])
 81 |     #grad = gradloss
 82 |     return cgt.make_function([X_b, y_onehot], [loss, grad, logprobs])
 83 | 
 84 | f_loss_and_grad = make_loss_and_grad(None)
 85 | 
 86 | # train loop
 87 | 
 88 | # shuffle data
 89 | 
 90 | perm = np.random.permutation(np.arange(X_train.shape[0]))
 91 | X_train = X_train[perm, :]
 92 | y_train = y_train[perm]
 93 | 
 94 | class Table(object):
 95 |     pass
 96 | state = Table()
 97 | state.theta = theta
 98 | state.step_size = 0.1
 99 | exploss = None
100 | for k in xrange(X_train.shape[0] / b_size):
101 |     X_batch, y_batch = X_train[k*b_size:(k+1)*b_size, :], y_train[k*b_size:(k+1)*b_size]
102 |     loss, grad, logprobs = f_loss_and_grad(X_batch, ind2onehot(y_batch, 10))
103 |     exploss = loss if k == 0 else 0.99*exploss + 0.01*loss
104 |     print('iter %d, loss %f, exploss %f' % (k + 1, loss, exploss))
105 |     sgd_update(state, grad)
106 | 
107 | 
108 | # test code
109 | 
110 | correct = 0
111 | total = 0
112 | print(X_test.shape)
113 | print(y_test.shape)
114 | for k in xrange(X_test.shape[0] / b_size):
115 |     X_batch, y_batch = X_test[k*b_size:(k+1)*b_size, :], y_test[k*b_size:(k+1)*b_size]
116 |     loss, grad, logprobs = f_loss_and_grad(X_batch, ind2onehot(y_batch, 10))
117 |     preds = logprobs.argmax(axis=1).flatten()
118 |     correct = correct + (preds == y_batch).sum()
119 |     total = total + b_size
120 | 
121 | print('%d/%d correct', correct, total)
122 | 


--------------------------------------------------------------------------------
/examples/demo_cifar.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from example_utils import fmt_row, fetch_dataset
 3 | import cPickle, numpy as np
 4 | import cgt
 5 | from cgt import nn
 6 | import argparse, time
 7 | 
 8 | def rmsprop_updates(cost, params, stepsize=0.001, rho=0.9, epsilon=1e-6):
 9 |     grads = cgt.grad(cost, params)
10 |     updates = []
11 |     for p, g in zip(params, grads):
12 |         acc = cgt.shared(p.op.get_value() * 0.)
13 |         acc_new = rho * acc + (1 - rho) * cgt.square(g)
14 |         gradient_scaling = cgt.sqrt(acc_new + epsilon)
15 |         g = g / gradient_scaling
16 |         updates.append((acc, acc_new))
17 |         updates.append((p, p - stepsize * g))
18 |     return updates
19 | 
20 | def main():
21 |     parser = argparse.ArgumentParser()
22 |     parser.add_argument("--profile",action="store_true")
23 |     parser.add_argument("--unittest",action="store_true")
24 |     parser.add_argument("--epochs",type=int,default=10)
25 |     parser.add_argument("--devtype",choices=["cpu","gpu"],default="cpu")
26 |     args = parser.parse_args()
27 | 
28 |     cgt.update_config(default_device=cgt.core.Device(devtype=args.devtype), backend="native")
29 | 
30 |     batchsize = 64
31 |     Xshape = (batchsize, 3, 32, 32)
32 |     X = cgt.tensor4("X", fixed_shape = Xshape)
33 |     y = cgt.vector("y", fixed_shape = (batchsize,), dtype='i4')
34 | 
35 |     conv1 = nn.SpatialConvolution(3, 32, kernelshape=(5,5), pad=(2,2), 
36 |         weight_init=nn.IIDGaussian(std=1e-4))(X)
37 |     relu1 = nn.rectify(conv1)
38 |     pool1 = nn.max_pool_2d(relu1, kernelshape=(3,3), stride=(2,2))
39 |     conv2 = nn.SpatialConvolution(32, 32, kernelshape=(5,5), pad=(2,2), 
40 |         weight_init=nn.IIDGaussian(std=0.01))(pool1)
41 |     relu2 = nn.rectify(conv2)
42 |     pool2 = nn.max_pool_2d(relu2, kernelshape=(3,3), stride=(2,2))
43 |     conv3 = nn.SpatialConvolution(32, 64, kernelshape=(5,5), pad=(2,2), 
44 |         weight_init=nn.IIDGaussian(std=0.01))(pool2)
45 |     pool3 = nn.max_pool_2d(conv3, kernelshape=(3,3), stride=(2,2))
46 |     relu3 = nn.rectify(pool3)
47 |     d0,d1,d2,d3 = relu3.shape
48 |     flatlayer = relu3.reshape([d0,d1*d2*d3])
49 |     nfeats = cgt.infer_shape(flatlayer)[1]
50 |     ip1 = nn.Affine(nfeats, 10)(flatlayer)
51 |     logprobs = nn.logsoftmax(ip1)
52 |     loss = -logprobs[cgt.arange(batchsize), y].mean()
53 | 
54 | 
55 |     params = nn.get_parameters(loss)
56 | 
57 |     updates = rmsprop_updates(loss, params, stepsize=1e-3)
58 |     
59 | 
60 |     train = cgt.function(inputs=[X, y], outputs=[loss], updates=updates)
61 | 
62 | 
63 |     if args.profile: cgt.profiler.start()
64 | 
65 |     data = fetch_dataset("http://rll.berkeley.edu/cgt-data/cifar10.npz")
66 |     Xtrain = data["X_train"]
67 |     ytrain = data["y_train"]
68 | 
69 | 
70 |     print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"])
71 |     for i_epoch in xrange(args.epochs):
72 |         for start in xrange(0, Xtrain.shape[0], batchsize):
73 |             tstart = time.time()
74 |             end = start+batchsize
75 |             print train(Xtrain[start:end], ytrain[start:end]), time.time()-tstart
76 |             if start > batchsize*5: break
77 |         # elapsed = time.time() - tstart
78 |         # trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)])
79 |         # testerr, testloss = computeloss(Xtest, ytest)
80 |         # print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
81 |         if args.profile: 
82 |             cgt.profiler.print_stats()
83 |             return
84 |         if args.unittest:
85 |             break
86 | 
87 | 
88 | 
89 | if __name__ == "__main__":
90 |     main()


--------------------------------------------------------------------------------
/examples/demo_mnist.py:
--------------------------------------------------------------------------------
  1 | # Based on tutorial by Alec Radford
  2 | # https://github.com/Newmu/Theano-Tutorials/blob/master/4_modern_net.py
  3 | 
  4 | import cgt
  5 | from cgt import nn
  6 | from cgt.distributions import categorical
  7 | import numpy as np
  8 | from example_utils import fmt_row, fetch_dataset
  9 | import time, sys
 10 | 
 11 | def init_weights(*shape):
 12 |     return cgt.shared(np.random.randn(*shape) * 0.01, fixed_shape_mask='all')
 13 | 
 14 | def rmsprop_updates(cost, params, stepsize=0.001, rho=0.9, epsilon=1e-6):
 15 |     grads = cgt.grad(cost, params)
 16 |     updates = []
 17 |     for p, g in zip(params, grads):
 18 |         acc = cgt.shared(p.op.get_value() * 0.)
 19 |         acc_new = rho * acc + (1 - rho) * cgt.square(g)
 20 |         gradient_scaling = cgt.sqrt(acc_new + epsilon)
 21 |         g = g / gradient_scaling
 22 |         updates.append((acc, acc_new))
 23 |         updates.append((p, p - stepsize * g))
 24 |     return updates
 25 | 
 26 | def dense_model(X, w_h, w_h2, w_o, p_drop_input, p_drop_hidden):
 27 |     X = nn.dropout(X, p_drop_input)
 28 |     h = nn.rectify(cgt.dot(X, w_h))
 29 | 
 30 |     h = nn.dropout(h, p_drop_hidden)
 31 |     h2 = nn.rectify(cgt.dot(h, w_h2))
 32 | 
 33 |     h2 = nn.dropout(h2, p_drop_hidden)
 34 |     py_x = nn.softmax(cgt.dot(h2, w_o))
 35 |     return py_x
 36 | 
 37 | def convnet_model(X, w, w2, w3, w4, w_o, p_drop_conv, p_drop_hidden):
 38 |     l1a = nn.rectify(nn.conv2d(X, w, kernelshape=(3,3), pad=(1,1)))
 39 |     l1 = nn.max_pool_2d(l1a, kernelshape=(2, 2), stride=(2,2))
 40 |     l1 = nn.dropout(l1, p_drop_conv)
 41 | 
 42 |     l2a = nn.rectify(nn.conv2d(l1, w2, kernelshape=(3,3), pad=(1,1)))
 43 |     l2 = nn.max_pool_2d(l2a, kernelshape=(2, 2), stride=(2,2))
 44 |     l2 = nn.dropout(l2, p_drop_conv)
 45 | 
 46 |     l3a = nn.rectify(nn.conv2d(l2, w3, kernelshape=(3,3), pad=(1,1)))
 47 |     l3b = nn.max_pool_2d(l3a, kernelshape=(2, 2), stride=(2,2))
 48 |     batchsize,channels,rows,cols = l3b.shape
 49 |     l3 = cgt.reshape(l3b, [batchsize, channels*rows*cols])
 50 |     l3 = nn.dropout(l3, p_drop_conv)
 51 | 
 52 |     l4 = nn.rectify(cgt.dot(l3, w4))
 53 |     l4 = nn.dropout(l4, p_drop_hidden)
 54 |     
 55 |     pyx = nn.softmax(cgt.dot(l4, w_o))
 56 |     return pyx
 57 | 
 58 | def tinyconv_model(X, w, w2, p_drop):
 59 |     l1 = nn.conv2d(X, w, kernelshape=(3,3), pad=(1,1),stride=(3,3))
 60 |     l1a = nn.dropout(l1, p_drop)
 61 |     batchsize,channels,rows,cols = l1.shape
 62 |     l1flat = cgt.reshape(l1, [batchsize,channels*rows*cols])
 63 |     pyx = nn.softmax(l1flat.dot(w2))
 64 |     return l1, pyx
 65 | 
 66 | 
 67 | 
 68 | def main():
 69 |     import argparse
 70 |     parser=argparse.ArgumentParser()
 71 |     parser.add_argument("--epochs",type=int,default=10)
 72 |     parser.add_argument("--profile",action="store_true")
 73 |     parser.add_argument("--dropout",action="store_true")
 74 |     parser.add_argument("--stepsize",type=float, default=.001)
 75 |     parser.add_argument("--model",choices=["dense","conv"],default="dense")
 76 |     parser.add_argument("--unittest",action="store_true")
 77 |     parser.add_argument("--grad_check",action="store_true")
 78 |     parser.add_argument("--devtype",choices=["cpu","gpu"],default="cpu")
 79 |     args = parser.parse_args()
 80 | 
 81 |     if args.grad_check: cgt.set_precision("quad")
 82 | 
 83 |     # from mldata.org http://mldata.org/repository/data/viewslug/mnist-original/
 84 |     # converted to npz
 85 |     mnist = fetch_dataset("http://rll.berkeley.edu/cgt-data/mnist.npz")
 86 | 
 87 |     Xdata = (mnist["X"]/255.).astype(cgt.floatX)
 88 |     ydata = mnist["y"]
 89 | 
 90 |     np.random.seed(0)
 91 | 
 92 |     cgt.update_config(default_device=cgt.core.Device(devtype=args.devtype), backend="native")
 93 | 
 94 |     if args.model=="conv":
 95 |         Xdata = Xdata.reshape(-1, 1, 28, 28)
 96 | 
 97 |     Xtrain = Xdata[0:60000]
 98 |     ytrain = ydata[0:60000]
 99 | 
100 |     Xtest = Xdata[60000:70000]
101 |     ytest = ydata[60000:70000]
102 | 
103 |     sortinds = np.random.permutation(60000)
104 |     Xtrain = Xtrain[sortinds]
105 |     ytrain = ytrain[sortinds]
106 | 
107 |     X = cgt.tensor4("X",fixed_shape=(None,1,28,28)) if args.model=="conv" else cgt.matrix("X", fixed_shape=(None,28*28))
108 |     y = cgt.vector("y",dtype='i8')
109 | 
110 |     if args.model == "dense":
111 |         p_drop_input,p_drop_hidden = (0.2, 0.5) if args.dropout else (0,0)    
112 |         w_h = init_weights(784, 256)
113 |         w_h2 = init_weights(256, 256)
114 |         w_o = init_weights(256, 10)
115 |         pofy_drop = dense_model(X, w_h, w_h2, w_o, p_drop_input, p_drop_hidden)
116 |         pofy_nodrop = dense_model(X, w_h, w_h2, w_o, 0., 0.)
117 |         params = [w_h, w_h2, w_o]        
118 |     elif args.model == "conv":
119 |         p_drop_conv,p_drop_hidden = (0.2, 0.5) if args.dropout else (0,0)            
120 |         w = init_weights(32, 1, 3, 3)
121 |         w2 = init_weights(64, 32, 3, 3)
122 |         w3 = init_weights(128, 64, 3, 3)
123 |         w4 = init_weights(128 * 2 * 2, 625)
124 |         w_o = init_weights(625, 10)
125 |         pofy_drop = convnet_model(X, w, w2, w3, w4, w_o, p_drop_conv, p_drop_hidden)
126 |         pofy_nodrop = convnet_model(X, w, w2, w3, w4, w_o, 0., 0.)
127 |         params = [w, w2, w3, w4, w_o]
128 |     else:
129 |         raise RuntimeError("Unreachable")
130 | 
131 |     cost_drop = -cgt.mean(categorical.loglik(y, pofy_drop))
132 |     updates = rmsprop_updates(cost_drop, params, stepsize=args.stepsize)
133 | 
134 |     y_nodrop = cgt.argmax(pofy_nodrop, axis=1)
135 |     cost_nodrop = -cgt.mean(categorical.loglik(y, pofy_nodrop))
136 |     err_nodrop = cgt.cast(cgt.not_equal(y_nodrop, y), cgt.floatX).mean()
137 | 
138 |     train = cgt.function(inputs=[X, y], outputs=[], updates=updates)
139 |     computeloss = cgt.function(inputs=[X, y], outputs=[err_nodrop,cost_nodrop])
140 | 
141 |     batch_size=128
142 | 
143 | 
144 |     from cgt.tests import gradcheck_model
145 |     if args.grad_check:
146 |         cost_nodrop = cgt.core.clone(cost_nodrop, {X:Xtrain[:1],y:ytrain[:1]})
147 |         print "doing gradient check..."
148 |         print "------------------------------------"
149 |         gradcheck_model(cost_nodrop, params[0:1])
150 |         print "success!"
151 |         return
152 | 
153 |     if args.profile: cgt.profiler.start()
154 | 
155 |     print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"])
156 |     for i_epoch in xrange(args.epochs):
157 |         tstart = time.time()
158 |         for start in xrange(0, Xtrain.shape[0], batch_size):
159 |             end = start+batch_size
160 |             train(Xtrain[start:end], ytrain[start:end])
161 |             if args.unittest: return
162 |         elapsed = time.time() - tstart
163 |         trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)])
164 |         testerr, testloss = computeloss(Xtest, ytest)
165 |         print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
166 |     if args.profile: cgt.execution.profiler.print_stats()
167 | 
168 | if __name__ == "__main__":
169 |     main()


--------------------------------------------------------------------------------
/examples/example_utils.py:
--------------------------------------------------------------------------------
 1 | import cPickle as pickle
 2 | import os, os.path as osp, shutil, numpy as np, urllib
 3 | 
 4 | def train_val_test_slices(n, trainfrac, valfrac, testfrac):
 5 |     assert trainfrac+valfrac+testfrac==1.0
 6 |     ntrain = int(np.round(n*trainfrac))
 7 |     nval = int(np.round(n*valfrac))
 8 |     ntest = n - ntrain - nval
 9 |     return slice(0,ntrain), slice(ntrain,ntrain+nval), slice(ntrain+nval,ntrain+nval+ntest)
10 | 
11 | # helper methods to print nice table
12 | def fmt_item(x, l):
13 |     if isinstance(x, np.ndarray):
14 |         assert x.ndim==0
15 |         x = x.item()
16 |     if isinstance(x, float): rep = "%g"%x
17 |     else: rep = str(x)
18 |     return " "*(l - len(rep)) + rep
19 | 
20 | def fmt_row(width, row, header=False):
21 |     out = " | ".join(fmt_item(x, width) for x in row)
22 |     if header: out = out + "\n" + "-"*len(out)
23 |     return out
24 | 
25 | def download(url):
26 |     "download and return path to file"
27 |     fname = osp.basename(url)
28 |     from cgt.core import get_cgt_src_root
29 |     datadir = osp.join(get_cgt_src_root(),"downloads")
30 |     datapath = osp.join(datadir, fname)
31 |     if not osp.exists(datapath):
32 |         print "downloading %s to %s"%(url, datapath)
33 |         if not osp.exists(datadir): os.makedirs(datadir)
34 |         urllib.urlretrieve(url, datapath)
35 |     return datapath
36 | 
37 | 
38 | def fetch_dataset(url):
39 |     datapath = download(url)
40 |     fname = osp.basename(url)
41 |     extension =  osp.splitext(fname)[-1]
42 |     assert extension in [".npz", ".pkl"]
43 |     if extension == ".npz":
44 |         return np.load(datapath)
45 |     elif extension == ".pkl":
46 |         with open(datapath, 'rb') as fin:
47 |             return pickle.load(fin)
48 |     else:
49 |         raise NotImplementedError
50 | 


--------------------------------------------------------------------------------
/examples/param_collection.py:
--------------------------------------------------------------------------------
 1 | import cgt, numpy as np
 2 | 
 3 | 
 4 | class ParamCollection(object):
 5 | 
 6 |     """
 7 |     A utility class containing a collection of parameters
 8 |     which makes it convenient to write optimization code that uses flat vectors
 9 |     """
10 | 
11 |     def __init__(self,params): #pylint: disable=W0622
12 |         assert all(param.is_data() and param.dtype == cgt.floatX for param in params)
13 |         self._params = params
14 | 
15 |     @property
16 |     def params(self):
17 |         return self._params
18 | 
19 |     def get_values(self):
20 |         return [param.op.get_value() for param in self._params]
21 | 
22 |     def get_shapes(self):
23 |         return [param.op.get_shape() for param in self._params]
24 | 
25 |     def get_total_size(self):
26 |         return sum(np.prod(shape) for shape in self.get_shapes())
27 | 
28 |     def num_vars(self):
29 |         return len(self._params)
30 | 
31 |     def set_values(self, parvals):
32 |         assert len(parvals) == len(self._params)
33 |         for (param, newval) in zip(self._params, parvals):
34 |             param.op.set_value(newval)
35 |             param.op.get_shape() == newval.shape
36 | 
37 |     def set_value_flat(self, theta):
38 |         theta = theta.astype(cgt.floatX)
39 |         arrs = []
40 |         n = 0        
41 |         for shape in self.get_shapes():
42 |             size = np.prod(shape)
43 |             arrs.append(theta[n:n+size].reshape(shape))
44 |             n += size
45 |         assert theta.size == n
46 |         self.set_values(arrs)
47 |     
48 |     def get_value_flat(self):
49 |         theta = np.empty(self.get_total_size(),dtype=cgt.floatX)
50 |         n = 0
51 |         for param in self._params:
52 |             s = param.op.get_size()
53 |             theta[n:n+s] = param.op.get_value().flat
54 |             n += s
55 |         assert theta.size == n
56 |         return theta


--------------------------------------------------------------------------------
/include/IRC.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | //== llvm/ADT/IntrusiveRefCntPtr.h - Smart Refcounting Pointer ---*- C++ -*-==//
  3 | //
  4 | //                     The LLVM Compiler Infrastructure
  5 | //
  6 | // This file is distributed under the University of Illinois Open Source
  7 | // License. See LICENSE.TXT for details.
  8 | //
  9 | //===----------------------------------------------------------------------===//
 10 | //
 11 | // This file defines IntrusiveRefCntPtr, a template class that
 12 | // implements a "smart" pointer for objects that maintain their own
 13 | // internal reference count, and RefCountedBase/RefCountedBaseVPTR, two
 14 | // generic base classes for objects that wish to have their lifetimes
 15 | // managed using reference counting.
 16 | //
 17 | // IntrusiveRefCntPtr is similar to Boost's intrusive_ptr with added
 18 | // LLVM-style casting.
 19 | //
 20 | //===----------------------------------------------------------------------===//
 21 | 
 22 | 
 23 | #include <memory>
 24 | 
 25 | 
 26 |   template <class T>
 27 |   class IRC;
 28 | 
 29 |   template <typename T> struct IntrusiveRefCntPtrInfo {
 30 |     static void retain(T *obj) { obj->Retain(); }
 31 |     static void release(T *obj) { obj->Release(); }
 32 |   };
 33 | 
 34 | 
 35 | // JDS: rolled this into cgt_object so we don't need template inheritance business
 36 |   
 37 | // //===----------------------------------------------------------------------===//
 38 | // /// RefCountedBase - A generic base class for objects that wish to
 39 | // ///  have their lifetimes managed using reference counts. Classes
 40 | // ///  subclass RefCountedBase to obtain such functionality, and are
 41 | // ///  typically handled with IntrusiveRefCntPtr "smart pointers" (see below)
 42 | // ///  which automatically handle the management of reference counts.
 43 | // ///  Objects that subclass RefCountedBase should not be allocated on
 44 | // ///  the stack, as invoking "delete" (which is called when the
 45 | // ///  reference count hits 0) on such objects is an error.
 46 | // //===----------------------------------------------------------------------===//
 47 | //   template <class Derived>
 48 | //   class RefCountedBase {
 49 | //     mutable unsigned ref_cnt;
 50 | 
 51 | //   public:
 52 | //     RefCountedBase() : ref_cnt(0) {}
 53 | //     RefCountedBase(const RefCountedBase &) : ref_cnt(0) {}
 54 | 
 55 | //     void Retain() const { ++ref_cnt; }
 56 | //     void Release() const {
 57 | //       assert (ref_cnt > 0 && "Reference count is already zero.");
 58 | //       if (--ref_cnt == 0) delete static_cast<const Derived*>(this);
 59 | //     }
 60 | //   };
 61 | 
 62 | //===----------------------------------------------------------------------===//
 63 | /// IntrusiveRefCntPtr - A template class that implements a "smart pointer"
 64 | ///  that assumes the wrapped object has a reference count associated
 65 | ///  with it that can be managed via calls to
 66 | ///  IntrusivePtrAddRef/IntrusivePtrRelease.  The smart pointers
 67 | ///  manage reference counts via the RAII idiom: upon creation of
 68 | ///  smart pointer the reference count of the wrapped object is
 69 | ///  incremented and upon destruction of the smart pointer the
 70 | ///  reference count is decremented.  This class also safely handles
 71 | ///  wrapping NULL pointers.
 72 | ///
 73 | /// Reference counting is implemented via calls to
 74 | ///  Obj->Retain()/Obj->Release(). Release() is required to destroy
 75 | ///  the object when the reference count reaches zero. Inheriting from
 76 | ///  RefCountedBase/RefCountedBaseVPTR takes care of this
 77 | ///  automatically.
 78 | //===----------------------------------------------------------------------===//
 79 |   template <typename T>
 80 |   class IRC {
 81 |     T* Obj;
 82 | 
 83 |   public:
 84 |     typedef T element_type;
 85 | 
 86 |     explicit IRC() : Obj(nullptr) {}
 87 | 
 88 |     IRC(T* obj) : Obj(obj) {
 89 |       retain();
 90 |     }
 91 | 
 92 |     IRC(const IRC & S) : Obj(S.Obj) {
 93 |       retain();
 94 |     }
 95 | 
 96 |     IRC(IRC && S) : Obj(S.Obj) {
 97 |       S.Obj = nullptr;
 98 |     }
 99 | 
100 |     template <class X>
101 |     IRC(IRC<X>&& S) : Obj(S.get()) {
102 |       S.Obj = 0;
103 |     }
104 | 
105 |     template <class X>
106 |     IRC(const IRC<X>& S)
107 |       : Obj(S.get()) {
108 |       retain();
109 |     }
110 | 
111 |     IRC & operator=(IRC S) {
112 |       swap(S);
113 |       return *this;
114 |     }
115 | 
116 |     ~IRC() { release(); }
117 | 
118 |     T& operator*() const { return *Obj; }
119 | 
120 |     T* operator->() const { return Obj; }
121 | 
122 |     T* get() const { return Obj; }
123 | 
124 |     operator bool() const { return Obj; }
125 | 
126 |     void swap(IRC & other) {
127 |       T* tmp = other.Obj;
128 |       other.Obj = Obj;
129 |       Obj = tmp;
130 |     }
131 | 
132 |     void reset() {
133 |       release();
134 |       Obj = nullptr;
135 |     }
136 | 
137 |     void resetWithoutRelease() {
138 |       Obj = 0;
139 |     }
140 | 
141 |   private:
142 |     void retain() { if (Obj) IntrusiveRefCntPtrInfo<T>::retain(Obj); }
143 |     void release() { if (Obj) IntrusiveRefCntPtrInfo<T>::release(Obj); }
144 | 
145 |     template <typename X>
146 |     friend class IRC;
147 |   };
148 | 
149 |   template<class T, class U>
150 |   inline bool operator==(const IRC<T>& A,
151 |                          const IRC<U>& B)
152 |   {
153 |     return A.get() == B.get();
154 |   }
155 | 
156 |   template<class T, class U>
157 |   inline bool operator!=(const IRC<T>& A,
158 |                          const IRC<U>& B)
159 |   {
160 |     return A.get() != B.get();
161 |   }
162 | 
163 |   template<class T, class U>
164 |   inline bool operator==(const IRC<T>& A,
165 |                          U* B)
166 |   {
167 |     return A.get() == B;
168 |   }
169 | 
170 |   template<class T, class U>
171 |   inline bool operator!=(const IRC<T>& A,
172 |                          U* B)
173 |   {
174 |     return A.get() != B;
175 |   }
176 | 
177 |   template<class T, class U>
178 |   inline bool operator==(T* A,
179 |                          const IRC<U>& B)
180 |   {
181 |     return A == B.get();
182 |   }
183 | 
184 |   template<class T, class U>
185 |   inline bool operator!=(T* A,
186 |                          const IRC<U>& B)
187 |   {
188 |     return A != B.get();
189 |   }
190 | 
191 |   template <class T>
192 |   bool operator==(std::nullptr_t A, const IRC<T> &B) {
193 |     return !B;
194 |   }
195 | 
196 |   template <class T>
197 |   bool operator==(const IRC<T> &A, std::nullptr_t B) {
198 |     return B == A;
199 |   }
200 | 
201 |   template <class T>
202 |   bool operator!=(std::nullptr_t A, const IRC<T> &B) {
203 |     return !(A == B);
204 |   }
205 | 
206 |   template <class T>
207 |   bool operator!=(const IRC<T> &A, std::nullptr_t B) {
208 |     return !(A == B);
209 |   }
210 | 


--------------------------------------------------------------------------------
/include/cgt_common.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "IRC.h"
  4 | #include <cassert>
  5 | #include <cstdio>
  6 | #include <cstdlib>
  7 | #include <cstddef>
  8 | #include <atomic>
  9 | 
 10 | // ================================================================
 11 | // Visibility 
 12 | // ================================================================
 13 |  
 14 | #define CGT_EXPORT __attribute__((visibility("default")))
 15 | #define CGT_EXPORT_C extern "C" __attribute__((visibility("default")))
 16 | 
 17 | // ================================================================
 18 | // Basic structs and enums
 19 | // ================================================================
 20 | 
 21 | typedef enum cgtDtype {
 22 |   cgt_i1 = 1,
 23 |   cgt_i2 = 3,
 24 |   cgt_i4 = 5,
 25 |   cgt_i8 = 7,
 26 |   cgt_f2 = 23,
 27 |   cgt_f4 = 11,
 28 |   cgt_f8 = 12,
 29 |   cgt_f16 = 13,
 30 |   cgt_c8 = 14,
 31 |   cgt_c16 = 15,
 32 |   cgt_c32 = 16,
 33 |   cgt_O = 17
 34 | } cgtDtype;
 35 | // print np.dtype('i1').num # etc
 36 | 
 37 | static inline int cgt_itemsize(cgtDtype dtype) {
 38 |   switch (dtype) {
 39 |     case cgt_i1:
 40 |       return 1;
 41 |     case cgt_i2:
 42 |       return 2;
 43 |     case cgt_i4:
 44 |       return 4;
 45 |     case cgt_i8:
 46 |       return 8;
 47 |     case cgt_f2:
 48 |       return 2;
 49 |     case cgt_f4:
 50 |       return 4;
 51 |     case cgt_f8:
 52 |       return 8;
 53 |     case cgt_f16:
 54 |       return 16;
 55 |     case cgt_c8:
 56 |       return 8;
 57 |     case cgt_c16:
 58 |       return 16;
 59 |     case cgt_c32:
 60 |       return 32;
 61 |     case cgt_O:
 62 |       return 8;
 63 |   }
 64 |   assert(0 && "invalid dtype");
 65 |   return -1;
 66 | }
 67 | 
 68 | typedef enum cgtDevtype {
 69 |   cgtCPU,
 70 |   cgtGPU
 71 | } cgtDevtype;
 72 | 
 73 | class cgtObject {
 74 | public:
 75 |   enum ObjectKind {
 76 |     UndefKind=0,
 77 |     ArrayKind,
 78 |     TupleKind
 79 |   };
 80 |   cgtObject() : ref_cnt(0), kind_(UndefKind) { }
 81 |   cgtObject(ObjectKind kind) : ref_cnt(0), kind_(kind) { }
 82 |   ObjectKind kind() const {return kind_;}
 83 |   // for refcounting:
 84 |   void Retain() const { ++ref_cnt; }
 85 |   inline void Release() const;
 86 | private:
 87 |   ObjectKind kind_;
 88 |   mutable std::atomic<unsigned> ref_cnt;
 89 | };
 90 | 
 91 | class cgtArray : public cgtObject {
 92 | public:
 93 |   cgtArray(int ndim, const long* shape, cgtDtype dtype, cgtDevtype devtype);
 94 |   cgtArray(int ndim, const long* shape, cgtDtype dtype, cgtDevtype devtype, void* fromdata, bool copy);
 95 |   ~cgtArray();
 96 | 
 97 |   int ndim() const { return ndim_; }
 98 |   const long* shape() const { return shape_; }
 99 |   long size() const {
100 |     long s = 1;
101 |     for (int i = 0; i < ndim_; ++i) {
102 |       s *= shape_[i];
103 |     }
104 |     return s;
105 |   }
106 |   long nbytes() const { return size() * cgt_itemsize(dtype_); }
107 |   long stride(int i) const {
108 |     if (ndim_ == 0) {
109 |       return 0;
110 |     }
111 |     assert(0 <= i && i < ndim_ && ndim_ >= 1);
112 |     int s = 1;
113 |     for (int j = i; j < ndim_ - 1; ++j) { // note that (ndim_-1) >= 0, which is important because ndim_ is unsigned
114 |       s *= shape_[j + 1];
115 |     }
116 |     return s;
117 |   }
118 |   cgtDtype dtype() const { return dtype_; }
119 |   cgtDevtype devtype() const { return devtype_; }
120 |   bool ownsdata() const { return ownsdata_; }
121 |   void* data() { return data_; }
122 |   
123 | 
124 |   template <typename T>  
125 |   T& at() {return static_cast<T*>(data_)[0];}
126 |   template <typename T>
127 |   T& at(long i) {return static_cast<T*>(data_)[i];}
128 |   template <typename T>
129 |   T& at(long i, long j) {return static_cast<T*>(data_)[i*shape_[1]+j];}
130 |   template <typename T>
131 |   T& at(long i, long j, long k) {return static_cast<T*>(data_)[(i*shape_[1]+j)*shape_[2]+k];}
132 |   template <typename T>
133 |   T& at(long i, long j, long k, long l) {return static_cast<T*>(data_)[((i*shape_[1]+j)*shape_[2]+k)*shape_[3]+l];}
134 |   void print();
135 | 
136 | private:
137 |   const int ndim_;
138 |   const long* shape_;
139 |   const cgtDtype dtype_;
140 |   const cgtDevtype devtype_;
141 |   const bool ownsdata_;
142 |   void* data_;
143 | };
144 | 
145 | class cgtTuple : public cgtObject {
146 | public:
147 |   cgtTuple(int len);
148 |   void setitem(int i, cgtObject *o) {
149 |     members[i] = o;
150 |   }
151 |   cgtObject *getitem(int i) {
152 |     return members[i].get();
153 |   }
154 |   int size() {return len;}
155 |   ~cgtTuple();
156 |   int len;
157 |   IRC<cgtObject> *members;
158 | };
159 | 
160 | void cgtObject::Release() const {
161 |   assert (ref_cnt > 0 && "Reference count is already zero.");
162 |   if (--ref_cnt == 0) {
163 |     if (kind_==ArrayKind) delete (const cgtArray *)this;  // XXX is this legit?
164 |     else if (kind_==TupleKind) delete (const cgtTuple *)this;
165 |     else assert(0 && "invalid kind");
166 |   }
167 | }
168 | 
169 | /*
170 | Copy from -> to, transfering data between between devices if necessary
171 | */
172 | void cgt_copy_object(cgtObject* to, cgtObject* from);
173 | void cgt_copy_array(cgtArray* to, cgtArray* from);
174 | void cgt_copy_tuple(cgtTuple* to, cgtTuple* from);
175 | 
176 | typedef void (*cgtByRefFun)(void * /* closure data */, cgtObject ** /* read */, cgtObject * /* write */);
177 | typedef cgtObject *(*cgtByValFun)(void * /* closure data */, cgtObject ** /* read */);
178 | 
179 | // ================================================================
180 | // Error handling 
181 | // ================================================================
182 | 
183 | #define cgt_assert(x)  \
184 |     do {\
185 |         if (!(x)) {\
186 |             fprintf (stderr, "Assertion failed: %s (%s:%d)\n", #x, \
187 |                 __FILE__, __LINE__);\
188 |             fflush (stderr);\
189 |             abort();\
190 |         }\
191 |     } while (0)
192 | 
193 | #define CGT_NORETURN __attribute__ ((noreturn))
194 | 
195 | 
196 | typedef enum {
197 |   cgtStatusOK = 0,
198 |   cgtStatusErr
199 | } cgtStatus;
200 | 
201 | extern cgtStatus cgtGlobalStatus;
202 | extern char cgtGlobalErrorMsg[1000];
203 | 
204 | static inline void clear_error() {
205 |   cgtGlobalStatus = cgtStatusOK;
206 | }
207 | 
208 | // TODO can do it more safely now that we're in c++
209 | #define cgt_check(x, msg, ...) \
210 |     do {\
211 |         if ((!(x))) {\
212 |             sprintf(cgtGlobalErrorMsg, msg, ##__VA_ARGS__);\
213 |             cgtGlobalStatus = cgtStatusErr;\
214 |         }\
215 |     } while(0)
216 | 
217 | 
218 | // ================================================================
219 | // Memory management
220 | // ================================================================
221 | 
222 | static inline bool cgt_is_array(cgtObject *o) { return o->kind() == cgtObject::ArrayKind; }
223 | static inline bool cgt_is_tuple(cgtObject *o) { return o->kind() == cgtObject::TupleKind; }
224 | 
225 | void *cgt_alloc(cgtDevtype devtype, long size);
226 | void cgt_free(cgtDevtype devtype, void *ptr);
227 | void cgt_memcpy(cgtDevtype dest_type, cgtDevtype src_type, void *dest_ptr, void *src_ptr, long nbytes);
228 | 


--------------------------------------------------------------------------------
/include/cgt_cuda.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "cuda_runtime.h"
 3 | #include "cublas_v2.h"
 4 | #include "stdio.h"
 5 | 
 6 | #define CGT_EXPORT_C extern "C" __attribute__((visibility("default")))
 7 | 
 8 | // Code mostly ripped off from caffe
 9 | 
10 | // CUDA: various checks for different function calls.
11 | #define CUDA_CHECK(condition) \
12 |   /* Code block avoids redefinition of cudaError_t error */ \
13 |   do { \
14 |     cudaError_t error = condition; \
15 |     if (error != cudaSuccess) printf("%s\n", cudaGetErrorString(error)); \
16 |   } while (0)
17 | 
18 | #define CUDA_CHECK_ERROR(msg) do { \
19 |   cudaError_t e = cudaGetLastError(); \
20 |   if (e != cudaSuccess) {printf("%s\n", cudaGetErrorString(e));} \
21 |   } while (0)
22 | 
23 | #define CUBLAS_CHECK(condition) \
24 |   do { \
25 |     cublasStatus_t status = condition; \
26 |     if (status != CUBLAS_STATUS_SUCCESS) printf("%s\n", cublasGetErrorString(status)); \
27 |   } while (0)
28 | 
29 | #define CURAND_CHECK(condition) \
30 |   do { \
31 |     curandStatus_t status = condition; \
32 |     if (status != CURAND_STATUS_SUCCESS) printf("%s\n", curandGetErrorString(status)); \
33 |   } while (0)
34 | 
35 | // CUDA: grid stride looping
36 | #define CUDA_KERNEL_LOOP(i, n) \
37 |   for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
38 |        i < (n); \
39 |        i += blockDim.x * gridDim.x)
40 | 
41 | // CUDA: check for error after kernel execution and exit loudly if there is one.
42 | #define CUDA_POST_KERNEL_CHECK CUDA_CHECK(cudaPeekAtLastError())
43 | 
44 | // this is from Minerva
45 | // todo: what's the rationale?
46 | static void cgt_get_bt(size_t size, int& num_blocks, int& num_threads) {
47 |   if(size <= 32)
48 |     num_threads = 32;
49 |   else if(size <= 64)
50 |     num_threads = 64;
51 |   else if(size <= 128)
52 |     num_threads = 128;
53 |   else if(size <= 256)
54 |     num_threads = 256;
55 |   else if(size <= 512)
56 |     num_threads = 512;
57 |   else
58 |     num_threads = 1024;
59 |   num_blocks = (int)(((size + num_threads - 1) / num_threads));
60 |   if (num_blocks < 0 || 128 < num_blocks) {
61 |     num_blocks = 128;
62 |   }
63 | }
64 | 
65 | inline const char* cublasGetErrorString(cublasStatus_t status) {
66 |   switch (status) {
67 |     case CUBLAS_STATUS_SUCCESS:
68 |       return "CUBLAS_STATUS_SUCCESS";
69 |     case CUBLAS_STATUS_NOT_INITIALIZED:
70 |       return "CUBLAS_STATUS_NOT_INITIALIZED";
71 |     case CUBLAS_STATUS_ALLOC_FAILED:
72 |       return "CUBLAS_STATUS_ALLOC_FAILED";
73 |     case CUBLAS_STATUS_INVALID_VALUE:
74 |       return "CUBLAS_STATUS_INVALID_VALUE";
75 |     case CUBLAS_STATUS_ARCH_MISMATCH:
76 |       return "CUBLAS_STATUS_ARCH_MISMATCH";
77 |     case CUBLAS_STATUS_MAPPING_ERROR:
78 |       return "CUBLAS_STATUS_MAPPING_ERROR";
79 |     case CUBLAS_STATUS_EXECUTION_FAILED:
80 |       return "CUBLAS_STATUS_EXECUTION_FAILED";
81 |     case CUBLAS_STATUS_INTERNAL_ERROR:
82 |       return "CUBLAS_STATUS_INTERNAL_ERROR";
83 |     case CUBLAS_STATUS_NOT_SUPPORTED:
84 |       return "CUBLAS_STATUS_NOT_SUPPORTED";
85 |     default:
86 |       break;
87 |   }
88 |   return "Unknown cuBLAS status";
89 | }
90 | 
91 | 


--------------------------------------------------------------------------------
/include/execution.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include "cgt_common.h"
  3 | #include <vector>
  4 | #include <thread>
  5 | #include <string>
  6 | 
  7 | namespace cgt {
  8 | using std::vector;
  9 | 
 10 | // note: no-args initializers are only here because they're required by cython
 11 | 
 12 | class ByRefCallable {
 13 | public:
 14 |     cgtByRefFun fptr;
 15 |     void* data;
 16 |     ByRefCallable(cgtByRefFun fptr, void* data) : fptr(fptr), data(data) {}
 17 |     ByRefCallable() : fptr(NULL), data(NULL) {}
 18 |     void operator()(cgtObject ** reads, cgtObject * write) {
 19 |         (*fptr)(data, reads, write);
 20 |     }
 21 | };
 22 | 
 23 | struct ByValCallable {
 24 | public:
 25 |     cgtByValFun fptr;
 26 |     void* data;
 27 |     ByValCallable(cgtByValFun fptr, void* data) : fptr(fptr), data(data) {}
 28 |     ByValCallable() : fptr(NULL), data(NULL) {}
 29 |     cgtObject * operator()(cgtObject ** args) {
 30 |         return (*fptr)(data, args);
 31 |     }
 32 | };
 33 | 
 34 | class MemLocation {
 35 | public:
 36 |     MemLocation() : index_(0), devtype_(cgtCPU) {}
 37 |     MemLocation(long index, cgtDevtype devtype) : index_(index), devtype_(devtype) {}
 38 |     long index() const { return index_; }
 39 |     cgtDevtype devtype() const { return devtype_; }
 40 | private:
 41 |     long index_;
 42 |     cgtDevtype devtype_; // TODO: full device, not just devtype
 43 | };
 44 | 
 45 | class Interpreter;
 46 | 
 47 | enum InstructionKind {
 48 |     LoadArgumentKind,
 49 |     AllocKind,
 50 |     BuildTupKind,
 51 |     ReturnByRefKind,
 52 |     ReturnByValKind
 53 | };
 54 | 
 55 | class Instruction {
 56 | public:
 57 |     Instruction(InstructionKind kind, const std::string& repr, bool quick) : kind_(kind), repr_(repr), quick_(quick) { }
 58 |     virtual void fire(Interpreter*)=0;
 59 |     virtual ~Instruction() {};
 60 |     virtual const vector<MemLocation>& get_readlocs() const=0;
 61 |     virtual const MemLocation& get_writeloc() const=0;
 62 |     const std::string& repr() const { return repr_; }
 63 |     const InstructionKind kind() const {return kind_;}
 64 |     const bool quick() {return quick_;}
 65 | private:
 66 |     InstructionKind kind_;
 67 |     std::string repr_;
 68 |     bool quick_;
 69 | };
 70 | 
 71 | class ExecutionGraph {
 72 | public:
 73 |     ExecutionGraph(const vector<Instruction*>& instrs, long n_args, long n_locs)
 74 |     : instrs_(instrs), n_args_(n_args), n_locs_(n_locs) {}
 75 |     ~ExecutionGraph();
 76 |     const vector<Instruction*>& instrs() const {return instrs_;}
 77 |     long n_args() const {return n_args_;}
 78 |     long n_locs() const {return n_locs_;}
 79 |     long n_instrs() const {return instrs_.size();}
 80 | private:
 81 |     vector<Instruction*> instrs_; // owns, will delete
 82 |     long n_args_;
 83 |     long n_locs_;
 84 | };
 85 | 
 86 | class Interpreter {
 87 | public:
 88 |     // called by external code
 89 |     virtual cgtTuple * run(cgtTuple *)=0;
 90 |     // called by instructions:
 91 |     virtual cgtObject * get(const MemLocation&)=0;
 92 |     virtual void set(const MemLocation&, cgtObject *)=0;
 93 |     virtual cgtObject * getarg(int)=0;
 94 |     virtual ~Interpreter() {}
 95 | };
 96 | 
 97 | // pass by value because of cython
 98 | Interpreter* create_interpreter(ExecutionGraph*, vector<MemLocation> output_locs, int num_threads);
 99 | 
100 | class LoadArgument : public Instruction  {
101 | public:
102 |     LoadArgument(const std::string& repr, int ind, const MemLocation& writeloc) : Instruction(LoadArgumentKind, repr, true), ind(ind), writeloc(writeloc) {}
103 |     void fire(Interpreter*);
104 |     const vector<MemLocation>& get_readlocs() const { return readlocs; }
105 |     const MemLocation& get_writeloc() const { return writeloc; }
106 | private:
107 |     int ind;
108 |     vector<MemLocation> readlocs;  // empty
109 |     MemLocation writeloc;
110 | };
111 | 
112 | 
113 | class Alloc : public Instruction {
114 | public:
115 |     Alloc(const std::string& repr, cgtDtype dtype, vector<MemLocation> readlocs, const MemLocation& writeloc)
116 |     : Instruction(AllocKind, repr, true), dtype(dtype), readlocs(readlocs), writeloc(writeloc) {}
117 |     void fire(Interpreter*);
118 |     const vector<MemLocation>& get_readlocs() const { return readlocs; }
119 |     const MemLocation& get_writeloc() const { return writeloc; }
120 | private:
121 |     cgtDtype dtype;
122 |     vector<MemLocation> readlocs;
123 |     MemLocation writeloc;
124 | };
125 | 
126 | class BuildTup : public Instruction {
127 | public:
128 |     BuildTup(const std::string& repr, vector<MemLocation> readlocs, const MemLocation& writeloc)
129 |     : Instruction(BuildTupKind, repr, true), readlocs(readlocs), writeloc(writeloc) {}
130 |     void fire(Interpreter*);
131 |     const vector<MemLocation>& get_readlocs() const { return readlocs; }
132 |     const MemLocation& get_writeloc() const { return writeloc; }
133 | private:
134 |     vector<MemLocation> readlocs;
135 |     MemLocation writeloc;
136 | };
137 | 
138 | class ReturnByRef : public Instruction  {
139 | public:
140 |     ReturnByRef(const std::string& repr, vector<MemLocation> readlocs, const MemLocation& writeloc, ByRefCallable callable, bool quick)
141 |     : Instruction(ReturnByRefKind, repr, quick), readlocs(readlocs), writeloc(writeloc), callable(callable) {}
142 |     void fire(Interpreter*);
143 |     const vector<MemLocation>& get_readlocs() const { return readlocs; }
144 |     const MemLocation& get_writeloc() const { return writeloc; }
145 | private:
146 |     vector<MemLocation> readlocs;
147 |     MemLocation writeloc;
148 |     ByRefCallable callable;
149 | };
150 | 
151 | class ReturnByVal : public Instruction  {
152 | public:
153 |     ReturnByVal(const std::string& repr, vector<MemLocation> readlocs, const MemLocation& writeloc, ByValCallable callable, bool quick)
154 |     : Instruction(ReturnByValKind, repr, quick), readlocs(readlocs), writeloc(writeloc), callable(callable) {}
155 |     void fire(Interpreter*);
156 |     const vector<MemLocation>& get_readlocs() const { return readlocs; }
157 |     const MemLocation& get_writeloc() const { return writeloc; }
158 | private:
159 |     vector<MemLocation> readlocs;
160 |     MemLocation writeloc;
161 |     ByValCallable callable;
162 | };
163 | 
164 | 
165 | }
166 | 


--------------------------------------------------------------------------------
/include/im2col.h:
--------------------------------------------------------------------------------
 1 | 
 2 | // JDS: Modified Caffe code so channels are last dimension in output, and so arguments are template parameters
 3 | // (actually gives a good speedup). could be further sped up by rearranging loops
 4 | // so inner loops can be unrolled
 5 | 
 6 | template <typename Dtype, int kernel_h, int kernel_w, int pad_h, int pad_w, int stride_h, int stride_w>
 7 | void im2col_cpu(const Dtype* data_im, int channels, int height, int width, Dtype* data_col) {
 8 |   int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
 9 |   int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
10 |   int channels_col = channels * kernel_h * kernel_w;
11 | 
12 |   for (int c = 0; c < channels_col; ++c) {
13 |     int w_offset = c % kernel_w;
14 |     int h_offset = (c / kernel_w) % kernel_h;
15 |     int c_im = c / kernel_h / kernel_w;
16 |     for (int h = 0; h < height_col; ++h) {
17 |       for (int w = 0; w < width_col; ++w) {
18 |         int h_pad = h * stride_h - pad_h + h_offset;
19 |         int w_pad = w * stride_w - pad_w + w_offset;
20 |         if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
21 |           data_col[channels_col*width_col*h + channels_col*w + c] =
22 |             data_im[(c_im * height + h_pad) * width + w_pad];
23 |         else
24 |           data_col[channels_col*width_col*h + channels_col*w + c] = 0;
25 |       }
26 |     }
27 |   }
28 | }
29 | 
30 | template <typename Dtype, int patch_h, int patch_w, int pad_h, int pad_w, int stride_h, int stride_w>
31 | void col2im_cpu(const Dtype* data_col, const int channels, int height, int width, Dtype* data_im) {
32 |   for (int i=0; i < height * width * channels; ++i) data_im[i] = Dtype(0);
33 |   int height_col = (height + 2 * pad_h - patch_h) / stride_h + 1;
34 |   int width_col = (width + 2 * pad_w - patch_w) / stride_w + 1;
35 |   int channels_col = channels * patch_h * patch_w;
36 |   for (int c = 0; c < channels_col; ++c) {
37 |     int w_offset = c % patch_w;
38 |     int h_offset = (c / patch_w) % patch_h;
39 |     int c_im = c / patch_h / patch_w;
40 |     for (int h = 0; h < height_col; ++h) {
41 |       for (int w = 0; w < width_col; ++w) {
42 |         int h_pad = h * stride_h - pad_h + h_offset;
43 |         int w_pad = w * stride_w - pad_w + w_offset;
44 |         if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
45 |           data_im[(c_im * height + h_pad) * width + w_pad] +=
46 |               data_col[channels_col*width_col*h + channels_col*w + c];
47 |       }
48 |     }
49 |   }
50 | }
51 | 


--------------------------------------------------------------------------------
/include/lrn.cuh:
--------------------------------------------------------------------------------
  1 | // Copied from Minerva
  2 | 
  3 | template<typename Dtype>
  4 | __global__ static void LRNFillScale(const int nthreads, const Dtype* in, const int num, const int channels, const int height, const int width, const int size, const Dtype alpha_over_size, Dtype* scale) {
  5 |   for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x ) {
  6 |     // find out the local offset
  7 |     int w = index % width;
  8 |     int h = (index / width) % height;
  9 |     int n = index / width / height;
 10 |     int offset = (n * channels * height + h) * width + w;
 11 |     int step = height * width;
 12 |     const Dtype* shifted_in = in + offset;
 13 |     Dtype* shifted_scale = scale + offset;
 14 |     int head = 0;
 15 |     int pre_pad = (size - 1) / 2;
 16 |     int post_pad = size - pre_pad - 1;
 17 |     Dtype accum_scale = 0;
 18 |     // fill the scale at [n, :, h, w]
 19 |     // accumulate values
 20 |     while (head < post_pad) {
 21 |       accum_scale += shifted_in[head * step] * shifted_in[head * step];
 22 |       ++head;
 23 |     }
 24 |     // until we reach size, nothing needs to be subtracted
 25 |     while (head < size) {
 26 |       accum_scale += shifted_in[head * step] * shifted_in[head * step];
 27 |       shifted_scale[(head - post_pad) * step] = 1. + accum_scale * alpha_over_size;
 28 |       ++head;
 29 |     }
 30 |     // both add and subtract
 31 |     while (head < channels) {
 32 |       accum_scale += shifted_in[head * step] * shifted_in[head * step];
 33 |       accum_scale -= shifted_in[(head - size) * step] * shifted_in[(head - size) * step];
 34 |       shifted_scale[(head - post_pad) * step] = 1. + accum_scale * alpha_over_size;
 35 |       ++head;
 36 |     }
 37 |     // subtract only
 38 |     while (head < channels + post_pad) {
 39 |       accum_scale -= shifted_in[(head - size) * step] * shifted_in[(head - size) * step];
 40 |       shifted_scale[(head - post_pad) * step] = 1. + accum_scale * alpha_over_size;
 41 |       ++head;
 42 |     }
 43 |   }
 44 | }
 45 | 
 46 | template<typename Dtype>
 47 | __global__ static void LRNComputeOutput(const int nthreads, const Dtype* in,
 48 |     const Dtype* scale, const Dtype negative_beta, Dtype* out) {
 49 |   for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x ) {
 50 |     out[index] = in[index] * pow(scale[index], negative_beta);
 51 |   }
 52 | }
 53 | 
 54 | template<typename Dtype>
 55 | __global__ static void LRNComputeDiff(const int nthreads, const Dtype* bottom_data,
 56 |     const Dtype* top_data, const Dtype* scale, const Dtype* top_diff,
 57 |     const int num, const int channels, const int height,
 58 |     const int width, const int size, const Dtype negative_beta,
 59 |     const Dtype cache_ratio,
 60 |     Dtype* bottom_diff) {
 61 | 
 62 |   for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x ) {
 63 |     // find out the local offset
 64 |     int w = index % width;
 65 |     int h = (index / width) % height;
 66 |     int n = index / width / height;
 67 |     int offset = (n * channels * height + h) * width + w;
 68 |     int step = height * width;
 69 |     const Dtype* shifted_btm_data = bottom_data + offset;
 70 |     const Dtype* shifted_top_data = top_data + offset;
 71 |     const Dtype* shifted_scale = scale + offset;
 72 |     const Dtype* shifted_top_diff = top_diff + offset;
 73 |     Dtype* shifted_btm_diff = bottom_diff + offset;
 74 |     int head = 0;
 75 |     int pre_pad = size - (size + 1) / 2;
 76 |     int post_pad = size - pre_pad - 1;
 77 |     Dtype accum_ratio = 0;
 78 |     // accumulate values
 79 |     while (head < post_pad) {
 80 |       accum_ratio += shifted_top_diff[head * step] * shifted_top_data[head * step] /
 81 |         shifted_scale[head * step];
 82 |       ++head;
 83 |     }
 84 |     // until we reach size, nothing needs to be subtracted
 85 |     while (head < size) {
 86 |       accum_ratio += shifted_top_diff[head * step] * shifted_top_data[head * step] /
 87 |         shifted_scale[head * step];
 88 |       shifted_btm_diff[(head - post_pad) * step] = shifted_top_diff[(head - post_pad) * step]
 89 |         * pow(shifted_scale[(head - post_pad) * step], negative_beta) - cache_ratio *
 90 |         shifted_btm_data[(head - post_pad) * step] * accum_ratio;
 91 |       ++head;
 92 |     }
 93 |     // both add and subtract
 94 |     while (head < channels) {
 95 |       accum_ratio += shifted_top_diff[head * step] * shifted_top_data[head * step] /
 96 |         shifted_scale[head * step];
 97 |       accum_ratio -= shifted_top_diff[(head - size) * step] *
 98 |         shifted_top_data[(head - size) * step] / shifted_scale[(head - size) * step];
 99 |       shifted_btm_diff[(head - post_pad) * step] = shifted_top_diff[(head - post_pad) * step]
100 |         * pow(shifted_scale[(head - post_pad) * step], negative_beta) - cache_ratio *
101 |         shifted_btm_data[(head - post_pad) * step] * accum_ratio;
102 |       ++head;
103 |     }
104 |     // subtract only
105 |     while (head < channels + post_pad) {
106 |       accum_ratio -= shifted_top_diff[(head - size) * step] *
107 |         shifted_top_data[(head - size) * step] / shifted_scale[(head - size) * step];
108 |       shifted_btm_diff[(head - post_pad) * step] = shifted_top_diff[(head - post_pad) * step]
109 |         * pow(shifted_scale[(head - post_pad) * step], negative_beta) - cache_ratio *
110 |         shifted_btm_data[(head - post_pad) * step] * accum_ratio;
111 |       ++head;
112 |     }
113 |   }
114 | }
115 | 


--------------------------------------------------------------------------------
/include/pooling.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include <algorithm>
  3 | #include <cfloat>
  4 | // Copied & modified from Caffe
  5 | 
  6 | 
  7 | struct conv_closure {
  8 |   int kernel_h;
  9 |   int kernel_w;
 10 |   int pad_h;
 11 |   int pad_w;
 12 |   int stride_h;
 13 |   int stride_w;
 14 | };
 15 | 
 16 | 
 17 | template <typename Dtype>
 18 | void caffe_set(const int N, const Dtype alpha, Dtype* Y) {
 19 |   for (int i = 0; i < N; ++i) {
 20 |     Y[i] = alpha;
 21 |   }
 22 | }
 23 | 
 24 | template <typename Dtype>
 25 | void max_pool(conv_closure* cl, cgtArray* bottom, cgtArray* top, cgtArray* mask) {
 26 |   using std::max;
 27 |   using std::min;
 28 |   Dtype* bottom_data = static_cast<Dtype*>(bottom->data());
 29 |   Dtype* top_data = static_cast<Dtype*>(top->data());
 30 |   const int top_count = top->size();
 31 |   // We'll output the mask to top[1] if it's of size >1.
 32 |   int* mask_data = static_cast<int*>(mask->data());
 33 |   caffe_set(top_count, Dtype(-FLT_MAX), top_data);
 34 |   caffe_set(top_count, -1, mask_data);
 35 |     // The main loop
 36 | 
 37 |   int batchsize = top->shape()[0],
 38 |       channels = top->shape()[1],
 39 |       pooledheight = top->shape()[2],
 40 |       pooledwidth = top->shape()[3],
 41 |       height = bottom->shape()[2],
 42 |       width = bottom->shape()[3];
 43 | 
 44 |   for (int n = 0; n < batchsize; ++n) {
 45 |     for (int c = 0; c < channels; ++c) {
 46 |       for (int ph = 0; ph < pooledheight; ++ph) {
 47 |         for (int pw = 0; pw < pooledwidth; ++pw) {
 48 |           int hstart = ph * cl->stride_h - cl->pad_h;
 49 |           int wstart = pw * cl->stride_w - cl->pad_w;
 50 |           int hend = min(hstart + cl->kernel_h, height);
 51 |           int wend = min(wstart + cl->kernel_w, width);
 52 |           hstart = max(hstart, 0);
 53 |           wstart = max(wstart, 0);
 54 |           const int pool_index = ph * pooledwidth + pw;
 55 |           for (int h = hstart; h < hend; ++h) {
 56 |             for (int w = wstart; w < wend; ++w) {
 57 |               const int index = h * width + w;
 58 |               if (bottom_data[index] > top_data[pool_index]) {
 59 |                 top_data[pool_index] = bottom_data[index];
 60 |                 mask_data[pool_index] = index;
 61 |               }
 62 |             }
 63 |           }
 64 |         }
 65 |       }
 66 |       bottom_data += bottom->stride(1);
 67 |       top_data += top->stride(1);
 68 |       mask_data += top->stride(1);
 69 |     }
 70 |   }
 71 | }
 72 | 
 73 | template <typename Dtype>
 74 | void max_pool_pullback(cgtArray* bottom, cgtArray* top, cgtArray* mask, 
 75 |   cgtArray* top_diff, cgtArray* bottom_diff) {
 76 |   const Dtype* top_diff_data = static_cast<Dtype*>(top_diff->data());
 77 |   Dtype* bottom_diff_data = static_cast<Dtype*>(bottom_diff->data());
 78 |   // Different pooling methods. We explicitly do the switch outside the for
 79 |   // loop to save time, although this results in more codes.
 80 |   caffe_set(bottom_diff->size(), Dtype(0), bottom_diff_data);
 81 |   // We'll output the mask to top[1] if it's of size >1.
 82 |   int* mask_data = static_cast<int*>(mask->data());
 83 | 
 84 |   int batchsize = top->shape()[0],
 85 |       channels = top->shape()[1],
 86 |       pooledheight = top->shape()[2],
 87 |       pooledwidth = top->shape()[3],
 88 |       height = bottom->shape()[2],
 89 |       width = top->shape()[3];
 90 | 
 91 |   for (int n = 0; n < batchsize; ++n) {
 92 |     for (int c = 0; c < channels; ++c) {
 93 |       for (int ph = 0; ph < pooledheight; ++ph) {
 94 |         for (int pw = 0; pw < pooledwidth; ++pw) {
 95 |           const int index = ph * pooledwidth + pw;
 96 |           const int bottom_index = mask_data[index];
 97 |           bottom_diff_data[bottom_index] += top_diff_data[index];
 98 |         }
 99 |       }
100 |       bottom_diff_data += bottom->stride(1);
101 |       top_diff_data += top->stride(1);
102 |       mask_data += mask->stride(1);
103 |     }
104 |   }
105 | 
106 | }
107 | 


--------------------------------------------------------------------------------
/scripts/cgt-clear-cache:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import cgt,os.path as osp, subprocess,sys
 3 | config = cgt.get_config()
 4 | cache_dir = config["cache_dir"]
 5 | cmd = "rm -rf %s"%osp.expandvars(cache_dir)
 6 | while True:
 7 |     sys.stderr.write("About to run \x1b[32m%s\x1b[0m. OK? (y/n): "%cmd)
 8 |     yn = raw_input()
 9 |     if yn=='y':
10 |         subprocess.check_call(cmd,shell=True)
11 |         break
12 |     elif yn=='n': 
13 |         break


--------------------------------------------------------------------------------
/src/cgt_common.cpp:
--------------------------------------------------------------------------------
  1 | #include "stdlib.h"
  2 | #include "assert.h"
  3 | #include "memory.h"
  4 | #include "stdio.h"
  5 | #include "cgt_common.h"
  6 | 
  7 | #ifdef CGT_ENABLE_CUDA
  8 | #include "cgt_cuda.h"
  9 | #endif
 10 | 
 11 | // ================================================================
 12 | // Object alloc/dealloc 
 13 | // ================================================================
 14 | 
 15 | cgtArray::cgtArray(int ndim, const long* shape, cgtDtype dtype, cgtDevtype devtype)
 16 |     : cgtObject(ObjectKind::ArrayKind),
 17 |       ndim_(ndim),
 18 |       dtype_(dtype),
 19 |       devtype_(devtype),
 20 |       ownsdata_(true) {
 21 |   shape_ = new long[ndim];
 22 |   memcpy(const_cast<long*>(shape_), shape, ndim * sizeof(long));
 23 |   data_ = cgt_alloc(devtype_, nbytes());
 24 | }
 25 | 
 26 | cgtArray::cgtArray(int ndim, const long* shape, cgtDtype dtype, cgtDevtype devtype, void* fromdata, bool copy)
 27 |     : cgtObject(ObjectKind::ArrayKind),
 28 |       ndim_(ndim),
 29 |       shape_(shape),
 30 |       dtype_(dtype),
 31 |       devtype_(devtype),
 32 |       ownsdata_(copy) {
 33 |   cgt_assert(fromdata != NULL);
 34 |   shape_ = new long[ndim];
 35 |   memcpy(const_cast<long*>(shape_), shape, ndim * sizeof(long));
 36 |   if (copy) {
 37 |     data_ = cgt_alloc(devtype, nbytes());
 38 |     cgt_memcpy(devtype, cgtCPU, data_, fromdata, nbytes());
 39 |   } else {
 40 |     data_ = fromdata;
 41 |   }
 42 | }
 43 | 
 44 | void cgtArray::print() {
 45 |   printf("Array{shape=(");
 46 |   if (ndim_ > 0) printf("%zu",shape_[0]);
 47 |   for (int i=1; i < ndim_; ++i) {
 48 |     printf(", %zu", shape_[i]);
 49 |   }
 50 |   printf("), dtype=%i}", dtype_);
 51 | }
 52 | 
 53 | cgtArray::~cgtArray() {
 54 |   delete[] shape_;
 55 |   if (ownsdata_) cgt_free(devtype_, data_);
 56 | }
 57 | 
 58 | cgtTuple::cgtTuple(int len)
 59 |     : cgtObject(ObjectKind::TupleKind), len(len) {
 60 |   members = new IRC<cgtObject>[len];
 61 | }
 62 | 
 63 | cgtTuple::~cgtTuple() {
 64 |   delete[] members;
 65 | }
 66 | 
 67 | 
 68 | // ================================================================
 69 | // Copying
 70 | // ================================================================
 71 | 
 72 | void cgt_copy_object(cgtObject* to, cgtObject* from) {
 73 |   cgt_assert(to->kind() == from->kind());
 74 |   if (to->kind() == cgtObject::ArrayKind) {    
 75 |     cgt_copy_array(static_cast<cgtArray*>(to), static_cast<cgtArray*>(from));
 76 |   }
 77 |   else if (to->kind() == cgtObject::TupleKind) {
 78 |     cgt_copy_tuple(static_cast<cgtTuple*>(to), static_cast<cgtTuple*>(from));
 79 |   }
 80 |   else cgt_assert(0 && "unreachable");
 81 | }
 82 | 
 83 | void cgt_copy_array(cgtArray* to, cgtArray* from) {
 84 |   cgt_assert(from->size() == to->size() && from->dtype() == to->dtype()) ;
 85 |   cgt_memcpy(to->devtype(), from->devtype(), to->data(), from->data(), from->nbytes());
 86 | }
 87 | 
 88 | void cgt_copy_tuple(cgtTuple* to, cgtTuple* from) {
 89 |   for (int i=0; i < to->size(); ++i) cgt_copy_object(to->getitem(i), from->getitem(i));
 90 | }
 91 | 
 92 | 
 93 | 
 94 | // ================================================================
 95 | // Error handling 
 96 | // ================================================================
 97 | 
 98 | void cgt_abort() {
 99 |   abort();
100 | }
101 | 
102 | cgtStatus cgtGlobalStatus = cgtStatusOK;
103 | char cgtGlobalErrorMsg[1000];
104 | 
105 | 
106 | // ================================================================
107 | // Memory management 
108 | // ================================================================
109 | 
110 | void *cgt_alloc(cgtDevtype devtype, long size) {
111 |   if (devtype == cgtCPU) {
112 |     return malloc(size);
113 |   }
114 |   else {
115 | #ifdef CGT_ENABLE_CUDA
116 |     void* out;
117 |     CUDA_CHECK(cudaMalloc(&out, size));
118 |     return out;
119 | #else
120 |     cgt_assert(0 && "CUDA disabled");
121 | #endif
122 |   }
123 | }
124 | 
125 | void cgt_free(cgtDevtype devtype, void *ptr) {
126 |   if (devtype == cgtCPU) {
127 |     free(ptr);
128 |   }
129 |   else {
130 | #ifdef CGT_ENABLE_CUDA
131 |     CUDA_CHECK(cudaFree(ptr));
132 | #else
133 |     cgt_assert(0 && "CUDA disabled");
134 | #endif
135 |   }
136 | }
137 | 
138 | void cgt_memcpy(cgtDevtype dest_type, cgtDevtype src_type, void *dest_ptr, void *src_ptr, long nbytes) {
139 |   if (src_type == cgtCPU && dest_type == cgtCPU) {
140 |     memcpy(dest_ptr, src_ptr, nbytes);
141 |   } else {
142 | #ifdef CGT_ENABLE_CUDA
143 |     enum cudaMemcpyKind kind;
144 |     if       (src_type == cgtCPU && dest_type == cgtGPU) kind = cudaMemcpyHostToDevice;
145 |     else if  (src_type == cgtGPU && dest_type == cgtCPU) kind = cudaMemcpyDeviceToHost;
146 |     else if  (src_type == cgtGPU && dest_type == cgtGPU) kind = cudaMemcpyDeviceToDevice;
147 |     else cgt_assert(0 && "invalid src/dest types");
148 |     CUDA_CHECK(cudaMemcpy(dest_ptr, src_ptr, nbytes, kind));
149 | #else
150 |     cgt_assert(0 && "CUDA disabled");
151 | #endif
152 |   }
153 | }
154 | 
155 | 


--------------------------------------------------------------------------------
/src/cuda_setup.c:
--------------------------------------------------------------------------------
 1 | #include "cgt_cuda.h"
 2 | #include "cublas.h"
 3 | 
 4 | CudaContext g_context;
 5 | 
 6 | #ifdef CGT_ENABLE_CUDA
 7 | 
 8 | void cuda_initialize() {
 9 |     CUDA_CHECK(cudaStreamCreate(&g_context.stream));
10 |     CUBLAS_CHECK(cublasCreate_v2(&g_context.cublas_handle));
11 |     CUBLAS_CHECK(cublasSetStream(g_context.cublas_handle, g_context.stream));
12 |     // CUDNN_CHECK(cudnnCreate(&g_context.cudnn_handle));
13 |     // CUDNN_CHECK(cudnnSetStream(g_context.cudnn_handle, g_context.stream));
14 | }
15 | 
16 | #else 
17 | 
18 | void cuda_initialize() {
19 | }
20 | 
21 | #endif


--------------------------------------------------------------------------------
/src/util/ThreadPool.h:
--------------------------------------------------------------------------------
  1 | #ifndef THREAD_POOL_H
  2 | #define THREAD_POOL_H
  3 | 
  4 | // from https://github.com/progschj/ThreadPool/blob/master/ThreadPool.h
  5 | 
  6 | #include <vector>
  7 | #include <queue>
  8 | #include <memory>
  9 | #include <thread>
 10 | #include <mutex>
 11 | #include <condition_variable>
 12 | #include <future>
 13 | #include <functional>
 14 | #include <stdexcept>
 15 | 
 16 | class ThreadPool {
 17 | public:
 18 |     ThreadPool(size_t);
 19 |     template<class F, class... Args>
 20 |     auto enqueue(F&& f, Args&&... args)
 21 |         -> std::future<typename std::result_of<F(Args...)>::type>;
 22 |     ~ThreadPool();
 23 | private:
 24 |     // need to keep track of threads so we can join them
 25 |     std::vector< std::thread > workers;
 26 |     // the task queue
 27 |     std::queue< std::function<void()> > tasks;
 28 | 
 29 |     // synchronization
 30 |     std::mutex queue_mutex;
 31 |     std::condition_variable condition;
 32 |     bool stop;
 33 | };
 34 | 
 35 | // the constructor just launches some amount of workers
 36 | inline ThreadPool::ThreadPool(size_t threads)
 37 |     :   stop(false)
 38 | {
 39 |     for(size_t i = 0;i<threads;++i)
 40 |         workers.emplace_back(
 41 |             [this]
 42 |             {
 43 |                 for(;;)
 44 |                 {
 45 |                     std::function<void()> task;
 46 | 
 47 |                     {
 48 |                         std::unique_lock<std::mutex> lock(this->queue_mutex);
 49 |                         this->condition.wait(lock,
 50 |                             [this]{ return this->stop || !this->tasks.empty(); });
 51 |                         if(this->stop && this->tasks.empty())
 52 |                             return;
 53 |                         task = std::move(this->tasks.front());
 54 |                         this->tasks.pop();
 55 |                     }
 56 | 
 57 |                     task();
 58 |                 }
 59 |             }
 60 |         );
 61 | }
 62 | 
 63 | // add new work item to the pool
 64 | template<class F, class... Args>
 65 | auto ThreadPool::enqueue(F&& f, Args&&... args)
 66 |     -> std::future<typename std::result_of<F(Args...)>::type>
 67 | {
 68 |     using return_type = typename std::result_of<F(Args...)>::type;
 69 | 
 70 |     auto task = std::make_shared< std::packaged_task<return_type()> >(
 71 |             std::bind(std::forward<F>(f), std::forward<Args>(args)...)
 72 |         );
 73 | 
 74 |     std::future<return_type> res = task->get_future();
 75 |     {
 76 |         std::unique_lock<std::mutex> lock(queue_mutex);
 77 | 
 78 |         // don't allow enqueueing after stopping the pool
 79 |         if(stop)
 80 |             throw std::runtime_error("enqueue on stopped ThreadPool");
 81 | 
 82 |         tasks.emplace([task](){ (*task)(); });
 83 |     }
 84 |     condition.notify_one();
 85 |     return res;
 86 | }
 87 | 
 88 | // the destructor joins all threads
 89 | inline ThreadPool::~ThreadPool()
 90 | {
 91 |     {
 92 |         std::unique_lock<std::mutex> lock(queue_mutex);
 93 |         stop = true;
 94 |     }
 95 |     condition.notify_all();
 96 |     for(std::thread &worker: workers)
 97 |         worker.join();
 98 | }
 99 | 
100 | #endif
101 | 


--------------------------------------------------------------------------------
/thirdparty/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/joschu/cgt/90b15ab041fc2137e62b96e8612ccee605f71ceb/thirdparty/__init__.py


--------------------------------------------------------------------------------