├── .gitignore ├── 4build ├── README ├── cmake │ ├── CUDA.cmake │ ├── FindCython.cmake │ ├── FindNumpy.cmake │ └── UseCython.cmake ├── download_and_build_openblas.py └── gen_py.py ├── 4dev ├── README ├── lintfiles.txt ├── pylintrc ├── run_pylint.py ├── style_check.py └── valgrind-python.supp ├── CMakeLists.txt ├── LICENSE ├── README.md ├── cgt ├── __init__.py ├── api.py ├── api_autogen.py ├── compilation.py ├── core.py ├── display.py ├── distributions.py ├── img_ops.py ├── nn.py ├── nn_ops │ ├── __init__.py │ ├── cross_channel_lrn.py │ ├── cudnn_ops.py │ ├── im2col.py │ └── max_pool_2d.py ├── numeric_diff.py ├── tests │ ├── __init__.py │ ├── _test_assert.py │ ├── _test_cycgt.py │ ├── _test_eg.py │ ├── _test_flatvec.py │ ├── _test_shapecheck.py │ ├── _test_tuples.py │ ├── test_affine.py │ ├── test_array_wrapper.py │ ├── test_conv.py │ ├── test_devices.py │ ├── test_einsum.py │ ├── test_examples.py │ ├── test_imgproc.py │ ├── test_inc_subtensor.py │ ├── test_informative_errors.py │ ├── test_input_conversions.py │ ├── test_linreg.py │ ├── test_multi_output.py │ ├── test_optimizers.py │ ├── test_par_interp.py │ ├── test_scalars.py │ └── test_stack.py └── utils.py ├── cgtrc.example ├── cgtrc_spec.ini ├── doc ├── Makefile ├── README ├── _static │ └── my_theme.css ├── build_and_view.sh ├── conf.py ├── index.rst ├── notebook_sphinxext1.py ├── spelling_wordlist.txt ├── sphinx_preview.py ├── tutorial-notes.txt └── upload.sh ├── examples ├── README ├── alice │ └── input.txt ├── bench │ ├── cgt_gru.py │ ├── gru.py │ ├── seq_model.py │ └── theano_gru.py ├── broken │ ├── caffe2cgt.py │ ├── internals_tour.ipynb │ └── mnist_torchstyle.py ├── cgt_theano_feedforward_comparison.py ├── demo_char_rnn.py ├── demo_cifar.py ├── demo_mnist.py ├── demo_neural_turing_machine.py ├── demo_variational_autoencoder.py ├── example_utils.py ├── param_collection.py └── tutorial.ipynb ├── include ├── IRC.h ├── cgt_common.h ├── cgt_cuda.h ├── cudnn_support.h ├── execution.h ├── im2col.h ├── lrn.cuh └── pooling.h ├── scripts └── cgt-clear-cache ├── src ├── cgt_common.cpp ├── cuda_setup.c ├── cycgt.pyx ├── execution.cpp └── util │ └── ThreadPool.h └── thirdparty ├── __init__.py ├── configobj.py ├── tabulate.py └── validate.py /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # IDE junk 3 | .settings 4 | .cproject 5 | ._* 6 | .\#* 7 | \#* 8 | *.pyc 9 | .project 10 | .pydevproject 11 | *~ 12 | *.orig 13 | *~ 14 | 15 | 16 | # Build generated stuff 17 | doc/build 18 | doc/tex-images 19 | build 20 | *.swp 21 | openblas.tar.gz 22 | 23 | # Tmp stuff 24 | junk 25 | doc/primer 26 | doc/images 27 | _build 28 | _junk 29 | .coverage 30 | htmlcov 31 | cgt.sublime-project 32 | cgt.sublime-workspace 33 | 34 | 35 | doc2 36 | notes 37 | notes2 38 | build2 39 | .idea 40 | *-checkpoint.ipynb 41 | docjunk 42 | downloads/* 43 | doc/tex-src -------------------------------------------------------------------------------- /4build/README: -------------------------------------------------------------------------------- 1 | Files for building CGT -------------------------------------------------------------------------------- /4build/cmake/CUDA.cmake: -------------------------------------------------------------------------------- 1 | ################################################################################################ 2 | # Short command for cuDNN detection. Believe it soon will be a part of CUDA toolkit distribution. 
3 | # That's why not FindcuDNN.cmake file, but just the macro 4 | # Usage: 5 | # detect_cuDNN() 6 | function(detect_cuDNN) 7 | set(CUDNN_ROOT "" CACHE PATH "CUDNN root folder") 8 | 9 | find_path(CUDNN_INCLUDE cudnn.h 10 | PATHS ${CUDNN_ROOT} $ENV{CUDNN_ROOT} ${CUDA_TOOLKIT_INCLUDE} 11 | DOC "Path to cuDNN include directory." ) 12 | 13 | get_filename_component(__libpath_hist ${CUDA_CUDART_LIBRARY} PATH) 14 | find_library(CUDNN_LIBRARY NAMES libcudnn.so # libcudnn_static.a 15 | PATHS ${CUDNN_ROOT} $ENV{CUDNN_ROOT} ${CUDNN_INCLUDE} ${__libpath_hist} 16 | DOC "Path to cuDNN library.") 17 | 18 | if(CUDNN_INCLUDE AND CUDNN_LIBRARY) 19 | set(HAVE_CUDNN TRUE PARENT_SCOPE) 20 | set(CUDNN_FOUND TRUE PARENT_SCOPE) 21 | 22 | mark_as_advanced(CUDNN_INCLUDE CUDNN_LIBRARY CUDNN_ROOT) 23 | message(STATUS "Found cuDNN (include: ${CUDNN_INCLUDE}, library: ${CUDNN_LIBRARY})") 24 | endif() 25 | endfunction() 26 | 27 | 28 | -------------------------------------------------------------------------------- /4build/cmake/FindCython.cmake: -------------------------------------------------------------------------------- 1 | # Find the Cython compiler. 2 | # 3 | # This code sets the following variables: 4 | # 5 | # CYTHON_EXECUTABLE 6 | # 7 | # See also UseCython.cmake 8 | 9 | #============================================================================= 10 | # Copyright 2011 Kitware, Inc. 11 | # 12 | # Licensed under the Apache License, Version 2.0 (the "License"); 13 | # you may not use this file except in compliance with the License. 14 | # You may obtain a copy of the License at 15 | # 16 | # http://www.apache.org/licenses/LICENSE-2.0 17 | # 18 | # Unless required by applicable law or agreed to in writing, software 19 | # distributed under the License is distributed on an "AS IS" BASIS, 20 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 21 | # See the License for the specific language governing permissions and 22 | # limitations under the License. 23 | #============================================================================= 24 | 25 | # Use the Cython executable that lives next to the Python executable 26 | # if it is a local installation. 27 | find_package( PythonInterp ) 28 | if( PYTHONINTERP_FOUND ) 29 | get_filename_component( _python_path ${PYTHON_EXECUTABLE} PATH ) 30 | find_program( CYTHON_EXECUTABLE 31 | NAMES cython cython.bat 32 | HINTS ${_python_path} 33 | ) 34 | else() 35 | find_program( CYTHON_EXECUTABLE 36 | NAMES cython cython.bat cython3 37 | ) 38 | endif() 39 | 40 | 41 | include( FindPackageHandleStandardArgs ) 42 | FIND_PACKAGE_HANDLE_STANDARD_ARGS( Cython REQUIRED_VARS CYTHON_EXECUTABLE ) 43 | 44 | mark_as_advanced( CYTHON_EXECUTABLE ) 45 | 46 | -------------------------------------------------------------------------------- /4build/cmake/FindNumpy.cmake: -------------------------------------------------------------------------------- 1 | # - Find the NumPy libraries 2 | # This module finds if NumPy is installed, and sets the following variables 3 | # indicating where it is. 4 | # 5 | # TODO: Update to provide the libraries and paths for linking npymath lib. 6 | # 7 | # NUMPY_FOUND - was NumPy found 8 | # NUMPY_VERSION - the version of NumPy found as a string 9 | # NUMPY_VERSION_MAJOR - the major version number of NumPy 10 | # NUMPY_VERSION_MINOR - the minor version number of NumPy 11 | # NUMPY_VERSION_PATCH - the patch version number of NumPy 12 | # NUMPY_VERSION_DECIMAL - e.g. 
version 1.6.1 is 10601 13 | # NUMPY_INCLUDE_DIR - path to the NumPy include files 14 | 15 | unset(NUMPY_VERSION) 16 | unset(NUMPY_INCLUDE_DIR) 17 | 18 | if(PYTHONINTERP_FOUND) 19 | execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c" 20 | "import numpy as n; print(n.__version__); print(n.get_include());" 21 | RESULT_VARIABLE __result 22 | OUTPUT_VARIABLE __output 23 | OUTPUT_STRIP_TRAILING_WHITESPACE) 24 | 25 | if(__result MATCHES 0) 26 | string(REGEX REPLACE ";" "\\\\;" __values ${__output}) 27 | string(REGEX REPLACE "\r?\n" ";" __values ${__values}) 28 | list(GET __values 0 NUMPY_VERSION) 29 | list(GET __values 1 NUMPY_INCLUDE_DIR) 30 | 31 | string(REGEX MATCH "^([0-9])+\\.([0-9])+\\.([0-9])+" __ver_check "${NUMPY_VERSION}") 32 | if(NOT "${__ver_check}" STREQUAL "") 33 | set(NUMPY_VERSION_MAJOR ${CMAKE_MATCH_1}) 34 | set(NUMPY_VERSION_MINOR ${CMAKE_MATCH_2}) 35 | set(NUMPY_VERSION_PATCH ${CMAKE_MATCH_3}) 36 | math(EXPR NUMPY_VERSION_DECIMAL 37 | "(${NUMPY_VERSION_MAJOR} * 10000) + (${NUMPY_VERSION_MINOR} * 100) + ${NUMPY_VERSION_PATCH}") 38 | string(REGEX REPLACE "\\\\" "/" NUMPY_INCLUDE_DIR ${NUMPY_INCLUDE_DIR}) 39 | else() 40 | unset(NUMPY_VERSION) 41 | unset(NUMPY_INCLUDE_DIR) 42 | message(STATUS "Requested NumPy version and include path, but got instead:\n${__output}\n") 43 | endif() 44 | endif() 45 | else() 46 | message(STATUS "To find NumPy Python interpretator is required to be found.") 47 | endif() 48 | 49 | include(FindPackageHandleStandardArgs) 50 | find_package_handle_standard_args(NumPy REQUIRED_VARS NUMPY_INCLUDE_DIR NUMPY_VERSION 51 | VERSION_VAR NUMPY_VERSION) 52 | 53 | if(NUMPY_FOUND) 54 | message(STATUS "NumPy ver. ${NUMPY_VERSION} found (include: ${NUMPY_INCLUDE_DIR})") 55 | endif() 56 | 57 | -------------------------------------------------------------------------------- /4build/download_and_build_openblas.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import subprocess,sys,os,shutil,os.path as osp 3 | import urllib 4 | import multiprocessing 5 | 6 | unpack_dir = sys.argv[1] 7 | max_openblas_threads = int(sys.argv[2]) if len(sys.argv) > 2 else multiprocessing.cpu_count() 8 | 9 | def call_and_print(cmd): 10 | print "\x1b[32m%s\x1b[0m"%cmd 11 | subprocess.check_call(cmd,shell=True) 12 | 13 | fname = "openblas.tar.gz" 14 | url = "https://github.com/xianyi/OpenBLAS/archive/v0.2.14.tar.gz" 15 | 16 | if osp.exists(fname): 17 | print "already downloaded openblas.tar.gz" 18 | else: 19 | print "will download openblas and unpack to %s"%unpack_dir 20 | urllib.urlretrieve(url, fname+".part") 21 | shutil.move("{fname}.part".format(fname=fname),"{fname}".format(fname=fname)) 22 | call_and_print("mkdir -p {unpack_dir} && tar -xf {fname} --directory {unpack_dir} --strip-components=1".format( 23 | fname=fname,unpack_dir=unpack_dir)) 24 | os.chdir(unpack_dir) 25 | print "Compiling OpenBLAS...this will take a minute or so" 26 | call_and_print("make -j ONLY_CBLAS=1 NO_LAPACK=1 NO_LAPACKE=1 USE_OPENMP=0 NUM_THREADS=%i"%max_openblas_threads) 27 | -------------------------------------------------------------------------------- /4build/gen_py.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from cgt.core import UNARY_INFO, BINARY_INFO 3 | import cgt, os, os.path as osp 4 | fh = sys.stdout 5 | 6 | os.chdir(osp.dirname(osp.dirname(osp.realpath(cgt.__file__)))) 7 | 8 | with open("cgt/api_autogen.py","w") as fh: 9 | fh.write("# This file was autogenerated by gen_py.py. 
Do not edit.") 10 | fh.write("\nfrom . import core\n") 11 | 12 | for (shortname,info) in sorted(UNARY_INFO.iteritems(), key = lambda x:x[0]): 13 | fh.write( 14 | """ 15 | def {npname}(x): 16 | "Applies function {npname} elementwise to argument x" 17 | return core.Result(core.ElwiseUnary("{shortname}"), [x]) 18 | """.format(shortname=shortname,npname=info.short.lower())) 19 | 20 | for (infixname,info) in sorted(BINARY_INFO.iteritems(), key = lambda x:x[1].short): 21 | fh.write( 22 | """ 23 | def {npname}(x, y): 24 | "Applies function {npname} elementwise to arguments x,y" 25 | return core.elwise_binary("{infixname}", x,y) 26 | """.format(infixname = infixname, npname=info.short)) 27 | 28 | -------------------------------------------------------------------------------- /4dev/README: -------------------------------------------------------------------------------- 1 | Files for development -------------------------------------------------------------------------------- /4dev/lintfiles.txt: -------------------------------------------------------------------------------- 1 | + cgt/*.py -------------------------------------------------------------------------------- /4dev/pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | 3 | # Specify a configuration file. 4 | #rcfile= 5 | 6 | # Python code to execute, usually for sys.path manipulation such as 7 | # pygtk.require(). 8 | #init-hook= 9 | 10 | # Profiled execution. 11 | profile=no 12 | 13 | # Add files or directories to the blacklist. They should be base names, not 14 | # paths. 15 | ignore=CVS 16 | 17 | # Pickle collected data for later comparisons. 18 | persistent=yes 19 | 20 | # List of plugins (as comma separated values of python modules names) to load, 21 | # usually to register additional checkers. 22 | load-plugins= 23 | 24 | 25 | [MESSAGES CONTROL] 26 | 27 | # Enable the message, report, category or checker with the given id(s). You can 28 | # either give multiple identifier separated by comma (,) or put this option 29 | # multiple time. See also the "--disable" option for examples. 30 | #enable= 31 | 32 | # Disable the message, report, category or checker with the given id(s). You 33 | # can either give multiple identifiers separated by comma (,) or put this 34 | # option multiple times (only on the command line, not in the configuration 35 | # file where it should appear only once).You can also use "--disable=all" to 36 | # disable everything first and then reenable specific checks. For example, if 37 | # you want to run only the similarities checker, you can use "--disable=all 38 | # --enable=similarities". If you want to run only the classes checker, but have 39 | # no Warning level messages displayed, use"--disable=all --enable=classes 40 | # --disable=W" 41 | disable=C,R,W0221,I,W0614,W0201,W0142,W0141,W0401,W0760,W59,W0123,W0603,W0621,W0622 42 | 43 | 44 | [REPORTS] 45 | 46 | # Set the output format. Available formats are text, parseable, colorized, msvs 47 | # (visual studio) and html. You can also give a reporter class, eg 48 | # mypackage.mymodule.MyReporterClass. 49 | output-format=text 50 | 51 | # Include message's id in output 52 | #include-ids=yes 53 | 54 | # Include symbolic ids of messages in output 55 | #symbols=no 56 | 57 | # Put messages in a separate file for each module / package specified on the 58 | # command line instead of printing them on stdout. Reports (if any) will be 59 | # written in a file name "pylint_global.[txt|html]". 
60 | files-output=no 61 | 62 | # Tells whether to display a full report or only the messages 63 | reports=yes 64 | 65 | # Python expression which should return a note less than 10 (10 is the highest 66 | # note). You have access to the variables errors warning, statement which 67 | # respectively contain the number of errors / warnings messages and the total 68 | # number of statements analyzed. This is used by the global evaluation report 69 | # (RP0004). 70 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) 71 | 72 | # Add a comment according to your evaluation note. This is used by the global 73 | # evaluation report (RP0004). 74 | comment=no 75 | 76 | 77 | [FORMAT] 78 | 79 | # Maximum number of characters on a single line. 80 | max-line-length=80 81 | 82 | # Maximum number of lines in a module 83 | max-module-lines=1000 84 | 85 | # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 86 | # tab). 87 | indent-string=' ' 88 | 89 | # Regexp for a line that is allowed to be longer than the limit. 90 | ignore-long-lines=^\s*(# )??$ 91 | 92 | [BASIC] 93 | 94 | # Required attributes for module, separated by a comma 95 | required-attributes= 96 | 97 | # List of builtins function names that should not be used, separated by a comma 98 | bad-functions=map,filter,apply,input 99 | 100 | # Regular expression which should only match correct module names 101 | module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ 102 | 103 | # Regular expression which should only match correct module level names 104 | const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ 105 | 106 | # Regular expression which should only match correct class names 107 | class-rgx=[A-Z_][a-zA-Z0-9]+$ 108 | 109 | # Regular expression which should only match correct function names 110 | function-rgx=[a-z_][a-z0-9_]{2,30}$ 111 | 112 | # Regular expression which should only match correct method names 113 | method-rgx=[a-z_][a-z0-9_]{2,30}$ 114 | 115 | # Regular expression which should only match correct instance attribute names 116 | attr-rgx=[a-z_][a-z0-9_]{2,30}$ 117 | 118 | # Regular expression which should only match correct argument names 119 | argument-rgx=[a-z_][a-z0-9_]{2,30}$ 120 | 121 | # Regular expression which should only match correct variable names 122 | variable-rgx=[a-z_][a-z0-9_]{2,30}$ 123 | 124 | # Regular expression which should only match correct list comprehension / 125 | # generator expression variable names 126 | inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ 127 | 128 | # Good variable names which should always be accepted, separated by a comma 129 | good-names=i,j,k,ex,Run,_ 130 | 131 | # Bad variable names which should always be refused, separated by a comma 132 | bad-names=foo,bar,baz,toto,tutu,tata 133 | 134 | # Regular expression which should only match functions or classes name which do 135 | # not require a docstring 136 | no-docstring-rgx=__.*__ 137 | 138 | 139 | [SIMILARITIES] 140 | 141 | # Minimum lines number of a similarity. 142 | min-similarity-lines=4 143 | 144 | # Ignore comments when computing similarities. 145 | ignore-comments=yes 146 | 147 | # Ignore docstrings when computing similarities. 148 | ignore-docstrings=yes 149 | 150 | # Ignore imports when computing similarities. 151 | ignore-imports=no 152 | 153 | 154 | [MISCELLANEOUS] 155 | 156 | # List of note tags to take in consideration, separated by a comma. 157 | notes=FIXME,XXX,TODO 158 | 159 | 160 | [TYPECHECK] 161 | 162 | # Tells whether missing members accessed in mixin class should be ignored. 
A 163 | # mixin class is detected if its name ends with "mixin" (case insensitive). 164 | ignore-mixin-members=yes 165 | 166 | # List of classes names for which member attributes should not be checked 167 | # (useful for classes with attributes dynamically set). 168 | ignored-classes=SQLObject 169 | 170 | # When zope mode is activated, add a predefined set of Zope acquired attributes 171 | # to generated-members. 172 | zope=no 173 | 174 | # List of members which are set dynamically and missed by pylint inference 175 | # system, and so shouldn't trigger E0201 when accessed. Python regular 176 | # expressions are accepted. 177 | generated-members=REQUEST,acl_users,aq_parent 178 | 179 | ignored-modules=numpy,numpy.linalg,theano,numpy.random,scipy 180 | 181 | 182 | [VARIABLES] 183 | 184 | # Tells whether we should check for unused import in __init__ files. 185 | init-import=no 186 | 187 | # A regular expression matching the beginning of the name of dummy variables 188 | # (i.e. not used). 189 | dummy-variables-rgx=_|dummy 190 | 191 | # List of additional names supposed to be defined in builtins. Remember that 192 | # you should avoid to define new builtins when possible. 193 | additional-builtins= 194 | 195 | 196 | [IMPORTS] 197 | 198 | # Deprecated modules which should not be used, separated by a comma 199 | deprecated-modules=regsub,string,TERMIOS,Bastion,rexec 200 | 201 | # Create a graph of every (i.e. internal and external) dependencies in the 202 | # given file (report RP0402 must not be disabled) 203 | import-graph= 204 | 205 | # Create a graph of external dependencies in the given file (report RP0402 must 206 | # not be disabled) 207 | ext-import-graph= 208 | 209 | # Create a graph of internal dependencies in the given file (report RP0402 must 210 | # not be disabled) 211 | int-import-graph= 212 | 213 | 214 | [DESIGN] 215 | 216 | # Maximum number of arguments for function / method 217 | max-args=5 218 | 219 | # Argument names that match this expression will be ignored. Default to name 220 | # with leading underscore 221 | ignored-argument-names=_.* 222 | 223 | # Maximum number of locals for function / method body 224 | max-locals=15 225 | 226 | # Maximum number of return / yield for function / method body 227 | max-returns=6 228 | 229 | # Maximum number of branch for function / method body 230 | max-branchs=12 231 | 232 | # Maximum number of statements in function / method body 233 | max-statements=50 234 | 235 | # Maximum number of parents for a class (see R0901). 236 | max-parents=7 237 | 238 | # Maximum number of attributes for a class (see R0902). 239 | max-attributes=7 240 | 241 | # Minimum number of public methods for a class (see R0903). 242 | min-public-methods=2 243 | 244 | # Maximum number of public methods for a class (see R0904). 245 | max-public-methods=20 246 | 247 | 248 | [CLASSES] 249 | 250 | # List of interface methods to ignore, separated by a comma. This is used for 251 | # instance to not check methods defines in Zope's Interface base class. 252 | ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by 253 | 254 | # List of method names used to declare (i.e. assign) instance attributes. 255 | defining-attr-methods=__init__,__new__,setUp 256 | 257 | # List of valid names for the first argument in a class method. 
258 | valid-classmethod-first-arg=cls 259 | 260 | # List of valid names for the first argument in a metaclass class method. 261 | valid-metaclass-classmethod-first-arg=mcs 262 | 263 | 264 | [EXCEPTIONS] 265 | 266 | # Exceptions that will emit a warning when being caught. Defaults to 267 | # "Exception" 268 | overgeneral-exceptions=Exception 269 | -------------------------------------------------------------------------------- /4dev/run_pylint.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | parser = argparse.ArgumentParser() 4 | parser.add_argument("--files",nargs="+") 5 | parser.add_argument("--patfile", type=argparse.FileType("r")) 6 | args = parser.parse_args() 7 | 8 | 9 | import subprocess, os, os.path as osp, cgt 10 | from glob import glob 11 | 12 | os.chdir(osp.dirname(osp.dirname(osp.realpath(cgt.__file__)))) 13 | 14 | if args.files is None and args.patfile is None: args.patfile=open("4dev/lintfiles.txt","r") 15 | 16 | def cap(cmd): 17 | "call and print" 18 | print "\x1b[32m%s\x1b[0m"%cmd 19 | subprocess.call(cmd,shell=True) 20 | 21 | def filelist_from_patterns(pats, rootdir=None): 22 | if rootdir is None: rootdir = "." 23 | # filelist = [] 24 | fileset = set([]) 25 | lines = [line.strip() for line in pats] 26 | for line in lines: 27 | pat = line[2:] 28 | newfiles = glob(osp.join(rootdir,pat)) 29 | if line.startswith("+"): 30 | fileset.update(newfiles) 31 | elif line.startswith("-"): 32 | fileset.difference_update(newfiles) 33 | else: 34 | raise ValueError("line must start with + or -") 35 | filelist = list(fileset) 36 | return filelist 37 | 38 | assert args.files is not None or args.patfile is not None 39 | if args.files is not None: 40 | filelist = args.files 41 | elif args.patfile is not None: 42 | filelist = filelist_from_patterns(args.patfile.readlines()) 43 | else: 44 | raise Exception("unreachable") 45 | 46 | rcfile = "4dev/pylintrc" 47 | lint = "pylint" 48 | if filelist is not None: 49 | for fname in filelist: 50 | result = cap("%s -f colorized --rcfile %s -r n %s"%(lint, rcfile, fname)) 51 | else: 52 | result = cap("%s -f colorized --rcfile %s -r n *.py"%(lint,rcfile)) 53 | 54 | -------------------------------------------------------------------------------- /4dev/style_check.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import cgt 3 | for (name,val) in cgt.__dict__.iteritems(): 4 | if not name.startswith("_"): 5 | if not val.__doc__: 6 | print "API function %s requires docstring!"%name 7 | 8 | 9 | for (name,val) in cgt.core.__dict__.iteritems(): 10 | if isinstance(val, type) and issubclass(val, cgt.core.Op): 11 | if val.get_native_compile_info == cgt.core.Op.get_native_compile_info: 12 | print "Op %s is missing 'get_native_compile_info'!"%name 13 | 14 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.11) 2 | 3 | if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR}) 4 | message(FATAL_ERROR "In-source builds not allowed. 
Please make a new directory (called a build directory) and run CMake from there.\nBut first clean the build files that just got created in the source directory:\nrm -rf CMakeFiles CMakeCache.txt") 5 | endif() 6 | 7 | project(cgt) 8 | 9 | # http://cmake.3232098.n2.nabble.com/Default-value-for-CMAKE-BUILD-TYPE-td7550756.html 10 | if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) 11 | message(STATUS "Setting build type to 'Release' as none was specified.") 12 | set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE) 13 | # Set the possible values of build type for cmake-gui 14 | set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" 15 | "MinSizeRel" "RelWithDebInfo") 16 | endif() 17 | 18 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib 19 | CACHE PATH "Output directory for static libraries.") 20 | 21 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib 22 | CACHE PATH "Output directory for shared libraries.") 23 | 24 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin 25 | CACHE PATH "Output directory for executables and DLL's.") 26 | 27 | # set(CMAKE_C_STANDARD 99) 28 | list( APPEND CMAKE_C_FLAGS "-std=c99") 29 | if(APPLE) 30 | list( APPEND CMAKE_CXX_FLAGS "-std=c++11 -stdlib=libc++") 31 | else() 32 | list( APPEND CMAKE_CXX_FLAGS "-std=c++11") 33 | endif() 34 | 35 | list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/4build/cmake) 36 | 37 | include(${CMAKE_SOURCE_DIR}/4build/cmake/UseCython.cmake) 38 | include(${CMAKE_SOURCE_DIR}/4build/cmake/CUDA.cmake) 39 | 40 | find_package(PythonLibs 2.7 REQUIRED) 41 | find_package(Numpy REQUIRED) 42 | 43 | if (APPLE) 44 | set(CMAKE_MACOSX_RPATH 0) 45 | endif() 46 | 47 | 48 | option(CGT_ENABLE_CUDA OFF "Enable CUDA support") 49 | option(CGT_ENABLE_CUDNN ON "Enable CUDNN support") 50 | option(CGT_DEVEL_MODE OFF "Build with more compiler errors and warnings") 51 | SET(CGT_MAX_CPUS "16" CACHE STRING "Max CPUs that might ever be used") 52 | 53 | message( 54 | "\n" 55 | "*****************************\n" 56 | "OPTIONS: \n" 57 | " CGT_ENABLE_CUDA: ${CGT_ENABLE_CUDA}\n" 58 | " CGT_ENABLE_CUDNN: ${CGT_ENABLE_CUDNN}\n" 59 | # " CGT_DEVEL_MODE: ${CGT_DEVEL_MODE}\n" 60 | " CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}\n" 61 | "*****************************\n" 62 | ) 63 | 64 | 65 | if (CGT_DEVEL_MODE) 66 | set(CMAKE_C_FLAGS "-Wall -Wextra -Wno-unused-function -Wno-sign-compare") 67 | endif(CGT_DEVEL_MODE) 68 | 69 | set(CGT_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/include) 70 | message("include dir ${CGT_INCLUDE_DIR}") 71 | 72 | add_library(cgt SHARED src/cgt_common.cpp include/cgt_common.h) 73 | if (CGT_ENABLE_CUDA) 74 | find_package(CUDA REQUIRED) 75 | add_definitions(-DCGT_ENABLE_CUDA) 76 | endif() 77 | 78 | if (CGT_ENABLE_CUDNN) 79 | detect_cuDNN() 80 | if(NOT CUDNN_FOUND AND NOT CUDNN_ROOT) 81 | message(FATAL_ERROR "Couldn't find CUDNN. 
Set CUDNN_ROOT") 82 | endif() 83 | endif() 84 | 85 | target_link_libraries(cgt ${CUDA_CUDART_LIBRARY}) 86 | target_include_directories(cgt PRIVATE ${CUDA_INCLUDE_DIRS} ${CGT_INCLUDE_DIR}) 87 | 88 | 89 | cython_add_module(cycgt ${CMAKE_SOURCE_DIR}/src/cycgt.pyx 90 | ${CMAKE_SOURCE_DIR}/include/execution.h 91 | ${CMAKE_SOURCE_DIR}/src/execution.cpp 92 | ) 93 | set_source_files_properties(${CMAKE_SOURCE_DIR}/src/cycgt.pyx PROPERTIES CYTHON_IS_CXX TRUE) 94 | target_include_directories(cycgt PRIVATE ${NUMPY_INCLUDE_DIR} ${CGT_INCLUDE_DIR}) 95 | target_link_libraries(cycgt cgt) 96 | 97 | 98 | 99 | # add_custom_command( 100 | # COMMAND ./do_autogen.py ${CMAKE_BINARY_DIR} 101 | # OUTPUT ${CMAKE_BINARY_DIR}/cgtcorefuns.c ${CMAKE_BINARY_DIR}/cgtcorefunscuda.cu 102 | # WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} 103 | # # DEPENDS cgt.py 104 | # ) 105 | # add_custom_target(gencfiles ALL DEPENDS cgtcorefuns.c) 106 | 107 | set(OPENBLAS_LIBRARY ${CMAKE_BINARY_DIR}/OpenBLAS/libopenblas.a) 108 | 109 | add_custom_command( 110 | COMMAND 4build/download_and_build_openblas.py ${CMAKE_BINARY_DIR}/OpenBLAS ${CGT_MAX_CPUS} 111 | OUTPUT ${OPENBLAS_LIBRARY} 112 | WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} 113 | ) 114 | add_custom_target(openblas ALL DEPENDS ${OPENBLAS_LIBRARY}) 115 | 116 | # add_library(cgtcorefuns SHARED cgtcorefuns.c) 117 | # add_dependencies(cgtcorefuns openblas) # WHY IS THIS NECESSARY? 118 | # target_link_libraries(cgtcorefuns ${OPENBLAS_LIBRARY}) 119 | # target_include_directories(cgtcorefuns PRIVATE ${CMAKE_BINARY_DIR}/OpenBLAS ${CGT_INCLUDE_DIR}) 120 | # set_target_properties(cgtcorefuns PROPERTIES SUFFIX .so) 121 | 122 | # if (CGT_ENABLE_CUDA) 123 | # # http://cuda-insight-toolkit.googlecode.com/svn-history/r46/trunk/Examples/Architecture1/src/CMakeLists.txt 124 | # find_package(CUDA REQUIRED) 125 | # cuda_include_directories(${CUDA_INCLUDE_DIRS} ${CGT_INCLUDE_DIR}) 126 | # cuda_add_library(cgtcorefunscuda SHARED ${CMAKE_BINARY_DIR}/cgtcorefunscuda.cu) 127 | # target_link_libraries(cgtcorefunscuda ${CUDA_LIBRARIES}) 128 | # set_target_properties(cgtcorefunscuda PROPERTIES SUFFIX .so) 129 | # endif() 130 | 131 | # cuda_add_library(cudadummy SHARED dummy.cu) 132 | 133 | message("link flags ${CMAKE_SHARED_LINKER_FLAGS}") 134 | 135 | get_property(CGT_LIBRARY_PATH TARGET cgt PROPERTY LOCATION) 136 | file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/build_info.txt 137 | "CUDA_ROOT := ${CUDA_TOOLKIT_ROOT_DIR}\n" 138 | "CGT_INCLUDE_DIR := ${CGT_INCLUDE_DIR}\n" 139 | "C_COMPILER := ${CMAKE_C_COMPILER}\n" 140 | "CGT_LIBRARY_PATH := ${CGT_LIBRARY_PATH}\n" 141 | "CUDA_LIBRARIES := ${CUDA_LIBRARIES}\n" 142 | "CGT_ENABLE_CUDA := ${CGT_ENABLE_CUDA}\n" 143 | "CGT_ENABLE_CUDNN := ${CGT_ENABLE_CUDNN}\n" 144 | "CUDNN_ROOT := ${CUDNN_ROOT}\n" 145 | ) 146 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | CGT is licensed under the MIT License. 
2 | 3 | > Copyright (c) 2015: Contributors 4 | > 5 | > Permission is hereby granted, free of charge, to any person obtaining 6 | > a copy of this software and associated documentation files (the 7 | > "Software"), to deal in the Software without restriction, including 8 | > without limitation the rights to use, copy, modify, merge, publish, 9 | > distribute, sublicense, and/or sell copies of the Software, and to 10 | > permit persons to whom the Software is furnished to do so, subject to 11 | > the following conditions: 12 | > 13 | > The above copyright notice and this permission notice shall be 14 | > included in all copies or substantial portions of the Software. 15 | > 16 | > THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | > EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | > MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | > NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 20 | > LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | > OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 22 | > WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | Computation Graph Toolkit (CGT) is a library for evaluation and differentiation of functions of multidimensional arrays. 3 | 4 | Full documentation can be found at [http://rll.berkeley.edu/cgt](http://rll.berkeley.edu/cgt) 5 | 6 | [Release announcement](http://joschu.github.io/index.html#Announcing CGT) -------------------------------------------------------------------------------- /cgt/__init__.py: -------------------------------------------------------------------------------- 1 | from .api import * 2 | from .display import print_tree, print_expr, print_text, as_dot 3 | from .compilation import function, numeric_eval, profiler 4 | from .core import (grad, get_config, update_config, simplify, reset_config, 5 | Device, scoped_update_config, infer_shape, count_nodes) 6 | try: 7 | import cycgt 8 | except ImportError: 9 | import warnings 10 | msg = """ 11 | Could not import the compiled extension module cycgt 12 | Only pure python mode is available. If you have compiled " 13 | this extension (via 'make'), you may need to add build/lib 14 | to your PYTHONPATH. Try 'import cycgt' to debug the problem further.""" 15 | warnings.warn(msg, UserWarning) 16 | del warnings 17 | 18 | 19 | floatX = "f4" 20 | complexX = "c8" 21 | 22 | # Get rid of names we don't want to export 23 | del np 24 | del cgt 25 | del operator 26 | del sys 27 | 28 | get_config() -------------------------------------------------------------------------------- /cgt/api_autogen.py: -------------------------------------------------------------------------------- 1 | # This file was autogenerated by gen_py.py. Do not edit. 2 | from . 
import core 3 | 4 | def abs(x): 5 | "Applies function abs elementwise to argument x" 6 | return core.Result(core.ElwiseUnary("abs"), [x]) 7 | 8 | def ceil(x): 9 | "Applies function ceil elementwise to argument x" 10 | return core.Result(core.ElwiseUnary("ceil"), [x]) 11 | 12 | def conj(x): 13 | "Applies function conj elementwise to argument x" 14 | return core.Result(core.ElwiseUnary("conj"), [x]) 15 | 16 | def cos(x): 17 | "Applies function cos elementwise to argument x" 18 | return core.Result(core.ElwiseUnary("cos"), [x]) 19 | 20 | def exp(x): 21 | "Applies function exp elementwise to argument x" 22 | return core.Result(core.ElwiseUnary("exp"), [x]) 23 | 24 | def iceil(x): 25 | "Applies function iceil elementwise to argument x" 26 | return core.Result(core.ElwiseUnary("iceil"), [x]) 27 | 28 | def ifloor(x): 29 | "Applies function ifloor elementwise to argument x" 30 | return core.Result(core.ElwiseUnary("ifloor"), [x]) 31 | 32 | def log(x): 33 | "Applies function log elementwise to argument x" 34 | return core.Result(core.ElwiseUnary("log"), [x]) 35 | 36 | def negative(x): 37 | "Applies function negative elementwise to argument x" 38 | return core.Result(core.ElwiseUnary("neg"), [x]) 39 | 40 | def sigmoid(x): 41 | "Applies function sigmoid elementwise to argument x" 42 | return core.Result(core.ElwiseUnary("sigmoid"), [x]) 43 | 44 | def sign(x): 45 | "Applies function sign elementwise to argument x" 46 | return core.Result(core.ElwiseUnary("sign"), [x]) 47 | 48 | def sin(x): 49 | "Applies function sin elementwise to argument x" 50 | return core.Result(core.ElwiseUnary("sin"), [x]) 51 | 52 | def sqrt(x): 53 | "Applies function sqrt elementwise to argument x" 54 | return core.Result(core.ElwiseUnary("sqrt"), [x]) 55 | 56 | def square(x): 57 | "Applies function square elementwise to argument x" 58 | return core.Result(core.ElwiseUnary("square"), [x]) 59 | 60 | def tanh(x): 61 | "Applies function tanh elementwise to argument x" 62 | return core.Result(core.ElwiseUnary("tanh"), [x]) 63 | 64 | def add(x, y): 65 | "Applies function add elementwise to arguments x,y" 66 | return core.elwise_binary("+", x,y) 67 | 68 | def divide(x, y): 69 | "Applies function divide elementwise to arguments x,y" 70 | return core.elwise_binary("/", x,y) 71 | 72 | def equal(x, y): 73 | "Applies function equal elementwise to arguments x,y" 74 | return core.elwise_binary("==", x,y) 75 | 76 | def greater(x, y): 77 | "Applies function greater elementwise to arguments x,y" 78 | return core.elwise_binary(">", x,y) 79 | 80 | def greater_equal(x, y): 81 | "Applies function greater_equal elementwise to arguments x,y" 82 | return core.elwise_binary(">=", x,y) 83 | 84 | def less(x, y): 85 | "Applies function less elementwise to arguments x,y" 86 | return core.elwise_binary("<", x,y) 87 | 88 | def less_equal(x, y): 89 | "Applies function less_equal elementwise to arguments x,y" 90 | return core.elwise_binary("<=", x,y) 91 | 92 | def multiply(x, y): 93 | "Applies function multiply elementwise to arguments x,y" 94 | return core.elwise_binary("*", x,y) 95 | 96 | def not_equal(x, y): 97 | "Applies function not_equal elementwise to arguments x,y" 98 | return core.elwise_binary("!=", x,y) 99 | 100 | def power(x, y): 101 | "Applies function power elementwise to arguments x,y" 102 | return core.elwise_binary("**", x,y) 103 | 104 | def subtract(x, y): 105 | "Applies function subtract elementwise to arguments x,y" 106 | return core.elwise_binary("-", x,y) 107 | 
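A minimal usage sketch for the autogenerated elementwise wrappers above (illustrative only, not part of the repository: `cgt.scalar` is assumed to be the symbolic-input constructor from cgt/api.py, while `cgt.function` is re-exported from cgt.compilation in cgt/__init__.py):

    import cgt
    x = cgt.scalar("x")                      # assumed symbolic scalar input from cgt.api
    y = cgt.tanh(cgt.add(cgt.square(x), 1))  # elementwise ops defined in this file
    f = cgt.function([x], y)                 # compile the graph to a callable
    print f(2.0)                             # tanh(2**2 + 1) = tanh(5.0), roughly 0.99991

Each wrapper just builds a core.Result node around an ElwiseUnary/elwise_binary op, so calls compose into a graph that cgt.function can compile and evaluate.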
-------------------------------------------------------------------------------- /cgt/display.py: -------------------------------------------------------------------------------- 1 | from . import core, utils 2 | import sys 3 | 4 | # ================================================================ 5 | # Printing 6 | # ================================================================ 7 | 8 | def print_tree(outputs, o=sys.stdout, nodefn=None): 9 | """ 10 | Print out a representation of the computation graph as a tree 11 | nodefn is called after printing the result for every node, as 12 | nodefn(node, o) 13 | So you can print more attributes of the node 14 | """ 15 | if isinstance(outputs, core.Node): 16 | outputs = [outputs] 17 | node2name = {} 18 | expands = [] 19 | for node in outputs: 20 | _print_tree(node, 0, node2name, expands, o, nodefn) 21 | assert expands == [] 22 | return node2name 23 | 24 | def _print_tree(node, depth, node2name, expands, o, nodefn): 25 | o.write("| "*depth) 26 | if node in node2name: 27 | varname = node2name[node] 28 | new = False 29 | else: 30 | varname = _node_name(node) + "@%i"%len(node2name) 31 | node2name[node] = varname 32 | new = True 33 | 34 | color = utils.Color.GREEN if node.is_input() else utils.Color.RED 35 | utils.colorprint(color, varname, o) 36 | 37 | if new: 38 | if nodefn is not None: nodefn(node, o) 39 | o.write("\n") 40 | for p in node.parents: 41 | _print_tree(p, depth+1, node2name, expands, o, nodefn) 42 | else: 43 | if not node.is_input(): o.write(" (see above)") 44 | o.write("\n") 45 | 46 | def print_expr(x, o=sys.stdout): 47 | """ 48 | Returns a string that represents a computation graph 49 | """ 50 | node2s = {} 51 | o.write(_get_expr(x, node2s)) 52 | o.write("\n") 53 | 54 | def _get_expr(node, node2s): 55 | if node in node2s: 56 | return node2s[node] 57 | else: 58 | if node.is_input(): 59 | name = node2s[node] = _node_name(node) or "@%i"%len(node2s) 60 | return name 61 | else: 62 | parent_exprs = [_get_expr(parent, node2s) 63 | for parent in node.parents] 64 | return node.op.get_expr(parent_exprs) 65 | 66 | def print_text(outputs, o=sys.stdout): 67 | """ 68 | Print computation graph in single-statement assignment form, 69 | inspired by LLVM IR. (needs work) 70 | """ 71 | if isinstance(outputs, core.Node): 72 | outputs = [outputs] 73 | node2name = {} 74 | for node in core.topsorted(outputs): 75 | thisname = node2name[node] = _node_name(node) + "@%i"%len(node2name) 76 | if node.is_argument(): 77 | o.write("%s <- argument\n"%thisname) 78 | else: 79 | o.write("%s <- %s %s\n"%(thisname, str(node.op), " ".join(node2name[parent] 80 | for parent in node.parents))) 81 | 82 | def as_dot(nodes): 83 | """ 84 | Returns graphviz Digraph object that contains the nodes of the computation graph 85 | with names assigned. 86 | """ 87 | if isinstance(nodes, core.Node): 88 | nodes = [nodes] 89 | from graphviz import Digraph 90 | g = Digraph() 91 | for n in core.topsorted(nodes): 92 | g.node(str(id(n)), _node_name(n)) 93 | for (i,p) in enumerate(n.parents): 94 | g.edge(str(id(n)), str(id(p)),taillabel=str(i)) 95 | return g 96 | 97 | def _node_name(node): 98 | if node.is_input(): 99 | return node.name 100 | else: 101 | return str(node.op) 102 | -------------------------------------------------------------------------------- /cgt/distributions.py: -------------------------------------------------------------------------------- 1 | import cgt 2 | from . 
import core 3 | 4 | class Distribution(object): 5 | def lik(self, x, p): 6 | raise NotImplementedError 7 | def loglik(self, x, p): 8 | raise NotImplementedError 9 | def crossent(self, p, q): 10 | raise NotImplementedError 11 | def kl(self, p, q): 12 | raise NotImplementedError 13 | def sample(self, p): 14 | raise NotImplementedError 15 | 16 | class _Bernoulli(Distribution): 17 | def sample(self, p, shape=None): 18 | p = core.as_node(p) 19 | shape = shape or cgt.shape(p) 20 | return cgt.rand(*shape) <= p 21 | 22 | bernoulli = _Bernoulli() 23 | 24 | class _Categorical(Distribution): 25 | def crossent(self, p, q): 26 | assert p.ndim==2 and q.ndim==2 27 | return -(p*cgt.log(q)).sum(axis=1) 28 | def loglik(self, labels, p): 29 | return cgt.log(p[cgt.arange(cgt.size(labels,0)),labels]) 30 | categorical = _Categorical() 31 | 32 | class _DiagonalGaussian(Distribution): 33 | pass 34 | 35 | class Product(Distribution): 36 | r""" 37 | Factored distribution obtained by taking the product of several component distributions 38 | E.g. suppose we have p0(x), p1(y), p2(z), 39 | then p3 := ProductDistribution(p1,p2,p3) is a distribution satisfying 40 | p3(x,y,z) = p0(x)p1(y)p2(z) 41 | """ 42 | pass 43 | -------------------------------------------------------------------------------- /cgt/img_ops.py: -------------------------------------------------------------------------------- 1 | import cgt 2 | from cgt.core import Op, Result, TensorType, size, shape, ceil_divide 3 | import ctypes 4 | 5 | # Maybe we shouldn't have special CuDNN ops, we should just have the same 6 | # convolution interface with various implementations 7 | # see nice blog post http://benanne.github.io/2014/12/09/theano-metaopt.html 8 | 9 | def cudnn_conv_closure(*ints): 10 | return (ctypes.c_int*len(ints))(*ints) 11 | 12 | class CudnnConvForward(Op): 13 | def __init__(self, ph, pw, sv, sh): 14 | "pad_height, pad_width, stride_vertical, stride_horizontal" 15 | self.ph = ph 16 | self.pw = pw 17 | self.sv = sv 18 | self.sh = sh 19 | 20 | def cuda_code(self, _inputs, funcname): 21 | return """ 22 | void %(funcname)s(void* cldata, cgt_array** io) { 23 | CudaPerformConvForward(io[0], io[1], io[2], io[3], (conv_closure*)cldata, stream, handle) 24 | } 25 | """%dict(funcname=funcname) 26 | def cuda_includes(self): 27 | return ["cudnn_conv.cuh"] 28 | def impl_data(self): 29 | return (self.__class__.__name__,), cudnn_conv_closure(self.ph, self.pw, self.sv, self.sh) 30 | def shp_apply(self, inputs): 31 | X,W,_b = inputs 32 | h = ceil_divide(size(X,2) - size(W, 2) + 1, self.sv) 33 | w = ceil_divide(size(X,3) - size(W, 3) + 1, self.sh) 34 | return [size(X,0), size(W,0), h, w] 35 | def typ_apply(self, _inputs): 36 | return TensorType(cgt.floatX, 4) 37 | def pullback(self, inputs, output, gout): 38 | X,W,b = inputs 39 | # pass in an extra first argument to make output shape computation simpler 40 | return [Result(CudnnConvBackwardData(self.ph, self.pw, self.sv, self.sh), [X, gout, W]), 41 | Result(CudnnConvBackwardFilter(self.ph, self.pw, self.sv, self.sh), [W, gout, X]), 42 | Result(CudnnConvBackwardBias(self.ph, self.pw, self.sv, self.sh), [b, gout])] 43 | 44 | class CudnnConvBackwardData(Op): 45 | def __init__(self, ph, pw, sv, sh): 46 | self.ph = ph 47 | self.pw = pw 48 | self.sv = sv 49 | self.sh = sh 50 | def cuda_code(self, _inputs, funcname): 51 | return """ 52 | void %(funcname)s(void* cldata, cgt_array** io) { 53 | CudaPerformConvBackwardData(io[1], io[2], io[3], (conv_closure*)cldata, stream, handle); 54 | } 55 | """%dict(funcname=funcname) 56 
| def cuda_includes(self): 57 | return ["cudnn_conv.hpp"] 58 | def shp_apply(self, inputs): 59 | return shape(inputs[0]) 60 | def typ_apply(self, _inputs): 61 | return TensorType(cgt.floatX, 4) 62 | 63 | class CudnnConvBackwardFilter(Op): 64 | def __init__(self, ph, pw, sv, sh): 65 | self.ph = ph 66 | self.pw = pw 67 | self.sv = sv 68 | self.sh = sh 69 | def cuda_code(self, _inputs, funcname): 70 | return """ 71 | void %(funcname)s(void* cldata, cgt_array** io) { 72 | CudaPerformConvBackwardFilter(io[1], io[2], io[3], (conv_closure*)cldata, stream, handle); 73 | } 74 | """%dict(funcname=funcname) 75 | def cuda_includes(self): 76 | return ["cudnn_conv.hpp"] 77 | def impl_data(self): 78 | return (self.__class__.__name__,), cudnn_conv_closure(self.ph, self.pw, self.sv, self.sh) 79 | def shp_apply(self, inputs): 80 | return shape(inputs[0]) 81 | def typ_apply(self, _inputs): 82 | return TensorType(cgt.floatX, 4) 83 | 84 | class CudnnConvBackwardBias(Op): 85 | def __init__(self, ph, pw, sv, sh): 86 | self.ph = ph 87 | self.pw = pw 88 | self.sv = sv 89 | self.sh = sh 90 | def cuda_code(self, _inputs, funcname): 91 | return """ 92 | void %(funcname)s(void* cldata, cgt_array** io) { 93 | CudaPerformConvBackwardBias(io[1], io[2], io[3], (conv_closure*)cldata, stream, handle); 94 | } 95 | """%dict(funcname=funcname) 96 | def cuda_includes(self): 97 | return ["cudnn_conv.hpp"] 98 | def shp_apply(self, inputs): 99 | return shape(inputs[0]) 100 | def typ_apply(self, _inputs): 101 | return TensorType(cgt.floatX, 4) 102 | 103 | # def pool(x_ncuv, rows_in, cols_in, poolshp, pool_type='max'): 104 | # if rows_in % poolshp[0] != 0 or cols_in % poolshp[0] != 0: 105 | # row_residue = rows_in%poolshp[0] 106 | # col_residue = cols_in%poolshp[1] 107 | # warn("image shape not divisible by pool size. cropping %i/%i on top, %i/%i on left"%(row_residue,rows_in,col_residue,cols_in)) 108 | # x_ncuv = x_ncuv[:,:,:rows_in - row_residue, :cols_in - col_residue] 109 | # x_ncpaqb = x_ncuv.reshape( (x_ncuv.shape[0], x_ncuv.shape[1], rows_in // poolshp[0], poolshp[0], cols_in // poolshp[1], poolshp[1]) ) 110 | # x_ncpqab = x_ncpaqb.transpose([0,1,2,4,3,5]) 111 | # x_ncpq_ab = cgt.reshape(x_ncpqab, shape(x_ncpqab)[:4] + [size(x_ncpqab,4)*size(x_ncpqab,5)]) 112 | # if pool_type == 'max': 113 | # x_ncpq = x_ncpq_ab.max(axis=4) 114 | # elif pool_type == 'mean': 115 | # x_ncpq = x_ncpq_ab.mean(axis=4) 116 | # elif pool_type == '2norm': 117 | # x_ncpq = cgt.sqrt(cgt.square(x_ncpq_ab).sum(axis=4)) #pylint: disable=E1111 118 | # elif pool_type == 'softmax': 119 | # x_ncpq = cgt.log(cgt.exp(x_ncpq_ab).sum(axis=4)) #pylint: disable=E1111 120 | # assert x_ncpq.ndim==4 121 | # return x_ncpq 122 | 123 | 124 | class Pool(Op): 125 | def __init__(self, kind, stride, kernel, pad): 126 | self.kind = kind 127 | self.stride = stride 128 | self.kernel = kernel 129 | self.pad = pad 130 | def get_diff(self, _): 131 | return [True] 132 | def get_name(self): 133 | return "%spool"%self.kind 134 | def get_numeric_py(self): 135 | raise cgt.core.Todo 136 | def pullback(self, inputs, output, goutput): 137 | raise cgt.core.Todo 138 | def shp_apply(self, inputs): 139 | x = inputs[0] 140 | assert x.ndim == 4 141 | return [size(x,0), size(x,1), (size(x,2)-self.pad[0]-self.kernel[0]+1)//self.stride[0], 142 | (size(x,3)-self.pad[1]-self.kernel[1]+1)//self.stride[0]] 143 | # XXX round up or down? 
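# (note: Python's // is floor division, so as written these pooled dimensions round down)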
144 | def typ_apply(self, inputs): 145 | return inputs[0].typ 146 | 147 | def pool(kind, x, stride, kernel, pad): 148 | return Result(Pool(kind,stride,kernel,pad), [x]) 149 | 150 | def lrn(x, alpha, beta, local_size): 151 | s = Result(CudaLRNScaling(alpha, local_size), [x]) 152 | return s/cgt.power(s, -beta) 153 | 154 | # XXX needs params 155 | class CudaLRNScaling(Op): 156 | def __init__(self, alpha, local_size): 157 | self.alpha = alpha 158 | self.local_size = local_size 159 | def cuda_code(self, _inputs, funcname): 160 | return """ 161 | void %(funcname)s(void* cldata, cgt_array** io) { 162 | int block, thread, size; 163 | size = num_img * height * width; 164 | FindConfiguration(size, block, thread); 165 | cgt_array* bottom=io[0], *scale=io[1]; 166 | ing num_img = bottom->shape[0], channel = bottom->shape[1], 167 | height = bottom->shape[2], width=bottom->shape[2]; 168 | LRNFillScale<<>>( 169 | size, bottom->data, num_img, channel, height, width, cl->local_size, 170 | cl->alpha / cl->local_size, scale->data); 171 | }"""%dict(funcname=funcname) 172 | def cuda_headers(self): 173 | return ["cgt_cuda.h","lrn.cuh"] 174 | def shp_apply(self, inputs): 175 | return shape(inputs[0]) 176 | def typ_apply(self, _inputs): 177 | return TensorType(cgt.floatX, 4) 178 | -------------------------------------------------------------------------------- /cgt/nn_ops/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joschu/cgt/90b15ab041fc2137e62b96e8612ccee605f71ceb/cgt/nn_ops/__init__.py -------------------------------------------------------------------------------- /cgt/nn_ops/cross_channel_lrn.py: -------------------------------------------------------------------------------- 1 | import ctypes 2 | import cgt 3 | from cgt import core 4 | from collections import namedtuple 5 | 6 | LRNInfo = namedtuple("LRNInfo",["localsize","alpha","beta"]) 7 | 8 | def make_closure(info): 9 | return [ 10 | ("localsize",ctypes.c_int,info.localsize), 11 | ("alpha",ctypes.c_double,info.alpha), 12 | ("beta",ctypes.c_double,info.beta) 13 | ] 14 | 15 | class CrossChannelLRNForward(core.Op): 16 | available_impls = ("native_gpu",) 17 | def __init__(self, info): 18 | assert isinstance(info, LRNInfo) 19 | self.info = info 20 | 21 | def get_native_compile_info(self, input_types, devtype): 22 | assert devtype == "gpu" 23 | d = dict(cdtype=core.np2c[input_types[0].dtype]) 24 | cuda_code = r""" 25 | #include "cgt_cuda.h" 26 | #include "lrn.cuh" 27 | void launchker_$function(int num_img, int channels, int height, int width, int localsize, double alpha, double beta, %(cdtype)s* Xdata, %(cdtype)s* topdata, %(cdtype)s* scaledata) { 28 | int size = num_img * height * width; 29 | int nblocks, nthreads; 30 | cgt_get_bt(size, nblocks, nthreads); 31 | LRNFillScale<%(cdtype)s><<>>( 32 | size, Xdata, num_img, channels, height, width, localsize, alpha / localsize, scaledata); 33 | CUDA_CHECK_ERROR("LRNFillScale"); 34 | 35 | size = num_img * channels * width * height; 36 | cgt_get_bt(size, nblocks, nthreads); 37 | LRNComputeOutput<%(cdtype)s><<>>(size, Xdata, scaledata, -beta, topdata); 38 | CUDA_CHECK_ERROR("LRNComputeOutput"); 39 | }"""%d 40 | code = r""" 41 | extern void launchker_$function(int num_img, int channels, int height, int width, int localsize, double alpha, double beta, %(cdtype)s* Xdata, %(cdtype)s* topdata, %(cdtype)s* scaledata); 42 | CGT_EXPORT_C void $function($closure* cldata, cgtArray** reads, cgtTuple* write) { 43 | cgtArray* X = reads[0]; 44 | int 
num_img = X->shape()[0], 45 | channels = X->shape()[1], 46 | height = X->shape()[2], 47 | width = X->shape()[3]; 48 | cgtArray* top = (cgtArray*)write->getitem(0); 49 | cgtArray* scale = (cgtArray*)write->getitem(1); 50 | launchker_$function(num_img, channels, height, width, cldata->localsize, cldata->alpha, cldata->beta, (%(cdtype)s*)X->data(), (%(cdtype)s*)top->data(), (%(cdtype)s*)scale->data()); 51 | 52 | }"""%d 53 | return core.NativeCompileInfo(code, closure_triples = make_closure(self.info), 54 | link_flags="-lcudart", gpu_deref_mask=(True,), 55 | extra_srcs=[core.SrcFile("cuda",cuda_code)]) 56 | def shp_apply(self, inputs): 57 | return (inputs[0].shape,inputs[0].shape) 58 | def typ_apply(self, input_types): 59 | return core.TupleType(input_types[0], input_types[0]) 60 | def pullback(self, inputs, output, gout): 61 | top, scaling = cgt.core.unpack(output) 62 | gtop, _ = gout 63 | return [core.Result(CrossChannelLRNBackward(self.info), [inputs[0], top, scaling, gtop])] 64 | 65 | class CrossChannelLRNBackward(core.Op): 66 | available_impls = ("native_gpu",) 67 | def __init__(self, info): 68 | self.info = info 69 | def get_native_compile_info(self, input_types, devtype): 70 | assert devtype == "gpu" 71 | d = dict(cdtype=core.np2c[input_types[0].dtype]) 72 | cuda_code=r""" 73 | #include "cgt_cuda.h" 74 | #include "lrn.cuh" 75 | void launchker_$function(int num_img, int channels, int height, int width, int localsize, double alpha, double beta, %(cdtype)s* Xdata, 76 | %(cdtype)s* topdata, %(cdtype)s* scalingdata, %(cdtype)s* topdiffdata, %(cdtype)s* bottomdiffdata) { 77 | int nblocks, nthreads; 78 | int size = num_img * width * height; 79 | cgt_get_bt(size, nblocks, nthreads); 80 | LRNComputeDiff<%(cdtype)s><<>>(size, (%(cdtype)s*)Xdata, (%(cdtype)s*)topdata, 81 | (%(cdtype)s*)scalingdata, (%(cdtype)s*)topdiffdata, num_img, channels, height, width, localsize, 82 | -beta, 2. 
* alpha * beta / localsize, (%(cdtype)s*)bottomdiffdata); 83 | CUDA_CHECK_ERROR("CrossChannelLRNBackward"); 84 | } 85 | """%d 86 | code = """ 87 | void launchker_$function(int num_img, int channels, int height, int width, int localsize, double alpha, double beta, %(cdtype)s* Xdata, 88 | %(cdtype)s* topdata, %(cdtype)s* scaledata, %(cdtype)s* topdiffdata, %(cdtype)s* bottomdiffdata); 89 | CGT_EXPORT_C void $function($closure* cldata, cgtArray** reads, cgtArray* bottom_diff) { 90 | cgtArray *X=reads[0], *top=reads[1], *scaling=reads[2], *top_diff=reads[3]; 91 | int num_img = X->shape()[0], 92 | channels = X->shape()[1], 93 | height = X->shape()[2], 94 | width = X->shape()[3]; 95 | launchker_$function(num_img, channels, height, width, cldata->localsize, cldata->alpha, cldata->beta, (%(cdtype)s*)X->data(), 96 | (%(cdtype)s*)top->data(), (%(cdtype)s*)scaling->data(), (%(cdtype)s*)top_diff->data(), (%(cdtype)s*)bottom_diff->data()); 97 | }"""%d 98 | return core.NativeCompileInfo(code, closure_triples = make_closure(self.info), 99 | link_flags="-lcudart", gpu_deref_mask=(True,True,True,True), 100 | extra_srcs=[core.SrcFile("cuda",cuda_code)]) 101 | def shp_apply(self, inputs): 102 | return cgt.shape(inputs[0]) 103 | def typ_apply(self, _inputs): 104 | return core.TensorType(cgt.floatX, 4) 105 | 106 | def cross_channel_lrn(X, localsize, alpha, beta): 107 | assert X.ndim == 4 108 | return core.Result(CrossChannelLRNForward(LRNInfo(localsize,alpha,beta)), [X])[0] 109 | 110 | 111 | # print q[:-1].sum(), s[:-1].sum() 112 | 113 | 114 | -------------------------------------------------------------------------------- /cgt/nn_ops/cudnn_ops.py: -------------------------------------------------------------------------------- 1 | import ctypes 2 | import cgt 3 | from cgt import core 4 | from collections import namedtuple 5 | 6 | def cudnn_conv_closure(*ints): 7 | return (ctypes.c_int*len(ints))(*ints) 8 | 9 | def make_closure(ph, pw, sv, sh): 10 | return [ 11 | ("ph",ctypes.c_int,ph), 12 | ("pw",ctypes.c_int,pw), 13 | ("sv",ctypes.c_int,sv), 14 | ("sh",ctypes.c_int,sh), 15 | ("handle",ctypes.c_void_p,0), 16 | ("stream",ctypes.c_void_p,0), 17 | ] 18 | 19 | class CudnnConvForward(core.Op): 20 | available_impls = ("native_gpu",) 21 | def __init__(self, ph, pw, sv, sh): 22 | "pad_height, pad_width, stride_vertical, stride_horizontal" 23 | self.ph = ph 24 | self.pw = pw 25 | self.sv = sv 26 | self.sh = sh 27 | 28 | def get_native_compile_info(self, _input_types, devtype): 29 | assert devtype=="gpu" 30 | code = """ 31 | CGT_EXPORT_C void $setup(conv_closure* closure) {setup_cudnn(closure);} 32 | CGT_EXPORT_C void $teardown(conv_closure* closure) {teardown_cudnn(closure);} 33 | CGT_EXPORT_C void $function(conv_closure* closure, cgtArray** reads, cgtArray* write) { 34 | if (!closure->handle) setup_cudnn(closure); 35 | performConvForward(closure, reads[0], reads[1], reads[2], write); 36 | }""" 37 | return core.NativeCompileInfo(code, closure_triples = make_closure(self.ph, self.pw, self.sv, self.sh), 38 | includes=["cudnn_support.h"], link_flags="-lcudnn -lcudart") 39 | def shp_apply(self, inputs): 40 | X,W,_b = inputs 41 | h = cgt.ceil_divide(cgt.size(X,2) + self.ph*2 - cgt.size(W, 2) + 1, self.sv) 42 | w = cgt.ceil_divide(cgt.size(X,3) + self.pw*2 - cgt.size(W, 3) + 1, self.sh) 43 | return [cgt.size(X,0), cgt.size(W,0), h, w] 44 | def typ_apply(self, _inputs): 45 | return core.TensorType(cgt.floatX, 4) 46 | def pullback(self, inputs, _output, gout): 47 | X,W,b = inputs 48 | # pass in an extra first argument to make 
output shape computation simpler 49 | return [core.Result(CudnnConvBackwardData(self.ph, self.pw, self.sv, self.sh), [X, gout, W]), 50 | core.Result(CudnnConvBackwardFilter(self.ph, self.pw, self.sv, self.sh), [W, gout, X]), 51 | core.Result(CudnnConvBackwardBias(self.ph, self.pw, self.sv, self.sh), [b, gout])] 52 | 53 | class CudnnConvBackwardData(core.Op): 54 | available_impls = ("native_gpu",) 55 | def __init__(self, ph, pw, sv, sh): 56 | self.ph = ph 57 | self.pw = pw 58 | self.sv = sv 59 | self.sh = sh 60 | def get_native_compile_info(self, input_types, devtype): 61 | assert devtype=="gpu" 62 | code=""" 63 | CGT_EXPORT_C void $setup(conv_closure* closure) {setup_cudnn(closure);} 64 | CGT_EXPORT_C void $teardown(conv_closure* closure) {teardown_cudnn(closure);} 65 | CGT_EXPORT_C void $function(conv_closure* closure, cgtArray** reads, cgtArray* write) { 66 | if (!closure->handle) setup_cudnn(closure); 67 | performConvBackwardData(closure, reads[1], reads[2], write); 68 | }""" 69 | return core.NativeCompileInfo(code, closure_triples = make_closure(self.ph, self.pw, self.sv, self.sh), 70 | includes=["cudnn_support.h"], link_flags="-lcudnn -lcudart") 71 | def shp_apply(self, inputs): 72 | return cgt.shape(inputs[0]) 73 | def typ_apply(self, _inputs): 74 | return core.TensorType(cgt.floatX, 4) 75 | 76 | class CudnnConvBackwardFilter(core.Op): 77 | available_impls = ("native_gpu",) 78 | def __init__(self, ph, pw, sv, sh): 79 | self.ph = ph 80 | self.pw = pw 81 | self.sv = sv 82 | self.sh = sh 83 | def get_native_compile_info(self, input_types, devtype): 84 | assert devtype=="gpu" 85 | code = """ 86 | CGT_EXPORT_C void $setup(conv_closure* closure) {setup_cudnn(closure);} 87 | CGT_EXPORT_C void $teardown(conv_closure* closure) {teardown_cudnn(closure);} 88 | CGT_EXPORT_C void $function(conv_closure* closure, cgtArray** reads, cgtArray* write) { 89 | if (!closure->handle) setup_cudnn(closure); 90 | performConvBackwardFilter(closure, reads[1], reads[2], write); 91 | }""" 92 | return core.NativeCompileInfo(code, closure_triples = make_closure(self.ph, self.pw, self.sv, self.sh), 93 | includes=["cudnn_support.h"], link_flags="-lcudnn -lcudart") 94 | def shp_apply(self, inputs): 95 | return cgt.shape(inputs[0]) 96 | def typ_apply(self, _inputs): 97 | return core.TensorType(cgt.floatX, 4) 98 | 99 | class CudnnConvBackwardBias(core.Op): 100 | available_impls = ("native_gpu",) 101 | def __init__(self, ph, pw, sv, sh): 102 | self.ph = ph 103 | self.pw = pw 104 | self.sv = sv 105 | self.sh = sh 106 | def get_native_compile_info(self, input_types, devtype): 107 | assert devtype == "gpu" 108 | code = """ 109 | CGT_EXPORT_C void $setup(conv_closure* closure) {setup_cudnn(closure);} 110 | CGT_EXPORT_C void $teardown(conv_closure* closure) {teardown_cudnn(closure);} 111 | CGT_EXPORT_C void $function(conv_closure* closure, cgtArray** reads, cgtArray* write) { 112 | if (!closure->handle) setup_cudnn(closure); 113 | performConvBackwardBias(closure, reads[1], write); 114 | }""" 115 | return core.NativeCompileInfo(code, closure_triples = make_closure(self.ph, self.pw, self.sv, self.sh), 116 | includes=["cudnn_support.h"], link_flags="-lcudnn -lcudart") 117 | def shp_apply(self, inputs): 118 | return cgt.shape(inputs[0]) 119 | def typ_apply(self, _inputs): 120 | return core.TensorType(cgt.floatX, 4) 121 | 122 | PoolInfo = namedtuple("PoolInfo", ["kernel_h", "kernel_w", "pad_h", "pad_w", "stride_h", "stride_w"]) 123 | 124 | def poolinfo2closure(info): 125 | return [ 126 | ("kernel_h", ctypes.c_int, info.kernel_h), 
127 | ("kernel_w", ctypes.c_int, info.kernel_w), 128 | ("pad_h", ctypes.c_int, info.pad_h), 129 | ("pad_w", ctypes.c_int, info.pad_w), 130 | ("stride_h", ctypes.c_int, info.stride_h), 131 | ("stride_w", ctypes.c_int, info.stride_w), 132 | ("handle",ctypes.c_void_p,0), 133 | ("stream",ctypes.c_void_p,0), 134 | ] 135 | 136 | class CudnnPoolForward(core.Op): 137 | available_impls = ("native_gpu",) 138 | def __init__(self, info): 139 | self.info = info 140 | 141 | def get_native_compile_info(self, _input_types, devtype): 142 | assert devtype == "gpu" 143 | code = """ 144 | CGT_EXPORT_C void $setup(pooling_closure* closure) {setup_cudnn(closure);} 145 | CGT_EXPORT_C void $teardown(pooling_closure* closure) {teardown_cudnn(closure);} 146 | CGT_EXPORT_C void $function(pooling_closure* closure, cgtArray** reads, cgtArray* write) { 147 | if (!closure->handle) setup_cudnn(closure); 148 | performPoolingForward(closure, reads[0], write); 149 | }""" 150 | return core.NativeCompileInfo(code, closure_triples = poolinfo2closure(self.info), 151 | includes=["cudnn_support.h"], link_flags="-lcudnn -lcudart") 152 | def shp_apply(self, inputs): 153 | info = self.info 154 | batch_size, channels, height, width = cgt.shape(inputs[0]) 155 | pooled_height = cgt.ceil_divide(height + 2*info.pad_h - info.kernel_h, info.stride_h) 156 | pooled_width = cgt.ceil_divide(width + 2*info.pad_w - info.kernel_w, info.stride_w) 157 | outshape = [batch_size , channels, pooled_height, pooled_width] 158 | return outshape 159 | def typ_apply(self, input_types): 160 | return input_types[0] 161 | def pullback(self, inputs, output, gout): 162 | return [core.Result(CudnnPoolBackward(self.info), [inputs[0], output, gout])] 163 | 164 | 165 | class CudnnPoolBackward(core.Op): 166 | available_impls = ("native_gpu",) 167 | def __init__(self, info): 168 | self.info = info 169 | 170 | def get_native_compile_info(self, _input_types, devtype): 171 | assert devtype == "gpu" 172 | code = """ 173 | CGT_EXPORT_C void $setup(pooling_closure* closure) {setup_cudnn(closure);} 174 | CGT_EXPORT_C void $teardown(pooling_closure* closure) {teardown_cudnn(closure);} 175 | CGT_EXPORT_C void $function(pooling_closure* closure, cgtArray** reads, cgtArray* write) { 176 | if (!closure->handle) setup_cudnn(closure); 177 | performPoolingBackward(closure, reads[0], reads[1], reads[2], write); 178 | }""" 179 | return core.NativeCompileInfo(code, closure_triples = poolinfo2closure(self.info), 180 | includes=["cudnn_support.h"], link_flags="-lcudnn -lcudart") 181 | def shp_apply(self, inputs): 182 | return cgt.shape(inputs[0]) 183 | def typ_apply(self, input_types): 184 | return input_types[0] 185 | def pullback(self, inputs, output, gout): 186 | raise NotImplementedError 187 | 188 | 189 | 190 | -------------------------------------------------------------------------------- /cgt/nn_ops/im2col.py: -------------------------------------------------------------------------------- 1 | import cgt 2 | from cgt import core 3 | import ctypes 4 | from collections import namedtuple 5 | 6 | Im2ColInfo = namedtuple("Im2ColInfo", ["kernel_h", "kernel_w", "pad_h", "pad_w", "stride_h", "stride_w"]) 7 | 8 | def im2col(x, kernelshape, pad, stride): 9 | assert stride[0] > 0 and stride[1] > 0 10 | assert kernelshape[0] > 0 and kernelshape[1] > 0 11 | kernelshape, pad, stride = map(tuple, (kernelshape, pad, stride)) 12 | return core.Result(Im2Col(Im2ColInfo(*(kernelshape+pad+stride))), [x]) 13 | 14 | def info2closure(info): 15 | return [ 16 | ("kernel_h", ctypes.c_int, info.kernel_h), 17 
| ("kernel_w", ctypes.c_int, info.kernel_w), 18 | ("pad_h", ctypes.c_int, info.pad_h), 19 | ("pad_w", ctypes.c_int, info.pad_w), 20 | ("stride_h", ctypes.c_int, info.stride_h), 21 | ("stride_w", ctypes.c_int, info.stride_w), 22 | ] 23 | 24 | 25 | class Im2Col(core.Op): 26 | available_impls = ("native_cpu",) 27 | def __init__(self, info): 28 | assert info.stride_h>0 and info.stride_w>0 29 | self.info = info 30 | def get_diff(self, _): 31 | return [True] 32 | def get_py_impl(self): 33 | raise core.MethodNotDefined 34 | def pullback(self, (x,), _y, gy): 35 | return [core.Result(Col2Im(self.info), [gy] + cgt.shape(x))] 36 | def shp_apply(self, inputs): 37 | info = self.info 38 | batch_size, channels, height, width = cgt.shape(inputs[0]) 39 | height_out = (height + 2 * info.pad_h - info.kernel_h) // info.stride_h + 1 40 | width_out = (width + 2 * info.pad_w - info.kernel_w) // info.stride_w + 1 41 | return [batch_size , height_out, width_out, channels * info.kernel_w * info.kernel_h] 42 | def typ_apply(self, inputs): 43 | assert inputs[0].ndim == 4 44 | return core.TensorType(inputs[0].dtype, 4) 45 | def get_native_compile_info(self, input_types, devtype): 46 | assert devtype == "cpu" 47 | d = dict(cdtype=core.np2c[input_types[0].dtype]) 48 | d.update(self.info._asdict()) 49 | code = r""" 50 | CGT_EXPORT_C void $function($closure* cl, cgtArray** reads, cgtArray* write) { 51 | cgtArray* im = reads[0]; 52 | const long* imshape = im->shape(); 53 | int batchsize = imshape[0], 54 | channels = imshape[1], 55 | height = imshape[2], 56 | width = imshape[3]; 57 | for (int i=0; i < batchsize; ++i) { 58 | im2col_cpu<%(cdtype)s, %(kernel_h)s,%(kernel_w)s,%(pad_h)s,%(pad_w)s,%(stride_h)s,%(stride_w)s> 59 | ((%(cdtype)s*)im->data() + im->stride(0)*i, channels, height, width, (%(cdtype)s*)write->data() + write->stride(0)*i); 60 | } 61 | }"""%d 62 | return core.NativeCompileInfo(code, includes=["im2col.h"], closure_triples=info2closure(self.info)) 63 | 64 | class Col2Im(core.Op): 65 | available_impls = ("native_cpu",) 66 | def __init__(self, info): 67 | self.info = info 68 | def get_diff(self, _): 69 | return [True] 70 | def get_py_impl(self): 71 | raise core.MethodNotDefined 72 | def shp_apply(self, inputs): 73 | return inputs[1:] 74 | def typ_apply(self, inputs): 75 | return core.TensorType(inputs[0].dtype, 4) 76 | def get_closure(self, _inputs): 77 | return info2closure(self.info) 78 | def get_native_compile_info(self, input_types, devtype): 79 | d = dict(cdtype=core.np2c[input_types[0].dtype]) 80 | d.update(self.info._asdict()) 81 | code = r""" 82 | CGT_EXPORT_C void $function($closure* cl, cgtArray** reads, cgtArray* write) { 83 | cgtArray* col = reads[0]; 84 | long batchsize = reads[1]->at(0), 85 | channels = reads[2]->at(0), 86 | height = reads[3]->at(0), 87 | width = reads[4]->at(0); 88 | for (int i=0; i < batchsize; ++i) { 89 | col2im_cpu<%(cdtype)s, %(kernel_h)s,%(kernel_w)s,%(pad_h)s,%(pad_w)s,%(stride_h)s,%(stride_w)s> 90 | ((%(cdtype)s*)col->data() + col->stride(0)*i, channels, height, width,(%(cdtype)s*)write->data() + write->stride(0)*i); 91 | } 92 | }"""%d 93 | return core.NativeCompileInfo(code, includes=["im2col.h"], closure_triples=info2closure(self.info)) 94 | 95 | -------------------------------------------------------------------------------- /cgt/nn_ops/max_pool_2d.py: -------------------------------------------------------------------------------- 1 | import cgt 2 | from cgt import core 3 | import ctypes 4 | from collections import namedtuple 5 | import numpy as np 6 | 7 | 8 | # 9 | 
PoolInfo = namedtuple("PoolInfo", ["kernel_h", "kernel_w", "pad_h", "pad_w", "stride_h", "stride_w"]) 10 | 11 | def info2closure(info): 12 | return [ 13 | ("kernel_h", ctypes.c_int, info.kernel_h), 14 | ("kernel_w", ctypes.c_int, info.kernel_w), 15 | ("pad_h", ctypes.c_int, info.pad_h), 16 | ("pad_w", ctypes.c_int, info.pad_w), 17 | ("stride_h", ctypes.c_int, info.stride_h), 18 | ("stride_w", ctypes.c_int, info.stride_w), 19 | ] 20 | # 21 | 22 | class MaxPool(core.Op): 23 | available_impls = ("native_cpu",) 24 | def __init__(self, info): 25 | assert info.stride_h>0 and info.stride_w>0 26 | self.info = info 27 | def get_diff(self, _): 28 | return [True] 29 | def get_py_impl(self): 30 | raise core.MethodNotDefined 31 | def pullback(self, (x,), y, gy): 32 | pool,mask = core.unpack(y) 33 | gpool,_gmask = gy 34 | return [core.Result(MaxPoolPullback(self.info), [x,pool,mask,gpool])] 35 | def shp_apply(self, inputs): 36 | # pooled_height_ = static_cast<int>(ceil(static_cast<float>(height_ + 2 * pad_h_ - kernel_h_) / stride_h_)) + 1; 37 | # pooled_width_ = static_cast<int>(ceil(static_cast<float>(width_ + 2 * pad_w_ - kernel_w_) / stride_w_)) + 1; 38 | info = self.info 39 | batch_size, channels, height, width = cgt.shape(inputs[0]) 40 | pooled_height = cgt.ceil_divide(height + 2*info.pad_h - info.kernel_h, info.stride_h) 41 | pooled_width = cgt.ceil_divide(width + 2*info.pad_w - info.kernel_w, info.stride_w) 42 | outshape = [batch_size , channels, pooled_height, pooled_width] 43 | return (outshape, outshape) 44 | def typ_apply(self, inputs): 45 | return core.TupleType(core.TensorType(inputs[0].dtype, 4), core.TensorType('i4', 4)) 46 | def get_closure(self, _inputs): 47 | return info2closure(self.info) 48 | def get_native_compile_info(self, input_types, devtype): 49 | code = r""" 50 | CGT_EXPORT_C void $function(conv_closure* cl, cgtArray** reads, cgtTuple* write) { 51 | max_pool<%(cdtype)s>(cl, reads[0], static_cast<cgtArray*>(write->getitem(0)), static_cast<cgtArray*>(write->getitem(1))); 52 | }"""%dict(cdtype=core.np2c[input_types[0].dtype]) 53 | return core.NativeCompileInfo(code, closure_triples=info2closure(self.info), includes=["pooling.h"]) 54 | 55 | class MaxPoolPullback(core.Op): 56 | available_impls = ("native_cpu",) 57 | def __init__(self, info): 58 | self.info = info 59 | def get_py_impl(self): 60 | raise core.MethodNotDefined 61 | def shp_apply(self, inputs): 62 | return cgt.shape(inputs[0]) 63 | def typ_apply(self, inputs): 64 | return core.TensorType(inputs[0].dtype, 4) 65 | def get_closure(self, _inputs): 66 | return info2closure(self.info) 67 | def get_native_compile_info(self, input_types, devtype): 68 | code = r""" 69 | CGT_EXPORT_C void $function(conv_closure* cl, cgtArray** reads, cgtArray* write) { 70 | max_pool_pullback<%(cdtype)s>(reads[0], reads[1], reads[2], reads[3], write); 71 | }"""%dict(cdtype=core.np2c[input_types[0].dtype]) 72 | return core.NativeCompileInfo(code, closure_triples=info2closure(self.info), includes=["pooling.h"]) 73 | 74 | -------------------------------------------------------------------------------- /cgt/numeric_diff.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def numeric_grad(f,x,eps=1e-9,method="central"): 4 | if method == "central": 5 | xpert = x.copy() 6 | out = np.zeros_like(x) 7 | for i in xrange(x.size): 8 | xpert.flat[i] = x.flat[i] + eps 9 | yplus = f(xpert) 10 | xpert.flat[i] = x.flat[i] - eps 11 | yminus = f(xpert) 12 | xpert.flat[i] = x.flat[i] 13 | out.flat[i] = (yplus - yminus) / (2*eps) 14 | if (i+1)%1000 ==
0: print "%i/%i components done"%(i+1,x.size) 15 | return out 16 | else: 17 | raise NotImplementedError("invalid method %s"%method) 18 | 19 | def numeric_grad_multi(f, xs, eps=1e-9,method="central"): 20 | out = [] 21 | for i in xrange(len(xs)): 22 | li = list(xs) 23 | def f1(x): 24 | li[i] = x 25 | return f(*li) 26 | out.append(numeric_grad(f1, xs[i], eps, method=method)) 27 | return out 28 | 29 | -------------------------------------------------------------------------------- /cgt/tests/__init__.py: -------------------------------------------------------------------------------- 1 | import cgt 2 | from cgt.numeric_diff import numeric_grad, numeric_grad_multi 3 | from functools import wraps 4 | import itertools 5 | import numpy as np 6 | 7 | 8 | def gradcheck_model(cost, params, extravars=(), extravals=(), atol=1e-8, eps=1e-9): 9 | precision = cgt.get_precision() 10 | if precision == "single": 11 | cgt.utils.warn("You're doing a gradient check with %s precision. Use double or better yet quad for best results"%(precision)) 12 | assert all(param.is_input() for param in params) 13 | assert len(extravars) == len(extravals) 14 | 15 | # Convert to Argument nodes 16 | param_args = [cgt.core.Argument(typ=s.typ,name=s.name)if s.is_data() else s for s in params] 17 | 18 | # Get new cost in terms o farguments 19 | cost = cgt.core.clone(cost, replace=dict(zip(params,param_args))) 20 | 21 | grads = cgt.grad(cost, param_args) 22 | paramvals = [param.op.get_value() for param in params] 23 | fcost = cgt.function(param_args, cost, givens=zip(extravars,extravals)) 24 | fgrad = cgt.function(param_args, grads,givens=zip(extravars,extravals)) 25 | 26 | angrads = fgrad(*paramvals) 27 | nugrads = numeric_grad_multi(fcost, paramvals, eps=eps) 28 | 29 | for (angrad,nugrad) in zip(angrads,nugrads): 30 | assert np.allclose(angrad,nugrad,atol=atol) 31 | 32 | 33 | def across_configs(*args, **kwargs): 34 | """ 35 | Decorator for Nose test. Generates tests for all combinations of configuration options. 36 | 37 | Examples: 38 | 39 | Generates tests for all assignments of backends and precision. 40 | @across_configs 41 | def test_adagrad(): ... 42 | 43 | Generates tests for python/single and python/double: 44 | @across_configs(backends=("python",), precisions=("single", "double")) 45 | def test_adagrad(): ... 
46 | """ 47 | 48 | # If one function arg is passed, then apply this decorator with no parameters 49 | if len(args) == 1 and not kwargs and hasattr(args[0], "__call__"): 50 | return across_configs()(args[0]) 51 | 52 | assert not args 53 | backends = kwargs.get("backends", ("python", "native")) 54 | precisions = kwargs.get("precisions", ("single", "double")) 55 | devtypes = kwargs.get("devtypes",("cpu",)) 56 | pass_settings = kwargs.get("pass_settings", False) 57 | 58 | def decorator(check_func): 59 | @wraps(check_func) 60 | def check_func_with_config(backend, precision, devtype): 61 | with cgt.scoped_update_config(backend=backend, precision=precision, default_device=cgt.core.Device(devtype=devtype)): 62 | if pass_settings: 63 | check_func(backend=backend, precision=precision) 64 | else: 65 | check_func() 66 | 67 | @wraps(check_func_with_config) 68 | def wrapper(): 69 | for backend, precision, devtype in itertools.product(backends, precisions, devtypes): 70 | yield check_func_with_config, backend, precision, devtype 71 | return wrapper 72 | 73 | return decorator 74 | -------------------------------------------------------------------------------- /cgt/tests/_test_assert.py: -------------------------------------------------------------------------------- 1 | import cgt 2 | import unittest 3 | 4 | def myfunc(x): 5 | print "x",x 6 | 7 | class AssertTestCase(unittest.TestCase): 8 | def runTest(self): 9 | if cgt.get_config()["backend"] != "python": 10 | cgt.utils.warn("Skipping test -- only works for backend=python") 11 | return 12 | x = cgt.scalar() 13 | with cgt.debug_context() as dbg: 14 | cgt.assert_(cgt.equal(x, 1),"yoyoyo") 15 | cgt.dbg_call(myfunc, x) 16 | print "dbg",dbg.nodes 17 | # cgt.assert_(cgt.equal(x, 2)) 18 | 19 | f = cgt.make_function([x],[x],dbg=dbg) 20 | f(1) 21 | with self.assertRaises(AssertionError): 22 | f(2) 23 | 24 | 25 | if __name__ == "__main__": 26 | AssertTestCase().runTest() 27 | -------------------------------------------------------------------------------- /cgt/tests/_test_cycgt.py: -------------------------------------------------------------------------------- 1 | import cycgt 2 | import cgt 3 | import numpy as np 4 | import unittest 5 | 6 | 7 | class CyCgtTestCase(unittest.TestCase): 8 | def test_cycgt(self): 9 | x = cgt.vector('x') 10 | y = cgt.vector('y') 11 | z = y/x 12 | cs = cycgt.CallSequence([x,y],[z], list(cgt.topsorted([z]))) 13 | 14 | xshp = (4,) 15 | yshp = (4,) 16 | zshp = (4,) 17 | 18 | xval = np.random.randn(*xshp).astype('float32') 19 | yval = np.random.randn(*yshp).astype('float32') 20 | zval = np.random.randn(*zshp).astype('float32') 21 | 22 | cs.set_shapes([xshp,yshp,zshp]) 23 | cs.set_inputs([xval,yval]) 24 | cs.execute() 25 | print xval, yval 26 | print xval * yval 27 | np.testing.assert_allclose(yval/xval , cs.get_outputs_numpy()[0]) 28 | 29 | 30 | if __name__ == "__main__": 31 | unittest.main() 32 | -------------------------------------------------------------------------------- /cgt/tests/_test_eg.py: -------------------------------------------------------------------------------- 1 | import cgt 2 | import unittest 3 | import numpy as np 4 | import pprint 5 | class EgTestCase(unittest.TestCase): 6 | def runTest(self): 7 | cgt.set_precision('double') 8 | x = cgt.vector() 9 | y = cgt.square(x) 10 | eg = cgt.execution.compilation_pipeline([x],[y+y],[]) 11 | pprint.pprint(eg.to_json()) 12 | import cycgt 13 | interp = cycgt.cInterpreter(eg) 14 | print interp(np.array([3,4,5,6],'f8')) 15 | 16 | if __name__ == "__main__": 17 | 
EgTestCase().runTest() -------------------------------------------------------------------------------- /cgt/tests/_test_flatvec.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cgt 3 | from cgt import nn, core 4 | 5 | def test_flatvec(): 6 | cgt.reset_config() 7 | cgt.set_precision('double') 8 | cgt.core.update_config(backend="python") # XXX 9 | 10 | N = 10 11 | K = 3 12 | 13 | Xval = np.random.randn(N,K) 14 | wval = np.random.randn(K) 15 | bval = np.random.randn() 16 | yval = np.random.randn(N) 17 | 18 | X_nk = cgt.shared(Xval, "X") 19 | y_n = cgt.shared(yval, "y") 20 | w_k = cgt.shared(wval, "w") 21 | b = cgt.shared(bval, name="b") 22 | 23 | ypred = cgt.dot(X_nk, w_k) + b 24 | 25 | err = cgt.sum(cgt.square(ypred - y_n)) 26 | g = cgt.grad(err, [w_k, b]) 27 | g = core.simplify(g) 28 | 29 | pars = [w_k, b] 30 | flatx = nn.setup_contiguous_storage(pars) 31 | f = cgt.function([], [err,cgt.flatcat(g)]) 32 | -------------------------------------------------------------------------------- /cgt/tests/_test_shapecheck.py: -------------------------------------------------------------------------------- 1 | import cgt 2 | # X = cgt.matrix(fixed_shape=(10,3)) 3 | y = cgt.vector(fixed_shape=(3,)) 4 | w = cgt.vector(fixed_shape=(5,)) 5 | # z = X.dot(y) 6 | y+w 7 | # cgt.print_tree(cgt.core.simplify(cgt.shape(z))) -------------------------------------------------------------------------------- /cgt/tests/_test_tuples.py: -------------------------------------------------------------------------------- 1 | import cgt, numpy as np 2 | import unittest 3 | 4 | class TupleTestCase(unittest.TestCase): 5 | def runTest(self): 6 | f1 = cgt.function1([], ()) 7 | assert f1() == () 8 | 9 | x = cgt.vector() 10 | xval = np.random.randn(1) 11 | f2 = cgt.function([x], [(x,x),(x,),()]) 12 | ytrue = [(xval,xval),(xval,),()] 13 | y = f2(xval) 14 | assert y==ytrue 15 | if __name__ == "__main__": 16 | TupleTestCase().runTest() 17 | 18 | -------------------------------------------------------------------------------- /cgt/tests/test_array_wrapper.py: -------------------------------------------------------------------------------- 1 | import cgt, numpy as np 2 | from cgt.tests import across_configs 3 | 4 | 5 | @across_configs 6 | def test_array_wrapper(): 7 | xval = np.zeros(10) 8 | x = cgt.shared(xval) 9 | f = cgt.function([],[],updates=[(x,x+1)]) 10 | f() 11 | g = cgt.function([],x.sum()) 12 | assert np.allclose(x.op.get_value(), xval+1) 13 | xval2 = np.arange(10) 14 | x.op.set_value(xval2) 15 | print x.op.get_value() 16 | assert np.allclose(x.op.get_value(), xval2) 17 | assert g() == xval2.sum() 18 | f() 19 | assert np.allclose(x.op.get_value(), xval2+1) 20 | assert g() == (xval2+1).sum() 21 | 22 | 23 | if __name__ == "__main__": 24 | import nose 25 | nose.runmodule() -------------------------------------------------------------------------------- /cgt/tests/test_conv.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cgt 3 | from cgt import nn 4 | from cgt.tests import across_configs 5 | from nose.plugins.skip import SkipTest 6 | 7 | @across_configs() 8 | def test_conv(): 9 | try: 10 | import scipy.signal 11 | except ImportError: 12 | raise SkipTest("skipping because we don't have scipy.signal") 13 | 14 | np.random.seed(0) 15 | x = np.random.randn(2,2,5,17) 16 | filt = np.random.randn(3,2,4,7) 17 | 18 | filtrows = filt.shape[2] 19 | filtcols = filt.shape[3] 20 | 21 | batchsize = x.shape[0] 22
| outchans = filt.shape[0] 23 | 24 | out = np.zeros((batchsize,outchans,x.shape[2]+filtrows-1,x.shape[3]+filtcols-1)) 25 | for b in xrange(x.shape[0]): 26 | for inchan in xrange(x.shape[1]): 27 | for outchan in xrange(outchans): 28 | out[b,outchan] += scipy.signal.convolve2d(x[b,inchan],filt[outchan,inchan][::-1,::-1],mode='full') 29 | 30 | f = cgt.function([], nn.conv2d(cgt.constant(x), cgt.constant(filt), kernelshape=(filtrows,filtcols), pad=(filtrows-1, filtcols-1))) 31 | out1 = f() 32 | # out1 = cgt.numeric_eval1(nn.conv2d(cgt.constant(x), cgt.constant(f), kersize=(filtrows,filtcols)), {}) 33 | np.testing.assert_allclose(out, out1, atol={"single":1e-3,"double":1e-6}[cgt.get_precision()]) 34 | 35 | if __name__ == "__main__": 36 | import nose 37 | nose.runmodule() -------------------------------------------------------------------------------- /cgt/tests/test_devices.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cgt 3 | from cgt.tests import across_configs 4 | from nose.plugins.skip import SkipTest 5 | 6 | @across_configs(backends=("native",)) 7 | def test_devices(): 8 | N = 10 9 | K = 3 10 | 11 | compile_info = cgt.compilation.get_compile_info() 12 | cuda_enabled = compile_info["CGT_ENABLE_CUDA"] 13 | if not cuda_enabled: 14 | raise SkipTest("cuda disabled") 15 | 16 | Xval = np.random.randn(N,K).astype(cgt.floatX) 17 | wval = np.random.randn(K).astype(cgt.floatX) 18 | bval = np.asarray(np.random.randn()).astype(cgt.floatX) 19 | yval = np.random.randn(N).astype(cgt.floatX) 20 | 21 | with cgt.scoped_update_config(default_device=cgt.Device(devtype="gpu")): 22 | 23 | X_nk = cgt.shared(Xval, "X", device=cgt.Device(devtype='gpu')) 24 | y_n = cgt.shared(yval, "y") 25 | w_k = cgt.shared(wval, "w") 26 | b = cgt.shared(bval, name="b") 27 | 28 | print "bval",bval 29 | 30 | ypred = cgt.dot(cgt.square(X_nk), w_k) + b 31 | 32 | err = cgt.sum(cgt.sin(ypred - y_n)) 33 | g = cgt.grad(err, [w_k, b]) 34 | outputs = [err]+g 35 | f = cgt.function([], [err]+g) 36 | results = f() 37 | print results 38 | assert np.allclose(results[0] , np.sin(np.square(Xval).dot(wval)+bval-yval).sum()) 39 | 40 | 41 | if __name__ == "__main__": 42 | import nose 43 | nose.runmodule() 44 | -------------------------------------------------------------------------------- /cgt/tests/test_einsum.py: -------------------------------------------------------------------------------- 1 | import cgt, numpy as np,numpy.random as nr 2 | from cgt.tests import across_configs 3 | 4 | 5 | @across_configs 6 | def test_einsum(): 7 | x = cgt.tensor3() 8 | y = cgt.tensor3() 9 | 10 | sizes = {'i':2,'j':3,'k':5,'l':7} 11 | xaxes = 'ijk' 12 | yaxes = 'ikl' 13 | zaxes = 'ijl' 14 | for i in xrange(10): 15 | xperm = xaxes 16 | (yperm,zperm) = permaxes = [[chars[i] for i in np.random.permutation(3)] for chars in [yaxes,zaxes]] 17 | desc = "%s,%s->%s"%tuple("".join(chars) for chars in [xperm] + permaxes) 18 | z = cgt.einsum(desc, x, y) 19 | xval = nr.randn(*(sizes[c] for c in xperm)) 20 | yval = nr.randn(*(sizes[c] for c in yperm)) 21 | np.testing.assert_allclose( 22 | cgt.numeric_eval(z, {x : xval, y : yval}), 23 | np.einsum(desc, xval, yval), 24 | atol={"single":1e-3,"double":1e-6}[cgt.get_precision()]) 25 | 26 | 27 | if __name__ == "__main__": 28 | import nose 29 | nose.runmodule() 30 | -------------------------------------------------------------------------------- /cgt/tests/test_examples.py: -------------------------------------------------------------------------------- 1 | 
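# Smoke-tests for the scripts in examples/: each command runs in a subprocess,
# and tests whose dataset file (downloads/mnist.npz or downloads/cifar10.npz)
# is absent are skipped rather than failed -- see run_example below.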
import subprocess, os.path as osp 2 | from nose.plugins.skip import SkipTest 3 | 4 | thisdir = osp.dirname(__file__) 5 | 6 | def run_example(cmd, filedepends=None): 7 | if filedepends and not osp.exists(filedepends): 8 | raise SkipTest(cmd) 9 | else: 10 | subprocess.check_call(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 11 | def skip_example(cmd): 12 | raise SkipTest(cmd) 13 | 14 | def test_examples(): 15 | mnist = "%s/../../downloads/mnist.npz"%thisdir 16 | cifar = "%s/../../downloads/cifar10.npz"%thisdir 17 | yield run_example, "CGT_FLAGS=backend=python python %s/../../examples/demo_mnist.py --unittest"%thisdir, mnist 18 | yield run_example, "CGT_FLAGS=backend=native python %s/../../examples/demo_mnist.py --unittest"%thisdir,mnist 19 | yield run_example, "python %s/../../examples/cgt_theano_feedforward_comparison.py --unittest"%thisdir,mnist 20 | yield run_example, "CGT_FLAGS=backend=native python %s/../../examples/demo_cifar.py --unittest"%thisdir,cifar 21 | yield run_example, "cd %s/../../examples/ && CGT_FLAGS=backend=native python demo_char_rnn.py --unittest"%thisdir 22 | yield run_example, "CGT_FLAGS=backend=native python %s/../../examples/demo_neural_turing_machine.py --unittest"%thisdir 23 | runipycmd = "runipy %s/../../examples/tutorial.ipynb"%thisdir 24 | try: 25 | import graphviz 26 | yield run_example, runipycmd 27 | except ImportError: 28 | yield skip_example, runipycmd 29 | 30 | if __name__ == "__main__": 31 | import nose 32 | nose.runmodule() -------------------------------------------------------------------------------- /cgt/tests/test_imgproc.py: -------------------------------------------------------------------------------- 1 | import numpy as np, numpy.random as nr 2 | from cgt.numeric_diff import numeric_grad_multi 3 | import cgt 4 | from cgt.nn import max_pool_2d, im2col, cross_channel_lrn, cudnn_ops 5 | from cgt.compilation import get_compile_info 6 | from cgt import utils 7 | from cgt.tests import across_configs, gradcheck_model 8 | from nose.plugins.skip import SkipTest 9 | 10 | @across_configs(precisions=("double",), backends=("native",)) 11 | def test_cudnn(): 12 | compile_info = get_compile_info() 13 | if not (compile_info["CGT_ENABLE_CUDNN"] and compile_info["CGT_ENABLE_CUDA"]): 14 | raise SkipTest("CUDNN not enabled. 
Skipping this test") 15 | 16 | Xval = nr.randn(2,3,19,18) 17 | Wval = nr.randn(5,3,3,3) 18 | bval = nr.randn(1,5,1,1) 19 | 20 | X = cgt.tensor4("X", fixed_shape=Xval.shape) 21 | W = cgt.tensor4("W", fixed_shape=Wval.shape) 22 | b = cgt.tensor4("b", fixed_shape=bval.shape) 23 | 24 | 25 | Y = cgt.core.Result(cudnn_ops.CudnnConvForward(1,1,1,1),[X, W, b]) 26 | 27 | Y2 = nr.randn(*cgt.core.infer_shape(Y)) 28 | 29 | fY = cgt.function([X,W,b],Y) 30 | Yval = fY(Xval,Wval,bval) 31 | cost = (Y*Y2).sum() 32 | fcost = cgt.function([X,W,b],cost) 33 | fgrad = cgt.function([X,W,b],cgt.grad(cost, [X,W,b])) 34 | angrads = fgrad(Xval,Wval,bval) 35 | nugrads = numeric_grad_multi(fcost, [Xval, Wval, bval],eps=1e-3) 36 | for (nugrad,angrad) in zip(nugrads,angrads): 37 | assert np.allclose(nugrad, angrad, rtol=9e-3, atol=1e-7) 38 | # precision issue: https://groups.google.com/forum/?utm_medium=email&utm_source=footer#!msg/cgt-users/l59nwLF9BzM/aDxcHU5pCgAJ 39 | 40 | @across_configs(precisions=("double",), backends=("native",)) 41 | def test_pool(**kwargs): 42 | np.random.seed(0) 43 | x = cgt.tensor4("x", fixed_shape=(2,3,5,7)) 44 | y = max_pool_2d(x, (4,4),(0,0),(1,1)) 45 | xval = np.random.randn(2,3,5,7) 46 | hval = np.random.randn(*cgt.infer_shape(y)) 47 | h = cgt.constant(hval) 48 | 49 | cost = (y*h).sum() 50 | 51 | fcost = cgt.function([x], cost) 52 | fgrad = cgt.function([x], cgt.grad(cost, [x])[0]) 53 | 54 | from cgt.numeric_diff import numeric_grad 55 | gnum = numeric_grad(fcost, xval) 56 | gana = fgrad(xval) 57 | assert np.allclose(gnum,gana) 58 | 59 | @across_configs(precisions=("quad",), backends=("native",)) 60 | def test_im2col(): 61 | for settings in [ ((4,4),(0,0),(1,1)), ((3,3),(1,1),(2,2)), ((3,3),(1,1),(3,3)) ]: 62 | xval = np.arange(2*1*28*28).reshape(2,1,28,28).astype(cgt.floatX) 63 | x = cgt.tensor4("x", fixed_shape=xval.shape) 64 | y = im2col(x, *settings) 65 | h = cgt.constant(np.random.randn(*cgt.infer_shape(y))) 66 | cost = (y*h).sum() 67 | 68 | fcost = cgt.function([x],cost) 69 | fgrad = cgt.function([x], cgt.grad(cost, [x])[0]) 70 | 71 | from cgt.numeric_diff import numeric_grad 72 | gnum = numeric_grad(fcost, xval,eps=1e-5) 73 | gana = fgrad(xval) 74 | assert np.allclose(gnum, gana) 75 | # fy = cgt.function([x],y) 76 | # yval = fy(xval) 77 | # assert np.allclose(yval[0,0,0] , xval[0,:,0:4,0:4].flatten()) 78 | 79 | @across_configs(precisions=("double",), backends=("native",)) 80 | def test_lrn(): 81 | if not get_compile_info()["CGT_ENABLE_CUDA"]: 82 | raise SkipTest("Skipping because CUDA disabled") 83 | 84 | nr.seed(0) 85 | Xval = nr.randn(4,8,16,16) 86 | X = cgt.shared(Xval, name="X", fixed_shape_mask="all") 87 | # X = cgt.tensor4(name='X') 88 | y = cross_channel_lrn(X, localsize=4, alpha=.1, beta=.5) 89 | f = cgt.function([],y) 90 | print f().sum() 91 | print f().sum() 92 | print f().sum() 93 | assert np.isfinite(f().sum()) 94 | # print f(Xval).sum() 95 | a = nr.rand(*cgt.infer_shape(y)) 96 | loss = (y*a).sum() 97 | gradcheck_model(loss, [X],eps=1e-5) 98 | 99 | 100 | if __name__ == "__main__": 101 | import nose 102 | nose.runmodule() 103 | -------------------------------------------------------------------------------- /cgt/tests/test_inc_subtensor.py: -------------------------------------------------------------------------------- 1 | import cgt 2 | import numpy as np 3 | from cgt.tests import across_configs 4 | 5 | @across_configs(backends=("python","native"), precisions=("single","double")) 6 | def test_incsubtensor0(): 7 | # First let's test fancy slice along zeroth dimension 8 
| 9 | W = cgt.shared(np.zeros((5,3)), name="W") 10 | inc = cgt.matrix() # we'll increment W by this matrix 11 | incval = np.arange(9).reshape(3,3) 12 | 13 | 14 | inds = cgt.vector(dtype='i8') 15 | updates = {W : cgt.inc_subtensor(W, inds, inc)} 16 | f = cgt.function([inds,inc],[],updates=updates) 17 | f([1,2,4],incval) 18 | 19 | assert np.allclose(W.op.get_value(), 20 | np.array( 21 | [[ 0., 0., 0.], 22 | [ 0., 1., 2.], 23 | [ 3., 4., 5.], 24 | [ 0., 0., 0.], 25 | [ 6., 7., 8.]])) 26 | 27 | 28 | # Now let's test non-fancy slice along zeroth dimension 29 | 30 | @across_configs(backends=("python","native"), precisions=("single","double")) 31 | def test_incsubtensor1(): 32 | W = cgt.shared(np.zeros((5,3)), name="W") 33 | inc = cgt.matrix() # we'll increment W by this matrix 34 | incval = np.arange(9).reshape(3,3) 35 | 36 | start = cgt.scalar(dtype='i8') 37 | stop = cgt.scalar(dtype='i8') 38 | updates = {W : cgt.inc_subtensor(W, slice(start, stop), inc)} 39 | f = cgt.function([start,stop,inc],[],updates=updates) 40 | f(0,3,incval) 41 | assert np.allclose(W.op.get_value(), 42 | np.array( 43 | [ 44 | [ 0., 1., 2.], 45 | [ 3., 4., 5.], 46 | [ 6., 7., 8.], 47 | [ 0., 0., 0.], 48 | [ 0., 0., 0.], 49 | ])) 50 | 51 | # Now let's test the last kind of slice, where we have int arrays on each dimension 52 | 53 | @across_configs(backends=("python","native"), precisions=("single","double")) 54 | def test_incsubtensor2(): 55 | W = cgt.shared(np.zeros((5,3)), name="W") 56 | i0 = cgt.vector(dtype='i8') 57 | i1 = cgt.vector(dtype='i8') 58 | inc = cgt.vector() 59 | 60 | updates2 = {W : cgt.inc_subtensor(W, (i0,i1), inc)} 61 | f2 = cgt.function([i0,i1,inc],[],updates=updates2) 62 | f2([0,1,2,2],[0,1,2,2],[1,2,3,4]) 63 | assert np.allclose(W.op.get_value(), 64 | np.array( 65 | [ 66 | [ 1., 0., 0.], 67 | [ 0., 2., 0.], 68 | [ 0., 0., 7.], 69 | [ 0., 0., 0.], 70 | [ 0., 0., 0.], 71 | ])) 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /cgt/tests/test_informative_errors.py: -------------------------------------------------------------------------------- 1 | import cgt, numpy as np 2 | from nose.tools import raises 3 | from StringIO import StringIO 4 | import sys 5 | 6 | class CaptureStderr(object): 7 | def __init__(self): 8 | self.origstderr = sys.stderr 9 | def __enter__(self): 10 | self.s = StringIO() 11 | sys.stderr = self.s 12 | return self.s 13 | def __exit__(self, *args): 14 | sys.stderr = self.origstderr 15 | 16 | @raises(RuntimeError) 17 | def test_shape_err(): 18 | with CaptureStderr(): 19 | with cgt.scoped_update_config(debug=True, backend="python"): 20 | x = cgt.vector() 21 | y = cgt.vector() 22 | f = cgt.function([x,y],x+y) 23 | f(np.zeros(3),np.zeros(4)) 24 | 25 | if __name__ == "__main__": 26 | import nose 27 | nose.runmodule() 28 | -------------------------------------------------------------------------------- /cgt/tests/test_input_conversions.py: -------------------------------------------------------------------------------- 1 | from cgt.tests import across_configs 2 | import cgt, numpy as np 3 | 4 | @across_configs 5 | def test_noncontiguous_matrix(): 6 | 7 | x = np.arange(1,7).reshape(2,3).astype(cgt.floatX) 8 | result = np.log(x.sum(axis=0)).sum() 9 | 10 | 11 | xvar = cgt.matrix() 12 | f = cgt.function([xvar],cgt.log(xvar.sum(axis=0)).sum()) 13 | 14 | 15 | assert np.allclose( f(np.asarray(x, order='C')), result) 16 | assert np.allclose( f(np.asarray(x, order='C', dtype='int64')), result) 17 | assert np.allclose( f(np.asarray(x, order='F')),
result) 18 | 19 | X = np.zeros((4,6)) 20 | X[::2,::2] = x 21 | assert np.allclose( f(X[::2,::2]), result) 22 | 23 | @across_configs 24 | def test_scalar_input(): 25 | x = cgt.scalar() 26 | f = cgt.function([x], x**2) 27 | xval = 2 28 | yval = 4 29 | assert np.allclose(f(2), 4) 30 | assert np.allclose(f(2.0), 4) 31 | assert np.allclose(f(np.array(2)), 4) 32 | assert np.allclose(f(np.array(2.0)), 4) 33 | assert np.allclose(f(np.array([2])[0]), 4) 34 | assert np.allclose(f(np.array([2.0])[0]), 4) 35 | -------------------------------------------------------------------------------- /cgt/tests/test_linreg.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cgt 3 | from cgt.tests import across_configs 4 | 5 | @across_configs 6 | def test_linreg(): 7 | N = 10 8 | K = 3 9 | 10 | Xval = np.random.randn(N,K) 11 | wval = np.random.randn(K) 12 | bval = np.random.randn() 13 | yval = np.random.randn(N) 14 | 15 | X_nk = cgt.matrix("X") 16 | y_n = cgt.vector("y") 17 | w_k = cgt.vector("w") 18 | b = cgt.scalar(name="b") 19 | 20 | ypred = cgt.dot(X_nk, w_k) + b 21 | 22 | err = cgt.sum(cgt.square(ypred - y_n)) 23 | g = cgt.grad(err, [w_k, b]) 24 | 25 | g_simple,an,_ = cgt.core.simplify_and_analyze(g) 26 | 27 | 28 | print "Loss function:" 29 | cgt.print_tree([err]) 30 | print "Gradient:" 31 | cgt.print_tree(g) 32 | 33 | print "Gradient simplified" 34 | cgt.print_tree(g_simple, nodefn=lambda node,o: o.write(" " + an["node2hash"][node][:5])) 35 | 36 | print "-------" 37 | 38 | d = {X_nk : Xval, w_k : wval, b : bval, y_n : yval} 39 | 40 | np.testing.assert_allclose(cgt.numeric_eval(err,d), np.linalg.norm(Xval.dot(wval) + bval - yval)**2, 41 | atol={"single":1e-3,"double":1e-6}[cgt.get_precision()]) 42 | np.testing.assert_allclose(cgt.numeric_eval(g[0],d), 2 * Xval.T.dot(Xval.dot(wval) + bval - yval), 43 | atol={"single":1e-3,"double":1e-6}[cgt.get_precision()]) 44 | np.testing.assert_allclose(cgt.numeric_eval(g[1],d), 2 * np.sum(Xval.dot(wval) + bval - yval, 0), 45 | atol={"single":1e-3,"double":1e-6}[cgt.get_precision()]) 46 | # add_log_entry("linreg", collect(values(d)), collect(keys(d)), [err], [g]) 47 | 48 | if __name__ == "__main__": 49 | import nose 50 | nose.runmodule() 51 | -------------------------------------------------------------------------------- /cgt/tests/test_multi_output.py: -------------------------------------------------------------------------------- 1 | import cgt, numpy as np 2 | from cgt import core 3 | from cgt.tests import across_configs 4 | 5 | class SinCos(core.Op): 6 | return_type = "byval" 7 | available_impls = ("python",) 8 | def typ_apply(self, inputs): 9 | assert inputs[0].dtype == 'f4' 10 | d = inputs[0].ndim 11 | return core.TupleType(core.TensorType(cgt.floatX, d), core.TensorType(cgt.floatX, d)) 12 | def shp_apply(self, inputs): 13 | return (cgt.shape(inputs[0]), cgt.shape(inputs[0])) 14 | def get_py_func(self, inputs): 15 | def f(reads): 16 | x = reads[0] 17 | return (np.sin(x), np.cos(x)) 18 | return f 19 | # c_extra_link_flags = "-lm" 20 | # c_extra_includes = ["math.h"] 21 | # def get_c_impl(self, inputs): 22 | # code = """ 23 | # void CGT_FUNCNAME(void* cldata, cgt_array** io) { 24 | # float* x = io[0]->data(); 25 | # float* y = io[1]->data(); 26 | # float* z = io[2]->data(); 27 | # y[0] = sinf(x[0]); 28 | # z[0] = cosf(x[0]); 29 | # }""" 30 | # return CImpl(code, includes=["math.h"], link_flags="-lm") 31 | 32 | class SinCos2(core.Op): 33 | return_type = "byref" 34 | available_impls = 
("python","native_cpu") 35 | def typ_apply(self, inputs): 36 | ndim = inputs[0].ndim 37 | return core.TupleType(core.TensorType(cgt.floatX, ndim), core.TensorType(cgt.floatX, ndim)) 38 | def shp_apply(self, inputs): 39 | return (cgt.shape(inputs[0]), cgt.shape(inputs[0])) 40 | def get_py_func(self, inputs): 41 | def f(reads, write): 42 | x = reads[0] 43 | write[0][...] = np.sin(x) 44 | write[1][...] = np.cos(x) 45 | return f 46 | def get_native_compile_info(self, input_types,devtype): 47 | assert devtype=="cpu" 48 | code = """ 49 | CGT_EXPORT_C void $function(void* cldata, cgtArray** reads, cgtTuple* write) { 50 | float* x = static_cast(reads[0]->data()); 51 | float* y = static_cast(static_cast(write->getitem(0))->data()); 52 | float* z = static_cast(static_cast(write->getitem(1))->data()); 53 | for (int i=0; i < reads[0]->size(); ++i) { 54 | y[i] = sinf(x[i]); 55 | z[i] = cosf(x[i]); 56 | } 57 | }""" 58 | return core.NativeCompileInfo(code, includes=["math.h"], link_flags="-lm") 59 | 60 | @across_configs(precisions=("single",)) 61 | def test_multi_output(): 62 | for x in (cgt.scalar('x'), cgt.vector('x'), cgt.matrix('x')): 63 | for cls in (SinCos, SinCos2): 64 | y,z = core.unpack(core.Result(cls(), [x])) 65 | xnum = np.ones((3,)*x.ndim, cgt.floatX) 66 | correct = (np.sin(xnum),np.cos(xnum)) 67 | yznum = cgt.numeric_eval([y,z], {x:xnum}) 68 | np.testing.assert_allclose(yznum, correct) 69 | f = cgt.function([x],[y,z]) 70 | np.testing.assert_allclose(f(xnum), correct) 71 | 72 | if __name__ == "__main__": 73 | import nose 74 | nose.runmodule() 75 | -------------------------------------------------------------------------------- /cgt/tests/test_optimizers.py: -------------------------------------------------------------------------------- 1 | import cgt 2 | import cgt.nn as nn 3 | from cgt.tests import across_configs 4 | import numpy as np 5 | 6 | # Torch values obtained via this script: https://gist.github.com/ebenolson/931e879ed38f257253d2 7 | 8 | torch_values = { 9 | 'sgd': [0.81707280688755,0.6648326359915,0.5386151140949], 10 | 'momentum': [0.6848486952183,0.44803321781003,0.27431190123502], 11 | # TORCH: 12 | # 'nesterov_momentum': [0.67466543592725,0.44108468114241,0.2769002108997], 13 | # OURS: 14 | 'nesterov_momentum' : [0.6848486661911011, 0.4480332136154175, 0.2743118703365326], 15 | # Different because we're using 16 | # version from http://arxiv.org/abs/1212.0901v2, which is returning "lookahead parameters" 17 | 'adagrad': [0.55373120047759,0.55373120041518,0.55373120039438], 18 | 'rmsprop': [0.83205403985348,0.83205322744821,0.83205295664444], 19 | 'adadelta': [0.95453237704725,0.9545237471374,0.95452214847397], 20 | 'adam': [0.90034973381771,0.90034969365796,0.90034968027137], 21 | } 22 | scales = [0.1, 0.2, 0.3] 23 | 24 | 25 | def f(X, scale): 26 | return (scale*X**2).sum() 27 | 28 | @across_configs 29 | def test_sgd(): 30 | results = [] 31 | for scale in scales: 32 | A = cgt.shared(1.0) 33 | B = cgt.shared(1.0) 34 | updates = nn.sgd(f(A, scale) + f(B, scale), [A, B], learning_rate=0.1) 35 | do_update = cgt.function([], [], updates=updates) 36 | for _ in range(10): 37 | do_update() 38 | 39 | assert np.allclose(A.op.get_value(), B.op.get_value()) 40 | results.append(A.op.get_value().copy()) 41 | 42 | assert np.allclose(results, torch_values['sgd']) 43 | 44 | 45 | @across_configs 46 | def test_momentum(): 47 | results = [] 48 | for scale in scales: 49 | A = cgt.shared(1.0) 50 | B = cgt.shared(1.0) 51 | updates = nn.momentum(f(A, scale) + f(B, scale), [A, B], learning_rate=0.1, 
mu=0.5) 52 | do_update = cgt.function([], [], updates=updates) 53 | for _ in range(10): 54 | do_update() 55 | 56 | assert np.allclose(A.op.get_value(), B.op.get_value()) 57 | results.append(A.op.get_value().copy()) 58 | 59 | assert np.allclose(results, torch_values['momentum']) 60 | 61 | 62 | @across_configs 63 | def test_nesterov_momentum(): 64 | results = [] 65 | for scale in scales: 66 | A = cgt.shared(1.0) 67 | B = cgt.shared(1.0) 68 | updates = nn.nesterov_momentum(f(A, scale) + f(B, scale), [A, B], learning_rate=0.1, mu=0.5) 69 | do_update = cgt.function([], [], updates=updates) 70 | for _ in range(10): 71 | do_update() 72 | assert np.allclose(A.op.get_value(), B.op.get_value()) 73 | results.append(A.op.get_value().copy()) 74 | 75 | assert np.allclose(results, torch_values['nesterov_momentum']) 76 | 77 | 78 | @across_configs 79 | def test_adagrad(): 80 | results = [] 81 | for scale in scales: 82 | A = cgt.shared(1.0) 83 | B = cgt.shared(1.0) 84 | updates = nn.adagrad(f(A, scale) + f(B, scale), [A, B], learning_rate=0.1) 85 | do_update = cgt.function([], [], updates=updates) 86 | for _ in range(10): 87 | do_update() 88 | 89 | assert np.allclose(A.op.get_value(), B.op.get_value()) 90 | results.append(A.op.get_value().copy()) 91 | 92 | assert np.allclose(results, torch_values['adagrad']) 93 | 94 | 95 | @across_configs 96 | def test_rmsprop(): 97 | results = [] 98 | for scale in scales: 99 | A = cgt.shared(1.0) 100 | B = cgt.shared(1.0) 101 | updates = nn.rmsprop(f(A, scale) + f(B, scale), [A, B], learning_rate=0.01) 102 | do_update = cgt.function([], [], updates=updates) 103 | for _ in range(10): 104 | do_update() 105 | 106 | assert np.allclose(A.op.get_value(), B.op.get_value()) 107 | results.append(A.op.get_value().copy()) 108 | 109 | assert np.allclose(results, torch_values['rmsprop']) 110 | 111 | 112 | @across_configs 113 | def test_adadelta(): 114 | results = [] 115 | for scale in scales: 116 | A = cgt.shared(1.0) 117 | B = cgt.shared(1.0) 118 | updates = nn.adadelta(f(A, scale) + f(B, scale), [A, B]) 119 | do_update = cgt.function([], [], updates=updates) 120 | for _ in range(10): 121 | do_update() 122 | 123 | assert np.allclose(A.op.get_value(), B.op.get_value()) 124 | results.append(A.op.get_value().copy()) 125 | 126 | assert np.allclose(results, torch_values['adadelta']) 127 | 128 | if __name__ == "__main__": 129 | import nose 130 | nose.runmodule() 131 | -------------------------------------------------------------------------------- /cgt/tests/test_par_interp.py: -------------------------------------------------------------------------------- 1 | import cgt 2 | from cgt.core import Op 3 | from cgt.tests import across_configs 4 | import time 5 | import numpy as np 6 | from numpy.random import randn, seed 7 | 8 | # NOTE: observe differences clearly if you add a time.sleep to Mul21 9 | 10 | 11 | class SleepFor(Op): 12 | return_type="byval" 13 | available_impls=("native_cpu",) 14 | def get_native_compile_info(self, _, __): 15 | code=r""" 16 | CGT_EXPORT_C cgtArray* $function(void* cldata, cgtArray** reads) { 17 | float t = reads[1]->at(0); 18 | usleep(t * 1000000); 19 | return reads[0]; 20 | }""" 21 | return cgt.core.NativeCompileInfo(code,includes=["unistd.h"]) 22 | def typ_apply(self, input_types): 23 | assert input_types[1].dtype == cgt.floatX 24 | return input_types[0] 25 | def shp_apply(self, inputs): 26 | return cgt.shape(inputs[0]) 27 | 28 | 29 | def sleepfor(x, t): 30 | return cgt.core.Result(SleepFor(), [x, t]) 31 | 32 | @across_configs(backends=("native",)) 33 | def test_sleeps(): 34
| with cgt.scoped_update_config(parallel=True): 35 | x = cgt.scalar('x') 36 | y1 = sleepfor(x, .1) 37 | y2 = sleepfor(x, .1) 38 | 39 | z=y1+y2 40 | fpar = cgt.function([x],z) 41 | 42 | tstart = time.time() 43 | fpar(0) 44 | elapsed = time.time() - tstart 45 | assert elapsed < .11 46 | 47 | 48 | @across_configs(backends=("native",)) 49 | def test_matmuls(): 50 | with cgt.scoped_update_config(parallel=True): 51 | 52 | m = 8 53 | d = 1000 54 | 55 | # build graph 56 | 57 | X = cgt.matrix("X") 58 | Y = cgt.matrix("Y") 59 | loss=0 60 | for k in xrange(m): 61 | # loss = loss+cgt.sin(X*Y+k).sum() 62 | loss = loss+(X.dot(Y+k)).sum() 63 | 64 | f = cgt.function([X,Y], loss) 65 | 66 | # test things out! 67 | 68 | seed(0) 69 | 70 | X_val = randn(d, d) 71 | Y_val = randn(d, d) 72 | vals = [X_val, Y_val] 73 | 74 | tic = time.time() 75 | out = f(*vals) 76 | toc = time.time() 77 | 78 | print toc-tic 79 | 80 | 81 | @across_configs(backends=("native",)) 82 | def test_update(): 83 | with cgt.scoped_update_config(parallel=True): 84 | xval = np.array(1.5) 85 | x = cgt.shared(xval) 86 | f = cgt.function([], x.sum(), updates=[(x,x+1)]) 87 | before = x.op.get_value().copy() 88 | f() 89 | after = x.op.get_value() 90 | assert np.allclose(after , before+1) 91 | 92 | 93 | if __name__ == "__main__": 94 | import nose 95 | nose.runmodule() 96 | -------------------------------------------------------------------------------- /cgt/tests/test_scalars.py: -------------------------------------------------------------------------------- 1 | import cgt, numpy as np, numpy.random as nr, itertools as it 2 | from cgt import core, utils 3 | from cgt.numeric_diff import numeric_grad 4 | from cgt.tests import across_configs 5 | 6 | DISPLAY=False 7 | 8 | @across_configs 9 | def test_scalars(): 10 | np.random.seed(0) 11 | x = cgt.scalar('x') 12 | y = cgt.scalar('y') 13 | z = cgt.scalar('z') 14 | vars = [x,y,z] #pylint: disable=W0622 15 | vals = nr.rand(len(vars))+1 16 | 17 | PROB2RESULT = {} 18 | 19 | for ((key,_), cls) in it.chain( 20 | it.izip(core.UNARY_INFO.items(),it.repeat(core.ElwiseUnary)), 21 | it.izip(core.BINARY_INFO.items(),it.repeat(core.ElwiseBinary)) 22 | ): 23 | if key == "conj": 24 | print "skipping conj" 25 | continue 26 | utils.colorprint(utils.Color.YELLOW, "Testing %s\n"%key) 27 | if cls == core.ElwiseUnary: 28 | n_in = 1 29 | op = cls(key) 30 | else: 31 | n_in = 2 32 | op = cls(key, (True,True)) 33 | inputvars = vars[0:n_in] 34 | inputvals = vals[0:n_in] 35 | out = core.Result(op, inputvars) 36 | f = cgt.function(inputvars, out) 37 | try: 38 | grads = cgt.grad(out, inputvars) 39 | except core.NonDifferentiable: 40 | print "nondiff" 41 | continue 42 | if DISPLAY: 43 | print "Function:" 44 | cgt.print_tree(out) 45 | print "Gradient original:" 46 | cgt.print_tree(grads) 47 | print "Gradient simplified:" 48 | grads_simple = core.simplify(grads) 49 | if DISPLAY: cgt.print_tree(grads_simple) 50 | gradf = cgt.function(inputvars, grads) 51 | eps = {"single":1e-4,"double":1e-9}[cgt.get_precision()] 52 | nugrad = numeric_grad(lambda li: f(*li), inputvals,eps=eps) #pylint: disable=W0640 53 | cgtgrad = gradf(*inputvals) 54 | np.testing.assert_almost_equal(nugrad,cgtgrad,decimal={"single":3,"double":6}[cgt.get_precision()]) 55 | 56 | grad_count = core.count_nodes(grads_simple) 57 | PROB2RESULT[key] = {} 58 | PROB2RESULT[key]["grad"] = grad_count 59 | 60 | if DISPLAY: 61 | from thirdparty.tabulate import tabulate 62 | print tabulate([[key,val["grad"]] for (key,val) in PROB2RESULT.iteritems()],headers=["funcname","gradcount"]) 
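# Note on tolerances: central differences lose roughly half the significant
# digits of the float type, which is why eps and the comparison precision are
# paired above (eps=1e-4 with 3 decimals for single, eps=1e-9 with 6 decimals
# for double).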
63 | 64 | if __name__ == "__main__": 65 | import nose 66 | nose.runmodule() 67 | -------------------------------------------------------------------------------- /cgt/tests/test_stack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cgt 3 | from cgt.tests import across_configs 4 | 5 | @across_configs 6 | def test_stack(): 7 | x = cgt.scalar() 8 | y = cgt.scalar() 9 | z = cgt.scalar() 10 | s0 = cgt.stack([x, y, z], axis=0) 11 | assert cgt.numeric_eval(s0, {x: 1, y: 2, z: 3}).shape == (3,) 12 | 13 | x = cgt.vector() 14 | y = cgt.vector() 15 | z = cgt.vector() 16 | v0 = cgt.stack([x, y, z], axis=0) 17 | assert cgt.numeric_eval(v0, {x: np.zeros(2), y: np.zeros(2), z: np.zeros(2)}).shape == (3, 2) 18 | v1 = cgt.stack([x, y, z], axis=1) 19 | assert cgt.numeric_eval(v1, {x: np.zeros(2), y: np.ones(2), z: np.zeros(2)}).shape == (2, 3) 20 | 21 | x = cgt.matrix() 22 | y = cgt.matrix() 23 | z = cgt.matrix() 24 | m0 = cgt.stack([x, y, z], axis=0) 25 | assert cgt.numeric_eval(m0, {x: np.zeros((2, 4)), y: np.zeros((2, 4)), z: np.zeros((2, 4))}).shape == (3, 2, 4) 26 | m1 = cgt.stack([x, y, z], axis=1) 27 | assert cgt.numeric_eval(m1, {x: np.zeros((2, 4)), y: np.zeros((2, 4)), z: np.zeros((2, 4))}).shape == (2, 3, 4) 28 | m2 = cgt.stack([x, y, z], axis=2) 29 | assert cgt.numeric_eval(m2, {x: np.zeros((2, 4)), y: np.zeros((2, 4)), z: np.zeros((2, 4))}).shape == (2, 4, 3) 30 | 31 | if __name__ == "__main__": 32 | import nose 33 | nose.runmodule() 34 | -------------------------------------------------------------------------------- /cgt/utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import hashlib 4 | import time 5 | 6 | # ================================================================ 7 | # Utils 8 | # ================================================================ 9 | 10 | class Color: #pylint: disable=W0232 11 | GRAY=30 12 | RED=31 13 | GREEN=32 14 | YELLOW=33 15 | BLUE=34 16 | MAGENTA=35 17 | CYAN=36 18 | WHITE=37 19 | CRIMSON=38 20 | 21 | 22 | def colorize(num, string, bold=False, highlight = False): 23 | assert isinstance(num, int) 24 | attr = [] 25 | if highlight: num += 10 26 | attr.append(str(num)) 27 | if bold: attr.append('1') 28 | return '\x1b[%sm%s\x1b[0m' % (';'.join(attr), string) 29 | 30 | def colorprint(colorcode, text, o=sys.stdout): 31 | o.write(colorize(colorcode, text)) 32 | 33 | def warn(msg): 34 | print colorize(Color.YELLOW, msg) 35 | 36 | def error(msg): 37 | print colorize(Color.RED, msg) 38 | 39 | def is_singleton(x): 40 | return isinstance(x, np.ndarray) and np.prod(x.shape)==1 41 | 42 | def safezip(x,y): 43 | assert len(x) == len(y) 44 | return zip(x,y) 45 | 46 | def safezip3(x,y,z): 47 | assert len(x) == len(y) == len(z) 48 | return zip(x,y,z) 49 | 50 | 51 | def allsame(xs): 52 | out = True 53 | if len(xs)>0: 54 | x0 = xs[0] 55 | for x in xs[1:]: 56 | out &= x==x0 57 | return out 58 | 59 | def invert_perm(x): 60 | return list(np.argsort(x)) 61 | 62 | def _hash_seq(args): 63 | hashobj = hashlib.md5() 64 | for a in args: hashobj.update(a) 65 | return hashobj.hexdigest() 66 | 67 | def hash_seq1(*args): 68 | return _hash_seq(args) 69 | 70 | MESSAGE_DEPTH = 0 71 | class Message(object): 72 | def __init__(self, msg): 73 | self.msg = msg 74 | def __enter__(self): 75 | global MESSAGE_DEPTH #pylint: disable=W0603 76 | print colorize(Color.MAGENTA, '\t'*MESSAGE_DEPTH + '=: ' + self.msg) 77 | self.tstart = time.time() 78 | 
MESSAGE_DEPTH += 1 79 | def __exit__(self, etype, *args): 80 | global MESSAGE_DEPTH #pylint: disable=W0603 81 | MESSAGE_DEPTH -= 1 82 | maybe_exc = "" if etype is None else " (with exception)" 83 | print colorize(Color.MAGENTA, '\t'*MESSAGE_DEPTH + "done%s in %.3f seconds"%(maybe_exc, time.time() - self.tstart)) 84 | -------------------------------------------------------------------------------- /cgtrc.example: -------------------------------------------------------------------------------- 1 | # see cgtrc_spec.ini for explanation 2 | 3 | debug = False 4 | precision = single 5 | backend = python 6 | cache_dir = ~/.cgt_cache 7 | enable_inplace_opt = True 8 | enable_simplification = True 9 | parallel = False 10 | num_threads = 4 11 | 12 | force_python_impl = False 13 | debug_cpp = False 14 | verbose = False -------------------------------------------------------------------------------- /cgtrc_spec.ini: -------------------------------------------------------------------------------- 1 | # DEVELOPERS: when you edit this file, please also edit cgtrc.example 2 | 3 | # User options 4 | # ---------------------- 5 | 6 | # At the cost of some overhead, 7 | # store information in the computation graph that helps with debugging 8 | debug = boolean(default=False) 9 | 10 | # single or double precision: 11 | precision = string(default=single) 12 | 13 | # backend=python means using a pure python module to execute the graph, and using python implementations of ops whenever they exist 14 | # backend=native means using the compiled execution graph interpreter, and using the native (c++) implementation of ops 15 | backend = option("python","native",default="python") 16 | 17 | # Where to put generated files 18 | cache_dir = string(default="~/.cgt_cache") 19 | 20 | # Enable in-place optimizations. 21 | enable_inplace_opt = boolean(default=True) 22 | 23 | # Enable simplifications of the graph, e.g. arithmetic simplifications like x*1=x 24 | enable_simplification = boolean(default=True) 25 | 26 | # Use parallel execution graph interpreter 27 | parallel = boolean(default=False) 28 | 29 | # Number of threads used by the parallel interpreter 30 | num_threads = integer(default=4) 31 | 32 | # Developer Options 33 | # ----------------- 34 | 35 | # Force native backend to use python 36 | force_python_impl = boolean(default=False) 37 | 38 | # Compile C++ files with debug flags 39 | debug_cpp = boolean(default=False) # use debug flags when compiling c++ 40 | 41 | # Print lots of diagnostic information 42 | # (we'll break this down at some point) 43 | verbose = boolean(default=False) -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables.
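# (Set PAPER=a4 or PAPER=letter on the command line, e.g. "make latexpdf PAPER=a4";
# $(PAPEROPT_$(PAPER)) below then expands to the matching -D latex_paper_size flag.)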
16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make <target>' where <target> is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | rm -rf $(BUILDDIR)/* 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | spelling: 58 | $(SPHINXBUILD) -b spelling $(ALLSPHINXOPTS) $(BUILDDIR)/spelling 59 | @echo 60 | @echo "Spelling check done" 61 | 62 | dirhtml: 63 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 64 | @echo 65 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 66 | 67 | singlehtml: 68 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 69 | @echo 70 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 71 | 72 | pickle: 73 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 74 | @echo 75 | @echo "Build finished; now you can process the pickle files." 76 | 77 | json: 78 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 79 | @echo 80 | @echo "Build finished; now you can process the JSON files." 81 | 82 | htmlhelp: 83 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 84 | @echo 85 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 86 | ".hhp project file in $(BUILDDIR)/htmlhelp."
87 | 88 | qthelp: 89 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 90 | @echo 91 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 92 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 93 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/CGT.qhcp" 94 | @echo "To view the help file:" 95 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/CGT.qhc" 96 | 97 | devhelp: 98 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 99 | @echo 100 | @echo "Build finished." 101 | @echo "To view the help file:" 102 | @echo "# mkdir -p $$HOME/.local/share/devhelp/CGT" 103 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/CGT" 104 | @echo "# devhelp" 105 | 106 | epub: 107 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 108 | @echo 109 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 110 | 111 | latex: 112 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 113 | @echo 114 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 115 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 116 | "(use \`make latexpdf' here to do that automatically)." 117 | 118 | latexpdf: 119 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 120 | @echo "Running LaTeX files through pdflatex..." 121 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 122 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 123 | 124 | latexpdfja: 125 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 126 | @echo "Running LaTeX files through platex and dvipdfmx..." 127 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 128 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 129 | 130 | text: 131 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 132 | @echo 133 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 134 | 135 | man: 136 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 137 | @echo 138 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 139 | 140 | texinfo: 141 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 142 | @echo 143 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 144 | @echo "Run \`make' in that directory to run these through makeinfo" \ 145 | "(use \`make info' here to do that automatically)." 146 | 147 | info: 148 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 149 | @echo "Running Texinfo files through makeinfo..." 150 | make -C $(BUILDDIR)/texinfo info 151 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 152 | 153 | gettext: 154 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 155 | @echo 156 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 157 | 158 | changes: 159 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 160 | @echo 161 | @echo "The overview file is in $(BUILDDIR)/changes." 162 | 163 | linkcheck: 164 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 165 | @echo 166 | @echo "Link check complete; look for any errors in the above output " \ 167 | "or in $(BUILDDIR)/linkcheck/output.txt." 168 | 169 | doctest: 170 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 171 | @echo "Testing of doctests in the sources finished, look at the " \ 172 | "results in $(BUILDDIR)/doctest/output.txt." 173 | 174 | xml: 175 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 176 | @echo 177 | @echo "Build finished. 
The XML files are in $(BUILDDIR)/xml." 178 | 179 | pseudoxml: 180 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 181 | @echo 182 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 183 | -------------------------------------------------------------------------------- /doc/README: -------------------------------------------------------------------------------- 1 | BUILDING THE DOCS 2 | 3 | pip install -r requirements.txt 4 | make html 5 | 6 | then open ``_build/html/index.html`` -------------------------------------------------------------------------------- /doc/_static/my_theme.css: -------------------------------------------------------------------------------- 1 | html_style = 'css/my_theme.css' -------------------------------------------------------------------------------- /doc/build_and_view.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e 3 | 4 | export PATH=/Users/joschu/Src/anaconda/bin:$PATH # In case this is being run by sublime 5 | make html 6 | 7 | index=_build/html/index.html 8 | if [ `uname` = Linux ] 9 | then 10 | google-chrome $index 11 | else 12 | open -a Google\ Chrome $index 13 | fi -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # CGT documentation build configuration file, created by 4 | # sphinx-quickstart on Mon May 11 15:24:24 2015. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | 18 | 19 | import sphinx_bootstrap_theme 20 | html_theme = 'bootstrap' 21 | html_theme_path = sphinx_bootstrap_theme.get_html_theme_path() 22 | 23 | # import cloud_sptheme 24 | # html_theme = 'cloud' 25 | # html_theme_path = [cloud_sptheme.get_theme_dir()] 26 | 27 | 28 | # If extensions (or modules to document with autodoc) are in another directory, 29 | # add these directories to sys.path here. If the directory is relative to the 30 | # documentation root, use os.path.abspath to make it absolute, like shown here. 31 | #sys.path.insert(0, os.path.abspath('.')) 32 | 33 | # -- General configuration ------------------------------------------------ 34 | 35 | # If your documentation needs a minimal Sphinx version, state it here. 36 | #needs_sphinx = '1.0' 37 | 38 | # Add any Sphinx extension module names here, as strings. They can be 39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 40 | # ones. 41 | extensions = ["notebook_sphinxext1","sphinx.ext.mathjax","sphinxcontrib.spelling","sphinx.ext.autodoc"] 42 | 43 | spelling_lang='en_US' 44 | spelling_word_list_filename='spelling_wordlist.txt' 45 | spelling_show_suggestions=False 46 | 47 | 48 | # Add any paths that contain templates here, relative to this directory. 49 | templates_path = ['_templates'] 50 | 51 | # The suffix of source filenames. 52 | source_suffix = '.rst' 53 | 54 | # The encoding of source files. 55 | #source_encoding = 'utf-8-sig' 56 | 57 | # The master toctree document. 58 | master_doc = 'index' 59 | 60 | # General information about the project. 61 | project = u'CGT' 62 | copyright = u'2015, John Schulman et al.' 
63 | 64 | # The version info for the project you're documenting, acts as replacement for 65 | # |version| and |release|, also used in various other places throughout the 66 | # built documents. 67 | # 68 | # The short X.Y version. 69 | version = '0.1' 70 | # The full version, including alpha/beta/rc tags. 71 | release = '0.1' 72 | 73 | # The language for content autogenerated by Sphinx. Refer to documentation 74 | # for a list of supported languages. 75 | #language = None 76 | 77 | # There are two options for replacing |today|: either, you set today to some 78 | # non-false value, then it is used: 79 | #today = '' 80 | # Else, today_fmt is used as the format for a strftime call. 81 | #today_fmt = '%B %d, %Y' 82 | 83 | # List of patterns, relative to source directory, that match files and 84 | # directories to ignore when looking for source files. 85 | exclude_patterns = ['_build'] 86 | 87 | # The reST default role (used for this markup: `text`) to use for all 88 | # documents. 89 | #default_role = None 90 | 91 | # If true, '()' will be appended to :func: etc. cross-reference text. 92 | #add_function_parentheses = True 93 | 94 | # If true, the current module name will be prepended to all description 95 | # unit titles (such as .. function::). 96 | #add_module_names = True 97 | 98 | # If true, sectionauthor and moduleauthor directives will be shown in the 99 | # output. They are ignored by default. 100 | show_authors = False 101 | 102 | # The name of the Pygments (syntax highlighting) style to use. 103 | pygments_style = 'sphinx' 104 | 105 | # A list of ignored prefixes for module index sorting. 106 | #modindex_common_prefix = [] 107 | 108 | # If true, keep warnings as "system message" paragraphs in the built documents. 109 | #keep_warnings = False 110 | 111 | 112 | # -- Options for HTML output ---------------------------------------------- 113 | 114 | # The theme to use for HTML and HTML Help pages. See the documentation for 115 | # a list of builtin themes. 116 | # html_theme = html_theme # or redcloud 117 | 118 | # Theme options are theme-specific and customize the look and feel of a theme 119 | # further. For a list of options available for each theme, see the 120 | # documentation. 121 | html_theme_options = { 122 | 'navbar_links': [ 123 | ("GitHub", "https://github.com/joschu/cgt", True), 124 | ], 125 | 126 | } 127 | 128 | # Add any paths that contain custom themes here, relative to this directory. 129 | 130 | # The name for this set of Sphinx documents. If None, it defaults to 131 | # " v documentation". 132 | #html_title = None 133 | 134 | # A shorter title for the navigation bar. Default is the same as html_title. 135 | #html_short_title = None 136 | 137 | # The name of an image file (relative to this directory) to place at the top 138 | # of the sidebar. 139 | # html_logo = "cgt.png" 140 | 141 | # The name of an image file (within the static path) to use as favicon of the 142 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 143 | # pixels large. 144 | #html_favicon = None 145 | 146 | # Add any paths that contain custom static files (such as style sheets) here, 147 | # relative to this directory. They are copied after the builtin static files, 148 | # so a file named "default.css" will overwrite the builtin "default.css". 149 | html_static_path = ['_static'] 150 | 151 | # Add any extra paths that contain custom files (such as robots.txt or 152 | # .htaccess) here, relative to this directory. 
These files are copied 153 | # directly to the root of the documentation. 154 | #html_extra_path = [] 155 | 156 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 157 | # using the given strftime format. 158 | #html_last_updated_fmt = '%b %d, %Y' 159 | 160 | # If true, SmartyPants will be used to convert quotes and dashes to 161 | # typographically correct entities. 162 | #html_use_smartypants = True 163 | 164 | # Custom sidebar templates, maps document names to template names. 165 | #html_sidebars = {} 166 | 167 | # Additional templates that should be rendered to pages, maps page names to 168 | # template names. 169 | #html_additional_pages = {} 170 | 171 | # If false, no module index is generated. 172 | #html_domain_indices = True 173 | 174 | # If false, no index is generated. 175 | html_use_index = True 176 | 177 | # If true, the index is split into individual pages for each letter. 178 | #html_split_index = False 179 | 180 | # If true, links to the reST sources are added to the pages. 181 | html_show_sourcelink = False 182 | 183 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 184 | html_show_sphinx = False 185 | 186 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 187 | html_show_copyright = False 188 | 189 | # If true, an OpenSearch description file will be output, and all pages will 190 | # contain a tag referring to it. The value of this option must be the 191 | # base URL from which the finished HTML is served. 192 | #html_use_opensearch = '' 193 | 194 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 195 | #html_file_suffix = None 196 | 197 | # Output file base name for HTML help builder. 198 | htmlhelp_basename = 'CGTdoc' 199 | 200 | 201 | # -- Options for LaTeX output --------------------------------------------- 202 | 203 | latex_elements = { 204 | # The paper size ('letterpaper' or 'a4paper'). 205 | #'papersize': 'letterpaper', 206 | 207 | # The font size ('10pt', '11pt' or '12pt'). 208 | #'pointsize': '10pt', 209 | 210 | # Additional stuff for the LaTeX preamble. 211 | #'preamble': '', 212 | } 213 | 214 | # Grouping the document tree into LaTeX files. List of tuples 215 | # (source start file, target name, title, 216 | # author, documentclass [howto, manual, or own class]). 217 | latex_documents = [ 218 | ('index', 'CGT.tex', u'CGT Documentation', 219 | u'John Schulman et al.', 'manual'), 220 | ] 221 | 222 | # The name of an image file (relative to this directory) to place at the top of 223 | # the title page. 224 | #latex_logo = None 225 | 226 | # For "manual" documents, if this is true, then toplevel headings are parts, 227 | # not chapters. 228 | #latex_use_parts = False 229 | 230 | # If true, show page references after internal links. 231 | #latex_show_pagerefs = False 232 | 233 | # If true, show URL addresses after external links. 234 | #latex_show_urls = False 235 | 236 | # Documents to append as an appendix to all manuals. 237 | #latex_appendices = [] 238 | 239 | # If false, no module index is generated. 240 | #latex_domain_indices = True 241 | 242 | 243 | # -- Options for manual page output --------------------------------------- 244 | 245 | # One entry per manual page. List of tuples 246 | # (source start file, name, description, authors, manual section). 247 | man_pages = [ 248 | ('index', 'cgt', u'CGT Documentation', 249 | [u'John Schulman et al.'], 1) 250 | ] 251 | 252 | # If true, show URL addresses after external links. 
253 | #man_show_urls = False 254 | 255 | 256 | # -- Options for Texinfo output ------------------------------------------- 257 | 258 | # Grouping the document tree into Texinfo files. List of tuples 259 | # (source start file, target name, title, author, 260 | # dir menu entry, description, category) 261 | texinfo_documents = [ 262 | ('index', 'CGT', u'CGT Documentation', 263 | u'John Schulman et al.', 'CGT', 'One line description of project.', 264 | 'Miscellaneous'), 265 | ] 266 | 267 | # Documents to append as an appendix to all manuals. 268 | #texinfo_appendices = [] 269 | 270 | # If false, no module index is generated. 271 | #texinfo_domain_indices = True 272 | 273 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 274 | #texinfo_show_urls = 'footnote' 275 | 276 | # If true, do not generate a @detailmenu in the "Top" node's menu. 277 | #texinfo_no_detailmenu = False 278 | 279 | 280 | latex_elements['preamble'] = """ 281 | 282 | """ 283 | 284 | 285 | 286 | -------------------------------------------------------------------------------- /doc/notebook_sphinxext1.py: -------------------------------------------------------------------------------- 1 | import os, shutil, string, glob 2 | from sphinx.util.compat import Directive 3 | from docutils import nodes 4 | from docutils.parsers.rst import directives 5 | from IPython.nbconvert import html, python 6 | from runipy.notebook_runner import NotebookRunner 7 | 8 | class NotebookDirective(Directive): 9 | """Insert an evaluated notebook into a document 10 | 11 | This uses runipy and nbconvert to transform a path to an unevaluated notebook 12 | into html suitable for embedding in a Sphinx document. 13 | """ 14 | required_arguments = 1 15 | optional_arguments = 1 16 | option_spec = {'skip_exceptions' : directives.flag} 17 | 18 | def run(self): 19 | # check if raw html is supported 20 | if not self.state.document.settings.raw_enabled: 21 | raise self.warning('"%s" directive disabled.' % self.name) 22 | 23 | # get path to notebook 24 | source_dir = os.path.dirname( 25 | os.path.abspath(self.state.document.current_source)) 26 | nb_basename = os.path.basename(self.arguments[0]) 27 | rst_file = self.state_machine.document.attributes['source'] 28 | rst_dir = os.path.abspath(os.path.dirname(rst_file)) 29 | nb_abs_path = self.arguments[0]#os.path.join(rst_dir, nb_basename) 30 | 31 | # Move files around. 
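        # (explanatory aside) rel_dir/rel_path express the notebook's location
        # relative to the Sphinx source root, while dest_dir/dest_path point at
        # the copy placed in the build output, so the links generated further
        # down can reference the copied files.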
32 | rel_dir = os.path.relpath(rst_dir, setup.confdir)
33 | rel_path = os.path.join(rel_dir, nb_basename)
34 | dest_dir = setup.app.builder.outdir
35 | dest_path = os.path.join(dest_dir, nb_basename)
36 | 
37 | print dest_path, nb_abs_path
38 | 
39 | if not os.path.exists(dest_dir):
40 | os.makedirs(dest_dir)
41 | 
42 | # Copy unevaluated script
43 | try:
44 | shutil.copyfile(nb_abs_path, dest_path)
45 | except IOError:
46 | raise RuntimeError("Unable to copy notebook to build destination.")
47 | 
48 | dest_path_eval = string.replace(dest_path, '.ipynb', '_evaluated.ipynb')
49 | dest_path_script = string.replace(dest_path, '.ipynb', '.py')
50 | rel_path_eval = string.replace(nb_basename, '.ipynb', '_evaluated.ipynb')
51 | rel_path_script = string.replace(nb_basename, '.ipynb', '.py')
52 | 
53 | # Create python script version
54 | unevaluated_text = nb_to_html(nb_abs_path)
55 | script_text = nb_to_python(nb_abs_path)
56 | f = open(dest_path_script, 'w')
57 | f.write(script_text.encode('utf8'))
58 | f.close()
59 | 
60 | skip_exceptions = 'skip_exceptions' in self.options
61 | 
62 | # try:
63 | evaluated_text = evaluate_notebook(nb_abs_path, dest_path_eval,
64 | skip_exceptions=skip_exceptions)
65 | # except:
66 | # # bail
67 | # return []
68 | 
69 | # Create link to notebook and script files
70 | link_rst = "(" + \
71 | formatted_link(nb_basename) + "; " + \
72 | formatted_link(rel_path_eval) + "; " + \
73 | formatted_link(rel_path_script) + \
74 | ")"
75 | 
76 | self.state_machine.insert_input([link_rst], rst_file)
77 | 
78 | # create notebook node
79 | attributes = {'format': 'html', 'source': 'nb_path'}
80 | nb_node = notebook_node('', evaluated_text, **attributes)
81 | (nb_node.source, nb_node.line) = \
82 | self.state_machine.get_source_and_line(self.lineno)
83 | 
84 | # add dependency
85 | self.state.document.settings.record_dependencies.add(nb_abs_path)
86 | 
87 | # clean up png files left behind by notebooks.
88 | png_files = glob.glob("*.png")
89 | fits_files = glob.glob("*.fits")
90 | h5_files = glob.glob("*.h5")
91 | for file in png_files:
92 | os.remove(file)
93 | 
94 | return [nb_node]
95 | 
96 | 
97 | 
98 | class notebook_node(nodes.raw):
99 | pass
100 | 
101 | def nb_to_python(nb_path):
102 | """convert notebook to python script"""
103 | exporter = python.PythonExporter()
104 | output, resources = exporter.from_filename(nb_path)
105 | return output
106 | 
107 | def nb_to_html(nb_path):
108 | """convert notebook to html"""
109 | exporter = html.HTMLExporter(template_file='full')
110 | output, resources = exporter.from_filename(nb_path)
111 | header = output.split('<head>', 1)[1].split('</head>',1)[0]
112 | body = output.split('<body>', 1)[1].split('</body>',1)[0]
113 | 
114 | # http://imgur.com/eR9bMRH
115 | header = header.replace('<style', '<style scoped="scoped"')
[lines 116-131, which filtered theme-conflicting styles out of the header, were lost in extraction]
132 | lines = ['<div class="ipynotebook">']
133 | lines.append(header)
134 | lines.append("""<style type="text/css">...</style>""") [the inline CSS on lines 135-138 was lost in extraction]
139 | import re
140 | # bad way to do it
141 | body = re.sub(r'<div class="...">.*</div>','',body)
142 | body = re.sub(r'<div class="...">.*</div>','',body)
143 | 
144 | lines.append(body)
145 | lines.append('</div>')
146 | return '\n'.join(lines)
147 | 
148 | def evaluate_notebook(nb_path, dest_path=None, skip_exceptions=False):
149 | # Create evaluated version and save it to the dest path.
150 | # Always use --pylab so figures appear inline
151 | # perhaps this is questionable?
152 | import subprocess
153 | if not os.path.exists(dest_path) or os.path.getmtime(nb_path) > os.path.getmtime(dest_path):
154 | subprocess.check_call("runipy %s %s"%(nb_path,dest_path),shell=True)
155 | # nb_runner = NotebookRunner(nb_in=nb_path, pylab=True)
156 | # nb_runner.run_notebook(skip_exceptions=skip_exceptions)
157 | # if dest_path is None:
158 | # dest_path = 'temp_evaluated.ipynb'
159 | # nb_runner.save_notebook(dest_path)
160 | ret = nb_to_html(dest_path)
161 | if dest_path == 'temp_evaluated.ipynb':
162 | os.remove(dest_path)
163 | return ret
164 | 
165 | def formatted_link(path):
166 | return "`%s <%s>`__" % (os.path.basename(path), path)
167 | 
168 | def visit_notebook_node(self, node):
169 | self.visit_raw(node)
170 | 
171 | def depart_notebook_node(self, node):
172 | self.depart_raw(node)
173 | 
174 | def setup(app):
175 | setup.app = app
176 | setup.config = app.config
177 | setup.confdir = app.confdir
178 | 
179 | app.add_node(notebook_node,
180 | html=(visit_notebook_node, depart_notebook_node))
181 | 
182 | app.add_directive('notebook', NotebookDirective)
183 | -------------------------------------------------------------------------------- /doc/spelling_wordlist.txt: -------------------------------------------------------------------------------- 1 | Theano 2 | codebase 3 | reimplement 4 | backend 5 | multithreading 6 | cmake 7 | preinstalled 8 | reimagines 9 | multithreaded 10 | subexpression 11 | optimizations 12 | arrayobject 13 | dlopen 14 | dlsym 15 | autotuning 16 | upcasting 17 | numpy -------------------------------------------------------------------------------- /doc/sphinx_preview.py: -------------------------------------------------------------------------------- 1 | import sublime, sublime_plugin
2 | import subprocess, os
3 | 
4 | class SphinxPreviewCommand(sublime_plugin.TextCommand):
5 | def run(self, edit, **kwargs):
6 | if self.view.file_name():
7 | folder_name, file_name = os.path.split(self.view.file_name())
8 | 
9 | command = './build_and_view.sh'
10 | p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=folder_name, shell=True)
11 | result, err = p.communicate()
12 | print(result,err)
13 | # self.view.set_status('p4',str(result+err))
14 | # sublime.set_timeout(self.clear,2000) -------------------------------------------------------------------------------- /doc/tutorial-notes.txt: -------------------------------------------------------------------------------- 1 | https://github.com/Newmu/Theano-Tutorials 2 | 3 | http://deeplearning.net/tutorial/ 4 | 5 | https://github.com/torch/tutorials -------------------------------------------------------------------------------- /doc/upload.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash
2 | rsync -azvu --delete --progress _build/html/ pabbeel@rll.berkeley.edu:/var/www/cgt
3 | -------------------------------------------------------------------------------- /examples/README: -------------------------------------------------------------------------------- 1 | Here you can find examples of using CGT.
2 | All of the scripts can be run without arguments.
3 | (If one fails, please notify us or open an issue.)
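A typical invocation, using the flags described below (the choice of script is just an example):

    CGT_FLAGS=backend=native python demo_mnist.py

Persistent settings can instead go in your .cgtrc; see cgtrc.example in the repository root for a template.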
4 | 5 | For best performance, set backend=native, via .cgtrc or CGT_FLAGS=backend=native 6 | Also try parallel=True, but that only helps sometimes. -------------------------------------------------------------------------------- /examples/bench/cgt_gru.py: -------------------------------------------------------------------------------- 1 | import cgt 2 | from gru import GRUCell 3 | import time 4 | from cgt.utils import Message 5 | import numpy as np 6 | 7 | if __name__ == "__main__": 8 | import argparse 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument("--horizon",type=int) 11 | args = parser.parse_args() 12 | horizon = args.horizon 13 | assert horizon is not None 14 | size=128 15 | batchsize=64 16 | cell = GRUCell([size],size) 17 | X = cgt.tensor3() 18 | init = cgt.matrix() 19 | 20 | prev_h = init 21 | for i in xrange(horizon): 22 | prev_h = cell(X[i], prev_h) 23 | loss = prev_h.sum() 24 | 25 | with Message("compiling"): 26 | f = cgt.function([X, init],cgt.grad(loss, cell.params())) 27 | with Message("running"): 28 | xval = np.zeros((horizon,batchsize,size),cgt.floatX) 29 | initval = np.zeros((batchsize, size), cgt.floatX) 30 | for i in xrange(100): 31 | f(xval, initval) 32 | 33 | 34 | # # No speedup -- why? 35 | # with Message("split loss. compiling"): 36 | # from cgt import nn 37 | # m = cgt.nn.Module([X, init], [loss]) 38 | # split_loss = 0 39 | # X1 = cgt.tensor3() 40 | # init1 = cgt.matrix() 41 | # for start in xrange(0, batchsize, batchsize//4): 42 | # sli = slice(start, start+batchsize//4) 43 | # split_loss += m([X1[:, sli], init1[sli]])[0] 44 | # f = cgt.function([X1, init1],cgt.grad(split_loss, cell.params())) 45 | # with Message("running"): 46 | # for i in xrange(100): 47 | # f(xval,initval) 48 | -------------------------------------------------------------------------------- /examples/bench/gru.py: -------------------------------------------------------------------------------- 1 | import cgt 2 | import numpy as np 3 | 4 | def normc(x): 5 | assert x.ndim == 2 6 | return x/norms(x,0)[None,:] 7 | def randnf(*shp): 8 | return np.random.randn(*shp).astype(cgt.floatX) 9 | def norms(x,ax): 10 | return np.sqrt(np.square(x).sum(axis=ax)) 11 | 12 | 13 | class GRUCell(object): 14 | """ 15 | Gated Recurrent Unit. E.g., see 16 | Chung, Junyoung, et al. "Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling." arXiv preprint arXiv:1412.3555 (2014). 
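    Update equations, as implemented in __call__ below (z: update gate,
    r: reset gate, M: memory, X: input(s)):
        z     = sigmoid(X.Wiz + M.Wmz + bz)
        r     = sigmoid(X.Wir + M.Wmr + br)
        Mtarg = tanh(X.Wim + (r*M).Wmm + bm)
        Mnew  = (1-z)*M + z*Mtarg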
17 | """ 18 | def __init__(self,input_sizes,mem_size,name_prefix=""): 19 | 20 | Wiz_vals = [normc(randnf(input_size,mem_size)) for input_size in input_sizes] 21 | self.Wizs = [cgt.shared(Wiz_val,name=name_prefix+"Wiz") for Wiz_val in Wiz_vals] 22 | Wmz_val = normc(randnf(mem_size,mem_size)) 23 | self.Wmz = cgt.shared(Wmz_val,name=name_prefix+"Wmz") 24 | bz = np.zeros((1,mem_size),cgt.floatX) 25 | self.bz = cgt.shared(bz,name=name_prefix+"bz") 26 | 27 | Wir_vals = [normc(randnf(input_size,mem_size)) for input_size in input_sizes] 28 | self.Wirs = [cgt.shared(Wir_val,name=name_prefix+"Wir") for Wir_val in Wir_vals] 29 | Wmr_val = normc(randnf(mem_size,mem_size)) 30 | self.Wmr = cgt.shared(Wmr_val,name=name_prefix+"Wmr") 31 | br = np.zeros((1,mem_size),cgt.floatX) 32 | self.br = cgt.shared(br,name=name_prefix+"br") 33 | 34 | Wim_vals = [normc(randnf(input_size,mem_size)) for input_size in input_sizes] 35 | self.Wims = [cgt.shared(Wim_val,name=name_prefix+"Wim") for Wim_val in Wim_vals] 36 | Wmm_val = normc(np.eye(mem_size,dtype=cgt.floatX)) 37 | self.Wmm = cgt.shared(Wmm_val,name=name_prefix+"Wmm") 38 | bm = np.zeros((1,mem_size),cgt.floatX) 39 | self.bm = cgt.shared(bm,name=name_prefix+"bm") 40 | 41 | def __call__(self,M,*inputs): 42 | assert len(inputs) == len(self.Wizs) 43 | n = M.shape[0] 44 | summands = [Xi.dot(Wiz) for (Xi,Wiz) in zip(inputs,self.Wizs)] + [M.dot(self.Wmz),cgt.repeat(self.bz,n, axis=0)] 45 | z = cgt.sigmoid(cgt.add_multi(summands)) 46 | 47 | summands = [Xi.dot(Wir) for (Xi,Wir) in zip(inputs,self.Wirs)] + [M.dot(self.Wmr),cgt.repeat(self.br,n, axis=0)] 48 | r = cgt.sigmoid(cgt.add_multi(summands)) 49 | 50 | summands = [Xi.dot(Wim) for (Xi,Wim) in zip(inputs,self.Wims)] + [(r*M).dot(self.Wmm),cgt.repeat(self.bm,n, axis=0)] 51 | Mtarg = cgt.tanh(cgt.add_multi(summands)) #pylint: disable=E1111 52 | 53 | Mnew = (1-z)*M + z*Mtarg 54 | return Mnew 55 | 56 | def params(self): 57 | out = [] 58 | out.extend(self.Wizs) 59 | out.append(self.Wmz) 60 | out.append(self.bz) 61 | out.extend(self.Wirs) 62 | out.append(self.Wmr) 63 | out.append(self.br) 64 | out.extend(self.Wims) 65 | out.append(self.Wmm) 66 | out.append(self.bm) 67 | return out 68 | 69 | -------------------------------------------------------------------------------- /examples/bench/seq_model.py: -------------------------------------------------------------------------------- 1 | import gru,cgt, numpy as np 2 | import sys 3 | from time import time 4 | 5 | elapsed = [] 6 | horizons = 2**np.arange(2, 10) 7 | 8 | for horizon in horizons: 9 | print "HORIZON",horizon 10 | tstart = time() 11 | 12 | batch_size = 6 13 | dim_x = 16 14 | mem_size = 10 15 | 16 | X_tnk = cgt.tensor3("X") 17 | 18 | cell = gru.GRUCell([dim_x], mem_size) 19 | 20 | Minit_nk = cgt.zeros((X_tnk.shape[0], X_tnk.shape[1]),cgt.floatX) 21 | M = Minit_nk 22 | 23 | for t in xrange(horizon): 24 | M = cell(M, X_tnk[t]) 25 | 26 | # cgt.print_tree(M) 27 | print "simplifying..." 
28 | M_simp = cgt.simplify([M]) 29 | print "done" 30 | # cgt.print_tree(M_simp) 31 | print "fn before:",cgt.count_nodes(M) 32 | print "fn after:",cgt.count_nodes(M_simp) 33 | 34 | gs = cgt.grad(cgt.sum(M), cell.params()) 35 | print "grad before", cgt.count_nodes(gs) 36 | g_simp = cgt.simplify(gs) 37 | print "grad after",cgt.count_nodes(g_simp) 38 | 39 | # M = cgt.simplify(M) 40 | elapsed.append(time()-tstart) 41 | 42 | import matplotlib.pyplot as plt 43 | plt.plot(horizons,elapsed,'x-') 44 | plt.show() 45 | 46 | -------------------------------------------------------------------------------- /examples/bench/theano_gru.py: -------------------------------------------------------------------------------- 1 | import theano, theano.tensor as TT 2 | from cgt.utils import Message 3 | import time 4 | import numpy as np 5 | 6 | def normc(x): 7 | assert x.ndim == 2 8 | return x/norms(x,0)[None,:] 9 | def randnf(*shp): 10 | return np.random.randn(*shp).astype(theano.config.floatX) 11 | def norms(x,ax): 12 | return np.sqrt(np.square(x).sum(axis=ax)) 13 | 14 | class GRUCell(object): 15 | """ 16 | Gated Recurrent Unit. E.g., see 17 | Chung, Junyoung, et al. "Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling." arXiv preprint arXiv:1412.3555 (2014). 18 | """ 19 | def __init__(self,input_sizes,mem_size,name_prefix=""): 20 | 21 | Wiz_vals = [normc(randnf(input_size,mem_size)) for input_size in input_sizes] 22 | self.Wizs = [theano.shared(Wiz_val,name=name_prefix+"Wiz") for Wiz_val in Wiz_vals] 23 | Wmz_val = normc(randnf(mem_size,mem_size)) 24 | self.Wmz = theano.shared(Wmz_val,name=name_prefix+"Wmz") 25 | bz = np.zeros((1,mem_size),theano.config.floatX) 26 | self.bz = theano.shared(bz,name=name_prefix+"bz") 27 | self.bz.type.broadcastable = (True,False) 28 | 29 | Wir_vals = [normc(randnf(input_size,mem_size)) for input_size in input_sizes] 30 | self.Wirs = [theano.shared(Wir_val,name=name_prefix+"Wir") for Wir_val in Wir_vals] 31 | Wmr_val = normc(randnf(mem_size,mem_size)) 32 | self.Wmr = theano.shared(Wmr_val,name=name_prefix+"Wmr") 33 | br = np.zeros((1,mem_size),theano.config.floatX) 34 | self.br = theano.shared(br,name=name_prefix+"br") 35 | self.br.type.broadcastable = (True,False) 36 | 37 | Wim_vals = [normc(randnf(input_size,mem_size)) for input_size in input_sizes] 38 | self.Wims = [theano.shared(Wim_val,name=name_prefix+"Wim") for Wim_val in Wim_vals] 39 | Wmm_val = normc(np.eye(mem_size,dtype=theano.config.floatX)) 40 | self.Wmm = theano.shared(Wmm_val,name=name_prefix+"Wmm") 41 | bm = np.zeros((1,mem_size),theano.config.floatX) 42 | self.bm = theano.shared(bm,name=name_prefix+"bm") 43 | self.bm.type.broadcastable = (True,False) 44 | 45 | def __call__(self,M,*inputs): 46 | assert len(inputs) == len(self.Wizs) 47 | summands = [Xi.dot(Wiz) for (Xi,Wiz) in zip(inputs,self.Wizs)] + [M.dot(self.Wmz),self.bz] 48 | z = TT.nnet.sigmoid(TT.add(*summands)) 49 | 50 | summands = [Xi.dot(Wir) for (Xi,Wir) in zip(inputs,self.Wirs)] + [M.dot(self.Wmr),self.br] 51 | r = TT.nnet.sigmoid(TT.add(*summands)) 52 | 53 | summands = [Xi.dot(Wim) for (Xi,Wim) in zip(inputs,self.Wims)] + [(r*M).dot(self.Wmm),self.bm] 54 | Mtarg = TT.tanh(TT.add(*summands)) #pylint: disable=E1111 55 | 56 | Mnew = (1-z)*M + z*Mtarg 57 | return Mnew 58 | 59 | def params(self): 60 | out = [] 61 | out.extend(self.Wizs) 62 | out.append(self.Wmz) 63 | out.append(self.bz) 64 | out.extend(self.Wirs) 65 | out.append(self.Wmr) 66 | out.append(self.br) 67 | out.extend(self.Wims) 68 | out.append(self.Wmm) 69 | 
out.append(self.bm)
70 | return out
71 | 
72 | if __name__ == "__main__":
73 | import argparse
74 | parser = argparse.ArgumentParser()
75 | parser.add_argument("--horizon",type=int)
76 | args = parser.parse_args()
77 | horizon = args.horizon
78 | assert horizon is not None
79 | size=128
80 | batchsize=64
81 | cell = GRUCell([size],size)
82 | X = TT.tensor3()
83 | init = TT.zeros((batchsize, size),theano.config.floatX)
84 | 
85 | prev_h = init
86 | for i in xrange(horizon):
87 | prev_h = cell(X[i], prev_h)
88 | 
89 | with Message("compiling"):
90 | f = theano.function([X],theano.grad(prev_h.sum(), cell.params()))
91 | with Message("running"):
92 | x = np.zeros((horizon,batchsize,size),theano.config.floatX)
93 | for i in xrange(100):
94 | f(x) -------------------------------------------------------------------------------- /examples/broken/caffe2cgt.py: -------------------------------------------------------------------------------- 1 | from google.protobuf import text_format
2 | from caffe_pb2 import *
3 | import os.path as osp
4 | import cgt
5 | from cgt import nn
6 | from cgt.core import infer_shape
7 | import numpy as np
8 | infile = "/Users/joschu/Src/caffe/examples/mnist/lenet.prototxt"
9 | # infile = "/Users/joschu/Src/caffe/models/bvlc_googlenet/train_val.prototxt"
10 | 
11 | with open(osp.expanduser(infile),"r") as fh:
12 | text = fh.read()
13 | net = NetParameter()
14 | text_format.Merge(text, net)
15 | 
16 | 
17 | name2node = {}
18 | 
19 | cgt.set_precision('single')
20 | 
21 | if net.input: #pylint: disable=E1101
22 | assert len(net.input) == 1 #pylint: disable=E1101
23 | name2node[net.input[0]] = cgt.tensor(ndim=4,dtype=cgt.floatX, fixed_shape=tuple(net.input_dim))
24 | 
25 | 
26 | # XXX super inefficient
27 | 
28 | for layer in net.layer: #pylint: disable=E1101
29 | if layer.phase==TRAIN:
30 | print "loading layer %s type=%s in=%s out=%s"%(layer.name, layer.type, layer.bottom, layer.top)
31 | output = None
32 | inputs = [name2node[name] for name in layer.bottom]
33 | if layer.type == "Data":
34 | tp = layer.transform_param
35 | crop_size = tp.crop_size
36 | chans = len(tp.mean_value)
37 | dp = layer.data_param
38 | batch_size = dp.batch_size
39 | output = [cgt.tensor(dtype=cgt.floatX,ndim=4,name=layer.name, fixed_shape=(batch_size,chans,crop_size,crop_size)),
40 | cgt.tensor(dtype='i8',ndim=2,name=layer.name, fixed_shape=(batch_size, 1))]
41 | elif layer.type == "Convolution":
42 | X = inputs[0]
43 | param = layer.convolution_param
44 | kh,kw = (param.kernel_size, param.kernel_size) if param.HasField("kernel_size")\
45 | else (param.kernel_h, param.kernel_w)
46 | nchanin = infer_shape(X)[1] # input is NCHW, so channels are at index 1
47 | Wshape = (param.num_output, nchanin, kh, kw)
48 | Wname = layer.param[0].name or layer.name+":W"
49 | Wval = np.empty(Wshape, dtype=cgt.floatX)
50 | W = name2node[Wname] = cgt.shared(Wval, name=Wname, fixed_shape_mask="all")
51 | bshape = (1, param.num_output, 1, 1)
52 | bname = layer.param[1].name or layer.name+":b"
53 | bval = np.empty(bshape, dtype=cgt.floatX)
54 | b = name2node[bname] = cgt.shared(bval, name=bname, fixed_shape_mask="all")
55 | sh,sw = (param.stride, param.stride) if param.HasField("stride")\
56 | else (param.stride_h, param.stride_w)
57 | output = [cgt.broadcast("+",nn.conv2d(X, W, subsample=(sh,sw)), b, "xxxx,1x11")]
58 | elif layer.type == "Pooling":
59 | param = layer.pooling_param
60 | X = inputs[0]
61 | pool_type = {param.MAX : "max", param.AVE : "mean"}[param.pool]
62 | height_in,width_in = infer_shape(X)[2:4]
63 | kernel = (param.kernel_size, param.kernel_size) if
param.HasField("kernel_size")\ 64 | else (param.kernel_h, param.kernel_w) 65 | stride = (param.stride, param.stride) if param.HasField("stride")\ 66 | else (param.stride_h, param.stride_w) 67 | pad = (param.pad, param.pad) if param.HasField("pad")\ 68 | else (param.pad_h, param.pad_w) 69 | output = [nn.pool(pool_type, X, stride, kernel, pad)] 70 | elif layer.type == "InnerProduct": 71 | X = inputs[0] 72 | if X.ndim == 4: 73 | X = cgt.reshape(X, [X.shape[0], X.shape[1]*X.shape[2]*X.shape[3]] ) 74 | param = layer.inner_product_param 75 | nchanin = infer_shape(X)[1] 76 | Wshape = (param.num_output, nchanin) 77 | Wname = layer.param[0].name or layer.name+":W" 78 | Wval = np.empty(Wshape, dtype=cgt.floatX) 79 | W = name2node[Wname] = cgt.shared(Wval, name=Wname, fixed_shape_mask="all") 80 | bshape = (1, param.num_output) 81 | bname = layer.param[1].name or layer.name+":b" 82 | bval = np.empty(bshape, dtype=cgt.floatX) 83 | b = name2node[bname] = cgt.shared(bval, name=bname, fixed_shape_mask="all") 84 | yname = layer.top[0] 85 | output = [cgt.broadcast("+",X.dot(W), b, "xx,1x") ] 86 | elif layer.type == "ReLU": 87 | output = [nn.rectify(inputs[0])] 88 | elif layer.type == "Softmax": 89 | output = [nn.softmax(inputs[0])] 90 | elif layer.type == "LRN": 91 | # XXX needs params 92 | param = layer.lrn_param 93 | output = [nn.lrn(inputs[0], param.alpha,param.beta, param.local_size)] 94 | elif layer.type == "Concat": 95 | param = layer.concat_param 96 | output = [cgt.concatenate(inputs, param.concat_dim) ] 97 | elif layer.type == "Dropout": 98 | output = [nn.dropout(inputs[0])] 99 | elif layer.type == "SoftmaxWithLoss": 100 | output = [nn.loglik_softmax(inputs[0], inputs[1])] 101 | elif layer.type == "Accuracy": 102 | output = [nn.zero_one_loss(inputs[0], inputs[1])] 103 | else: 104 | cgt.error("unrecognized layer type %s"%layer.type) 105 | 106 | assert output is not None 107 | 108 | # assert isinstance(output, cgt.Node) 109 | for i in xrange(len(layer.top)): name2node[layer.top[i]] = output[i] 110 | print "stored", layer.top[0] 111 | if layer.type != "Data": 112 | print "shape",layer.type, infer_shape(name2node[layer.bottom[0]]), infer_shape(name2node[layer.top[0]]) 113 | 114 | 115 | 116 | 117 | -------------------------------------------------------------------------------- /examples/broken/internals_tour.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Let's look at the least square problem we constructed above" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "collapsed": false 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import cgt\n", 19 | "cgt.modify_config(backend=\"python\")\n", 20 | "X_nk = cgt.matrix(\"X\")\n", 21 | "y_n = cgt.vector(\"y\")\n", 22 | "w_k = cgt.vector(\"w\")\n", 23 | "b = cgt.scalar(\"b\")\n", 24 | "ypred_n = X_nk.dot(w_k) + b\n", 25 | "loss = cgt.sum(cgt.square(ypred_n - y_n))" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "Internally, CGT represents the loss function, as well as other expressions, using a directed acyclic graph called the **expression graph**. \n", 33 | "In the expression graph, each node corresponds to an intermediate result and the operation that was performed to obtain it.\n", 34 | "\n", 35 | "The graph is made up of two kinds of nodes: `Input` and `Result`. 
\n", 36 | "`Input` nodes correspond to values that are set externally, while `Result` node correspond to intermediate values in the computation, computed from zero-or-more preceding nodes." 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "Below is a representation of the expression graph above.\n", 44 | "(TODO: we should show DAG plot here instead of the tree, maybe using dot for layout)" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": { 51 | "collapsed": false 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "cgt.display.print_text(loss);" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "\n", 63 | "\n", 64 | "\n" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": { 71 | "collapsed": false 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "# We can inspect the python objects involved.\n", 76 | "print loss\n", 77 | "\n", 78 | "print \"loss:\",loss, loss.parents\n", 79 | "print \"loss.parents[0]:\",loss.parents[0], loss.parents[0].parents" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": { 86 | "collapsed": false 87 | }, 88 | "outputs": [], 89 | "source": [ 90 | "grads = cgt.grad(loss, [w_k,b])\n", 91 | "cgt.display.print_text(grads)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": { 98 | "collapsed": false 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "grads = cgt.simplify(grads)\n", 103 | "cgt.display.print_text(grads);" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": { 110 | "collapsed": false, 111 | "scrolled": false 112 | }, 113 | "outputs": [], 114 | "source": [ 115 | "inputs = [X_nk, y_n, w_k, b]\n", 116 | "outputs = [loss]\n", 117 | "interpreter = cgt.execution.run_compilation_pipeline(inputs, outputs,[],[])\n", 118 | "eg = interpreter.eg\n", 119 | "import pprint\n", 120 | "pprint.pprint(eg.to_json())" 121 | ] 122 | } 123 | ], 124 | "metadata": { 125 | "kernelspec": { 126 | "display_name": "Python 2", 127 | "language": "python", 128 | "name": "python2" 129 | }, 130 | "language_info": { 131 | "codemirror_mode": { 132 | "name": "ipython", 133 | "version": 2 134 | }, 135 | "file_extension": ".py", 136 | "mimetype": "text/x-python", 137 | "name": "python", 138 | "nbconvert_exporter": "python", 139 | "pygments_lexer": "ipython2", 140 | "version": "2.7.10" 141 | } 142 | }, 143 | "nbformat": 4, 144 | "nbformat_minor": 0 145 | } 146 | -------------------------------------------------------------------------------- /examples/broken/mnist_torchstyle.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from sklearn.datasets import fetch_mldata 4 | 5 | mnist = fetch_mldata('MNIST original', data_home='~/cgt/data') # XXX 6 | 7 | print(mnist.data.shape) 8 | print(mnist.target.shape) 9 | 10 | np.unique(mnist.target) 11 | 12 | #plt.imshow(mnist.data[1, :].reshape(28, 28)) 13 | #plt.show() 14 | 15 | # do some preprocessing 16 | 17 | X = mnist.data 18 | y = mnist.target 19 | X = X.astype('float64') 20 | X = X / 255 21 | 22 | # train-test split (as [Joachims, 2006]) 23 | # TODO can define own validation split... 
24 | 
25 | n_train = 60000
26 | X_train = X[:n_train, :]
27 | X_test = X[n_train:, :]
28 | y_train = y[:n_train]
29 | y_test = y[n_train:]
30 | 
31 | # construct the network
32 | 
33 | import nn
34 | import cgt
35 | from opt import sgd_update
36 | 
37 | N_LAYERS = 2
38 | hid_size = X.shape[1] # 28 * 28
39 | out_size = 10
40 | 
41 | inps = [cgt.matrix(dtype=cgt.floatX)]
42 | 
43 | param_list = []
44 | for k in xrange(N_LAYERS):
45 | tmp = nn.Affine(hid_size, hid_size)#(inps[k])
46 | param_list.extend([tmp.weight, tmp.bias])
47 | inps.append(cgt.tanh(tmp(inps[k])))
48 | 
49 | tmp = nn.Affine(hid_size, out_size)
50 | param_list.extend([tmp.weight, tmp.bias])
51 | logprobs = nn.logsoftmax(tmp(inps[-1]))
52 | 
53 | #dnn = nn.Module(inps[0:1], [logprobs])
54 | #params = dnn.get_parameters()
55 | # XXX think should just make this part of get_parameters
56 | theta = nn.setup_contiguous_storage(param_list)
57 | # XXX initialize
58 | theta[:] = np.random.uniform(-0.08, 0.08, theta.shape)
59 | 
60 | # XXX taken from other demo, move
61 | def ind2onehot(inds, n_cls):
62 | out = np.zeros(list(inds.shape)+[n_cls,], cgt.floatX)
63 | for k in xrange(inds.shape[0]):
64 | out[k, inds[k].astype('int32')] = 1
65 | #out.flat[np.arange(inds.size)*n_cls + inds.ravel()] = 1
66 | return out
67 | 
68 | b_size = 25
69 | 
70 | def make_loss_and_grad(net):
71 | X_b = inps[0] #cgt.matrix(dtype=cgt.floatX)
72 | y_onehot = cgt.matrix(dtype='i4')
73 | outputs = [logprobs]
74 | 
75 | loss = nn.crossent(outputs[0], y_onehot) / b_size
76 | #gradloss = cgt.grad(loss, params)
77 | gradloss = cgt.grad(loss, param_list)
78 | 
79 | # XXX use flatcat function
80 | grad = cgt.concatenate([x.flatten() for x in gradloss])
81 | #grad = gradloss
82 | return cgt.make_function([X_b, y_onehot], [loss, grad, logprobs])
83 | 
84 | f_loss_and_grad = make_loss_and_grad(None)
85 | 
86 | # train loop
87 | 
88 | # shuffle data
89 | 
90 | perm = np.random.permutation(np.arange(X_train.shape[0]))
91 | X_train = X_train[perm, :]
92 | y_train = y_train[perm]
93 | 
94 | class Table(object):
95 | pass
96 | state = Table()
97 | state.theta = theta
98 | state.step_size = 0.1
99 | exploss = None
100 | for k in xrange(X_train.shape[0] / b_size):
101 | X_batch, y_batch = X_train[k*b_size:(k+1)*b_size, :], y_train[k*b_size:(k+1)*b_size]
102 | loss, grad, logprobs = f_loss_and_grad(X_batch, ind2onehot(y_batch, 10))
103 | exploss = loss if k == 0 else 0.99*exploss + 0.01*loss
104 | print('iter %d, loss %f, exploss %f' % (k + 1, loss, exploss))
105 | sgd_update(state, grad)
106 | 
107 | 
108 | # test code
109 | 
110 | correct = 0
111 | total = 0
112 | print(X_test.shape)
113 | print(y_test.shape)
114 | for k in xrange(X_test.shape[0] / b_size):
115 | X_batch, y_batch = X_test[k*b_size:(k+1)*b_size, :], y_test[k*b_size:(k+1)*b_size]
116 | loss, grad, logprobs = f_loss_and_grad(X_batch, ind2onehot(y_batch, 10))
117 | preds = logprobs.argmax(axis=1).flatten()
118 | correct = correct + (preds == y_batch).sum()
119 | total = total + b_size
120 | 
121 | print('%d/%d correct' % (correct, total))
122 | -------------------------------------------------------------------------------- /examples/demo_cifar.py: -------------------------------------------------------------------------------- 1 | 
2 | from example_utils import fmt_row, fetch_dataset
3 | import cPickle, numpy as np
4 | import cgt
5 | from cgt import nn
6 | import argparse, time
7 | 
8 | def rmsprop_updates(cost, params, stepsize=0.001, rho=0.9, epsilon=1e-6):
9 | grads = cgt.grad(cost, params)
10 | updates = []
11 | for p, g in
zip(params, grads): 12 | acc = cgt.shared(p.op.get_value() * 0.) 13 | acc_new = rho * acc + (1 - rho) * cgt.square(g) 14 | gradient_scaling = cgt.sqrt(acc_new + epsilon) 15 | g = g / gradient_scaling 16 | updates.append((acc, acc_new)) 17 | updates.append((p, p - stepsize * g)) 18 | return updates 19 | 20 | def main(): 21 | parser = argparse.ArgumentParser() 22 | parser.add_argument("--profile",action="store_true") 23 | parser.add_argument("--unittest",action="store_true") 24 | parser.add_argument("--epochs",type=int,default=10) 25 | parser.add_argument("--devtype",choices=["cpu","gpu"],default="cpu") 26 | args = parser.parse_args() 27 | 28 | cgt.update_config(default_device=cgt.core.Device(devtype=args.devtype), backend="native") 29 | 30 | batchsize = 64 31 | Xshape = (batchsize, 3, 32, 32) 32 | X = cgt.tensor4("X", fixed_shape = Xshape) 33 | y = cgt.vector("y", fixed_shape = (batchsize,), dtype='i4') 34 | 35 | conv1 = nn.SpatialConvolution(3, 32, kernelshape=(5,5), pad=(2,2), 36 | weight_init=nn.IIDGaussian(std=1e-4))(X) 37 | relu1 = nn.rectify(conv1) 38 | pool1 = nn.max_pool_2d(relu1, kernelshape=(3,3), stride=(2,2)) 39 | conv2 = nn.SpatialConvolution(32, 32, kernelshape=(5,5), pad=(2,2), 40 | weight_init=nn.IIDGaussian(std=0.01))(pool1) 41 | relu2 = nn.rectify(conv2) 42 | pool2 = nn.max_pool_2d(relu2, kernelshape=(3,3), stride=(2,2)) 43 | conv3 = nn.SpatialConvolution(32, 64, kernelshape=(5,5), pad=(2,2), 44 | weight_init=nn.IIDGaussian(std=0.01))(pool2) 45 | pool3 = nn.max_pool_2d(conv3, kernelshape=(3,3), stride=(2,2)) 46 | relu3 = nn.rectify(pool3) 47 | d0,d1,d2,d3 = relu3.shape 48 | flatlayer = relu3.reshape([d0,d1*d2*d3]) 49 | nfeats = cgt.infer_shape(flatlayer)[1] 50 | ip1 = nn.Affine(nfeats, 10)(flatlayer) 51 | logprobs = nn.logsoftmax(ip1) 52 | loss = -logprobs[cgt.arange(batchsize), y].mean() 53 | 54 | 55 | params = nn.get_parameters(loss) 56 | 57 | updates = rmsprop_updates(loss, params, stepsize=1e-3) 58 | 59 | 60 | train = cgt.function(inputs=[X, y], outputs=[loss], updates=updates) 61 | 62 | 63 | if args.profile: cgt.profiler.start() 64 | 65 | data = fetch_dataset("http://rll.berkeley.edu/cgt-data/cifar10.npz") 66 | Xtrain = data["X_train"] 67 | ytrain = data["y_train"] 68 | 69 | 70 | print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"]) 71 | for i_epoch in xrange(args.epochs): 72 | for start in xrange(0, Xtrain.shape[0], batchsize): 73 | tstart = time.time() 74 | end = start+batchsize 75 | print train(Xtrain[start:end], ytrain[start:end]), time.time()-tstart 76 | if start > batchsize*5: break 77 | # elapsed = time.time() - tstart 78 | # trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)]) 79 | # testerr, testloss = computeloss(Xtest, ytest) 80 | # print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed]) 81 | if args.profile: 82 | cgt.profiler.print_stats() 83 | return 84 | if args.unittest: 85 | break 86 | 87 | 88 | 89 | if __name__ == "__main__": 90 | main() -------------------------------------------------------------------------------- /examples/demo_mnist.py: -------------------------------------------------------------------------------- 1 | # Based on tutorial by Alec Radford 2 | # https://github.com/Newmu/Theano-Tutorials/blob/master/4_modern_net.py 3 | 4 | import cgt 5 | from cgt import nn 6 | from cgt.distributions import categorical 7 | import numpy as np 8 | from example_utils import fmt_row, fetch_dataset 9 | import time, sys 10 | 11 | def init_weights(*shape): 12 | return 
cgt.shared(np.random.randn(*shape) * 0.01, fixed_shape_mask='all') 13 | 14 | def rmsprop_updates(cost, params, stepsize=0.001, rho=0.9, epsilon=1e-6): 15 | grads = cgt.grad(cost, params) 16 | updates = [] 17 | for p, g in zip(params, grads): 18 | acc = cgt.shared(p.op.get_value() * 0.) 19 | acc_new = rho * acc + (1 - rho) * cgt.square(g) 20 | gradient_scaling = cgt.sqrt(acc_new + epsilon) 21 | g = g / gradient_scaling 22 | updates.append((acc, acc_new)) 23 | updates.append((p, p - stepsize * g)) 24 | return updates 25 | 26 | def dense_model(X, w_h, w_h2, w_o, p_drop_input, p_drop_hidden): 27 | X = nn.dropout(X, p_drop_input) 28 | h = nn.rectify(cgt.dot(X, w_h)) 29 | 30 | h = nn.dropout(h, p_drop_hidden) 31 | h2 = nn.rectify(cgt.dot(h, w_h2)) 32 | 33 | h2 = nn.dropout(h2, p_drop_hidden) 34 | py_x = nn.softmax(cgt.dot(h2, w_o)) 35 | return py_x 36 | 37 | def convnet_model(X, w, w2, w3, w4, w_o, p_drop_conv, p_drop_hidden): 38 | l1a = nn.rectify(nn.conv2d(X, w, kernelshape=(3,3), pad=(1,1))) 39 | l1 = nn.max_pool_2d(l1a, kernelshape=(2, 2), stride=(2,2)) 40 | l1 = nn.dropout(l1, p_drop_conv) 41 | 42 | l2a = nn.rectify(nn.conv2d(l1, w2, kernelshape=(3,3), pad=(1,1))) 43 | l2 = nn.max_pool_2d(l2a, kernelshape=(2, 2), stride=(2,2)) 44 | l2 = nn.dropout(l2, p_drop_conv) 45 | 46 | l3a = nn.rectify(nn.conv2d(l2, w3, kernelshape=(3,3), pad=(1,1))) 47 | l3b = nn.max_pool_2d(l3a, kernelshape=(2, 2), stride=(2,2)) 48 | batchsize,channels,rows,cols = l3b.shape 49 | l3 = cgt.reshape(l3b, [batchsize, channels*rows*cols]) 50 | l3 = nn.dropout(l3, p_drop_conv) 51 | 52 | l4 = nn.rectify(cgt.dot(l3, w4)) 53 | l4 = nn.dropout(l4, p_drop_hidden) 54 | 55 | pyx = nn.softmax(cgt.dot(l4, w_o)) 56 | return pyx 57 | 58 | def tinyconv_model(X, w, w2, p_drop): 59 | l1 = nn.conv2d(X, w, kernelshape=(3,3), pad=(1,1),stride=(3,3)) 60 | l1a = nn.dropout(l1, p_drop) 61 | batchsize,channels,rows,cols = l1.shape 62 | l1flat = cgt.reshape(l1, [batchsize,channels*rows*cols]) 63 | pyx = nn.softmax(l1flat.dot(w2)) 64 | return l1, pyx 65 | 66 | 67 | 68 | def main(): 69 | import argparse 70 | parser=argparse.ArgumentParser() 71 | parser.add_argument("--epochs",type=int,default=10) 72 | parser.add_argument("--profile",action="store_true") 73 | parser.add_argument("--dropout",action="store_true") 74 | parser.add_argument("--stepsize",type=float, default=.001) 75 | parser.add_argument("--model",choices=["dense","conv"],default="dense") 76 | parser.add_argument("--unittest",action="store_true") 77 | parser.add_argument("--grad_check",action="store_true") 78 | parser.add_argument("--devtype",choices=["cpu","gpu"],default="cpu") 79 | args = parser.parse_args() 80 | 81 | if args.grad_check: cgt.set_precision("quad") 82 | 83 | # from mldata.org http://mldata.org/repository/data/viewslug/mnist-original/ 84 | # converted to npz 85 | mnist = fetch_dataset("http://rll.berkeley.edu/cgt-data/mnist.npz") 86 | 87 | Xdata = (mnist["X"]/255.).astype(cgt.floatX) 88 | ydata = mnist["y"] 89 | 90 | np.random.seed(0) 91 | 92 | cgt.update_config(default_device=cgt.core.Device(devtype=args.devtype), backend="native") 93 | 94 | if args.model=="conv": 95 | Xdata = Xdata.reshape(-1, 1, 28, 28) 96 | 97 | Xtrain = Xdata[0:60000] 98 | ytrain = ydata[0:60000] 99 | 100 | Xtest = Xdata[60000:70000] 101 | ytest = ydata[60000:70000] 102 | 103 | sortinds = np.random.permutation(60000) 104 | Xtrain = Xtrain[sortinds] 105 | ytrain = ytrain[sortinds] 106 | 107 | X = cgt.tensor4("X",fixed_shape=(None,1,28,28)) if args.model=="conv" else cgt.matrix("X", 
fixed_shape=(None,28*28)) 108 | y = cgt.vector("y",dtype='i8') 109 | 110 | if args.model == "dense": 111 | p_drop_input,p_drop_hidden = (0.2, 0.5) if args.dropout else (0,0) 112 | w_h = init_weights(784, 256) 113 | w_h2 = init_weights(256, 256) 114 | w_o = init_weights(256, 10) 115 | pofy_drop = dense_model(X, w_h, w_h2, w_o, p_drop_input, p_drop_hidden) 116 | pofy_nodrop = dense_model(X, w_h, w_h2, w_o, 0., 0.) 117 | params = [w_h, w_h2, w_o] 118 | elif args.model == "conv": 119 | p_drop_conv,p_drop_hidden = (0.2, 0.5) if args.dropout else (0,0) 120 | w = init_weights(32, 1, 3, 3) 121 | w2 = init_weights(64, 32, 3, 3) 122 | w3 = init_weights(128, 64, 3, 3) 123 | w4 = init_weights(128 * 2 * 2, 625) 124 | w_o = init_weights(625, 10) 125 | pofy_drop = convnet_model(X, w, w2, w3, w4, w_o, p_drop_conv, p_drop_hidden) 126 | pofy_nodrop = convnet_model(X, w, w2, w3, w4, w_o, 0., 0.) 127 | params = [w, w2, w3, w4, w_o] 128 | else: 129 | raise RuntimeError("Unreachable") 130 | 131 | cost_drop = -cgt.mean(categorical.loglik(y, pofy_drop)) 132 | updates = rmsprop_updates(cost_drop, params, stepsize=args.stepsize) 133 | 134 | y_nodrop = cgt.argmax(pofy_nodrop, axis=1) 135 | cost_nodrop = -cgt.mean(categorical.loglik(y, pofy_nodrop)) 136 | err_nodrop = cgt.cast(cgt.not_equal(y_nodrop, y), cgt.floatX).mean() 137 | 138 | train = cgt.function(inputs=[X, y], outputs=[], updates=updates) 139 | computeloss = cgt.function(inputs=[X, y], outputs=[err_nodrop,cost_nodrop]) 140 | 141 | batch_size=128 142 | 143 | 144 | from cgt.tests import gradcheck_model 145 | if args.grad_check: 146 | cost_nodrop = cgt.core.clone(cost_nodrop, {X:Xtrain[:1],y:ytrain[:1]}) 147 | print "doing gradient check..." 148 | print "------------------------------------" 149 | gradcheck_model(cost_nodrop, params[0:1]) 150 | print "success!" 
151 | return 152 | 153 | if args.profile: cgt.profiler.start() 154 | 155 | print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"]) 156 | for i_epoch in xrange(args.epochs): 157 | tstart = time.time() 158 | for start in xrange(0, Xtrain.shape[0], batch_size): 159 | end = start+batch_size 160 | train(Xtrain[start:end], ytrain[start:end]) 161 | if args.unittest: return 162 | elapsed = time.time() - tstart 163 | trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)]) 164 | testerr, testloss = computeloss(Xtest, ytest) 165 | print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed]) 166 | if args.profile: cgt.execution.profiler.print_stats() 167 | 168 | if __name__ == "__main__": 169 | main() -------------------------------------------------------------------------------- /examples/example_utils.py: -------------------------------------------------------------------------------- 1 | import cPickle as pickle 2 | import os, os.path as osp, shutil, numpy as np, urllib 3 | 4 | def train_val_test_slices(n, trainfrac, valfrac, testfrac): 5 | assert trainfrac+valfrac+testfrac==1.0 6 | ntrain = int(np.round(n*trainfrac)) 7 | nval = int(np.round(n*valfrac)) 8 | ntest = n - ntrain - nval 9 | return slice(0,ntrain), slice(ntrain,ntrain+nval), slice(ntrain+nval,ntrain+nval+ntest) 10 | 11 | # helper methods to print nice table 12 | def fmt_item(x, l): 13 | if isinstance(x, np.ndarray): 14 | assert x.ndim==0 15 | x = x.item() 16 | if isinstance(x, float): rep = "%g"%x 17 | else: rep = str(x) 18 | return " "*(l - len(rep)) + rep 19 | 20 | def fmt_row(width, row, header=False): 21 | out = " | ".join(fmt_item(x, width) for x in row) 22 | if header: out = out + "\n" + "-"*len(out) 23 | return out 24 | 25 | def download(url): 26 | "download and return path to file" 27 | fname = osp.basename(url) 28 | from cgt.core import get_cgt_src_root 29 | datadir = osp.join(get_cgt_src_root(),"downloads") 30 | datapath = osp.join(datadir, fname) 31 | if not osp.exists(datapath): 32 | print "downloading %s to %s"%(url, datapath) 33 | if not osp.exists(datadir): os.makedirs(datadir) 34 | urllib.urlretrieve(url, datapath) 35 | return datapath 36 | 37 | 38 | def fetch_dataset(url): 39 | datapath = download(url) 40 | fname = osp.basename(url) 41 | extension = osp.splitext(fname)[-1] 42 | assert extension in [".npz", ".pkl"] 43 | if extension == ".npz": 44 | return np.load(datapath) 45 | elif extension == ".pkl": 46 | with open(datapath, 'rb') as fin: 47 | return pickle.load(fin) 48 | else: 49 | raise NotImplementedError 50 | -------------------------------------------------------------------------------- /examples/param_collection.py: -------------------------------------------------------------------------------- 1 | import cgt, numpy as np 2 | 3 | 4 | class ParamCollection(object): 5 | 6 | """ 7 | A utility class containing a collection of parameters 8 | which makes it convenient to write optimization code that uses flat vectors 9 | """ 10 | 11 | def __init__(self,params): #pylint: disable=W0622 12 | assert all(param.is_data() and param.dtype == cgt.floatX for param in params) 13 | self._params = params 14 | 15 | @property 16 | def params(self): 17 | return self._params 18 | 19 | def get_values(self): 20 | return [param.op.get_value() for param in self._params] 21 | 22 | def get_shapes(self): 23 | return [param.op.get_shape() for param in self._params] 24 | 25 | def get_total_size(self): 26 | return sum(np.prod(shape) for shape in self.get_shapes()) 27 | 28 | 
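    # (illustrative usage sketch, not part of the original class) the flat
    # accessors below make a gradient step over all parameters a few lines:
    #   pc = ParamCollection(params)
    #   theta = pc.get_value_flat()
    #   theta -= stepsize * gradflat   # gradflat/stepsize are assumed inputs
    #   pc.set_value_flat(theta)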
--------------------------------------------------------------------------------
/examples/param_collection.py:
--------------------------------------------------------------------------------

import cgt, numpy as np


class ParamCollection(object):

    """
    A utility class containing a collection of parameters,
    which makes it convenient to write optimization code that uses flat vectors
    """

    def __init__(self, params): #pylint: disable=W0622
        assert all(param.is_data() and param.dtype == cgt.floatX for param in params)
        self._params = params

    @property
    def params(self):
        return self._params

    def get_values(self):
        return [param.op.get_value() for param in self._params]

    def get_shapes(self):
        return [param.op.get_shape() for param in self._params]

    def get_total_size(self):
        return sum(np.prod(shape) for shape in self.get_shapes())

    def num_vars(self):
        return len(self._params)

    def set_values(self, parvals):
        assert len(parvals) == len(self._params)
        for (param, newval) in zip(self._params, parvals):
            param.op.set_value(newval)
            assert param.op.get_shape() == newval.shape

    def set_value_flat(self, theta):
        theta = theta.astype(cgt.floatX)
        arrs = []
        n = 0
        for shape in self.get_shapes():
            size = np.prod(shape)
            arrs.append(theta[n:n+size].reshape(shape))
            n += size
        assert theta.size == n
        self.set_values(arrs)

    def get_value_flat(self):
        theta = np.empty(self.get_total_size(), dtype=cgt.floatX)
        n = 0
        for param in self._params:
            s = param.op.get_size()
            theta[n:n+s] = param.op.get_value().flat
            n += s
        assert theta.size == n
        return theta
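The point of the flat-vector interface is that a generic optimizer (L-BFGS, CG, and so on) only ever sees one contiguous parameter vector. A minimal hypothetical round trip, assuming `params` is a list of cgt data nodes, e.g. collected from cgt.nn layers:

    import numpy as np, cgt

    pc = ParamCollection(params)
    theta = pc.get_value_flat()          # one vector of length get_total_size()
    theta = theta + 1e-3*np.random.randn(theta.size).astype(cgt.floatX)
    pc.set_value_flat(theta)             # scattered back into each parameter array
    assert pc.get_value_flat().shape == (pc.get_total_size(),)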
--------------------------------------------------------------------------------
/include/IRC.h:
--------------------------------------------------------------------------------

#pragma once
//== llvm/ADT/IntrusiveRefCntPtr.h - Smart Refcounting Pointer ---*- C++ -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines IntrusiveRefCntPtr, a template class that
// implements a "smart" pointer for objects that maintain their own
// internal reference count, and RefCountedBase/RefCountedBaseVPTR, two
// generic base classes for objects that wish to have their lifetimes
// managed using reference counting.
//
// IntrusiveRefCntPtr is similar to Boost's intrusive_ptr with added
// LLVM-style casting.
//
//===----------------------------------------------------------------------===//


#include <cstddef>


template <typename T>
class IRC;

template <typename T> struct IntrusiveRefCntPtrInfo {
  static void retain(T *obj) { obj->Retain(); }
  static void release(T *obj) { obj->Release(); }
};


// JDS: rolled this into cgtObject so we don't need the template inheritance business

// //===----------------------------------------------------------------------===//
// /// RefCountedBase - A generic base class for objects that wish to
// /// have their lifetimes managed using reference counts. Classes
// /// subclass RefCountedBase to obtain such functionality, and are
// /// typically handled with IntrusiveRefCntPtr "smart pointers" (see below)
// /// which automatically handle the management of reference counts.
// /// Objects that subclass RefCountedBase should not be allocated on
// /// the stack, as invoking "delete" (which is called when the
// /// reference count hits 0) on such objects is an error.
// //===----------------------------------------------------------------------===//
// template <class Derived>
// class RefCountedBase {
//   mutable unsigned ref_cnt;

// public:
//   RefCountedBase() : ref_cnt(0) {}
//   RefCountedBase(const RefCountedBase &) : ref_cnt(0) {}

//   void Retain() const { ++ref_cnt; }
//   void Release() const {
//     assert (ref_cnt > 0 && "Reference count is already zero.");
//     if (--ref_cnt == 0) delete static_cast<const Derived*>(this);
//   }
// };

//===----------------------------------------------------------------------===//
/// IntrusiveRefCntPtr - A template class that implements a "smart pointer"
/// that assumes the wrapped object has a reference count associated
/// with it that can be managed via calls to
/// IntrusivePtrAddRef/IntrusivePtrRelease. The smart pointers
/// manage reference counts via the RAII idiom: upon creation of
/// smart pointer the reference count of the wrapped object is
/// incremented and upon destruction of the smart pointer the
/// reference count is decremented. This class also safely handles
/// wrapping NULL pointers.
///
/// Reference counting is implemented via calls to
/// Obj->Retain()/Obj->Release(). Release() is required to destroy
/// the object when the reference count reaches zero. Inheriting from
/// RefCountedBase/RefCountedBaseVPTR takes care of this
/// automatically.
//===----------------------------------------------------------------------===//
template <typename T>
class IRC {
  T* Obj;

public:
  typedef T element_type;

  explicit IRC() : Obj(nullptr) {}

  IRC(T* obj) : Obj(obj) {
    retain();
  }

  IRC(const IRC& S) : Obj(S.Obj) {
    retain();
  }

  IRC(IRC&& S) : Obj(S.Obj) {
    S.Obj = nullptr;
  }

  template <class X>
  IRC(IRC<X>&& S) : Obj(S.get()) {
    S.Obj = 0;
  }

  template <class X>
  IRC(const IRC<X>& S)
    : Obj(S.get()) {
    retain();
  }

  IRC& operator=(IRC S) {
    swap(S);
    return *this;
  }

  ~IRC() { release(); }

  T& operator*() const { return *Obj; }

  T* operator->() const { return Obj; }

  T* get() const { return Obj; }

  operator bool() const { return Obj; }

  void swap(IRC& other) {
    T* tmp = other.Obj;
    other.Obj = Obj;
    Obj = tmp;
  }

  void reset() {
    release();
    Obj = nullptr;
  }

  void resetWithoutRelease() {
    Obj = 0;
  }

private:
  void retain() { if (Obj) IntrusiveRefCntPtrInfo<T>::retain(Obj); }
  void release() { if (Obj) IntrusiveRefCntPtrInfo<T>::release(Obj); }

  template <typename X>
  friend class IRC;
};

template <class T, class U>
inline bool operator==(const IRC<T>& A,
                       const IRC<U>& B)
{
  return A.get() == B.get();
}

template <class T, class U>
inline bool operator!=(const IRC<T>& A,
                       const IRC<U>& B)
{
  return A.get() != B.get();
}

template <class T, class U>
inline bool operator==(const IRC<T>& A,
                       U* B)
{
  return A.get() == B;
}

template <class T, class U>
inline bool operator!=(const IRC<T>& A,
                       U* B)
{
  return A.get() != B;
}

template <class T, class U>
inline bool operator==(T* A,
                       const IRC<U>& B)
{
  return A == B.get();
}

template <class T, class U>
inline bool operator!=(T* A,
                       const IRC<U>& B)
{
  return A != B.get();
}

template <class T>
bool operator==(std::nullptr_t A, const IRC<T> &B) {
  return !B;
}

template <class T>
bool operator==(const IRC<T> &A, std::nullptr_t B) {
  return B == A;
}

template <class T>
bool operator!=(std::nullptr_t A, const IRC<T> &B) {
  return !(A == B);
}

template <class T>
bool operator!=(const IRC<T> &A, std::nullptr_t B) {
  return !(A == B);
}
--------------------------------------------------------------------------------
/include/cgt_common.h:
--------------------------------------------------------------------------------

#pragma once

#include "IRC.h"
#include <cstdlib>
#include <cstdio>
#include <cstddef>
#include <cassert>
#include <atomic>

// ================================================================
// Visibility
// ================================================================

#define CGT_EXPORT __attribute__((visibility("default")))
#define CGT_EXPORT_C extern "C" __attribute__((visibility("default")))

// ================================================================
// Basic structs and enums
// ================================================================

typedef enum cgtDtype {
  cgt_i1 = 1,
  cgt_i2 = 3,
  cgt_i4 = 5,
  cgt_i8 = 7,
  cgt_f2 = 23,
  cgt_f4 = 11,
  cgt_f8 = 12,
  cgt_f16 = 13,
  cgt_c8 = 14,
  cgt_c16 = 15,
  cgt_c32 = 16,
  cgt_O = 17
} cgtDtype;
// print np.dtype('i1').num # etc

static inline int cgt_itemsize(cgtDtype dtype) {
  switch (dtype) {
    case cgt_i1:  return 1;
    case cgt_i2:  return 2;
    case cgt_i4:  return 4;
    case cgt_i8:  return 8;
    case cgt_f2:  return 2;
    case cgt_f4:  return 4;
    case cgt_f8:  return 8;
    case cgt_f16: return 16;
    case cgt_c8:  return 8;
    case cgt_c16: return 16;
    case cgt_c32: return 32;
    case cgt_O:   return 8;
  }
  assert(0 && "invalid dtype");
  return -1;
}

typedef enum cgtDevtype {
  cgtCPU,
  cgtGPU
} cgtDevtype;

class cgtObject {
public:
  enum ObjectKind {
    UndefKind=0,
    ArrayKind,
    TupleKind
  };
  cgtObject() : kind_(UndefKind), ref_cnt(0) { }
  cgtObject(ObjectKind kind) : kind_(kind), ref_cnt(0) { }
  ObjectKind kind() const {return kind_;}
  // for refcounting:
  void Retain() const { ++ref_cnt; }
  inline void Release() const;
private:
  ObjectKind kind_;
  mutable std::atomic<unsigned> ref_cnt;
};

class cgtArray : public cgtObject {
public:
  cgtArray(int ndim, const long* shape, cgtDtype dtype, cgtDevtype devtype);
  cgtArray(int ndim, const long* shape, cgtDtype dtype, cgtDevtype devtype, void* fromdata, bool copy);
  ~cgtArray();

  int ndim() const { return ndim_; }
  const long* shape() const { return shape_; }
  long size() const {
    long s = 1;
    for (int i = 0; i < ndim_; ++i) {
      s *= shape_[i];
    }
    return s;
  }
  long nbytes() const { return size() * cgt_itemsize(dtype_); }
  long stride(int i) const {
    if (ndim_ == 0) {
      return 0;
    }
    assert(0 <= i && i < ndim_ && ndim_ >= 1);
    int s = 1;
    for (int j = i; j < ndim_ - 1; ++j) { // ndim_ >= 1 here, so (ndim_ - 1) >= 0
      s *= shape_[j + 1];
    }
    return s;
  }
  cgtDtype dtype() const { return dtype_; }
  cgtDevtype devtype() const { return devtype_; }
  bool ownsdata() const { return ownsdata_; }
  void* data() { return data_; }


  template <typename T>
  T& at() {return static_cast<T*>(data_)[0];}
  template <typename T>
  T& at(long i) {return static_cast<T*>(data_)[i];}
  template <typename T>
  T& at(long i, long j) {return static_cast<T*>(data_)[i*shape_[1]+j];}
  template <typename T>
  T& at(long i, long j, long k) {return static_cast<T*>(data_)[(i*shape_[1]+j)*shape_[2]+k];}
  template <typename T>
  T& at(long i, long j, long k, long l) {return static_cast<T*>(data_)[((i*shape_[1]+j)*shape_[2]+k)*shape_[3]+l];}
  void print();

private:
  const int ndim_;
  const long* shape_;
  const cgtDtype dtype_;
  const cgtDevtype devtype_;
  const bool ownsdata_;
  void* data_;
};

class cgtTuple : public cgtObject {
public:
  cgtTuple(int len);
  void setitem(int i, cgtObject *o) {
    members[i] = o;
  }
  cgtObject *getitem(int i) {
    return members[i].get();
  }
  int size() {return len;}
  ~cgtTuple();
  int len;
  IRC<cgtObject> *members;
};

void cgtObject::Release() const {
  assert (ref_cnt > 0 && "Reference count is already zero.");
  if (--ref_cnt == 0) {
    if (kind_==ArrayKind) delete (const cgtArray *)this; // XXX is this legit?
    else if (kind_==TupleKind) delete (const cgtTuple *)this;
    else assert(0 && "invalid kind");
  }
}

/*
Copy from -> to, transferring data between devices if necessary
*/
void cgt_copy_object(cgtObject* to, cgtObject* from);
void cgt_copy_array(cgtArray* to, cgtArray* from);
void cgt_copy_tuple(cgtTuple* to, cgtTuple* from);

typedef void (*cgtByRefFun)(void * /* closure data */, cgtObject ** /* read */, cgtObject * /* write */);
typedef cgtObject *(*cgtByValFun)(void * /* closure data */, cgtObject ** /* read */);

// ================================================================
// Error handling
// ================================================================

#define cgt_assert(x) \
  do {\
    if (!(x)) {\
      fprintf (stderr, "Assertion failed: %s (%s:%d)\n", #x, \
               __FILE__, __LINE__);\
      fflush (stderr);\
      abort();\
    }\
  } while (0)

#define CGT_NORETURN __attribute__ ((noreturn))


typedef enum {
  cgtStatusOK = 0,
  cgtStatusErr
} cgtStatus;

extern cgtStatus cgtGlobalStatus;
extern char cgtGlobalErrorMsg[1000];

static inline void clear_error() {
  cgtGlobalStatus = cgtStatusOK;
}

// TODO can do it more safely now that we're in c++
#define cgt_check(x, msg, ...) \
  do {\
    if ((!(x))) {\
      sprintf(cgtGlobalErrorMsg, msg, ##__VA_ARGS__);\
      cgtGlobalStatus = cgtStatusErr;\
    }\
  } while(0)


// ================================================================
// Memory management
// ================================================================

static inline bool cgt_is_array(cgtObject *o) { return o->kind() == cgtObject::ArrayKind; }
static inline bool cgt_is_tuple(cgtObject *o) { return o->kind() == cgtObject::TupleKind; }

void *cgt_alloc(cgtDevtype devtype, long size);
void cgt_free(cgtDevtype devtype, void *ptr);
void cgt_memcpy(cgtDevtype dest_type, cgtDevtype src_type, void *dest_ptr, void *src_ptr, long nbytes);
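cgtArray::stride above returns element strides (not byte strides) of a packed row-major array: stride(i) is the product of the trailing dimensions. A NumPy cross-check of that formula, for intuition:

    import numpy as np

    a = np.empty((2, 3, 4), dtype=np.float32)           # packed row-major, like cgtArray
    elem_strides = [s // a.itemsize for s in a.strides] # NumPy strides are in bytes
    assert elem_strides == [12, 4, 1]
    assert elem_strides == [int(np.prod(a.shape[i+1:])) for i in range(a.ndim)]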
--------------------------------------------------------------------------------
/include/cgt_cuda.h:
--------------------------------------------------------------------------------

#pragma once
#include "cuda_runtime.h"
#include "cublas_v2.h"
#include "stdio.h"

#define CGT_EXPORT_C extern "C" __attribute__((visibility("default")))

// Code mostly ripped off from Caffe

// CUDA: various checks for different function calls.
#define CUDA_CHECK(condition) \
  /* Code block avoids redefinition of cudaError_t error */ \
  do { \
    cudaError_t error = condition; \
    if (error != cudaSuccess) printf("%s\n", cudaGetErrorString(error)); \
  } while (0)

#define CUDA_CHECK_ERROR(msg) do { \
    cudaError_t e = cudaGetLastError(); \
    if (e != cudaSuccess) {printf("%s\n", cudaGetErrorString(e));} \
  } while (0)

#define CUBLAS_CHECK(condition) \
  do { \
    cublasStatus_t status = condition; \
    if (status != CUBLAS_STATUS_SUCCESS) printf("%s\n", cublasGetErrorString(status)); \
  } while (0)

#define CURAND_CHECK(condition) \
  do { \
    curandStatus_t status = condition; \
    if (status != CURAND_STATUS_SUCCESS) printf("%s\n", curandGetErrorString(status)); \
  } while (0)

// CUDA: grid stride looping
#define CUDA_KERNEL_LOOP(i, n) \
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
       i < (n); \
       i += blockDim.x * gridDim.x)

// CUDA: check for error after kernel execution and exit loudly if there is one.
#define CUDA_POST_KERNEL_CHECK CUDA_CHECK(cudaPeekAtLastError())

// this is from Minerva
// todo: what's the rationale?
static void cgt_get_bt(size_t size, int& num_blocks, int& num_threads) {
  if(size <= 32)
    num_threads = 32;
  else if(size <= 64)
    num_threads = 64;
  else if(size <= 128)
    num_threads = 128;
  else if(size <= 256)
    num_threads = 256;
  else if(size <= 512)
    num_threads = 512;
  else
    num_threads = 1024;
  num_blocks = (int)(((size + num_threads - 1) / num_threads));
  if (num_blocks < 0 || 128 < num_blocks) {
    num_blocks = 128;
  }
}

inline const char* cublasGetErrorString(cublasStatus_t status) {
  switch (status) {
    case CUBLAS_STATUS_SUCCESS:
      return "CUBLAS_STATUS_SUCCESS";
    case CUBLAS_STATUS_NOT_INITIALIZED:
      return "CUBLAS_STATUS_NOT_INITIALIZED";
    case CUBLAS_STATUS_ALLOC_FAILED:
      return "CUBLAS_STATUS_ALLOC_FAILED";
    case CUBLAS_STATUS_INVALID_VALUE:
      return "CUBLAS_STATUS_INVALID_VALUE";
    case CUBLAS_STATUS_ARCH_MISMATCH:
      return "CUBLAS_STATUS_ARCH_MISMATCH";
    case CUBLAS_STATUS_MAPPING_ERROR:
      return "CUBLAS_STATUS_MAPPING_ERROR";
    case CUBLAS_STATUS_EXECUTION_FAILED:
      return "CUBLAS_STATUS_EXECUTION_FAILED";
    case CUBLAS_STATUS_INTERNAL_ERROR:
      return "CUBLAS_STATUS_INTERNAL_ERROR";
    case CUBLAS_STATUS_NOT_SUPPORTED:
      return "CUBLAS_STATUS_NOT_SUPPORTED";
    default:
      break;
  }
  return "Unknown cuBLAS status";
}
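For reference, cgt_get_bt's launch-sizing rule restated in Python: threads is the smallest power of two in [32, 1024] that covers size, and blocks is ceil(size/threads) capped at 128. The cap is safe because the kernels iterate with CUDA_KERNEL_LOOP, a grid-stride loop, so a small grid still touches every element. This mirror is illustrative, not part of the library:

    def get_blocks_threads(size):
        num_threads = 1024
        for t in (32, 64, 128, 256, 512):
            if size <= t:
                num_threads = t
                break
        num_blocks = min((size + num_threads - 1) // num_threads, 128)
        return num_blocks, num_threads

    assert get_blocks_threads(1000) == (1, 1024)
    assert get_blocks_threads(10**6) == (128, 1024)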
--------------------------------------------------------------------------------
/include/execution.h:
--------------------------------------------------------------------------------

#pragma once
#include "cgt_common.h"
#include <string>
#include <vector>
#include <cstddef>

namespace cgt {
using std::vector;

// note: no-args initializers are only here because they're required by cython

class ByRefCallable {
public:
  cgtByRefFun fptr;
  void* data;
  ByRefCallable(cgtByRefFun fptr, void* data) : fptr(fptr), data(data) {}
  ByRefCallable() : fptr(NULL), data(NULL) {}
  void operator()(cgtObject ** reads, cgtObject * write) {
    (*fptr)(data, reads, write);
  }
};

struct ByValCallable {
public:
  cgtByValFun fptr;
  void* data;
  ByValCallable(cgtByValFun fptr, void* data) : fptr(fptr), data(data) {}
  ByValCallable() : fptr(NULL), data(NULL) {}
  cgtObject * operator()(cgtObject ** args) {
    return (*fptr)(data, args);
  }
};

class MemLocation {
public:
  MemLocation() : index_(0), devtype_(cgtCPU) {}
  MemLocation(long index, cgtDevtype devtype) : index_(index), devtype_(devtype) {}
  long index() const { return index_; }
  cgtDevtype devtype() const { return devtype_; }
private:
  long index_;
  cgtDevtype devtype_; // TODO: full device, not just devtype
};

class Interpreter;

enum InstructionKind {
  LoadArgumentKind,
  AllocKind,
  BuildTupKind,
  ReturnByRefKind,
  ReturnByValKind
};

class Instruction {
public:
  Instruction(InstructionKind kind, const std::string& repr, bool quick) : kind_(kind), repr_(repr), quick_(quick) { }
  virtual void fire(Interpreter*)=0;
  virtual ~Instruction() {};
  virtual const vector<MemLocation>& get_readlocs() const=0;
  virtual const MemLocation& get_writeloc() const=0;
  const std::string& repr() const { return repr_; }
  InstructionKind kind() const {return kind_;}
  bool quick() {return quick_;}
private:
  InstructionKind kind_;
  std::string repr_;
  bool quick_;
};

class ExecutionGraph {
public:
  ExecutionGraph(const vector<Instruction*>& instrs, long n_args, long n_locs)
    : instrs_(instrs), n_args_(n_args), n_locs_(n_locs) {}
  ~ExecutionGraph();
  const vector<Instruction*>& instrs() const {return instrs_;}
  long n_args() const {return n_args_;}
  long n_locs() const {return n_locs_;}
  long n_instrs() const {return instrs_.size();}
private:
  vector<Instruction*> instrs_; // owns, will delete
  long n_args_;
  long n_locs_;
};

class Interpreter {
public:
  // called by external code
  virtual cgtTuple * run(cgtTuple *)=0;
  // called by instructions:
  virtual cgtObject * get(const MemLocation&)=0;
  virtual void set(const MemLocation&, cgtObject *)=0;
  virtual cgtObject * getarg(int)=0;
  virtual ~Interpreter() {}
};

// pass by value because of cython
Interpreter* create_interpreter(ExecutionGraph*, vector<MemLocation> output_locs, int num_threads);

class LoadArgument : public Instruction {
public:
  LoadArgument(const std::string& repr, int ind, const MemLocation& writeloc) : Instruction(LoadArgumentKind, repr, true), ind(ind), writeloc(writeloc) {}
  void fire(Interpreter*);
  const vector<MemLocation>& get_readlocs() const { return readlocs; }
  const MemLocation& get_writeloc() const { return writeloc; }
private:
  int ind;
  vector<MemLocation> readlocs; // empty
  MemLocation writeloc;
};


class Alloc : public Instruction {
public:
  Alloc(const std::string& repr, cgtDtype dtype, vector<MemLocation> readlocs, const MemLocation& writeloc)
    : Instruction(AllocKind, repr, true), dtype(dtype), readlocs(readlocs), writeloc(writeloc) {}
  void fire(Interpreter*);
  const vector<MemLocation>& get_readlocs() const { return readlocs; }
  const MemLocation& get_writeloc() const { return writeloc; }
private:
  cgtDtype dtype;
  vector<MemLocation> readlocs;
  MemLocation writeloc;
};

class BuildTup : public Instruction {
public:
  BuildTup(const std::string& repr, vector<MemLocation> readlocs, const MemLocation& writeloc)
    : Instruction(BuildTupKind, repr, true), readlocs(readlocs), writeloc(writeloc) {}
  void fire(Interpreter*);
  const vector<MemLocation>& get_readlocs() const { return readlocs; }
  const MemLocation& get_writeloc() const { return writeloc; }
private:
  vector<MemLocation> readlocs;
  MemLocation writeloc;
};

class ReturnByRef : public Instruction {
public:
  ReturnByRef(const std::string& repr, vector<MemLocation> readlocs, const MemLocation& writeloc, ByRefCallable callable, bool quick)
    : Instruction(ReturnByRefKind, repr, quick), readlocs(readlocs), writeloc(writeloc), callable(callable) {}
  void fire(Interpreter*);
  const vector<MemLocation>& get_readlocs() const { return readlocs; }
  const MemLocation& get_writeloc() const { return writeloc; }
private:
  vector<MemLocation> readlocs;
  MemLocation writeloc;
  ByRefCallable callable;
};

class ReturnByVal : public Instruction {
public:
  ReturnByVal(const std::string& repr, vector<MemLocation> readlocs, const MemLocation& writeloc, ByValCallable callable, bool quick)
    : Instruction(ReturnByValKind, repr, quick), readlocs(readlocs), writeloc(writeloc), callable(callable) {}
  void fire(Interpreter*);
  const vector<MemLocation>& get_readlocs() const { return readlocs; }
  const MemLocation& get_writeloc() const { return writeloc; }
private:
  vector<MemLocation> readlocs;
  MemLocation writeloc;
  ByValCallable callable;
};


}
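The instruction set above (load an argument, allocate, build a tuple, call by ref or by value) is small, and the interpreter is essentially a loop over instructions that read some memory locations and write one. A toy Python model may help fix the semantics; all names here are invented for illustration, and the real interpreter additionally handles devices, by-ref calls, and a parallel mode:

    def run(instrs, n_locs, args):
        storage = [None]*n_locs                  # one slot per MemLocation index
        for kind, readlocs, writeloc, payload in instrs:
            if kind == "LoadArgument":           # payload = argument index
                storage[writeloc] = args[payload]
            elif kind == "ReturnByVal":          # payload = the op's callable
                storage[writeloc] = payload(*[storage[i] for i in readlocs])
        return storage

    # computes (x+1)*2 for x = 20
    out = run([("LoadArgument", [], 0, 0),
               ("ReturnByVal", [0], 1, lambda x: x+1),
               ("ReturnByVal", [1], 2, lambda y: y*2)], 3, [20])
    assert out[2] == 42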
--------------------------------------------------------------------------------
/include/im2col.h:
--------------------------------------------------------------------------------

// JDS: Modified Caffe code so channels are last dimension in output, and so arguments are template parameters
// (actually gives a good speedup). could be further sped up by rearranging loops
// so inner loops can be unrolled

template <typename Dtype, int kernel_h, int kernel_w, int pad_h, int pad_w, int stride_h, int stride_w>
void im2col_cpu(const Dtype* data_im, int channels, int height, int width, Dtype* data_col) {
  int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
  int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
  int channels_col = channels * kernel_h * kernel_w;

  for (int c = 0; c < channels_col; ++c) {
    int w_offset = c % kernel_w;
    int h_offset = (c / kernel_w) % kernel_h;
    int c_im = c / kernel_h / kernel_w;
    for (int h = 0; h < height_col; ++h) {
      for (int w = 0; w < width_col; ++w) {
        int h_pad = h * stride_h - pad_h + h_offset;
        int w_pad = w * stride_w - pad_w + w_offset;
        if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
          data_col[channels_col*width_col*h + channels_col*w + c] =
            data_im[(c_im * height + h_pad) * width + w_pad];
        else
          data_col[channels_col*width_col*h + channels_col*w + c] = 0;
      }
    }
  }
}

template <typename Dtype, int patch_h, int patch_w, int pad_h, int pad_w, int stride_h, int stride_w>
void col2im_cpu(const Dtype* data_col, const int channels, int height, int width, Dtype* data_im) {
  for (int i=0; i < height * width * channels; ++i) data_im[i] = Dtype(0);
  int height_col = (height + 2 * pad_h - patch_h) / stride_h + 1;
  int width_col = (width + 2 * pad_w - patch_w) / stride_w + 1;
  int channels_col = channels * patch_h * patch_w;
  for (int c = 0; c < channels_col; ++c) {
    int w_offset = c % patch_w;
    int h_offset = (c / patch_w) % patch_h;
    int c_im = c / patch_h / patch_w;
    for (int h = 0; h < height_col; ++h) {
      for (int w = 0; w < width_col; ++w) {
        int h_pad = h * stride_h - pad_h + h_offset;
        int w_pad = w * stride_w - pad_w + w_offset;
        if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
          data_im[(c_im * height + h_pad) * width + w_pad] +=
            data_col[channels_col*width_col*h + channels_col*w + c];
      }
    }
  }
}
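A NumPy reference for the layout im2col_cpu produces: channels last, with each patch flattened channel-major, then by row, then by column. Stride 1 and no padding here to keep it short (illustrative only). Once patches are materialized this way, convolution is a single matrix multiply:

    import numpy as np

    def im2col_ref(im, kh, kw):
        c, h, w = im.shape                      # (channels, height, width)
        hc, wc = h - kh + 1, w - kw + 1
        out = np.empty((hc, wc, c*kh*kw), dtype=im.dtype)
        for i in range(hc):
            for j in range(wc):
                out[i, j, :] = im[:, i:i+kh, j:j+kw].ravel()
        return out

    im = np.random.randn(3, 8, 8).astype('f')
    cols = im2col_ref(im, 3, 3)                 # (6, 6, 27)
    filt = np.random.randn(27, 16).astype('f')
    y = cols.reshape(-1, 27).dot(filt)          # convolution as one GEMM
    assert y.shape == (36, 16)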
--------------------------------------------------------------------------------
/include/lrn.cuh:
--------------------------------------------------------------------------------

// Copied from Minerva

template <typename Dtype>
__global__ static void LRNFillScale(const int nthreads, const Dtype* in, const int num, const int channels,
    const int height, const int width, const int size, const Dtype alpha_over_size, Dtype* scale) {
  for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x ) {
    // find out the local offset
    int w = index % width;
    int h = (index / width) % height;
    int n = index / width / height;
    int offset = (n * channels * height + h) * width + w;
    int step = height * width;
    const Dtype* shifted_in = in + offset;
    Dtype* shifted_scale = scale + offset;
    int head = 0;
    int pre_pad = (size - 1) / 2;
    int post_pad = size - pre_pad - 1;
    Dtype accum_scale = 0;
    // fill the scale at [n, :, h, w]
    // accumulate values
    while (head < post_pad) {
      accum_scale += shifted_in[head * step] * shifted_in[head * step];
      ++head;
    }
    // until we reach size, nothing needs to be subtracted
    while (head < size) {
      accum_scale += shifted_in[head * step] * shifted_in[head * step];
      shifted_scale[(head - post_pad) * step] = 1. + accum_scale * alpha_over_size;
      ++head;
    }
    // both add and subtract
    while (head < channels) {
      accum_scale += shifted_in[head * step] * shifted_in[head * step];
      accum_scale -= shifted_in[(head - size) * step] * shifted_in[(head - size) * step];
      shifted_scale[(head - post_pad) * step] = 1. + accum_scale * alpha_over_size;
      ++head;
    }
    // subtract only
    while (head < channels + post_pad) {
      accum_scale -= shifted_in[(head - size) * step] * shifted_in[(head - size) * step];
      shifted_scale[(head - post_pad) * step] = 1. + accum_scale * alpha_over_size;
      ++head;
    }
  }
}

template <typename Dtype>
__global__ static void LRNComputeOutput(const int nthreads, const Dtype* in,
    const Dtype* scale, const Dtype negative_beta, Dtype* out) {
  for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x ) {
    out[index] = in[index] * pow(scale[index], negative_beta);
  }
}

template <typename Dtype>
__global__ static void LRNComputeDiff(const int nthreads, const Dtype* bottom_data,
    const Dtype* top_data, const Dtype* scale, const Dtype* top_diff,
    const int num, const int channels, const int height,
    const int width, const int size, const Dtype negative_beta,
    const Dtype cache_ratio,
    Dtype* bottom_diff) {

  for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x ) {
    // find out the local offset
    int w = index % width;
    int h = (index / width) % height;
    int n = index / width / height;
    int offset = (n * channels * height + h) * width + w;
    int step = height * width;
    const Dtype* shifted_btm_data = bottom_data + offset;
    const Dtype* shifted_top_data = top_data + offset;
    const Dtype* shifted_scale = scale + offset;
    const Dtype* shifted_top_diff = top_diff + offset;
    Dtype* shifted_btm_diff = bottom_diff + offset;
    int head = 0;
    int pre_pad = size - (size + 1) / 2;
    int post_pad = size - pre_pad - 1;
    Dtype accum_ratio = 0;
    // accumulate values
    while (head < post_pad) {
      accum_ratio += shifted_top_diff[head * step] * shifted_top_data[head * step] /
          shifted_scale[head * step];
      ++head;
    }
    // until we reach size, nothing needs to be subtracted
    while (head < size) {
      accum_ratio += shifted_top_diff[head * step] * shifted_top_data[head * step] /
          shifted_scale[head * step];
      shifted_btm_diff[(head - post_pad) * step] = shifted_top_diff[(head - post_pad) * step]
          * pow(shifted_scale[(head - post_pad) * step], negative_beta) - cache_ratio *
          shifted_btm_data[(head - post_pad) * step] * accum_ratio;
      ++head;
    }
    // both add and subtract
    while (head < channels) {
      accum_ratio += shifted_top_diff[head * step] * shifted_top_data[head * step] /
          shifted_scale[head * step];
      accum_ratio -= shifted_top_diff[(head - size) * step] *
          shifted_top_data[(head - size) * step] / shifted_scale[(head - size) * step];
      shifted_btm_diff[(head - post_pad) * step] = shifted_top_diff[(head - post_pad) * step]
          * pow(shifted_scale[(head - post_pad) * step], negative_beta) - cache_ratio *
          shifted_btm_data[(head - post_pad) * step] * accum_ratio;
      ++head;
    }
    // subtract only
    while (head < channels + post_pad) {
      accum_ratio -= shifted_top_diff[(head - size) * step] *
          shifted_top_data[(head - size) * step] / shifted_scale[(head - size) * step];
      shifted_btm_diff[(head - post_pad) * step] = shifted_top_diff[(head - post_pad) * step]
          * pow(shifted_scale[(head - post_pad) * step], negative_beta) - cache_ratio *
          shifted_btm_data[(head - post_pad) * step] * accum_ratio;
      ++head;
    }
  }
}
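What the two forward kernels compute, as a NumPy reference derived from LRNFillScale's pre/post padding (batch layout (N, C, H, W); the window of `size` channels is centered on each output channel): scale = 1 + (alpha/size) * running sum of squares over the channel window, out = in * scale^(-beta). Illustrative only:

    import numpy as np

    def lrn_ref(x, size, alpha, beta):
        n, c, h, w = x.shape
        sq = x**2
        scale = np.ones_like(x)
        pre_pad = (size - 1)//2
        for i in range(c):
            lo, hi = max(0, i - pre_pad), min(c, i - pre_pad + size)
            scale[:, i] += (alpha/size)*sq[:, lo:hi].sum(axis=1)
        return x*scale**(-beta)

    x = np.random.rand(2, 8, 5, 5).astype('f')
    y = lrn_ref(x, size=5, alpha=1e-4, beta=0.75)
    assert y.shape == x.shape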
--------------------------------------------------------------------------------
/include/pooling.h:
--------------------------------------------------------------------------------

#pragma once
#include <algorithm>
#include <cfloat>
// Copied & modified from Caffe


struct conv_closure {
  int kernel_h;
  int kernel_w;
  int pad_h;
  int pad_w;
  int stride_h;
  int stride_w;
};


template <typename Dtype>
void caffe_set(const int N, const Dtype alpha, Dtype* Y) {
  for (int i = 0; i < N; ++i) {
    Y[i] = alpha;
  }
}

template <typename Dtype>
void max_pool(conv_closure* cl, cgtArray* bottom, cgtArray* top, cgtArray* mask) {
  using std::max;
  using std::min;
  Dtype* bottom_data = static_cast<Dtype*>(bottom->data());
  Dtype* top_data = static_cast<Dtype*>(top->data());
  const int top_count = top->size();
  // We'll output the mask to top[1] if it's of size >1.
  int* mask_data = static_cast<int*>(mask->data());
  caffe_set(top_count, Dtype(-FLT_MAX), top_data);
  caffe_set(top_count, -1, mask_data);
  // The main loop

  int batchsize = top->shape()[0],
      channels = top->shape()[1],
      pooledheight = top->shape()[2],
      pooledwidth = top->shape()[3],
      height = bottom->shape()[2],
      width = bottom->shape()[3];

  for (int n = 0; n < batchsize; ++n) {
    for (int c = 0; c < channels; ++c) {
      for (int ph = 0; ph < pooledheight; ++ph) {
        for (int pw = 0; pw < pooledwidth; ++pw) {
          int hstart = ph * cl->stride_h - cl->pad_h;
          int wstart = pw * cl->stride_w - cl->pad_w;
          int hend = min(hstart + cl->kernel_h, height);
          int wend = min(wstart + cl->kernel_w, width);
          hstart = max(hstart, 0);
          wstart = max(wstart, 0);
          const int pool_index = ph * pooledwidth + pw;
          for (int h = hstart; h < hend; ++h) {
            for (int w = wstart; w < wend; ++w) {
              const int index = h * width + w;
              if (bottom_data[index] > top_data[pool_index]) {
                top_data[pool_index] = bottom_data[index];
                mask_data[pool_index] = index;
              }
            }
          }
        }
      }
      bottom_data += bottom->stride(1);
      top_data += top->stride(1);
      mask_data += top->stride(1);
    }
  }
}

template <typename Dtype>
void max_pool_pullback(cgtArray* bottom, cgtArray* top, cgtArray* mask,
    cgtArray* top_diff, cgtArray* bottom_diff) {
  const Dtype* top_diff_data = static_cast<Dtype*>(top_diff->data());
  Dtype* bottom_diff_data = static_cast<Dtype*>(bottom_diff->data());
  // Different pooling methods. We explicitly do the switch outside the for
  // loop to save time, although this results in more code.
  caffe_set(bottom_diff->size(), Dtype(0), bottom_diff_data);
  // We'll output the mask to top[1] if it's of size >1.
  int* mask_data = static_cast<int*>(mask->data());

  int batchsize = top->shape()[0],
      channels = top->shape()[1],
      pooledheight = top->shape()[2],
      pooledwidth = top->shape()[3],
      height = bottom->shape()[2],
      width = bottom->shape()[3];

  for (int n = 0; n < batchsize; ++n) {
    for (int c = 0; c < channels; ++c) {
      for (int ph = 0; ph < pooledheight; ++ph) {
        for (int pw = 0; pw < pooledwidth; ++pw) {
          const int index = ph * pooledwidth + pw;
          const int bottom_index = mask_data[index];
          bottom_diff_data[bottom_index] += top_diff_data[index];
        }
      }
      bottom_diff_data += bottom->stride(1);
      top_diff_data += top->stride(1);
      mask_data += mask->stride(1);
    }
  }

}
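A NumPy reference for the non-overlapping case of the pair above: the forward pass records an argmax mask, and the pullback just scatters the incoming gradient through that mask, which is why max_pool writes `mask` alongside `top`. A 2-D sketch (illustrative, no batch or channel dimensions):

    import numpy as np

    def max_pool_ref(x, kh, kw):
        h, w = x.shape[0]//kh*kh, x.shape[1]//kw*kw
        t = x[:h, :w].reshape(h//kh, kh, w//kw, kw).transpose(0, 2, 1, 3)
        flat = t.reshape(h//kh, w//kw, kh*kw)
        return flat.max(axis=2), flat.argmax(axis=2)

    x = np.arange(16.).reshape(4, 4)
    out, mask = max_pool_ref(x, 2, 2)
    assert out.tolist() == [[5.0, 7.0], [13.0, 15.0]]
    assert (mask == 3).all()   # each window's max is its bottom-right entry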
--------------------------------------------------------------------------------
/scripts/cgt-clear-cache:
--------------------------------------------------------------------------------

#!/usr/bin/env python
import cgt, os.path as osp, subprocess, sys
config = cgt.get_config()
cache_dir = config["cache_dir"]
cmd = "rm -rf %s"%osp.expandvars(cache_dir)
while True:
    sys.stderr.write("About to run \x1b[32m%s\x1b[0m. OK? (y/n): "%cmd)
    yn = raw_input()
    if yn=='y':
        subprocess.check_call(cmd, shell=True)
        break
    elif yn=='n':
        break
--------------------------------------------------------------------------------
/src/cgt_common.cpp:
--------------------------------------------------------------------------------

#include "stdlib.h"
#include "assert.h"
#include "memory.h"
#include "stdio.h"
#include "cgt_common.h"

#ifdef CGT_ENABLE_CUDA
#include "cgt_cuda.h"
#endif

// ================================================================
// Object alloc/dealloc
// ================================================================

cgtArray::cgtArray(int ndim, const long* shape, cgtDtype dtype, cgtDevtype devtype)
    : cgtObject(ObjectKind::ArrayKind),
      ndim_(ndim),
      dtype_(dtype),
      devtype_(devtype),
      ownsdata_(true) {
  shape_ = new long[ndim];
  memcpy(const_cast<long*>(shape_), shape, ndim * sizeof(long));
  data_ = cgt_alloc(devtype_, nbytes());
}

cgtArray::cgtArray(int ndim, const long* shape, cgtDtype dtype, cgtDevtype devtype, void* fromdata, bool copy)
    : cgtObject(ObjectKind::ArrayKind),
      ndim_(ndim),
      shape_(shape),
      dtype_(dtype),
      devtype_(devtype),
      ownsdata_(copy) {
  cgt_assert(fromdata != NULL);
  shape_ = new long[ndim];
  memcpy(const_cast<long*>(shape_), shape, ndim * sizeof(long));
  if (copy) {
    data_ = cgt_alloc(devtype, nbytes());
    cgt_memcpy(devtype, cgtCPU, data_, fromdata, nbytes());
  } else {
    data_ = fromdata;
  }
}

void cgtArray::print() {
  printf("Array{shape=(");
  if (ndim_ > 0) printf("%ld", shape_[0]);
  for (int i=1; i < ndim_; ++i) {
    printf(", %ld", shape_[i]);
  }
  printf("), dtype=%i}", dtype_);
}

cgtArray::~cgtArray() {
  delete[] shape_;
  if (ownsdata_) cgt_free(devtype_, data_);
}

cgtTuple::cgtTuple(int len)
    : cgtObject(ObjectKind::TupleKind), len(len) {
  members = new IRC<cgtObject>[len];
}

cgtTuple::~cgtTuple() {
  delete[] members;
}


// ================================================================
// Copying
// ================================================================

void cgt_copy_object(cgtObject* to, cgtObject* from) {
  cgt_assert(to->kind() == from->kind());
  if (to->kind() == cgtObject::ArrayKind) {
    cgt_copy_array(static_cast<cgtArray*>(to), static_cast<cgtArray*>(from));
  }
  else if (to->kind() == cgtObject::TupleKind) {
    cgt_copy_tuple(static_cast<cgtTuple*>(to), static_cast<cgtTuple*>(from));
  }
  else cgt_assert(0 && "unreachable");
}

void cgt_copy_array(cgtArray* to, cgtArray* from) {
  cgt_assert(from->size() == to->size() && from->dtype() == to->dtype()) ;
  cgt_memcpy(to->devtype(), from->devtype(), to->data(), from->data(), from->nbytes());
}

void cgt_copy_tuple(cgtTuple* to, cgtTuple* from) {
  for (int i=0; i < to->size(); ++i) cgt_copy_object(to->getitem(i), from->getitem(i));
}



// ================================================================
// Error handling
// ================================================================

void cgt_abort() {
  abort();
}

cgtStatus cgtGlobalStatus = cgtStatusOK;
char cgtGlobalErrorMsg[1000];


// ================================================================
// Memory management
// ================================================================

void *cgt_alloc(cgtDevtype devtype, long size) {
  if (devtype == cgtCPU) {
    return malloc(size);
  }
  else {
#ifdef CGT_ENABLE_CUDA
    void* out;
    CUDA_CHECK(cudaMalloc(&out, size));
    return out;
#else
    cgt_assert(0 && "CUDA disabled");
    return NULL;
#endif
  }
}

void cgt_free(cgtDevtype devtype, void *ptr) {
  if (devtype == cgtCPU) {
    free(ptr);
  }
  else {
#ifdef CGT_ENABLE_CUDA
    CUDA_CHECK(cudaFree(ptr));
#else
    cgt_assert(0 && "CUDA disabled");
#endif
  }
}

void cgt_memcpy(cgtDevtype dest_type, cgtDevtype src_type, void *dest_ptr, void *src_ptr, long nbytes) {
  if (src_type == cgtCPU && dest_type == cgtCPU) {
    memcpy(dest_ptr, src_ptr, nbytes);
  } else {
#ifdef CGT_ENABLE_CUDA
    enum cudaMemcpyKind kind;
    if (src_type == cgtCPU && dest_type == cgtGPU) kind = cudaMemcpyHostToDevice;
    else if (src_type == cgtGPU && dest_type == cgtCPU) kind = cudaMemcpyDeviceToHost;
    else if (src_type == cgtGPU && dest_type == cgtGPU) kind = cudaMemcpyDeviceToDevice;
    else cgt_assert(0 && "invalid src/dest types");
    CUDA_CHECK(cudaMemcpy(dest_ptr, src_ptr, nbytes, kind));
#else
    cgt_assert(0 && "CUDA disabled");
#endif
  }
}
--------------------------------------------------------------------------------
/src/cuda_setup.c:
--------------------------------------------------------------------------------

#include "cgt_cuda.h"
#include "cublas.h"

CudaContext g_context;

#ifdef CGT_ENABLE_CUDA

void cuda_initialize() {
  CUDA_CHECK(cudaStreamCreate(&g_context.stream));
  CUBLAS_CHECK(cublasCreate_v2(&g_context.cublas_handle));
  CUBLAS_CHECK(cublasSetStream(g_context.cublas_handle, g_context.stream));
  // CUDNN_CHECK(cudnnCreate(&g_context.cudnn_handle));
  // CUDNN_CHECK(cudnnSetStream(g_context.cudnn_handle, g_context.stream));
}

#else

void cuda_initialize() {
}

#endif
--------------------------------------------------------------------------------
/src/util/ThreadPool.h:
--------------------------------------------------------------------------------

#ifndef THREAD_POOL_H
#define THREAD_POOL_H

// from https://github.com/progschj/ThreadPool/blob/master/ThreadPool.h

#include <vector>
#include <queue>
#include <memory>
#include <thread>
#include <mutex>
#include <condition_variable>
#include <future>
#include <functional>
#include <stdexcept>

class ThreadPool {
public:
    ThreadPool(size_t);
    template<class F, class... Args>
    auto enqueue(F&& f, Args&&... args)
        -> std::future<typename std::result_of<F(Args...)>::type>;
    ~ThreadPool();
private:
    // need to keep track of threads so we can join them
    std::vector< std::thread > workers;
    // the task queue
    std::queue< std::function<void()> > tasks;

    // synchronization
    std::mutex queue_mutex;
    std::condition_variable condition;
    bool stop;
};

// the constructor just launches some amount of workers
inline ThreadPool::ThreadPool(size_t threads)
    :   stop(false)
{
    for(size_t i = 0;i<threads;++i)
        workers.emplace_back(
            [this]
            {
                for(;;)
                {
                    std::function<void()> task;

                    {
                        std::unique_lock<std::mutex> lock(this->queue_mutex);
                        this->condition.wait(lock,
                            [this]{ return this->stop || !this->tasks.empty(); });
                        if(this->stop && this->tasks.empty())
                            return;
                        task = std::move(this->tasks.front());
                        this->tasks.pop();
                    }

                    task();
                }
            }
        );
}

// add new work item to the pool
template<class F, class... Args>
auto ThreadPool::enqueue(F&& f, Args&&... args)
    -> std::future<typename std::result_of<F(Args...)>::type>
{
    using return_type = typename std::result_of<F(Args...)>::type;

    auto task = std::make_shared< std::packaged_task<return_type()> >(
        std::bind(std::forward<F>(f), std::forward<Args>(args)...)
    );

    std::future<return_type> res = task->get_future();
    {
        std::unique_lock<std::mutex> lock(queue_mutex);

        // don't allow enqueueing after stopping the pool
        if(stop)
            throw std::runtime_error("enqueue on stopped ThreadPool");

        tasks.emplace([task](){ (*task)(); });
    }
    condition.notify_one();
    return res;
}

// the destructor joins all threads
inline ThreadPool::~ThreadPool()
{
    {
        std::unique_lock<std::mutex> lock(queue_mutex);
        stop = true;
    }
    condition.notify_all();
    for(std::thread &worker: workers)
        worker.join();
}

#endif
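The same submit-and-get-a-future contract that ThreadPool::enqueue provides above exists in Python's standard library as concurrent.futures (Python 3, or the `futures` backport on the Python 2 this repo targets). For intuition only:

    from concurrent.futures import ThreadPoolExecutor

    with ThreadPoolExecutor(max_workers=4) as pool:
        fut = pool.submit(pow, 2, 10)   # enqueue: package the call, return a future
        assert fut.result() == 1024     # block until a worker thread has run it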
--------------------------------------------------------------------------------
/thirdparty/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/joschu/cgt/90b15ab041fc2137e62b96e8612ccee605f71ceb/thirdparty/__init__.py
--------------------------------------------------------------------------------