├── .gitignore ├── .gitmodules ├── CHANGELOG.md ├── LICENSE ├── README.md ├── config.cmake ├── docs ├── background.html ├── css │ └── default.css ├── highlight │ ├── highlight.min.js │ └── solarized-light.css ├── index.html ├── js │ ├── jquery.min.js │ └── page_effects.js ├── tvm-clj.application.image.html ├── tvm-clj.application.kmeans.html ├── tvm-clj.ast.html ├── tvm-clj.compiler.html ├── tvm-clj.device.html ├── tvm-clj.module.html └── tvm-clj.schedule.html ├── examples ├── .gitignore ├── Dockerfile ├── README.md ├── box-blur.md ├── images │ └── result-cpu.jpg ├── project.clj ├── scripts │ ├── dev-docker.sh │ └── docker-repl.sh ├── src │ └── box_blur.clj └── test │ └── data │ └── test.jpg ├── java └── tvm_clj │ └── tvm │ ├── CFunction.java │ └── DLPack.java ├── project.clj ├── python └── questions │ ├── bind_buffer.py │ └── compute_at_gpu.py ├── scripts ├── get-mnist-data ├── python_install.sh ├── scan-tvm-obj-types └── setup_python ├── src └── tvm_clj │ ├── application │ ├── image.clj │ ├── kmeans.clj │ └── mnist.clj │ ├── ast.clj │ ├── ast │ └── elemwise_op.clj │ ├── compiler.clj │ ├── device.clj │ ├── impl │ ├── base.clj │ ├── definitions.clj │ ├── dl_tensor.clj │ ├── fns.clj │ ├── fns │ │ ├── arith.clj │ │ ├── auto_scheduler.clj │ │ ├── autotvm │ │ │ └── feature.clj │ │ ├── codegen.clj │ │ ├── device_api.clj │ │ ├── hybrid.clj │ │ ├── ir.clj │ │ ├── node.clj │ │ ├── parser.clj │ │ ├── relay │ │ │ ├── _make.clj │ │ │ ├── _quantize.clj │ │ │ ├── _transform.clj │ │ │ ├── _vm.clj │ │ │ ├── analysis.clj │ │ │ ├── backend.clj │ │ │ ├── build_module.clj │ │ │ ├── dataflow_pattern.clj │ │ │ ├── ext.clj │ │ │ ├── ir.clj │ │ │ ├── op.clj │ │ │ ├── op │ │ │ │ ├── _make.clj │ │ │ │ ├── annotation │ │ │ │ │ └── _make.clj │ │ │ │ ├── dyn │ │ │ │ │ ├── _make.clj │ │ │ │ │ ├── image │ │ │ │ │ │ └── _make.clj │ │ │ │ │ └── nn │ │ │ │ │ │ └── _make.clj │ │ │ │ ├── image │ │ │ │ │ └── _make.clj │ │ │ │ ├── memory │ │ │ │ │ └── _make.clj │ │ │ │ ├── nn │ │ │ │ │ └── _make.clj │ │ │ │ ├── vision │ │ │ │ │ └── _make.clj │ │ │ │ └── vm.clj │ │ │ └── qnn │ │ │ │ ├── _transform.clj │ │ │ │ └── op │ │ │ │ └── _make.clj │ │ ├── rpc.clj │ │ ├── runtime.clj │ │ ├── runtime │ │ │ └── module.clj │ │ ├── schedule.clj │ │ ├── script.clj │ │ ├── support.clj │ │ ├── target.clj │ │ ├── te.clj │ │ ├── test │ │ │ └── op.clj │ │ ├── testing.clj │ │ ├── tir.clj │ │ ├── tir │ │ │ ├── analysis.clj │ │ │ └── transform.clj │ │ ├── topi.clj │ │ ├── topi │ │ │ ├── cuda.clj │ │ │ ├── generic.clj │ │ │ ├── nn.clj │ │ │ ├── rocm.clj │ │ │ ├── util.clj │ │ │ ├── vision.clj │ │ │ └── x86.clj │ │ ├── transform.clj │ │ └── tvm │ │ │ ├── codegen │ │ │ └── llvm.clj │ │ │ ├── contrib │ │ │ ├── cblas.clj │ │ │ ├── mkl.clj │ │ │ ├── random.clj │ │ │ └── sort.clj │ │ │ ├── graph_runtime.clj │ │ │ ├── graph_runtime_factory.clj │ │ │ ├── intrin │ │ │ └── rule │ │ │ │ ├── aocl.clj │ │ │ │ ├── aocl_sw_emu.clj │ │ │ │ ├── cuda.clj │ │ │ │ ├── default.clj │ │ │ │ ├── hexagon.clj │ │ │ │ ├── llvm.clj │ │ │ │ ├── metal.clj │ │ │ │ ├── nvptx.clj │ │ │ │ ├── opencl.clj │ │ │ │ ├── rocm.clj │ │ │ │ └── sdaccel.clj │ │ │ ├── relay.clj │ │ │ ├── relay │ │ │ └── type_relation.clj │ │ │ └── rpc │ │ │ └── server.clj │ ├── library_paths.clj │ ├── module.clj │ ├── node.clj │ ├── protocols.clj │ ├── stream.clj │ ├── tvm_ns_fns.clj │ └── typenames.clj │ ├── module.clj │ └── schedule.clj ├── test ├── data │ ├── castle.jpg │ └── jen.jpg └── tvm_clj │ └── tvm_test.clj └── topics ├── background.md └── images ├── ref.jpg └── test.jpg /.gitignore: 
-------------------------------------------------------------------------------- 1 | target 2 | /classes 3 | /checkouts 4 | pom.xml 5 | pom.xml.asc 6 | *.jar 7 | *.class 8 | .lein-* 9 | .nrepl-port 10 | .hgignore 11 | .hg/ 12 | java/tvm_clj/tvm/runtime.java 13 | java/native 14 | resources/ 15 | *.jpg 16 | *ubyte 17 | *ubyte.gz -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | 2 | [submodule "incubator-tvm"] 3 | path = incubator-tvm 4 | url = git@github.com:techascent/incubator-tvm.git 5 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and 6 | this project adheres to a 2-part versioning scheme X.Y, where a change in X means you are 7 | probably fucked while a change in Y means you may not be fucked. 8 | 9 | 10 | ## [4.6] 11 | We will be staying on major versions now for TVM instead of tracking master. TVM has 12 | sufficiently matured that tracking master is unnecessary unless we want to make a major 13 | change to the TVM library itself. 14 | ### Fixed/Changed 15 | - tech.compute 3.18 16 | - tvm release v0.5 17 | 18 | 19 | ## [4.0] 20 | ### Fixed/Changed 21 | * Changed to newer resource system. Mainly naming changes and code reorganization bubbling 22 | up through the stack. 23 | 24 | ## [3.0] 25 | ### Fixed/Changed 26 | * Moved to newer versions of tech-ascent datatype libraries. 27 | * TVM things (tensors, AST-nodes, modules, functions, etc) are now both 28 | scope-based and gc-rooted. So the gc can help keep memory and total 29 | object counts lower, especially when you are describing complex systems using 30 | the api. Most AST nodes, for instance, when exposed to clojure are not really 31 | relevant to the larger picture, and thus if they happen to be gc'd before 32 | the resource context unwinds, then all the better. 33 | 34 | 35 | 36 | ## [2.0] 37 | ### Added 38 | * example project with dockerfile so anyone can try out the ubuntu version with 39 | only opencv installed. 40 | ### Fixed/Changed 41 | #### JNA FTW 42 | The binding layer now dynamically binds to tvm using jna instead of javacpp: 43 | 1. Thus we can bind to tvm installed in the system, built by the user, or packaged with the 44 | jar. Because tvm has so many system dependencies, it makes more sense for it to be 45 | built specifically for each system (mkl, not mkl, cuda, cudnn, rocm, etc) than 46 | for us to package the .so file. 47 | 2. The binding layer is split up into many files, making it far easier for 48 | people new to the project to understand. 49 | 50 | 51 | 52 | 53 | ## [1.4] 54 | ### Added 55 | * Better mac support [#8](https://github.com/tech-ascent/tvm-clj/pull/8) 56 | ### Fixed/Changed 57 | * Updated compute layer, which is now based on jna. This gives perf benefits during 58 | host->host copies and allows a simpler cpu layer. Note that the tvm cpu layer is now 59 | 100% compliant with all tensor tests so you can intermix tensor operations and tvm 60 | operations.
61 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC 2 | LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM 3 | CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. 4 | 5 | 1. DEFINITIONS 6 | 7 | "Contribution" means: 8 | 9 | a) in the case of the initial Contributor, the initial code and 10 | documentation distributed under this Agreement, and 11 | 12 | b) in the case of each subsequent Contributor: 13 | 14 | i) changes to the Program, and 15 | 16 | ii) additions to the Program; 17 | 18 | where such changes and/or additions to the Program originate from and are 19 | distributed by that particular Contributor. A Contribution 'originates' from 20 | a Contributor if it was added to the Program by such Contributor itself or 21 | anyone acting on such Contributor's behalf. Contributions do not include 22 | additions to the Program which: (i) are separate modules of software 23 | distributed in conjunction with the Program under their own license 24 | agreement, and (ii) are not derivative works of the Program. 25 | 26 | "Contributor" means any person or entity that distributes the Program. 27 | 28 | "Licensed Patents" mean patent claims licensable by a Contributor which are 29 | necessarily infringed by the use or sale of its Contribution alone or when 30 | combined with the Program. 31 | 32 | "Program" means the Contributions distributed in accordance with this 33 | Agreement. 34 | 35 | "Recipient" means anyone who receives the Program under this Agreement, 36 | including all Contributors. 37 | 38 | 2. GRANT OF RIGHTS 39 | 40 | a) Subject to the terms of this Agreement, each Contributor hereby grants 41 | Recipient a non-exclusive, worldwide, royalty-free copyright license to 42 | reproduce, prepare derivative works of, publicly display, publicly perform, 43 | distribute and sublicense the Contribution of such Contributor, if any, and 44 | such derivative works, in source code and object code form. 45 | 46 | b) Subject to the terms of this Agreement, each Contributor hereby grants 47 | Recipient a non-exclusive, worldwide, royalty-free patent license under 48 | Licensed Patents to make, use, sell, offer to sell, import and otherwise 49 | transfer the Contribution of such Contributor, if any, in source code and 50 | object code form. This patent license shall apply to the combination of the 51 | Contribution and the Program if, at the time the Contribution is added by the 52 | Contributor, such addition of the Contribution causes such combination to be 53 | covered by the Licensed Patents. The patent license shall not apply to any 54 | other combinations which include the Contribution. No hardware per se is 55 | licensed hereunder. 56 | 57 | c) Recipient understands that although each Contributor grants the licenses 58 | to its Contributions set forth herein, no assurances are provided by any 59 | Contributor that the Program does not infringe the patent or other 60 | intellectual property rights of any other entity. Each Contributor disclaims 61 | any liability to Recipient for claims brought by any other entity based on 62 | infringement of intellectual property rights or otherwise. 
As a condition to 63 | exercising the rights and licenses granted hereunder, each Recipient hereby 64 | assumes sole responsibility to secure any other intellectual property rights 65 | needed, if any. For example, if a third party patent license is required to 66 | allow Recipient to distribute the Program, it is Recipient's responsibility 67 | to acquire that license before distributing the Program. 68 | 69 | d) Each Contributor represents that to its knowledge it has sufficient 70 | copyright rights in its Contribution, if any, to grant the copyright license 71 | set forth in this Agreement. 72 | 73 | 3. REQUIREMENTS 74 | 75 | A Contributor may choose to distribute the Program in object code form under 76 | its own license agreement, provided that: 77 | 78 | a) it complies with the terms and conditions of this Agreement; and 79 | 80 | b) its license agreement: 81 | 82 | i) effectively disclaims on behalf of all Contributors all warranties and 83 | conditions, express and implied, including warranties or conditions of title 84 | and non-infringement, and implied warranties or conditions of merchantability 85 | and fitness for a particular purpose; 86 | 87 | ii) effectively excludes on behalf of all Contributors all liability for 88 | damages, including direct, indirect, special, incidental and consequential 89 | damages, such as lost profits; 90 | 91 | iii) states that any provisions which differ from this Agreement are offered 92 | by that Contributor alone and not by any other party; and 93 | 94 | iv) states that source code for the Program is available from such 95 | Contributor, and informs licensees how to obtain it in a reasonable manner on 96 | or through a medium customarily used for software exchange. 97 | 98 | When the Program is made available in source code form: 99 | 100 | a) it must be made available under this Agreement; and 101 | 102 | b) a copy of this Agreement must be included with each copy of the Program. 103 | 104 | Contributors may not remove or alter any copyright notices contained within 105 | the Program. 106 | 107 | Each Contributor must identify itself as the originator of its Contribution, 108 | if any, in a manner that reasonably allows subsequent Recipients to identify 109 | the originator of the Contribution. 110 | 111 | 4. COMMERCIAL DISTRIBUTION 112 | 113 | Commercial distributors of software may accept certain responsibilities with 114 | respect to end users, business partners and the like. While this license is 115 | intended to facilitate the commercial use of the Program, the Contributor who 116 | includes the Program in a commercial product offering should do so in a 117 | manner which does not create potential liability for other Contributors. 118 | Therefore, if a Contributor includes the Program in a commercial product 119 | offering, such Contributor ("Commercial Contributor") hereby agrees to defend 120 | and indemnify every other Contributor ("Indemnified Contributor") against any 121 | losses, damages and costs (collectively "Losses") arising from claims, 122 | lawsuits and other legal actions brought by a third party against the 123 | Indemnified Contributor to the extent caused by the acts or omissions of such 124 | Commercial Contributor in connection with its distribution of the Program in 125 | a commercial product offering. The obligations in this section do not apply 126 | to any claims or Losses relating to any actual or alleged intellectual 127 | property infringement. 
In order to qualify, an Indemnified Contributor must: 128 | a) promptly notify the Commercial Contributor in writing of such claim, and 129 | b) allow the Commercial Contributor to control, and cooperate with the 130 | Commercial Contributor in, the defense and any related settlement 131 | negotiations. The Indemnified Contributor may participate in any such claim 132 | at its own expense. 133 | 134 | For example, a Contributor might include the Program in a commercial product 135 | offering, Product X. That Contributor is then a Commercial Contributor. If 136 | that Commercial Contributor then makes performance claims, or offers 137 | warranties related to Product X, those performance claims and warranties are 138 | such Commercial Contributor's responsibility alone. Under this section, the 139 | Commercial Contributor would have to defend claims against the other 140 | Contributors related to those performance claims and warranties, and if a 141 | court requires any other Contributor to pay any damages as a result, the 142 | Commercial Contributor must pay those damages. 143 | 144 | 5. NO WARRANTY 145 | 146 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON 147 | AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER 148 | EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR 149 | CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A 150 | PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the 151 | appropriateness of using and distributing the Program and assumes all risks 152 | associated with its exercise of rights under this Agreement , including but 153 | not limited to the risks and costs of program errors, compliance with 154 | applicable laws, damage to or loss of data, programs or equipment, and 155 | unavailability or interruption of operations. 156 | 157 | 6. DISCLAIMER OF LIABILITY 158 | 159 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY 160 | CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, 161 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION 162 | LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 163 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 164 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE 165 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY 166 | OF SUCH DAMAGES. 167 | 168 | 7. GENERAL 169 | 170 | If any provision of this Agreement is invalid or unenforceable under 171 | applicable law, it shall not affect the validity or enforceability of the 172 | remainder of the terms of this Agreement, and without further action by the 173 | parties hereto, such provision shall be reformed to the minimum extent 174 | necessary to make such provision valid and enforceable. 175 | 176 | If Recipient institutes patent litigation against any entity (including a 177 | cross-claim or counterclaim in a lawsuit) alleging that the Program itself 178 | (excluding combinations of the Program with other software or hardware) 179 | infringes such Recipient's patent(s), then such Recipient's rights granted 180 | under Section 2(b) shall terminate as of the date such litigation is filed. 
181 | 182 | All Recipient's rights under this Agreement shall terminate if it fails to 183 | comply with any of the material terms or conditions of this Agreement and 184 | does not cure such failure in a reasonable period of time after becoming 185 | aware of such noncompliance. If all Recipient's rights under this Agreement 186 | terminate, Recipient agrees to cease use and distribution of the Program as 187 | soon as reasonably practicable. However, Recipient's obligations under this 188 | Agreement and any licenses granted by Recipient relating to the Program shall 189 | continue and survive. 190 | 191 | Everyone is permitted to copy and distribute copies of this Agreement, but in 192 | order to avoid inconsistency the Agreement is copyrighted and may only be 193 | modified in the following manner. The Agreement Steward reserves the right to 194 | publish new versions (including revisions) of this Agreement from time to 195 | time. No one other than the Agreement Steward has the right to modify this 196 | Agreement. The Eclipse Foundation is the initial Agreement Steward. The 197 | Eclipse Foundation may assign the responsibility to serve as the Agreement 198 | Steward to a suitable separate entity. Each new version of the Agreement will 199 | be given a distinguishing version number. The Program (including 200 | Contributions) may always be distributed subject to the version of the 201 | Agreement under which it was received. In addition, after a new version of 202 | the Agreement is published, Contributor may elect to distribute the Program 203 | (including its Contributions) under the new version. Except as expressly 204 | stated in Sections 2(a) and 2(b) above, Recipient receives no rights or 205 | licenses to the intellectual property of any Contributor under this 206 | Agreement, whether expressly, by implication, estoppel or otherwise. All 207 | rights in the Program not expressly granted under this Agreement are 208 | reserved. 209 | 210 | This Agreement is governed by the laws of the State of New York and the 211 | intellectual property laws of the United States of America. No party to this 212 | Agreement will bring a legal action under this Agreement more than one year 213 | after the cause of action arose. Each party waives its rights to a jury trial 214 | in any resulting litigation. 215 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # tvm-clj 2 | 3 | Clojure bindings and exploration of the [tvm](https://github.com/apache/incubator-tvm) library. 4 | 5 | TVM is a high performance compiler for ND numeric code. In its simplest form, it works via 6 | four steps: 7 | 8 | 1. Define an AST. 9 | 2. Schedule the AST, doing things such as tiling an operation or caching a partial 10 | result in GPU shared memory. Scheduling transforms the algorithm so it can be mapped 11 | to specific hardware such as GPUs, FPGAs, web-based 12 | backends such as wasm, graphics backends such as OpenGL and Vulkan, and low powered IoT 13 | platforms such as [microcontrollers](https://tvm.apache.org/2020/06/04/tinyml-how-tvm-is-taming-tiny). 14 | These transformations are guaranteed not to break the algorithm, so they are very safe from 15 | a correctness viewpoint while still allowing powerful vectorizing, SIMD, and 16 | [SIMT](https://en.wikipedia.org/wiki/Single_instruction,_multiple_threads) optimizations. 17 | 3. Compile the AST to a specific hardware profile. TVM has backends for a 18 | [wide variety of hardware](https://github.com/apache/incubator-tvm/blob/main/python/tvm/_ffi/runtime_ctypes.py#L156) 19 | including, as mentioned, extremely optimized versions for x86 and ARM CPUs, CUDA, and OpenCL. 20 | 4. Load your function and call it.
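As a rough illustration of those four steps from Clojure, here is a hypothetical vector-add sketch. The namespaces mirror this repository's layout, but the individual function names (and the `a-buf`/`b-buf`/`c-buf` device buffers) are illustrative assumptions rather than verified API:

```clojure
;; Hypothetical sketch only: namespaces match this repo's layout, but the
;; function names are illustrative assumptions, not verified tvm-clj API.
(require '[tvm-clj.ast :as ast]
         '[tvm-clj.ast.elemwise-op :as ast-op]
         '[tvm-clj.schedule :as schedule]
         '[tvm-clj.compiler :as compiler]
         '[tvm-clj.module :as module])

;; 1. Define the AST for c[i] = a[i] + b[i].
(def n (ast/variable "n"))
(def a (ast/placeholder [n] "a" :dtype :float32))
(def b (ast/placeholder [n] "b" :dtype :float32))
(def c (ast/compute [n]
                    (ast/tvm-fn [i]
                      (ast-op/+ (ast/tget a [i]) (ast/tget b [i])))
                    "c"))

;; 2. Schedule the computation; all performance decisions (tiling,
;;    parallelism, thread binding) happen here, not in the algorithm.
(def sched (schedule/create-schedule (:op c)))

;; 3. Compile the scheduled AST for a particular backend (cpu here).
(def compiled (compiler/compile {"vector_add" {:schedule sched
                                               :arguments [a b c]}}))

;; 4. Look the function up and call it on device buffers (hypothetical
;;    a-buf, b-buf, c-buf allocated via the device namespace).
(def vector-add (module/find-function compiled "vector_add"))
(vector-add a-buf b-buf c-buf)
```

Whatever the exact entry points, the division of labor is the part that carries over: the AST states what is computed, the schedule states how, and compilation freezes both into a callable artifact.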
21 | 22 | 23 | 24 | * [API Documents](https://techascent.github.io/tvm-clj/) 25 | * [simple tests](test/tvm_clj/tvm_test.clj) 26 | 27 | 28 | ## Getting all the source 29 | 30 | At top level: 31 | ```bash 32 | git submodule update --init --recursive 33 | ``` 34 | 35 | ## Building TVM 36 | 37 | ```bash 38 | sudo apt install make g++ cmake llvm-dev libopenblas-dev 39 | 40 | ## opencl support (nvidia-cuda includes this) 41 | sudo apt install ocl-icd-* opencl-headers 42 | 43 | ## Cuda support 44 | sudo apt install nvidia-cuda-toolkit 45 | 46 | ## intel graphics adapter support 47 | sudo apt install beignet beignet-opencl-icd 48 | 49 | 50 | ## MKL support if you choose. I don't use it generally so this is very optional. 51 | curl https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB | sudo apt-key add - 52 | sudo sh -c 'echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list' 53 | sudo apt-get update 54 | ## Find the version of mkl...I would take the latest. 55 | apt-cache search mkl-64bit 56 | ## ... 57 | sudo apt-get install intel-mkl-64bit-2019.5-075 58 | 59 | 60 | mkdir -p tvm/build 61 | # Config setup for intel and such. 62 | # Base config.cmake file only has support for opencl. If you want 63 | # CUDA, CUDNN, or MKL I suggest you edit the config file after you copy 64 | # it. 65 | cp config.cmake tvm/build/ 66 | 67 | cd tvm/build 68 | 69 | cmake .. 70 | make -j8 71 | 72 | 73 | ``` 74 | 75 | This will copy the libs into a platform-specific directory that jna should find. 76 | 77 | Another option is to install the tvm libs themselves. We recommend this pathway, as 78 | the tvm libraries will then also work with the python bindings. In fact, it can be worth it 79 | to install the python bindings, as there are a lot of examples in python that are 80 | instructive to work with. 81 | 82 | 83 | ## More Information 84 | 85 | 86 | * [background theoretical documentation](topics/background.md) 87 | 88 | 89 | ## License 90 | 91 | 92 | Distributed under the Eclipse Public License either version 1.0 or (at 93 | your option) any later version. 94 | -------------------------------------------------------------------------------- /config.cmake: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements.  See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership.  The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License.  You may obtain a copy of the License at 8 | # 9 | #   http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied.  See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License.
17 | 18 | #-------------------------------------------------------------------- 19 | # Template custom cmake configuration for compiling 20 | # 21 | # This file is used to override the build options in build. 22 | # If you want to change the configuration, please use the following 23 | # steps. Assume you are on the root directory. First copy this 24 | # file so that any local changes will be ignored by git 25 | # 26 | # $ mkdir build 27 | # $ cp cmake/config.cmake build 28 | # 29 | # Next modify the corresponding entries, and then compile by 30 | # 31 | # $ cd build 32 | # $ cmake .. 33 | # 34 | # Then build in parallel with 8 threads 35 | # 36 | # $ make -j8 37 | #-------------------------------------------------------------------- 38 | 39 | #--------------------------------------------- 40 | # Backend runtimes. 41 | #--------------------------------------------- 42 | 43 | # Whether enable CUDA during compile, 44 | # 45 | # Possible values: 46 | # - ON: enable CUDA with cmake's auto search 47 | # - OFF: disable CUDA 48 | # - /path/to/cuda: use specific path to cuda toolkit 49 | set(USE_CUDA OFF) 50 | 51 | # Whether enable ROCM runtime 52 | # 53 | # Possible values: 54 | # - ON: enable ROCM with cmake's auto search 55 | # - OFF: disable ROCM 56 | # - /path/to/rocm: use specific path to rocm 57 | set(USE_ROCM OFF) 58 | 59 | # Whether enable SDAccel runtime 60 | set(USE_SDACCEL OFF) 61 | 62 | # Whether enable Intel FPGA SDK for OpenCL (AOCL) runtime 63 | set(USE_AOCL OFF) 64 | 65 | # Whether enable OpenCL runtime 66 | # 67 | # Possible values: 68 | # - ON: enable OpenCL with cmake's auto search 69 | # - OFF: disable OpenCL 70 | # - /path/to/opencl-sdk: use specific path to opencl-sdk 71 | set(USE_OPENCL ON) 72 | 73 | # Whether enable Metal runtime 74 | set(USE_METAL OFF) 75 | 76 | # Whether enable Vulkan runtime 77 | # 78 | # Possible values: 79 | # - ON: enable Vulkan with cmake's auto search 80 | # - OFF: disable vulkan 81 | # - /path/to/vulkan-sdk: use specific path to vulkan-sdk 82 | set(USE_VULKAN OFF) 83 | 84 | # Whether enable OpenGL runtime 85 | set(USE_OPENGL OFF) 86 | 87 | # Whether enable MicroTVM runtime 88 | set(USE_MICRO OFF) 89 | 90 | # Whether enable RPC runtime 91 | set(USE_RPC ON) 92 | 93 | # Whether to build the C++ RPC server binary 94 | set(USE_CPP_RPC OFF) 95 | 96 | # Whether embed stackvm into the runtime 97 | set(USE_STACKVM_RUNTIME OFF) 98 | 99 | # Whether enable tiny embedded graph runtime. 100 | set(USE_GRAPH_RUNTIME ON) 101 | 102 | # Whether enable additional graph debug functions 103 | set(USE_GRAPH_RUNTIME_DEBUG OFF) 104 | 105 | # Whether enable additional vm profiler functions 106 | set(USE_VM_PROFILER OFF) 107 | 108 | # Whether enable uTVM standalone runtime 109 | set(USE_MICRO_STANDALONE_RUNTIME OFF) 110 | 111 | # Whether build with LLVM support 112 | # Requires LLVM version >= 4.0 113 | # 114 | # Possible values: 115 | # - ON: enable llvm with cmake's find search 116 | # - OFF: disable llvm 117 | # - /path/to/llvm-config: enable specific LLVM when multiple llvm-dev is available.
118 | set(USE_LLVM ON) 119 | 120 | #--------------------------------------------- 121 | # Contrib libraries 122 | #--------------------------------------------- 123 | # Whether to build with BYODT software emulated posit custom datatype 124 | # 125 | # Possible values: 126 | # - ON: enable BYODT posit, requires setting UNIVERSAL_PATH 127 | # - OFF: disable BYODT posit 128 | # 129 | # set(UNIVERSAL_PATH /path/to/stillwater-universal) for ON 130 | set(USE_BYODT_POSIT OFF) 131 | 132 | # Whether use BLAS, choices: openblas, atlas, apple 133 | set(USE_BLAS openblas) 134 | 135 | # Whether to use MKL 136 | # Possible values: 137 | # - ON: Enable MKL 138 | # - /path/to/mkl: mkl root path 139 | # - OFF: Disable MKL 140 | # set(USE_MKL /opt/intel/mkl) for UNIX 141 | # set(USE_MKL ../IntelSWTools/compilers_and_libraries_2018/windows/mkl) for WIN32 142 | # set(USE_MKL ) if using `pip install mkl` 143 | set(USE_MKL OFF) 144 | 145 | # Whether use MKLDNN library, choices: ON, OFF, path to mkldnn library 146 | set(USE_MKLDNN OFF) 147 | 148 | # Whether use OpenMP thread pool, choices: gnu, intel 149 | # Note: "gnu" uses gomp library, "intel" uses iomp5 library 150 | set(USE_OPENMP none) 151 | 152 | # Whether use contrib.random in runtime 153 | set(USE_RANDOM ON) 154 | 155 | # Whether use NNPack 156 | set(USE_NNPACK OFF) 157 | 158 | # Possible values: 159 | # - ON: enable tflite with cmake's find search 160 | # - OFF: disable tflite 161 | # - /path/to/libtensorflow-lite.a: use specific path to tensorflow lite library 162 | set(USE_TFLITE OFF) 163 | 164 | # /path/to/tensorflow: tensorflow root path when use tflite library 165 | set(USE_TENSORFLOW_PATH none) 166 | 167 | # Required for full builds with TFLite. Not needed for runtime with TFLite. 168 | # /path/to/flatbuffers: flatbuffers root path when using tflite library 169 | set(USE_FLATBUFFERS_PATH none) 170 | 171 | # Possible values: 172 | # - OFF: disable tflite support for edgetpu 173 | # - /path/to/edgetpu: use specific path to edgetpu library 174 | set(USE_EDGETPU OFF) 175 | 176 | # Whether use CuDNN 177 | set(USE_CUDNN OFF) 178 | 179 | # Whether use cuBLAS 180 | set(USE_CUBLAS OFF) 181 | 182 | # Whether use MIOpen 183 | set(USE_MIOPEN OFF) 184 | 185 | # Whether use MPS 186 | set(USE_MPS OFF) 187 | 188 | # Whether use rocBlas 189 | set(USE_ROCBLAS OFF) 190 | 191 | # Whether use contrib sort 192 | set(USE_SORT ON) 193 | 194 | # Whether use MKL-DNN (DNNL) codegen 195 | set(USE_DNNL_CODEGEN OFF) 196 | 197 | # Whether to use Arm Compute Library (ACL) codegen 198 | # We provide 2 separate flags since we cannot build the ACL runtime on x86. 199 | # This is useful for cases where you want to cross-compile a relay graph 200 | # on x86 then run on AArch. 201 | # 202 | # An example of how to use this can be found here: docs/deploy/arm_compute_lib.rst. 203 | # 204 | # USE_ARM_COMPUTE_LIB - Support for compiling a relay graph offloading supported 205 | # operators to Arm Compute Library. OFF/ON 206 | # USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME - Run Arm Compute Library annotated functions via the ACL 207 | # runtime. 
OFF/ON/"path/to/ACL" 208 | set(USE_ARM_COMPUTE_LIB OFF) 209 | set(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME OFF) 210 | 211 | # Whether to build with Arm Ethos-N support 212 | # Possible values: 213 | # - OFF: disable Arm Ethos-N support 214 | # - path/to/arm-ethos-N-stack: use a specific version of the 215 | # Ethos-N driver stack 216 | set(USE_ETHOSN OFF) 217 | # If USE_ETHOSN is enabled, use ETHOSN_HW (ON) if Ethos-N hardware is available on this machine 218 | # otherwise use ETHOSN_HW (OFF) to use the software test infrastructure 219 | set(USE_ETHOSN_HW OFF) 220 | 221 | # Build ANTLR parser for Relay text format 222 | # Possible values: 223 | # - ON: enable ANTLR by searching default locations (cmake find_program for antlr4 and /usr/local for jar) 224 | # - OFF: disable ANTLR 225 | # - /path/to/antlr-*-complete.jar: path to specific ANTLR jar file 226 | set(USE_ANTLR OFF) 227 | 228 | # Whether use Relay debug mode 229 | set(USE_RELAY_DEBUG OFF) 230 | 231 | # Whether to build fast VTA simulator driver 232 | set(USE_VTA_FSIM OFF) 233 | 234 | # Whether to build cycle-accurate VTA simulator driver 235 | set(USE_VTA_TSIM OFF) 236 | 237 | # Whether to build VTA FPGA driver (device side only) 238 | set(USE_VTA_FPGA OFF) 239 | 240 | # Whether use Thrust 241 | set(USE_THRUST OFF) 242 | 243 | # Whether to build the TensorFlow TVMDSOOp module 244 | set(USE_TF_TVMDSOOP OFF) 245 | 246 | # Whether to use STL's std::unordered_map or TVM's POD compatible Map 247 | set(USE_FALLBACK_STL_MAP OFF) 248 | 249 | # Whether to use hexagon device 250 | set(USE_HEXAGON_DEVICE OFF) 251 | set(USE_HEXAGON_SDK /path/to/sdk) 252 | 253 | # Whether to use ONNX codegen 254 | set(USE_TARGET_ONNX OFF) 255 | 256 | # Whether to compile the standalone C runtime. 257 | set(USE_STANDALONE_CRT ON) 258 | 259 | -------------------------------------------------------------------------------- /docs/background.html: -------------------------------------------------------------------------------- 1 | 3 | Technical Background

Technical Background

4 |

tvm is a system for dynamically generating high performance numeric code, with backends for cpu, cuda, opencl, opengl, webassembly, vulkan, and verilog. It has frontends mainly in python and c++, with a clear and well designed C-ABI that not only aids the implementation of the python interface but also eases binding into other language ecosystems such as the jvm and node.

5 |

tvm leverages Halide for its IR layer and for the overall strategy. Halide takes algorithms structured in specific ways and allows performance experimentation without affecting the output of the core algorithm. A very solid justification for this is nicely put in these slides. A Ph. D. was minted here. We also recommend watching the youtube video.

6 |

It should be noted, however, that at this point TVM has diverged significantly from Halide, implementing essentially its own compiler specifically designed to work with deep learning-type workflows:

7 |
8 |

It is interesting. Please note that while TVM uses HalideIR that is derived from Halide, most of the code generation and optimization passes are done independently(with deep learning workloads in mind), while reusing sensible ones from Halide. So in terms of low level code generation, we are not necessarily bound to some of limitations listed.

9 |

In particular, we take a pragmatic approach, to focus on what is useful for deep learning workloads, so you can find unique things like more GPU optimization, accelerator support, recurrence(scan). If there are optimizations that Tiramisu have which is useful to get the state of art deep learning workloads, we are all for bringing that into TVM

10 |

I also want to emphasize that TVM is more than a low level tensor code generation, but instead trying to solve the end to end deep learning compilation problem, and many of the things goes beyond the tensor code generation.

11 |
12 |

tqchen, the main contributor to TVM.

13 |

Goals

14 |
    15 |
  1. Learn about Halide and tvm and enable very clear and simple exploration of the system in clojure. Make clojure a first class language in the dmlc ecosystem. 16 |
  2. Provide the tvm team with clear feedback and a second external implementation, or language binding, on top of the C-ABI. 17 |
  3. Encourage wider adoption and exploration of numerical programming; for instance, a new implementation of J that carries the properties of the clojure or clojurescript ecosystem but includes all of the major concepts of J. This would enable running some subset of J (or APL) programs (or functions) in a far more optimized way than before, accessible from node.js or the jvm. It would also inform the wider discussion around numeric programming systems such as MatLab, TensorFlow, numpy, etc. 18 |
  4. Provide a richer platform for binding to nnvm so that running existing networks via clojure is as seamless as possible. 19 |
20 |

What, Concretely, Are You Talking About?

21 |

Vector Math Compiler Example

22 |

We built a small compiler that takes a statement of vector math and compiles it to tvm. It is extremely incomplete and far from as efficient as what is possible, but it shows a vision of potentially compiling entire neural network functions this way. A sketch of the statement being compiled is shown below, followed by the IR tvm generates for the cpu and gpu schedules.

23 |
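For context, the statement being compiled in the listings below is the usual byte-to-float BGR normalization. Expressed hypothetically against this repo's AST namespace (function names and the `input` tensor are illustrative assumptions, not verified API), it is roughly:

```clojure
;; Hypothetical reconstruction of the compiled statement; names are
;; illustrative assumptions.  The IR below shows exactly this computation:
;; reverse the channel order (rgb -> bgr), scale each byte by 1/255 (the
;; 0.003922 constant), and shift the result into the [-0.5, 0.5) range.
(def bgr-types-op
  (ast/compute [n-channels image-height image-width]
               (ast/tvm-fn [chan y x]
                 (ast-op/+ (ast-op/* (ast/cast (ast/tget input [y x (ast-op/- 2 chan)])
                                               :float32)
                                     (float 1/255))
                           -0.5))
               "bgr_types_op"))
```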
hand-coded java took:  "Elapsed time: 558.662639 msecs"
24 | 
25 | produce bgr_types_op {
26 |   parallel (chan, 0, min(n_channels, 3)) {
27 |     for (y.outer, 0, ((image_height + 31)/32)) {
28 |       for (x.outer, 0, ((image_width + 31)/32)) {
29 |         for (y.inner, 0, 32) {
30 |           if (likely(((y.outer*32) < (image_height - y.inner)))) {
31 |             for (x.inner.s, 0, 32) {
32 |               if (likely(((x.outer*32) < (image_width - x.inner.s)))) {
33 |                 buffer[(((x.outer*32) + ((((chan*image_height) + (y.outer*32)) + y.inner)*image_width)) + x.inner.s)] = ((float32(buffer[((((((x.outer*32) + (((y.outer*32) + y.inner)*image_width)) + x.inner.s)*n_channels) - chan) + 2)])*0.003922f) + -0.500000f)
34 |               }
35 |             }
36 |           }
37 |         }
38 |       }
39 |     }
40 |   }
41 | }
42 | 
43 | Compiled (cpu) tensor took: "Elapsed time: 31.712205 msecs"
44 | 
45 | produce bgr_types_op {
46 |   // attr [iter_var(blockIdx.z, , blockIdx.z)] thread_extent = min(n_channels, 3)
47 |   // attr [iter_var(blockIdx.y, , blockIdx.y)] thread_extent = ((image_height + 31)/32)
48 |   // attr [iter_var(blockIdx.x, , blockIdx.x)] thread_extent = ((image_width + 31)/32)
49 |   // attr [iter_var(threadIdx.y, , threadIdx.y)] thread_extent = 32
50 |   // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 32
51 |   if (likely(((blockIdx.y*32) < (image_height - threadIdx.y)))) {
52 |     if (likely(((blockIdx.x*32) < (image_width - threadIdx.x)))) {
53 |       buffer[(((blockIdx.x*32) + ((((blockIdx.z*image_height) + (blockIdx.y*32)) + threadIdx.y)*image_width)) + threadIdx.x)] = ((float32(buffer[((((((blockIdx.x*32) + (((blockIdx.y*32) + threadIdx.y)*image_width)) + threadIdx.x)*n_channels) - blockIdx.z) + 2)])*0.003922f) + -0.500000f)
54 |     }
55 |   }
56 | }
57 | 
58 | Compiled (opencl) tensor took: "Elapsed time: 4.641527 msecs"
59 | 
60 |
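The difference between the two listings above is purely a scheduling decision; the algorithm definition never changes. A hypothetical sketch of the two schedules follows. The stage operations mirror TVM's split/tile/parallel/bind primitives, but these exact tvm-clj function names are assumptions:

```clojure
;; Hypothetical schedules matching the two listings; function names are
;; assumptions modeled on TVM's scheduling primitives.

;; cpu listing: parallelize the channel loop, tile y/x into 32x32 blocks.
(let [stage (schedule/->stage sched bgr-types-op)
      [chan y x] (:axis bgr-types-op)]
  (schedule/stage-parallel stage chan)
  (schedule/stage-tile stage y x 32 32))

;; gpu listing: identical 32x32 tiling, but the outer tile axes are bound
;; to blockIdx.* and the inner axes to threadIdx.*, which is exactly the
;; thread_extent structure in the second IR listing.
(let [stage (schedule/->stage sched bgr-types-op)
      [chan y x] (:axis bgr-types-op)
      [y-outer x-outer y-inner x-inner] (schedule/stage-tile stage y x 32 32)]
  (schedule/stage-bind stage chan (ast/thread-axis "blockIdx.z"))
  (schedule/stage-bind stage y-outer (ast/thread-axis "blockIdx.y"))
  (schedule/stage-bind stage x-outer (ast/thread-axis "blockIdx.x"))
  (schedule/stage-bind stage y-inner (ast/thread-axis "threadIdx.y"))
  (schedule/stage-bind stage x-inner (ast/thread-axis "threadIdx.x")))
```

Because the algorithm is untouched, the cpu and gpu variants are guaranteed to compute the same result; only the loop structure changes.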

Image Scaling (TVM vs OpenCV)

61 |

Faster (and correct) bilinear and area filtering. This handily beats opencv::resize on a desktop computer in both speed and code readability.

62 |
;; cpu, algorithm run 10 times.  Desktop (NVIDIA 1070):
63 | 
64 | tvm-clj.image.resize-test> (downsample-img)
65 | {:opencv-area-time "\"Elapsed time: 815.136235 msecs\"\n",
66 |  :opencv-bilinear-time "\"Elapsed time: 220.774128 msecs\"\n",
67 |  :tvm-area-time "\"Elapsed time: 380.640778 msecs\"\n",
68 |  :tvm-bilinear-time "\"Elapsed time: 21.361915 msecs\"\n"}
69 | 
70 | tvm-clj.image.resize-test> (downsample-img :device-type :opencl)
71 | {:opencv-area-time "\"Elapsed time: 338.918811 msecs\"\n",
72 |  :opencv-bilinear-time "\"Elapsed time: 16.837844 msecs\"\n",
73 |  :tvm-area-time "\"Elapsed time: 31.076962 msecs\"\n",
74 |  :tvm-bilinear-time "\"Elapsed time: 3.033296 msecs\"\n"}
75 | 
76 | ;;Laptop times
77 | tvm-clj.image.resize-test> (downsample-img)
78 | {:opencv-area-time "\"Elapsed time: 2422.879178 msecs\"\n",
79 |  :opencv-bilinear-time "\"Elapsed time: 637.622425 msecs\"\n",
80 |  :tvm-area-time "\"Elapsed time: 333.946424 msecs\"\n",
81 |  :tvm-bilinear-time "\"Elapsed time: 20.585665 msecs\"\n"}
82 | 
83 | tvm-clj.image.resize-test> (downsample-img :device-type :opencl)
84 | {:opencv-area-time "\"Elapsed time: 2460.51718 msecs\"\n",
85 |  :opencv-bilinear-time "\"Elapsed time: 667.624091 msecs\"\n",
86 |  :tvm-area-time "\"Elapsed time: 315.864799 msecs\"\n",
87 |  :tvm-bilinear-time "\"Elapsed time: 16.290168 msecs\"\n"}
88 | 
89 |
-------------------------------------------------------------------------------- /docs/css/default.css: -------------------------------------------------------------------------------- 1 | @import url('https://fonts.googleapis.com/css?family=PT+Sans'); 2 | 3 | body { 4 | font-family: 'PT Sans', Helvetica, sans-serif; 5 | font-size: 14px; 6 | } 7 | 8 | a { 9 | color: #337ab7; 10 | text-decoration: none; 11 | } 12 | 13 | a:hover { 14 | color: #30426a; 15 | text-decoration: underline; 16 | } 17 | 18 | pre, code { 19 | font-family: Monaco, DejaVu Sans Mono, Consolas, monospace; 20 | font-size: 9pt; 21 | margin: 15px 0; 22 | } 23 | 24 | h1 { 25 | font-weight: normal; 26 | font-size: 29px; 27 | margin: 10px 0 2px 0; 28 | padding: 0; 29 | } 30 | 31 | h2 { 32 | font-weight: normal; 33 | font-size: 25px; 34 | } 35 | 36 | h3 > a:hover { 37 | text-decoration: none; 38 | } 39 | 40 | .document h1, .namespace-index h1 { 41 | font-size: 32px; 42 | margin-top: 12px; 43 | } 44 | 45 | #header, #content, .sidebar { 46 | position: fixed; 47 | } 48 | 49 | #header { 50 | top: 0; 51 | left: 0; 52 | right: 0; 53 | height: 22px; 54 | color: #f5f5f5; 55 | padding: 5px 7px; 56 | } 57 | 58 | #content { 59 | top: 32px; 60 | right: 0; 61 | bottom: 0; 62 | overflow: auto; 63 | background: #fff; 64 | color: #333; 65 | padding: 0 18px; 66 | } 67 | 68 | .sidebar { 69 | position: fixed; 70 | top: 32px; 71 | bottom: 0; 72 | overflow: auto; 73 | } 74 | 75 | .sidebar.primary { 76 | background: #30426a; 77 | border-right: solid 1px #cccccc; 78 | left: 0; 79 | width: 250px; 80 | color: white; 81 | font-size: 110%; 82 | } 83 | 84 | .sidebar.secondary { 85 | background: #f2f2f2; 86 | border-right: solid 1px #d7d7d7; 87 | left: 251px; 88 | width: 200px; 89 | font-size: 110%; 90 | } 91 | 92 | #content.namespace-index, #content.document { 93 | left: 251px; 94 | } 95 | 96 | #content.namespace-docs { 97 | left: 452px; 98 | } 99 | 100 | #content.document { 101 | padding-bottom: 10%; 102 | } 103 | 104 | #header { 105 | background: #2d3e63; 106 | box-shadow: 0 0 8px rgba(0, 0, 0, 0.4); 107 | z-index: 100; 108 | } 109 | 110 | #header h1 { 111 | margin: 0; 112 | padding: 0; 113 | font-size: 18px; 114 | font-weight: lighter; 115 | text-shadow: -1px -1px 0px #333; 116 | } 117 | 118 | #header h1 .project-version { 119 | font-weight: normal; 120 | } 121 | 122 | .project-version { 123 | padding-left: 0.15em; 124 | } 125 | 126 | #header a, .sidebar a { 127 | display: block; 128 | text-decoration: none; 129 | } 130 | 131 | #header a { 132 | color: #f5f5f5; 133 | } 134 | 135 | .sidebar.primary, .sidebar.primary a { 136 | color: #b2bfdc; 137 | } 138 | 139 | .sidebar.primary a:hover { 140 | color: white; 141 | } 142 | 143 | .sidebar.secondary, .sidebar.secondary a { 144 | color: #738bc0; 145 | } 146 | 147 | .sidebar.secondary a:hover { 148 | color: #2d3e63; 149 | } 150 | 151 | #header h2 { 152 | float: right; 153 | font-size: 9pt; 154 | font-weight: normal; 155 | margin: 4px 3px; 156 | padding: 0; 157 | color: #bbb; 158 | } 159 | 160 | #header h2 a { 161 | display: inline; 162 | } 163 | 164 | .sidebar h3 { 165 | margin: 0; 166 | padding: 10px 13px 0 13px; 167 | font-size: 19px; 168 | font-weight: lighter; 169 | } 170 | 171 | .sidebar.primary h3.no-link { 172 | text-transform: uppercase; 173 | font-size: 12px; 174 | color: #738bc0; 175 | } 176 | 177 | .sidebar.secondary h3 a { 178 | text-transform: uppercase; 179 | font-size: 12px; 180 | color: #2d3e63; 181 | } 182 | 183 | .sidebar ul { 184 | padding: 7px 0 6px 0; 185 | margin: 0; 186 | } 187 | 188 | 
.sidebar ul.index-link { 189 | padding-bottom: 4px; 190 | } 191 | 192 | .sidebar li { 193 | display: block; 194 | vertical-align: middle; 195 | } 196 | 197 | .sidebar li a, .sidebar li .no-link { 198 | border-left: 3px solid transparent; 199 | padding: 0 10px; 200 | white-space: nowrap; 201 | } 202 | 203 | .sidebar li .inner { 204 | display: inline-block; 205 | padding-top: 7px; 206 | height: 24px; 207 | } 208 | 209 | .sidebar li a, .sidebar li .tree { 210 | height: 31px; 211 | } 212 | 213 | .depth-1 .inner { padding-left: 2px; } 214 | .depth-2 .inner { padding-left: 6px; } 215 | .depth-3 .inner { padding-left: 20px; } 216 | .depth-4 .inner { padding-left: 34px; } 217 | .depth-5 .inner { padding-left: 48px; } 218 | .depth-6 .inner { padding-left: 62px; } 219 | 220 | .sidebar li .tree { 221 | display: block; 222 | float: left; 223 | position: relative; 224 | top: -10px; 225 | margin: 0 4px 0 0; 226 | padding: 0; 227 | } 228 | 229 | .sidebar li.depth-1 .tree { 230 | display: none; 231 | } 232 | 233 | .sidebar li .tree .top, .sidebar li .tree .bottom { 234 | display: block; 235 | margin: 0; 236 | padding: 0; 237 | width: 7px; 238 | } 239 | 240 | .sidebar li .tree .top { 241 | border-left: 1px solid #aaa; 242 | border-bottom: 1px solid #aaa; 243 | height: 19px; 244 | } 245 | 246 | .sidebar li .tree .bottom { 247 | height: 22px; 248 | } 249 | 250 | .sidebar li.branch .tree .bottom { 251 | border-left: 1px solid #aaa; 252 | } 253 | 254 | .sidebar.primary li.current a { 255 | border-left: 3px solid #e99d1a; 256 | color: white; 257 | } 258 | 259 | .sidebar.secondary li.current a { 260 | border-left: 3px solid #2d3e63; 261 | color: #33a; 262 | } 263 | 264 | .namespace-index h2 { 265 | margin: 30px 0 0 0; 266 | } 267 | 268 | .namespace-index h3 { 269 | font-size: 16px; 270 | font-weight: bold; 271 | margin-bottom: 0; 272 | letter-spacing: 0.05em; 273 | border-bottom: solid 1px #ddd; 274 | max-width: 680px; 275 | background-color: #fafafa; 276 | padding: 0.5em; 277 | } 278 | 279 | .namespace-index .topics { 280 | padding-left: 30px; 281 | margin: 11px 0 0 0; 282 | } 283 | 284 | .namespace-index .topics li { 285 | padding: 5px 0; 286 | } 287 | 288 | .namespace-docs h3 { 289 | font-size: 18px; 290 | font-weight: bold; 291 | } 292 | 293 | .public h3 { 294 | margin: 0; 295 | float: left; 296 | } 297 | 298 | .usage { 299 | clear: both; 300 | } 301 | 302 | .public { 303 | margin: 0; 304 | border-top: 1px solid #e0e0e0; 305 | padding-top: 14px; 306 | padding-bottom: 6px; 307 | } 308 | 309 | .public:last-child { 310 | margin-bottom: 20%; 311 | } 312 | 313 | .members .public:last-child { 314 | margin-bottom: 0; 315 | } 316 | 317 | .members { 318 | margin: 15px 0; 319 | } 320 | 321 | .members h4 { 322 | color: #555; 323 | font-weight: normal; 324 | font-variant: small-caps; 325 | margin: 0 0 5px 0; 326 | } 327 | 328 | .members .inner { 329 | padding-top: 5px; 330 | padding-left: 12px; 331 | margin-top: 2px; 332 | margin-left: 7px; 333 | border-left: 1px solid #bbb; 334 | } 335 | 336 | #content .members .inner h3 { 337 | font-size: 12pt; 338 | } 339 | 340 | .members .public { 341 | border-top: none; 342 | margin-top: 0; 343 | padding-top: 6px; 344 | padding-bottom: 0; 345 | } 346 | 347 | .members .public:first-child { 348 | padding-top: 0; 349 | } 350 | 351 | h4.type, 352 | h4.dynamic, 353 | h4.added, 354 | h4.deprecated { 355 | float: left; 356 | margin: 3px 10px 15px 0; 357 | font-size: 15px; 358 | font-weight: bold; 359 | font-variant: small-caps; 360 | } 361 | 362 | .public h4.type, 363 | .public 
h4.dynamic, 364 | .public h4.added, 365 | .public h4.deprecated { 366 | font-size: 13px; 367 | font-weight: bold; 368 | margin: 3px 0 0 10px; 369 | } 370 | 371 | .members h4.type, 372 | .members h4.added, 373 | .members h4.deprecated { 374 | margin-top: 1px; 375 | } 376 | 377 | h4.type { 378 | color: #717171; 379 | } 380 | 381 | h4.dynamic { 382 | color: #9933aa; 383 | } 384 | 385 | h4.added { 386 | color: #508820; 387 | } 388 | 389 | h4.deprecated { 390 | color: #880000; 391 | } 392 | 393 | .namespace { 394 | margin-bottom: 30px; 395 | } 396 | 397 | .namespace:last-child { 398 | margin-bottom: 10%; 399 | } 400 | 401 | .index { 402 | padding: 0; 403 | font-size: 80%; 404 | margin: 15px 0; 405 | line-height: 1.6em; 406 | } 407 | 408 | .index * { 409 | display: inline; 410 | } 411 | 412 | .index p { 413 | padding-right: 3px; 414 | } 415 | 416 | .index li { 417 | padding-right: 5px; 418 | } 419 | 420 | .index ul { 421 | padding-left: 0; 422 | } 423 | 424 | .type-sig { 425 | clear: both; 426 | color: #088; 427 | } 428 | 429 | .type-sig pre { 430 | padding-top: 10px; 431 | margin: 0; 432 | } 433 | 434 | .usage code { 435 | display: block; 436 | color: #008; 437 | margin: 2px 0; 438 | } 439 | 440 | .usage code:first-child { 441 | padding-top: 10px; 442 | } 443 | 444 | p { 445 | margin: 15px 0; 446 | } 447 | 448 | .public p:first-child, .public pre.plaintext { 449 | margin-top: 12px; 450 | } 451 | 452 | .doc { 453 | margin: 0 0 26px 0; 454 | clear: both; 455 | } 456 | 457 | .public .doc { 458 | margin: 0; 459 | } 460 | 461 | .namespace-index { 462 | font-size: 120%; 463 | } 464 | 465 | .namespace-index .doc { 466 | margin-bottom: 20px; 467 | } 468 | 469 | .namespace-index .namespace .doc { 470 | margin-bottom: 10px; 471 | } 472 | 473 | .markdown p, .markdown li, .markdown dt, .markdown dd, .markdown td { 474 | line-height: 1.6em; 475 | } 476 | 477 | .markdown h2 { 478 | font-weight: normal; 479 | font-size: 25px; 480 | } 481 | 482 | #content .markdown h3 { 483 | font-size: 20px; 484 | } 485 | 486 | .markdown h4 { 487 | font-size: 15px; 488 | } 489 | 490 | .doc, .public, .namespace .index { 491 | max-width: 680px; 492 | overflow-x: visible; 493 | } 494 | 495 | .markdown pre > code { 496 | display: block; 497 | padding: 10px; 498 | } 499 | 500 | .markdown pre > code, .src-link a { 501 | border: 1px solid #e4e4e4; 502 | border-radius: 2px; 503 | } 504 | 505 | .src-link a { 506 | background: #f6f6f6; 507 | } 508 | 509 | .markdown code:not(.hljs) { 510 | color: #c7254e; 511 | background-color: #f9f2f4; 512 | border-radius: 4px; 513 | font-size: 90%; 514 | padding: 2px 4px; 515 | } 516 | 517 | pre.deps { 518 | display: inline-block; 519 | margin: 0 10px; 520 | border: 1px solid #e4e4e4; 521 | border-radius: 2px; 522 | padding: 10px; 523 | background-color: #f6f6f6; 524 | } 525 | 526 | .markdown hr { 527 | border-style: solid; 528 | border-top: none; 529 | color: #ccc; 530 | } 531 | 532 | .doc ul, .doc ol { 533 | padding-left: 30px; 534 | } 535 | 536 | .doc table { 537 | border-collapse: collapse; 538 | margin: 0 10px; 539 | } 540 | 541 | .doc table td, .doc table th { 542 | border: 1px solid #dddddd; 543 | padding: 4px 6px; 544 | } 545 | 546 | .doc table th { 547 | background: #f2f2f2; 548 | } 549 | 550 | .doc dl { 551 | margin: 0 10px 20px 10px; 552 | } 553 | 554 | .doc dl dt { 555 | font-weight: bold; 556 | margin: 0; 557 | padding: 3px 0; 558 | border-bottom: 1px solid #ddd; 559 | } 560 | 561 | .doc dl dd { 562 | padding: 5px 0; 563 | margin: 0 0 5px 10px; 564 | } 565 | 566 | .doc abbr { 567 | 
border-bottom: 1px dotted #333; 568 | font-variant: none; 569 | cursor: help; 570 | } 571 | 572 | .src-link { 573 | margin-bottom: 15px; 574 | } 575 | 576 | .src-link a { 577 | font-size: 70%; 578 | padding: 1px 4px; 579 | text-decoration: none; 580 | color: #5555bb; 581 | background-color: #f6f6f6; 582 | } 583 | 584 | blockquote { 585 | opacity: 0.6; 586 | border-left: solid 2px #ddd; 587 | margin-left: 0; 588 | padding-left: 1em; 589 | } 590 | 591 | /* Responsiveness Theme */ 592 | 593 | @media (max-device-width: 480px) { 594 | .sidebar { 595 | display:none; 596 | } 597 | 598 | #content { 599 | position: relative; 600 | left: initial !important; 601 | top: 110px; 602 | padding: 0 1em; 603 | } 604 | 605 | #header { 606 | display: flex; 607 | flex-direction: column-reverse; 608 | height: 100px; 609 | } 610 | 611 | #header > h1 { 612 | font-size: 52px; 613 | } 614 | 615 | #header h2 { 616 | float: none; 617 | font-size: 20px; 618 | } 619 | 620 | .namespace-index > h1 { 621 | display: none; 622 | } 623 | 624 | .public, .doc, .namespace > .index, .namespace > .doc, .namespace > h3 { 625 | max-width: initial; 626 | } 627 | 628 | .doc { 629 | text-align: justify; 630 | } 631 | 632 | .public { 633 | padding-top: 2em; 634 | padding-bottom: 2em; 635 | } 636 | 637 | .public > h3 { 638 | font-size: 300%; 639 | } 640 | 641 | .public > h4.type, .public > h4.added, .public > h4.deprecated { 642 | font-size: 150%; 643 | margin-top: 1em; 644 | } 645 | 646 | pre > code { 647 | font-size: 200%; 648 | } 649 | } 650 | -------------------------------------------------------------------------------- /docs/highlight/highlight.min.js: -------------------------------------------------------------------------------- 1 | /*! highlight.js v9.6.0 | BSD3 License | git.io/hljslicense */ 2 | !function(e){var n="object"==typeof window&&window||"object"==typeof self&&self;"undefined"!=typeof exports?e(exports):n&&(n.hljs=e({}),"function"==typeof define&&define.amd&&define([],function(){return n.hljs}))}(function(e){function n(e){return e.replace(/[&<>]/gm,function(e){return I[e]})}function t(e){return e.nodeName.toLowerCase()}function r(e,n){var t=e&&e.exec(n);return t&&0===t.index}function a(e){return k.test(e)}function i(e){var n,t,r,i,o=e.className+" ";if(o+=e.parentNode?e.parentNode.className:"",t=B.exec(o))return R(t[1])?t[1]:"no-highlight";for(o=o.split(/\s+/),n=0,r=o.length;r>n;n++)if(i=o[n],a(i)||R(i))return i}function o(e,n){var t,r={};for(t in e)r[t]=e[t];if(n)for(t in n)r[t]=n[t];return r}function u(e){var n=[];return function r(e,a){for(var i=e.firstChild;i;i=i.nextSibling)3===i.nodeType?a+=i.nodeValue.length:1===i.nodeType&&(n.push({event:"start",offset:a,node:i}),a=r(i,a),t(i).match(/br|hr|img|input/)||n.push({event:"stop",offset:a,node:i}));return a}(e,0),n}function c(e,r,a){function i(){return e.length&&r.length?e[0].offset!==r[0].offset?e[0].offset"}function u(e){l+=""}function c(e){("start"===e.event?o:u)(e.node)}for(var s=0,l="",f=[];e.length||r.length;){var g=i();if(l+=n(a.substr(s,g[0].offset-s)),s=g[0].offset,g===e){f.reverse().forEach(u);do c(g.splice(0,1)[0]),g=i();while(g===e&&g.length&&g[0].offset===s);f.reverse().forEach(o)}else"start"===g[0].event?f.push(g[0].node):f.pop(),c(g.splice(0,1)[0])}return l+n(a.substr(s))}function s(e){function n(e){return e&&e.source||e}function t(t,r){return new RegExp(n(t),"m"+(e.cI?"i":"")+(r?"g":""))}function r(a,i){if(!a.compiled){if(a.compiled=!0,a.k=a.k||a.bK,a.k){var u={},c=function(n,t){e.cI&&(t=t.toLowerCase()),t.split(" ").forEach(function(e){var 
t=e.split("|");u[t[0]]=[n,t[1]?Number(t[1]):1]})};"string"==typeof a.k?c("keyword",a.k):E(a.k).forEach(function(e){c(e,a.k[e])}),a.k=u}a.lR=t(a.l||/\w+/,!0),i&&(a.bK&&(a.b="\\b("+a.bK.split(" ").join("|")+")\\b"),a.b||(a.b=/\B|\b/),a.bR=t(a.b),a.e||a.eW||(a.e=/\B|\b/),a.e&&(a.eR=t(a.e)),a.tE=n(a.e)||"",a.eW&&i.tE&&(a.tE+=(a.e?"|":"")+i.tE)),a.i&&(a.iR=t(a.i)),null==a.r&&(a.r=1),a.c||(a.c=[]);var s=[];a.c.forEach(function(e){e.v?e.v.forEach(function(n){s.push(o(e,n))}):s.push("self"===e?a:e)}),a.c=s,a.c.forEach(function(e){r(e,a)}),a.starts&&r(a.starts,i);var l=a.c.map(function(e){return e.bK?"\\.?("+e.b+")\\.?":e.b}).concat([a.tE,a.i]).map(n).filter(Boolean);a.t=l.length?t(l.join("|"),!0):{exec:function(){return null}}}}r(e)}function l(e,t,a,i){function o(e,n){var t,a;for(t=0,a=n.c.length;a>t;t++)if(r(n.c[t].bR,e))return n.c[t]}function u(e,n){if(r(e.eR,n)){for(;e.endsParent&&e.parent;)e=e.parent;return e}return e.eW?u(e.parent,n):void 0}function c(e,n){return!a&&r(n.iR,e)}function g(e,n){var t=N.cI?n[0].toLowerCase():n[0];return e.k.hasOwnProperty(t)&&e.k[t]}function h(e,n,t,r){var a=r?"":y.classPrefix,i='',i+n+o}function p(){var e,t,r,a;if(!E.k)return n(B);for(a="",t=0,E.lR.lastIndex=0,r=E.lR.exec(B);r;)a+=n(B.substr(t,r.index-t)),e=g(E,r),e?(M+=e[1],a+=h(e[0],n(r[0]))):a+=n(r[0]),t=E.lR.lastIndex,r=E.lR.exec(B);return a+n(B.substr(t))}function d(){var e="string"==typeof E.sL;if(e&&!x[E.sL])return n(B);var t=e?l(E.sL,B,!0,L[E.sL]):f(B,E.sL.length?E.sL:void 0);return E.r>0&&(M+=t.r),e&&(L[E.sL]=t.top),h(t.language,t.value,!1,!0)}function b(){k+=null!=E.sL?d():p(),B=""}function v(e){k+=e.cN?h(e.cN,"",!0):"",E=Object.create(e,{parent:{value:E}})}function m(e,n){if(B+=e,null==n)return b(),0;var t=o(n,E);if(t)return t.skip?B+=n:(t.eB&&(B+=n),b(),t.rB||t.eB||(B=n)),v(t,n),t.rB?0:n.length;var r=u(E,n);if(r){var a=E;a.skip?B+=n:(a.rE||a.eE||(B+=n),b(),a.eE&&(B=n));do E.cN&&(k+=C),E.skip||(M+=E.r),E=E.parent;while(E!==r.parent);return r.starts&&v(r.starts,""),a.rE?0:n.length}if(c(n,E))throw new Error('Illegal lexeme "'+n+'" for mode "'+(E.cN||"")+'"');return B+=n,n.length||1}var N=R(e);if(!N)throw new Error('Unknown language: "'+e+'"');s(N);var w,E=i||N,L={},k="";for(w=E;w!==N;w=w.parent)w.cN&&(k=h(w.cN,"",!0)+k);var B="",M=0;try{for(var I,j,O=0;;){if(E.t.lastIndex=O,I=E.t.exec(t),!I)break;j=m(t.substr(O,I.index-O),I[0]),O=I.index+j}for(m(t.substr(O)),w=E;w.parent;w=w.parent)w.cN&&(k+=C);return{r:M,value:k,language:e,top:E}}catch(T){if(T.message&&-1!==T.message.indexOf("Illegal"))return{r:0,value:n(t)};throw T}}function f(e,t){t=t||y.languages||E(x);var r={r:0,value:n(e)},a=r;return t.filter(R).forEach(function(n){var t=l(n,e,!1);t.language=n,t.r>a.r&&(a=t),t.r>r.r&&(a=r,r=t)}),a.language&&(r.second_best=a),r}function g(e){return y.tabReplace||y.useBR?e.replace(M,function(e,n){return y.useBR&&"\n"===e?"
":y.tabReplace?n.replace(/\t/g,y.tabReplace):void 0}):e}function h(e,n,t){var r=n?L[n]:t,a=[e.trim()];return e.match(/\bhljs\b/)||a.push("hljs"),-1===e.indexOf(r)&&a.push(r),a.join(" ").trim()}function p(e){var n,t,r,o,s,p=i(e);a(p)||(y.useBR?(n=document.createElementNS("http://www.w3.org/1999/xhtml","div"),n.innerHTML=e.innerHTML.replace(/\n/g,"").replace(//g,"\n")):n=e,s=n.textContent,r=p?l(p,s,!0):f(s),t=u(n),t.length&&(o=document.createElementNS("http://www.w3.org/1999/xhtml","div"),o.innerHTML=r.value,r.value=c(t,u(o),s)),r.value=g(r.value),e.innerHTML=r.value,e.className=h(e.className,p,r.language),e.result={language:r.language,re:r.r},r.second_best&&(e.second_best={language:r.second_best.language,re:r.second_best.r}))}function d(e){y=o(y,e)}function b(){if(!b.called){b.called=!0;var e=document.querySelectorAll("pre code");w.forEach.call(e,p)}}function v(){addEventListener("DOMContentLoaded",b,!1),addEventListener("load",b,!1)}function m(n,t){var r=x[n]=t(e);r.aliases&&r.aliases.forEach(function(e){L[e]=n})}function N(){return E(x)}function R(e){return e=(e||"").toLowerCase(),x[e]||x[L[e]]}var w=[],E=Object.keys,x={},L={},k=/^(no-?highlight|plain|text)$/i,B=/\blang(?:uage)?-([\w-]+)\b/i,M=/((^(<[^>]+>|\t|)+|(?:\n)))/gm,C="
",y={classPrefix:"hljs-",tabReplace:null,useBR:!1,languages:void 0},I={"&":"&","<":"<",">":">"};return e.highlight=l,e.highlightAuto=f,e.fixMarkup=g,e.highlightBlock=p,e.configure=d,e.initHighlighting=b,e.initHighlightingOnLoad=v,e.registerLanguage=m,e.listLanguages=N,e.getLanguage=R,e.inherit=o,e.IR="[a-zA-Z]\\w*",e.UIR="[a-zA-Z_]\\w*",e.NR="\\b\\d+(\\.\\d+)?",e.CNR="(-?)(\\b0[xX][a-fA-F0-9]+|(\\b\\d+(\\.\\d*)?|\\.\\d+)([eE][-+]?\\d+)?)",e.BNR="\\b(0b[01]+)",e.RSR="!|!=|!==|%|%=|&|&&|&=|\\*|\\*=|\\+|\\+=|,|-|-=|/=|/|:|;|<<|<<=|<=|<|===|==|=|>>>=|>>=|>=|>>>|>>|>|\\?|\\[|\\{|\\(|\\^|\\^=|\\||\\|=|\\|\\||~",e.BE={b:"\\\\[\\s\\S]",r:0},e.ASM={cN:"string",b:"'",e:"'",i:"\\n",c:[e.BE]},e.QSM={cN:"string",b:'"',e:'"',i:"\\n",c:[e.BE]},e.PWM={b:/\b(a|an|the|are|I'm|isn't|don't|doesn't|won't|but|just|should|pretty|simply|enough|gonna|going|wtf|so|such|will|you|your|like)\b/},e.C=function(n,t,r){var a=e.inherit({cN:"comment",b:n,e:t,c:[]},r||{});return a.c.push(e.PWM),a.c.push({cN:"doctag",b:"(?:TODO|FIXME|NOTE|BUG|XXX):",r:0}),a},e.CLCM=e.C("//","$"),e.CBCM=e.C("/\\*","\\*/"),e.HCM=e.C("#","$"),e.NM={cN:"number",b:e.NR,r:0},e.CNM={cN:"number",b:e.CNR,r:0},e.BNM={cN:"number",b:e.BNR,r:0},e.CSSNM={cN:"number",b:e.NR+"(%|em|ex|ch|rem|vw|vh|vmin|vmax|cm|mm|in|pt|pc|px|deg|grad|rad|turn|s|ms|Hz|kHz|dpi|dpcm|dppx)?",r:0},e.RM={cN:"regexp",b:/\//,e:/\/[gimuy]*/,i:/\n/,c:[e.BE,{b:/\[/,e:/\]/,r:0,c:[e.BE]}]},e.TM={cN:"title",b:e.IR,r:0},e.UTM={cN:"title",b:e.UIR,r:0},e.METHOD_GUARD={b:"\\.\\s*"+e.UIR,r:0},e});hljs.registerLanguage("clojure",function(e){var t={"builtin-name":"def defonce cond apply if-not if-let if not not= = < > <= >= == + / * - rem quot neg? pos? delay? symbol? keyword? true? false? integer? empty? coll? list? set? ifn? fn? associative? sequential? sorted? counted? reversible? number? decimal? class? distinct? isa? float? rational? reduced? ratio? odd? even? char? seq? vector? string? map? nil? contains? zero? instance? not-every? not-any? libspec? -> ->> .. . inc compare do dotimes mapcat take remove take-while drop letfn drop-last take-last drop-while while intern condp case reduced cycle split-at split-with repeat replicate iterate range merge zipmap declare line-seq sort comparator sort-by dorun doall nthnext nthrest partition eval doseq await await-for let agent atom send send-off release-pending-sends add-watch mapv filterv remove-watch agent-error restart-agent set-error-handler error-handler set-error-mode! error-mode shutdown-agents quote var fn loop recur throw try monitor-enter monitor-exit defmacro defn defn- macroexpand macroexpand-1 for dosync and or when when-not when-let comp juxt partial sequence memoize constantly complement identity assert peek pop doto proxy defstruct first rest cons defprotocol cast coll deftype defrecord last butlast sigs reify second ffirst fnext nfirst nnext defmulti defmethod meta with-meta ns in-ns create-ns import refer keys select-keys vals key val rseq name namespace promise into transient persistent! conj! assoc! dissoc! pop! disj! use class type num float double short byte boolean bigint biginteger bigdec print-method print-dup throw-if printf format load compile get-in update-in pr pr-on newline flush read slurp read-line subvec with-open memfn time re-find re-groups rand-int rand mod locking assert-valid-fdecl alias resolve ref deref refset swap! reset! set-validator! compare-and-set! alter-meta! reset-meta! commute get-validator alter ref-set ref-history-count ref-min-history ref-max-history ensure sync io! new next conj set! 
to-array future future-call into-array aset gen-class reduce map filter find empty hash-map hash-set sorted-map sorted-map-by sorted-set sorted-set-by vec vector seq flatten reverse assoc dissoc list disj get union difference intersection extend extend-type extend-protocol int nth delay count concat chunk chunk-buffer chunk-append chunk-first chunk-rest max min dec unchecked-inc-int unchecked-inc unchecked-dec-inc unchecked-dec unchecked-negate unchecked-add-int unchecked-add unchecked-subtract-int unchecked-subtract chunk-next chunk-cons chunked-seq? prn vary-meta lazy-seq spread list* str find-keyword keyword symbol gensym force rationalize"},r="a-zA-Z_\\-!.?+*=<>&#'",n="["+r+"]["+r+"0-9/;:]*",a="[-+]?\\d+(\\.\\d+)?",o={b:n,r:0},s={cN:"number",b:a,r:0},i=e.inherit(e.QSM,{i:null}),c=e.C(";","$",{r:0}),d={cN:"literal",b:/\b(true|false|nil)\b/},l={b:"[\\[\\{]",e:"[\\]\\}]"},m={cN:"comment",b:"\\^"+n},p=e.C("\\^\\{","\\}"),u={cN:"symbol",b:"[:]{1,2}"+n},f={b:"\\(",e:"\\)"},h={eW:!0,r:0},y={k:t,l:n,cN:"name",b:n,starts:h},b=[f,i,m,p,c,u,l,s,d,o];return f.c=[e.C("comment",""),y,h],h.c=b,l.c=b,{aliases:["clj"],i:/\S/,c:[f,i,m,p,c,u,l,s,d]}});hljs.registerLanguage("clojure-repl",function(e){return{c:[{cN:"meta",b:/^([\w.-]+|\s*#_)=>/,starts:{e:/$/,sL:"clojure"}}]}}); -------------------------------------------------------------------------------- /docs/highlight/solarized-light.css: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Orginal Style from ethanschoonover.com/solarized (c) Jeremy Hull 4 | 5 | */ 6 | 7 | .hljs { 8 | display: block; 9 | overflow-x: auto; 10 | padding: 0.5em; 11 | background: #fdf6e3; 12 | color: #657b83; 13 | } 14 | 15 | .hljs-comment, 16 | .hljs-quote { 17 | color: #93a1a1; 18 | } 19 | 20 | /* Solarized Green */ 21 | .hljs-keyword, 22 | .hljs-selector-tag, 23 | .hljs-addition { 24 | color: #859900; 25 | } 26 | 27 | /* Solarized Cyan */ 28 | .hljs-number, 29 | .hljs-string, 30 | .hljs-meta .hljs-meta-string, 31 | .hljs-literal, 32 | .hljs-doctag, 33 | .hljs-regexp { 34 | color: #2aa198; 35 | } 36 | 37 | /* Solarized Blue */ 38 | .hljs-title, 39 | .hljs-section, 40 | .hljs-name, 41 | .hljs-selector-id, 42 | .hljs-selector-class { 43 | color: #268bd2; 44 | } 45 | 46 | /* Solarized Yellow */ 47 | .hljs-attribute, 48 | .hljs-attr, 49 | .hljs-variable, 50 | .hljs-template-variable, 51 | .hljs-class .hljs-title, 52 | .hljs-type { 53 | color: #b58900; 54 | } 55 | 56 | /* Solarized Orange */ 57 | .hljs-symbol, 58 | .hljs-bullet, 59 | .hljs-subst, 60 | .hljs-meta, 61 | .hljs-meta .hljs-keyword, 62 | .hljs-selector-attr, 63 | .hljs-selector-pseudo, 64 | .hljs-link { 65 | color: #cb4b16; 66 | } 67 | 68 | /* Solarized Red */ 69 | .hljs-built_in, 70 | .hljs-deletion { 71 | color: #dc322f; 72 | } 73 | 74 | .hljs-formula { 75 | background: #eee8d5; 76 | } 77 | 78 | .hljs-emphasis { 79 | font-style: italic; 80 | } 81 | 82 | .hljs-strong { 83 | font-weight: bold; 84 | } 85 | -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 3 | tvm-clj 6.00-beta-1-SNAPSHOT

tvm-clj 6.00-beta-1-SNAPSHOT

Released under the Eclipse Public License

Clojure bindings and exploration of the tvm library.

Installation

To install, add the following dependency to your project or build file:

[tvm-clj "6.00-beta-1-SNAPSHOT"]

Topics

Namespaces

tvm-clj.application.image

Image resize algorithm showing a somewhat nontrivial application of TVM operators. In this case the algorithm is a simple area-averaging algorithm used for scaling images down: it reads a rectangle in the source image and averages it for every destination pixel.

tvm-clj.application.kmeans

High performance implementation of the KMeans algorithm using kmeans++ initialization and Lloyd’s algorithm for convergence.

tvm-clj.compiler

Once a user has a schedule, they then need to compile the schedule into actual executable code. This produces a module which contains all of the executable code, cuda/opencl modules, etc. required to run the schedule.

tvm-clj.device

Operations on a device. Devices, such as a GPU, need to be addressed independently and once you have a device you can allocate a tensor on that device.

tvm-clj.module

Once users have compiled a module, they can then query the module for the functions within. Returned functions take only things convertible to TVM nodes, such as scalars and tensors, and the result buffer must be passed in.

Public variables and functions:

tvm-clj.schedule

After describing the algorithm, the user creates a ‘schedule’ for it: a set of transformations to the algorithm that are guaranteed not to change the results, such as tiling a computation across a tensor.

-------------------------------------------------------------------------------- /docs/js/page_effects.js: -------------------------------------------------------------------------------- 1 | function visibleInParent(element) { 2 | var position = $(element).position().top 3 | return position > -50 && position < ($(element).offsetParent().height() - 50) 4 | } 5 | 6 | function hasFragment(link, fragment) { 7 | return $(link).attr("href").indexOf("#" + fragment) != -1 8 | } 9 | 10 | function findLinkByFragment(elements, fragment) { 11 | return $(elements).filter(function(i, e) { return hasFragment(e, fragment)}).first() 12 | } 13 | 14 | function scrollToCurrentVarLink(elements) { 15 | var elements = $(elements); 16 | var parent = elements.offsetParent(); 17 | 18 | if (elements.length == 0) return; 19 | 20 | var top = elements.first().position().top; 21 | var bottom = elements.last().position().top + elements.last().height(); 22 | 23 | if (top >= 0 && bottom <= parent.height()) return; 24 | 25 | if (top < 0) { 26 | parent.scrollTop(parent.scrollTop() + top); 27 | } 28 | else if (bottom > parent.height()) { 29 | parent.scrollTop(parent.scrollTop() + bottom - parent.height()); 30 | } 31 | } 32 | 33 | function setCurrentVarLink() { 34 | $('.secondary a').parent().removeClass('current') 35 | $('.anchor'). 36 | filter(function(index) { return visibleInParent(this) }). 37 | each(function(index, element) { 38 | findLinkByFragment(".secondary a", element.id). 39 | parent(). 40 | addClass('current') 41 | }); 42 | scrollToCurrentVarLink('.secondary .current'); 43 | } 44 | 45 | var hasStorage = (function() { try { return localStorage.getItem } catch(e) {} }()) 46 | 47 | function scrollPositionId(element) { 48 | var directory = window.location.href.replace(/[^\/]+\.html$/, '') 49 | return 'scroll::' + $(element).attr('id') + '::' + directory 50 | } 51 | 52 | function storeScrollPosition(element) { 53 | if (!hasStorage) return; 54 | localStorage.setItem(scrollPositionId(element) + "::x", $(element).scrollLeft()) 55 | localStorage.setItem(scrollPositionId(element) + "::y", $(element).scrollTop()) 56 | } 57 | 58 | function recallScrollPosition(element) { 59 | if (!hasStorage) return; 60 | $(element).scrollLeft(localStorage.getItem(scrollPositionId(element) + "::x")) 61 | $(element).scrollTop(localStorage.getItem(scrollPositionId(element) + "::y")) 62 | } 63 | 64 | function persistScrollPosition(element) { 65 | recallScrollPosition(element) 66 | $(element).scroll(function() { storeScrollPosition(element) }) 67 | } 68 | 69 | function sidebarContentWidth(element) { 70 | var widths = $(element).find('.inner').map(function() { return $(this).innerWidth() }) 71 | return Math.max.apply(Math, widths) 72 | } 73 | 74 | function calculateSize(width, snap, margin, minimum) { 75 | if (width == 0) { 76 | return 0 77 | } 78 | else { 79 | return Math.max(minimum, (Math.ceil(width / snap) * snap) + (margin * 2)) 80 | } 81 | } 82 | 83 | function resizeSidebars() { 84 | var primaryWidth = sidebarContentWidth('.primary') 85 | var secondaryWidth = 0 86 | 87 | if ($('.secondary').length != 0) { 88 | secondaryWidth = sidebarContentWidth('.secondary') 89 | } 90 | 91 | // snap to grid 92 | primaryWidth = calculateSize(primaryWidth, 32, 13, 160) 93 | secondaryWidth = calculateSize(secondaryWidth, 32, 13, 160) 94 | 95 | $('.primary').css('width', primaryWidth) 96 | $('.secondary').css('width', secondaryWidth).css('left', primaryWidth + 1) 97 | 98 | if (secondaryWidth > 0) { 99 | $('#content').css('left', primaryWidth + secondaryWidth 
+ 2) 100 | } 101 | else { 102 | $('#content').css('left', primaryWidth + 1) 103 | } 104 | } 105 | 106 | $(window).ready(resizeSidebars) 107 | $(window).ready(setCurrentVarLink) 108 | $(window).ready(function() { persistScrollPosition('.primary')}) 109 | $(window).ready(function() { 110 | $('#content').scroll(setCurrentVarLink) 111 | $(window).resize(setCurrentVarLink) 112 | $(window).resize(resizeSidebars) 113 | }) 114 | -------------------------------------------------------------------------------- /docs/tvm-clj.application.image.html: -------------------------------------------------------------------------------- 1 | 3 | tvm-clj.application.image documentation

tvm-clj.application.image

Image resize algorithm showing a somewhat nontrivial application of TVM operators. In this case the algorithm is a simple area-averaging algorithm used for scaling images down: it reads a rectangle in the source image and averages it for every destination pixel.

This is a namespace where you want to view the source :-)

  (def input-img (bufimg/load "test/data/jen.jpg"))
  (def test-fn (-> (tvm-area-resize-algo-def)
                   (schedule-tvm-area)
                   (compile-scheduled-tvm-area)))

  (def result (time (area-resize! input-img 512 test-fn)))
  ;;179 ms
  (def jvm-result (time (area-resize! input-img 512 jvm-area-resize-fn!)))
  ;;5.7 seconds

area-resize!

(area-resize! input new-width resize-fn)

Perform an area resize with a defined resize algorithm.

jvm-area-resize-algo

(jvm-area-resize-algo input output-shape)

jvm-area-resize-fn!

(jvm-area-resize-fn! jvm-resize-algo input output)

jvm-area-split-resize-algo

(jvm-area-split-resize-algo input output-shape)

tvm-area-resize-algo

(tvm-area-resize-algo n-channels device-type)

Step 1 is to define the algorithm. This definition looks strikingly similar to the definition above.
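
A hedged sketch of step 1 (the argument values are illustrative; the returned definition is then scheduled and compiled as in the snippet at the top of this page):

  (def algo (tvm-area-resize-algo 3 :cpu))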

-------------------------------------------------------------------------------- /docs/tvm-clj.application.kmeans.html: -------------------------------------------------------------------------------- 1 | 3 | tvm-clj.application.kmeans documentation

tvm-clj.application.kmeans

High performance implementation of the KMeans algorithm using kmeans++ initialization and Lloyd’s algorithm for convergence.

kmeans++

(kmeans++ dataset n-centroids & [{:keys [n-iters rand-seed minimal-improvement-threshold], :or {minimal-improvement-threshold 0.01}, :as options}])

Find K cluster centroids via kmeans++ center initialization followed by Lloyd's algorithm. Dataset must be a matrix (2d tensor).

  • dataset - 2d matrix of numeric datatype.
  • n-centroids - How many centroids to find.

Returns map of:

  • :centroids - 2d tensor of double centroids.
  • :centroid-indexes - 1d integer vector of assigned center indexes.
  • :iteration-scores - array of length n-iters+1 of mean squared error scores, containing the scores from centroid assignment up to the score when the algorithm terminates.

Options:

  • :minimal-improvement-threshold - defaults to 0.01 - the algorithm terminates if (1.0 - error(n-1)/error(n-2)) < minimal-improvement-threshold. Zero means the algorithm will always train to n-iters.
  • :n-iters - defaults to 100 - max number of iterations; the algorithm terminates if (>= iter-idx n-iters).
  • :rand-seed - integer or implementation of java.util.Random.
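
A minimal usage sketch (the dataset values and option settings are illustrative):

  (require '[tech.v3.tensor :as dtt])
  (def dataset (dtt/->tensor (partition 2 (repeatedly 200 rand)) :datatype :float64))
  (def model (kmeans++ dataset 4 {:n-iters 50 :rand-seed 42}))
  (:centroids model) ;; 2d tensor of double centroids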

order-data-labels

(order-data-labels data labels)

Order the dataset and labels such that labels are monotonically increasing. Returns a tuple of [dataset labels].

predict-per-label

(predict-per-label data model)

Return both a probability distribution per row across each label and a 1d tensor of assigned label indexes.

Returns:

  • :probability-distribution - each row sums to one, max prob is the index picked.
  • :label-indexes - int32 assigned indexes for each row in the dataset.

quantize-image

(quantize-image src-path dst-path n-quantization & [{:keys [n-iters seed], :or {n-iters 5}}])

Quantize an image using kmeans. Copies data into a new image and, if dst-path is provided, saves the image.

Returns:

  • :centroids - result of the quantization.
  • :result - resulting BufferedImage.
  • :scores - Scores after each iteration including initialization.
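
A minimal sketch using an image from this repository's test data (the output path and option values are illustrative):

  (quantize-image "test/data/castle.jpg" "quantized.jpg" 8 {:n-iters 5})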

train-per-label

(train-per-label data labels n-per-label & [{:keys [input-ordered?], :as options}])

Given a dataset along with per-row integer labels, train N per-label kmeans centroids, returning a model which you can use with predict-per-label.
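
A hedged sketch (the data and label tensors are placeholders; tvm-clj.application.mnist uses this exact pattern):

  (def model (train-per-label train-data train-labels 10))
  (def predictions (predict-per-label test-data model))
  (:label-indexes predictions) ;; int32 label per test row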

-------------------------------------------------------------------------------- /docs/tvm-clj.compiler.html: -------------------------------------------------------------------------------- 1 | 3 | tvm-clj.compiler documentation

tvm-clj.compiler

Once a user has a schedule, they then need to compile the schedule into actual executable code. This produces a module which contains all of the executable code, cuda/opencl modules, etc. required to run the schedule.

compile

(compile fn-map {:keys [target-host], :or {target-host "llvm"}})(compile fn-map)

Build a map of function entries. fn-map is a map of name to fn-data, and fn-data is a map containing:

  • :schedule - schedule to use.
  • :arguments - argument declarations to the function.
  • :bind-map - optional map of argument to bind declaration to declare the memory layout of the argument.
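
A hedged sketch of compiling a one-function fn-map (the names are illustrative; the schedule and argument declarations come from tvm-clj.schedule and tvm-clj.ast):

  (def module (compile {"area_resize" {:schedule my-schedule
                                       :arguments [input-decl output-decl]}}))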

int->lowered-function-type-map

ir->fn

(ir->fn ir-data fn-name)

Given a map of {:schedule :arguments}, optionally containing :target, create a clojure fn that calls into TVM.

lower

(lower sch args {:keys [name binds simple-mode? optimization-level], :or {name "main", optimization-level 2}})

Lowering step before build into target.

Users do not often need to call this themselves but it is useful in order to see code produced by different scheduling primitives. The return value prints the schedule nicely to the REPL.

Parameters:

  • sch : tvm.te.schedule.Schedule - the schedule to be built.
  • args : list of Buffer or Tensor or Var - the argument list to the function.
  • name : str, optional - the name of the result function.
  • binds : dict of Tensor to Buffer, optional - dictionary that maps a Tensor to the Buffer which specifies the data layout requirement of the function. By default, a new compact buffer is created for each tensor in the arguments.
  • simple-mode? : bool, optional - whether to only output a simple and compact statement; this will skip LoopPartition, API wrapper generation and unrolling.

Returns:

  • m : IRModule or Stmt - the result IRModule if simple-mode? is false; otherwise the Stmt before the make-api pass is returned.

lowered-function-type->int-map

view-ir

(view-ir ir-data)

Lower the schedule without optimizations returning an IR layer that has a nice pprint for inspecting schedule state.
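
A hedged sketch (assuming ir-data is the {:schedule :arguments} map described under ir->fn; the names are placeholders):

  (view-ir {:schedule my-schedule
            :arguments [input-decl output-decl]})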

-------------------------------------------------------------------------------- /docs/tvm-clj.device.html: -------------------------------------------------------------------------------- 1 | 3 | tvm-clj.device documentation

tvm-clj.device

Operations on a device. Devices, such as a GPU, need to be addressed independently and once you have a device you can allocate a tensor on that device.

  • Device types are keywords: #{:cpu :cuda :opencl}
  • Device ids are integers starting from zero.

copy-tensor!

(copy-tensor! src-tens dest-tens stream)(copy-tensor! src-tens dest-tens)

Copy a src tensor to a destination tensor.

cpu->device

(cpu->device tensor device-type device-id {:keys [stream], :as options})(cpu->device tensor device-type device-id)

Ensure a tensor is on a device, copying if necessary.

device->cpu

(device->cpu dev-tens {:keys [stream unsynchronized?]})(device->cpu dev-tens)

Ensure a tensor is on the CPU, copying if necessary.
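
A hedged round-trip sketch (assumes an OpenCL device 0 exists; check with device-exists?):

  (require '[tech.v3.tensor :as dtt])
  (def host-tens (dtt/->tensor [[1 2] [3 4]] :datatype :float32))
  (def dev-tens (cpu->device host-tens :opencl 0))
  (def back-on-cpu (device->cpu dev-tens))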

device-attributes

(device-attributes device-type device-id)

device-exists?

(device-exists? device-type device-id)

device-tensor

(device-tensor shape datatype device-type device-id options)(device-tensor shape datatype device-type device-id)(device-tensor src-tens-prototype device-type device-id)

Allocate a device tensor.

sync-with-host

(sync-with-host device-type device-id)

Synchronize the device stream with the host

-------------------------------------------------------------------------------- /docs/tvm-clj.module.html: -------------------------------------------------------------------------------- 1 | 3 | tvm-clj.module documentation

tvm-clj.module

Once users have compiled a module, they can then query the module for the functions within. Returned functions take only things convertible to TVM nodes, such as scalars and tensors, and the result buffer must be passed in.

find-function

(find-function module fn-name)

Find a function in module. Failure causes an exception.
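
A hedged sketch (the module and function name are illustrative; see tvm-clj.compiler for producing a module):

  (def area-fn (find-function compiled-module "area_resize"))
  ;; returned fns take TVM-convertible arguments and mutate a passed-in result buffer
  (area-fn input-tensor output-tensor)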

-------------------------------------------------------------------------------- /docs/tvm-clj.schedule.html: -------------------------------------------------------------------------------- 1 | 3 | tvm-clj.schedule documentation

tvm-clj.schedule

After describing the algorithm, the user creates a ‘schedule’ for it: a set of transformations to the algorithm that are guaranteed not to change the results, such as tiling a computation across a tensor.

->stage

(->stage stage-or-schedule operation)

create-schedule

(create-schedule op-seq)
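
A hedged sketch of the typical flow (result-op stands for the operation node of a computed tensor from tvm-clj.ast):

  (def sched (create-schedule [result-op]))
  (def result-stage (->stage sched result-op))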

inline-op

(inline-op schedule src-op dst-op rel-axis)

Inline an operation at the given axis. If the axis is a number, positive numbers index left-to-right while negative numbers index right-to-left (Python indexing semantics) over the destination axes.

rel-axis defaults to -1, or the most-rapidly-changing index.

parallelize-axis

(parallelize-axis schedule op rel-axis)

schedule-cache-read

(schedule-cache-read schedule tensor cache-type readers)

schedule-cache-write

(schedule-cache-write schedule tensor cache-type)

Returns a new tensor

stage-bind

(stage-bind stage iter-var thread-ivar)

Bind an iter-var to a stage variable

stage-bind-gpu

(stage-bind-gpu stage block-axis-seq thread-axis-seq)

Bind the gpu-defined axes to the tvm axes. GPUs (cuda, opencl) define a roughly two-level breakdown of axes: block and thread. Threads run on the same block and can share a special kind of memory (called shared memory). There can be up to 3 tvm axes per block or thread and these are labeled (outer iterator to inner iterator): [z y x]
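
A hedged sketch (the stage and axis names are placeholders for iteration variables taken from the stage's operation):

  (stage-bind-gpu result-stage [block-y block-x] [thread-y thread-x])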

stage-compute-at

(stage-compute-at src-stage dst-stage dst-axis)

Compute the src stage at the given axis of the dst stage.

stage-cpu-injective

(stage-cpu-injective stage op & {:keys [axis]})

stage-fuse

(stage-fuse stage axis-args)

Fuse n axes together; returns a single new axis.

stage-gpu-injective

(stage-gpu-injective stage op & {:keys [thread-count axis], :or {thread-count 16}})

stage-inline

(stage-inline stage)

stage-parallel

(stage-parallel stage axis)

Indicate that this axis has complete parallelism

stage-reorder

(stage-reorder stage axis-seq)

stage-split-axis

(stage-split-axis stage iter-var factor)

stage-tile

(stage-tile stage outer-axis inner-axis outer-dim inner-dim)

stage-unroll

(stage-unroll stage axis)

stage-vectorize

(stage-vectorize stage axis)

throw-nil

(throw-nil item key-val)
-------------------------------------------------------------------------------- /examples/.gitignore: -------------------------------------------------------------------------------- 1 | *.jpg -------------------------------------------------------------------------------- /examples/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | RUN DEBIAN_FRONTEND=noninteractive apt-get update && \ 4 | DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 5 | make g++ cmake llvm-dev libopenblas-dev \ 6 | ocl-icd-* opencl-headers \ 7 | openjdk-11-jdk-headless wget curl 8 | 9 | 10 | RUN wget https://raw.githubusercontent.com/technomancy/leiningen/stable/bin/lein &&\ 11 | chmod a+x lein &&\ 12 | mv lein /usr/bin 13 | 14 | ARG USERID 15 | ARG GROUPID 16 | ARG USERNAME 17 | 18 | RUN groupadd -g $GROUPID $USERNAME 19 | RUN useradd -u $USERID -g $GROUPID $USERNAME 20 | RUN mkdir /home/$USERNAME && chown $USERNAME:$USERNAME /home/$USERNAME 21 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | If you don't want to build the tvm library yourself (which we recommend) 4 | you can run 5 | 6 | ```bash 7 | scripts/docker-repl.sh 8 | ``` 9 | 10 | And then require tvm from the repl. A compatible native library should be 11 | unpacked and loaded. 12 | -------------------------------------------------------------------------------- /examples/images/result-cpu.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/techascent/tvm-clj/1088845bd613b4ba14b00381ffe3cdbd3d8b639e/examples/images/result-cpu.jpg -------------------------------------------------------------------------------- /examples/project.clj: -------------------------------------------------------------------------------- 1 | (defproject tvm-example "0.1.0-SNAPSHOT" 2 | :description "Example project using tvm" 3 | :url "http://github.com/tech-ascent/tvm-clj" 4 | :license {:name "Eclipse Public License" 5 | :url "http://www.eclipse.org/legal/epl-v10.html"} 6 | :dependencies [[org.clojure/clojure "1.10.0"] 7 | [tvm-clj "5.0"] 8 | [techascent/tech.opencv "4.25"]] 9 | ;;This is useful if you want to see where the loaded tvm library 10 | ;;is coming from. We really recommend that you install a tvm 11 | ;;built specifically for your system into /usr/lib, however, as there 12 | ;;are quite a few options possible for tvm. 13 | :jvm-opts ["-Djna.debug_load=true"] 14 | ) 15 | -------------------------------------------------------------------------------- /examples/scripts/dev-docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | IMG="tvm-dev-repl" 6 | REPL_PORT=7000 7 | 8 | docker build \ 9 | -t $IMG \ 10 | -f Dockerfile \ 11 | --build-arg USERID=$(id -u) \ 12 | --build-arg GROUPID=$(id -u) \ 13 | --build-arg USERNAME=$USER \ 14 | . 
15 | 16 | docker run --rm -it -u $(id -u):$(id -g) \ 17 | -e LEIN_REPL_HOST="0.0.0.0" \ 18 | -e LEIN_ROOT=1 \ 19 | -e LEIN_REPL_PORT=$REPL_PORT \ 20 | -v /$HOME/.m2:/home/$USER/.m2 \ 21 | -v /$HOME/.lein:/home/$USER/.lein \ 22 | -v $(pwd)/:/tvm \ 23 | --net=host -w /tvm \ 24 | $IMG $@ 25 | 26 | -------------------------------------------------------------------------------- /examples/scripts/docker-repl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | scripts/dev-docker.sh lein repl 4 | -------------------------------------------------------------------------------- /examples/test/data/test.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/techascent/tvm-clj/1088845bd613b4ba14b00381ffe3cdbd3d8b639e/examples/test/data/test.jpg -------------------------------------------------------------------------------- /java/tvm_clj/tvm/CFunction.java: -------------------------------------------------------------------------------- 1 | package tvm_clj.tvm; 2 | 3 | 4 | import com.sun.jna.*; 5 | 6 | public class CFunction { 7 | 8 | public static interface TVMPackedCFunc extends Callback { 9 | int invoke(Pointer args, Pointer typeCodes, int numArgs, 10 | Pointer retValueHandle, Pointer resourceHandle); 11 | } 12 | 13 | public static interface TVMPackedCFuncFinalizer extends Callback { 14 | int invoke(Pointer resourceHandle); 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /java/tvm_clj/tvm/DLPack.java: -------------------------------------------------------------------------------- 1 | package tvm_clj.tvm; 2 | 3 | import com.sun.jna.*; 4 | import java.util.*; 5 | 6 | 7 | public interface DLPack extends Library { 8 | 9 | public static final int DLPACK_VERSION = 020; 10 | 11 | public static class DLContext extends Structure { 12 | 13 | public int device_type; 14 | public int device_id; 15 | 16 | 17 | public static class ByReference extends DLContext implements Structure.ByReference {} 18 | public static class ByValue extends DLContext implements Structure.ByValue {} 19 | public DLContext () {} 20 | public DLContext (Pointer p ) { super(p); read(); } 21 | protected List getFieldOrder() { return Arrays.asList(new String[] 22 | { "device_type", "device_id" }); } 23 | } 24 | 25 | public static class DLDataType extends Structure { 26 | public byte code; 27 | public byte bits; 28 | public short lanes; 29 | 30 | 31 | public static class ByReference extends DLDataType implements Structure.ByReference {} 32 | public static class ByValue extends DLDataType implements Structure.ByValue {} 33 | public DLDataType () {} 34 | public DLDataType (Pointer p ) { super(p); read(); } 35 | protected List getFieldOrder() { return Arrays.asList(new String[] 36 | { "code", "bits", "lanes" }); } 37 | } 38 | 39 | public static class DLTensor extends Structure { 40 | 41 | public Pointer data; 42 | public DLContext ctx; 43 | public int ndim; 44 | public DLDataType dtype; 45 | public Pointer shape; 46 | public Pointer strides; 47 | public long byte_offset; 48 | 49 | 50 | public static class ByReference extends DLTensor implements Structure.ByReference {} 51 | public static class ByValue extends DLTensor implements Structure.ByValue {} 52 | public DLTensor () {} 53 | public DLTensor (Pointer p ) { super(p); read(); } 54 | protected List getFieldOrder() { return Arrays.asList(new String[] 55 | { "data", "ctx", "ndim", "dtype", "shape", "strides", 
"byte_offset"}); } 56 | } 57 | 58 | public static class DLManagedTensor extends Structure { 59 | public DLTensor dl_tensor; 60 | //void* to used to store extra data for deleter 61 | public Pointer manager_ctx; 62 | //single argument delete fn, is passed the managed tensor 63 | public Pointer deleter; 64 | 65 | 66 | public static class ByReference extends DLManagedTensor implements Structure.ByReference {} 67 | public static class ByValue extends DLManagedTensor implements Structure.ByValue {} 68 | public DLManagedTensor () {} 69 | public DLManagedTensor (Pointer p ) { super(p); read(); } 70 | protected List getFieldOrder() { return Arrays.asList(new String[] 71 | { "dl_tensor", "manager_ctx", "deleter"}); } 72 | 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject tvm-clj "6.00-beta-1-SNAPSHOT" 2 | :description "Clojure bindings and exploration of the tvm library" 3 | :url "http://github.com/techascent/tvm-clj" 4 | :license {:name "Eclipse Public License" 5 | :url "http://www.eclipse.org/legal/epl-v10.html"} 6 | :dependencies [[org.clojure/clojure "1.10.2-alpha1"] 7 | [cnuernber/dtype-next "6.00-beta-5"] 8 | [techascent/tech.jna "4.05"]] 9 | 10 | :java-source-paths ["java"] 11 | 12 | :profiles {:dev {:dependencies [[criterium "0.4.5"]]} 13 | :codox 14 | {:dependencies [[codox-theme-rdash "0.1.2"]] 15 | :plugins [[lein-codox "0.10.7"]] 16 | :codox {:project {:name "tvm-clj"} 17 | :metadata {:doc/format :markdown} 18 | :themes [:rdash] 19 | :source-paths ["src"] 20 | :output-path "docs" 21 | :doc-paths ["topics"] 22 | :source-uri "https://github.com/techascent/tvm-clj/blob/master/{filepath}#L{line}" 23 | :namespaces [tvm-clj.ast 24 | tvm-clj.schedule 25 | tvm-clj.compiler 26 | tvm-clj.module 27 | tvm-clj.device 28 | tvm-clj.application.image 29 | tvm-clj.application.kmeans]}}} 30 | :aliases {"codox" ["with-profile" "codox,dev" "codox"]}) 31 | -------------------------------------------------------------------------------- /python/questions/bind_buffer.py: -------------------------------------------------------------------------------- 1 | import tvm 2 | 3 | 4 | def print_schedule(sched, arglist): 5 | print(tvm.lower(sched, arglist, simple_mode=True)) 6 | 7 | 8 | rows = tvm.var("rows") 9 | cols = tvm.var("cols") 10 | chans = tvm.var("chans") 11 | 12 | input_vec = tvm.placeholder((rows,cols,chans), dtype="float32", name="input") 13 | clamp = lambda v, v_min, v_max: tvm.max( tvm.min(v, v_max), v_min ) 14 | ## clamp to edge padding 15 | padded = tvm.compute((rows+2,cols+2,chans) 16 | , lambda y, x, c: input_vec[clamp(y-1, 0, rows-1) 17 | , clamp(x-1, 0, cols-1) 18 | , c].astype("uint16") 19 | , name="padded") 20 | 21 | 22 | 23 | x_blur = tvm.compute((rows+2, cols, chans) 24 | , lambda y, x, c: (padded[y,x,c] + 25 | padded[y,x+1,c] + 26 | padded[y,x+2,c]) / 3 27 | , name="x_blur") 28 | 29 | y_blur = tvm.compute((rows, cols, chans) 30 | , lambda y, x, c: (x_blur[y,x,c] + 31 | x_blur[y+1,x,c] + 32 | x_blur[y+2,x,c]) / 3 33 | , name="y_blur") 34 | 35 | box_blur = tvm.compute((rows,cols,chans) 36 | , lambda y, x, c: y_blur[y,x,c].astype("uint8") 37 | , name="box_blur") 38 | 39 | arglist = [input_vec, box_blur] 40 | 41 | schedule = tvm.create_schedule(box_blur.op) 42 | schedule[padded.op].compute_inline() 43 | schedule[y_blur].compute_inline() 44 | schedule[x_blur].compute_at(schedule[box_blur], box_blur.op.axis[1]) 45 | print_schedule(schedule, 
arglist) 46 | 47 | x_blur_y_stride = 1 48 | x_blur_c_stride = rows + 2 49 | x_blur_x_stride = x_blur_c_stride * 3 50 | 51 | fun = tvm.build(schedule, arglist, "llvm", name="box_blur" 52 | , binds={x_blur: tvm.decl_buffer(x_blur.shape 53 | , name="x_blur" 54 | , scope="local" 55 | , dtype=x_blur.dtype 56 | , strides=[x_blur_y_stride, 57 | x_blur_x_stride, 58 | x_blur_c_stride])}) 59 | -------------------------------------------------------------------------------- /python/questions/compute_at_gpu.py: -------------------------------------------------------------------------------- 1 | import tvm 2 | 3 | 4 | def print_schedule(sched, arglist): 5 | print(tvm.lower(sched, arglist, simple_mode=True)) 6 | 7 | 8 | rows = tvm.var("rows") 9 | cols = tvm.var("cols") 10 | max_chans = tvm.const(5) 11 | chans = tvm.var("chans") 12 | 13 | input_vec = tvm.placeholder((rows,cols,chans), dtype="float32") 14 | kernel = tvm.compute((cols,chans) 15 | , lambda c, cc: 1.0 * c * cc 16 | , name="kern_vec") 17 | 18 | result = tvm.compute((rows,cols,chans) 19 | , lambda y, x, c: input_vec[y,x,c] * kernel[x, tvm.min(max_chans, tvm.max(0, c))] 20 | , name="answer") 21 | 22 | sched = tvm.create_schedule(result.op) 23 | result_stage = sched[result] 24 | kernel_stage = sched[kernel] 25 | 26 | arglist=[input_vec,result] 27 | 28 | kernel_stage.compute_at(result_stage, result.op.axis[1]) 29 | 30 | print_schedule(sched, arglist) 31 | 32 | result_stage.bind(result.op.axis[0], tvm.thread_axis("blockIdx.x")) 33 | result_stage.bind(result.op.axis[1], tvm.thread_axis("threadIdx.x")) 34 | 35 | 36 | 37 | fun = tvm.build(sched, arglist, "opencl", name="test_compute_at") 38 | -------------------------------------------------------------------------------- /scripts/get-mnist-data: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | pushd test/data 5 | for FILE in train-images-idx3-ubyte.gz train-labels-idx1-ubyte.gz t10k-images-idx3-ubyte.gz t10k-labels-idx1-ubyte.gz 6 | do 7 | wget http://yann.lecun.com/exdb/mnist/$FILE 8 | gunzip $FILE 9 | done 10 | popd 11 | -------------------------------------------------------------------------------- /scripts/python_install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ## This script is used to install enough python to file bugs with the tvm 4 | ## maintainers. Bugs should be filed in python until clojure is part of 5 | ## the accepted languages for tvm. 
6 | 7 | 8 | sudo apt install -y --no-install-recommends \ 9 | libboost-python-dev \ 10 | build-essential \ 11 | python3-dev \ 12 | python3-pip \ 13 | python3-setuptools \ 14 | python3-wheel 15 | 16 | 17 | pip3 install \ 18 | numpy \ 19 | decorator \ 20 | Pillow \ 21 | scipy \ 22 | sklearn \ 23 | opencv-python \ 24 | scikit-image 25 | 26 | pushd tvm 27 | pushd python 28 | python3 setup.py install --user 29 | popd 30 | pushd topi/python 31 | python3 setup.py install --user 32 | popd 33 | pushd nnvm/python 34 | python3 setup.py install --user 35 | popd 36 | popd 37 | 38 | -------------------------------------------------------------------------------- /scripts/scan-tvm-obj-types: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | grep -irn -A 1 tvm._ffi.register_object tvm/python -------------------------------------------------------------------------------- /scripts/setup_python: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | 5 | export TVM_HOME="$(pwd)/tvm" 6 | export PYTHONPATH=$TVM_HOME/python:$TVM_HOME/topi/python:$TVM_HOME/nnvm/python:${PYTHONPATH} -------------------------------------------------------------------------------- /src/tvm_clj/application/mnist.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.application.mnist 2 | (:require [tech.v3.tensor :as dtt] 3 | [tech.v3.datatype :as dtype] 4 | [tech.v3.datatype.native-buffer :as native-buffer] 5 | [tech.v3.datatype.mmap :as mmap] 6 | [tech.v3.datatype.functional :as dfn] 7 | [tech.v3.libs.buffered-image :as bufimg] 8 | [tvm-clj.application.kmeans :as kmeans] 9 | [clojure.tools.logging :as log])) 10 | 11 | (def train-fnames {:data "train-images-idx3-ubyte" 12 | :labels "train-labels-idx1-ubyte"}) 13 | (def test-fnames {:data "t10k-images-idx3-ubyte" 14 | :labels "t10k-labels-idx1-ubyte"}) 15 | 16 | 17 | (def ^{:tag 'long} img-width 28) 18 | (def ^{:tag 'long} img-height 28) 19 | (def ^{:tag 'long} img-size (* img-width img-height)) 20 | 21 | 22 | (defn save-mnist-tensor-as-img 23 | ([tensor fname] 24 | (-> (dtt/reshape tensor [img-height img-width]) 25 | (dtype/copy! (bufimg/new-image img-height img-width :byte-gray)) 26 | (bufimg/save! fname)))) 27 | 28 | 29 | (defn mmap-file 30 | [fname] 31 | (-> (mmap/mmap-file (format "test/data/%s" fname)) 32 | (native-buffer/set-native-datatype :uint8))) 33 | 34 | 35 | (defn load-data 36 | [fname] 37 | (let [fdata (mmap-file fname) 38 | n-images (long (quot (dtype/ecount fdata) img-size)) 39 | leftover (rem (dtype/ecount fdata) 40 | img-size)] 41 | (-> (dtype/sub-buffer fdata leftover) 42 | (dtt/reshape [n-images img-height img-width])))) 43 | 44 | 45 | (defn load-labels 46 | [fname] 47 | (-> (mmap-file fname) 48 | (dtype/sub-buffer 8))) 49 | 50 | 51 | (defn load-dataset 52 | [dataset ds-name] 53 | ;;Data is an [n-images height width] tensor 54 | (log/infof "Loading %s dataset" ds-name) 55 | {:data (load-data (dataset :data)) 56 | :labels (load-labels (dataset :labels))}) 57 | 58 | 59 | ;;Datasets are maps of class-label->tensor 60 | (defonce train-ds (load-dataset train-fnames "train")) 61 | (defonce test-ds (load-dataset test-fnames "test")) 62 | 63 | 64 | (defn reshape-data 65 | "Reshape images to be a 2d matrix of rows where each image is one row." 
66 | [data] 67 | (let [[n-images height width] (dtype/shape data)] 68 | (dtt/reshape data [n-images (* (long height) (long width))]))) 69 | 70 | 71 | (defn train-kmeans-per-label 72 | [n-per-label & [{:keys [seed n-iters] :as options}]] 73 | (kmeans/train-per-label (reshape-data (:data train-ds)) 74 | (:labels train-ds) 75 | n-per-label options)) 76 | 77 | 78 | (defn save-centers-as-images! 79 | [centers] 80 | (let [n-centers (long (first (dtype/shape centers)))] 81 | (doseq [idx (range n-centers)] 82 | (-> (centers idx) 83 | (save-mnist-tensor-as-img (format "center-%d.png" idx)))))) 84 | 85 | (defn kmeans->histograms 86 | "Takes the output of `train-kmeans` and returns a histogram of original labels for each learned center." 87 | [{:keys [assigned-centers]}] 88 | (->> (for [[idx assigned-center] (map-indexed vector assigned-centers)] 89 | (let [idx->label (fn [i] 90 | (->> (map-indexed vector (:labels train-ds)) 91 | (map (fn [[label-i [minimum maximum]]] 92 | (when (and (<= minimum i) (< i maximum)) 93 | label-i))) 94 | (remove nil?) 95 | (first)))] 96 | {:label (idx->label idx) 97 | :assigned-center assigned-center})) 98 | (group-by :assigned-center) 99 | (map (fn [[_ item-seq]] 100 | (frequencies (map :label item-seq)))) 101 | (sort-by (fn [hist] (ffirst (sort-by second > hist)))) 102 | (map (fn [center] 103 | (for [i (range 10)] 104 | (get center i 0)))) 105 | (dtt/ensure-tensor))) 106 | 107 | 108 | (defn confusion-matrix 109 | [labels predictions] 110 | (let [retval (dtt/new-tensor [10 10] :datatype :int64)] 111 | (doseq [[label pred] (map vector labels predictions)] 112 | (.ndAccumPlusLong retval label pred 1) 113 | (.ndAccumPlusLong retval pred label 1)) 114 | retval)) 115 | 116 | 117 | (defn test-kmeans 118 | [& [n-centers]] 119 | (let [n-centers (or n-centers 10)] 120 | (let [model (kmeans/train-per-label (reshape-data (:data train-ds)) 121 | (:labels train-ds) 122 | n-centers) 123 | prediction-data (kmeans/predict-per-label (reshape-data (:data test-ds)) 124 | model) 125 | labels (:labels test-ds) 126 | predictions (:label-indexes prediction-data)] 127 | {:accuracy (/ (dfn/sum (dfn/eq labels predictions)) 128 | (dtype/ecount predictions)) 129 | :confusion-matrix (confusion-matrix labels predictions)}))) 130 | 131 | 132 | (defn test-n-center-predictors 133 | [n-centers] 134 | (->> (range 1 (inc n-centers)) 135 | (mapv (fn [idx] 136 | (let [model (kmeans/train-per-label (reshape-data (:data train-ds)) 137 | (:labels train-ds) 138 | idx) 139 | prediction-data (kmeans/predict-per-label (reshape-data (:data test-ds)) 140 | model) 141 | labels (:labels test-ds) 142 | predictions (:label-indexes prediction-data)] 143 | {:accuracy (/ (dfn/sum (dfn/eq labels predictions)) 144 | (dtype/ecount predictions)) 145 | :confusion-matrix (confusion-matrix labels predictions)}))))) 146 | -------------------------------------------------------------------------------- /src/tvm_clj/ast/elemwise_op.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.ast.elemwise-op 2 | "Elemwise TVM AST operators" 3 | (:require [tvm-clj.impl.protocols :as tvm-proto] 4 | [tvm-clj.impl.fns.tir :as tir-fns] 5 | [tvm-clj.impl.node :as jna-node] 6 | [tvm-clj.impl.base :as jna-base] 7 | [tech.v3.datatype :as dtype]) 8 | (:import [com.sun.jna Pointer]) 9 | (:refer-clojure :exclude [cast + - * / min max and or mod 10 | > >= < <=])) 11 | 12 | 13 | (defn const 14 | "Convert an item to a const (immediate) value" 15 | [numeric-value & [dtype]] 16 | (jna-node/const numeric-value 
dtype)) 17 | 18 | 19 | (defn cast 20 | "Cast a node to a different datatype." 21 | [expr-node dtype] 22 | (tir-fns/Cast (jna-node/->dtype dtype) expr-node)) 23 | 24 | 25 | (def ^:private call-types 26 | "Possible call types from Halide/IR.h" 27 | {:extern 0 ;;< A call to an external C-ABI function, possibly with side-effects 28 | :extern-c-plus-plus 1 ;;< A call to an external C-ABI function, possibly with side-effects 29 | :pure-extern 2 ;;< A call to a guaranteed-side-effect-free external function 30 | :halide 3 ;;< A call to a Func 31 | :intrinsic 4 ;;< A possibly-side-effecty compiler intrinsic, which has special handling during codegen 32 | :pure-intrinsic 5 ;;< A side-effect-free version of the above. 33 | }) 34 | 35 | (defn- ->call-type 36 | ^long [ctype] 37 | (cond 38 | (keyword? ctype) 39 | (if-let [retval (get call-types ctype)] 40 | retval 41 | (throw (ex-info "Failed to find call type" 42 | {:call-type ctype}))) 43 | (number? ctype) 44 | (long ctype))) 45 | 46 | 47 | (def ^:private call-type-set (set (keys call-types))) 48 | 49 | 50 | (defn- call 51 | "Call a 'function', which is basically executing a statement. For instance, getting a 52 | value from the tensor is calling a halide function with the tensor's generating-op and 53 | value index." 54 | [ret-dtype fn-name fn-args call-type function-ref value-index] 55 | #_(bindings/global-node-function "make.Call" (->dtype ret-dtype) fn-name fn-args 56 | (->call-type call-type) 57 | function-ref value-index) 58 | (throw (Exception. "Failwhale"))) 59 | 60 | 61 | (defn- call-pure-intrin 62 | "Build expression by calling a pure intrinsic function. 63 | 64 | Intrinsics can be overloaded with multiple data types via 65 | the intrinsic translation rule. 66 | 67 | Parameters 68 | ---------- 69 | dtype : str 70 | The data type of the result. 71 | 72 | func_name: str 73 | The intrinsic function name. 74 | 75 | args : list 76 | Positional arguments. 77 | 78 | Returns 79 | ------- 80 | call : Expr 81 | The call expression. 82 | " 83 | [dtype func-name & args] 84 | (call dtype func-name (tvm-proto/->node args) :pure-intrinsic nil 0)) 85 | 86 | 87 | (defn- call-intrin 88 | "Build expression by calling an intrinsic function. 89 | 90 | Intrinsics can be overloaded with multiple data types via 91 | the intrinsic translation rule. 92 | 93 | Parameters 94 | ---------- 95 | dtype : str 96 | The data type of the result. 97 | 98 | func_name: str 99 | The intrinsic function name. 100 | 101 | args : list 102 | Positional arguments. 103 | 104 | Returns 105 | ------- 106 | call : Expr 107 | The call expression. 108 | " 109 | [dtype func-name & args] 110 | (call dtype func-name (tvm-proto/->node args) :intrinsic nil 0)) 111 | 112 | 113 | (defmacro ^:no-doc def-bin-op 114 | "Define a binary operation" 115 | [op-name op-fn] 116 | `(defn ~op-name 117 | [~'lhs ~'rhs] 118 | (~op-fn (tvm-proto/->node ~'lhs) (tvm-proto/->node ~'rhs)))) 119 | 120 | 121 | (defmacro ^:no-doc def-op 122 | "Define a unary operation" 123 | [op-name make-name] 124 | `(defn ~op-name 125 | [~'lhs] 126 | (~make-name (tvm-proto/->node ~'lhs)))) 127 | 128 | 129 | (defmacro ^:no-doc def-bin-intrin-op 130 | [op-name] 131 | `(defn ~op-name 132 | [~'lhs ~'rhs] 133 | (call-pure-intrin (dtype/get-datatype ~'lhs) 134 | ~(str op-name) 135 | (tvm-proto/->node ~'lhs) 136 | (tvm-proto/->node ~'rhs)))) 137 | 138 | 139 | (defmacro ^:no-doc def-intrin-op 140 | [op-name] 141 | `(defn ~op-name 142 | [~'lhs] 143 | (call-pure-intrin (dtype/get-datatype ~'lhs) 144 | ~(str "tir." 
op-name) 145 | (tvm-proto/->node ~'lhs)))) 146 | 147 | 148 | (def-bin-op + tir-fns/Add) 149 | (def-bin-op - tir-fns/Sub) 150 | (def-bin-op mod tir-fns/Mod) 151 | (def-bin-op * tir-fns/Mul) 152 | (def-bin-op / tir-fns/Div) 153 | (def-bin-op eq tir-fns/_OpEQ) 154 | (def-bin-op not-eq tir-fns/_OpNE) 155 | (def-bin-op > tir-fns/_OpGT) 156 | (def-bin-op >= tir-fns/_OpGE) 157 | (def-bin-op < tir-fns/_OpLT) 158 | (def-bin-op <= tir-fns/_OpLE) 159 | (def-bin-op min tir-fns/_OpMin) 160 | 161 | 162 | (defn min-value 163 | "Return an AST node that will generate the minimum value for a given datatype." 164 | [dtype] 165 | (let [[lval ltype] (jna-base/raw-call-function @tir-fns/min_value-fnptr* 166 | (jna-node/->dtype dtype))] 167 | (jna-node/construct-node (Pointer. lval)))) 168 | 169 | (defn max-value 170 | "Return an AST node that will generate the maximum value for a given datatype." 171 | [dtype] 172 | (let [[lval ltype] (jna-base/raw-call-function @tir-fns/max_value-fnptr* 173 | (jna-node/->dtype dtype))] 174 | (jna-node/construct-node (Pointer. lval)))) 175 | 176 | (def-bin-op max tir-fns/_OpMax) 177 | (def-bin-op floor tir-fns/floor) 178 | (def-bin-op ceil tir-fns/ceil) 179 | (def-op abs tir-fns/abs) 180 | (def-bin-op and tir-fns/And) 181 | (def-bin-op or tir-fns/Or) 182 | (def-intrin-op exp) 183 | (def-intrin-op tanh) 184 | (def-intrin-op sigmoid) 185 | (def-intrin-op log) 186 | (def-intrin-op sqrt) 187 | (def-intrin-op trunc) 188 | (def-op round tir-fns/round) 189 | (def-bin-op pow tir-fns/_OpPow) 190 | 191 | 192 | (defn select 193 | "Select between two expressions based on a condition. Thus works similar to the 194 | clojure 'if' statement except it executes both branches. This does not guard against 195 | out of bounds access; use if-then-else for that case. 196 | On the other hand, select can be vectorized while if-then-else cannot be." 197 | [bool-stmt true-stmt false-stmt] 198 | (tir-fns/Select (tvm-proto/->node bool-stmt) 199 | (tvm-proto/->node true-stmt) 200 | (tvm-proto/->node false-stmt))) 201 | 202 | 203 | (defn if-then-else 204 | "Select between two expressions based on a condition. Thus works similar to the 205 | clojure 'if' statement. This is similar to 'select' except that it does not 206 | execute the wrong branch. As a drawback, unlike select, it cannot be vectorized." 207 | [bool-stmt true-stmt false-stmt] 208 | (tir-fns/_OpIfThenElse (tvm-proto/->node bool-stmt) 209 | (tvm-proto/->node true-stmt) 210 | (tvm-proto/->node false-stmt))) 211 | -------------------------------------------------------------------------------- /src/tvm_clj/device.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.device 2 | "Operations on a device. Devices, such as a GPU, need to be addressed 3 | independently and once you have a device you can allocate a tensor on that 4 | device. 5 | 6 | * Device types are keywords: `#{:cpu :cuda :opencl}` 7 | * Device ids are integers starting from zero." 8 | (:require [tvm-clj.impl.definitions :as definitions] 9 | [tvm-clj.impl.protocols :as tvm-proto] 10 | [tvm-clj.impl.fns.runtime :as runtime-fns] 11 | [tvm-clj.impl.dl-tensor :as dl-tensor] 12 | [tvm-clj.impl.stream :as stream] 13 | [tvm-clj.impl.base :as base] 14 | [tech.v3.datatype :as dtype] 15 | [tech.v3.tensor :as dtt])) 16 | 17 | 18 | (defn device-exists? 
19 | [device-type device-id] 20 | (if (= device-type :cpu) 21 | (== 0 (long device-id)) 22 | (= 1 (runtime-fns/GetDeviceAttr 23 | (definitions/device-type->device-type-int device-type) 24 | (long device-id) (definitions/device-attribute-map :exists))))) 25 | 26 | 27 | (defn device-attributes 28 | [device-type device-id] 29 | (when (device-exists? device-type device-id) 30 | (->> definitions/device-attribute-map 31 | (map (fn [[att-name att-id]] 32 | [att-name (runtime-fns/GetDeviceAttr 33 | (definitions/device-type->device-type-int device-type) 34 | (long device-id) 35 | (long att-id))])) 36 | (into {})))) 37 | 38 | 39 | (defn device-tensor 40 | "Allocate a device tensor." 41 | ([shape datatype device-type device-id options] 42 | (dl-tensor/allocate-device-array shape datatype device-type 43 | device-id options)) 44 | ([shape datatype device-type device-id] 45 | (device-tensor shape datatype device-type device-id nil)) 46 | ([src-tens-prototype device-type device-id] 47 | (device-tensor (dtype/shape src-tens-prototype) 48 | (dtype/elemwise-datatype src-tens-prototype) 49 | device-type device-id nil))) 50 | 51 | 52 | (defn copy-tensor! 53 | "Copy a src tensor to a destination tensor." 54 | ([src-tens dest-tens stream] 55 | (dl-tensor/copy-array-to-array! src-tens dest-tens stream) 56 | dest-tens) 57 | ([src-tens dest-tens] 58 | (copy-tensor! src-tens dest-tens nil))) 59 | 60 | 61 | (defn sync-with-host 62 | "Synchronize the device stream with the host" 63 | [device-type device-id] 64 | (base/check-call (stream/TVMSynchronize device-type device-id nil))) 65 | 66 | 67 | (defn cpu->device 68 | "Ensure a tensor is on a device, copying if necessary." 69 | ([tensor device-type device-id {:keys [stream] :as options}] 70 | (let [dev-tens (device-tensor (dtype/shape tensor) 71 | (dtype/elemwise-datatype tensor) 72 | device-type device-id options) 73 | ;;This will make a gc-based tensor so be careful. 74 | tensor (if (dtt/dims-suitable-for-desc? tensor) 75 | tensor 76 | (dtt/clone tensor :container-type :native-heap))] 77 | (copy-tensor! tensor dev-tens stream))) 78 | ([tensor device-type device-id] 79 | (cpu->device tensor device-type device-id nil))) 80 | 81 | 82 | (defn device->cpu 83 | "Ensure a tensor is on the CPU, copying if necessary." 84 | ([dev-tens {:keys [stream unsynchronized?]}] 85 | (let [tensor (dtt/new-tensor (dtype/shape dev-tens) 86 | :datatype (dtype/elemwise-datatype dev-tens) 87 | :container-type :native-heap)] 88 | (copy-tensor! dev-tens tensor stream) 89 | (when-not unsynchronized? 
90 | (sync-with-host (tvm-proto/device-type dev-tens) 91 | (tvm-proto/device-id dev-tens))) 92 | tensor)) 93 | ([dev-tens] 94 | (device->cpu dev-tens nil))) 95 | -------------------------------------------------------------------------------- /src/tvm_clj/impl/definitions.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.definitions 2 | (:require [clojure.set :as set])) 3 | 4 | 5 | (def tvm-datatype->keyword-map 6 | {0 :int 7 | 1 :uint 8 | 2 :float 9 | 3 :handle 10 | 4 :null 11 | 5 :tvm-type 12 | 6 :tvm-context 13 | 7 :array-handle 14 | 8 :node-handle 15 | 9 :module-handle 16 | 10 :func-handle 17 | 11 :string 18 | 12 :bytes 19 | 13 :ndarray-handle 20 | 14 :object-rvalue-ref-arg 21 | 15 :ext-begin}) 22 | 23 | 24 | (def keyword->tvm-datatype-map 25 | (set/map-invert tvm-datatype->keyword-map)) 26 | 27 | 28 | (def datatype->dl-type-code-map 29 | {:uint8 :uint 30 | :uint16 :uint 31 | :uint32 :uint 32 | :uint64 :uint 33 | :int8 :int 34 | :int16 :int 35 | :int32 :int 36 | :int64 :int 37 | :float32 :float 38 | :float64 :float}) 39 | 40 | 41 | (defn keyword->tvm-datatype 42 | [kwd] 43 | (if-let [retval (get keyword->tvm-datatype-map kwd)] 44 | (long retval) 45 | (throw (ex-info "Failed to get tvm-datatype from kwd" 46 | {:kwd kwd})))) 47 | 48 | 49 | (defn tvm-datatype->keyword-nothrow 50 | [tvm-datatype] 51 | (get tvm-datatype->keyword-map tvm-datatype tvm-datatype)) 52 | 53 | 54 | (defn tvm-datatype->keyword 55 | [tvm-datatype] 56 | (if-let [retval (get tvm-datatype->keyword-map tvm-datatype)] 57 | retval 58 | (throw (ex-info "Failed to find keyword for tvm datatype" 59 | {:tvm-datatype tvm-datatype})))) 60 | 61 | 62 | 63 | (def dl-dtype-map->datatype-map 64 | {{:tvm-datatype :float 65 | :bits 32 66 | :lanes 1} :float32 67 | {:tvm-datatype :float 68 | :bits 64 69 | :lanes 1} :float64 70 | 71 | {:tvm-datatype :int 72 | :bits 8 73 | :lanes 1} :int8 74 | {:tvm-datatype :int 75 | :bits 16 76 | :lanes 1} :int16 77 | {:tvm-datatype :int 78 | :bits 32 79 | :lanes 1} :int32 80 | {:tvm-datatype :int 81 | :bits 64 82 | :lanes 1} :int64 83 | 84 | {:tvm-datatype :uint 85 | :bits 8 86 | :lanes 1} :uint8 87 | {:tvm-datatype :uint 88 | :bits 16 89 | :lanes 1} :uint16 90 | {:tvm-datatype :uint 91 | :bits 32 92 | :lanes 1} :uint32 93 | {:tvm-datatype :uint 94 | :bits 64 95 | :lanes 1} :uint64}) 96 | 97 | 98 | (def kwd->device-type-map 99 | {:cpu 1 100 | :cpu-pinned 3 101 | :cuda 2 102 | :ext-dev 12 103 | :gpu 2 104 | :llvm 1 105 | :metal 8 106 | :opencl 4 107 | :rocm 10 108 | :stackvm 1 109 | :vpi 9 110 | :vulkan 7}) 111 | 112 | 113 | (def device-type->kwd-map (set/map-invert kwd->device-type-map)) 114 | 115 | 116 | (defn device-type->device-type-int 117 | ^long [device-type] 118 | (if-let [dev-enum (kwd->device-type-map device-type)] 119 | dev-enum 120 | (throw (ex-info "Failed to find device type enum" 121 | {:device-type device-type})))) 122 | 123 | 124 | (defn device-type-int->device-type 125 | [^long device-type] 126 | (if-let [retval (device-type->kwd-map device-type)] 127 | retval 128 | (throw (ex-info "Failed to find keyword for device type" 129 | {:device-type device-type})))) 130 | 131 | 132 | (def device-attribute-map 133 | {:exists 0 134 | :max-threads-per-block 1 135 | :warp-size 2 136 | :max-shared-memory-per-block 3 137 | :compute-version 4 138 | :device-name 5 139 | :max-clock-rate 6 140 | :multi-processor-count 7 141 | :max-thread-dimensions 8}) 142 | -------------------------------------------------------------------------------- 
/src/tvm_clj/impl/fns.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns 2 | "TVM describes a lot of their C api dynamically; you query for the list of global 3 | function names and they are returned delimited by periods similar to clojure 4 | namespaces. We want to scan the list of functions once and dynamically 5 | create all sub namespaces. This does mean these namespaces will not 6 | have documentation at this point." 7 | (:require [tvm-clj.impl.base :as jna-base] 8 | [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns] 9 | [clojure.string :as s] 10 | [clojure.tools.logging :as log] 11 | [clojure.java.io :as io])) 12 | 13 | 14 | (defn define-tvm-fns! 15 | [] 16 | (let [namespaces (->> (jna-base/global-function-names) 17 | (map (fn [gname] 18 | (let [parts (s/split gname #"\.")] 19 | [(butlast parts) 20 | {:fullname gname 21 | :local-name (last parts)}]))) 22 | (group-by first) 23 | (map (fn [[k vs]] 24 | (if (seq k) 25 | [k (mapv second vs)] 26 | (do 27 | (log/warnf "Skipping non-namespaced symbols %s" 28 | (mapv (comp :fullname second) vs)) 29 | nil)))) 30 | (remove nil?)) 31 | cur-dir (System/getProperty "user.dir") 32 | root-ns-path (str cur-dir "/src/tvm_clj/impl/fns/")] 33 | (doseq [[ns-name ns-data] namespaces] 34 | ;;Auto generating the namespace only gets you dynamic resolution of the 35 | ;;names. So we *actually* define the namespace. 36 | (let [ns-path (str root-ns-path (s/join "/" ns-name) ".clj") 37 | tvm-ns-name (s/join "." ns-name) 38 | ns-name (str "tvm-clj.impl.fns." tvm-ns-name) 39 | builder (StringBuilder.)] 40 | (.append builder (format "(ns %s 41 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 42 | 43 | " ns-name)) 44 | 45 | (.append builder (format "(tvm-ns-fns/export-tvm-functions \"%s\")" 46 | tvm-ns-name)) 47 | (spit ns-path (.toString builder)))))) 48 | 49 | 50 | (comment 51 | ;;Only need to run this when the version of TVM changes. 52 | ;;In that case *delete* all files under tvm_clj.jna.fns/* 53 | ;;and then run and check changes. 54 | (define-tvm-fns!) 
55 | ) 56 | -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/arith.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.arith 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "arith") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/auto_scheduler.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.auto_scheduler 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "auto_scheduler") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/autotvm/feature.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.autotvm.feature 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "autotvm.feature") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/codegen.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.codegen 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "codegen") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/device_api.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.device_api 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "device_api") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/hybrid.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.hybrid 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "hybrid") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/ir.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.ir 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "ir") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/node.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.node 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "node") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/parser.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.parser 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "parser") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/relay/_make.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.relay._make 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "relay._make") -------------------------------------------------------------------------------- 
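;; Each generated namespace consists of just the two forms shown above.
;; export-tvm-functions (defined in tvm-clj.impl.tvm-ns-fns later in this
;; tree) expands into a defonce'd delay holding the TVM global function
;; pointer plus a small defn wrapper per exported function. A hedged usage
;; sketch; some-operation below is a hypothetical placeholder:
(comment
  (require '[tvm-clj.impl.fns.te :as te-fns])
  ;;TVM's registered global "te.CreateSchedule" is now an ordinary Clojure fn
  (te-fns/CreateSchedule [some-operation]))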
/src/tvm_clj/impl/fns/relay/_quantize.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.relay._quantize 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "relay._quantize") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/relay/_transform.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.relay._transform 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "relay._transform") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/relay/_vm.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.relay._vm 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "relay._vm") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/relay/analysis.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.relay.analysis 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "relay.analysis") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/relay/backend.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.relay.backend 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "relay.backend") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/relay/build_module.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.relay.build_module 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "relay.build_module") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/relay/dataflow_pattern.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.relay.dataflow_pattern 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "relay.dataflow_pattern") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/relay/ext.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.relay.ext 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "relay.ext") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/relay/ir.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.relay.ir 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "relay.ir") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/relay/op.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.relay.op 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "relay.op") 
-------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/relay/op/_make.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.relay.op._make 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "relay.op._make") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/relay/op/annotation/_make.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.relay.op.annotation._make 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "relay.op.annotation._make") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/relay/op/dyn/_make.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.relay.op.dyn._make 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "relay.op.dyn._make") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/relay/op/dyn/image/_make.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.relay.op.dyn.image._make 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "relay.op.dyn.image._make") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/relay/op/dyn/nn/_make.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.relay.op.dyn.nn._make 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "relay.op.dyn.nn._make") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/relay/op/image/_make.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.relay.op.image._make 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "relay.op.image._make") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/relay/op/memory/_make.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.relay.op.memory._make 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "relay.op.memory._make") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/relay/op/nn/_make.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.relay.op.nn._make 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "relay.op.nn._make") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/relay/op/vision/_make.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.relay.op.vision._make 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "relay.op.vision._make") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/relay/op/vm.clj: 
-------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.relay.op.vm 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "relay.op.vm") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/relay/qnn/_transform.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.relay.qnn._transform 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "relay.qnn._transform") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/relay/qnn/op/_make.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.relay.qnn.op._make 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "relay.qnn.op._make") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/rpc.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.rpc 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "rpc") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/runtime.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.runtime 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "runtime") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/runtime/module.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.runtime.module 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "runtime.module") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/schedule.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.schedule 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "schedule") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/script.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.script 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "script") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/support.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.support 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "support") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/target.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.target 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "target") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/te.clj: -------------------------------------------------------------------------------- 1 
| (ns tvm-clj.impl.fns.te 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "te") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/test/op.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.test.op 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "test.op") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/testing.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.testing 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "testing") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/tir.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.tir 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "tir") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/tir/analysis.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.tir.analysis 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "tir.analysis") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/tir/transform.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.tir.transform 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "tir.transform") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/topi.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.topi 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "topi") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/topi/cuda.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.topi.cuda 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "topi.cuda") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/topi/generic.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.topi.generic 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "topi.generic") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/topi/nn.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.topi.nn 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "topi.nn") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/topi/rocm.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.topi.rocm 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions 
"topi.rocm") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/topi/util.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.topi.util 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "topi.util") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/topi/vision.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.topi.vision 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "topi.vision") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/topi/x86.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.topi.x86 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "topi.x86") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/transform.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.transform 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "transform") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/tvm/codegen/llvm.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.tvm.codegen.llvm 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "tvm.codegen.llvm") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/tvm/contrib/cblas.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.tvm.contrib.cblas 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "tvm.contrib.cblas") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/tvm/contrib/mkl.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.tvm.contrib.mkl 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "tvm.contrib.mkl") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/tvm/contrib/random.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.tvm.contrib.random 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "tvm.contrib.random") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/tvm/contrib/sort.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.tvm.contrib.sort 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "tvm.contrib.sort") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/tvm/graph_runtime.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.tvm.graph_runtime 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | 
(tvm-ns-fns/export-tvm-functions "tvm.graph_runtime") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/tvm/graph_runtime_factory.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.tvm.graph_runtime_factory 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "tvm.graph_runtime_factory") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/tvm/intrin/rule/aocl.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.tvm.intrin.rule.aocl 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "tvm.intrin.rule.aocl") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/tvm/intrin/rule/aocl_sw_emu.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.tvm.intrin.rule.aocl_sw_emu 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "tvm.intrin.rule.aocl_sw_emu") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/tvm/intrin/rule/cuda.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.tvm.intrin.rule.cuda 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "tvm.intrin.rule.cuda") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/tvm/intrin/rule/default.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.tvm.intrin.rule.default 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "tvm.intrin.rule.default") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/tvm/intrin/rule/hexagon.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.tvm.intrin.rule.hexagon 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "tvm.intrin.rule.hexagon") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/tvm/intrin/rule/llvm.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.tvm.intrin.rule.llvm 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "tvm.intrin.rule.llvm") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/tvm/intrin/rule/metal.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.tvm.intrin.rule.metal 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "tvm.intrin.rule.metal") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/tvm/intrin/rule/nvptx.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.tvm.intrin.rule.nvptx 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "tvm.intrin.rule.nvptx") 
-------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/tvm/intrin/rule/opencl.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.tvm.intrin.rule.opencl 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "tvm.intrin.rule.opencl") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/tvm/intrin/rule/rocm.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.tvm.intrin.rule.rocm 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "tvm.intrin.rule.rocm") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/tvm/intrin/rule/sdaccel.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.tvm.intrin.rule.sdaccel 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "tvm.intrin.rule.sdaccel") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/tvm/relay.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.tvm.relay 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "tvm.relay") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/tvm/relay/type_relation.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.tvm.relay.type_relation 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "tvm.relay.type_relation") -------------------------------------------------------------------------------- /src/tvm_clj/impl/fns/tvm/rpc/server.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.fns.tvm.rpc.server 2 | (:require [tvm-clj.impl.tvm-ns-fns :as tvm-ns-fns])) 3 | 4 | (tvm-ns-fns/export-tvm-functions "tvm.rpc.server") -------------------------------------------------------------------------------- /src/tvm_clj/impl/library_paths.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.library-paths 2 | (:require [tech.v3.jna :as jna]) 3 | (:import [java.io File])) 4 | 5 | 6 | (def tvm-library-name "tvm") 7 | 8 | 9 | ;;Setup library search paths 10 | ;;Add the full path to the development system 11 | (jna/add-library-path tvm-library-name :system 12 | (str (System/getProperty "user.dir") 13 | File/separator 14 | "incubator-tvm/build" 15 | File/separator 16 | (jna/map-shared-library-name tvm-library-name))) 17 | 18 | (when-let [tvm-home (System/getenv "TVM_HOME")] 19 | (when (.exists (File. tvm-home)) 20 | (jna/add-library-path tvm-library-name :system 21 | (str tvm-home 22 | File/separator 23 | "build" 24 | File/separator 25 | (jna/map-shared-library-name tvm-library-name))))) 26 | 27 | (jna/add-library-path tvm-library-name :system tvm-library-name) 28 | ;;Then if nothing else works use the packaged library 29 | ;;that only supports a couple things if any and may not load. 
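;;Per the comments in this file, the intended preference order is: the local
;;incubator-tvm build, then $TVM_HOME/build, then the system-installed
;;library, and finally (below) whatever java.library.path provides. A hedged
;;sketch with an assumed path: launch the JVM with TVM_HOME=/opt/tvm to
;;prefer a custom build living there.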
30 | (jna/add-library-path tvm-library-name :java-library-path tvm-library-name) 31 | -------------------------------------------------------------------------------- /src/tvm_clj/impl/module.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.module 2 | (:require [tvm-clj.impl.base :as jna-base] 3 | [tech.v3.jna :refer [checknil] :as jna] 4 | [tvm-clj.impl.protocols :as bindings-proto] 5 | [tvm-clj.impl.fns.runtime :as runtime] 6 | [tech.v3.resource :as resource]) 7 | (:import [com.sun.jna Native NativeLibrary Pointer Function Platform] 8 | [com.sun.jna.ptr PointerByReference IntByReference LongByReference] 9 | [tvm_clj.tvm DLPack$DLContext DLPack$DLTensor DLPack$DLDataType 10 | DLPack$DLManagedTensor] 11 | [tvm_clj.impl.base TVMFunction])) 12 | 13 | 14 | (jna-base/make-tvm-jna-fn TVMModFree 15 | "Free a module" 16 | Integer 17 | [module checknil]) 18 | 19 | 20 | (defrecord ModuleHandle [^Pointer tvm-hdl] 21 | bindings-proto/PToTVM 22 | (->tvm [item] item) 23 | bindings-proto/PJVMTypeToTVMValue 24 | (->tvm-value [item] [(Pointer/nativeValue tvm-hdl) :module-handle]) 25 | jna/PToPtr 26 | (is-jna-ptr-convertible? [item] true) 27 | (->ptr-backing-store [item] tvm-hdl)) 28 | 29 | 30 | (defmethod jna-base/tvm-value->jvm :module-handle 31 | [long-val val-type-kwd] 32 | (-> (->ModuleHandle (Pointer. long-val)) 33 | ;;modules are *always* tracked via the gc, never the stack! 34 | (resource/track {:dispose-fn #(TVMModFree (Pointer. long-val)) 35 | :track-type :gc}))) 36 | 37 | 38 | (jna-base/make-tvm-jna-fn TVMModGetFunction 39 | "Get module function" 40 | Integer 41 | [mod checknil] 42 | [func_name jna/string->ptr] 43 | [query_imports int] 44 | [out jna-base/ptr-ptr]) 45 | 46 | 47 | (defn get-module-function 48 | ([module ^String fn-name query-imports?] 49 | (let [retval (PointerByReference.)] 50 | (jna-base/check-call (TVMModGetFunction 51 | module fn-name 52 | (int (if query-imports? 1 0)) retval)) 53 | (when (= 0 (Pointer/nativeValue (.getValue retval))) 54 | (throw (ex-info "Could not find module function" 55 | {:fn-name fn-name}))) 56 | (TVMFunction. (.getValue retval) module))) 57 | ([module fn-name] 58 | (get-module-function module fn-name false))) 59 | 60 | 61 | (defn get-module-source 62 | [module format] 63 | (runtime/ModuleGetSource module format)) 64 | 65 | 66 | (jna-base/make-tvm-jna-fn TVMModImport 67 | "Import one module into another" 68 | Integer 69 | [mod checknil] 70 | [dep checknil]) 71 | 72 | 73 | (defn mod-import 74 | [mod dep] 75 | (jna-base/check-call (TVMModImport mod dep))) 76 | -------------------------------------------------------------------------------- /src/tvm_clj/impl/protocols.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.protocols 2 | (:require [tech.v3.jna :as jna] 3 | [tech.v3.datatype :as dtype]) 4 | (:import [com.sun.jna Pointer])) 5 | 6 | 7 | (set! *warn-on-reflection* true) 8 | (set! *unchecked-math* :warn-on-boxed) 9 | 10 | 11 | (defprotocol PJVMTypeToTVMValue 12 | "Convert something to a [long tvm-value-type] pair" 13 | (->tvm-value [jvm-type])) 14 | 15 | 16 | (defprotocol PToTVM 17 | "Convert something to some level of tvm type." 18 | (->tvm [item])) 19 | 20 | 21 | (defprotocol PConvertToNode 22 | (->node [item])) 23 | 24 | 25 | (defprotocol PTVMNode 26 | (is-node-handle? [item]) 27 | (node-type-index [item]) 28 | (node-type-name [item])) 29 | 30 | 31 | (extend-type Object 32 | PTVMNode 33 | (is-node-handle? 
[item] false)) 34 | 35 | 36 | (defprotocol PTVMDeviceId 37 | (device-id [item])) 38 | 39 | 40 | (defprotocol PTVMDeviceType 41 | (device-type [item])) 42 | 43 | 44 | (extend-type Object 45 | PTVMDeviceId 46 | (device-id [item] 0) 47 | PTVMDeviceType 48 | (device-type [item] :cpu)) 49 | 50 | 51 | (defprotocol PByteOffset 52 | "Some buffers you cant offset (opengl, for instance). 53 | So buffers have a logical byte-offset that is passed to functions. 54 | So we need to get the actual base ptr sometimes." 55 | (byte-offset [item]) 56 | (base-ptr [item])) 57 | 58 | 59 | (defn string->ptr 60 | ([str-data {:keys [encoding] 61 | :or {encoding "UTF-8"}}] 62 | (let [str-bytes (.getBytes ^String str-data 63 | (java.nio.charset.Charset/forName encoding)) 64 | retval (dtype/make-container :native-heap :int8 65 | {:resource-type :stack} 66 | ;;force zero pad the ending 67 | (+ (alength str-bytes) 4))] 68 | (dtype/copy! str-bytes (dtype/sub-buffer retval 0 (alength str-bytes))))) 69 | ([str-data] 70 | (string->ptr str-data nil))) 71 | 72 | 73 | (extend-protocol PJVMTypeToTVMValue 74 | Double 75 | (->tvm-value [value] [(Double/doubleToLongBits (double value)) :float]) 76 | Float 77 | (->tvm-value [value] [(Double/doubleToLongBits (double value)) :float]) 78 | Byte 79 | (->tvm-value [value] [(long value) :int]) 80 | Short 81 | (->tvm-value [value] [(long value) :int]) 82 | Integer 83 | (->tvm-value [value] [(long value) :int]) 84 | Long 85 | (->tvm-value [value] [(long value) :int]) 86 | Boolean 87 | (->tvm-value [value] [(if value 88 | (long 1) 89 | (long 0)) :int]) 90 | String 91 | (->tvm-value [value] [(-> (string->ptr value) 92 | (jna/as-ptr) 93 | (Pointer/nativeValue)) :string]) 94 | 95 | nil 96 | (->tvm-value [value] 97 | [(long 0) :null])) 98 | -------------------------------------------------------------------------------- /src/tvm_clj/impl/stream.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.stream 2 | (:require [tvm-clj.impl.base :refer [make-tvm-jna-fn 3 | device-type->int 4 | device-id->int 5 | ptr-ptr 6 | check-call]] 7 | [tech.v3.resource :as resource] 8 | [tvm-clj.impl.protocols :refer [->tvm] :as bindings-proto] 9 | [tech.v3.jna :as jna]) 10 | (:import [com.sun.jna Native NativeLibrary Pointer Function Platform] 11 | [com.sun.jna.ptr PointerByReference IntByReference LongByReference])) 12 | 13 | 14 | (declare ensure-stream->ptr) 15 | 16 | 17 | (make-tvm-jna-fn TVMStreamCreate 18 | "Create a stream" 19 | Integer 20 | [device_type device-type->int] 21 | [device_id device-id->int] 22 | [out ptr-ptr]) 23 | 24 | 25 | (make-tvm-jna-fn TVMStreamFree 26 | "Free a stream" 27 | Integer 28 | [device_type device-type->int] 29 | [device_id device-id->int] 30 | [stream jna/ensure-ptr]) 31 | 32 | 33 | (make-tvm-jna-fn TVMSetStream 34 | "Set current stream" 35 | Integer 36 | [device_type device-type->int] 37 | [device_id device-id->int] 38 | [stream ensure-stream->ptr]) 39 | 40 | (make-tvm-jna-fn TVMSynchronize 41 | "Synchronize stream with host" 42 | Integer 43 | [device_type device-type->int] 44 | [device_id device-id->int] 45 | [stream jna/as-ptr]) 46 | 47 | 48 | (make-tvm-jna-fn TVMStreamStreamSynchronize 49 | "Synchronize stream with stream" 50 | Integer 51 | [device_type device-type->int] 52 | [device_id device-id->int] 53 | [src ensure-stream->ptr] 54 | [dst ensure-stream->ptr]) 55 | 56 | 57 | (defrecord StreamHandle [device-type ^long device-id tvm-hdl] 58 | bindings-proto/PToTVM 59 | (->tvm [item] item) 60 | jna/PToPtr 61 | 
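;;exposing the raw stream pointer lets the JNA bindings above accept a
;;StreamHandle anywhere a Pointer is expected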
(->ptr-backing-store [item] tvm-hdl) 62 | bindings-proto/PTVMDeviceId 63 | (device-id [item] device-id) 64 | bindings-proto/PTVMDeviceType 65 | (device-type [item] device-type)) 66 | 67 | 68 | (defn ensure-stream->ptr 69 | [item] 70 | (let [item (->tvm item)] 71 | (jna/ensure-type StreamHandle item) 72 | (jna/->ptr-backing-store item))) 73 | 74 | 75 | (defn create-stream 76 | ^StreamHandle [device-type ^long device-id] 77 | (let [retval (PointerByReference.)] 78 | (check-call (TVMStreamCreate device-type device-id retval)) 79 | (resource/track (->StreamHandle device-type device-id (.getValue retval)) 80 | {:track-type :auto 81 | :dispose-fn #(TVMStreamFree device-type device-id 82 | (.getValue retval))}))) 83 | 84 | 85 | (defn sync-stream-with-host 86 | [stream] 87 | (let [stream (->tvm stream)] 88 | ;;the stream handle supplies its own device type and id via the protocols 89 | (check-call (TVMSynchronize stream stream stream)))) 90 | 91 | 92 | (defn sync-stream-with-stream 93 | [stream other-stream] 94 | (let [stream (->tvm stream) 95 | other-stream (->tvm other-stream)] 96 | (check-call (TVMStreamStreamSynchronize stream stream stream other-stream)))) 97 | 98 | 99 | (defn set-current-thread-stream 100 | [stream] 101 | (let [stream (->tvm stream)] 102 | (check-call (TVMSetStream stream stream stream)))) 103 | -------------------------------------------------------------------------------- /src/tvm_clj/impl/tvm_ns_fns.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.tvm-ns-fns 2 | (:require [tvm-clj.impl.base :as base])) 3 | 4 | 5 | (defn- fn-prefix 6 | [^String fn-name] 7 | (let [last-idx (.lastIndexOf fn-name ".")] 8 | (if (> last-idx 0) 9 | (.substring fn-name 0 last-idx) 10 | ""))) 11 | 12 | 13 | (defn safe-local-name 14 | [^String lname] 15 | (cond 16 | (= "String" lname) "RuntimeString" 17 | :else lname)) 18 | 19 | 20 | (defn fn-postfix 21 | [^String fn-name] 22 | (let [last-idx (.lastIndexOf fn-name ".")] 23 | (-> (if (> last-idx 0) 24 | (.substring fn-name (inc last-idx)) 25 | "") 26 | (safe-local-name)))) 27 | 28 | 29 | (defn- fns-with-prefix 30 | [prefix] 31 | (->> (base/global-function-names) 32 | (filter #(= prefix (fn-prefix %))))) 33 | 34 | 35 | (defmacro export-tvm-functions 36 | [prefix] 37 | `(do 38 | ~@(->> (fns-with-prefix prefix) 39 | (mapcat (fn [fn-name] 40 | (let [local-name (fn-postfix fn-name) 41 | global-sym (symbol (str local-name "-fnptr*"))] 42 | [`(defonce ~global-sym (delay (base/name->global-function 43 | ~fn-name))) 44 | `(defn ~(symbol local-name) 45 | "TVM exported fn" 46 | [& ~'args] 47 | (with-bindings {#'base/fn-name ~fn-name} 48 | (apply base/call-function @~global-sym ~'args)))])))))) 49 | -------------------------------------------------------------------------------- /src/tvm_clj/impl/typenames.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.impl.typenames) 2 | 3 | 4 | (comment 5 | 6 | 7 | (require '[clojure.java.shell :as sh]) 8 | (require '[clojure.string :as s]) 9 | 10 | 11 | (def grep-results* (delay (:out (sh/sh "grep" "-hirA" "1" "_ffi.register_object" "incubator-tvm/python/")))) 12 | 13 | 14 | (defn results->groups 15 | [] 16 | (let [lines (->> (-> @grep-results* 17 | (s/split #"--")) 18 | (map (fn [^String group] 19 | (s/split group #"\n"))) 20 | (filter #(== 3 (count %))))] 21 | lines)) 22 | 23 | 24 | (defn group->typename 25 | [[_ reg-line cls-def]] 26 | (if-let [[_ data] (re-find #"\"(.*)\"" reg-line)] 27 | data 28 | (second (re-find #"class ([^\(]+)" cls-def)))) 29 | 30 | ) 31 | 32 | 33 | 34 | (def typenames 35 |
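;;object type names registered via _ffi.register_object in TVM's python tree;
;;the (comment ...) grep sketch above shows how this list was captured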
["Array" "Attrs" "BaseComputeOp" "ComputeOp" "DictAttrs" "EnvFunc" "ExternOp" 36 | "FloatImm" "FuncType" "Fuse" "GenericFunc" "GlobalTypeVar" "GlobalVar" "HybridOp" 37 | "IRModule" "IncompleteType" "IntImm" "Map" "Op" "PlaceholderOp" "PointerType" 38 | "PrimType" "Range" "ScanOp" "Schedule" "Singleton" "SourceName" "Span" 39 | "SpecializedCondition" "Split" "Stage" "Target" "TargetKind" "Tensor" 40 | "TensorComputeOp" "TensorIntrin" "TensorIntrinCall" "TupleType" "TypeCall" 41 | "TypeConstraint" "TypeRelation" "TypeVar" "arith.ConstIntBound" 42 | "arith.IntConstraints" "arith.IntConstraintsTransform" "arith.IntGroupBounds" 43 | "arith.IntervalSet" "arith.ModularSet" "auto_scheduler.BuildResult" 44 | "auto_scheduler.ComputeDAG" "auto_scheduler.CostModel" "auto_scheduler.EmptyPolicy" 45 | "auto_scheduler.HardwareParams" "auto_scheduler.Iterator" 46 | "auto_scheduler.LocalBuilder" "auto_scheduler.LocalRunner" 47 | "auto_scheduler.MeasureCallback" "auto_scheduler.MeasureInput" 48 | "auto_scheduler.MeasureResult" "auto_scheduler.PreloadMeasuredStates" 49 | "auto_scheduler.ProgramBuilder" "auto_scheduler.ProgramRunner" 50 | "auto_scheduler.PythonBasedModel" "auto_scheduler.RPCRunner" 51 | "auto_scheduler.RandomModel" "auto_scheduler.RecordReader" 52 | "auto_scheduler.RecordToFile" "auto_scheduler.SearchCallback" 53 | "auto_scheduler.SearchPolicy" "auto_scheduler.SearchTask" 54 | "auto_scheduler.SketchPolicy" "auto_scheduler.Stage" "auto_scheduler.State" 55 | "auto_scheduler.TuningOptions" "relay.CCacheKey" "relay.CCacheValue" "relay.Call" 56 | "relay.Clause" "relay.CompileEngine" "relay.Constant" "relay.Constructor" 57 | "relay.ConstructorValue" "relay.Function" "relay.FunctionPass" "relay.Id" "relay.If" 58 | "relay.Let" "relay.LoweredOutput" "relay.Match" "relay.OpImplementation" 59 | "relay.OpSpecialization" "relay.OpStrategy" "relay.PatternConstructor" 60 | "relay.PatternTuple" "relay.PatternVar" "relay.PatternWildcard" "relay.QAnnotateExpr" 61 | "relay.QPartitionExpr" "relay.RefCreate" "relay.RefRead" "relay.RefType" 62 | "relay.RefValue" "relay.RefWrite" "relay.TensorType" "relay.Tuple" 63 | "relay.TupleGetItem" "relay.TypeData" "relay.Var" "relay.attrs.AdaptivePool2DAttrs" 64 | "relay.attrs.AdaptivePool3DAttrs" "relay.attrs.AffineGridAttrs" 65 | "relay.attrs.AllocStorageAttrs" "relay.attrs.AllocTensorAttrs" 66 | "relay.attrs.ArangeAttrs" "relay.attrs.ArgsortAttrs" "relay.attrs.AvgPool1DAttrs" 67 | "relay.attrs.AvgPool2DAttrs" "relay.attrs.AvgPool3DAttrs" 68 | "relay.attrs.BatchNormAttrs" "relay.attrs.BiasAddAttrs" 69 | "relay.attrs.BinaryConv2DAttrs" "relay.attrs.BinaryDenseAttrs" 70 | "relay.attrs.BitPackAttrs" "relay.attrs.CastAttrs" "relay.attrs.CastHintAttrs" 71 | "relay.attrs.ClipAttrs" "relay.attrs.CompilerAttrs" "relay.attrs.ConcatenateAttrs" 72 | "relay.attrs.Conv1DAttrs" "relay.attrs.Conv1DTransposeAttrs" 73 | "relay.attrs.Conv2DAttrs" "relay.attrs.Conv2DTransposeAttrs" 74 | "relay.attrs.Conv2DWinogradAttrs" 75 | "relay.attrs.Conv2DWinogradNNPACKWeightTransformAttrs" "relay.attrs.Conv3DAttrs" 76 | "relay.attrs.Conv3DTransposeAttrs" "relay.attrs.Conv3DWinogradAttrs" 77 | "relay.attrs.ConvWinogradWeightTransformAttrs" "relay.attrs.CorrelationAttrs" 78 | "relay.attrs.CropAndResizeAttrs" "relay.attrs.DebugAttrs" 79 | "relay.attrs.DeformableConv2DAttrs" "relay.attrs.DenseAttrs" 80 | "relay.attrs.DeviceCopyAttrs" "relay.attrs.DilateAttrs" "relay.attrs.Dilation2DAttrs" 81 | "relay.attrs.DropoutAttrs" "relay.attrs.ExpandDimsAttrs" 82 | "relay.attrs.FIFOBufferAttrs" 
"relay.attrs.GatherAttrs" 83 | "relay.attrs.GetValidCountsAttrs" "relay.attrs.GlobalPool2DAttrs" 84 | "relay.attrs.GridSampleAttrs" "relay.attrs.GroupNormAttrs" "relay.attrs.InitOpAttrs" 85 | "relay.attrs.InstanceNormAttrs" "relay.attrs.L2NormalizeAttrs" "relay.attrs.LRNAttrs" 86 | "relay.attrs.LayerNormAttrs" "relay.attrs.LayoutTransformAttrs" 87 | "relay.attrs.LeakyReluAttrs" "relay.attrs.MaxPool1DAttrs" 88 | "relay.attrs.MaxPool2DAttrs" "relay.attrs.MaxPool3DAttrs" "relay.attrs.MeshgridAttrs" 89 | "relay.attrs.MirrorPadAttrs" "relay.attrs.MultiBoxPriorAttrs" 90 | "relay.attrs.MultiBoxTransformLocAttrs" "relay.attrs.NdarraySizeAttrs" 91 | "relay.attrs.NonMaximumSuppressionAttrs" "relay.attrs.OnDeviceAttrs" 92 | "relay.attrs.OneHotAttrs" "relay.attrs.PReluAttrs" "relay.attrs.PadAttrs" 93 | "relay.attrs.ProposalAttrs" "relay.attrs.QuantizeAttrs" "relay.attrs.ROIAlignAttrs" 94 | "relay.attrs.ROIPoolAttrs" "relay.attrs.ReduceAttrs" "relay.attrs.RepeatAttrs" 95 | "relay.attrs.RequantizeAttrs" "relay.attrs.ReshapeAttrs" "relay.attrs.Resize3dAttrs" 96 | "relay.attrs.ResizeAttrs" "relay.attrs.ReverseAttrs" 97 | "relay.attrs.ReverseSequenceAttrs" "relay.attrs.ScatterAttrs" 98 | "relay.attrs.SequenceMaskAttrs" "relay.attrs.ShapeFuncAttrs" 99 | "relay.attrs.ShapeOfAttrs" "relay.attrs.SimulatedQuantizeAttrs" 100 | "relay.attrs.SliceLikeAttrs" "relay.attrs.SoftmaxAttrs" 101 | "relay.attrs.SparseDenseAttrs" "relay.attrs.SparseToDenseAttrs" 102 | "relay.attrs.SparseTransposeAttrs" "relay.attrs.SplitAttrs" 103 | "relay.attrs.SqueezeAttrs" "relay.attrs.StackAttrs" "relay.attrs.StridedSliceAttrs" 104 | "relay.attrs.SubPixelAttrs" "relay.attrs.TakeAttrs" "relay.attrs.TileAttrs" 105 | "relay.attrs.TopkAttrs" "relay.attrs.TransposeAttrs" "relay.attrs.TupleGetItemAttrs" 106 | "relay.attrs.UpSampling3DAttrs" "relay.attrs.UpSamplingAttrs" 107 | "relay.attrs.VarianceAttrs" "relay.attrs.WithFuncIdAttrs" 108 | "relay.attrs.YoloReorgAttrs" "relay.dataflow_pattern." "relay.quantize.QConfig" 109 | "runtime.ADT" "runtime.NDArray" "runtime.String" "tir.Add" "tir.Allocate" "tir.And" 110 | "tir.Any" "tir.AssertStmt" "tir.AttrStmt" "tir.BijectiveLayout" "tir.Broadcast" 111 | "tir.Buffer" "tir.BufferLoad" "tir.BufferRealize" "tir.BufferStore" "tir.Call" 112 | "tir.Cast" "tir.CommReducer" "tir.DataProducer" "tir.Div" "tir.EQ" "tir.Evaluate" 113 | "tir.FloorDiv" "tir.FloorMod" "tir.For" "tir.GE" "tir.GT" "tir.IfThenElse" 114 | "tir.IterVar" "tir.LE" "tir.LT" "tir.Let" "tir.LetStmt" "tir.Load" "tir.Max" 115 | "tir.Min" "tir.Mod" "tir.Mul" "tir.NE" "tir.Not" "tir.Or" "tir.Prefetch" 116 | "tir.PrimFunc" "tir.PrimFuncPass" "tir.ProducerLoad" "tir.ProducerRealize" 117 | "tir.ProducerStore" "tir.Ramp" "tir.Reduce" "tir.Select" "tir.SeqStmt" "tir.Shuffle" 118 | "tir.SizeVar" "tir.Store" "tir.StringImm" "tir.Sub" "tir.Var" "transform.ModulePass" 119 | "transform.Pass" "transform.PassContext" "transform.PassInfo" "transform.Sequential"]) 120 | -------------------------------------------------------------------------------- /src/tvm_clj/module.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.module 2 | "Once user's have a compiled a module, the then can query the module 3 | for the functions within. Functions returned take only things convertible 4 | to TVM nodes such as scalars and tensors and the result buffer must be 5 | passed in." 
6 | (:require [tvm-clj.impl.module :as mod-impl] 7 | [tvm-clj.impl.dl-tensor :as dl-tensor])) 8 | 9 | 10 | (defn find-function 11 | "Find a function in a module. Failure causes an exception." 12 | [module fn-name] 13 | (mod-impl/get-module-function module fn-name false)) 14 | -------------------------------------------------------------------------------- /src/tvm_clj/schedule.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.schedule 2 | "After describing the algorithm, the user creates a 'schedule' for the 3 | algorithm which involves transformations to the algorithm that are guaranteed 4 | not to change the results, such as tiling a computation across a tensor." 5 | (:require [tvm-clj.impl.protocols :refer [->node] :as bindings] 6 | [tvm-clj.impl.node :as jna-node] 7 | [tvm-clj.ast :as ast] 8 | [tvm-clj.impl.fns.te :as te-fns] 9 | [tech.v3.datatype :as dtype] 10 | [tech.v3.datatype.errors :as errors]) 11 | (:import [java.util Objects])) 12 | 13 | 14 | (defn throw-nil 15 | [item key-val] 16 | (if-let [retval (get item key-val)] 17 | retval 18 | (throw (ex-info "Expected object but got nil" 19 | {:item item 20 | :key key-val})))) 21 | 22 | 23 | (defn create-schedule 24 | [op-seq] 25 | (let [op-seq (->> (if-not (sequential? op-seq) 26 | [op-seq] 27 | op-seq) 28 | (mapv ast/->operation))] 29 | (te-fns/CreateSchedule op-seq))) 30 | 31 | 32 | (defn ->stage 33 | [stage-or-schedule operation] 34 | (case (bindings/node-type-name stage-or-schedule) 35 | "Stage" stage-or-schedule 36 | "Schedule" (throw-nil (:stage_map stage-or-schedule) 37 | (ast/->operation operation)))) 38 | 39 | 40 | (defmethod jna-node/get-extended-node-value :schedule 41 | [node-handle item-key] 42 | (->stage node-handle (ast/->operation item-key))) 43 | 44 | 45 | (defn stage-split-axis 46 | [stage iter-var factor] 47 | (te-fns/StageSplitByFactor stage iter-var factor)) 48 | 49 | 50 | (defn stage-bind 51 | "Bind an iter-var to a stage variable" 52 | [stage iter-var thread-ivar] 53 | (te-fns/StageBind stage iter-var thread-ivar)) 54 | 55 | 56 | (defn stage-compute-at 57 | "Compute src stage at dst stage dst axis" 58 | [src-stage dst-stage dst-axis] 59 | (te-fns/StageComputeAt src-stage dst-stage dst-axis)) 60 | 61 | 62 | (defn- resolve-rel-axis 63 | [op rel-axis] 64 | (let [op (ast/->operation op)] 65 | (if (number? rel-axis) 66 | (let [axis (:axis op) 67 | n-axis (dtype/ecount axis) 68 | rel-axis (long rel-axis) 69 | rel-axis (if (neg? rel-axis) 70 | (max 0 (+ n-axis rel-axis)) 71 | rel-axis)] 72 | (errors/check-idx rel-axis n-axis) 73 | (nth axis rel-axis)) 74 | rel-axis))) 75 | 76 | 77 | (defn inline-op 78 | "Inline an operation at the given axis of the destination operation. If rel-axis 79 | is a number, positive numbers index left-to-right while negative numbers index 80 | right-to-left across the destination op's axes, following python semantics. 81 | 82 | A rel-axis of -1 addresses the most-rapidly-changing index."
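;;A hedged usage sketch; the op names here are hypothetical placeholders:
;;  (inline-op schedule intermediate-op output-op -1)
;;computes intermediate-op inline at output-op's most-rapidly-changing axis.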
83 | ([schedule src-op dst-op rel-axis] 84 | (let [stage-map (:stage_map schedule) 85 | src-op (ast/->operation src-op) 86 | dst-op (ast/->operation dst-op)] 87 | (Objects/requireNonNull schedule) 88 | (Objects/requireNonNull src-op) 89 | (Objects/requireNonNull (stage-map src-op)) 90 | (Objects/requireNonNull dst-op) 91 | (Objects/requireNonNull (stage-map dst-op)) 92 | (Objects/requireNonNull (resolve-rel-axis dst-op rel-axis)) 93 | (stage-compute-at (stage-map src-op) (stage-map dst-op) 94 | (resolve-rel-axis dst-op rel-axis)) 95 | schedule))) 96 | 97 | 98 | (defn stage-fuse 99 | "Fuse n-axis together, returns single new axis" 100 | [stage axis-args] 101 | ;;If there is only one axis, then fusing is pointless 102 | (if (= 1 (count axis-args)) 103 | (first axis-args) 104 | (te-fns/StageFuse stage axis-args))) 105 | 106 | 107 | (defn stage-parallel 108 | "Indicate that this axis has complete parallelism" 109 | [stage axis] 110 | (Objects/requireNonNull stage) 111 | (Objects/requireNonNull axis) 112 | (te-fns/StageParallel stage axis)) 113 | 114 | 115 | (defn parallelize-axis 116 | [schedule op rel-axis] 117 | (Objects/requireNonNull schedule) 118 | (Objects/requireNonNull op) 119 | (let [stage-map (:stage_map schedule) 120 | op (ast/->operation op)] 121 | (stage-parallel (stage-map op) (resolve-rel-axis op rel-axis))) 122 | schedule) 123 | 124 | 125 | (defn stage-inline 126 | [stage] 127 | (te-fns/StageComputeInline stage)) 128 | 129 | 130 | (defn stage-tile 131 | [stage outer-axis inner-axis outer-dim inner-dim] 132 | (te-fns/StageTile stage outer-axis inner-axis outer-dim inner-dim)) 133 | 134 | 135 | (defn stage-reorder 136 | [stage axis-seq] 137 | (te-fns/StageReorder stage axis-seq)) 138 | 139 | 140 | (defn stage-vectorize 141 | [stage axis] 142 | (te-fns/StageVectorize stage axis)) 143 | 144 | 145 | (defn stage-unroll 146 | [stage axis] 147 | (te-fns/StageUnroll stage axis)) 148 | 149 | 150 | (defn schedule-cache-write 151 | "Returns a new tensor" 152 | [schedule tensor cache-type] 153 | (let [retval (te-fns/ScheduleCacheWrite schedule tensor cache-type)] 154 | {:tensor retval 155 | :schedule schedule})) 156 | 157 | 158 | (defn schedule-cache-read 159 | [schedule tensor cache-type readers] 160 | (throw (ex-info "Unimplemented" {}))) 161 | 162 | 163 | (defn stage-bind-gpu 164 | "Bind gpu-defined axes to tvm axes. 165 | GPU backends (cuda, opencl) define a rough two-level breakdown of axes: block and 166 | thread. Threads run on the same block and can share a special kind of memory (called 167 | shared memory). There can be up to 3 tvm axes per block or thread and these are 168 | labeled (outer iterator to inner iterator): 169 | [z y x]" 170 | [stage block-axis-seq thread-axis-seq] 171 | (let [axis-names ["z" "y" "x"] 172 | full-info-fn (fn [grp-name axis-seq] 173 | (map vector 174 | (repeat grp-name) 175 | axis-seq 176 | ;;map to axis such that if you have one, it becomes 177 | ;;the x axis. If you have 2, first is y and second 178 | ;;is x, etc. 179 | (drop (- 3 (count axis-seq)) axis-names)))] 180 | (when-not (and (<= (count block-axis-seq) 3) 181 | (<= (count thread-axis-seq) 3)) 182 | (throw (ex-info "Block, threads can have up to 3 axis" 183 | {:thread-axis-count (count thread-axis-seq) 184 | :block-axis-count (count block-axis-seq)}))) 185 | (->> (concat (full-info-fn "blockIdx" block-axis-seq) 186 | (full-info-fn "threadIdx" thread-axis-seq)) 187 | (map (fn [[grp-name axis gpu-axis-name]] 188 | (stage-bind stage axis 189 | (ast/name->thread-axis-iterator 190 | (str grp-name "."
gpu-axis-name))))) 191 | dorun))) 192 | 193 | 194 | (defn stage-gpu-injective 195 | [stage op & {:keys [thread-count axis] 196 | :or {thread-count 16}}] 197 | 198 | (let [retval stage 199 | op (ast/->operation op) 200 | stage (->stage stage op) 201 | fused-axis (stage-fuse stage (or axis (:axis op))) 202 | [bx tx] (stage-split-axis stage fused-axis thread-count)] 203 | (stage-bind-gpu stage [bx] [tx]) 204 | retval)) 205 | 206 | 207 | (defn stage-cpu-injective 208 | [stage op & {:keys [axis]}] 209 | (let [retval stage 210 | op (ast/->operation op) 211 | stage (->stage stage op) 212 | fused-axis (stage-fuse stage (or axis (:axis op)))] 213 | (stage-parallel stage fused-axis) 214 | retval)) 215 | -------------------------------------------------------------------------------- /test/data/castle.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/techascent/tvm-clj/1088845bd613b4ba14b00381ffe3cdbd3d8b639e/test/data/castle.jpg -------------------------------------------------------------------------------- /test/data/jen.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/techascent/tvm-clj/1088845bd613b4ba14b00381ffe3cdbd3d8b639e/test/data/jen.jpg -------------------------------------------------------------------------------- /test/tvm_clj/tvm_test.clj: -------------------------------------------------------------------------------- 1 | (ns tvm-clj.tvm-test 2 | (:require [tvm-clj.ast :as ast] 3 | [tvm-clj.ast.elemwise-op :as ast-op] 4 | [tvm-clj.schedule :as schedule] 5 | [tvm-clj.compiler :as compiler] 6 | [tvm-clj.module :as module] 7 | [tvm-clj.device :as device] 8 | [tech.v3.tensor :as dtt] 9 | [tech.v3.datatype.functional :as dfn] 10 | [clojure.test :refer [deftest is]])) 11 | 12 | 13 | (defn make-add-fn 14 | [] 15 | ;;Default datatype of variable is integer 16 | (let [n (ast/variable "n") 17 | ;;Default datatype of placeholder is float32 18 | A (ast/placeholder [n] "A") 19 | B (ast/placeholder [n] "B") 20 | compute-op (ast/compute [n] 21 | ;;Attaches metadata to the fn so we know the argument 22 | ;;count. 
23 | (ast/tvm-fn 24 | [i] 25 | (ast-op/+ (ast/tget A [i]) 26 | (ast/tget B [i]))) 27 | "C") 28 | C (first (ast/output-tensors compute-op))] 29 | {:schedule (schedule/create-schedule compute-op) 30 | :arguments [A B C] 31 | :compute-op compute-op})) 32 | 33 | 34 | (deftest cpu-add 35 | (let [{:keys [schedule arguments compute-op]} (make-add-fn) 36 | _ (schedule/stage-cpu-injective schedule compute-op) 37 | module (compiler/compile {"cpu_add" {:schedule schedule 38 | :arguments arguments}}) 39 | add-fn (module/find-function module "cpu_add") 40 | tens-a (dtt/->tensor (range 10) :datatype :float32 41 | :container-type :native-heap) 42 | tens-b (dtt/->tensor (range 10 20) :datatype :float32 43 | :container-type :native-heap) 44 | tens-c (dtt/new-tensor [10] :datatype :float32 45 | :container-type :native-heap)] 46 | (add-fn tens-a tens-b tens-c) 47 | (is (dfn/equals tens-c (dfn/+ tens-a tens-b))))) 48 | 49 | 50 | (defn device-add-test 51 | [device-type] 52 | (let [{:keys [schedule arguments compute-op]} (make-add-fn) 53 | _ (schedule/stage-gpu-injective schedule compute-op) 54 | module (compiler/compile {"device_add" {:schedule schedule 55 | :arguments arguments 56 | :target device-type}}) 57 | add-fn (module/find-function module "device_add") 58 | tens-a (dtt/->tensor (range 10) :datatype :float32 59 | :container-type :native-heap) 60 | tens-b (dtt/->tensor (range 10 20) :datatype :float32 61 | :container-type :native-heap) 62 | device-id 0 63 | dev-a (device/cpu->device tens-a device-type device-id) 64 | dev-b (device/cpu->device tens-b device-type device-id) 65 | ;;Create a device tensor taking the shape and elemwise datatype 66 | ;;from the input. 67 | dev-c (device/device-tensor tens-a device-type device-id) 68 | _ (add-fn dev-a dev-b dev-c) 69 | tens-c (device/device->cpu dev-c)] 70 | (is (dfn/equals tens-c (dfn/+ tens-a tens-b))))) 71 | 72 | 73 | (deftest ^:cuda cuda-add 74 | (device-add-test :cuda)) 75 | 76 | 77 | (deftest ^:opencl opencl-add 78 | (device-add-test :opencl)) 79 | 80 | 81 | (deftest cpu-reduction 82 | (let [n (ast/variable "n") 83 | A (ast/placeholder [n] "A") 84 | 85 | reducer (ast/tvm-fn->commutative-reducer 86 | ;;reduce-fn, arguments are divided into accumulators 87 | ;;and inputs. Accum args are implicitly defined by the 88 | ;;number of identity values passed in.. 89 | (ast/tvm-fn 90 | [lhs rhs] 91 | (ast-op/max lhs rhs)) 92 | ;;reduction identity values, one for each accumulator argument. 
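;;for max, that identity is the smallest representable float32 value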
93 | [(ast-op/min-value :float32)]) 94 | 95 | compute-op (ast/compute 96 | [1] 97 | (ast/tvm-fn 98 | [i] 99 | (ast/commutative-reduce 100 | reducer 101 | [{:name "reduce-n" :domain [0 n]}] 102 | [#(ast/tget A [%])])) 103 | "C") 104 | C (first (ast/output-tensors compute-op)) 105 | schedule (schedule/create-schedule compute-op) 106 | arguments [A C] 107 | module (compiler/compile {"vec_max" {:schedule schedule 108 | :arguments arguments}}) 109 | max-fn (module/find-function module "vec_max") 110 | tens-a (dtt/->tensor (range 10) :datatype :float32 111 | :container-type :native-heap) 112 | tens-c (dtt/new-tensor [1] :datatype :float32 113 | :container-type :native-heap)] 114 | (max-fn tens-a tens-c) 115 | (is (= 9.0 116 | (double (first tens-c)))))) 117 | -------------------------------------------------------------------------------- /topics/background.md: -------------------------------------------------------------------------------- 1 | # Technical Background 2 | 3 | [tvm](https://github.com/dmlc/tvm) is a system for dynamically generating high performance numeric code with backends for cpu, cuda, opencl, opengl, webassembly, vulkan, and verilog. It has frontends mainly in python and c++, with a clear and well designed C-ABI that not only aids the implementation of its python interface but also eases binding into other language ecosystems such as the jvm and node. 4 | 5 | tvm leverages [Halide](http://halide-lang.org) for its IR layer and for its overall strategy. Halide takes algorithms structured in specific ways and allows performance experimentation without affecting the output of the core algorithm. A very solid justification for this approach is nicely put in these [slides](http://stellar.mit.edu/S/course/6/sp15/6.815/courseMaterial/topics/topic2/lectureNotes/14_Halide_print/14_Halide_print.pdf). A Ph.D. was minted [here](http://people.csail.mit.edu/jrk/jrkthesis.pdf). We also recommend watching the youtube [video](https://youtu.be/3uiEyEKji0M). 6 | 7 | It should be noted, however, that at this point TVM has diverged significantly from Halide, implementing essentially its own compiler specifically designed to work with deep learning workloads: 8 | 9 | > It is interesting. Please note that while TVM uses HalideIR that is derived from Halide, most of the code generation and optimization passes are done independently(with deep learning workloads in mind), while reusing sensible ones from Halide. So in terms of low level code generation, we are not necessarily bound to some of limitations listed. 10 | > 11 | > In particular, we take a pragmatic approach, to focus on what is useful for deep learning workloads, so you can find unique things like more GPU optimization, accelerator support, recurrence(scan). If there are optimizations that Tiramisu have which is useful to get the state of art deep learning workloads, we are all for bringing that into TVM 12 | > 13 | > I also want to emphasize that TVM is more than a low level tensor code generation, but instead trying to solve the end to end deep learning compilation problem, and many of the things goes beyond the tensor code generation. 14 | 15 | -- [tqchen](https://discuss.tvm.ai/t/comparison-between-tiramisu-and-tvm-and-halide/933/2), the main contributor to TVM. 16 | 17 | 18 | ## Goals 19 | 20 | 1. Learn about Halide and tvm and enable very clear and simple exploration of the system in clojure. Make clojure a first-class language in the dmlc ecosystem. 21 | 1.
Note, however, that TVM has since diverged significantly from Halide, implementing essentially its own compiler designed specifically for deep-learning workloads:

> It is interesting. Please note that while TVM uses HalideIR that is derived from Halide, most of the code generation and optimization passes are done independently(with deep learning workloads in mind), while reusing sensible ones from Halide. So in terms of low level code generation, we are not necessarily bound to some of limitations listed.
>
> In particular, we take a pragmatic approach, to focus on what is useful for deep learning workloads, so you can find unique things like more GPU optimization, accelerator support, recurrence(scan). If there are optimizations that Tiramisu have which is useful to get the state of art deep learning workloads, we are all for bringing that into TVM
>
> I also want to emphasize that TVM is more than a low level tensor code generation, but instead trying to solve the end to end deep learning compilation problem, and many of the things goes beyond the tensor code generation.

-- [tqchen](https://discuss.tvm.ai/t/comparison-between-tiramisu-and-tvm-and-halide/933/2), the main contributor to TVM.


## Goals

1. Learn about Halide and tvm, and enable very clear and simple exploration of the system in clojure. Make clojure a first-class language in the dmlc ecosystem.
1. Provide the tvm team with clear feedback and a second external implementation of a language binding on top of the C-ABI.
1. Encourage wider adoption and exploration in numerical programming; for instance, a new implementation of J that carries the properties of the clojure and clojurescript ecosystems but includes all of the major concepts of J. This would enable running some subset of J (or APL) programs far more efficiently than before, accessible from node.js or the jvm. It would also inform the wider discussion around numeric programming systems such as MatLab, TensorFlow, and numpy.
1. Provide a richer platform for binding to nnvm so that running existing networks via clojure is as seamless as possible.


## What, Concretely, Are You Talking About?


### Vector Math Compiler Example

We built a small compiler that takes a statement of vector math and compiles it to tvm. It is quite incomplete and far from as efficient as it could be, but it points toward compiling potentially entire neural-network functions this way. Below, the same bgr normalization op is timed as hand-coded java and as tvm-compiled cpu and opencl kernels; the printed IR shows the loop structure tvm generated for each backend.

```
hand-coded java took: "Elapsed time: 558.662639 msecs"

produce bgr_types_op {
  parallel (chan, 0, min(n_channels, 3)) {
    for (y.outer, 0, ((image_height + 31)/32)) {
      for (x.outer, 0, ((image_width + 31)/32)) {
        for (y.inner, 0, 32) {
          if (likely(((y.outer*32) < (image_height - y.inner)))) {
            for (x.inner.s, 0, 32) {
              if (likely(((x.outer*32) < (image_width - x.inner.s)))) {
                buffer[(((x.outer*32) + ((((chan*image_height) + (y.outer*32)) + y.inner)*image_width)) + x.inner.s)] = ((float32(buffer[((((((x.outer*32) + (((y.outer*32) + y.inner)*image_width)) + x.inner.s)*n_channels) - chan) + 2)])*0.003922f) + -0.500000f)
              }
            }
          }
        }
      }
    }
  }
}

Compiled (cpu) tensor took: "Elapsed time: 31.712205 msecs"

produce bgr_types_op {
  // attr [iter_var(blockIdx.z, , blockIdx.z)] thread_extent = min(n_channels, 3)
  // attr [iter_var(blockIdx.y, , blockIdx.y)] thread_extent = ((image_height + 31)/32)
  // attr [iter_var(blockIdx.x, , blockIdx.x)] thread_extent = ((image_width + 31)/32)
  // attr [iter_var(threadIdx.y, , threadIdx.y)] thread_extent = 32
  // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 32
  if (likely(((blockIdx.y*32) < (image_height - threadIdx.y)))) {
    if (likely(((blockIdx.x*32) < (image_width - threadIdx.x)))) {
      buffer[(((blockIdx.x*32) + ((((blockIdx.z*image_height) + (blockIdx.y*32)) + threadIdx.y)*image_width)) + threadIdx.x)] = ((float32(buffer[((((((blockIdx.x*32) + (((blockIdx.y*32) + threadIdx.y)*image_width)) + threadIdx.x)*n_channels) - blockIdx.z) + 2)])*0.003922f) + -0.500000f)
    }
  }
}

Compiled (opencl) tensor took: "Elapsed time: 4.641527 msecs"
```
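
For a sense of what the statement being compiled looks like on the tvm-clj side, here is a hypothetical sketch of the bgr op above written directly against the AST. It assumes `tvm-clj.ast.elemwise-op` exposes `*` and `-` alongside the `+`, `max`, and `min-value` used in the tests, that scalar constants are legal operands, and that `ast/compute` accepts a multi-dimensional shape with a matching-arity `tvm-fn`; all names here are illustrative, not the actual compiler's output:

```clojure
;;Hypothetical sketch of the bgr normalization above, written against
;;the tvm-clj AST.  Assumes ast-op exposes * and - alongside the + used
;;in the tests, and that scalar constants are legal operands; all names
;;here are illustrative.
(defn bgr->planar-float-op
  [n-channels image-height image-width]
  (let [input (ast/placeholder [image-height image-width n-channels] "input")]
    (ast/compute
     [n-channels image-height image-width]
     (ast/tvm-fn
      [chan y x]
      ;;Read interleaved bgr, reversing the channel order (the
      ;;"- chan) + 2" term in the generated IR), scale byte values by
      ;;1/255 (0.003922), and center them around zero.
      (ast-op/+ (ast-op/* (ast/tget input [y x (ast-op/- 2 chan)])
                          0.003922)
                -0.5))
     "bgr_types_op")))
```

Scheduling stays orthogonal: such an op can be staged with `stage-cpu-injective` or `stage-gpu-injective` exactly as in the tests above, which is how the cpu and opencl timings were produced.
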
### Image Scaling (TVM vs OpenCV)

Faster (and correct) bilinear and area filtering that handily beats opencv::resize on a desktop machine in both speed and code readability.

```clojure
;; cpu, algorithm run 10 times.  Desktop (NVIDIA 1070):

tvm-clj.image.resize-test> (downsample-img)
{:opencv-area-time "\"Elapsed time: 815.136235 msecs\"\n",
 :opencv-bilinear-time "\"Elapsed time: 220.774128 msecs\"\n",
 :tvm-area-time "\"Elapsed time: 380.640778 msecs\"\n",
 :tvm-bilinear-time "\"Elapsed time: 21.361915 msecs\"\n"}

tvm-clj.image.resize-test> (downsample-img :device-type :opencl)
{:opencv-area-time "\"Elapsed time: 338.918811 msecs\"\n",
 :opencv-bilinear-time "\"Elapsed time: 16.837844 msecs\"\n",
 :tvm-area-time "\"Elapsed time: 31.076962 msecs\"\n",
 :tvm-bilinear-time "\"Elapsed time: 3.033296 msecs\"\n"}

;;Laptop times
tvm-clj.image.resize-test> (downsample-img)
{:opencv-area-time "\"Elapsed time: 2422.879178 msecs\"\n",
 :opencv-bilinear-time "\"Elapsed time: 637.622425 msecs\"\n",
 :tvm-area-time "\"Elapsed time: 333.946424 msecs\"\n",
 :tvm-bilinear-time "\"Elapsed time: 20.585665 msecs\"\n"}

tvm-clj.image.resize-test> (downsample-img :device-type :opencl)
{:opencv-area-time "\"Elapsed time: 2460.51718 msecs\"\n",
 :opencv-bilinear-time "\"Elapsed time: 667.624091 msecs\"\n",
 :tvm-area-time "\"Elapsed time: 315.864799 msecs\"\n",
 :tvm-bilinear-time "\"Elapsed time: 16.290168 msecs\"\n"}
```

* [opencv source](https://github.com/opencv/opencv/blob/master/modules/imgproc/src/resize.cpp)
--------------------------------------------------------------------------------
/topics/images/ref.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/techascent/tvm-clj/1088845bd613b4ba14b00381ffe3cdbd3d8b639e/topics/images/ref.jpg
--------------------------------------------------------------------------------
/topics/images/test.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/techascent/tvm-clj/1088845bd613b4ba14b00381ffe3cdbd3d8b639e/topics/images/test.jpg
--------------------------------------------------------------------------------