├── .gitignore ├── 1_square ├── Makefile └── square.c ├── 2_norm ├── Makefile └── norm.c ├── 3_dot ├── Makefile └── dot.c ├── 4_invsqrt ├── Makefile └── invsqrt.c ├── 5_fn_like ├── Makefile └── fn_like.c ├── 6_cache ├── Makefile └── cache.c ├── 7_fwd ├── Makefile └── fwd.c ├── 8_batch ├── Makefile └── batch.c ├── 9_multisource ├── Makefile ├── multisource.c ├── myblas.c └── myblas.h ├── README.md ├── cuda ├── README.md └── square │ ├── Makefile │ └── cuda_square.cu ├── docker └── Dockerfile ├── dockerscript.sh ├── julia └── introduction.ipynb ├── julia_activity ├── .ipynb_checkpoints │ └── activity-checkpoint.ipynb ├── .jupyter │ └── desktop-workspaces │ │ └── default-37a8.jupyterlab-workspace └── activity.jl ├── julia_custom └── custom.jl ├── julia_fwd_and_batch └── fwd_and_batch.jl ├── mpi ├── README.md └── disclaimer.txt └── openmp ├── README.md ├── parallel_for ├── Makefile ├── OldMakefile └── omp_parallel_for.c ├── parallel_for_nounroll ├── Makefile └── omp_parallel_for_nounroll.c └── parallel_simple ├── Makefile └── omp_parallel.c /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.ll 3 | -------------------------------------------------------------------------------- /1_square/Makefile: -------------------------------------------------------------------------------- 1 | all: square.o 2 | 3 | clean: 4 | rm -f *.o *.ll 5 | 6 | %.o: %.c 7 | ../dockerscript.sh clang-12 /host/$^ -O3 -Xclang -load -Xclang /Enzyme/enzyme/build/Enzyme/ClangEnzyme-12.so -ffast-math -o /host/$@ 8 | 9 | run-%: %.o 10 | ../dockerscript.sh /host/$^ 3.14 11 | -------------------------------------------------------------------------------- /1_square/square.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | // Function to differentiate 5 | double square(double x) { 6 | return x * x; 7 | } 8 | 9 | double __enzyme_autodiff(void*, ...); 10 | int enzyme_const, enzyme_dup, 
enzyme_out; 11 | 12 | int main(int argc, char *argv[]) { 13 | double x = 20; 14 | if (argc > 1) { 15 | x = atof(argv[1]); 16 | } 17 | 18 | double grad_x = __enzyme_autodiff((void*)square, x); 19 | printf("Gradient square(%f) = %f\n", x, grad_x); 20 | 21 | return 0; 22 | } 23 | -------------------------------------------------------------------------------- /2_norm/Makefile: -------------------------------------------------------------------------------- 1 | all: norm-O2enzyme.o norm-enzymeO2.o norm-O2enzyme.ll norm-enzymeO2.ll norm-unopt.ll 2 | 3 | clean: 4 | rm -f *.o *.ll 5 | 6 | %-unopt.ll: %.c 7 | ../dockerscript.sh clang-12 /host/$^ -O1 -Xclang -disable-llvm-passes -fno-vectorize -fno-slp-vectorize -ffast-math -fno-unroll-loops -o /host/$@ -S -emit-llvm 8 | 9 | %-enzymeO2.ll: %-unopt.ll 10 | ../dockerscript.sh opt-12 /host/$^ -load /Enzyme/enzyme/build/Enzyme/LLVMEnzyme-12.so -enzyme -O2 -o /host/$@ -S 11 | 12 | %-O2enzyme.ll: %-unopt.ll 13 | ../dockerscript.sh opt-12 /host/$^ -load /Enzyme/enzyme/build/Enzyme/LLVMEnzyme-12.so -O2 -enzyme -o /host/$@ -S 14 | 15 | %.o: %.ll 16 | ../dockerscript.sh clang-12 -O2 /host/$^ -o /host/$@ -lm 17 | 18 | run-%: %.o 19 | ../dockerscript.sh /host/$^ 2000 20 | -------------------------------------------------------------------------------- /2_norm/norm.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | float tdiff(struct timeval *start, struct timeval *end) { 9 | return (end->tv_sec-start->tv_sec) + 1e-6*(end->tv_usec-start->tv_usec); 10 | } 11 | 12 | __attribute__((const,noinline)) 13 | double mag(const double *A, int n) { 14 | double amt = 0; 15 | for(int i=0; i 1) { 32 | n = atoi(argv[1]); 33 | if (argc > 2) { 34 | x = atoi(argv[2]); 35 | } 36 | } 37 | 38 | double *A = (double*)malloc(sizeof(double) * n); 39 | assert(A != 0); 40 | 41 | double *B = (double*)malloc(sizeof(double) * n); 42 | assert(B != 0); 
43 | for(int i=0; i 2 | #include 3 | #include 4 | #include 5 | 6 | float tdiff(struct timeval *start, struct timeval *end) { 7 | return (end->tv_sec-start->tv_sec) + 1e-6*(end->tv_usec-start->tv_usec); 8 | } 9 | 10 | double dot(double* __restrict__ A, double* __restrict__ B, double C, int n) { 11 | double sum = 0; 12 | for (int i=0; i 1) { 26 | n = atoi(argv[1]); 27 | if (argc > 2) { 28 | x = atoi(argv[2]); 29 | } 30 | } 31 | 32 | double *A = (double*)malloc(sizeof(double) * n); 33 | for(int i=0; i 2 | #include 3 | #include 4 | 5 | // Fast inverse sqrt 6 | // Code taken from https://en.wikipedia.org/wiki/Fast_inverse_square_root 7 | #ifdef CUSTOM 8 | __attribute__((noinline)) 9 | #endif 10 | float Q_rsqrt( float number ) 11 | { 12 | long i; 13 | float x2, y; 14 | const float threehalfs = 1.5F; 15 | 16 | x2 = number * 0.5F; 17 | y = number; 18 | i = * ( long * ) &y; // evil floating point bit level hacking 19 | i = 0x5f3759df - ( i >> 1 ); // what the [...]? 20 | y = * ( float * ) &i; 21 | y = y * ( threehalfs - ( x2 * y * y ) ); // 1st iteration 22 | return y; 23 | } 24 | 25 | 26 | double invmag(double* __restrict__ A, int n) { 27 | double sumsq = 0; 28 | for (int i=0; i 1) { 59 | n = atoi(argv[1]); 60 | } 61 | 62 | 63 | double *A = (double*)malloc(sizeof(double) * n); 64 | for(int i=0; i 2 | #include 3 | #include 4 | 5 | 6 | double __enzyme_autodiff(void*, ...); 7 | 8 | double log1p_like_function(double a) { 9 | return 2*a; 10 | } 11 | 12 | double test(double a) { 13 | return log1p_like_function(a); 14 | } 15 | 16 | void* __enzyme_function_like[2] = {(void*)log1p_like_function, "log1p"}; 17 | 18 | int main(int argc, char** argv) { 19 | 20 | double grad_out = __enzyme_autodiff(test, 2.0); 21 | printf("Gradient of the log1p like function is %f", grad_out); 22 | 23 | return 0; 24 | } -------------------------------------------------------------------------------- /6_cache/Makefile: -------------------------------------------------------------------------------- 1 | 
all: cache-alias.o cache-noalias.o 2 | 3 | clean: 4 | rm -f *.o *.ll 5 | 6 | %-alias.o: %.c 7 | ../dockerscript.sh clang-12 -Rpass=enzyme /host/$^ -O3 -Xclang -load -Xclang /Enzyme/enzyme/build/Enzyme/ClangEnzyme-12.so -ffast-math -o /host/$@ 8 | 9 | %-noalias.o: %.c 10 | ../dockerscript.sh clang-12 -Rpass=enzyme -DNOALIAS=1 /host/$^ -O3 -Xclang -load -Xclang /Enzyme/enzyme/build/Enzyme/ClangEnzyme-12.so -ffast-math -o /host/$@ 11 | 12 | run-%: %.o 13 | ../dockerscript.sh /host/$^ 14 | -------------------------------------------------------------------------------- /6_cache/cache.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | float tdiff(struct timeval *start, struct timeval *end) { 7 | return (end->tv_sec-start->tv_sec) + 1e-6*(end->tv_usec-start->tv_usec); 8 | } 9 | 10 | #ifndef NOALIAS 11 | 12 | void squareCopy(double* in, double* out, int n) { 13 | double sumsq = 0; 14 | for (int i=0; i 1) { 40 | n = atoi(argv[1]); 41 | } 42 | 43 | 44 | double *in = (double*)malloc(sizeof(double) * n); 45 | assert(in != 0); 46 | for(int i=0; i 2 | #include 3 | #include 4 | 5 | double __enzyme_fwddiff(void*, ...); 6 | 7 | void compute_loops(float* a, float* b, float* ret) { 8 | double sum0 = 0.0; 9 | for (int i = 0; i < 100; i++) { 10 | sum0 += *a + *b; 11 | } 12 | *ret = sum0; 13 | } 14 | 15 | int main(int argc, char** argv) { 16 | float a = 2.0; 17 | float b = 3.0; 18 | 19 | float da = 1.0;//(float*) malloc(sizeof(float)); 20 | float db = 1.0;//(float*) malloc(sizeof(float)); 21 | 22 | float ret = 0; 23 | float dret = 1.0; 24 | 25 | __enzyme_fwddiff(compute_loops, &a, &da, &b, &db, &ret, &dret); 26 | printf("ret %f, dret %f, da: %f, db: %f\n", ret, dret, da, db); 27 | 28 | return 0; 29 | } -------------------------------------------------------------------------------- /8_batch/Makefile: -------------------------------------------------------------------------------- 1 | all: 
batch.o 2 | 3 | clean: 4 | rm -f *.o *.ll 5 | 6 | %.o: %.c 7 | ../dockerscript.sh clang-12 /host/$^ -O3 -Xclang -load -Xclang /Enzyme/enzyme/build/Enzyme/ClangEnzyme-12.so -ffast-math -o /host/$@ 8 | 9 | run-%: %.o 10 | ../dockerscript.sh /host/$^ 3.14 11 | -------------------------------------------------------------------------------- /8_batch/batch.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | struct Vector { 4 | double x1, x2, x3, x4; 5 | }; 6 | 7 | extern Vector __enzyme_batch(...); 8 | 9 | extern int enzyme_width; 10 | extern int enzyme_vector; 11 | extern int enzyme_scalar; 12 | 13 | double square(double x) { return x * x; } 14 | 15 | Vector vecsquare(double x1, double x2, double x3, double x4) { 16 | return __enzyme_batch(square, enzyme_width, 4, enzyme_vector, x1, x2, x3, x4); 17 | } 18 | int main() { 19 | double vals[] = {23.1, 10.0, 100.0, 3.14}; 20 | double expected[] = {square(vals[0]), square(vals[1]), square(vals[2]), 21 | square(vals[3])}; 22 | Vector result = vecsquare(vals[0], vals[1], vals[2], vals[3]); 23 | } -------------------------------------------------------------------------------- /9_multisource/Makefile: -------------------------------------------------------------------------------- 1 | all: lib.exe 2 | 3 | clean: 4 | rm -f *.o *.ll *.exe 5 | 6 | %.o: %.c 7 | ../dockerscript.sh clang-12 -c -fuse-ld=lld -flto /host/$^ -O2 -ffast-math -o /host/$@ 8 | 9 | lib.exe: myblas.o multisource.o 10 | ../dockerscript.sh clang-12 -fuse-ld=lld -flto /host/myblas.o /host/multisource.o -O2 -ffast-math -o /host/$@ -Wl,-mllvm=-load=/Enzyme/enzyme/build/Enzyme/LLDEnzyme-12.so 11 | 12 | run-%: %.exe 13 | ../dockerscript.sh /host/$^ 3 14 | -------------------------------------------------------------------------------- /9_multisource/multisource.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 
"myblas.h" 6 | 7 | double dotabs(struct complex* alpha, struct complex* beta, int n) { 8 | struct complex prod = myblas_cdot(alpha, beta, n); 9 | return myblas_cabs(prod); 10 | } 11 | 12 | void __enzyme_autodiff(void*, ...); 13 | int enzyme_const, enzyme_dup, enzyme_out; 14 | 15 | int main(int argc, char *argv[]) { 16 | int n = 3; 17 | if (argc > 1) { 18 | n = atoi(argv[1]); 19 | } 20 | 21 | 22 | struct complex *A = (struct complex*)malloc(sizeof(struct complex) * n); 23 | assert(A != 0); 24 | for(int i=0; i 2 | 3 | void __device__ square_impl(double* x_in, double *x_out) { 4 | x_out[0] = x_in[0] * x_in[0]; 5 | } 6 | 7 | typedef void (*f_ptr)(double*, double*); 8 | 9 | extern void __device__ __enzyme_autodiff(f_ptr, 10 | int, double*, double*, 11 | int, double*, double* 12 | ); 13 | 14 | void __global__ square(double* x_in, double *x_out) { 15 | square_impl(x_in, x_out); 16 | } 17 | 18 | int __device__ enzyme_dup; 19 | int __device__ enzyme_out; 20 | int __device__ enzyme_const; 21 | 22 | void __global__ square_grad(double* x, double *d_x, double *y, double *d_y) { 23 | 24 | __enzyme_autodiff(square_impl, 25 | enzyme_dup, x, d_x, 26 | enzyme_dup, y, d_y); 27 | 28 | } 29 | 30 | int main() { 31 | 32 | // Device pointers 33 | double *x, *d_x, *y, *d_y; 34 | 35 | // Allocate GPU device memory 36 | cudaMalloc(&x, sizeof(*x)); 37 | cudaMalloc(&d_x, sizeof(*d_x)) 38 | cudaMalloc(&y, sizeof(*y)); 39 | cudaMalloc(&d_y, sizeof(*d_y)) 40 | 41 | // Initialize device values 42 | double host_x = 1.4; 43 | double host_d_x = 0.0; 44 | double host_y; 45 | double host_d_y = 1.0; 46 | 47 | // Copy data to device 48 | cudaMemcpy(x, &host_x, sizeof(*x), cudaMemcpyHostToDevice); 49 | cudaMemcpy(d_x, &host_d_x, sizeof(*d_x), cudaMemcpyHostToDevice); 50 | cudaMemcpy(y, &host_y, sizeof(*y), cudaMemcpyHostToDevice); 51 | cudaMemcpy(d_y, &host_d_y, sizeof(*d_y), cudaMemcpyHostToDevice); 52 | 53 | #ifdef FORWARD 54 | // Forward pass only 55 | square<<<1, 1>>>(x, y); 56 | #else 57 | // Forward 
and backward pass 58 | square_grad<<<1, 1>>>(x, d_x, y, d_y); 59 | #endif 60 | 61 | // Synchronize device memory 62 | cudaDeviceSynchronize(); 63 | 64 | // Copy data from device to host 65 | cudaMemcpy(&host_x, x, sizeof(*x), cudaMemcpyDeviceToHost); 66 | cudaMemcpy(&host_d_x, d_x, sizeof(*d_x), cudaMemcpyDeviceToHost); 67 | cudaMemcpy(&host_y, y, sizeof(*y), cudaMemcpyDeviceToHost); 68 | cudaMemcpy(&host_d_y, d_y, sizeof(*d_y), cudaMemcpyDeviceToHost); 69 | 70 | // Print results 71 | printf("%f %f\n", host_x, host_y); 72 | printf("%f %f\n", host_d_x, host_d_y); 73 | 74 | } -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | 5 | RUN apt-get -y update && apt-get install -y --no-install-recommends curl gnupg lsb-core software-properties-common \ 6 | && curl -fsSL https://apt.llvm.org/llvm-snapshot.gpg.key|apt-key add - \ 7 | && apt-add-repository "deb http://apt.llvm.org/`lsb_release -c | cut -f2`/ llvm-toolchain-`lsb_release -c | cut -f2`-12 main" \ 8 | && apt-get install -y --no-install-recommends autoconf cmake ninja-build gcc g++ libtool gfortran llvm-12-dev lld-12 clang-12 libopenmpi-dev openmpi-bin git \ 9 | && apt-get autoremove -y --purge \ 10 | && apt-get clean -y \ 11 | && rm -rf /var/lib/apt/lists/* 12 | 13 | # Get & install Enzyme 14 | RUN git clone https://github.com/wsmoses/Enzyme.git \ 15 | && cd Enzyme/enzyme \ 16 | && mkdir build && cd build \ 17 | && cmake -G Ninja .. 
-DCMAKE_BUILD_TYPE=Debug \ 18 | && ninja 19 | 20 | RUN update-alternatives --install /usr/bin/clang clang /usr/bin/clang-12 10 --slave /usr/bin/clang++ clang++ /usr/bin/clang++-12 \ 21 | && update-alternatives --install /usr/bin/opt opt /usr/bin/opt-12 10 \ 22 | && update-alternatives --install /usr/bin/lld lld /usr/bin/lld-12 10 \ 23 | && update-alternatives --install /usr/bin/llvm-symbolizer llvm-symbolizer /usr/bin/llvm-symbolizer-12 10 24 | 25 | ENV DEBIAN_FRONTEND= 26 | -------------------------------------------------------------------------------- /dockerscript.sh: -------------------------------------------------------------------------------- 1 | docker run -v `pwd`:/host wsmoses/enzyme "$@" 2 | -------------------------------------------------------------------------------- /julia/introduction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "source": [ 6 | "# Enzyme.jl\n", 7 | "\n", 8 | "Julia is a high-level programming language using LLVM as a compiler backend.\n", 9 | "Enzyme.jl uses Julia's GPU compiler infrastructure to provide a custom optimization\n", 10 | "pipeline that inserts the Enzyme LLVM pass. \n", 11 | "\n", 12 | "It uses Orc (v2/v1) to then JIT the adjoints and call them through Julia's foreign-function\n", 13 | "interface."
14 | ], 15 | "metadata": {} 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "source": [ 20 | "function mysum(X)\n", 21 | " acc = zero(eltype(X))\n", 22 | " @simd for x in X\n", 23 | " acc += x\n", 24 | " end\n", 25 | " acc\n", 26 | "end" 27 | ], 28 | "metadata": {} 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "source": [ 33 | "# Installing Enzyme\n", 34 | "\n", 35 | "Tutorial tested with Julia 1.7-beta3\n", 36 | "\n", 37 | "Using the Julia package manager:\n", 38 | "```julia\n", 39 | "import Pkg\n", 40 | "Pkg.add(\"Enzyme\")\n", 41 | "```" 42 | ], 43 | "metadata": {} 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 37, 48 | "source": [ 49 | "import Pkg\n", 50 | "Pkg.activate(; temp=true)\n", 51 | "Pkg.add(Pkg.PackageSpec(name=\"Enzyme\", rev=\"822afeff2c8a9b87c8fb93c6415cc3ffb19924e8\"))\n", 52 | "Pkg.add(\"BenchmarkTools\")\n", 53 | "Pkg.add(\"ForwardDiff\")" 54 | ], 55 | "outputs": [ 56 | { 57 | "output_type": "stream", 58 | "name": "stdout", 59 | "text": [ 60 | "\u001b[32m\u001b[1m Activating\u001b[22m\u001b[39m new project at `/tmp/jl_yAfFxB`\n", 61 | "\u001b[32m\u001b[1m Resolving\u001b[22m\u001b[39m package versions...\n", 62 | "\u001b[32m\u001b[1m Updating\u001b[22m\u001b[39m `/tmp/jl_yAfFxB/Project.toml`\n", 63 | " \u001b[90m [7da242da] \u001b[39m\u001b[92m+ Enzyme v0.7.0 `https://github.com/wsmoses/Enzyme.jl.git#822afef`\u001b[39m\n", 64 | "\u001b[32m\u001b[1m Updating\u001b[22m\u001b[39m `/tmp/jl_yAfFxB/Manifest.toml`\n", 65 | " \u001b[90m [79e6a3ab] \u001b[39m\u001b[92m+ Adapt v3.3.1\u001b[39m\n", 66 | " \u001b[90m [fa961155] \u001b[39m\u001b[92m+ CEnum v0.4.1\u001b[39m\n", 67 | " \u001b[90m [7da242da] \u001b[39m\u001b[92m+ Enzyme v0.7.0 `https://github.com/wsmoses/Enzyme.jl.git#822afef`\u001b[39m\n", 68 | " \u001b[90m [e2ba6199] \u001b[39m\u001b[92m+ ExprTools v0.1.6\u001b[39m\n", 69 | " \u001b[90m [61eb1bfa] \u001b[39m\u001b[92m+ GPUCompiler v0.13.7\u001b[39m\n", 70 | " \u001b[90m [692b3bcd] \u001b[39m\u001b[92m+ JLLWrappers
v1.3.0\u001b[39m\n", 71 | " \u001b[90m [929cbde3] \u001b[39m\u001b[92m+ LLVM v4.6.0\u001b[39m\n", 72 | " \u001b[90m [d8793406] \u001b[39m\u001b[92m+ ObjectFile v0.3.7\u001b[39m\n", 73 | " \u001b[90m [21216c6a] \u001b[39m\u001b[92m+ Preferences v1.2.2\u001b[39m\n", 74 | " \u001b[90m [189a3867] \u001b[39m\u001b[92m+ Reexport v1.2.2\u001b[39m\n", 75 | " \u001b[90m [53d494c1] \u001b[39m\u001b[92m+ StructIO v0.3.0\u001b[39m\n", 76 | " \u001b[90m [a759f4b9] \u001b[39m\u001b[92m+ TimerOutputs v0.5.13\u001b[39m\n", 77 | " \u001b[90m [7cc45869] \u001b[39m\u001b[92m+ Enzyme_jll v0.0.21+0\u001b[39m\n", 78 | " \u001b[90m [dad2f222] \u001b[39m\u001b[92m+ LLVMExtra_jll v0.0.11+0\u001b[39m\n", 79 | " \u001b[90m [0dad84c5] \u001b[39m\u001b[92m+ ArgTools\u001b[39m\n", 80 | " \u001b[90m [56f22d72] \u001b[39m\u001b[92m+ Artifacts\u001b[39m\n", 81 | " \u001b[90m [2a0f44e3] \u001b[39m\u001b[92m+ Base64\u001b[39m\n", 82 | " \u001b[90m [ade2ca70] \u001b[39m\u001b[92m+ Dates\u001b[39m\n", 83 | " \u001b[90m [f43a241f] \u001b[39m\u001b[92m+ Downloads\u001b[39m\n", 84 | " \u001b[90m [b77e0a4c] \u001b[39m\u001b[92m+ InteractiveUtils\u001b[39m\n", 85 | " \u001b[90m [b27032c2] \u001b[39m\u001b[92m+ LibCURL\u001b[39m\n", 86 | " \u001b[90m [76f85450] \u001b[39m\u001b[92m+ LibGit2\u001b[39m\n", 87 | " \u001b[90m [8f399da3] \u001b[39m\u001b[92m+ Libdl\u001b[39m\n", 88 | " \u001b[90m [37e2e46d] \u001b[39m\u001b[92m+ LinearAlgebra\u001b[39m\n", 89 | " \u001b[90m [56ddb016] \u001b[39m\u001b[92m+ Logging\u001b[39m\n", 90 | " \u001b[90m [d6f4376e] \u001b[39m\u001b[92m+ Markdown\u001b[39m\n", 91 | " \u001b[90m [ca575930] \u001b[39m\u001b[92m+ NetworkOptions\u001b[39m\n", 92 | " \u001b[90m [44cfe95a] \u001b[39m\u001b[92m+ Pkg\u001b[39m\n", 93 | " \u001b[90m [de0858da] \u001b[39m\u001b[92m+ Printf\u001b[39m\n", 94 | " \u001b[90m [3fa0cd96] \u001b[39m\u001b[92m+ REPL\u001b[39m\n", 95 | " \u001b[90m [9a3f8284] \u001b[39m\u001b[92m+ Random\u001b[39m\n", 96 | " \u001b[90m [ea8e919c] \u001b[39m\u001b[92m+ 
SHA\u001b[39m\n", 97 | " \u001b[90m [9e88b42a] \u001b[39m\u001b[92m+ Serialization\u001b[39m\n", 98 | " \u001b[90m [6462fe0b] \u001b[39m\u001b[92m+ Sockets\u001b[39m\n", 99 | " \u001b[90m [fa267f1f] \u001b[39m\u001b[92m+ TOML\u001b[39m\n", 100 | " \u001b[90m [a4e569a6] \u001b[39m\u001b[92m+ Tar\u001b[39m\n", 101 | " \u001b[90m [8dfed614] \u001b[39m\u001b[92m+ Test\u001b[39m\n", 102 | " \u001b[90m [cf7118a7] \u001b[39m\u001b[92m+ UUIDs\u001b[39m\n", 103 | " \u001b[90m [4ec0a83e] \u001b[39m\u001b[92m+ Unicode\u001b[39m\n", 104 | " \u001b[90m [e66e0078] \u001b[39m\u001b[92m+ CompilerSupportLibraries_jll\u001b[39m\n", 105 | " \u001b[90m [deac9b47] \u001b[39m\u001b[92m+ LibCURL_jll\u001b[39m\n", 106 | " \u001b[90m [29816b5a] \u001b[39m\u001b[92m+ LibSSH2_jll\u001b[39m\n", 107 | " \u001b[90m [c8ffd9c3] \u001b[39m\u001b[92m+ MbedTLS_jll\u001b[39m\n", 108 | " \u001b[90m [14a3606d] \u001b[39m\u001b[92m+ MozillaCACerts_jll\u001b[39m\n", 109 | " \u001b[90m [4536629a] \u001b[39m\u001b[92m+ OpenBLAS_jll\u001b[39m\n", 110 | " \u001b[90m [83775a58] \u001b[39m\u001b[92m+ Zlib_jll\u001b[39m\n", 111 | " \u001b[90m [8e850b90] \u001b[39m\u001b[92m+ libblastrampoline_jll\u001b[39m\n", 112 | " \u001b[90m [8e850ede] \u001b[39m\u001b[92m+ nghttp2_jll\u001b[39m\n", 113 | " \u001b[90m [3f19e933] \u001b[39m\u001b[92m+ p7zip_jll\u001b[39m\n", 114 | "\u001b[32m\u001b[1m Resolving\u001b[22m\u001b[39m package versions...\n", 115 | "\u001b[32m\u001b[1m Updating\u001b[22m\u001b[39m `/tmp/jl_yAfFxB/Project.toml`\n", 116 | " \u001b[90m [6e4b80f9] \u001b[39m\u001b[92m+ BenchmarkTools v1.2.0\u001b[39m\n", 117 | "\u001b[32m\u001b[1m Updating\u001b[22m\u001b[39m `/tmp/jl_yAfFxB/Manifest.toml`\n", 118 | " \u001b[90m [6e4b80f9] \u001b[39m\u001b[92m+ BenchmarkTools v1.2.0\u001b[39m\n", 119 | " \u001b[90m [682c06a0] \u001b[39m\u001b[92m+ JSON v0.21.2\u001b[39m\n", 120 | " \u001b[90m [69de0a69] \u001b[39m\u001b[92m+ Parsers v2.1.2\u001b[39m\n", 121 | " \u001b[90m [a63ad114] \u001b[39m\u001b[92m+ 
Mmap\u001b[39m\n", 122 | " \u001b[90m [9abbd945] \u001b[39m\u001b[92m+ Profile\u001b[39m\n", 123 | " \u001b[90m [2f01184e] \u001b[39m\u001b[92m+ SparseArrays\u001b[39m\n", 124 | " \u001b[90m [10745b16] \u001b[39m\u001b[92m+ Statistics\u001b[39m\n", 125 | "\u001b[32m\u001b[1m Resolving\u001b[22m\u001b[39m package versions...\n", 126 | "\u001b[32m\u001b[1m Updating\u001b[22m\u001b[39m `/tmp/jl_yAfFxB/Project.toml`\n", 127 | " \u001b[90m [f6369f11] \u001b[39m\u001b[92m+ ForwardDiff v0.10.23\u001b[39m\n", 128 | "\u001b[32m\u001b[1m Updating\u001b[22m\u001b[39m `/tmp/jl_yAfFxB/Manifest.toml`\n", 129 | " \u001b[90m [d360d2e6] \u001b[39m\u001b[92m+ ChainRulesCore v1.11.1\u001b[39m\n", 130 | " \u001b[90m [9e997f8a] \u001b[39m\u001b[92m+ ChangesOfVariables v0.1.1\u001b[39m\n", 131 | " \u001b[90m [bbf7d656] \u001b[39m\u001b[92m+ CommonSubexpressions v0.3.0\u001b[39m\n", 132 | " \u001b[90m [34da2185] \u001b[39m\u001b[92m+ Compat v3.40.0\u001b[39m\n", 133 | " \u001b[90m [163ba53b] \u001b[39m\u001b[92m+ DiffResults v1.0.3\u001b[39m\n", 134 | " \u001b[90m [b552c78f] \u001b[39m\u001b[92m+ DiffRules v1.4.0\u001b[39m\n", 135 | " \u001b[90m [ffbed154] \u001b[39m\u001b[92m+ DocStringExtensions v0.8.6\u001b[39m\n", 136 | " \u001b[90m [f6369f11] \u001b[39m\u001b[92m+ ForwardDiff v0.10.23\u001b[39m\n", 137 | " \u001b[90m [3587e190] \u001b[39m\u001b[92m+ InverseFunctions v0.1.2\u001b[39m\n", 138 | " \u001b[90m [92d709cd] \u001b[39m\u001b[92m+ IrrationalConstants v0.1.1\u001b[39m\n", 139 | " \u001b[90m [2ab3a3ac] \u001b[39m\u001b[92m+ LogExpFunctions v0.3.5\u001b[39m\n", 140 | " \u001b[90m [1914dd2f] \u001b[39m\u001b[92m+ MacroTools v0.5.9\u001b[39m\n", 141 | " \u001b[90m [77ba4419] \u001b[39m\u001b[92m+ NaNMath v0.3.5\u001b[39m\n", 142 | " \u001b[90m [276daf66] \u001b[39m\u001b[92m+ SpecialFunctions v1.8.1\u001b[39m\n", 143 | " \u001b[90m [90137ffa] \u001b[39m\u001b[92m+ StaticArrays v1.2.13\u001b[39m\n", 144 | " \u001b[90m [efe28fd5] \u001b[39m\u001b[92m+ OpenSpecFun_jll 
v0.5.5+0\u001b[39m\n", 145 | " \u001b[90m [8bb1440f] \u001b[39m\u001b[92m+ DelimitedFiles\u001b[39m\n", 146 | " \u001b[90m [8ba89e20] \u001b[39m\u001b[92m+ Distributed\u001b[39m\n", 147 | " \u001b[90m [1a1011a3] \u001b[39m\u001b[92m+ SharedArrays\u001b[39m\n", 148 | " \u001b[90m [05823500] \u001b[39m\u001b[92m+ OpenLibm_jll\u001b[39m\n" 149 | ] 150 | } 151 | ], 152 | "metadata": {} 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 38, 157 | "source": [ 158 | "using Enzyme\n", 159 | "using ForwardDiff\n", 160 | "using BenchmarkTools" 161 | ], 162 | "outputs": [], 163 | "metadata": {} 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "source": [ 168 | "# Activity annotations\n", 169 | "- `Const`\n", 170 | "- `Active`\n", 171 | "- `Duplicated`\n", 172 | "- `DuplicatedNoNeed`" 173 | ], 174 | "metadata": {} 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 39, 179 | "source": [ 180 | "square(x) = x^2" 181 | ], 182 | "outputs": [ 183 | { 184 | "data": { 185 | "text/plain": [ 186 | "square (generic function with 1 method)" 187 | ] 188 | }, 189 | "metadata": {}, 190 | "output_type": "display_data" 191 | } 192 | ], 193 | "metadata": {} 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 40, 198 | "source": [ 199 | "autodiff(square, 1.0)" 200 | ], 201 | "outputs": [ 202 | { 203 | "data": { 204 | "text/plain": [ 205 | "()" 206 | ] 207 | }, 208 | "metadata": {}, 209 | "output_type": "display_data" 210 | } 211 | ], 212 | "metadata": {} 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "source": [ 217 | "The default activity for values is `Const`." 218 | ], 219 | "metadata": {} 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": 41, 224 | "source": [ 225 | "autodiff(square, Const(1.0))" 226 | ], 227 | "outputs": [ 228 | { 229 | "data": { 230 | "text/plain": [ 231 | "()" 232 | ] 233 | }, 234 | "metadata": {}, 235 | "output_type": "display_data" 236 | } 237 | ], 238 | "metadata": {} 239 | }, 240 | { 241 |
"cell_type": "code", 242 | "execution_count": 42, 243 | "source": [ 244 | "autodiff(square, Active(1.0))" 245 | ], 246 | "outputs": [ 247 | { 248 | "data": { 249 | "text/plain": [ 250 | "(2.0,)" 251 | ] 252 | }, 253 | "metadata": {}, 254 | "output_type": "display_data" 255 | } 256 | ], 257 | "metadata": {} 258 | }, 259 | { 260 | "cell_type": "markdown", 261 | "source": [ 262 | "## Supporting mutating functions\n", 263 | "\n", 264 | "Enzyme can differentiate through mutating functions. This requires that the user passes in the shadow variables with the `Duplicated` or `DuplicatedNoNeed` activity annotation." 265 | ], 266 | "metadata": {} 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": 43, 271 | "source": [ 272 | "function cube(y, x)\n", 273 | "\ty[] = x[]^3\n", 274 | "\treturn nothing\n", 275 | "end" 276 | ], 277 | "outputs": [ 278 | { 279 | "data": { 280 | "text/plain": [ 281 | "cube (generic function with 1 method)" 282 | ] 283 | }, 284 | "metadata": {}, 285 | "output_type": "display_data" 286 | } 287 | ], 288 | "metadata": {} 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": 44, 293 | "source": [ 294 | "x = Ref(4.0)\n", 295 | "y = Ref(0.0)\n", 296 | "cube(y, x)\n", 297 | "y[]" 298 | ], 299 | "outputs": [ 300 | { 301 | "data": { 302 | "text/plain": [ 303 | "64.0" 304 | ] 305 | }, 306 | "metadata": {}, 307 | "output_type": "display_data" 308 | } 309 | ], 310 | "metadata": {} 311 | }, 312 | { 313 | "cell_type": "markdown", 314 | "source": [ 315 | "\n", 316 | "In order to calculate the gradient of `x`, we have to propagate `1.0` into the\n", 317 | "shadow `dy`.\n" 318 | ], 319 | "metadata": {} 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": 45, 324 | "source": [ 325 | "x = Ref(4.0)\n", 326 | "dx = Ref(0.0)\n", 327 | "\n", 328 | "y = Ref(0.0)\n", 329 | "dy = Ref(1.0)\n", 330 | "\n", 331 | "autodiff(cube, Duplicated(y, dy), Duplicated(x, dx))\n", 332 | "y[], dy[], x[], dx[]" 333 | ], 334 | "outputs": [ 335 |
{ 336 | "data": { 337 | "text/plain": [ 338 | "(64.0, 0.0, 4.0, 48.0)" 339 | ] 340 | }, 341 | "metadata": {}, 342 | "output_type": "display_data" 343 | } 344 | ], 345 | "metadata": {} 346 | }, 347 | { 348 | "cell_type": "markdown", 349 | "source": [ 350 | "# Reflection" 351 | ], 352 | "metadata": {} 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": 46, 357 | "source": [ 358 | "\n", 359 | "Enzyme.Compiler.enzyme_code_llvm(cube, Const,\n", 360 | "\tTuple{Enzyme.Duplicated{Base.RefValue{Float64}}, \n", 361 | "\tDuplicated{Base.RefValue{Float64}}}, debuginfo=:none)" 362 | ], 363 | "outputs": [ 364 | { 365 | "output_type": "stream", 366 | "name": "stdout", 367 | "text": [ 368 | "; Function Attrs: alwaysinline\n", 369 | "define void @diffejulia_cube_9969wrap({}* %0, {}* %1, {}* %2, {}* %3) #3 {\n", 370 | "entry:\n", 371 | " %\"'ipc6.i\" = bitcast {}* %3 to double*\n", 372 | " %4 = bitcast {}* %2 to double*\n", 373 | " %5 = load double, double* %4, align 8\n", 374 | " %6 = fmul double %5, %5\n", 375 | " %7 = fmul double %5, %6\n", 376 | " %\"'ipc.i\" = bitcast {}* %1 to double*\n", 377 | " %8 = bitcast {}* %0 to double*\n", 378 | " store double %7, double* %8, align 8\n", 379 | " %9 = load double, double* %\"'ipc.i\", align 8\n", 380 | " store double 0.000000e+00, double* %\"'ipc.i\", align 8\n", 381 | " %10 = load double, double* %\"'ipc6.i\", align 8\n", 382 | " %11 = fmul fast double %6, 3.000000e+00\n", 383 | " %reass.mul = fmul fast double %11, %9\n", 384 | " %12 = fadd fast double %reass.mul, %10\n", 385 | " store double %12, double* %\"'ipc6.i\", align 8\n", 386 | " ret void\n", 387 | "}\n" 388 | ] 389 | } 390 | ], 391 | "metadata": {} 392 | }, 393 | { 394 | "cell_type": "markdown", 395 | "source": [ 396 | "# Differentiating through control-flow\n", 397 | "Let's differentiate through some control flow. 
This kind of scalar code is where normally one would use `ForwardDiff.jl` since the machine learning optimized toolkits like Zygote have unacceptable overheads." 398 | ], 399 | "metadata": {} 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": 47, 404 | "source": [ 405 | "# Taylor series for `-log(1-x)`\n", 406 | "# eval at -log(1-1/2) = -log(1/2)\n", 407 | "function taylor(f::T, N=10^7) where T\n", 408 | " g = zero(T)\n", 409 | " for i in 1:N\n", 410 | " g += f^i / i\n", 411 | " end\n", 412 | " return g\n", 413 | "end\n", 414 | "\n", 415 | "autodiff(taylor, Active(0.5), Const(10^8))\n" 416 | ], 417 | "outputs": [ 418 | { 419 | "data": { 420 | "text/plain": [ 421 | "(2.0,)" 422 | ] 423 | }, 424 | "metadata": {}, 425 | "output_type": "display_data" 426 | } 427 | ], 428 | "metadata": {} 429 | }, 430 | { 431 | "cell_type": "code", 432 | "execution_count": 48, 433 | "source": [ 434 | "fwd_taylor(x) = ForwardDiff.derivative(taylor, 0.5)\n", 435 | "\n", 436 | "enz_taylor(x) = autodiff(taylor, Active(x))\n" 437 | ], 438 | "outputs": [ 439 | { 440 | "data": { 441 | "text/plain": [ 442 | "enz_taylor (generic function with 1 method)" 443 | ] 444 | }, 445 | "metadata": {}, 446 | "output_type": "display_data" 447 | } 448 | ], 449 | "metadata": {} 450 | }, 451 | { 452 | "cell_type": "code", 453 | "execution_count": 49, 454 | "source": [ 455 | "\n", 456 | "@benchmark fwd_taylor($(Ref(0.5))[])" 457 | ], 458 | "outputs": [ 459 | { 460 | "data": { 461 | "text/plain": [ 462 | "BenchmarkTools.Trial: 6 samples with 1 evaluation.\n", 463 | " Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m): \u001b[39m\u001b[36m\u001b[1m869.791 ms\u001b[22m\u001b[39m … \u001b[35m 1.031 s\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m0.00% … 0.00%\n", 464 | " Time \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m): \u001b[39m\u001b[34m\u001b[1m903.407 ms 
\u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m): \u001b[39m0.00%\n", 465 | " Time \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m): \u001b[39m\u001b[32m\u001b[1m919.832 ms\u001b[22m\u001b[39m ± \u001b[32m61.069 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m): \u001b[39m0.00% ± 0.00%\n", 466 | "\n", 467 | " \u001b[39m█\u001b[39m \u001b[39m█\u001b[39m█\u001b[34m \u001b[39m\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[32m \u001b[39m\u001b[39m \u001b[39m█\u001b[39m \u001b[39m \u001b[39m█\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m█\u001b[39m \u001b[39m \n", 468 | " \u001b[39m█\u001b[39m▁\u001b[39m█\u001b[39m█\u001b[34m▁\u001b[39m\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[32m▁\u001b[39m\u001b[39m▁\u001b[39m█\u001b[39m▁\u001b[39m▁\u001b[39m█\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m█\u001b[39m \u001b[39m▁\n", 469 | " 870 ms\u001b[90m Histogram: frequency by time\u001b[39m 1.03 s \u001b[0m\u001b[1m<\u001b[22m\n", 470 | "\n", 471 | " Memory estimate\u001b[90m: 
\u001b[39m\u001b[33m0 bytes\u001b[39m, allocs estimate\u001b[90m: \u001b[39m\u001b[33m0\u001b[39m." 472 | ] 473 | }, 474 | "metadata": {}, 475 | "output_type": "display_data" 476 | } 477 | ], 478 | "metadata": {} 479 | }, 480 | { 481 | "cell_type": "code", 482 | "execution_count": 50, 483 | "source": [ 484 | "@benchmark enz_taylor($(Ref(0.5))[])" 485 | ], 486 | "outputs": [ 487 | { 488 | "data": { 489 | "text/plain": [ 490 | "BenchmarkTools.Trial: 11 samples with 1 evaluation.\n", 491 | " Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m): \u001b[39m\u001b[36m\u001b[1m473.358 ms\u001b[22m\u001b[39m … \u001b[35m553.634 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m0.00% … 0.00%\n", 492 | " Time \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m): \u001b[39m\u001b[34m\u001b[1m489.023 ms \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m): \u001b[39m0.00%\n", 493 | " Time \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m): \u001b[39m\u001b[32m\u001b[1m495.482 ms\u001b[22m\u001b[39m ± \u001b[32m 23.014 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m): \u001b[39m0.00% ± 0.00%\n", 494 | "\n", 495 | " \u001b[39m▁\u001b[39m▁\u001b[39m \u001b[39m \u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m▁\u001b[39m \u001b[39m \u001b[34m█\u001b[39m\u001b[39m \u001b[39m \u001b[39m \u001b[39m▁\u001b[39m▁\u001b[32m \u001b[39m\u001b[39m \u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m 
\u001b[39m \u001b[39m \u001b[39m \u001b[39m▁\u001b[39m \u001b[39m \n", 496 | " \u001b[39m█\u001b[39m█\u001b[39m▁\u001b[39m▁\u001b[39m█\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m█\u001b[39m▁\u001b[39m▁\u001b[34m█\u001b[39m\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m█\u001b[39m█\u001b[32m▁\u001b[39m\u001b[39m▁\u001b[39m█\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m█\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m█\u001b[39m \u001b[39m▁\n", 497 | " 473 ms\u001b[90m Histogram: frequency by time\u001b[39m 554 ms \u001b[0m\u001b[1m<\u001b[22m\n", 498 | "\n", 499 | " Memory estimate\u001b[90m: \u001b[39m\u001b[33m16 bytes\u001b[39m, allocs estimate\u001b[90m: \u001b[39m\u001b[33m1\u001b[39m." 500 | ] 501 | }, 502 | "metadata": {}, 503 | "output_type": "display_data" 504 | } 505 | ], 506 | "metadata": {} 507 | }, 508 | { 509 | "cell_type": "markdown", 510 | "source": [ 511 | "\n", 512 | "# Differentiating through more complicated codes\n", 513 | "\n", 514 | "## A custom matrix multiply" 515 | ], 516 | "metadata": {} 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": 51, 521 | "source": [ 522 | "\n", 523 | "function mymul!(R, A, B)\n", 524 | " @assert axes(A,2) == axes(B,1)\n", 525 | " @inbounds @simd for i in eachindex(R)\n", 526 | " R[i] = 0\n", 527 | " end\n", 528 | " @inbounds for j in axes(B, 2), i in axes(A, 1)\n", 529 | " @inbounds @simd for k in axes(A,2)\n", 530 | " R[i,j] += A[i,k] * B[k,j]\n", 531 | " end\n", 532 | " end\n", 533 | " nothing\n", 534 | "end" 535 | ], 536 | "outputs": [ 537 | { 538 | "data": { 539 | "text/plain": [ 540 | "mymul! 
(generic function with 1 method)" 541 | ] 542 | }, 543 | "metadata": {}, 544 | "output_type": "display_data" 545 | } 546 | ], 547 | "metadata": {} 548 | }, 549 | { 550 | "cell_type": "code", 551 | "execution_count": 52, 552 | "source": [ 553 | "A = rand(1024, 64)\n", 554 | "B = rand(64, 512)\n", 555 | "\n", 556 | "R = zeros(size(A,1), size(B,2))\n", 557 | "∂z_∂R = rand(size(R)...) # Some gradient/tangent passed to us\n", 558 | "\n", 559 | "∂z_∂A = zero(A)\n", 560 | "∂z_∂B = zero(B)\n", 561 | "\n", 562 | "Enzyme.autodiff(mymul!, \n", 563 | "\tDuplicated(R, ∂z_∂R),\n", 564 | "\tDuplicated(A, ∂z_∂A),\n", 565 | "\tDuplicated(B, ∂z_∂B))" 566 | ], 567 | "outputs": [ 568 | { 569 | "data": { 570 | "text/plain": [ 571 | "()" 572 | ] 573 | }, 574 | "metadata": {}, 575 | "output_type": "display_data" 576 | } 577 | ], 578 | "metadata": {} 579 | }, 580 | { 581 | "cell_type": "markdown", 582 | "source": [ 583 | "\n", 584 | "Let's confirm correctness of result" 585 | ], 586 | "metadata": {} 587 | }, 588 | { 589 | "cell_type": "code", 590 | "execution_count": 53, 591 | "source": [ 592 | "R ≈ A * B" 593 | ], 594 | "outputs": [ 595 | { 596 | "data": { 597 | "text/plain": [ 598 | "true" 599 | ] 600 | }, 601 | "metadata": {}, 602 | "output_type": "display_data" 603 | } 604 | ], 605 | "metadata": {} 606 | }, 607 | { 608 | "cell_type": "markdown", 609 | "source": [ 610 | "and correctness of the gradients" 611 | ], 612 | "metadata": {} 613 | }, 614 | { 615 | "cell_type": "code", 616 | "execution_count": 54, 617 | "source": [ 618 | "∂z_∂A ≈ ∂z_∂R * B'" 619 | ], 620 | "outputs": [ 621 | { 622 | "data": { 623 | "text/plain": [ 624 | "true" 625 | ] 626 | }, 627 | "metadata": {}, 628 | "output_type": "display_data" 629 | } 630 | ], 631 | "metadata": {} 632 | }, 633 | { 634 | "cell_type": "markdown", 635 | "source": [ 636 | "# Some more fun" 637 | ], 638 | "metadata": {} 639 | }, 640 | { 641 | "cell_type": "code", 642 | "execution_count": 55, 643 | "source": [ 644 | "struct LList\n", 645 | " 
next::Union{LList,Nothing}\n", 646 | "\tval::Float64\n", 647 | "end \n", 648 | "\n", 649 | "function sumlist(n::LList)\n", 650 | " sum = 0.0\n", 651 | " while n !== nothing\n", 652 | " sum += n.val\n", 653 | " n = n.next\n", 654 | " end\n", 655 | " sum\n", 656 | "end" 657 | ], 658 | "outputs": [ 659 | { 660 | "data": { 661 | "text/plain": [ 662 | "sumlist (generic function with 1 method)" 663 | ] 664 | }, 665 | "metadata": {}, 666 | "output_type": "display_data" 667 | } 668 | ], 669 | "metadata": {} 670 | }, 671 | { 672 | "cell_type": "code", 673 | "execution_count": 56, 674 | "source": [ 675 | "regular = LList(LList(nothing, 1.0), 2.0)\n", 676 | "shadow = LList(LList(nothing, 0.0), 0.0)\n", 677 | "autodiff(sumlist, Duplicated(regular, shadow))" 678 | ], 679 | "outputs": [ 680 | { 681 | "data": { 682 | "text/plain": [ 683 | "()" 684 | ] 685 | }, 686 | "metadata": {}, 687 | "output_type": "display_data" 688 | } 689 | ], 690 | "metadata": {} 691 | }, 692 | { 693 | "cell_type": "code", 694 | "execution_count": 57, 695 | "source": [ 696 | "shadow.val ≈ 1.0" 697 | ], 698 | "outputs": [ 699 | { 700 | "data": { 701 | "text/plain": [ 702 | "true" 703 | ] 704 | }, 705 | "metadata": {}, 706 | "output_type": "display_data" 707 | } 708 | ], 709 | "metadata": {} 710 | }, 711 | { 712 | "cell_type": "code", 713 | "execution_count": 58, 714 | "source": [ 715 | "shadow.next.val ≈ 1.0" 716 | ], 717 | "outputs": [ 718 | { 719 | "data": { 720 | "text/plain": [ 721 | "true" 722 | ] 723 | }, 724 | "metadata": {}, 725 | "output_type": "display_data" 726 | } 727 | ], 728 | "metadata": {} 729 | }, 730 | { 731 | "cell_type": "markdown", 732 | "source": [ 733 | "# Differentiating through Parallelism" 734 | ], 735 | "metadata": {} 736 | }, 737 | { 738 | "cell_type": "code", 739 | "execution_count": 59, 740 | "source": [ 741 | "function tasktest(M, x)\n", 742 | " xr = Ref(x)\n", 743 | " task = Threads.@spawn begin\n", 744 | " @inbounds M[1] = xr[]\n", 745 | " end\n", 746 | " @inbounds M[2] = 
x\n", 747 | " wait(task)\n", 748 | " nothing\n", 749 | "end" 750 | ], 751 | "outputs": [ 752 | { 753 | "data": { 754 | "text/plain": [ 755 | "tasktest (generic function with 1 method)" 756 | ] 757 | }, 758 | "metadata": {}, 759 | "output_type": "display_data" 760 | } 761 | ], 762 | "metadata": {} 763 | }, 764 | { 765 | "cell_type": "code", 766 | "execution_count": 60, 767 | "source": [ 768 | "R = Float64[0., 0.]\n", 769 | "dR = Float64[2., 3.]\n", 770 | "\n", 771 | "Enzyme.autodiff(tasktest, Duplicated(R, dR), Active(2.0))" 772 | ], 773 | "outputs": [ 774 | { 775 | "output_type": "stream", 776 | "name": "stdout", 777 | "text": [ 778 | "┌ Warning: active variables passeed by value to jl_new_task are not yet supported\n", 779 | "└ @ Enzyme.Compiler /home/vchuravy/.julia/packages/Enzyme/2n29R/src/compiler.jl:212\n" 780 | ] 781 | }, 782 | { 783 | "data": { 784 | "text/plain": [ 785 | "(5.0,)" 786 | ] 787 | }, 788 | "metadata": {}, 789 | "output_type": "display_data" 790 | } 791 | ], 792 | "metadata": {} 793 | }, 794 | { 795 | "cell_type": "code", 796 | "execution_count": 61, 797 | "source": [ 798 | "Float64[2.0, 2.0] ≈ R\n", 799 | "Float64[0.0, 0.0] ≈ dR" 800 | ], 801 | "outputs": [ 802 | { 803 | "data": { 804 | "text/plain": [ 805 | "true" 806 | ] 807 | }, 808 | "metadata": {}, 809 | "output_type": "display_data" 810 | } 811 | ], 812 | "metadata": {} 813 | }, 814 | { 815 | "cell_type": "markdown", 816 | "source": [ 817 | "# Using the Enzyme API to integrate with a different language\n", 818 | "\n", 819 | "Enzyme exports a C-API (https://github.com/wsmoses/Enzyme/blob/main/enzyme/Enzyme/CApi.h)\n", 820 | "\n", 821 | "## Steps\n", 822 | "\n", 823 | "1. Obtain the LLVM IR of the code you want to differentiate\n", 824 | "2. Run an early set of optimizations\n", 825 | "3. Provide `TypeTree` information and activity for the input arguments\n", 826 | "4. Register custom adjoints for runtime functions\n", 827 | "5. 
Run `EnzymeCreateAugmentedPrimal`/`EnzymeCreatePrimalAndGradient` to synthesize gradients\n", 828 | "6. Compile and Link gradient code -- maybe using Orc\n", 829 | "7. Call from user program -- ABI can be finicky" 830 | ], 831 | "metadata": {} 832 | } 833 | ], 834 | "metadata": { 835 | "orig_nbformat": 4, 836 | "language_info": { 837 | "name": "python" 838 | } 839 | }, 840 | "nbformat": 4, 841 | "nbformat_minor": 2 842 | } -------------------------------------------------------------------------------- /julia_activity/.ipynb_checkpoints/activity-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Installing Enzyme" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "import Pkg\n", 18 | "Pkg.add(\"Enzyme\")" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "using Enzyme" 28 | ] 29 | }, 30 | { 31 | "attachments": {}, 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "## Activity Annotations\n", 36 | "\n", 37 | "* `Const`\n", 38 | "* `Active`\n", 39 | "* `Duplicated`\n", 40 | "* `DuplicatedNoNeed`" 41 | ] 42 | }, 43 | { 44 | "attachments": {}, 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "square(x) = x^2" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "autodiff(Reverse, square, 1.0)" 58 | ] 59 | }, 60 | { 61 | "attachments": {}, 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "Default activity for values is `Const`" 66 | ] 67 | }, 68 | { 69 | "attachments": {}, 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "autodiff(Reverse, 
square, Const(1.0))" 74 | ] 75 | }, 76 | { 77 | "attachments": {}, 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "When adding the `Active` annotation Enzyme differentiates with respect to the argument" 82 | ] 83 | }, 84 | { 85 | "attachments": {}, 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "autodiff(Reverse, square, Active(1.0))" 90 | ] 91 | } 92 | ], 93 | "metadata": { 94 | "kernelspec": { 95 | "display_name": "Python 3", 96 | "language": "python", 97 | "name": "python3" 98 | }, 99 | "language_info": { 100 | "name": "python", 101 | "version": "3.11.1" 102 | }, 103 | "orig_nbformat": 4, 104 | "vscode": { 105 | "interpreter": { 106 | "hash": "5c7b89af1651d0b8571dde13640ecdccf7d5a6204171d6ab33e7c296e100e08a" 107 | } 108 | } 109 | }, 110 | "nbformat": 4, 111 | "nbformat_minor": 2 112 | } 113 | -------------------------------------------------------------------------------- /julia_activity/.jupyter/desktop-workspaces/default-37a8.jupyterlab-workspace: -------------------------------------------------------------------------------- 1 | {"data":{"layout-restorer:data":{"main":{"dock":{"type":"tab-area","currentIndex":0,"widgets":["notebook:activity.ipynb"]},"current":"notebook:activity.ipynb"},"down":{"size":0,"widgets":[]},"left":{"collapsed":false,"current":"filebrowser","widgets":["filebrowser","running-sessions","@jupyterlab/toc:plugin","extensionmanager.main-view"]},"right":{"collapsed":true,"widgets":["jp-property-inspector","debugger-sidebar"]},"relativeSizes":[0.15111378687537627,0.8488862131246238,0]},"notebook:activity.ipynb":{"data":{"path":"activity.ipynb","factory":"Notebook"}}},"metadata":{"id":"default"}} -------------------------------------------------------------------------------- /julia_activity/activity.jl: -------------------------------------------------------------------------------- 1 | using Enzyme 2 | using Printf 3 | 4 | # Defining the square function 5 | square(x) = x^2; 6 | 7 | # No 
activity annotations 8 | result_1 = Enzyme.autodiff(Reverse, square, 1.0) 9 | @printf("No annotations result: %s\n", result_1) # autodiff returns a tuple, so format it with %s rather than %f 10 | 11 | # No activity annotations = constant annotation 12 | result_2 = Enzyme.autodiff(Reverse, square, Const(1.0)) 13 | @printf("Equals constant annotations: %s\n", result_2) 14 | 15 | # Adding activity annotations 16 | result_3 = Enzyme.autodiff(Reverse, square, Active(1.0)) 17 | @printf("Adding activity annotations: %s\n", result_3) -------------------------------------------------------------------------------- /julia_custom/custom.jl: -------------------------------------------------------------------------------- 1 | using Enzyme 2 | using Enzyme: EnzymeRules 3 | 4 | # Defining our function 5 | f(x) = x^2; 6 | 7 | function f_ip(x) 8 | x[1] *= x[1] 9 | return nothing 10 | end 11 | 12 | import .EnzymeRules: augmented_primal, reverse, Annotation, has_rrule, has_rrule_from_sig 13 | using .EnzymeRules 14 | 15 | function augmented_primal(config::ConfigWidth{1}, func::Const{typeof(f)}, ::Type{<:Active}, x::Active) 16 | if needs_primal(config) 17 | return AugmentedReturn(func.val(x.val), nothing, nothing) 18 | else 19 | return AugmentedReturn(nothing, nothing, nothing) 20 | end 21 | end 22 | 23 | function reverse(config::ConfigWidth{1}, ::Const{typeof(f)}, dret::Active, tape, x::Active) 24 | if needs_primal(config) 25 | return (10+2*x.val*dret.val,) 26 | else 27 | return (100+2*x.val*dret.val,) 28 | end 29 | end 30 | 31 | function augmented_primal(::Config{false, false, 1}, func::Const{typeof(f_ip)}, ::Type{<:Const}, x::Duplicated) 32 | v = x.val[1] 33 | x.val[1] *= v 34 | return AugmentedReturn(nothing, nothing, v) 35 | end 36 | 37 | function reverse(::Config{false, false, 1}, ::Const{typeof(f_ip)}, ::Type{<:Const}, tape, x::Duplicated) 38 | x.dval[1] = 100 + x.dval[1] * tape 39 | return () 40 | end 41 | 42 | # To which we can then apply the Enzyme calls 43 | Enzyme.autodiff(Enzyme.Reverse, f, Active(2.0))[1][1]; 44 |
Enzyme.autodiff(Enzyme.Reverse, x->f(x)^2, Active(2.0))[1][1]; 45 | 46 | x = [2.0]; 47 | dx = [1.0]; 48 | 49 | Enzyme.autodiff(Enzyme.Reverse, f_ip, Duplicated(x, dx)); 50 | -------------------------------------------------------------------------------- /julia_fwd_and_batch/fwd_and_batch.jl: -------------------------------------------------------------------------------- 1 | using Enzyme 2 | 3 | # Test function: reads x[1] and x[2], writes its result in place into y[1] 4 | function f(x::Array{Float64}, y::Array{Float64}) 5 | y[1] = x[1] * x[1] + x[2] * x[1] 6 | return nothing 7 | end; 8 | 9 | # To use forward mode we seed the computation with a tangent direction dx for x 10 | x = [2.0, 2.0]; 11 | dx = [1.0, 0.0]; 12 | y = [0.0]; 13 | dy = [0.0]; 14 | 15 | # The second element of each Duplicated pair (dx, dy) is the one that stores the tangent 16 | Enzyme.autodiff(Forward, f, Duplicated(x, dx), Duplicated(y, dy)) 17 | 18 | # To propagate multiple tangents at the same time and obtain the Hessian in one autodiff call, 19 | # we seed one tangent per input direction in the following way 20 | y = [0.0]; 21 | x = [2.0, 2.0]; 22 | 23 | vdy = ([0.0],[0.0]); 24 | vdx = ([1.0, 0.0], [0.0, 1.0]); 25 | 26 | bx = [0.0, 0.0]; 27 | by = [1.0]; 28 | vdbx = ([0.0, 0.0], [0.0, 0.0]); 29 | vdby = ([0.0], [0.0]); 30 | 31 | # The AD call then takes the following form: a batched forward pass over the deferred inner autodiff call 32 | Enzyme.autodiff( 33 | Forward, 34 | (x,y) -> Enzyme.autodiff_deferred(f, x, y), 35 | BatchDuplicated(Duplicated(x, bx), Duplicated.(vdx, vdbx)), 36 | BatchDuplicated(Duplicated(y, by), Duplicated.(vdy, vdby)), 37 | ); 38 | -------------------------------------------------------------------------------- /mpi/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnzymeAD/Enzyme-Tutorial/803ddd188c65087b2ebe8aa19aa3983951bd1834/mpi/README.md -------------------------------------------------------------------------------- /mpi/disclaimer.txt: -------------------------------------------------------------------------------- 1 | MPI_Allgather 2
| MPI_Allreduce 3 | MPI_Barrier 4 | MPI_Bcast 5 | MPI_Comm_free 6 | MPI_Comm_rank 7 | MPI_Comm_size 8 | MPI_Comm_split 9 | MPI_Finalize 10 | MPI_Gather 11 | MPI_Scatter 12 | MPI_Get_count 13 | MPI_Graph_create 14 | MPI_Init 15 | MPI_Intercomm_create 16 | MPI_Irecv 17 | MPI_Issend 18 | MPI_Probe 19 | MPI_Recv 20 | MPI_Reduce 21 | MPI_Send 22 | MPI_Test 23 | MPI_Wait 24 | -------------------------------------------------------------------------------- /openmp/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnzymeAD/Enzyme-Tutorial/803ddd188c65087b2ebe8aa19aa3983951bd1834/openmp/README.md -------------------------------------------------------------------------------- /openmp/parallel_for/Makefile: -------------------------------------------------------------------------------- 1 | CLANG = /home/lpaehler/Work/Dev-Tools/llvm-fortran/f18-llvm-project/build/bin/clang 2 | 3 | LLVM_PATH = /home/lpaehler/Work/Dev-Tools/llvm-fortran/f18-llvm-project/build 4 | 5 | ENZYME_PATH = /home/lpaehler/Work/AutomaticDifferentiation/Enzyme/build/Enzyme/LLVMEnzyme-13.so 6 | LLVM13_PATH = /home/lpaehler/Work/AutomaticDifferentiation/llvm-project/build 7 | 8 | all: omp_parallel_for.o 9 | 10 | clean: 11 | rm -f *.o *.ll 12 | 13 | %.o: %.c 14 | $(LLVM_PATH)/bin/clang++ $^ -O3 -Xclang -load -Xclang $(ENZYME_PATH) -ffast-math -fopenmp -o /host/$@ 15 | 16 | run-%: %.o 17 | ../dockerscript.sh /host/$^ 18 | -------------------------------------------------------------------------------- /openmp/parallel_for/OldMakefile: -------------------------------------------------------------------------------- 1 | all: omp_parallel_for.o 2 | 3 | clean: 4 | rm -f *.o *.ll 5 | 6 | %.o: %.c 7 | ../dockerscript.sh clang-12 /host/$^ -O3 -Xclang -load -Xclang /Enzyme/enzyme/build/Enzyme/ClangEnzyme-12.so -ffast-math -fopenmp=libomp -o /host/$@ 8 | 9 | run-%: %.o 10 | ../dockerscript.sh /host/$^ 11 |
-------------------------------------------------------------------------------- /openmp/parallel_for/omp_parallel_for.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | 4 | // Do parallel for 5 | void omp(float *x, int npoints) { 6 | 7 | #pragma omp parallel for 8 | for (int i = 0; i < npoints; i++) { 9 | x[i] *= x[i]; 10 | } 11 | } 12 | 13 | 14 | double __enzyme_autodiff(void*, ...); 15 | 16 | int main() { 17 | 18 | // Initialize array 19 | float array[1000]; 20 | for(int i=0; i<1000; i++) { 21 | array[i] = i + 0.5; 22 | } 23 | 24 | // Set up the array to host the gradients 25 | float d_array[1000]; 26 | for(int i=0; i<1000; i++) { 27 | d_array[i] = 1.0f; 28 | } 29 | 30 | // Alter the entries 31 | #ifdef FORWARD 32 | omp(array, 1000); // plain call of the function defined above, no differentiation 33 | #else 34 | __enzyme_autodiff((void*)omp, array, d_array, 1000); 35 | #endif 36 | 37 | return 0; 38 | } 39 | -------------------------------------------------------------------------------- /openmp/parallel_for_nounroll/Makefile: -------------------------------------------------------------------------------- 1 | all: omp_parallel_for_nounroll.o 2 | 3 | clean: 4 | rm -f *.o *.ll 5 | 6 | %.o: %.c 7 | ../dockerscript.sh clang-12 /host/$^ -O3 -Xclang -load -Xclang /Enzyme/enzyme/build/Enzyme/ClangEnzyme-12.so -ffast-math -fopenmp -o /host/$@ 8 | 9 | run-%: %.o 10 | ../dockerscript.sh /host/$^ -------------------------------------------------------------------------------- /openmp/parallel_for_nounroll/omp_parallel_for_nounroll.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | 4 | // Do parallel for 5 | void omp(float *x, int npoints) { 6 | 7 | #pragma omp parallel for 8 | #pragma nounroll 9 | for (int i = 0; i < npoints; i++) { 10 | x[i] *= x[i]; 11 | } 12 | } 13 | 14 | 15 | double __enzyme_autodiff(void*, ...); 16 | 17 | int main() { 18 | 19 | // Initialize array 20 | float array[1000]; 21 | for(int i=0; i<1000; i++) { 22 |
array[i] = i + 0.5; 23 | } 24 | 25 | // Set up the array to host the gradients 26 | float d_array[1000]; 27 | for(int i=0; i<1000; i++) { 28 | d_array[i] = 1.0f; 29 | } 30 | 31 | // Alter the entries 32 | #ifdef FORWARD 33 | omp(array, 1000); // plain call of the function defined above, no differentiation 34 | #else 35 | __enzyme_autodiff((void*)omp, array, d_array, 1000); 36 | #endif 37 | 38 | return 0; 39 | } -------------------------------------------------------------------------------- /openmp/parallel_simple/Makefile: -------------------------------------------------------------------------------- 1 | all: omp_parallel.o 2 | 3 | clean: 4 | rm -f *.o *.ll 5 | 6 | %.o: %.c 7 | ../dockerscript.sh clang-12 /host/$^ -O3 -Xclang -load -Xclang /Enzyme/enzyme/build/Enzyme/ClangEnzyme-12.so -ffast-math -fopenmp -o /host/$@ 8 | 9 | run-%: %.o 10 | ../dockerscript.sh /host/$^ -------------------------------------------------------------------------------- /openmp/parallel_simple/omp_parallel.c: -------------------------------------------------------------------------------- 1 | #include <omp.h> 2 | 3 | // Change the array entries 4 | void subdomain_change(float *x, int istart, int ipoints) { 5 | int i; 6 | for (i = 0; i < ipoints; i++) { 7 | x[istart+i] = x[i] + i; 8 | } 9 | } 10 | 11 | // Partition into subdomains and alter the entries 12 | void sub(float *x, int npoints) { 13 | int iam, nt, ipoints, istart; 14 | 15 | #pragma omp parallel default(shared) private(iam, nt, ipoints, istart) 16 | { 17 | iam = omp_get_thread_num(); 18 | nt = omp_get_num_threads(); 19 | 20 | // Size of partition 21 | ipoints = npoints / nt; 22 | 23 | // Starting array index 24 | istart = ipoints * iam; 25 | 26 | // Last thread may be required to do more 27 | if (iam == nt - 1) 28 | ipoints = npoints - istart; 29 | subdomain_change(x, istart, ipoints); 30 | } 31 | } 32 | 33 | 34 | void __enzyme_autodiff(void*, ...); 35 | 36 | int main() { 37 | 38 | // Initialize array 39 | float array[1000]; 40 | for(int i=0; i<1000; i++) { 41 | array[i] = 0.0; 42 | } 43 | 44 |
// Set up the array to host the gradients 45 | float d_array[1000]; 46 | for(int i=0; i<1000; i++) { 47 | d_array[i] = 1.0f; 48 | } 49 | 50 | // Alter the entries 51 | // #ifdef FORWARD 52 | sub(array, 1000); 53 | // #else 54 | // __enzyme_autodiff((void*)sub, array, d_array, 1000); 55 | // #endif 56 | 57 | return 0; 58 | } 59 | --------------------------------------------------------------------------------