module ForwardDiffPresentation

using Plots
using ForwardDiff
using BenchmarkTools

# ForwardDiff vs. autograd benchmarks can be found in the ForwardDiff repository:
# https://github.com/JuliaDiff/ForwardDiff.jl

################################
# Test function and derivative #
################################

"""
    testf(x)

Evaluate the test function `exp(x) / sqrt(sin(x)^3 + cos(x)^3)` at `x`.
"""
testf(x) = exp(x) / sqrt(sin(x)^3 + cos(x)^3)

"""
    testderiv(x)

Evaluate the hand-written closed-form derivative of `testf` at `x`.

This is the reference value that the approximation methods are compared against.
"""
function testderiv(x)
    num = 3 * exp(x) * ((sin(x)^2) * cos(x) - sin(x) * (cos(x)^2))
    den = 2 * (sin(x)^3 + cos(x)^3)^(3//2)
    return testf(x) - num / den
end

#########################
# Approximation methods #
#########################

# All three methods share the call shape `method(f, x, h)` so that `deriverr`
# can invoke them interchangeably.

"""
    finitediff(f, x, h)

Approximate the derivative of `f` at `x` by the central difference with step `h`.
"""
finitediff(f, x, h) = (f(x + h) - f(x - h)) / (2h)

"""
    complexdiff(f, x, h)

Approximate the derivative of `f` at `x` with the complex-step formula
`imag(f(x + im*h)) / h`.
"""
complexdiff(f, x, h) = imag(f(x + im * h)) / h

"""
    dualdiff(f, x, h = nothing)

Differentiate `f` at `x` by evaluating it on `ForwardDiff.Dual(x, one(x))` and
extracting the first partial. `h` is never read; it exists only so the
signature matches `finitediff` and `complexdiff`.
"""
dualdiff(f, x, h = nothing) = ForwardDiff.partials(f(ForwardDiff.Dual(x, one(x))), 1)

##############################
# Error calculation/plotting #
##############################

# Step sizes 1e-20, 1e-19, ..., 1e-1.
const HRANGE = [10.0^i for i in -20:-1]

"""
    deriverr(deriv, x, hrange = HRANGE; f = testf, fprime = testderiv)

Return, for every step size `h` in `hrange`, the relative error of
`deriv(f, x, h)` with respect to the reference derivative `fprime(x)`.

Each error is clamped from below at `eps(Float64)`, so an exact result does not
produce a zero (the values are later drawn on a log-scaled axis).

# Keywords
- `f`: function being differentiated (defaults to `testf`).
- `fprime`: reference derivative of `f` (defaults to `testderiv`).
"""
function deriverr(deriv, x, hrange = HRANGE; f = testf, fprime = testderiv)
    true_deriv = fprime(x)
    relerr(h) = max(abs(deriv(f, x, h) - true_deriv) / abs(true_deriv), eps(Float64))
    return Float64[relerr(h) for h in hrange]
end

"""
    deriverr_plot(x, hrange = HRANGE)

Plot the relative error of `finitediff`, `complexdiff` and `dualdiff` at `x`
against the step sizes in `hrange`, with both axes log-scaled.
Returns the value of the final `yaxis!` call.
"""
function deriverr_plot(x, hrange = HRANGE)
    finite_error = deriverr(finitediff, x, hrange)
    complex_error = deriverr(complexdiff, x, hrange)
    dual_error = deriverr(dualdiff, x, hrange)
    # The same y-limits are applied to all three series.
    error_ylims = (1.0e-17, 1.0)
    plot(hrange, finite_error,
         ylims = error_ylims,
         linestyle = :dot,
         linewidth = 3,
         lab = "finite")
    plot!(hrange, complex_error,
          ylims = error_ylims,
          linestyle = :dash,
          linewidth = 3,
          lab = "complex")
    plot!(hrange, dual_error,
          ylims = error_ylims,
          lab = "dual")
    xaxis!("\$h\$ size", :log10, fontsize = 10)
    return yaxis!("relative error", :log10, fontsize = 10)
end

####################################
# Performance calculation/plotting #
####################################

"""
    performance_plot(; hard_time = 97, finite_time = 261, complex_time = 380, dual_time = 188)

Draw a bar chart of each approximation method's run time relative to
`hard_time`, the time of a plain `testf` evaluation.

Only the ratios `*_time / hard_time` are plotted, so any consistent time unit
works. The defaults are the previously recorded measurements; the expression
that produced each one is noted next to its use below.
Returns the value of the final `yaxis!` call.
"""
function performance_plot(; hard_time = 97, finite_time = 261, complex_time = 380,
                          dual_time = 188)
    # Default timings were obtained with BenchmarkTools
    # (presumably nanoseconds -- confirm against the BenchmarkTools docs):
    #   hard_time    = time(minimum(@benchmark(testf(1.5))))
    #   finite_time  = time(minimum(@benchmark(finitediff(testf, 1.5, 1e-5))))
    #   complex_time = time(minimum(@benchmark(complexdiff(testf, 1.5, 1e-10))))
    #   dual_time    = time(minimum(@benchmark(dualdiff(testf, 1.5))))
    bar([1], [finite_time / hard_time], lab = "finite",
        bar_width = 0.5,
        xlims = (0.5, 3.5),
        ylims = (0.0, 5.0))
    bar!([2], [complex_time / hard_time], bar_width = 0.5, lab = "complex")
    bar!([3], [dual_time / hard_time], bar_width = 0.5, lab = "dual")
    # An empty tick vector is passed for the x axis; the legend labels identify the bars.
    xticks!(Real[])
    return yaxis!("relative performance", fontsize = 10)
end

######################################
# Perturbation confusion pseudo-code #
######################################

# # D(f, x_0) -> df/dx evaluated at x_0
# const D = ForwardDiff.derivative
#
# # nested, closed over differentiation
# D(x -> x * D(y -> x + y, 1), 1)
#
# # correct answer
# df_dx_1 = D(x -> x * D(y -> x + y, 1), 1)
# df_dx_1 = D(x -> x * (y -> 1)(1), 1)
# df_dx_1 = D(x -> x, 1)
# df_dx_1 = (x -> 1)(1)
# df_dx_1 = 1
#
# # what ForwardDiff will compute
# df_dx_1 = D(x -> x * D(y -> x + y, 1), 1)
# df_dx_1 = D(x -> x * Eps[x + (1 + ϵ)], 1)
# df_dx_1 = Eps[(1 + ϵ) * Eps[(1 + ϵ) + (1 + ϵ)]]
# df_dx_1 = Eps[(1 + ϵ) * Eps[2 + 2ϵ]]
# df_dx_1 = Eps[(1 + ϵ) * 2]
# df_dx_1 = Eps[2 + 2ϵ]
# df_dx_1 = 2

end # module
dual diff 9 | f(x + y\epsilon) = f(x) + f^{\prime}(x)y\epsilon + \frac{f^{\prime\prime}(x)}{2!}y^2\epsilon^2 \hdots = f(x) + f^{\prime}(x)y\epsilon 10 | f^{\prime}(x) = \text{Eps}[f(x + \epsilon)] 11 | 12 | % defining \mathbf{g} 13 | \mathbf{g}: \mathbb{R}^n \to \mathbb{R}^m 14 | 15 | % defining g 16 | g: \mathbb{R}^n \to \mathbb{R} 17 | 18 | % gradient of g 19 | \nabla g(\mathbf{x}) = 20 | \sum_{i=1}^n \frac{\partial g(\mathbf{x})}{\partial x_i} \mathbf{e}_i = 21 | \nabla g(\mathbf{x}) = \begin{bmatrix} 22 | \frac{\partial g(\mathbf{x})}{\partial x_1} \\ 23 | \vdots \\ 24 | \frac{\partial g(\mathbf{x})}{\partial x_i} \\ 25 | \vdots \\ 26 | \frac{\partial g(\mathbf{x})}{\partial x_n} 27 | \end{bmatrix} 28 | 29 | % partial derivative of g 30 | \frac{\partial g(\mathbf{x})}{\partial x_i} = 31 | \text{Eps}[g(\begin{bmatrix} 32 | x_1 \\ 33 | \vdots \\ 34 | x_i + \epsilon \\ 35 | \vdots \\ 36 | x_n 37 | \end{bmatrix})] 38 | 39 | g(\begin{bmatrix} 40 | x_1 \\ 41 | \vdots \\ 42 | x_i + \epsilon \\ 43 | \vdots \\ 44 | x_n 45 | \end{bmatrix}) = g(\mathbf{x}) + \frac{\partial g(\mathbf{x})}{\partial x_i} \epsilon 46 | 47 | % multidimensional dual number 48 | f(x + y\epsilon) = f(x) + f^{\prime}(x)y\epsilon 49 | f(x + \sum_{i=1}^n y_i\epsilon_i) = f(x) + f^{\prime}(x)\sum_{i=1}^n y_i\epsilon_i 50 | 51 | % seeding the input vector 52 | \mathbf{x} = 53 | \begin{bmatrix} 54 | x_1 \\ 55 | \vdots \\ 56 | x_i \\ 57 | \vdots \\ 58 | x_n 59 | \end{bmatrix} \to 60 | \mathbf{x}_\epsilon = 61 | \begin{bmatrix} 62 | x_1 + \epsilon_1 \\ 63 | \vdots \\ 64 | x_i + \epsilon_i \\ 65 | \vdots \\ 66 | x_n + \epsilon_n 67 | \end{bmatrix} 68 | 69 | % gradient g eval 70 | g(\mathbf{x}_{\epsilon}) = g(\mathbf{x}) + \sum_{i=1}^n \frac{\partial g(\mathbf{x})}{\partial x_i} \epsilon_i 71 | 72 | % jacobian g eval 73 | \mathbf{g}(\mathbf{x}_{\epsilon}) = 74 | \begin{bmatrix} 75 | g_1(\mathbf{x}_{\epsilon}) \\ 76 | \vdots \\ 77 | g_j(\mathbf{x}_{\epsilon}) \\ 78 | \vdots \\ 79 | g_m(\mathbf{x}_{\epsilon}) 80 | 
\end{bmatrix} = 81 | \begin{bmatrix} 82 | g_1(\mathbf{x}) + \sum_{i=1}^{n} \frac{\partial g_1(\mathbf{x})}{\partial x_i}\epsilon_i \\ 83 | \vdots \\ 84 | g_j(\mathbf{x}) + \sum_{i=1}^{n} \frac{\partial g_j(\mathbf{x})}{\partial x_i}\epsilon_i \\ 85 | \vdots \\ 86 | g_m(\mathbf{x}) + \sum_{i=1}^{n} \frac{\partial g_m(\mathbf{x})}{\partial x_i}\epsilon_i \\ 87 | \end{bmatrix} \to 88 | \mathbf{J}(\mathbf{g})(\mathbf{x}) = 89 | \begin{bmatrix} 90 | \frac{\partial g_1(\mathbf{x})}{\partial x_1} && \hdots && \frac{\partial g_1(\mathbf{x})}{\partial x_i} && \hdots && \frac{\partial g_1(\mathbf{x})}{\partial x_n} \\ 91 | \vdots && \ddots && \vdots && \ddots \\ 92 | \frac{\partial g_j(\mathbf{x})}{\partial x_1} && \hdots && \frac{\partial g_j(\mathbf{x})}{\partial x_i} && \hdots && \frac{\partial g_j(\mathbf{x})}{\partial x_n} \\ 93 | \vdots && \ddots && \vdots && \ddots \\ 94 | \frac{\partial g_m(\mathbf{x})}{\partial x_1} && \hdots && \frac{\partial g_m(\mathbf{x})}{\partial x_i} && \hdots && \frac{\partial g_m(\mathbf{x})}{\partial x_n} \\ 95 | \end{bmatrix} 96 | 97 | % cumprod 98 | 99 | \textrm{cumprod}( 100 | \begin{bmatrix} 101 | x_1 \\ 102 | x_2 \\ 103 | x_3 \\ 104 | \vdots \\ 105 | x_n 106 | \end{bmatrix}) = 107 | \begin{bmatrix} 108 | x_1 \\ 109 | x_2x_1 \\ 110 | x_3x_2x_1 \\ 111 | \vdots \\ 112 | x_nx_{n-1}x_{n-2} \hdots x_1 113 | \end{bmatrix} 114 | 115 | \mathbf{J}(\textrm{cumprod})( 116 | \begin{bmatrix} 117 | x_1 \\ 118 | x_2 \\ 119 | x_3 120 | \end{bmatrix}) = 121 | \begin{bmatrix} 122 | 1 & 0 & 0 \\ 123 | x_2 & x_1 & 0 \\ 124 | x_3x_2 & x_3x_1 & x_2x_1 125 | \end{bmatrix} 126 | 127 | \mathbf{J}(\textrm{cumprod})( 128 | \begin{bmatrix} 129 | 1 \\ 130 | 2 \\ 131 | 3 132 | \end{bmatrix}) = 133 | \begin{bmatrix} 134 | 1 & 0 & 0 \\ 135 | 2 & 1 & 0 \\ 136 | 6 & 3 & 2 137 | \end{bmatrix} 138 | 139 | % benchmarks 140 | 141 | \begin{tabular}{lllll} 142 | Function & Input Size & autograd Time (s) & ForwardDiff Time (s) & Ratio \\ \hline 143 | Ackley & 10 & 
0.001204 & 0.000001 & 1204.00 \\ 144 | Ackley & 100 & 0.008472 & 0.000048 & 176.50 \\ 145 | Ackley & 1000 & 0.081499 & 0.004925 & 16.55 \\ 146 | Ackley & 10000 & 0.835441 & 0.516848 & 1.65 \\ 147 | Ackley & 100000 & 8.361769 & 52.337054 & 0.15 \\ 148 | \end{tabular} 149 | 150 | \begin{tabular}{lllll} 151 | Function & Input Size & autograd Time (s) & ForwardDiff Time (s) & Ratio \\ \hline 152 | Rosenbrock & 10 & 0.000866 & 0.000001 & 866.0 \\ 153 | Rosenbrock & 100 & 0.004395 & 0.000028 & 156.96 \\ 154 | Rosenbrock & 1000 & 0.040702 & 0.002605 & 15.62 \\ 155 | Rosenbrock & 10000 & 0.411095 & 0.257495 & 1.60 \\ 156 | Rosenbrock & 100000 & 4.173851 & 26.596339 & 0.16 157 | \end{tabular} 158 | --------------------------------------------------------------------------------