├── .JuliaFormatter.toml
├── doc
│   ├── pro.jmd
│   ├── pro.md
│   ├── divergences.pdf
│   ├── divergences_files
│   │   └── libs
│   │       ├── bootstrap
│   │       │   └── bootstrap-icons.woff
│   │       ├── quarto-html
│   │       │   ├── tippy.css
│   │       │   ├── quarto-syntax-highlighting-e26003cea8cd680ca0c55a263523d882.css
│   │       │   ├── anchor.min.js
│   │       │   └── popper.min.js
│   │       └── clipboard
│   │           └── clipboard.min.js
│   ├── DRIVER.py
│   ├── y.csv
│   ├── divergences.aux
│   ├── out.txt
│   ├── x.csv
│   ├── divergences.qmd
│   ├── divergences.md
│   ├── z.csv
│   ├── mdexample.jl
│   ├── mdprob.py
│   ├── mdexample_iv.jl
│   ├── divergences.py
│   └── example_iv_effcient_analytical.jl
├── .gitignore
├── test
│   ├── Aqua.jl
│   └── Project.toml
├── Project.toml
├── .github
│   └── workflows
│       ├── TagBot.yml
│       ├── format_fix.yaml
│       └── ci.yml
├── src
│   ├── plots.jl
│   ├── benchmark
│   │   ├── print_table.jl
│   │   └── benchmarks.jl
│   ├── Divergences.jl
│   └── divs.jl
├── LICENSE.md
├── CHANGELOG.md
└── README.md
-------------------------------------------------------------------------------- /.JuliaFormatter.toml: -------------------------------------------------------------------------------- 1 | style = "sciml"
-------------------------------------------------------------------------------- /doc/pro.jmd: -------------------------------------------------------------------------------- 1 | ## First 2 | 3 | An equation: 4 | 5 | $$ 6 | y = b 7 | $$
-------------------------------------------------------------------------------- /doc/pro.md: -------------------------------------------------------------------------------- 1 | ## First 2 | 3 | An equation: 4 | 5 | $$ 6 | y = b 7 | $$
-------------------------------------------------------------------------------- /doc/divergences.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gragusa/Divergences.jl/HEAD/doc/divergences.pdf
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.jl.cov 2 | *.jl.*.cov 3 | *.jl.mem 4 | docs/build/ 5 | docs/site/ 6 | Manifest.toml 7 | .DS_Store 8 | .vscode
-------------------------------------------------------------------------------- /test/Aqua.jl: -------------------------------------------------------------------------------- 1 | using Test 2 | using Aqua 3 | using Divergences 4 | 5 | @testset "Aqua.jl" begin 6 | Aqua.test_all(Divergences) 7 | end
-------------------------------------------------------------------------------- /test/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" 3 | Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" 4 | 5 | [compat] 6 | Aqua = "0.8"
-------------------------------------------------------------------------------- /doc/divergences_files/libs/bootstrap/bootstrap-icons.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gragusa/Divergences.jl/HEAD/doc/divergences_files/libs/bootstrap/bootstrap-icons.woff
-------------------------------------------------------------------------------- /Project.toml: -------------------------------------------------------------------------------- 1 | name = "Divergences" 2 | uuid = "7f07e4c8-6df2-5971-ad44-bf349b9adcfd" 3 | authors = ["Giuseppe Ragusa"] 4 | version = "0.4.1" 5 | 6 | [deps] 7 | Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" 8 | LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" 9 | NaNMath = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" 10 | RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" 11 | 12 | [compat] 13 | Distances = "0.10" 14 | LaTeXStrings = "1.4" 15 | NaNMath = "^1" 16 | Test = "1" 17 | julia = "1.9" 18 | RecipesBase = "1.3" 19 | 20 | [extras] 21 | Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
"3cdcf5f2-1ef4-517c-9805-6587b60abb01" 11 | 12 | [compat] 13 | Distances = "0.10" 14 | LaTeXStrings = "1.4" 15 | NaNMath = "^1" 16 | Test = "1" 17 | julia = "1.9" 18 | RecipesBase = "1.3" 19 | [extras] 20 | Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" 21 | -------------------------------------------------------------------------------- /.github/workflows/TagBot.yml: -------------------------------------------------------------------------------- 1 | name: TagBot 2 | on: 3 | issue_comment: 4 | types: 5 | - created 6 | workflow_dispatch: 7 | inputs: 8 | lookback: 9 | default: 3 10 | permissions: 11 | actions: read 12 | checks: read 13 | contents: write 14 | deployments: read 15 | issues: read 16 | discussions: read 17 | packages: read 18 | pages: read 19 | pull-requests: read 20 | repository-projects: read 21 | security-events: read 22 | statuses: read 23 | jobs: 24 | TagBot: 25 | if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot' 26 | runs-on: ubuntu-latest 27 | steps: 28 | - uses: JuliaRegistries/TagBot@v1 29 | with: 30 | token: ${{ secrets.GITHUB_TOKEN }} 31 | # Edit the following line to reflect the actual name of the GitHub Secret containing your private key 32 | ssh: ${{ secrets.DOCUMENTER_KEY }} 33 | # ssh: ${{ secrets.NAME_OF_MY_SSH_PRIVATE_KEY_SECRET }} 34 | -------------------------------------------------------------------------------- /src/plots.jl: -------------------------------------------------------------------------------- 1 | using RecipesBase 2 | using LaTeXStrings 3 | @recipe function f(r::Divergences.AbstractDivergence; min_u = 0, max_u = 3, lenout = 1000) 4 | # set a default value for an attribute with `-->` 5 | xlabel --> L"$u$" 6 | yguide --> L"$\gamma(u)$" 7 | #markershape --> :diamond 8 | # add a series for an error band 9 | step = (max_u - min_u) / lenout 10 | u = collect(min_u:step:max_u) 11 | y = r.(u) 12 | @series begin 13 | # force an argument with `:=` 14 | seriestype := :path 15 | # ignore series in legend and color cycling 16 | primary := false 17 | linecolor := nothing 18 | #fillcolor := :lightgray 19 | #fillalpha := 0.5 20 | #fillrange := r.y .- r.ε 21 | # ensure no markers are shown for the error band 22 | markershape := :none 23 | # return series data 24 | u, y 25 | end 26 | # get the seriescolor passed by the user 27 | c = get(plotattributes, :seriescolor, :auto) 28 | # highlight big errors, otherwise use the user-defined color 29 | #markercolor := ifelse.(r.ε .> ε_max, :red, c) 30 | # return data 31 | return u, y 32 | end 33 | -------------------------------------------------------------------------------- /.github/workflows/format_fix.yaml: -------------------------------------------------------------------------------- 1 | name: Auto-format (pushes in repo) 2 | 3 | on: 4 | push: 5 | branches: ["**"] # or ['main', 'develop', 'feature/**'] 6 | 7 | permissions: 8 | contents: write 9 | 10 | jobs: 11 | format-fix: 12 | if: "!contains(github.event.head_commit.message, 'auto-format with JuliaFormatter')" 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v5 16 | with: 17 | fetch-depth: 0 18 | 19 | - name: Setup Julia 20 | uses: julia-actions/setup-julia@v2 21 | with: 22 | version: '1' 23 | 24 | - name: Install JuliaFormatter 25 | run: julia -e 'using Pkg; Pkg.add("JuliaFormatter")' 26 | 27 | - name: Run formatter (sciml) 28 | run: julia -e 'using JuliaFormatter; format(".", verbose=true)' 29 | 30 | - name: Commit & push if changed 31 | run: | 32 | if ! 
git diff --quiet; then 33 | git config user.name "github-actions[bot]" 34 | git config user.email "41898282+github-actions[bot]@users.noreply.github.com" 35 | git add -A 36 | git commit -m "style: auto-format with JuliaFormatter (sciml)" 37 | git push 38 | fi
-------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The Divergences.jl package is licensed under the MIT "Expat" License: 2 | 3 | > Copyright (c) 2014: Giuseppe Ragusa. 4 | > 5 | > Permission is hereby granted, free of charge, to any person obtaining 6 | > a copy of this software and associated documentation files (the 7 | > "Software"), to deal in the Software without restriction, including 8 | > without limitation the rights to use, copy, modify, merge, publish, 9 | > distribute, sublicense, and/or sell copies of the Software, and to 10 | > permit persons to whom the Software is furnished to do so, subject to 11 | > the following conditions: 12 | > 13 | > The above copyright notice and this permission notice shall be 14 | > included in all copies or substantial portions of the Software. 15 | > 16 | > THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | > EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | > MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | > IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | > CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 | > TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | > SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [0.4.1] 9 | 10 | - Fix several critical issues introduced in the previous release.
11 | 12 | ## [0.4.0] 13 | 14 | ### Added 15 | - Divergence types are now callable, enabling `div(x, y)` and `div(x)` syntax 16 | - Broadcasting support for divergences using `div.(x, y)` 17 | - Backward compatibility for the old `evaluate(div, x, y)` API with deprecation warnings 18 | 19 | ### Changed 20 | - Divergences now inherit from `PreMetric` (via `Distances.jl`) instead of being standalone types 21 | - Improved performance through streamlined divergence evaluation 22 | - Updated constructor signatures for modified divergences 23 | 24 | ### Deprecated 25 | - `evaluate(div, x, y)` and `evaluate(div, x)` functions are deprecated in favor of callable syntax `div(x, y)` and `div(x)` 26 | 27 | ### Fixed 28 | - Type annotation issues in gradient and hessian functions 29 | - Improved numerical stability in divergence calculations 30 | 31 | ## [0.3.0] - Previous Release 32 | - Initial stable release with basic divergence functionality 33 | -------------------------------------------------------------------------------- /doc/divergences_files/libs/quarto-html/tippy.css: -------------------------------------------------------------------------------- 1 | .tippy-box[data-animation=fade][data-state=hidden]{opacity:0}[data-tippy-root]{max-width:calc(100vw - 10px)}.tippy-box{position:relative;background-color:#333;color:#fff;border-radius:4px;font-size:14px;line-height:1.4;white-space:normal;outline:0;transition-property:transform,visibility,opacity}.tippy-box[data-placement^=top]>.tippy-arrow{bottom:0}.tippy-box[data-placement^=top]>.tippy-arrow:before{bottom:-7px;left:0;border-width:8px 8px 0;border-top-color:initial;transform-origin:center top}.tippy-box[data-placement^=bottom]>.tippy-arrow{top:0}.tippy-box[data-placement^=bottom]>.tippy-arrow:before{top:-7px;left:0;border-width:0 8px 8px;border-bottom-color:initial;transform-origin:center bottom}.tippy-box[data-placement^=left]>.tippy-arrow{right:0}.tippy-box[data-placement^=left]>.tippy-arrow:before{border-width:8px 0 8px 8px;border-left-color:initial;right:-7px;transform-origin:center left}.tippy-box[data-placement^=right]>.tippy-arrow{left:0}.tippy-box[data-placement^=right]>.tippy-arrow:before{left:-7px;border-width:8px 8px 8px 0;border-right-color:initial;transform-origin:center right}.tippy-box[data-inertia][data-state=visible]{transition-timing-function:cubic-bezier(.54,1.5,.38,1.11)}.tippy-arrow{width:16px;height:16px;color:#333}.tippy-arrow:before{content:"";position:absolute;border-color:transparent;border-style:solid}.tippy-content{position:relative;padding:5px 9px;z-index:1} -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | 5 | pull_request: 6 | push: 7 | branches: [master] 8 | tags: ['*'] 9 | 10 | # allow cache cleanup (safe to add once per workflow) 11 | permissions: 12 | actions: write 13 | contents: read 14 | 15 | jobs: 16 | test: 17 | env: 18 | GKS_ENCODING: "utf8" 19 | GKSwstype: "nul" 20 | name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} 21 | runs-on: ${{ matrix.os }} 22 | continue-on-error: ${{ matrix.version == 'nightly' }} 23 | strategy: 24 | fail-fast: false 25 | matrix: 26 | version: 27 | - '1.9' 28 | - 'nightly' 29 | os: [ubuntu-latest] 30 | arch: [x64, x86] 31 | include: # spare windows/macos CI credits, run on julia-latest only 32 | - os: windows-latest 33 | version: '1' 34 | arch: x64 35 | - os: 
windows-latest 36 | version: '1' 37 | arch: x86 38 | - os: macOS-latest 39 | version: '1' 40 | arch: x64 41 | steps: 42 | - uses: actions/checkout@v5 43 | - uses: julia-actions/setup-julia@v2 44 | with: 45 | version: ${{ matrix.version }} 46 | arch: ${{ matrix.arch }} 47 | - uses: julia-actions/cache@v2 48 | - uses: julia-actions/julia-buildpkg@v1 49 | - uses: julia-actions/julia-runtest@v1 50 | continue-on-error: ${{ matrix.version == 'nightly' }} 51 | env: 52 | JULIA_NUM_THREADS: 4,1 53 | - uses: julia-actions/julia-processcoverage@v1 54 | - uses: codecov/codecov-action@v5 55 | with: 56 | files: lcov.info 57 | token: ${{ secrets.CODECOV_TOKEN }}
-------------------------------------------------------------------------------- /src/benchmark/print_table.jl: -------------------------------------------------------------------------------- 1 | using BenchmarkTools 2 | using Distances 3 | using Printf 4 | 5 | include("benchmarks.jl") 6 | 7 | # BenchmarkTools stores things in a Dict so it loses ordering but we want to print the table 8 | # in a special order. Therefore define an order here: 9 | 10 | order = [:KullbackLeibler, 11 | :ReverseKullbackLeibler, 12 | :Hellinger, 13 | :CressieRead, 14 | :ChiSquared, 15 | :ModifiedDivergence] 16 | 17 | BenchmarkTools.DEFAULT_PARAMETERS.seconds = 2.0 # Long enough 18 | 19 | # Tuning 20 | if !isfile(joinpath(@__DIR__, "params.json")) 21 | tuning = tune!(SUITE; verbose = true); 22 | BenchmarkTools.save(joinpath(@__DIR__, "params.json"), "SUITE", params(SUITE)) 23 | end 24 | loadparams!(SUITE, BenchmarkTools.load(joinpath(@__DIR__, "params.json"))[1], :evals, :samples); 25 | 26 | # Run and judge 27 | results = run(SUITE; verbose = true) 28 | judgement = minimum(results) 29 | 30 | # Output the comparison table 31 | getname(T::DataType) = T.name.name 32 | 33 | function print_table(judgement) 34 | for typ in ("colwise", "pairwise") 35 | io = IOBuffer() 36 | println(io, "| distance | loop | $typ | gain |") 37 | println(io, "|----------- | -------| ----------| -------|") 38 | sorted_distances = collect(judgement[typ]) 39 | #sorted_distances = sort(collect(judgement[typ]), by = y -> findfirst(x -> x == getname(y[1]), order)) 40 | 41 | for (dist, result) in sorted_distances 42 | t_loop = BenchmarkTools.time(result["loop"]) 43 | t_spec = BenchmarkTools.time(result["specialized"]) 44 | print(io, "| ", string(dist), " |") 45 | print(io, 46 | @sprintf("%9.6fs | %9.6fs | %7.4f |\n", 47 | t_loop / 1e9, 48 | t_spec / 1e9, 49 | (t_loop / t_spec))) 50 | end 51 | print(stdout, String(take!(io))) 52 | println() 53 | end 54 | end 55 | 56 | print_table(judgement)
-------------------------------------------------------------------------------- /doc/DRIVER.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | exec(open('divergences.py').read()) 3 | 4 | 5 | def randiv(n=100, m=5, k=1, theta=0.0, rho=0.9, CP=20): 6 | """ 7 | Simulates instrumental variables regression data 8 | 9 | Returns: 10 | y: outcome variable (n x 1) 11 | covariates: matrix [x w] (n x (1 + k)) 12 | instruments: matrix [z w] (n x (m + k)) 13 | theory_val: theoretical strength measure (array length m) 14 | """ 15 | # Generate instrument strength vector 16 | tau = np.full(m, np.sqrt(CP / (m * n))) 17 | 18 | # Generate base data matrices 19 | z = np.random.randn(n, m) # Instruments 20 | w = np.random.randn(n, k) # Exogenous controls (n x k) 21 | 22 | # Generate correlated errors 23 | eta = np.random.randn(n, 1) 24 | u = rho * eta + np.sqrt(1 - rho**2) * np.random.randn(n, 1) 25 | 26
| # Create endogenous variable x (n x 1) 27 | x = z @ tau.reshape(-1, 1) + eta 28 | 29 | # Create outcome variable y (n x 1) 30 | y = x * theta + u 31 | 32 | # Create combined matrices 33 | covariates = np.hstack((x, w)) 34 | instruments = np.hstack((z, w)) 35 | 36 | # Calculate theoretical value (array length m) 37 | theory_val = (k * tau**2) / (1 + k * tau**2) 38 | 39 | return y, covariates, instruments, theory_val 40 | 41 | 42 | y, x, z, tv = randiv() 43 | 44 | divergence = KullbackLeibler() 45 | momfun = DefaultMomentFunction(y,x,z) 46 | problem = MDProblem(momfun, divergence) 47 | 48 | k = 2 49 | n = 100 50 | m = 6 51 | 52 | pi = np.random.uniform(0,1,n) 53 | theta = np.random.uniform(0, 1, k) 54 | 55 | 56 | u0 = np.concatenate((pi, theta)) 57 | lb = np.concatenate((np.zeros_like(pi), -10.0 * np.ones_like(theta))) 58 | ub = np.concatenate((np.inf*np.ones_like(pi), 10.0 * np.ones_like(theta))) 59 | 60 | # Define constraint bounds. Our constraint vector has length m+1. 61 | # For equality constraints, we set cl = cu. 62 | # For instance, suppose we require c(u) == 0. 63 | cl = np.zeros(m + 1) 64 | cu = np.zeros(m + 1) 65 | 66 | #prob = MDOptProblem(problem, u0, lb, ub, cl, cu) 67 | 68 | theta = np.array((1,2)) 69 | momfun.g(theta) -------------------------------------------------------------------------------- /doc/y.csv: -------------------------------------------------------------------------------- 1 | 4.534400727903336037e-01 2 | -3.726962711333793310e-01 3 | 4.756290312354569672e-01 4 | 6.061400272507512232e-01 5 | -3.064275130944626446e-01 6 | 1.953269844971823765e+00 7 | -1.254536494380667389e+00 8 | -3.552921823819787939e-01 9 | -2.508340213668912355e+00 10 | 2.085908067490494400e+00 11 | 6.809094722044641834e-01 12 | -9.218247193524797822e-02 13 | 1.444704050717032784e-02 14 | -8.389607438507218351e-01 15 | 2.184772195191130084e+00 16 | 5.972172045705298471e-01 17 | 1.482459928520135639e-01 18 | 7.187049804336708148e-01 19 | 2.744134197952955989e-01 20 | 1.766735999435156967e-01 21 | -5.772593293764261002e-01 22 | -3.211235985803653770e-01 23 | 1.106161645616177358e+00 24 | 1.534859348960212699e+00 25 | 1.508346481749807566e+00 26 | -5.402905858337447853e-01 27 | -8.826091730963186999e-01 28 | 3.829912473982199916e-02 29 | -1.851233689668075610e-01 30 | 6.455172020929682564e-01 31 | -1.437851199189063678e+00 32 | 9.501324808410407385e-01 33 | 3.574614183704746817e-02 34 | -1.126331950332877252e+00 35 | -4.622953957063831720e-01 36 | -1.283370537998172800e+00 37 | 8.524542994072983326e-01 38 | 4.943312396442680057e-01 39 | -4.350013301207570349e-01 40 | -7.234172343805733707e-01 41 | -1.104624703575391109e+00 42 | 9.443647039121307873e-01 43 | -5.059843200494723581e-01 44 | -1.341455832686308636e+00 45 | 4.473578066366112882e-03 46 | -5.617956068164855488e-01 47 | -2.345788233410650125e+00 48 | -3.781291664463863333e-01 49 | -4.743955077296018175e-01 50 | 1.343637143492848463e-02 51 | 1.261632443339366194e+00 52 | 4.247228666508588990e-01 53 | -6.673731106425970783e-01 54 | -5.366015064570825999e-01 55 | 1.902190441572049862e+00 56 | 1.200725330754551123e+00 57 | 2.275686504195926219e-01 58 | 5.885562003117093821e-02 59 | -1.958731974301114176e-01 60 | 1.753336286191466420e-01 61 | -7.672105174572009245e-01 62 | -4.389902019004270350e-01 63 | 1.086437035729067357e+00 64 | -5.309538335630283878e-01 65 | -1.406045756308372807e-01 66 | -2.107202927376866097e-01 67 | -2.447306251050986592e-01 68 | 2.125467791713697707e+00 69 | -2.659161432136301073e+00 70 | 1.772384177197678490e+00 
71 | 1.429978589622439955e+00 72 | -2.111224792689559759e+00 73 | -3.275869210670834253e-02 74 | 8.962987690516099892e-02 75 | -9.957198465068788362e-01 76 | -1.384480106096983576e+00 77 | -1.316470007670462739e+00 78 | 1.469152527180052070e+00 79 | 8.096657742858849316e-01 80 | 1.414944280524625286e+00 81 | 7.269629281623196659e-01 82 | -1.598209705539952008e+00 83 | -3.063438385431636046e-01 84 | 7.065852611109912917e-01 85 | -8.559080287936740028e-01 86 | 1.112799983437065432e+00 87 | 1.472053815035164193e-01 88 | -1.371867705702622675e-01 89 | 6.092794468281547138e-01 90 | -3.241591054990826115e-01 91 | -1.376033090868865605e-01 92 | 1.133925787043992273e+00 93 | -8.545785241860158754e-01 94 | -2.179755581183107083e-03 95 | 6.256951098221441576e-02 96 | 1.804668905953725755e-01 97 | 2.762780281821699546e-01 98 | -8.289429517667962211e-01 99 | 8.439680112659962985e-01 100 | -1.534415222975048787e-01 101 | -------------------------------------------------------------------------------- /doc/divergences.aux: -------------------------------------------------------------------------------- 1 | \relax 2 | \providecommand*\new@tpo@label[2]{} 3 | \providecommand\hyper@newdestlabel[2]{} 4 | \providecommand\HyperFirstAtBeginDocument{\AtBeginDocument} 5 | \HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined 6 | \global\let\oldnewlabel\newlabel 7 | \gdef\newlabel#1#2{\newlabelxx{#1}#2} 8 | \gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}} 9 | \AtEndDocument{\ifx\hyper@anchor\@undefined 10 | \let\newlabel\oldnewlabel 11 | \fi} 12 | \fi} 13 | \global\let\hyper@last\relax 14 | \gdef\HyperFirstAtBeginDocument#1{#1} 15 | \providecommand*\HyPL@Entry[1]{} 16 | \HyPL@Entry{0<>} 17 | \@writefile{toc}{\contentsline {section}{\nonumberline \texttt {Divergences.el}}{1}{section*.2}\protected@file@percent } 18 | \newlabel{divergences.el}{{}{1}{\texorpdfstring {\texttt {Divergences.el}}{Divergences.el}}{section*.2}{}} 19 | \@writefile{toc}{\contentsline {subsection}{\nonumberline Definition}{1}{subsection*.4}\protected@file@percent } 20 | \newlabel{definition}{{}{1}{Definition}{subsection*.4}{}} 21 | \gdef \LT@i {\LT@entry 22 | {1}{73.27129pt}\LT@entry 23 | {1}{160.93872pt}\LT@entry 24 | {1}{57.6588pt}\LT@entry 25 | {1}{103.27501pt}\LT@entry 26 | {1}{51.6588pt}} 27 | \@writefile{toc}{\contentsline {subsection}{\nonumberline Divergences}{2}{subsection*.6}\protected@file@percent } 28 | \newlabel{divergences}{{}{2}{Divergences}{subsection*.6}{}} 29 | \@writefile{toc}{\contentsline {subsection}{\nonumberline Modified divergences}{2}{subsection*.8}\protected@file@percent } 30 | \newlabel{modified-divergences}{{}{2}{Modified divergences}{subsection*.8}{}} 31 | \gdef \LT@ii {\LT@entry 32 | {1}{74.06096pt}\LT@entry 33 | {1}{117.60448pt}\LT@entry 34 | {1}{117.60448pt}\LT@entry 35 | {1}{137.44621pt}} 36 | \@writefile{toc}{\contentsline {subsection}{\nonumberline Fully modified divergences}{4}{subsection*.10}\protected@file@percent } 37 | \newlabel{fully-modified-divergences}{{}{4}{Fully modified divergences}{subsection*.10}{}} 38 | \@writefile{toc}{\contentsline {subsection}{\nonumberline Example of divergences}{4}{subsection*.12}\protected@file@percent } 39 | \newlabel{example-of-divergences}{{}{4}{Example of divergences}{subsection*.12}{}} 40 | \@writefile{toc}{\contentsline {paragraph}{\nonumberline Kullback-Leibler divergence}{4}{paragraph*.14}\protected@file@percent } 41 | \newlabel{kullback-leibler-divergence}{{}{4}{Kullback-Leibler divergence}{paragraph*.14}{}} 42 | \@writefile{toc}{\contentsline 
{paragraph}{\nonumberline Reverse Kullback-Leibler divergence}{4}{paragraph*.16}\protected@file@percent } 43 | \newlabel{reverse-kullback-leibler-divergence}{{}{4}{Reverse Kullback-Leibler divergence}{paragraph*.16}{}} 44 | \@writefile{toc}{\contentsline {paragraph}{\nonumberline Chi-squared divergence}{5}{paragraph*.18}\protected@file@percent } 45 | \newlabel{chi-squared-divergence}{{}{5}{Chi-squared divergence}{paragraph*.18}{}} 46 | \@writefile{toc}{\contentsline {subsection}{\nonumberline Cressie-Read divergences}{5}{subsection*.20}\protected@file@percent } 47 | \newlabel{cressie-read-divergences}{{}{5}{Cressie-Read divergences}{subsection*.20}{}} 48 | -------------------------------------------------------------------------------- /src/benchmark/benchmarks.jl: -------------------------------------------------------------------------------- 1 | using BenchmarkTools 2 | using Distances 3 | using Divergences 4 | 5 | const SUITE = BenchmarkGroup() 6 | 7 | function create_distances() 8 | divs = [KullbackLeibler(), 9 | ReverseKullbackLeibler(), 10 | Hellinger(), 11 | CressieRead(2.0), 12 | ChiSquared(), 13 | ModifiedDivergence(KullbackLeibler(), 2.0), 14 | FullyModifiedDivergence(KullbackLeibler(), 0.5, 2.0)] 15 | 16 | return divs 17 | end 18 | 19 | ########### 20 | # Eval # 21 | ########### 22 | 23 | SUITE["evaluation"] = BenchmarkGroup() 24 | 25 | function evaluate(dist, x, y) 26 | n = size(x, 1) 27 | T = typeof(dist(x[1, 1], y[1, 1])) 28 | return dist(x, y) 29 | end 30 | 31 | SUITE["gradient"] = BenchmarkGroup() 32 | 33 | ########### 34 | # Colwise # 35 | ########### 36 | 37 | SUITE["colwise"] = BenchmarkGroup() 38 | 39 | function evaluate_colwise(dist, x, y) 40 | n = size(x, 2) 41 | T = typeof(evaluate(dist, x[:, 1], y[:, 1])) 42 | r = Vector{T}(undef, n) 43 | for j in 1:n 44 | r[j] = @views evaluate(dist, x[:, j], y[:, j]) 45 | end 46 | return r 47 | end 48 | 49 | function add_colwise_benchmarks!(SUITE) 50 | m = 200 51 | n = 10000 52 | 53 | x = rand(m, n) 54 | y = rand(m, n) 55 | 56 | p = x 57 | q = y 58 | for i in 1:n 59 | p[:, i] /= sum(x[:, i]) 60 | q[:, i] /= sum(y[:, i]) 61 | end 62 | 63 | divs = create_distances() 64 | 65 | for (dists, (a, b)) in [(divs, (p, q))] 66 | for dist in (dists) 67 | Tdist = typeof(dist) 68 | SUITE["colwise"][Tdist] = BenchmarkGroup() 69 | SUITE["colwise"][Tdist]["loop"] = @benchmarkable evaluate_colwise($dist, $a, $b) 70 | SUITE["colwise"][Tdist]["specialized"] = @benchmarkable colwise($dist, $a, $b) 71 | end 72 | end 73 | end 74 | 75 | add_colwise_benchmarks!(SUITE) 76 | 77 | ############ 78 | # Pairwise # 79 | ############ 80 | 81 | SUITE["pairwise"] = BenchmarkGroup() 82 | 83 | function evaluate_pairwise(dist, x, y) 84 | nx = size(x, 2) 85 | ny = size(y, 2) 86 | T = typeof(evaluate(dist, x[:, 1], y[:, 1])) 87 | r = Matrix{T}(undef, nx, ny) 88 | for j in 1:ny 89 | @inbounds for i in 1:nx 90 | r[i, j] = @views evaluate(dist, x[:, i], y[:, j]) 91 | end 92 | end 93 | return r 94 | end 95 | 96 | function add_pairwise_benchmarks!(SUITE) 97 | m = 100 98 | nx = 200 99 | ny = 250 100 | 101 | x = rand(m, nx) 102 | y = rand(m, ny) 103 | 104 | p = x 105 | for i in 1:nx 106 | p[:, i] /= sum(x[:, i]) 107 | end 108 | 109 | q = y 110 | for i in 1:ny 111 | q[:, i] /= sum(y[:, i]) 112 | end 113 | 114 | divs = create_distances() 115 | 116 | for (dists, (a, b)) in [(divs, (p, q))] 117 | for dist in (dists) 118 | Tdist = typeof(dist) 119 | SUITE["pairwise"][Tdist] = BenchmarkGroup() 120 | SUITE["pairwise"][Tdist]["loop"] = @benchmarkable evaluate_pairwise($dist, $a, 121 
| $b) 122 | SUITE["pairwise"][Tdist]["specialized"] = @benchmarkable pairwise( 123 | $dist, $a, $b; 124 | dims = 2) 125 | end 126 | end 127 | end 128 | 129 | add_pairwise_benchmarks!(SUITE) 130 |
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Divergences.jl 2 | 3 | [![codecov](https://codecov.io/gh/gragusa/Divergences.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/gragusa/Divergences.jl) 4 | 5 | `Divergences.jl` is a Julia package that makes evaluating divergence measures between two vectors easy. The package allows for calculating the *gradient* and the diagonal of the *Hessian* of several divergences. 6 | 7 | 8 | ## Supported divergences 9 | 10 | The package defines an abstract `Divergence` type with the following subtypes: 11 | 12 | * Kullback-Leibler divergence `KullbackLeibler` 13 | * Chi-squared distance `ChiSquared` 14 | * Reverse Kullback-Leibler divergence `ReverseKullbackLeibler` 15 | * Cressie-Read divergences `CressieRead` 16 | 17 | These divergences differ from the equivalent ones defined in the `Distances` package because they are **normalized**. 18 | 19 | Also, the package provides methods for calculating their gradient and the (diagonal elements of the) Hessian matrix. 20 | 21 | The constructors for the types above are straightforward: 22 | ```julia 23 | KullbackLeibler() 24 | ChiSquared() 25 | ReverseKullbackLeibler() 26 | ``` 27 | The `CressieRead` type defines a family of divergences indexed by a parameter `alpha`. The constructor for `CressieRead` is 28 | ```julia 29 | CressieRead(::Real) 30 | ``` 31 | The Hellinger divergence has a dedicated `Hellinger` type and is also obtained as `CressieRead(-1/2)`. For certain values of `alpha`, `CressieRead` coincides with a divergence that has its own specific type; for instance, `CressieRead(1)` is equivalent to `ChiSquared`, although the underlying code for evaluating the divergence and calculating its gradient and Hessian is different. 32 | 33 | Three versions of each divergence in the above list are currently implemented: a vanilla version, a modified version, and a fully modified version. These modifications extend the domain of the divergence. 34 | 35 | The **modified** version wraps a divergence and takes an additional argument `rho > 1` that specifies the point beyond which the divergence is replaced by its convex (quadratic) extension. 36 | ```julia 37 | ModifiedDivergence(KullbackLeibler(), rho) 38 | ModifiedDivergence(ReverseKullbackLeibler(), rho) 39 | ModifiedDivergence(CressieRead(alpha), rho) 40 | ``` 41 | 42 | Similarly, the **fully modified** version takes two additional arguments, `phi` and `rho` with `0 < phi < 1 < rho`, that specify the points at which the convex extensions take over. 43 | ```julia 44 | FullyModifiedDivergence(KullbackLeibler(), phi, rho) 45 | FullyModifiedDivergence(ReverseKullbackLeibler(), phi, rho) 46 | FullyModifiedDivergence(CressieRead(alpha), phi, rho) 47 | ``` 48 | 49 | 50 | ## Basic usage 51 | 52 | ### Divergence between two vectors 53 | 54 | Each divergence corresponds to a *divergence type*. You can always compute a given divergence between two vectors using the following syntax: 55 | 56 | ```julia 57 | x = rand(100) 58 | y = rand(100) 59 | 𝒦ℒ = KullbackLeibler() 60 | 𝒦ℒ(x, y) 61 | ``` 62 | 63 | Here, `𝒦ℒ` is an instance of a divergence type; calling it evaluates the divergence between its arguments. 64 | 65 | We can also calculate the divergence between the vector ``x`` and the unit vector: 66 | ```julia 67 | r = 𝒦ℒ(x) 68 | ``` 69 | 70 | The `Divergence` type is a subtype of `PreMetric` defined in the `Distances` package. As such, the divergences can be evaluated column-wise for `X::Matrix` and `Y::Matrix`. 71 | 72 | ```julia 73 | colwise(𝒦ℒ, X, Y) 74 | ``` 75 | 76 | The divergence function can also be broadcast: 77 | ```julia 78 | 𝒦ℒ.(x, y) 79 | ``` 80 | 81 | 82 | ### Gradient of the divergence 83 | 84 | To calculate the gradient of `div::Divergence` with respect to ``x::AbstractArray{Float64, 1}``, the 85 | `gradient` method can be used 86 | ```julia 87 | g = gradient(div, x, y) 88 | ``` 89 | or through its in-place version 90 | ```julia 91 | u = Vector{Float64}(undef, size(x)) 92 | gradient!(u, div, x, y) 93 | ``` 94 | 95 | ### Hessian of the divergence 96 | The `hessian` method calculates the Hessian of the divergence with respect to ``x`` 97 | ```julia 98 | h = hessian(div, x, y) 99 | ``` 100 | Its in-place variant is also defined 101 | ```julia 102 | u = Vector{Float64}(undef, size(x)) 103 | hessian!(u, div, x, y) 104 | ``` 105 | 106 | Notice that the divergence's Hessian is sparse: the diagonal entries are the only ones different from zero. For this reason, `hessian(div, x, y)` returns an `Array{T,1}` with the diagonal entries of the Hessian. 107 | 108 | 109 | 110 | 111 | 112 |
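A short end-to-end sketch tying the README together. The vectors below are arbitrary, and `Divergences.gradient` / `Divergences.hessian!` are called with the module qualifier because the module's export list only covers the divergence types — drop the qualifier if your version exports them.

```julia
using Divergences

x = [0.2, 0.3, 0.5]
y = [0.25, 0.25, 0.5]

kl = KullbackLeibler()

kl(x, y)    # divergence between x and y
kl(x)       # divergence between x and the unit vector
kl.(x, y)   # broadcast: one term per coordinate pair

# Gradient of the divergence with respect to x
g = Divergences.gradient(kl, x, y)

# Diagonal of the Hessian, computed in place
h = similar(x)
Divergences.hessian!(h, kl, x, y)

# A modified divergence: quadratic extension beyond rho = 2
mkl = ModifiedDivergence(KullbackLeibler(), 2.0)
mkl(x, y)
```

The in-place `hessian!` is used here because, as noted above, only the diagonal of the Hessian is ever stored.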
-------------------------------------------------------------------------------- /doc/divergences_files/libs/quarto-html/quarto-syntax-highlighting-e26003cea8cd680ca0c55a263523d882.css: -------------------------------------------------------------------------------- 1 | /* quarto syntax highlight colors */ 2 | :root { 3 | --quarto-hl-ot-color: #003B4F; 4 | --quarto-hl-at-color: #657422; 5 | --quarto-hl-ss-color: #20794D; 6 | --quarto-hl-an-color: #5E5E5E; 7 | --quarto-hl-fu-color: #4758AB; 8 | --quarto-hl-st-color: #20794D; 9 | --quarto-hl-cf-color: #003B4F; 10 | --quarto-hl-op-color: #5E5E5E; 11 | --quarto-hl-er-color: #AD0000; 12 | --quarto-hl-bn-color: #AD0000; 13 | --quarto-hl-al-color: #AD0000; 14 | --quarto-hl-va-color: #111111; 15 | --quarto-hl-bu-color: inherit; 16 | --quarto-hl-ex-color: inherit; 17 | --quarto-hl-pp-color: #AD0000; 18 | --quarto-hl-in-color: #5E5E5E; 19 | --quarto-hl-vs-color: #20794D; 20 | --quarto-hl-wa-color: #5E5E5E; 21 | --quarto-hl-do-color: #5E5E5E; 22 | --quarto-hl-im-color: #00769E; 23 | --quarto-hl-ch-color: #20794D; 24 | --quarto-hl-dt-color: #AD0000; 25 | --quarto-hl-fl-color: #AD0000; 26 | --quarto-hl-co-color: #5E5E5E; 27 | --quarto-hl-cv-color: #5E5E5E; 28 | --quarto-hl-cn-color: #8f5902; 29 | --quarto-hl-sc-color: #5E5E5E; 30 | --quarto-hl-dv-color: #AD0000; 31 | --quarto-hl-kw-color: #003B4F; 32 | } 33 | 34 | /* other quarto variables */ 35 | :root { 36 | --quarto-font-monospace: SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace; 37 | } 38 | 39 | pre > code.sourceCode > span { 40 | color: #003B4F; 41 | } 42 | 43 | code span { 44 | color: #003B4F; 45 | } 46 | 47 | code.sourceCode > span { 48 | color: #003B4F; 49 | } 50 | 51 | div.sourceCode, 52 | div.sourceCode pre.sourceCode { 53 | color: #003B4F; 54 | } 55 | 56 | code span.ot { 57 | color: #003B4F; 58 | font-style: inherit; 59 | } 60 | 61 | code span.at { 62 | color: #657422; 63 | font-style: inherit; 64 | } 65 | 66 | code span.ss { 67 | color: #20794D; 68 | font-style: inherit; 69 | } 70 | 71 | code span.an { 72 | color: #5E5E5E; 73 | font-style: inherit; 74 | } 75 | 76 | code span.fu { 77 | color: #4758AB; 78 | font-style: inherit; 79 | } 80 | 81 | code span.st { 82 | color: #20794D; 83 | font-style: inherit; 84 | } 85 | 86 | code span.cf { 87 | color: #003B4F; 88 | font-weight: bold; 89 | font-style: inherit; 90 | } 91 | 92 | code span.op { 93 | color: #5E5E5E; 94 | font-style: inherit; 95 | } 96 | 97 | code span.er { 98 | color: #AD0000; 99 | font-style: inherit; 100 | } 101 | 102 | code span.bn { 103 | color: #AD0000; 104 | font-style: inherit; 105 | } 106 | 107 | code span.al { 108 | color: #AD0000; 109 | font-style: inherit; 110 | } 111 | 112 | code span.va { 113 | color: #111111; 114 | font-style: inherit; 115 | } 116 | 117 | code span.bu { 118 | font-style: inherit; 119 | } 120 | 121 | code span.ex { 122 | font-style: inherit; 123 | } 124 | 125 | code span.pp { 126 | color: #AD0000; 127 | font-style: inherit; 128 | } 129 | 130 | code span.in { 131 | color: #5E5E5E; 132 | font-style: inherit; 133 | } 134 | 135 | code span.vs { 136 | color: #20794D; 137 | font-style: inherit; 138 | } 139 | 140 | code span.wa { 141 | color: #5E5E5E; 142 | font-style: italic; 143 | } 144 | 145 | code span.do { 146 | color: #5E5E5E; 147 | font-style: italic; 148 | } 149 | 150 | code span.im { 151 | color: #00769E; 152 | font-style: inherit; 153 | } 154 | 155 | code span.ch { 156 | color: #20794D; 157 | font-style: inherit; 158 | } 159 | 160 | code span.dt { 161 | color: #AD0000; 162 | font-style: inherit; 163 | } 164 | 165 | code span.fl { 166 | color: #AD0000; 167 | font-style: inherit; 168 | } 169 | 170 | code span.co { 171 | color: #5E5E5E; 172 | font-style: inherit; 173 | } 174 | 175 | code span.cv { 176 | color: #5E5E5E; 177 | font-style: italic; 178 | } 179 | 180 | code span.cn { 181 | color: #8f5902; 182 | font-style: inherit; 183 | } 184 | 185 | code span.sc { 186 | color: #5E5E5E; 187 | font-style: inherit; 188 | } 189 | 190 | code span.dv { 191 | color: #AD0000; 192 | font-style: inherit; 193 | } 194 | 195 | code span.kw { 196 | color: #003B4F; 197 | font-weight: bold; 198 | font-style: inherit; 199 | } 200 | 201 | .prevent-inlining { 202 | content: "</"; 203 | }
-------------------------------------------------------------------------------- /src/Divergences.jl: --------------------------------------------------------------------------------
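*(The dump resumes `src/Divergences.jl` at line 33; the module preamble in lines 1–32 is not included. The sketch below is a plausible reconstruction inferred from how these names are used elsewhere in the dump — `PreMetric` from the README and CHANGELOG, `NaNMath` and the `half` helper from `src/divs.jl`, and the `d`/`m` fields from the constructors that follow. The actual source may differ.)*

```julia
# Hypothetical reconstruction of the missing preamble (lines 1-32) -- not the verbatim source.
module Divergences

using Distances        # provides the PreMetric supertype plus colwise/pairwise
import NaNMath         # log/sqrt variants returning NaN outside the domain (used in divs.jl)

abstract type AbstractDivergence <: PreMetric end
abstract type Divergence <: AbstractDivergence end
abstract type AbstractModifiedDivergence <: AbstractDivergence end

struct KullbackLeibler <: Divergence end
struct ReverseKullbackLeibler <: Divergence end
struct Hellinger <: Divergence end
struct ChiSquared <: Divergence end

struct CressieRead{T <: Real} <: Divergence
    α::T
end

struct ModifiedDivergence{D <: Divergence, M <: NamedTuple} <: AbstractModifiedDivergence
    d::D
    m::M
end

struct FullyModifiedDivergence{D <: Divergence, M <: NamedTuple} <: AbstractModifiedDivergence
    d::D
    m::M
end

# Scalar one-half in the element type; used by γ, γᵤ, and γₗ in src/divs.jl
half(::Type{T}) where {T <: Real} = convert(T, 0.5)
```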
33 | function ModifiedDivergence(D::Divergence, ρ::Union{Real, Int}) 34 | @assert ρ > 1 "A ModifiedDivergence requires ρ > 1" 35 | z = float(ρ) 36 | γ₀ = D(z) 37 | γ₁ = gradient(D, z) 38 | γ₂ = hessian(D, z) 39 | return ModifiedDivergence(D, (γ₀ = γ₀, γ₁ = γ₁, γ₂ = γ₂, ρ = z)) 40 | end 41 | 42 | function FullyModifiedDivergence(D::Divergence, φ::Union{Real, Int}, ρ::Union{Real, Int}) 43 | @assert ρ > 1 "A FullyModifiedDivergence requires ρ > 1" 44 | @assert φ < 1 && φ > 0 "A FullyModifiedDivergence requires φ ∈ (0,1)" 45 | z = float(ρ) 46 | γ₀ = D(z) 47 | γ₁ = gradient(D, z) 48 | γ₂ = hessian(D, z) 49 | w = float(φ) 50 | g₀ = D(w) 51 | g₁ = gradient(D, w) 52 | g₂ = hessian(D, w) 53 | return FullyModifiedDivergence(D, 54 | (γ₀ = γ₀, γ₁ = γ₁, γ₂ = γ₂, ρ = z, g₀ = g₀, g₁ = g₁, g₂ = g₂, φ = w)) 55 | end 56 | 57 | for div in (KullbackLeibler, 58 | ReverseKullbackLeibler, 59 | Hellinger, 60 | CressieRead, 61 | ChiSquared, 62 | ModifiedDivergence, 63 | FullyModifiedDivergence) 64 | @eval begin 65 | function (f::$div)(p, q) 66 | return γ(f, p/q)*q 67 | end 68 | end 69 | end 70 | 71 | for div in (KullbackLeibler, 72 | ReverseKullbackLeibler, 73 | Hellinger, 74 | CressieRead, 75 | ChiSquared, 76 | ModifiedDivergence, 77 | FullyModifiedDivergence) 78 | @eval begin 79 | function (f::$div)(p) 80 | return γ(f, p) 81 | end 82 | end 83 | end 84 | 85 | for div in (KullbackLeibler, 86 | ReverseKullbackLeibler, 87 | Hellinger, 88 | CressieRead, 89 | ChiSquared, 90 | ModifiedDivergence, 91 | FullyModifiedDivergence) 92 | @eval begin 93 | function (f::$div)(a::AbstractArray, b::AbstractArray) 94 | return sum(γ(f, a ./ b) .* b) 95 | end 96 | end 97 | end 98 | 99 | for div in (KullbackLeibler, 100 | ReverseKullbackLeibler, 101 | Hellinger, 102 | CressieRead, 103 | ChiSquared, 104 | ModifiedDivergence, 105 | FullyModifiedDivergence) 106 | @eval begin 107 | function (f::$div)(a::AbstractArray) 108 | return sum(γ(f, a)) 109 | end 110 | end 111 | end 112 | 113 | # Deprecated evaluate functions for backward compatibility 114 | function evaluate(f::AbstractDivergence, a::AbstractArray) 115 | Base.depwarn("evaluate(div, x) is deprecated, use div(x) instead", :evaluate) 116 | return sum(f.(a)) 117 | end 118 | 119 | function evaluate(f::AbstractDivergence, a::AbstractArray, b::AbstractArray) 120 | Base.depwarn("evaluate(div, x, y) is deprecated, use div(x, y) instead", :evaluate) 121 | return sum(f.(a ./ b) .* b) 122 | end 123 | 124 | function evaluate(f::AbstractDivergence, a::Real) 125 | Base.depwarn("evaluate(div, x) is deprecated, use div(x) instead", :evaluate) 126 | return f(a) 127 | end 128 | 129 | function evaluate(f::AbstractDivergence, a::Real, b::Real) 130 | Base.depwarn("evaluate(div, x, y) is deprecated, use div(x, y) instead", :evaluate) 131 | return f(a, b) 132 | end 133 | 134 | # Also keep the Distances.evaluate functions for compatibility 135 | function Distances.evaluate(f::AbstractDivergence, a::AbstractArray) 136 | Base.depwarn("evaluate(div, x) is deprecated, use div(x) instead", :evaluate) 137 | return sum(f.(a)) 138 | end 139 | 140 | function Distances.evaluate(f::AbstractDivergence, a::AbstractArray, b::AbstractArray) 141 | Base.depwarn("evaluate(div, x, y) is deprecated, use div(x, y) instead", :evaluate) 142 | return sum(f.(a ./ b) .* b) 143 | end 144 | 145 | include("divs.jl") 146 | include("plots.jl") 147 | 148 | export 149 | # KL 150 | KullbackLeibler, 151 | # RKL 152 | ReverseKullbackLeibler, 153 | # HD 154 | Hellinger, 155 | # CR 156 | CressieRead, 157 | # 158 | ChiSquared, 159 | # Modified 160 | ModifiedDivergence, 161 | # FullyModified 162 | FullyModifiedDivergence, 163 | # Abbr. 164 | # 𝒦ℒ, 165 | # ℬ𝓊𝓇ℊ, 166 | # 𝒞ℛ, 167 | # ℋ𝒟, 168 | # χ², 169 | # Deprecated 170 | evaluate 171 | end
-------------------------------------------------------------------------------- /doc/out.txt: -------------------------------------------------------------------------------- 1 | 2 | ****************************************************************************** 3 | This program contains Ipopt, a library for large-scale nonlinear optimization. 4 | Ipopt is released as open source code under the Eclipse Public License (EPL). 5 | For more information visit https://github.com/coin-or/Ipopt 6 | ****************************************************************************** 7 | 8 | This is Ipopt version 3.14.17, running with linear solver MUMPS 5.6.2. 9 | 10 | Starting derivative checker for first derivatives. 11 | 12 | * jac_g [ 2, 6] = 5.3584845903814550e-04 v ~ 5.3591483253877421e-04 [ 1.239e-04] 13 | * jac_g [ 3, 19] = -3.0683933380389333e-05 v ~ -3.0653867593534056e-05 [ 3.007e-04] 14 | * jac_g [ 0, 33] = 3.8313848492413171e-04 v ~ 3.8307135241666401e-04 [ 1.752e-04] 15 | * jac_g [ 3, 33] = -2.2776641275915939e-04 v ~ -2.2790658249505213e-04 [ 6.150e-04] 16 | * jac_g [ 2, 44] = -1.4786828033625633e-04 v ~ -1.4783009470098205e-04 [ 2.583e-04] 17 | * jac_g [ 0, 66] = 3.4471674299317916e-05 v ~ 3.4433318135752694e-05 [ 3.836e-04] 18 | * jac_g [ 2, 66] = -2.1337131611093418e-04 v ~ -2.1340002550200774e-04 [ 1.345e-04] 19 | * jac_g [ 2, 80] = 8.1824549098388457e-05 v ~ 8.1920338652649097e-05 [ 9.579e-04] 20 | Starting derivative checker for second derivatives with obj_factor or lambda[i] set to 1.5.
21 | 22 | * 0-th constr_hess[ 33, 100] = 2.3473536762726555e-06 v ~ 2.3647750424515834e-06 [ 1.742e-04] 23 | * 2-th constr_hess[ 6, 100] = -1.1609115508295361e-04 v ~ -1.1607729210828001e-04 [ 1.194e-04] 24 | * 2-th constr_hess[ 19, 100] = 1.2838401380870684e-05 v ~ 1.2867055533088369e-05 [ 2.865e-04] 25 | * 2-th constr_hess[ 80, 100] = -1.5994067451743186e-05 v ~ -1.5977260893399528e-05 [ 1.681e-04] 26 | * 3-th constr_hess[ 15, 100] = 1.7611462485223907e-04 v ~ 1.7609519664980492e-04 [ 1.103e-04] 27 | * 5-th constr_hess[ 11, 101] = -3.7858014615351954e-05 v ~ -3.7844536682201439e-05 [ 1.348e-04] 28 | * 5-th constr_hess[ 12, 101] = -8.5644253651907917e-04 v ~ -8.5635644552028154e-04 [ 1.005e-04] 29 | * 5-th constr_hess[ 35, 101] = -3.3699510638421735e-05 v ~ -3.3688887135362431e-05 [ 1.062e-04] 30 | * 5-th constr_hess[ 51, 101] = -3.6353102428981381e-04 v ~ -3.6345531958810478e-04 [ 2.083e-04] 31 | * 5-th constr_hess[ 90, 101] = -7.6825894646135329e-05 v ~ -7.6794039623259626e-05 [ 3.186e-04] 32 | 33 | Derivative checker detected 18 error(s). 34 | 35 | Number of nonzeros in equality constraint Jacobian...: 714 36 | Number of nonzeros in inequality constraint Jacobian.: 0 37 | Number of nonzeros in Lagrangian Hessian.............: 300 38 | 39 | Total number of variables............................: 102 40 | variables with only lower bounds: 100 41 | variables with lower and upper bounds: 2 42 | variables with only upper bounds: 0 43 | Total number of equality constraints.................: 7 44 | Total number of inequality constraints...............: 0 45 | inequality constraints with only lower bounds: 0 46 | inequality constraints with lower and upper bounds: 0 47 | inequality constraints with only upper bounds: 0 48 | 49 | iter objective inf_pr inf_du lg(mu) ||d|| lg(rg) alpha_du alpha_pr ls 50 | 0 2.4945537e+01 4.87e+01 3.09e+00 -1.0 0.00e+00 - 0.00e+00 0.00e+00 0 51 | 1 1.5331236e+01 7.52e+00 1.29e+01 -1.0 1.27e+00 - 3.33e-01 8.46e-01f 1 52 | 2 8.6677632e+00 3.03e-02 1.48e+01 -1.0 1.50e+00 - 5.58e-01 1.00e+00f 1 53 | 3 3.6851388e+00 6.32e-03 3.52e+00 -1.0 1.00e+00 - 7.88e-01 1.00e+00f 1 54 | 4 2.9843319e+00 7.18e-03 6.88e-01 -1.0 3.39e-01 - 1.00e+00 1.00e+00f 1 55 | 5 2.9604395e+00 2.00e-04 1.52e-02 -1.7 5.43e-02 - 1.00e+00 1.00e+00h 1 56 | 6 2.9584392e+00 5.25e-06 5.82e-04 -3.8 1.09e-02 - 1.00e+00 1.00e+00h 1 57 | 7 2.9583701e+00 3.53e-09 4.38e-07 -5.7 3.16e-04 - 1.00e+00 1.00e+00h 1 58 | 8 2.9583700e+00 2.22e-16 1.51e-12 -8.6 4.39e-07 - 1.00e+00 1.00e+00h 1 59 | 60 | Number of Iterations....: 8 61 | 62 | (scaled) (unscaled) 63 | Objective...............: 2.9583699983817793e+00 2.9583699983817793e+00 64 | Dual infeasibility......: 1.5121568742624474e-12 1.5121568742624474e-12 65 | Constraint violation....: 2.2190582704695317e-16 2.2190582704695317e-16 66 | Variable bound violation: 0.0000000000000000e+00 0.0000000000000000e+00 67 | Complementarity.........: 2.5078555374258810e-09 2.5078555374258810e-09 68 | Overall NLP error.......: 2.5078555374258810e-09 2.5078555374258810e-09 69 | 70 | 71 | Number of objective function evaluations = 9 72 | Number of objective gradient evaluations = 9 73 | Number of equality constraint evaluations = 9 74 | Number of inequality constraint evaluations = 0 75 | Number of equality constraint Jacobian evaluations = 9 76 | Number of inequality constraint Jacobian evaluations = 0 77 | Number of Lagrangian Hessian evaluations = 8 78 | Total seconds in IPOPT = 0.048 79 | 80 | EXIT: Optimal Solution Found. 
81 | -------------------------------------------------------------------------------- /doc/x.csv: -------------------------------------------------------------------------------- 1 | 1.215991590970437741e+00,9.261775475316413875e-01 2 | -4.850460751051989816e-01,1.909416640470130488e+00 3 | 4.529269708157035446e-03,-1.398567573819141208e+00 4 | 6.393980400376259032e-01,5.629692366905708623e-01 5 | 2.810888557005614130e-01,-6.506425691218269414e-01 6 | 1.565454730742296574e+00,-4.871253837646960516e-01 7 | -5.734075329657939424e-01,-5.923939242388691628e-01 8 | -2.065249701686487249e+00,-8.639907696798160286e-01 9 | -1.975808070336193767e+00,4.852162794482699215e-02 10 | 1.187487787703212394e+00,-8.309501164110377758e-01 11 | 8.353167563559255049e-01,2.704568257798388164e-01 12 | 1.623470848031986491e-01,-5.023810944913695287e-02 13 | -1.510011717205014259e-01,-2.389480468664097468e-01 14 | -7.246332322333719489e-01,-9.075636620415978850e-01 15 | 2.607452916534565013e+00,-5.767713305683327407e-01 16 | -1.279520012083450731e-01,7.553912258257560186e-01 17 | 1.671089760546760061e-01,5.009171876243807553e-01 18 | 8.709473065233998001e-01,-9.775552447985510485e-01 19 | 4.097890609914435212e-01,9.933230542922587281e-02 20 | -3.278590844936452031e-03,7.513871233717890341e-01 21 | -1.418933167891346159e+00,-1.669405281121371765e+00 22 | 1.001052166298696644e+00,5.433601923799350475e-01 23 | 1.959189749854419960e+00,-6.626237589458466859e-01 24 | 1.544002722597104027e+00,5.705986685931593305e-01 25 | 1.687075593477876279e+00,-7.632591565425168589e-01 26 | -4.652562200215930588e-01,-1.804882100664519040e+00 27 | -1.587652014975547310e+00,-1.627542437883162663e+00 28 | -1.203324206173798649e-01,4.808494666138199425e-02 29 | 1.300521337099061481e-01,2.597225017214818688e-01 30 | 9.028598758039556582e-01,-9.043166251044085779e-01 31 | -1.604016127970010031e+00,6.385924587773739169e-01 32 | 1.747651544552885072e+00,-1.661520062268959874e+00 33 | 6.473056610403982170e-02,-6.607979864731657049e-02 34 | -3.791121025049637439e-04,-1.211016199762456669e+00 35 | -1.245021105231848058e+00,-6.518361078021591704e-01 36 | -7.050103615811872393e-01,4.739867131641401637e-02 37 | 6.144959686029036749e-01,-8.604133652839524027e-01 38 | -1.761283801606972343e-01,-3.845555442298253523e-01 39 | -1.379676953035884113e+00,1.006292809214440531e+00 40 | -1.580994078045894424e+00,-5.768918695231487481e-01 41 | -2.335364962069430561e-04,8.356921120651418233e-01 42 | 2.560776596322695831e+00,-1.129706854657618109e+00 43 | 3.397246328428794593e-01,5.298041779152827813e-01 44 | -1.633975754963524984e+00,1.441568620657900368e+00 45 | -4.359418985905065824e-01,-2.471644500127289312e+00 46 | -2.647675941860244131e-01,-7.968952554704767932e-01 47 | -2.492085135043939470e+00,5.770721271805400177e-01 48 | -2.571193424262786520e-01,-2.030453860429926871e-01 49 | -3.782967529696374398e-01,3.711458733713088320e-01 50 | 8.377383991467055591e-01,-6.039851867158205767e-01 51 | 2.107401991811244546e+00,8.658978747289991507e-02 52 | 1.385257248003188613e+00,-1.556772353920794771e-01 53 | -1.369824482642111541e+00,1.167782061659807358e+00 54 | -1.377489341822618174e+00,2.544208433012131176e-01 55 | 2.612819663252514690e+00,3.376026620752021756e-01 56 | 8.305390175729218671e-03,-4.118769661224673806e-01 57 | 6.653058342623591193e-01,-4.876062240724935437e-01 58 | -3.955777551418172555e-01,-4.325581878196209096e-01 59 | 1.813106889730054239e-01,3.944521423782968439e-01 60 | 2.469357404384131816e-01,-4.209844808202629629e-01 61 | 
-5.841447617855288765e-01,2.897748568964129134e-01 62 | -3.171938174408879818e-01,2.075400798645438805e+00 63 | 7.323468179645951093e-01,8.711247034316923488e-01 64 | -4.260821002427601090e-01,-3.260235321678411347e-01 65 | -5.207853424007808929e-01,1.201213922163944847e+00 66 | 5.040402838902382499e-01,-4.080753730215513908e-01 67 | -3.621249302675313420e-01,-2.038124535177853858e+00 68 | 1.215046284887489669e+00,-1.008086310917404083e+00 69 | -2.913279427184061632e+00,-1.870791921025855675e+00 70 | 3.804289416263564316e-01,-3.515134840413086659e-01 71 | 1.739078926074321130e+00,1.841837918955169934e-02 72 | -2.436232936961063800e+00,1.676437312275282698e+00 73 | 2.643486044390405132e-01,3.269273737641626432e-01 74 | -5.394147209158433043e-01,-2.191005288088642422e-01 75 | -3.902906233385816659e-01,8.294055811834891712e-01 76 | -4.438527421041750798e-01,-2.211135309007885130e+00 77 | -2.080155424499641637e+00,2.356145581085659357e-01 78 | 2.300638600222202790e+00,7.708651938869668374e-01 79 | 1.192512429090090365e+00,-1.478586245779841546e+00 80 | 1.330522826932293734e+00,1.143754043206929083e+00 81 | 2.033432920943434308e-01,3.384964074944141199e-01 82 | -1.119534874474377384e+00,-4.152879139008012754e-01 83 | -5.919503764010738411e-01,6.327818661062848404e-01 84 | 9.224057089744708282e-01,2.270692857804395892e+00 85 | -1.853135050148211338e-01,1.818662550584951576e-01 86 | 7.908873416478194862e-01,2.482205863003360824e-01 87 | -4.947250415538235746e-01,-4.593608995402441164e-01 88 | -6.620850616447171433e-01,-8.498443694647918045e-01 89 | 8.902778036103065062e-02,8.303358165442455974e-01 90 | 1.489254253608170431e-01,-8.560838259088672242e-01 91 | -3.848609510114946453e-01,7.156623721939246729e-02 92 | 4.219952608497866686e-01,-4.776574467651166778e-01 93 | -8.006803200121124231e-01,4.789798257463918629e-01 94 | 6.248126914047237301e-01,3.336621052869482851e-01 95 | 3.770368693403891580e-01,1.037539944257899194e+00 96 | 6.578989202725575858e-01,-5.100163988547470328e-01 97 | -3.224252701090510320e-01,-2.698749352933712542e-01 98 | -1.367432217471818978e+00,-9.787637157823073641e-01 99 | 1.366262955439385030e+00,-4.442932600761115847e-01 100 | -7.743206910578889213e-01,3.773004930448521921e-01 101 | -------------------------------------------------------------------------------- /doc/divergences_files/libs/quarto-html/anchor.min.js: -------------------------------------------------------------------------------- 1 | // @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&dn=expat.txt Expat 2 | // 3 | // AnchorJS - v5.0.0 - 2023-01-18 4 | // https://www.bryanbraun.com/anchorjs/ 5 | // Copyright (c) 2023 Bryan Braun; Licensed MIT 6 | // 7 | // @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&dn=expat.txt Expat 8 | !function(A,e){"use strict";"function"==typeof define&&define.amd?define([],e):"object"==typeof module&&module.exports?module.exports=e():(A.AnchorJS=e(),A.anchors=new A.AnchorJS)}(globalThis,function(){"use strict";return function(A){function 
u(A){A.icon=Object.prototype.hasOwnProperty.call(A,"icon")?A.icon:"",A.visible=Object.prototype.hasOwnProperty.call(A,"visible")?A.visible:"hover",A.placement=Object.prototype.hasOwnProperty.call(A,"placement")?A.placement:"right",A.ariaLabel=Object.prototype.hasOwnProperty.call(A,"ariaLabel")?A.ariaLabel:"Anchor",A.class=Object.prototype.hasOwnProperty.call(A,"class")?A.class:"",A.base=Object.prototype.hasOwnProperty.call(A,"base")?A.base:"",A.truncate=Object.prototype.hasOwnProperty.call(A,"truncate")?Math.floor(A.truncate):64,A.titleText=Object.prototype.hasOwnProperty.call(A,"titleText")?A.titleText:""}function d(A){var e;if("string"==typeof A||A instanceof String)e=[].slice.call(document.querySelectorAll(A));else{if(!(Array.isArray(A)||A instanceof NodeList))throw new TypeError("The selector provided to AnchorJS was invalid.");e=[].slice.call(A)}return e}this.options=A||{},this.elements=[],u(this.options),this.add=function(A){var e,t,o,i,n,s,a,r,l,c,h,p=[];if(u(this.options),0!==(e=d(A=A||"h2, h3, h4, h5, h6")).length){for(null===document.head.querySelector("style.anchorjs")&&((A=document.createElement("style")).className="anchorjs",A.appendChild(document.createTextNode("")),void 0===(h=document.head.querySelector('[rel="stylesheet"],style'))?document.head.appendChild(A):document.head.insertBefore(A,h),A.sheet.insertRule(".anchorjs-link{opacity:0;text-decoration:none;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale}",A.sheet.cssRules.length),A.sheet.insertRule(":hover>.anchorjs-link,.anchorjs-link:focus{opacity:1}",A.sheet.cssRules.length),A.sheet.insertRule("[data-anchorjs-icon]::after{content:attr(data-anchorjs-icon)}",A.sheet.cssRules.length),A.sheet.insertRule('@font-face{font-family:anchorjs-icons;src:url(data:n/a;base64,AAEAAAALAIAAAwAwT1MvMg8yG2cAAAE4AAAAYGNtYXDp3gC3AAABpAAAAExnYXNwAAAAEAAAA9wAAAAIZ2x5ZlQCcfwAAAH4AAABCGhlYWQHFvHyAAAAvAAAADZoaGVhBnACFwAAAPQAAAAkaG10eASAADEAAAGYAAAADGxvY2EACACEAAAB8AAAAAhtYXhwAAYAVwAAARgAAAAgbmFtZQGOH9cAAAMAAAAAunBvc3QAAwAAAAADvAAAACAAAQAAAAEAAHzE2p9fDzz1AAkEAAAAAADRecUWAAAAANQA6R8AAAAAAoACwAAAAAgAAgAAAAAAAAABAAADwP/AAAACgAAA/9MCrQABAAAAAAAAAAAAAAAAAAAAAwABAAAAAwBVAAIAAAAAAAIAAAAAAAAAAAAAAAAAAAAAAAMCQAGQAAUAAAKZAswAAACPApkCzAAAAesAMwEJAAAAAAAAAAAAAAAAAAAAARAAAAAAAAAAAAAAAAAAAAAAQAAg//0DwP/AAEADwABAAAAAAQAAAAAAAAAAAAAAIAAAAAAAAAIAAAACgAAxAAAAAwAAAAMAAAAcAAEAAwAAABwAAwABAAAAHAAEADAAAAAIAAgAAgAAACDpy//9//8AAAAg6cv//f///+EWNwADAAEAAAAAAAAAAAAAAAAACACEAAEAAAAAAAAAAAAAAAAxAAACAAQARAKAAsAAKwBUAAABIiYnJjQ3NzY2MzIWFxYUBwcGIicmNDc3NjQnJiYjIgYHBwYUFxYUBwYGIwciJicmNDc3NjIXFhQHBwYUFxYWMzI2Nzc2NCcmNDc2MhcWFAcHBgYjARQGDAUtLXoWOR8fORYtLTgKGwoKCjgaGg0gEhIgDXoaGgkJBQwHdR85Fi0tOAobCgoKOBoaDSASEiANehoaCQkKGwotLXoWOR8BMwUFLYEuehYXFxYugC44CQkKGwo4GkoaDQ0NDXoaShoKGwoFBe8XFi6ALjgJCQobCjgaShoNDQ0NehpKGgobCgoKLYEuehYXAAAADACWAAEAAAAAAAEACAAAAAEAAAAAAAIAAwAIAAEAAAAAAAMACAAAAAEAAAAAAAQACAAAAAEAAAAAAAUAAQALAAEAAAAAAAYACAAAAAMAAQQJAAEAEAAMAAMAAQQJAAIABgAcAAMAAQQJAAMAEAAMAAMAAQQJAAQAEAAMAAMAAQQJAAUAAgAiAAMAAQQJAAYAEAAMYW5jaG9yanM0MDBAAGEAbgBjAGgAbwByAGoAcwA0ADAAMABAAAAAAwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAH//wAP) format("truetype")}',A.sheet.cssRules.length)),h=document.querySelectorAll("[id]"),t=[].map.call(h,function(A){return A.id}),i=0;i\]./()*\\\n\t\b\v\u00A0]/g,"-").replace(/-{2,}/g,"-").substring(0,this.options.truncate).replace(/^-+|-+$/gm,"").toLowerCase()},this.hasAnchorJSLink=function(A){var e=A.firstChild&&-1<(" "+A.firstChild.className+" ").indexOf(" anchorjs-link "),A=A.lastChild&&-1<(" "+A.lastChild.className+" ").indexOf(" 
anchorjs-link ");return e||A||!1}}}); 9 | // @license-end -------------------------------------------------------------------------------- /src/divs.jl: -------------------------------------------------------------------------------- 1 | function xlogx(x::Number) 2 | result = x * NaNMath.log(x) 3 | return iszero(x) ? zero(result) : result 4 | end 5 | 6 | function xlogy(x::Number, y::Number) 7 | result = x * NaNMath.log(y) 8 | return iszero(x) && !isnan(y) ? zero(result) : result 9 | end 10 | 11 | alogab(a, b) = xlogy(a, a/b) - a + b 12 | blogab(a, b) = -xlogy(b, a ./ b) + a - b 13 | aloga(a) = xlogx(a) - a + one(eltype(a)) 14 | loga(a) = -log(a) + a - one(eltype(a)) 15 | 16 | ## ------------------------------------------------------- 17 | ## Divergence functions 18 | ## ------------------------------------------------------- 19 | γ(::KullbackLeibler, a::T) where {T <: Real} = aloga(a) 20 | γ(::ReverseKullbackLeibler, a::T) where {T <: Real} = loga(a) 21 | γ(::Hellinger, a::T) where {T <: Real} = 2*a - 4*NaNMath.sqrt(a) + 2 22 | γ(::ChiSquared, a::T) where {T <: Real} = abs2(a - one(eltype(T)))*half(T) 23 | 24 | function γ(d::CressieRead{D}, a::T) where {T <: Real, D} 25 | α = d.α 26 | if a >= 0 27 | (a^(1 + α) + α - a*(1 + α))/(α*(1 + α)) 28 | else 29 | if α > 0 30 | zero(eltype(a)) 31 | else 32 | convert(eltype(a), NaN) 33 | end 34 | end 35 | end 36 | 37 | function γᵤ(d::D, a::T) where {T <: Real, D <: AbstractModifiedDivergence} 38 | (; γ₀, γ₁, γ₂, ρ) = d.m 39 | return (γ₀ + γ₁*(a-ρ) + half(T)*γ₂*(a-ρ)^2) 40 | end 41 | 42 | function γₗ(d::D, a::T) where {T <: Real, D <: AbstractModifiedDivergence} 43 | (; g₀, g₁, g₂, φ) = d.m 44 | return (g₀ + g₁*(a-φ) + half(T)*g₂*(a-φ)^2) 45 | end 46 | 47 | function γ(d::ModifiedDivergence, a::T) where {T <: Real} 48 | (; ρ) = d.m 49 | div = d.d 50 | return a > ρ ? γᵤ(d, a) : γ(div, a) 51 | end 52 | 53 | function γ(d::FullyModifiedDivergence, a::T) where {T <: Real} 54 | (; ρ, φ) = d.m 55 | div = d.d 56 | return a > ρ ? γᵤ(d, a) : a < φ ? γₗ(d, a) : γ(div, a) 57 | end 58 | 59 | function γ(d::AbstractDivergence, a::AbstractArray{T}) where {T <: Real} 60 | out = similar(a) 61 | for j in eachindex(a) 62 | out[j] = γ(d, a[j]) 63 | end 64 | return out 65 | end 66 | 67 | ## ------------------------------------------------------- 68 | ## Gradient 69 | ## ------------------------------------------------------- 70 | ∇ᵧ(::KullbackLeibler, a::T) where {T} = NaNMath.log(a) 71 | ∇ᵧ(::ReverseKullbackLeibler, a::T) where {T} = a > 0 ? -1/a + one(T) : convert(T, -Inf) 72 | function ∇ᵧ(d::CressieRead, a::T) where {T} 73 | return a >= 0 ? (a^d.α - one(T))/d.α : convert(T, sign(d.α)*Inf) 74 | end 75 | ∇ᵧ(d::Hellinger, a::T) where {T} = a > 0 ? 2(one(T)-one(T)/sqrt(a)) : convert(T, -Inf) 76 | ∇ᵧ(d::ChiSquared, a::T) where {T} = a - one(T) 77 | 78 | function ∇ᵤ(d::D, a::T) where {T, D <: AbstractModifiedDivergence} 79 | (; γ₀, γ₁, γ₂, ρ) = d.m 80 | return (γ₁ + γ₂*(a-ρ)) 81 | end 82 | 83 | function ∇ₗ(d::D, a::T) where {T, D <: AbstractModifiedDivergence} 84 | (; g₀, g₁, g₂, φ) = d.m 85 | return (g₁ + g₂*(a-φ)) 86 | end 87 | 88 | function ∇ᵧ(d::ModifiedDivergence, a::T) where {T <: Real} 89 | (; ρ) = d.m 90 | div = d.d 91 | return a > ρ ? ∇ᵤ(d, a) : ∇ᵧ(div, a) 92 | end 93 | 94 | function ∇ᵧ(d::FullyModifiedDivergence, a::T) where {T <: Real} 95 | (; ρ, φ) = d.m 96 | div = d.d 97 | return a > ρ ? ∇ᵤ(d, a) : a < φ ? 
∇ₗ(d, a) : ∇ᵧ(div, a) 98 | end 99 | 100 | ## ------------------------------------------------------- 101 | ## Hessian 102 | ## ------------------------------------------------------- 103 | Hᵧ(::KullbackLeibler, a::T) where {T} = a > 0 ? one(T)/a : convert(T, Inf) 104 | Hᵧ(::ReverseKullbackLeibler, a::T) where {T} = a > 0 ? one(T)/a^2 : convert(T, Inf) 105 | Hᵧ(d::CressieRead, a::T) where {T} = a > 0 ? a^(d.α-1) : convert(T, Inf) 106 | Hᵧ(d::Hellinger, a::T) where {T} = a > 0 ? one(T)/sqrt(a^(3)) : convert(T, Inf) 107 | Hᵧ(d::ChiSquared, a::T) where {T} = one(T) 108 | 109 | function Hᵤ(d::D, a::T) where {T, D <: AbstractModifiedDivergence} 110 | (; γ₀, γ₁, γ₂, ρ) = d.m 111 | return γ₂ 112 | end 113 | 114 | function Hₗ(d::D, a::T) where {T, D <: AbstractModifiedDivergence} 115 | (; g₀, g₁, g₂, φ) = d.m 116 | return g₂ 117 | end 118 | 119 | function Hᵧ(d::ModifiedDivergence, a::T) where {T <: Real} 120 | (; ρ) = d.m 121 | div = d.d 122 | return a > ρ ? Hᵤ(d, a) : Hᵧ(div, a) 123 | end 124 | 125 | function Hᵧ(d::FullyModifiedDivergence, a::T) where {T <: Real} 126 | (; ρ, φ) = d.m 127 | div = d.d 128 | return a > ρ ? Hᵤ(d, a) : a < φ ? Hₗ(d, a) : Hᵧ(div, a) 129 | end 130 | 131 | ## ------------------------------------------------------- 132 | ## Syntax sugar 133 | ## ------------------------------------------------------- 134 | 135 | gradient(d::AbstractDivergence, a::T) where {T <: Real} = ∇ᵧ(d, a) 136 | gradient(d::AbstractDivergence, a::T, b::R) where {T <: Real, R <: Real} = ∇ᵧ(d, a/b) 137 | 138 | function gradient!(u::AbstractVector{T}, 139 | d::AbstractDivergence, 140 | a::AbstractArray{R}) where {T <: Real, R <: Real} 141 | @inbounds for i in eachindex(a, u) 142 | u[i] = ∇ᵧ(d, a[i]) 143 | end 144 | return u 145 | end 146 | 147 | function gradient!(u::AbstractVector{T}, 148 | d::AbstractDivergence, 149 | a::AbstractArray{R}, 150 | b::AbstractArray{S}) where {T <: Real, R <: Real, S <: Real} 151 | @inbounds for i in eachindex(a, b, u) 152 | u[i] = ∇ᵧ(d, a[i]/b[i]) 153 | end 154 | return u 155 | end 156 | 157 | function gradient(d::AbstractDivergence, a::AbstractArray{R}) where {R <: Real} 158 | u = similar(a) 159 | return gradient!(u, d, a) 160 | end 161 | 162 | function gradient(d::AbstractDivergence, 163 | a::AbstractArray{T}, 164 | b::AbstractArray{R}) where {T <: Real, R <: Real} 165 | u = similar(a, promote_type(T, R)) 166 | return gradient!(u, d, a, b) 167 | end 168 | 169 | function gradient_sum(d::AbstractDivergence, a::AbstractArray{R}) where {R <: Real} 170 | r = zero(R) 171 | @inbounds for i in eachindex(a) 172 | r += ∇ᵧ(d, a[i]) 173 | end 174 | return r 175 | end 176 | 177 | hessian(d::AbstractDivergence, a::T) where {T <: Real} = Hᵧ(d, a) 178 | hessian(d::AbstractDivergence, a::T, b::R) where {T <: Real, R <: Real} = Hᵧ(d, a/b) 179 | 180 | function hessian!(u::AbstractVector{R}, 181 | d::AbstractDivergence, 182 | a::AbstractArray{R}) where {R <: Real} 183 | @inbounds for i in eachindex(a, u) 184 | u[i] = Hᵧ(d, a[i]) 185 | end 186 | return u 187 | end 188 | 189 | function hessian!(u::AbstractVector{T}, 190 | d::AbstractDivergence, 191 | a::AbstractArray{R}, 192 | b::AbstractArray{S}) where {T <: Real, R <: Real, S <: Real} 193 | @inbounds for i in eachindex(a, b, u) 194 | u[i] = Hᵧ(d, a[i]/b[i]) 195 | end 196 | return u 197 | end 198 | 199 | function hessian(d::AbstractDivergence, a::AbstractArray{R}) where {R <: Real} 200 | u = similar(a) 201 | return hessian!(u, d, a) 202 | end 203 | 204 | function hessian_sum(d::AbstractDivergence, a::AbstractArray{R}) where {R <: Real} 205 | 
r = zero(R)
206 |     @inbounds for i in eachindex(a)
207 |         r += Hᵧ(d, a[i])
208 |     end
209 |     return r
210 | end
211 | 
212 | half(::Type{T}) where {T <: Real} = convert(T, 0.5)
213 | half(::Type{T}) where {T} = convert(eltype(T), 0.5)
214 | 
--------------------------------------------------------------------------------
/doc/divergences_files/libs/clipboard/clipboard.min.js:
--------------------------------------------------------------------------------
1 | /*!
2 |  * clipboard.js v2.0.11
3 |  * https://clipboardjs.com/
4 |  *
5 |  * Licensed MIT © Zeno Rocha
6 |  */
7 | !function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e():"function"==typeof define&&define.amd?define([],e):"object"==typeof exports?exports.ClipboardJS=e():t.ClipboardJS=e()}(this,function(){return n={686:function(t,e,n){"use strict";n.d(e,{default:function(){return b}});var e=n(279),i=n.n(e),e=n(370),u=n.n(e),e=n(817),r=n.n(e);function c(t){try{return document.execCommand(t)}catch(t){return}}var a=function(t){t=r()(t);return c("cut"),t};function o(t,e){var n,o,t=(n=t,o="rtl"===document.documentElement.getAttribute("dir"),(t=document.createElement("textarea")).style.fontSize="12pt",t.style.border="0",t.style.padding="0",t.style.margin="0",t.style.position="absolute",t.style[o?"right":"left"]="-9999px",o=window.pageYOffset||document.documentElement.scrollTop,t.style.top="".concat(o,"px"),t.setAttribute("readonly",""),t.value=n,t);return e.container.appendChild(t),e=r()(t),c("copy"),t.remove(),e}var f=function(t){var e=1
--------------------------------------------------------------------------------
/doc/divergences.qmd:
--------------------------------------------------------------------------------
50 | 
51 | |               | **$\gamma(u)$**                                                | Domain              | **$\nabla_\gamma(u)$**                 | **$H_\gamma(u)$**   |
52 | | ------------- | -------------------------------------------------------------- | ------------------- | -------------------------------------- | ------------------- |
53 | | $KL$          | $u \log(u) - u + 1$                                            | $(0,+\infty)$       | $\log(u)$                              | $\frac{1}{u}$       |
54 | | $RKL$         | $-\log(u) + u - 1$                                             | $(0,+\infty)$       | $1-\frac{1}{u}$                        | $\frac{1}{u^2}$     |
55 | | $\mathscr{H}$ | $2u - 4\sqrt{u} + 2$                                           | $(0,+\infty)$       | $2\left(1 - \frac{1}{\sqrt{u}}\right)$ | $\frac{1}{u^{3/2}}$ |
56 | | $\chi^2$      | $\frac{1}{2}(u - 1)^2$                                         | $(-\infty,+\infty)$ | $u - 1$                                | $1$                 |
57 | | $CR$          | $\frac{u^{1+\alpha} + \alpha - u(1+\alpha)}{\alpha(1+\alpha)}$ | $(0,+\infty)$       | $\frac{u^\alpha - 1}{\alpha}$          | $u^{\alpha-1}$      |
58 | 
59 | The Cressie Read family of divergences is indexed by $\alpha\in\mathbb{R}$. This family contains the chi-squared divergence ($\alpha = 1$), the Kullback Leibler divergence ($\alpha \to 0$), the reverse Kullback Leibler divergence ($\alpha \to -1$), and the Hellinger distance ($\alpha = -1/2$).
60 | 
61 | When $\alpha<0$, $\gamma$ in the Cressie Read family is not convex on $(-\infty, 0)$, so we set $\gamma(u)=+\infty$ there.
62 | 
63 | ### Convex Conjugate
64 | 
65 | The convex conjugate of $\gamma$ is defined as
66 | $$
67 | \gamma^*(\upsilon) = \sup_{u\in\mathbb{R}} \left\{u\upsilon - \gamma(u)\right\}.
68 | $$
69 | For a twice continuously differentiable function, the convex conjugate is
70 | $$
71 | \gamma^*(z) = z\,(\gamma')^{-1}(z) - \gamma\left((\gamma')^{-1}(z)\right),
72 | $$
73 | where $(\gamma')^{-1}(z) := \{u: \gamma'(u) = z\}$. The domain of $\gamma^*$ is $(-\infty, d)$, where
74 | $$
75 | d = \lim_{u\to +\infty} \gamma(u)/u.
76 | $$
77 | 
78 | The first derivative of the convex conjugate $\gamma^*(z)$ can be found as:
79 | $$
80 | \frac{d}{dz} \gamma^*(z) = (\gamma')^{-1}(z).
$$
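
As a quick numerical sanity check of the conjugate (illustrative only; it assumes divergence instances are scalar-callable, as in `src/plots.jl`), the supremum defining $\gamma^*$ can be approximated on a grid. For $KL$ it should approach $e^z - 1$ (see the table below):

```{julia}
using Divergences
kl = KullbackLeibler()
z = 0.3
u = 0.001:0.001:10
maximum(@. u * z - kl(u)), exp(z) - 1
```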
81 | 
82 | The second derivative can be derived using the inverse function theorem:
83 | $$
84 | \frac{d^2}{dz^2} \gamma^*(z) = \frac{1}{\gamma''((\gamma')^{-1}(z))}.
85 | $$
86 | 
87 | ## Modified divergences
88 | 
89 | |               | $\gamma^*(z)$                                            | $\lim_{u \to \infty} \frac{\gamma(u)}{u}$                              | $\lim_{u \to \infty} \frac{u \gamma'(u)}{\gamma(u)}$                     |
90 | | ------------- | -------------------------------------------------------- | ---------------------------------------------------------------------- | ------------------------------------------------------------------------ |
91 | | $KL$          | $(e^z - 1)$                                              | $+\infty$                                                               | $1$                                                                      |
92 | | $RKL$         | $-\log(1 - z)$                                           | $1$                                                                     | $1$                                                                      |
93 | | $\mathscr{H}$ | $\frac{2z}{2-z}$                                         | $2$                                                                     | $1$                                                                      |
94 | | $\chi^2$      | $\left(z + \frac{z^2}{2}\right)$                         | $+\infty$                                                               | $2$                                                                      |
95 | | $CR$          | $\frac{(1+\alpha z)^{(1+\alpha)/\alpha} - 1}{1+\alpha}$  | $\begin{cases} +\infty & \alpha>0 \\ -1/\alpha & \alpha<0 \end{cases}$  | $\begin{cases} 1+\alpha & \alpha>0 \\ 1 & \mathrm{otherwise}\end{cases}$ |
96 | 
97 | For many of the divergences defined above the effective domain of their conjugate, $\gamma^*$, does not span $\mathbb{R}$ since $\gamma(u)/u \to l < +\infty$ as $u \to +\infty$.
98 | 
99 | For some $\vartheta>0$, let $u_{\vartheta}\equiv 1+\vartheta$. The modified divergence $\gamma_{\vartheta}$ is defined as
100 | $$
101 | \gamma_{\vartheta}(u) = \begin{cases}
102 | \gamma(u_{\vartheta}) + \gamma'(u_{\vartheta})(u-u_{\vartheta}) + \frac{1}{2}\gamma''(u_{\vartheta})(u-u_{\vartheta})^2, & u\geqslant u_{\vartheta}\\
103 | \newline\gamma(u), & u\in (0,u_{\vartheta})\\
104 | \newline \lim_{u\to 0^{+}} \gamma(u), & u=0 \\
105 | \newline+\infty, & u<0
106 | \end{cases}.
107 | $$
108 | 
109 | It is immediate to verify that this divergence still satisfies all the requirements and normalization of $\gamma$. Furthermore, it holds that
110 | $$
111 | \lim_{u\to\infty}\frac{\gamma_{\vartheta}(u)}{u} = +\infty,
112 | \qquad \text{and}\qquad
113 | \lim_{u\to\infty}\frac{u\gamma'_{\vartheta}(u)}{\gamma_{\vartheta}(u)} = 2.
114 | $$
115 | 
116 | The first limit implies that the image of $\gamma'_{\vartheta}$ is the real line and thus $\overline{\mathrm{dom}\,\gamma^*_{\vartheta}}=(-\infty,+\infty)$. The expression for the conjugate follows from the Legendre-Fenchel transform:
117 | $$
118 | \gamma_{\vartheta}^*(\upsilon) =
119 | \begin{cases}
120 | a_{\vartheta}\upsilon^2 + b_{\vartheta}\upsilon + c_{\vartheta}, & \upsilon>\gamma'(u_{\vartheta}),\\
121 | \newline \gamma^*(\upsilon), & \upsilon\leqslant \gamma'(u_{\vartheta})
122 | \end{cases},
123 | $$
124 | 
125 | where $a_{\vartheta} = 1/(2\gamma''(u_{\vartheta}))$,
126 | $b_{\vartheta}=u_{\vartheta} - 2a_{\vartheta}\gamma'(u_{\vartheta})$,
127 | and $c_{\vartheta}= a_{\vartheta}\gamma'(u_{\vartheta})^2 - \gamma(u_{\vartheta})$.
128 | The conjugate $\gamma_{\vartheta}^*(\upsilon)$ has a closed-form
129 | expression whenever the original divergence does.
130 | 
131 | 
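To see what the modification buys, compare gradients on the upper tail: the base $RKL$ gradient is bounded above by $1$ (so $\mathrm{dom}\,\gamma^*$ stops at $1$), while the modified version has an affine tail whose image is all of $\mathbb{R}$. A minimal sketch; the `FullyModifiedDivergence(d, lower, upper)` call mirrors the one in `doc/mdexample.jl`, and its argument convention is an assumption:

```{julia}
using Divergences
rkl  = ReverseKullbackLeibler()
mrkl = FullyModifiedDivergence(rkl, 0.7, 1.2)
u = [10.0, 100.0, 1000.0]
Divergences.gradient.(Ref(rkl), u), Divergences.gradient.(Ref(mrkl), u)
```
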
132 | ## Fully modified divergences
133 | 
134 | For some $\vartheta>0$ and $0 < \varphi < 1-a_{\gamma}$, let $u_{\vartheta}\equiv
135 | 1+\vartheta$ and $u_{\varphi} = a_{\gamma} + \varphi$. The **fully**
136 | modified divergence $\gamma_{\varphi, \vartheta}$ is defined as
137 | $$
138 | \gamma_{\varphi, \vartheta}(u) = \begin{cases}
139 | \gamma(u_{\vartheta}) + \gamma'(u_{\vartheta})(u-u_{\vartheta}) + \frac{1}{2}\gamma''(u_{\vartheta})(u-u_{\vartheta})^2, & u\geqslant u_{\vartheta}\\
140 | \newline\gamma(u), & u\in (u_{\varphi},u_{\vartheta})\\
141 | \newline \gamma(u_{\varphi}) + \gamma'(u_{\varphi})(u-u_{\varphi}) + \frac{1}{2}\gamma''(u_{\varphi})(u-u_{\varphi})^2, & u\leqslant u_{\varphi}\\
142 | \end{cases}.
143 | $$
144 | It is immediate to verify that this divergence still satisfies all the
145 | requirements and normalization of $\gamma$, while being defined on all of
146 | $\mathbb{R}$.
147 | 
148 | 
149 | 
150 | ## Using the `Divergences` package
151 | 
152 | ```{julia}
153 | using Divergences
154 | ```
155 | 
156 | Suppose $a = [0.2, 0.4, 0.4]$ and $b = [0.1, 0.3, 0.6]$.
157 | 
158 | ```{julia}
159 | a = [0.2, 0.4, 0.4]
160 | b = [0.1, 0.3, 0.6]
161 | ```
162 | 
163 | We instantiate a divergence and evaluate it:
164 | 
165 | ```{julia}
166 | KL = KullbackLeibler()
167 | D = KL(a, b)
168 | ```
169 | 
170 | To evaluate the gradient and the Hessian:
171 | 
172 | ```{julia}
173 | Divergences.gradient(KL, a, b)
174 | ```
175 | 
176 | ```{julia}
177 | Divergences.hessian(KL, a, b)
178 | ```
179 | 
180 | ## Application to MDE estimation
181 | 
182 | Consider the following optimization problem:
183 | 
184 | $$
185 | \begin{aligned} & \min_{\theta,\{\pi_{i}\}}\sum_{i=1}^{n}\gamma(\pi_{i})\\
186 | \text{s.t. } & \sum\pi_{i}g(x_{i},\theta)=0\\
187 | & \sum\pi_{i}=1
188 | \end{aligned}
189 | $$
190 | where
191 | $$
192 | g(x_{i},\theta)=\begin{pmatrix}x_i-\theta\\
193 | (x_i-\theta)^{2}-1
194 | \end{pmatrix},
195 | $$
196 | and $x_i\sim N(0,0.64)$. The gradient of
197 | $$
198 | \bar{g}(\theta, \pi) = \sum_{i=1}^n \pi_i g(x_i,\theta)
199 | $$
200 | is
201 | $$
202 | \underset{(m\times k)}{\frac{\partial\bar{g}_{n}(\theta,\pi)}{\partial\theta}}=\sum_{i=1}^{n}\pi_{i}\begin{pmatrix}-1\\
203 | -2(x_{i}-\theta)
204 | \end{pmatrix},\quad\underset{(m\times n)}{\frac{\partial\bar{g}_{n}(\theta,\pi)}{\partial\pi}}=\begin{pmatrix}x_{i}-\theta\\
205 | (x_{i}-\theta)^{2}-1
206 | \end{pmatrix}'.
207 | $$
208 | 
209 | 
--------------------------------------------------------------------------------
/doc/divergences.md:
--------------------------------------------------------------------------------
1 | # `Divergences.jl`
2 | 
3 | `Divergences` is a Julia package that makes it easy to evaluate the
4 | value of divergences and their derivatives. These divergences are used
5 | to good effect in the package
6 | [MomentBasedEstimators](http://github.com/gragusa/MomentBasedEstimators.jl/git).
7 | 
8 | ## Definition
9 | 
10 | A divergence between $a\in \mathbb{R}^n$ and $b\in\mathbb{R}^n$ is
11 | defined as
12 | 
13 | $$
14 | D(a,b) = \sum_{i=1}^n \gamma(a_i/b_i) b_i,
15 | $$
16 | 
17 | where, for $a_{\gamma}\in\mathbb{R}$ with $a_{\gamma}\leqslant 0$,
18 | $\gamma:(a_{\gamma},+\infty)\to\mathbb{R}_{+}$ is strictly convex and
19 | twice continuously differentiable on the interior of its domain
20 | $(a_{\gamma}, +\infty)$. The divergence function is normalized so as to
21 | satisfy $\gamma(1) = 0$, $\gamma'(1)=0$, and $\gamma''(1)=1$. The
22 | normalizations $\gamma(1) = \gamma'(1) = 0$ and $\gamma''(1) = 1$ do
23 | not restrict generality, since for any differentiable convex function
24 | $\gamma$ there exists another, say $\overline{\gamma}$, satisfying the
25 | normalization.
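
These normalizations are easy to check numerically; a minimal sketch (the scalar `gradient`/`hessian` helpers are defined in `src/divs.jl`, while the scalar call `d(1.0)` assumes the callable interface used by `src/plots.jl`):

````julia
using Divergences
d = KullbackLeibler()
d(1.0), Divergences.gradient(d, 1.0), Divergences.hessian(d, 1.0)  # (0.0, 0.0, 1.0)
````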
26 | 
27 | It is convenient to view $\gamma$ as an extended-real valued function,
28 | defined on $\mathbb{R}$ and taking values in $[0, +\infty]$
29 | (see, e.g., p. 23 in Rockafellar, 1970). This means that the convex
30 | function $\gamma$, being defined a priori on $(a_{\gamma}, +\infty)$, can
31 | be extended outside its domain by setting $\gamma(u) = +\infty$ for all
32 | $u \in (-\infty, a_{\gamma})$. As for the boundary value at
33 | $a_{\gamma}$, we let $\gamma(a_{\gamma}) = \lim_{u\to a_{\gamma}^+}
34 | \gamma(u)$, knowing that this limit is possibly $+\infty$. This ensures
35 | that the extension of $\gamma$ is lower-semicontinuous on $\mathbb{R}$.
36 | 
37 | 
38 | The gradient and the Hessian of the divergence with respect to $a$ are
39 | given by
40 | $$
41 | \nabla_{a}D(a,b)\equiv\left.\frac{\partial D(u,v)}{\partial u}\right|_{u=a,v=b}=\left(\gamma'(a_{1}/b_{1}),\ldots,\gamma'(a_{n}/b_{n})\right),
42 | $$
43 | and
44 | $$
45 | \nabla_{a}^{2}D(a,b)\equiv\left.\frac{\partial^{2}D(u,v)}{\partial
46 | u\,\partial
47 | u'}\right|_{u=a,v=b}=\mathrm{diag}\left(\frac{\gamma''(a_{1}/b_{1})}{b_{1}},\ldots,\frac{\gamma''(a_{n}/b_{n})}{b_{n}}\right),
48 | $$
49 | respectively. Given the normalizations $\gamma'(1)=0$ and $\gamma''(1)=1$, we have that
50 | $$
51 | \nabla_{a}D(a,a) = 0, \quad \nabla^2_{a}D(a,a) = \mathrm{diag}\left(\frac{1}{a_{1}},\ldots,\frac{1}{a_{n}}\right).
52 | $$
53 | 
54 | The conjugate of $\gamma$ is defined as
55 | $$
56 | \gamma^*(\upsilon) = \sup_{u\in\mathbb{R}} \left\{u\upsilon - \gamma(u)\right\}.
57 | $$
58 | 
59 | The conjugate of the convex extended-real valued function $\gamma$ on
60 | $\mathbb{R}$, $\gamma^*$, is itself a convex lower semi-continuous
61 | function. Moreover, it follows from the above definition that $\gamma^*$ is
62 | nondecreasing on $\mathbb{R}$. Define
63 | 
64 | $$
65 | d = \lim_{u\to +\infty} \gamma(u)/u.
66 | $$
67 | 
68 | Then $\overline{\mathrm{dom}\,\gamma^*} = (-\infty, d]$, where
69 | $\mathrm{dom}\,\gamma^* = \{\upsilon \in \mathbb{R}: \gamma^*(\upsilon) <
70 | +\infty \}$ is the effective domain of $\gamma^*$.
71 | 
72 | #### Modified divergences
73 | 
74 | For many of the divergences defined above the effective domain of
75 | their conjugate, $\gamma^*$, does not span $\mathbb{R}$ since
76 | $\gamma(u)/u \to l < +\infty$ as $u \to +\infty$.
77 | 
78 | For some $\vartheta>0$, let $u_{\vartheta}\equiv 1+\vartheta$. The
79 | modified divergence $\gamma_{\vartheta}$ is defined as
80 | $$
81 | \gamma_{\vartheta}(u) = \begin{cases}
82 | \gamma(u_{\vartheta}) + \gamma'(u_{\vartheta})(u-u_{\vartheta}) + \frac{1}{2}\gamma''(u_{\vartheta})(u-u_{\vartheta})^2, & u\geqslant u_{\vartheta}\\
83 | \newline\gamma(u), & u\in (a_{\gamma},u_{\vartheta})\\
84 | \newline \lim_{u\to a_{\gamma}^{+}} \gamma(u), & u=a_{\gamma} \\
85 | \newline+\infty, & u<a_{\gamma}
86 | \end{cases}.
87 | $$
88 | 
89 | It is immediate to verify that this divergence still satisfies all the
90 | requirements and normalization of $\gamma$. Furthermore, it holds that
91 | $$
92 | \lim_{u\to\infty}\frac{\gamma_{\vartheta}(u)}{u} = +\infty,
93 | \qquad \text{and}\qquad
94 | \lim_{u\to\infty}\frac{u\gamma'_{\vartheta}(u)}{\gamma_{\vartheta}(u)} = 2.
95 | $$
96 | 
97 | The first limit implies that the image of $\gamma'_{\vartheta}$ is the
98 | real line and thus
99 | $\overline{\mathrm{dom}\,\gamma^*_{\vartheta}}=(-\infty,+\infty)$.
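
Both displayed limits are easy to eyeball numerically; a sketch (the `FullyModifiedDivergence(d, lower, upper)` constructor call follows `doc/mdexample.jl`, and the scalar calls assume the callable interface used by `src/plots.jl`):

````julia
using Divergences
m = FullyModifiedDivergence(ReverseKullbackLeibler(), 0.7, 1.2)
u = 10.0 .^ (2:2:6)
m.(u) ./ u, u .* Divergences.gradient.(Ref(m), u) ./ m.(u)  # first diverges, second tends to 2
````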
100 | The expression for the conjugate follows from the Legendre-Fenchel transform:
101 | $$
102 | \gamma_{\vartheta}^*(\upsilon) =
103 | \begin{cases}
104 | a_{\vartheta}\upsilon^2 + b_{\vartheta}\upsilon + c_{\vartheta}, & \upsilon>\gamma'(u_{\vartheta}),\\
105 | \newline \gamma^*(\upsilon), & \upsilon\leqslant \gamma'(u_{\vartheta})
106 | \end{cases},
107 | $$
108 | 
109 | where $a_{\vartheta} = 1/(2\gamma''(u_{\vartheta}))$,
110 | $b_{\vartheta}=u_{\vartheta} - 2a_{\vartheta}\gamma'(u_{\vartheta})$,
111 | and $c_{\vartheta}= a_{\vartheta}\gamma'(u_{\vartheta})^{2} -
112 | \gamma(u_{\vartheta})$.
113 | The conjugate $\gamma_{\vartheta}^*(\upsilon)$ has a closed-form
114 | expression whenever the original divergence does.
115 | 
116 | #### Fully modified divergences
117 | 
118 | For some $\vartheta>0$ and $0 < \varphi < 1-a_{\gamma}$, let $u_{\vartheta}\equiv
119 | 1+\vartheta$ and $u_{\varphi} = a_{\gamma} + \varphi$. The **fully**
120 | modified divergence $\gamma_{\varphi, \vartheta}$ is defined as
121 | $$
122 | \gamma_{\varphi, \vartheta}(u) = \begin{cases}
123 | \gamma(u_{\vartheta}) + \gamma'(u_{\vartheta})(u-u_{\vartheta}) + \frac{1}{2}\gamma''(u_{\vartheta})(u-u_{\vartheta})^2, & u\geqslant u_{\vartheta}\\
124 | \newline\gamma(u), & u\in (u_{\varphi},u_{\vartheta})\\
125 | \newline \gamma(u_{\varphi}) + \gamma'(u_{\varphi})(u-u_{\varphi}) + \frac{1}{2}\gamma''(u_{\varphi})(u-u_{\varphi})^2, & u\leqslant u_{\varphi}\\
126 | \end{cases}.
127 | $$
128 | It is immediate to verify that this divergence still satisfies all the
129 | requirements and normalization of $\gamma$, while being defined on all of
130 | $\mathbb{R}$.
131 | 
132 | ## Examples of divergences
133 | 
134 | The following divergence types are defined by `Divergences`.
135 | 
136 | #### Kullback-Leibler divergence
137 | 
138 | $$
139 | D^{KL}(a,b) = \sum_{i=1}^n \gamma^{KL}(a_i/b_i) b_i
140 | $$
141 | 
142 | $$
143 | \gamma^{KL}(u) = u\log(u) - u + 1
144 | $$
145 | 
146 | The gradient and the Hessian are given by
147 | 
148 | $$
149 | \nabla_{a}D^{KL}(a,b) = \left(\log(a_1/b_1),\ldots,\log(a_n/b_n)
150 | \right), \quad \nabla_{a}^{2}D^{KL}(a,b) = \mathrm{diag}(1/a_1, \ldots, 1/a_n)
151 | $$
152 | 
153 | #### Reverse Kullback-Leibler divergence
154 | 
155 | $$
156 | D^{rKL}(a,b) = \sum_{i=1}^n \gamma^{rKL}(a_i/b_i) b_i
157 | $$
158 | 
159 | $$
160 | \gamma^{rKL}(u) = -\log(u) + u - 1
161 | $$
162 | 
163 | The gradient and the Hessian are given by
164 | 
165 | $$
166 | \nabla_{a}D^{rKL}(a,b) = \left(1-b_1/a_1,\ldots, 1 - b_n/a_n
167 | \right), \quad \nabla_{a}^{2}D^{rKL}(a,b) = \mathrm{diag}(b_1/a^2_1, \ldots, b_n/a^2_n)
168 | $$
169 | 
170 | For the reverse Kullback Leibler divergence,
171 | $\gamma(u)=-\log(u)+u-1$, we have that $\gamma(u)/u \to 1$ as
172 | $u\to\infty$. The modified reverse Kullback Leibler divergence is given by
173 | $$
174 | \gamma_{\vartheta}(u) =
175 | \begin{cases}
176 | -\log(u_{\vartheta}) + (1-\frac{1}{u_{\vartheta}})u+ \frac{1}{2u_{\vartheta}^2}(u-u_{\vartheta})^2, & u>u_{\vartheta}\\
177 | \newline -\log(u) + u - 1, &0 < u\leqslant u_{\vartheta}\\
178 | \newline +\infty, & u\leqslant0.
179 | \end{cases}.
180 | $$
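
This piecewise definition can be transcribed directly; a minimal sketch in plain Julia (independent of the package, with $u_{\vartheta} = 1.5$ chosen for illustration) that checks value and slope agreement at the junction:

````julia
uϑ = 1.5
γ(u) = -log(u) + u - 1
γϑ(u) = u <= 0 ? Inf : u <= uϑ ? γ(u) :
        -log(uϑ) + (1 - 1/uϑ) * u + (u - uϑ)^2 / (2 * uϑ^2)
h = 1e-6
γϑ(uϑ) ≈ γ(uϑ)                               # values agree at u_ϑ
(γϑ(uϑ + h) - γϑ(uϑ - h)) / (2h) ≈ 1 - 1/uϑ  # so do the slopes
````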
181 | 
182 | The conjugate of $\gamma_{\vartheta}$ is given by
183 | $$
184 | \gamma_{\vartheta}^*(\upsilon) =
185 | \begin{cases}
186 | a_{\vartheta}\upsilon^2 + b_{\vartheta}\upsilon + c_{\vartheta}, & \upsilon > 1-\frac{1}{u_{\vartheta}} \\
187 | \newline -\log(1- \upsilon), & \upsilon \leqslant 1-\frac{1}{u_{\vartheta}},
188 | \end{cases}
189 | $$
190 | where $a_{\vartheta}=u^2_{\vartheta}/2$, $b_{\vartheta}=u_{\vartheta}(2-u_{\vartheta})$, and
191 | $c_{\vartheta}=\log(u_{\vartheta})-u_{\vartheta}+1+(u_{\vartheta}-1)^2/2$.
192 | 
193 | 
194 | 
195 | #### Chi-squared divergence
196 | 
197 | $$
198 | D^{\chi}(a,b) = \sum_{i=1}^n \gamma^{\chi}(a_i/b_i) b_i
199 | $$
200 | 
201 | $$
202 | \gamma^{\chi}(u) = u^2/2 - u + 0.5
203 | $$
204 | 
205 | The gradient and the Hessian are given by
206 | 
207 | $$
208 | \nabla_{a}D^{\chi}(a,b) = \left((a_1 - b_1)/b_1, \ldots, (a_n - b_n)/b_n
209 | \right), \quad \nabla_{a}^{2}D^{\chi}(a,b) =
210 | \mathrm{diag}\left(\frac{1}{b_1},\ldots, \frac{1}{b_n}\right)
211 | $$
212 | 
213 | #### Cressie-Read divergences
214 | 
215 | The type `CressieRead` is a family of divergences. Members of this
216 | family are indexed by a parameter $\alpha$, with
217 | 
218 | $$
219 | \gamma_{\alpha}^{CR}(u)=\frac{u^{1+\alpha}-1}{\alpha(\alpha+1)}-\frac{u-1}{\alpha}, \qquad u = a/b.
220 | $$
221 | 
222 | The gradient and the Hessian are given by
223 | 
224 | $$
225 | \nabla_{a}D^{CR}_{\alpha}(a,b) = \left(
226 | \frac{\left(\frac{a_1}{b_1}\right)^{\alpha }-1}{\alpha}, \ldots,\frac{\left(\frac{a_n}{b_n}\right)^{\alpha }-1}{\alpha}
227 | \right), \quad
228 | \nabla_{a}^{2}D^{CR}_{\alpha}(a,b) = \mathrm{diag}\left(\frac{\left(\frac{a_1}{b_1}\right)^{\alpha }}{a_1},\ldots,
229 | \frac{\left(\frac{a_n}{b_n}\right)^{\alpha }}{a_n}
230 | \right)
231 | $$
232 | 
233 | The Cressie-Read family contains the chi-squared divergence ($\alpha =
234 | 1$), the Kullback Leibler divergence ($\alpha \to 0$), the reverse
235 | Kullback Leibler divergence ($\alpha \to -1$), and the Hellinger distance ($\alpha = -1/2$).
236 | 
237 | For instance, for the Cressie Read family of divergences defined above,
238 | $$
239 | \lim_{u\to +\infty}\gamma^{CR}_{\alpha}(u)/u = -1/\alpha
240 | $$
241 | for all $\alpha < 0$. Also, for all $\alpha\leqslant 0$, the
242 | divergence is not convex on $(-\infty, 0)$ and thus a fully modified
243 | version can be considered.
244 | 
245 | 
246 | 
247 | ## Using the `Divergences` package
248 | 
249 | ````julia
250 | using Divergences
251 | ````
252 | 
253 | 
254 | 
255 | 
256 | 
257 | Suppose $a = [0.2, 0.4, 0.4]$ and $b = [0.1, 0.3, 0.6]$.
258 | 259 | ````julia 260 | a = [0.2, 0.4, 0.4] 261 | b = [0.1, 0.3, 0.6] 262 | ```` 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | ````julia 271 | evaluate(KullbackLeibler(), a, b) 272 | 273 | ```` 274 | 275 | 276 | ```` 277 | 0.0915162218494357 278 | ```` 279 | 280 | 281 | 282 | ````julia 283 | gradient(KullbackLeibler(), a, b) 284 | 285 | ```` 286 | 287 | 288 | ```` 289 | 3-element Array{Float64,1}: 290 | 0.693147 291 | 0.287682 292 | -0.405465 293 | ```` 294 | 295 | 296 | 297 | ````julia 298 | hessian(KullbackLeibler(), a, b) 299 | ```` 300 | 301 | 302 | ```` 303 | 3-element Array{Float64,1}: 304 | 50.0 305 | 8.33333 306 | 4.16667 307 | ```` 308 | 309 | 310 | 311 | 312 | 313 | ````julia 314 | evaluate(ReverseKullbackLeibler(), a, b) 315 | 316 | ```` 317 | 318 | 319 | ```` 320 | 0.0876597250733698 321 | ```` 322 | 323 | 324 | 325 | ````julia 326 | gradient(ReverseKullbackLeibler(), a, b) 327 | 328 | ```` 329 | 330 | 331 | ```` 332 | 3-element Array{Float64,1}: 333 | 0.5 334 | 0.25 335 | -0.5 336 | ```` 337 | 338 | 339 | 340 | ````julia 341 | hessian(ReverseKullbackLeibler(), a, b) 342 | ```` 343 | 344 | 345 | ```` 346 | 3-element Array{Float64,1}: 347 | 2.5 348 | 1.875 349 | 3.75 350 | ```` 351 | 352 | 353 | -------------------------------------------------------------------------------- /doc/z.csv: -------------------------------------------------------------------------------- 1 | 4.967141530112326731e-01,-1.382643011711846559e-01,6.476885381006924902e-01,1.523029856408025351e+00,-2.341533747233359719e-01,9.261775475316413875e-01 2 | -2.341369569491805469e-01,1.579212815507391454e+00,7.674347291529087789e-01,-4.694743859349521098e-01,5.425600435859646575e-01,1.909416640470130488e+00 3 | -4.634176928124622563e-01,-4.657297535702568658e-01,2.419622715660341150e-01,-1.913280244657797891e+00,-1.724917832513032767e+00,-1.398567573819141208e+00 4 | -5.622875292409726944e-01,-1.012831120334423796e+00,3.142473325952738761e-01,-9.080240755212108938e-01,-1.412303701335291484e+00,5.629692366905708623e-01 5 | 1.465648768921554046e+00,-2.257763004865356582e-01,6.752820468792383735e-02,-1.424748186213456780e+00,-5.443827245251826596e-01,-6.506425691218269414e-01 6 | 1.109225897098660846e-01,-1.150993577422302794e+00,3.756980183456719580e-01,-6.006386899188049799e-01,-2.916937497932767798e-01,-4.871253837646960516e-01 7 | -6.017066122293969199e-01,1.852278184508937775e+00,-1.349722473793392094e-02,-1.057710928955900354e+00,8.225449121031890298e-01,-5.923939242388691628e-01 8 | -1.220843649971022238e+00,2.088635950047554035e-01,-1.959670123879775572e+00,-1.328186048898430505e+00,1.968612358691235187e-01,-8.639907696798160286e-01 9 | 7.384665799954104326e-01,1.713682811899704950e-01,-1.156482823882405281e-01,-3.011036955892887890e-01,-1.478521990367427374e+00,4.852162794482699215e-02 10 | -7.198442083947086401e-01,-4.606387709597875024e-01,1.057122226218915717e+00,3.436182895684614103e-01,-1.763040155362733952e+00,-8.309501164110377758e-01 11 | 3.240839693947950018e-01,-3.850822804163165358e-01,-6.769220003059587265e-01,6.116762888408678878e-01,1.030999522495950949e+00,2.704568257798388164e-01 12 | 9.312801191161985681e-01,-8.392175232226385395e-01,-3.092123758512145826e-01,3.312634314035639571e-01,9.755451271223591903e-01,-5.023810944913695287e-02 13 | -4.791742378452899520e-01,-1.856589766638171157e-01,-1.106334974006028204e+00,-1.196206624080670800e+00,8.125258223941980162e-01,-2.389480468664097468e-01 14 | 
1.356240028570822931e+00,-7.201012158033384680e-02,1.003532897892024156e+00,3.616360250476341465e-01,-6.451197546051242737e-01,-9.075636620415978850e-01 15 | 3.613956055084139307e-01,1.538036566465969202e+00,-3.582603910995153795e-02,1.564643655814006218e+00,-2.619745104089744370e+00,-5.767713305683327407e-01 16 | 8.219025043752238302e-01,8.704706823817121020e-02,-2.990073504658674053e-01,9.176077653550229751e-02,-1.987568914600892800e+00,7.553912258257560186e-01 17 | -2.196718878375119310e-01,3.571125715117464128e-01,1.477894044741516089e+00,-5.182702182736473873e-01,-8.084936028931876129e-01,5.009171876243807553e-01 18 | -5.017570435845365440e-01,9.154021177020741362e-01,3.287511096596844595e-01,-5.297602037670388064e-01,5.132674331133560974e-01,-9.775552447985510485e-01 19 | 9.707754934804038727e-02,9.686449905328892163e-01,-7.020530938773523744e-01,-3.276621465977682113e-01,-3.921081531321576330e-01,9.933230542922587281e-02 20 | -1.463514948132118576e+00,2.961202770645760540e-01,2.610552721798893305e-01,5.113456642460889774e-03,-2.345871333751469168e-01,7.513871233717890341e-01 21 | -1.415370742050414243e+00,-4.206453227653590421e-01,-3.427145165267694860e-01,-8.022772692216189050e-01,-1.612857116660091350e-01,-1.669405281121371765e+00 22 | 4.040508568145383950e-01,1.886185901210530202e+00,1.745778128318389577e-01,2.575503907227643663e-01,-7.444591576616721440e-02,5.433601923799350475e-01 23 | -1.918771215299041488e+00,-2.651387544921687822e-02,6.023020994102643716e-02,2.463242112485286128e+00,-1.923609647811225232e-01,-6.626237589458466859e-01 24 | 3.015473423336124670e-01,-3.471176970524331162e-02,-1.168678037619532040e+00,1.142822814515020546e+00,7.519330326867741388e-01,5.705986685931593305e-01 25 | 7.910319470430469124e-01,-9.093874547947389253e-01,1.402794310936099187e+00,-1.401851062792280889e+00,5.868570938002702908e-01,-7.632591565425168589e-01 26 | 2.190455625809978546e+00,-9.905363251306883443e-01,-5.662977296027719154e-01,9.965136508764121936e-02,-5.034756541161992116e-01,-1.804882100664519040e+00 27 | -1.550663431066132691e+00,6.856297480602732697e-02,-1.062303713726104881e+00,4.735924306351815827e-01,-9.194242342338031504e-01,-1.627542437883162663e+00 28 | 1.549934405017539429e+00,-7.832532923362370836e-01,-3.220615162056755798e-01,8.135172173696697540e-01,-1.230864316433955219e+00,4.808494666138199425e-02 29 | 2.274599346041294157e-01,1.307142754282428099e+00,-1.607483234561227547e+00,1.846338585323042125e-01,2.598827942484235320e-01,2.597225017214818688e-01 30 | 7.818228717773103575e-01,-1.236950710878081949e+00,-1.320456613084276309e+00,5.219415656168976403e-01,2.969846732331860584e-01,-9.043166251044085779e-01 31 | 2.504928503458765388e-01,3.464482094969756898e-01,-6.800247215784908095e-01,2.322536971610035528e-01,2.930724732986812464e-01,6.385924587773739169e-01 32 | -7.143514180263678126e-01,1.865774511144756564e+00,4.738329209117875251e-01,-1.191303497202648609e+00,6.565536086338297217e-01,-1.661520062268959874e+00 33 | -9.746816702273214394e-01,7.870846037424520381e-01,1.158595579007404064e+00,-8.206823183517104603e-01,9.633761292443218105e-01,-6.607979864731657049e-02 34 | 4.127809269364983158e-01,8.220601599944900029e-01,1.896792982653947357e+00,-2.453881160028704989e-01,-7.537361643574895798e-01,-1.211016199762456669e+00 35 | -8.895144296255232952e-01,-8.158102849654382815e-01,-7.710170941410419976e-02,3.411519748166438881e-01,2.766907993300190549e-01,-6.518361078021591704e-01 36 | 
8.271832490360238044e-01,1.300189187790701945e-02,1.453534077157316862e+00,-2.646568332379560795e-01,2.720169166589618825e+00,4.739867131641401637e-02 37 | 6.256673477650062098e-01,-8.571575564162825511e-01,-1.070892498061112308e+00,4.824724152431852930e-01,-2.234627853258508989e-01,-8.604133652839524027e-01 38 | 7.140004940920919863e-01,4.732376245735448461e-01,-7.282891265687277405e-02,-8.467937180684049769e-01,-1.514847224685864635e+00,-3.845555442298253523e-01 39 | -4.465149520670210759e-01,8.563987943234723232e-01,2.140937441302039612e-01,-1.245738778711988015e+00,1.731809258511819993e-01,1.006292809214440531e+00 40 | 3.853173797288367841e-01,-8.838574362011329955e-01,1.537251059455279067e-01,5.820871844599989631e-02,-1.142970297830623094e+00,-5.768918695231487481e-01 41 | 3.577873603482832898e-01,5.607845263682343928e-01,1.083051243175277012e+00,1.053802052034902959e+00,-1.377669367957091051e+00,8.356921120651418233e-01 42 | -9.378250399151227823e-01,5.150352672086597750e-01,5.137859509122087998e-01,5.150476863060479138e-01,3.852731490654721203e+00,-1.129706854657618109e+00 43 | 5.708905106931669859e-01,1.135565640180598912e+00,9.540017634932023149e-01,6.513912513057980025e-01,-3.152692446403456139e-01,5.298041779152827813e-01 44 | 7.589692204932674269e-01,-7.728252145375718030e-01,-2.368186067400088746e-01,-4.853635478291034588e-01,8.187413938632255583e-02,1.441568620657900368e+00 45 | 2.314658566673508666e+00,-1.867265192591748058e+00,6.862601903745134679e-01,-1.612715871189651651e+00,-4.719318657894334690e-01,-2.471644500127289312e+00 46 | 1.088950596967366069e+00,6.428001909546277037e-02,-1.077744777929306119e+00,-7.153037092599682234e-01,6.795977489346758382e-01,-7.968952554704767932e-01 47 | -7.303666317171366718e-01,2.164585895819748640e-01,4.557183990381378363e-02,-6.516003476058170873e-01,2.143944089325325653e+00,5.770721271805400177e-01 48 | 6.339190223180112271e-01,-2.025142586657607158e+00,1.864543147694276426e-01,-6.617864647683879831e-01,8.524333347962239626e-01,-2.030453860429926871e-01 49 | -7.925207384327006555e-01,-1.147364414668990140e-01,5.049872789804571438e-01,8.657551941701214782e-01,-1.200296407055776227e+00,3.711458733713088320e-01 50 | -3.345012358409483744e-01,-4.749453111609561740e-01,-6.533292325737118933e-01,1.765454240281096876e+00,4.049817109609555321e-01,-6.039851867158205767e-01 51 | -1.260883954335045187e+00,9.178619470547760839e-01,2.122156197012633250e+00,1.032465260551146846e+00,-1.519369965954013413e+00,8.658978747289991507e-02 52 | -4.842340728662513638e-01,1.266911149186622731e+00,-7.076694656187807464e-01,4.438194281462284341e-01,7.746340534293367774e-01,-1.556772353920794771e-01 53 | -9.269304715780829484e-01,-5.952535606180008043e-02,-3.241267340069072578e+00,-1.024387641334289833e+00,-2.525681513931603006e-01,1.167782061659807358e+00 54 | -1.247783181964849497e+00,1.632411303931635249e+00,-1.430141377960632676e+00,-4.400444866969837610e-01,1.307405772860913418e-01,2.544208433012131176e-01 55 | 1.441273289066115515e+00,-1.435862151179439383e+00,1.163163752154959596e+00,1.023306101958704889e-02,-9.815086510479509307e-01,3.376026620752021756e-01 56 | 4.621034742632707526e-01,1.990596955734700302e-01,-6.002168771587946816e-01,6.980208499001891442e-02,-3.853135968617601992e-01,-4.118769661224673806e-01 57 | 1.135173452512480419e-01,6.621306745210466804e-01,1.586016816145352060e+00,-1.237815498826849048e+00,2.133033374656266634e+00,-4.876062240724935437e-01 58 | 
-1.952087799522501887e+00,-1.517850950355833228e-01,5.883172064845765270e-01,2.809918677350326521e-01,-6.226995198205937943e-01,-4.325581878196209096e-01 59 | -2.081222503572752180e-01,-4.930009346588328234e-01,-5.893647569442115319e-01,8.496020970210246270e-01,3.570154859650473411e-01,3.944521423782968439e-01 60 | -6.929095952606542097e-01,8.995998754332507064e-01,3.072995208766093334e-01,8.128621188389600905e-01,6.296288419236122369e-01,-4.209844808202629629e-01 61 | -8.289950109220722840e-01,-5.601810401969695707e-01,7.472936051232618171e-01,6.103702654334648425e-01,-2.090159396414813242e-02,2.897748568964129134e-01 62 | 1.173273833087819934e-01,1.277664895788424904e+00,-5.915713888358299366e-01,5.470973811700379219e-01,-2.021926524338940601e-01,2.075400798645438805e+00 63 | -2.176812032272202879e-01,1.098776851987190106e+00,8.254163489880298465e-01,8.135096360006385252e-01,1.305478807154329068e+00,8.711247034316923488e-01 64 | 2.100384163275904881e-02,6.819529712949639055e-01,-3.102667565934560390e-01,3.241663524884421110e-01,-1.301430543676845852e-01,-3.260235321678411347e-01 65 | 9.699596499271818939e-02,5.951570254369136226e-01,-8.182206832334725233e-01,2.092387275685460235e+00,-1.006017381499701990e+00,1.201213922163944847e+00 66 | -1.214188612787732158e+00,1.158110873500067806e+00,7.916626939629358706e-01,6.241198170521551347e-01,6.283455092642799000e-01,-4.080753730215513908e-01 67 | -1.224677284691462302e-02,-8.972543714858315367e-01,7.580455819372633464e-02,-6.771617115121116859e-01,9.751197334177511555e-01,-2.038124535177853858e+00 68 | -1.470573815021386510e-01,-8.254971967925115450e-01,-3.213858416529934425e-01,4.129314542756243323e-01,-5.637245528039747100e-01,-1.008086310917404083e+00 69 | -8.222203955664314501e-01,2.436872114919123034e-01,2.449665711087227749e-01,-5.069431753711297617e-01,-4.710383056183227724e-01,-1.870791921025855675e+00 70 | 2.320499373576362934e-01,-1.448084341497324123e+00,-1.407463774376555232e+00,-7.184442212524360105e-01,-2.134471517118472494e-01,-3.515134840413086659e-01 71 | 3.109075655980045871e-01,1.475356216949551991e+00,8.576596232020193833e-01,-1.599385299634271118e-01,-1.901620790268883018e-02,1.841837918955169934e-02 72 | -1.002529364637808840e+00,-1.851313599238993066e-02,-2.886586389201383218e-01,3.227185603380894885e-01,-8.272309435523229615e-01,1.676437312275282698e+00 73 | 5.193465142411722857e-01,1.532738913002577696e+00,-1.087601484568575944e-01,4.017117220989414594e-01,6.901439917111125144e-01,3.269273737641626432e-01 74 | -4.012204718858362607e-01,2.240924818104167715e-01,1.259240078179485987e-02,9.767609854883171905e-02,-7.730097838554664813e-01,-2.191005288088642422e-01 75 | 2.451017425894271365e-02,4.979982912454497535e-01,1.451143607795041701e+00,9.592708260852068625e-01,2.153182457511556347e+00,8.294055811834891712e-01 76 | -7.673475628880495059e-01,8.723206367206781664e-01,1.833420057383517432e-01,2.189802933217672276e+00,-8.082982853551514690e-01,-2.211135309007885130e+00 77 | -8.397218421807760569e-01,-5.993926454440221541e-01,-2.123895724309806887e+00,-5.257550216807610477e-01,-7.591326615536979627e-01,2.356145581085659357e-01 78 | 1.503937864762076304e-01,3.417559757771594375e-01,1.876170839215886232e+00,9.504238381860502516e-01,-5.769036556624030920e-01,7.708651938869668374e-01 79 | -8.984146713483579516e-01,4.919191715065057147e-01,-1.320233207020642174e+00,1.831458765854353743e+00,1.179440120721287011e+00,-1.478586245779841546e+00 80 | 
-4.691756521047047990e-01,-1.713134529090877489e+00,1.353872374165412840e+00,-1.145398452526178862e-01,1.237816311973461758e+00,1.143754043206929083e+00 81 | -1.594427658794367098e+00,-5.993750229537728735e-01,5.243699718183165819e-03,4.698059376474205545e-02,-4.500654714792436395e-01,3.384964074944141199e-01 82 | 6.228499323474987470e-01,-1.067620429382594383e+00,-1.423794850212934948e-01,1.202956317118988594e-01,5.144388340587490172e-01,-4.152879139008012754e-01 83 | 7.116148780888897907e-01,-1.124642091837869229e+00,-1.534114170735622285e+00,1.277676821898509063e+00,3.323140119795916503e-01,6.327818661062848404e-01 84 | -7.484865365565536166e-01,1.551151975522522930e+00,1.156746342928586663e-01,1.179297184063826442e+00,6.751848141010895199e-02,2.270692857804395892e+00 85 | 2.060747924881987103e+00,1.755340842443204430e+00,-2.489641484790734993e-01,9.715709509543554168e-01,6.453759495851475458e-01,1.818662550584951576e-01 86 | 1.368631557532348664e+00,-9.649234605801044751e-01,6.860514599984393058e-01,1.058424486849587787e+00,-1.758739486423114284e+00,2.482205863003360824e-01 87 | -1.183258512665775086e+00,-2.039232177760100573e+00,-2.694068344445577634e-01,7.175422557959623138e-01,1.502357052096028101e+00,-4.593608995402441164e-01 88 | 7.409478041977518581e-02,1.628615545571291845e+00,-1.380101458214891386e+00,-1.703382439355154654e+00,-5.554769889661877874e-02,-8.498443694647918045e-01 89 | 3.840654489393072746e-01,-3.269474809409311095e-02,-2.067442100039876607e+00,-8.912003951278840708e-02,-1.304469500504853219e+00,8.303358165442455974e-01 90 | 6.696725488300384610e-01,3.665982460968482681e-01,-9.398797863273552489e-01,-5.138669173366935405e-01,-1.059213521888951570e+00,-8.560838259088672242e-01 91 | -6.267909727317187707e-02,9.551423205012382622e-01,-9.857260463355437263e-01,5.040465155178444068e-01,-5.302576183724407866e-01,7.156623721939246729e-02 92 | -7.928728322623441738e-01,-1.070303599545578271e-01,-1.035242322419374084e+00,-5.536493053471820414e-01,-1.197877892588848470e+00,-4.776574467651166778e-01 93 | 1.964725132916389283e+00,3.526355197172861139e-02,-6.997255079925855936e-01,2.139799107342220119e-01,-1.123280496908298232e-01,4.789798257463918629e-01 94 | -2.209695995332229823e-01,6.141667000434252177e-01,7.575077100473051050e-01,-5.305011476105274681e-01,-5.758182406446800128e-01,3.336621052869482851e-01 95 | -2.750516971516440146e-01,-2.301921164735584835e+00,-1.515191062198552263e+00,1.366874267444524671e+00,1.644967713501283679e+00,1.037539944257899194e+00 96 | -2.490360395563783191e-01,5.765569630557664249e-01,3.112501545435361061e-01,3.078880808455237705e+00,1.119574911434576769e+00,-5.100163988547470328e-01 97 | -1.279175914807665349e-01,-9.555404406004257556e-01,-1.606446320257572502e+00,2.034636358672231027e-01,-7.563507452843033496e-01,-2.698749352933712542e-01 98 | -1.422253709597674165e+00,-6.465728842425265688e-01,-1.081548003614394959e+00,1.687141635072564760e+00,8.816397569494505149e-01,-9.787637157823073641e-01 99 | -7.972641316617372007e-03,1.479944138890025851e+00,7.736830764761830348e-02,-8.612842013282636655e-01,1.523124077269657262e+00,-4.442932600761115847e-01 100 | 5.389100436846586684e-01,-1.037246154326456393e+00,-1.903386780836081871e-01,-8.756182533847571836e-01,-1.382799730964336060e+00,3.773004930448521921e-01 101 | -------------------------------------------------------------------------------- /doc/mdexample.jl: -------------------------------------------------------------------------------- 1 | using MathOptInterface, Optimization, 
OptimizationMOI, OptimizationOptimJL, Ipopt
2 | using ForwardDiff, DifferentiationInterface
3 | using Divergences
4 | using Statistics, LinearAlgebra
5 | using Infiltrator
6 | 
7 | ## --------------------------------------------------------------------- ##
8 | ## Moment Conditions & Jacobian
9 | ## --------------------------------------------------------------------- ##
10 | 
11 | ## This function is the moment matrix of the estimation problem.
12 | ## This function should always be defined by the user.
13 | function g(x, θ)
14 |     d = x .- θ
15 |     return [d d .^ 2 .- 1]
16 | end
17 | 
18 | ## This function is the gradient of the mean moment matrix,
19 | ## which is an (m, k) matrix, where m is the number of moments and k
20 | ## is the number of parameters:
21 | ## \frac{\partial}{\partial\theta}\left[\sum_{i=1}^{n}\pi_{i}g(x_{i},\theta)/n\right]
22 | ## It should be written in a way that it can be used with ForwardDiff or Zygote.
23 | function ∇g(x, θ, π)
24 |     d = x .- θ
25 |     res = Matrix{promote_type(eltype(θ), eltype(π))}(undef, 2, 1)
26 |     res[1] = -sum(π) ## mean here
27 |     res[2] = -2.0*sum(π .* d) ## mean here
28 |     return res
29 | end
30 | 
31 | function λ∇g(θ, π, λ, x)
32 |     return first(λ'∇g(x, θ, π) ./ length(x)) ## length(x) rather than the global `n`
33 | end
34 | 
35 | ## This must return a (k,n)
36 | function ∇gᵢλ!(dest::AbstractMatrix, x, θ, λ)
37 |     d = x .- θ
38 |     n = length(d)
39 |     for j in axes(dest, 1)
40 |         for i in axes(dest, 2)
41 |             dest[j, i] = (-λ[1] .- 2.0 .* d[i] .* λ[2])/n
42 |         end
43 |     end
44 |     return dest
45 | end
46 | 
47 | Base.@propagate_inbounds function ∇gᵢλ(x, θ, λ)
48 |     n = length(x)
49 |     k = length(θ)
50 |     res = Matrix{promote_type(eltype(θ), eltype(λ), eltype(x))}(undef, k, n)
51 |     return ∇gᵢλ!(res, x, θ, λ)
52 | end
53 | 
54 | ## --------------------------------------------------------------------- ##
55 | ## Optimization Problem
56 | ## --------------------------------------------------------------------- ##
57 | 
58 | const MOI = MathOptInterface
59 | 
60 | struct MDProblem{D} <: MOI.AbstractNLPEvaluator
61 |     div::Divergences.AbstractDivergence
62 |     data::D
63 |     size::Tuple{Int, Int, Int}
64 |     backend::DifferentiationInterface.AbstractADType
65 | end
66 | 
67 | Base.size(md::MDProblem) = md.size
68 | divergence(md::MDProblem) = md.div
69 | 
70 | function MOI.initialize(md::MDProblem, rf::Vector{Symbol})
71 |     for feat in rf
72 |         if !(feat in [:Grad, :Jac, :Hess])
73 |             error("Unsupported feature $feat")
74 |         end
75 |     end
76 | end
77 | 
78 | MOI.features_available(md::MDProblem) = [:Grad, :Jac, :Hess]
79 | 
80 | ## --------------------------------------------------------------------- ##
81 | ## Objective function
82 | ## --------------------------------------------------------------------- ##
83 | function MOI.eval_objective(md::MDProblem, u::Vector{Float64})
84 |     n, k, m = size(md)
85 |     return divergence(md)(view(u, 1:n))
86 | end
87 | 
88 | function MOI.eval_objective_gradient(md::MDProblem, res, u)
89 |     n, k, m = size(md)
90 |     T = eltype(res)
91 |     Divergences.gradient!(view(res, 1:n), divergence(md), view(u, 1:n))
92 |     return fill!(view(res, (n + 1):(n + k)), zero(T))
93 | end
94 | 
95 | ## --------------------------------------------------------------------- ##
96 | ## Constraints
97 | ## --------------------------------------------------------------------- ##
98 | function MOI.eval_constraint(md::MDProblem, res, u)
99 |     n, k, m = size(md)
100 |     θ = view(u, (n + 1):(n + k))
101 |     π = view(u, 1:n)
102 |     G = g(md.data, θ)
103 |     return weighted_mean!(res, π, G)
104 | end
105 | 
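## Quick consistency check (a sketch, not part of the original script): the
## analytic ∇g above should match ForwardDiff applied to θ ↦ Σᵢ wᵢ g(xᵢ, θ).
let x = randn(10), θ = [0.1], w = fill(0.1, 10)
    fd = ForwardDiff.jacobian(t -> vec(sum(w .* g(x, t), dims = 1)), θ)
    @assert fd ≈ ∇g(x, θ, w)
end
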
MOI.jacobian_structure(md::MDProblem)
107 |     n, k, m = size(md)
108 |     return rowcol_of_dense(n + k, m + 1) ## dense (m+1) × (n+k) Jacobian, column-major
109 | end
110 | 
111 | ## --------------------------------------------------------------------- ##
112 | ## Constraints Jacobian
113 | ## --------------------------------------------------------------------- ##
114 | function MOI.eval_constraint_jacobian(md::MDProblem, J, u)
115 |     n, k, m = size(md)
116 |     θ = view(u, (n + 1):(n + k))
117 |     π = u[1:n]
118 |     G = g(md.data, θ)
119 |     G .= G ./ n
120 |     ∇gₙ = ∇g(md.data, θ, π)
121 |     ∇gₙ .= ∇gₙ ./ n
122 |     return assign_matrix!(J, G, ∇gₙ)
123 | end
124 | 
125 | ## --------------------------------------------------------------------- ##
126 | ## Hessian of the Lagrangian
127 | ## --------------------------------------------------------------------- ##
128 | 
129 | ## The Lagrangian is given by:
130 | ##
131 | ## L(π, θ, λ) = D(π, p) + λ'g(θ)
132 | 
133 | function MOI.hessian_lagrangian_structure(md::MDProblem)
134 |     n, k, m = size(md)
135 |     hele = Int(n + n*k + k*k)
136 |     rows = Array{Int64}(undef, hele)
137 |     cols = Array{Int64}(undef, hele)
138 |     ## Diagonal Elements
139 |     for j in 1:n
140 |         rows[j] = j
141 |         cols[j] = j
142 |     end
143 |     idx = n+1
144 | 
145 |     # for j = 1:k
146 |     #     for s = 1:n
147 |     #         rows[idx] = s
148 |     #         cols[idx] = n + j
149 |     #         idx += 1
150 |     #     end
151 |     # end
152 | 
153 |     ## Off-diagonal elements
154 |     for j in 1:k
155 |         for s in 1:n
156 |             rows[idx] = n + j
157 |             cols[idx] = s
158 |             idx += 1
159 |         end
160 |     end
161 | 
162 |     ## Last Block
163 |     for j in 1:k
164 |         for s in 1:k
165 |             rows[idx] = n + j
166 |             cols[idx] = n + s
167 |             idx += 1
168 |         end
169 |     end
170 | 
171 |     return [(r, c) for (r, c) in zip(rows, cols)]
172 | end
173 | 
174 | function MOI.eval_hessian_lagrangian(md::MDProblem, hess, u, σ, λ)
175 |     n, k, m = size(md)
176 |     π = view(u, 1:n)
177 |     θ = view(u, (n + 1):(n + k))
178 |     if σ==0
179 |         @inbounds for j in 1:n
180 |             hess[j] = 0.0
181 |         end
182 |     else
183 |         hv = view(hess, 1:n)
184 |         Divergences.hessian!(hv, divergence(md), π)
185 |         hv .= hv .* σ
186 |     end
187 | 
188 |     λv = view(λ, 1:m)
189 |     #v = ∇gᵢ(md.data, θ)*λv./n
190 |     ## As this matrix is symmetric, Ipopt expects that only the lower diagonal entries are specified.
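    ## Note on Ipopt's Hessian convention (explanatory comment, not original
    ## code): the Hessian of the Lagrangian is symmetric, so Ipopt expects one
    ## value per (row, col) pair returned by `hessian_lagrangian_structure`,
    ## lower triangle only. With k = 1 the terminal k×k block holds a single
    ## entry, so the k(k+1)/2 values packed below line up with the k² index
    ## pairs allocated in the structure; for k > 1 the two counts differ and
    ## the structure would need to be restricted to the lower triangle too.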
191 | ## hess[n+1:n+n*k] .= vec(v') 192 | ∇gᵢλ!(reshape(view(hess, (n + 1):(n + n * k)), k, n), md.data, θ, λv) 193 | # @infiltrate 194 | ## If k>1, the we should only get the lower diagonal entries of 195 | ## the gradient of λ∇g 196 | ## hess[n+n*k+1:n+n*k+k^2] .= gradient(λ∇g, md.backend, θ, Constant(π), Constant(λv), Constant(md.data)) 197 | ##vv = gradient(λ∇g, md.backend, θ, Constant(π), Constant(λv), Constant(md.data)) 198 | ##@infiltrate 199 | return copy_lower_triangular!( 200 | view(hess, 201 | (n + n * k + 1):(n + n * k + (k * (k + 1) ÷ 2))), 202 | gradient(λ∇g, md.backend, θ, Constant(π), Constant(λv), 203 | Constant(md.data))) 204 | end 205 | 206 | ## --------------------------------------------------------------------- ## 207 | ## Problem 208 | ## --------------------------------------------------------------------- ## 209 | 210 | ## Small problem to test the implementation 211 | n = 1000 212 | k = 1 213 | m = 2 214 | 215 | 𝒟 = ChiSquared() 216 | ℳ𝒟 = FullyModifiedDivergence(𝒟, 0.7, 1.2) 217 | 218 | mdprob = MDProblem(𝒟, √0.64 .* randn(n), (n, k, m), AutoForwardDiff()) 219 | 220 | n, k, m = size(mdprob) 221 | 222 | model = Ipopt.Optimizer() 223 | π = MOI.add_variables(model, n) 224 | MOI.add_constraint.(model, π, MOI.GreaterThan(0.0)) 225 | θ = MOI.add_variables(model, k) 226 | MOI.add_constraint.(model, θ, MOI.GreaterThan(-10.0)) 227 | MOI.add_constraint.(model, θ, MOI.LessThan(+10.0)) 228 | 229 | MOI.get(model, MOI.NumberOfVariables()) 230 | 231 | for i in 1:n 232 | MOI.set(model, MOI.VariablePrimalStart(), π[i], 1.0) 233 | end 234 | 235 | for i in 1:k 236 | MOI.set(model, MOI.VariablePrimalStart(), θ[i], 0.0) 237 | end 238 | 239 | lb = [zeros(m); n] 240 | ub = [zeros(m); n] 241 | 242 | MOI.set(model, MOI.ObjectiveSense(), MOI.MIN_SENSE) 243 | block_data = MOI.NLPBlockData(MOI.NLPBoundsPair.(lb, ub), mdprob, true) 244 | MOI.set(model, MOI.NLPBlock(), block_data) 245 | 246 | model.options["derivative_test"] = "none" 247 | model.options["derivative_test_print_all"] = "no" 248 | 249 | model.options["print_level"] = 3 250 | 251 | MOI.optimize!(model) 252 | MOI.get(model, MOI.TerminationStatus()) 253 | MOI.get(model, MOI.DualStatus()) 254 | MOI.get(model, MOI.PrimalStatus()) 255 | 256 | MOI.get(model, MOI.SolveTimeSec()) 257 | MOI.get(model, MOI.BarrierIterations()) 258 | 259 | xstar = MOI.get(model, MOI.VariablePrimal(), θ) 260 | 261 | function lagrangian(md::MDProblem, u, σ, λ) 262 | n, k, m = size(md) 263 | π = u[1:n] 264 | θ = u[(n + 1):(n + k)] 265 | return σ .* divergence(md)(π) + mean(π .* g(md.data, θ)*λ) 266 | end 267 | 268 | using Statistics 269 | 270 | p = [0.45793379249066035, 271 | 4.999416892014921, 272 | 9.182989399836064, 273 | 3.6958463315972025, 274 | 6.220383439227501, 275 | -9.964661752165114] 276 | lagrangian(mdprob, p, 1.5, [0.0, 0.0]) 277 | 278 | H0 = ForwardDiff.hessian(x -> lagrangian(mdprob, x, 1.5, [0.0, 0]), p); 279 | H = zeros(16) 280 | MOI.eval_hessian_lagrangian(mdprob, H, p, 1.5, [0.0, 0]) 281 | 282 | H0 = ForwardDiff.hessian(x -> lagrangian(mdprob, x, 0.0, [1.5, 0]), p); 283 | MOI.eval_hessian_lagrangian(mdprob, H, p, 0.0, [1.5, 0]) 284 | 285 | ## --------------------------------------------------------------------- ## 286 | ## Simple MC 287 | ## --------------------------------------------------------------------- ## 288 | 289 | β = Vector{Float64}(undef, 5000) 290 | for j in 1:5000 291 | x = √0.64 .* randn(1000) 292 | mdprob.data .= x 293 | MOI.optimize!(model) 294 | β[j] = MOI.get(model, MOI.VariablePrimal(), θ)[1] 295 | end 296 | 297 | using 
StatsPlots
298 | 
299 | StatsPlots.density(β)
300 | StatsPlots.histogram(β; nbins = 80)
301 | 
302 | ## --------------------------------------------------------------------- ##
303 | ## Utilities
304 | ## --------------------------------------------------------------------- ##
305 | 
306 | """
307 |     assign_matrix!(J, g, ∇g)
308 | 
309 | Assigns the elements of the block matrix `X = [[g'; ones(1, n)] [∇g; zeros(1, k)]]`
310 | into the preallocated array `J`, including the `ones(1, n)` and `zeros(1, k)` blocks.
311 | 
312 | # Arguments
313 | - `J::Vector{Float64}`: A preallocated array of size `n * (m + 1) + k * (m + 1)`, where `n`, `m`, and `k` are the dimensions of `g` and `∇g`.
314 | - `g::AbstractMatrix{T}`: An `n × m` matrix.
315 | - `∇g::AbstractMatrix{T}`: An `m × k` matrix.
316 | 
317 | # Behavior
318 | - The function assigns, in order:
319 |   - The columns of the transpose of `g` (`g'`), each followed by a `1.0` for the adding-up constraint row.
320 |   - The elements of `∇g` in column-major order, followed by `k` trailing zeros.
321 | - For `k = 1` this coincides with `vec(X)`; `assign_matrix` below is the dense reference implementation.
322 | 
323 | # Example
324 | ```julia
325 | A = [1 2; 3 4; 5 6]       # n × m = 3 × 2
326 | B = reshape([7, 8], 2, 1) # m × k = 2 × 1
327 | 
328 | J = Vector{Float64}(undef, 3 * 3 + 1 * 3) # Preallocate array
329 | assign_matrix!(J, A, B)
330 | 
331 | # J will be:
332 | # [1.0, 2.0, 1.0, 3.0, 4.0, 1.0, 5.0, 6.0, 1.0, 7.0, 8.0, 0.0]
333 | ```
334 | """
335 | Base.@propagate_inbounds function assign_matrix!(J, gg, Dg)
336 |     n, m = size(gg)
337 |     k = size(Dg, 2)
338 | 
339 |     # First block: gg' (m×n matrix)
340 |     for i in 1:n
341 |         for j in 1:m
342 |             J[(i - 1) * (m + 1) + j] = gg[i, j] # Transpose while assigning
343 |         end
344 |     end
345 | 
346 |     # Row of ones (1×n matrix)
347 |     for i in 1:n
348 |         J[i * (m + 1)] = 1.0
349 |     end
350 | 
351 |     # Second block: Dg (m×k matrix)
352 |     for i in 1:k
353 |         for j in 1:m
354 |             J[n * (m + 1) + (i - 1) * m + j] = Dg[j, i]
355 |         end
356 |     end
357 | 
358 |     # Final block of zeros (1×k matrix)
359 |     baseIdx = n*(m+1) + k*m
360 |     for i in 1:k
361 |         J[baseIdx + i] = 0.0
362 |     end
363 | 
364 |     return J
365 | end
366 | 
367 | function assign_matrix(J, gg, Dg)
368 |     n, m = size(gg)
369 |     k = size(Dg, 2)
370 |     R = [[gg'; ones(1, n)] [Dg; zeros(1, k)]]
371 |     return J .= vec(R)
372 | end
373 | 
374 | using SparseArrays
375 | 
376 | function rowcol_of_sparse(g::SparseMatrixCSC; offset_row = 0, offset_col = 0)
377 |     rows = rowvals(g)
378 |     vals = nonzeros(g)
379 |     m, n = size(g)
380 |     tup = Tuple{Int64, Int64}[]
381 |     for j in 1:n
382 |         for i in nzrange(g, j)
383 |             push!(tup, (rows[i]+offset_row, j+offset_col))
384 |         end
385 |     end
386 |     return tup
387 | end
388 | 
389 | function weighted_mean!(μ::AbstractVector{T},
390 |         w::AbstractVector,
391 |         x::AbstractMatrix) where {T}
392 |     fill!(μ, zero(T))
393 |     nobs = size(x, 1) ## was the global `n`; keep the helper self-contained
394 |     @inbounds for j in axes(x, 2)
395 |         for i in axes(x, 1)
396 |             μ[j] += w[i]*x[i, j]/nobs
397 |         end
398 |     end
399 |     μ[end] = sum(w)
400 |     #μ[1:end-1] ./= n
401 |     return μ
402 | end
403 | 
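## Consistency sketch (not part of the original script): the in-place writer
## `assign_matrix!` should agree with the dense reference `assign_matrix`
## for the k = 1 case used in this example.
let gg = [1.0 2.0; 3.0 4.0; 5.0 6.0], Dg = reshape([7.0, 8.0], 2, 1)
    J1 = Vector{Float64}(undef, 3 * 3 + 1 * 3)
    J2 = similar(J1)
    @assert assign_matrix!(J1, gg, Dg) == assign_matrix(J2, gg, Dg)
end
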
403 | """
404 |     rowcol_of_dense(n, m; offset_row = 0, offset_col = 0)
405 | 
406 | Returns the row and column indices of all elements of a dense `m × n` matrix in column-major order, with optional offsets for rows and columns.
407 | 
408 | # Arguments
409 | - `n::Int`: The number of columns.
410 | - `m::Int`: The number of rows.
411 | - `offset_row::Int` (default: 0): An offset to be added to each row index.
412 | - `offset_col::Int` (default: 0): An offset to be added to each column index.
413 | 
414 | # Returns
415 | A vector of tuples `(row, col)` representing the indices of all elements in the dense matrix.
416 | 
417 | # Example
418 | ```julia
419 | rowcol_of_dense(2, 2) # [(1, 1), (2, 1), (1, 2), (2, 2)]
420 | ```
421 | """
422 | function rowcol_of_dense(n, m; offset_row = 0, offset_col = 0)
423 |     tup = Tuple{Int64, Int64}[] # Initialize an empty vector of tuples
424 |     @inbounds for j in 1:n
425 |         for i in 1:m
426 |             push!(tup, (i + offset_row, j + offset_col))
427 |         end
428 |     end
429 |     return tup
430 | end
431 | 
432 | function copy_lower_triangular!(x::AbstractVector{T}, A::Matrix{T}) where {T}
433 |     @assert size(A, 1) == size(A, 2) "`A` must be square"
434 |     n = size(A, 1)
435 |     len = (n * (n + 1)) ÷ 2 # Length of the packed lower triangle
436 |     @assert length(x) == len "The destination vector must have length $(len)"
437 |     idx = 1
438 |     @inbounds for j in 1:n
439 |         for i in j:n
440 |             x[idx] = A[i, j]
441 |             idx += 1
442 |         end
443 |     end
444 |     return x
445 | end
446 | 
447 | function copy_lower_triangular!(x::AbstractVector{T}, A::Vector{T}) where {T}
448 |     n = length(A)
449 |     @assert n == 1 "`copy_lower_triangular!` for vectors makes sense only for singleton vectors"
450 |     @assert length(x) == 1 "The destination vector must have length 1"
451 |     x .= A
452 |     return x
453 | end
454 | 
455 | # optprob = OptimizationFunction(divergence, Optimization.AutoForwardDiff(), cons = cons)
456 | # prob = OptimizationProblem(optprob, x0, _p,
457 | #     lcons = repeat([0.], 2),
458 | #     ucons = repeat([0.], 2),
459 | #     lb = [repeat([0], 100); -Inf],
460 | #     ub = [repeat([+Inf], 100); +Inf])
461 | 
462 | # solver = OptimizationMOI.MOI.OptimizerWithAttributes(Ipopt.Optimizer, "print_level" => 0)
463 | 
464 | # solve(prob, solver)
465 | 
--------------------------------------------------------------------------------
/doc/mdprob.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cyipopt
3 | 
4 | from scipy.sparse import coo_array, csc_array
5 | from abc import ABC, abstractmethod
6 | 
7 | # =============================================================================
8 | # Abstract Base Class for Moment Functions
9 | # =============================================================================
10 | class AbstractMomentFunction(ABC):
11 | 
12 |     @abstractmethod
13 |     def g(self, theta):
14 |         """
15 |         Compute the moment matrix G(θ) for a given parameter vector θ.
16 |         Parameters
17 |         ----------
18 |         theta : ndarray, shape (k,)
19 |             Parameter vector.
20 | 
21 |         Returns
22 |         -------
23 |         Z : ndarray, shape (n, m)
24 |             Moment matrix.
25 |         """
26 |         pass
27 | 
28 |     @abstractmethod
29 |     def Dg(self, theta, pi):
30 |         """
31 |         Compute the derivative of sum(pi*G(θ)) with respect to θ.
32 | 
33 |         Parameters
34 |         ----------
35 |         theta : ndarray, shape (k,)
36 |             Parameter vector.
37 |         pi : ndarray, shape (n,)
38 |             Weight vector.
39 | 
40 |         Returns
41 |         -------
42 |         dG : ndarray, shape (m, k)
43 |             Derivative matrix.
44 |         """
45 |         pass
46 | 
47 |     @abstractmethod
48 |     def Dg_lambda(self, theta, lam):
49 |         """
50 |         Compute the derivative of lambda'G(θ) with respect to θ.
51 | 
52 |         Parameters
53 |         ----------
54 |         theta : ndarray, shape (k,)
55 |             Parameter vector.
56 |         lam : ndarray, shape (m,)
57 |             Lagrange multiplier vector.
58 | 
59 |         Returns
60 |         -------
61 |         dgl : ndarray, shape (n, k)
62 |             The derivative matrix.
63 |         """
64 |         pass
65 | 
66 |     @abstractmethod
67 |     def Dg_lambda_pi(self, theta, lam, pi):
68 |         """
69 |         Compute the derivative of pi*g with respect to θ,
70 |         weighted by pi.
71 | 
72 |         Parameters
73 |         ----------
74 |         theta : ndarray, shape (k,)
75 |             Parameter vector.
76 | lam : ndarray, shape (m,) 77 | Lagrange multiplier vector. 78 | pi : ndarray, shape (n,) 79 | Weight vector. 80 | 81 | Returns 82 | ------- 83 | dgl : ndarray, shape (n, k) 84 | The weighted derivative matrix. 85 | """ 86 | pass 87 | 88 | @abstractmethod 89 | def Hg_lambda(self, theta, lam, pi): 90 | """ 91 | Compute the Hessian of lambda' g with respect to theta. 92 | (In this example, it returns a zero matrix as a placeholder.) 93 | 94 | Parameters 95 | ---------- 96 | theta : ndarray, shape (k,) 97 | Parameter vector. 98 | lam : ndarray, shape (m,) 99 | Lagrange multiplier vector. 100 | pi : ndarray, shape (n,) 101 | Weight vector. 102 | 103 | Returns 104 | ------- 105 | H : ndarray, shape (k, k) 106 | Hessian matrix. 107 | """ 108 | pass 109 | 110 | # ============================================================================= 111 | # A Concrete Implementation of the Moment Function 112 | # Instrumental Variables Moment Function 113 | # ============================================================================= 114 | class DefaultMomentFunction(AbstractMomentFunction): 115 | def __init__(self, y, x, z): 116 | """ 117 | Initialize the moment function caches. 118 | 119 | Parameters 120 | ---------- 121 | y : ndarray, shape (n,) 122 | Response variable. 123 | x : ndarray, shape (n, k) 124 | Regressors (first column is x, additional columns may be included). 125 | z : ndarray, shape (n, m) 126 | Instrumental variables. 127 | """ 128 | 129 | self.y = np.ravel(y) 130 | self.n = self.y.shape[0] 131 | self.x = np.asarray(x) 132 | if self.x.ndim == 1: 133 | self.x = self.x.reshape(-1, 1) 134 | self.k = self.x.shape[1] 135 | 136 | # Ensure z is 2D 137 | self.z = np.asarray(z) 138 | if self.z.ndim == 1: 139 | self.z = self.z.reshape(-1, 1) 140 | self.m = self.z.shape[1] 141 | 142 | # Validate dimensions 143 | if self.x.shape[0] != self.n or self.z.shape[0] != self.n: 144 | raise ValueError("All inputs (y, x, z) must have the same number of observations") 145 | 146 | # Allocate caches (same shapes as the originals) 147 | self.Y = np.empty_like(self.y) # for temporary n-vector operations 148 | self.X = np.empty_like(self.x) # for temporary (n,k) operations 149 | self.Z = np.empty_like(self.z) # for temporary (n,m) operations 150 | # Cache for the gradient matrix: shape (m,k) 151 | self.dG = np.empty((self.m, self.k), dtype=float) 152 | 153 | def g(self, theta): 154 | # Compute Y = x dot theta, storing the result in the cache self.Y. 155 | np.matmul(self.x, theta, out=self.Y) 156 | # Overwrite Y with (y - x dot theta) in place. 157 | np.subtract(self.y, self.Y, out=self.Y) 158 | # Compute Z: for each observation i, multiply row z[i] by Y[i]. 159 | 160 | self.Z[:] = self.z * self.Y[:, np.newaxis] 161 | return self.Z 162 | 163 | def Dg(self, theta, pi): 164 | # Compute X = pi * x (elementwise multiplication along rows). 165 | self.X[:] = self.x * pi[:, np.newaxis] 166 | # Compute dG = - z^T dot X, storing in self.dG. 167 | np.dot(self.z.T, self.X, out=self.dG) 168 | self.dG *= -1.0 169 | return self.dG 170 | 171 | def Dg_lambda(self, theta, lam): 172 | # Compute Y = z lam (vector of length n). 173 | np.matmul(self.z, lam, out=self.Y) 174 | # Compute X = - Y * x (each row scaled by -Y[i]) and divide by n. 175 | self.X[:] = -self.Y[:, np.newaxis] * self.x 176 | self.X /= self.n 177 | return self.X 178 | 179 | def Dg_lambda_pi(self, theta, lam, pi): 180 | # Compute the unweighted derivative first. 181 | dgl = self.Dg_lambda(theta, lam) 182 | # Multiply each row by the corresponding pi element. 
        dgl[:] = dgl * pi[:, np.newaxis]
        return dgl

    def Dg_lambda_inplace(self, J, theta, lam, pi=None):
        if pi is None:
            dgl = self.Dg_lambda(theta, lam)
        else:
            dgl = self.Dg_lambda_pi(theta, lam, pi)
        # Flatten the dgl array (C-order) and copy into J.
        np.copyto(J, dgl.ravel())

    def Hg_lambda(self, theta, lam, pi):
        # Placeholder: returns a zero matrix of shape (k, k).
        return np.zeros((self.k, self.k), dtype=float)

# =============================================================================
# The MDProblem class
# =============================================================================
class MDProblem(cyipopt.Problem):
    r"""
    A Python translation of the Ipopt problem defined in Julia.

    The decision variable vector `u` is assumed to be partitioned as
        u = [π; θ]
    with π an n-vector and θ a k-vector.

    The constraints are defined in terms of a moment function
        g(θ) = [ z_i * (y_i - x_i' θ) ]_{i=1}^n
    and a "weighted-sum" constraint computed as:
        c(j)   = (1/n) Σ_{i=1}^{n} π[i] g(θ)[i, j]   for j = 1, …, m
        c(m+1) = Σ_{i=1}^{n} π[i] - n
    so that the overall constraint vector is of length (m+1).

    The objective function is taken to be a divergence function of π.
    (Typically you'll supply a divergence object with methods `__call__`,
    `gradient`, and `hessian`.)
    """
    def __init__(self, moment, divergence):
        """
        Parameters
        ----------
        moment : AbstractMomentFunction instance
            Holds the data and cached arrays for computing g and its derivatives.
        divergence : object
            A divergence object supporting __call__(pi), gradient(pi) and hessian(pi).
        """
        self.moment = moment
        self.divergence = divergence
        # Dimensions: n = number of observations, k = dimension of θ, m = number of instruments
        self.n = moment.n
        self.k = moment.k
        self.m = moment.m

    # ----------------------------
    # Objective and its gradient
    # ----------------------------
    def objective(self, u):
        r"""
        Evaluate the objective function:
            f(u) = divergence(π)
        where u = [π; θ].
        """
        pi = u[:self.n]
        return self.divergence(pi)

    def gradient(self, u):
        r"""
        Evaluate the gradient of the objective with respect to u.

        The derivative with respect to π is given by divergence.gradient(pi)
        and with respect to θ is zero.
        """
        pi = u[:self.n]
        grad = np.empty_like(u)
        grad[:self.n] = self.divergence.gradient(pi)
        grad[self.n:] = 0.0
        return grad

    # ----------------------------
    # Constraints and their Jacobian
    # ----------------------------
    def constraints(self, u):
        r"""
        Evaluate the constraints.

        Let G = g(θ) be the (n×m) moment matrix computed by the cached moment function.
        Then define
            c(j)   = (1/n) Σ_{i=1}^{n} π[i] G[i, j]   for j = 1, ..., m
            c(m+1) = Σ_{i=1}^{n} π[i] - n
        so that the constraint vector has length (m+1).
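        (Implementation note: writing the normalization constraint as
        Σ π[i] - n, rather than Σ π[i], lets all m+1 constraints share the
        same equality bounds cl = cu = 0 when the problem is assembled below.)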
275 | """ 276 | pi = u[:self.n] 277 | theta = u[self.n:] 278 | # Evaluate G = g(θ) (note: this call reuses cached arrays in self.moment) 279 | G = self.moment.g(theta) 280 | constr = np.empty(self.m + 1, dtype=np.float64) 281 | # For j = 0,..., m-1: 282 | constr[:self.m] = np.sum(pi[:, None] * G, axis=0) / self.n 283 | # Last constraint: sum of π 284 | constr[self.m] = np.sum(pi) - self.n 285 | return constr 286 | 287 | def jacobian(self, u): 288 | r""" 289 | Evaluate the constraint Jacobian. 290 | 291 | The Jacobian is the block matrix: 292 | J = [ (G/n)^T (Dg/n)^T ] 293 | [ ones(1,n) zeros(1,k) ] 294 | where G = g(θ) is (n×m) and Dg = Dg(θ, π) is (m×k). We return J as a 295 | 2D array of shape ((m+1) x (n+k)). (Ipopt may require a flattened version.) 296 | """ 297 | pi = u[:self.n] 298 | theta = u[self.n:] 299 | # Compute G and its derivative Dg; note that our moment function routines 300 | # use cached arrays to avoid allocation. 301 | G = self.moment.g(theta) 302 | G_scaled = G / self.n 303 | Dg = self.moment.Dg(theta, pi) 304 | Dg_scaled = Dg / self.n 305 | # Build the top m rows: for constraints 1..m. 306 | # With respect to π: derivative is (G_scaled)^T, with respect to θ: derivative is Dg_scaled. 307 | top_block = np.hstack((G_scaled.T, Dg_scaled)) 308 | # Last row: derivative of constraint c(m+1) with respect to π is 1 and with respect to θ is 0. 309 | last_row = np.hstack((np.ones((1, self.n)), np.zeros((1, self.k)))) 310 | J_full = np.vstack((top_block, last_row)) 311 | # If a flat vector is needed (for example by Ipopt) then you might return J_full.ravel() 312 | return J_full 313 | 314 | # ---------------------------- 315 | # Hessian of the Lagrangian 316 | # ---------------------------- 317 | def hessianstructure(self): 318 | """Return the (row, col) indices of the lower-triangular non-zero elements of H.""" 319 | # Diagonal elements of D: (0,0), (1,1), ..., (n-1,n-1) 320 | n = self.n 321 | k = self.k 322 | diag_rows = np.arange(n) 323 | diag_cols = np.arange(n) 324 | 325 | # Off-diagonal block Dg' (k x n block starting at row n, column 0) 326 | block_rows = np.repeat(np.arange(n, n + k), n) 327 | block_cols = np.tile(np.arange(n), k) 328 | 329 | # Combine indices 330 | rows = np.concatenate([diag_rows, block_rows]) 331 | cols = np.concatenate([diag_cols, block_cols]) 332 | return rows, cols 333 | 334 | def hessian(self, u, lam, sigma): 335 | r""" 336 | Evaluate the Hessian of the Lagrangian 337 | L(π, θ, λ) = divergence(π) + λ' g(θ) 338 | at the point u, with scalar multiplier sigma and Lagrange multiplier lam. 339 | 340 | The Hessian is returned as a flat vector containing: 341 | - The first n entries: if sigma==0, zeros; otherwise, sigma times the divergence Hessian at π. 342 | - The next n*k entries: the flattened version (row-major) of Dgλ (the derivative of λ'g w.r.t. θ). 343 | - The final k*(k+1)//2 entries: the lower triangular part of Hgλ. 344 | (In the Julia code Hgλ is a zero matrix; here we follow that.) 
345 | """ 346 | pi = u[:self.n] 347 | theta = u[self.n:] 348 | if sigma != 0: 349 | D_diag = self.divergence.hessian(pi)*sigma 350 | else: 351 | D_diag = np.zeros(self.n) 352 | 353 | # Get Dg (n x k matrix) and transpose it to k x n (Dg') 354 | Dg = self.moment.Dg_lambda(theta, lam[:self.m]) 355 | Dg_T_flat = Dg.T.flatten() # Flatten in row-major order 356 | 357 | # Combine D diagonal and Dg' block values 358 | H = np.concatenate([D_diag, Dg_T_flat]) 359 | return H 360 | 361 | # def intermediate(self, alg_mod, iter_count, obj_value, inf_pr, inf_du, mu, 362 | # d_norm, regularization_size, alpha_du, alpha_pr, 363 | # ls_trials): 364 | # """Prints information at every Ipopt iteration.""" 365 | # iterate = self.get_current_iterate() 366 | # infeas = self.get_current_violations() 367 | # primal = iterate["x"] 368 | # jac = self.jacobian(primal) 369 | 370 | # print("Iteration:", iter_count) 371 | # print("Primal iterate:", primal) 372 | # print("Flattened Jacobian:", jac) 373 | 374 | 375 | 376 | exec(open('divergences.py').read()) 377 | 378 | 379 | def randiv(n=100, m=5, k=1, theta=0.0, rho=0.9, CP=20): 380 | """ 381 | Simulates instrumental variables regression data 382 | 383 | Returns: 384 | y: outcome variable (n x 1) 385 | covariates: matrix [x w] (n x (1 + k)) 386 | instruments: matrix [z w] (n x (m + k)) 387 | theory_val: theoretical strength measure (array length m) 388 | """ 389 | # Generate instrument strength vector 390 | tau = np.full(m, np.sqrt(CP / (m * n))) 391 | 392 | # Generate base data matrices 393 | z = np.random.randn(n, m) # Instruments 394 | w = np.random.randn(n, k) # Exogenous controls (corrected to k columns) 395 | 396 | # Generate correlated errors 397 | eta = np.random.randn(n, 1) 398 | u = rho * eta + np.sqrt(1 - rho**2) * np.random.randn(n, 1) 399 | 400 | # Create endogenous variable x 401 | x = z @ tau.reshape(-1, 1) + eta 402 | 403 | # Create outcome variable y (n,) 404 | y = x * theta + u 405 | 406 | # Create combined matrices 407 | covariates = np.hstack((x, w)) 408 | instruments = np.hstack((z, w)) 409 | 410 | 411 | 412 | return y, covariates, instruments 413 | 414 | 415 | n = 100 416 | n_instruments = 5 417 | n_exo = 1 418 | np.random.seed(42) 419 | y, x, z = randiv(n=n,k=n_exo, m=n_instruments) 420 | n, m = z.shape 421 | n, k = x.shape 422 | 423 | np.savetxt('y.csv', y, delimiter=',') # Shape (n,) 424 | np.savetxt('x.csv', x, delimiter=',') # Shape (n, n_exo) 425 | np.savetxt('z.csv', z, delimiter=',') # Shape (n, n_instruments) 426 | 427 | divergence = KullbackLeibler() 428 | momfun = DefaultMomentFunction(y,x,z) 429 | problem = MDProblem(momfun, divergence) 430 | 431 | pi = np.random.uniform(0,1,n) 432 | theta = np.random.uniform(0, 1, k) 433 | 434 | 435 | u0 = np.concatenate((pi, theta)) 436 | lb = np.concatenate((np.zeros_like(pi), -10.0 * np.ones_like(theta))) 437 | ub = np.concatenate((np.inf*np.ones_like(pi), 10.0 * np.ones_like(theta))) 438 | 439 | # Define constraint bounds. Our constraint vector has length m+1. 440 | # For equality constraints, we set cl = cu. 441 | # For instance, suppose we require c(u) == 0. 
cl = np.zeros(m + 1)
cu = np.zeros(m + 1)

# prob = MDOptProblem(problem, u0, lb, ub, cl, cu)
pi = u0[:n]
theta = u0[n:]
lam = np.ones(m)

# Smoke-test the moment function and its derivatives once before solving.
momfun.g(theta)
momfun.Dg(theta, pi)
momfun.Dg_lambda(theta, lam)
momfun.Dg_lambda_pi(theta, lam, pi)

p = cyipopt.Problem(
    n=len(u0),
    m=len(cl),
    problem_obj=problem,
    lb=lb,
    ub=ub,
    cl=cl,
    cu=cu,
)

p.add_option('derivative_test', 'second-order')
p.add_option('print_level', 5)
p.add_option('derivative_test_print_all', 'no')
p.solve(u0)
--------------------------------------------------------------------------------
/doc/mdexample_iv.jl:
--------------------------------------------------------------------------------
using MathOptInterface, Optimization, OptimizationMOI, OptimizationOptimJL, Ipopt
using ForwardDiff, DifferentiationInterface
using Divergences
using Statistics, LinearAlgebra
using Infiltrator

## --------------------------------------------------------------------- ##
## Moment Conditions & Jacobian
## --------------------------------------------------------------------- ##

## This function is the moment matrix of the estimation problem.
## It should always be defined by the user.
function g(θ, p)
    (y, x, z) = p.data
    z .* (y .- x * θ)
end

function Jgλ(θ, λ, p)
    (y, x, z) = p.data
    -(z * λ) .* x
end

function Hgλ!(H, θ, π, λ, p)
    fill!(H, 0.0)
end

## A sketch of the π-weighted gradient built from Jgλ (completed here so the
## file parses; the full versions used below are defined later in the file).
function ∇g(θ, λ, π, p)
    vec(sum(π .* Jgλ(θ, λ, p); dims = 1))
end

## -----
## High-performance version
## -----

y = randn(100)
x = randn(100, 2)
z = randn(100, 4)

p = (y = y,
    x = x,
    z = z,
    Y = similar(y),
    X = similar(x),
    Z = similar(z),
    ∂G = Matrix{Float64}(undef, size(z, 2), size(x, 2)));

function g(θ, p)
    (y, x, z, Y, X, Z, ∂G) = p
    mul!(Y, x, θ)
    broadcast!(-, Y, y, Y)
    broadcast!(*, Z, z, Y)
    return Z
end

function Dgn(θ, π, p)
    (y, x, z, Y, X, Z, ∂G) = p
    broadcast!(*, X, π, x)
    mul!(∂G, z', -X)
    return ∂G
end

function Dgλ(θ, λ, p)
    (y, x, z, Y, X, Z, ∂G) = p
    mul!(Y, z, λ)
    broadcast!(*, X, -Y, x)
    broadcast!(/, X, X, length(Y))
    return X
end

function Dgλ(θ, λ, π, p)
    (y, x, z, Y, X, Z, ∂G) = p
    ∂gλ = Dgλ(θ, λ, p)
    broadcast!(*, ∂gλ, ∂gλ, π)
    return ∂gλ
end

function Dgλ!(J, θ, λ, p)
    ∂gλ = Dgλ(θ, λ, p)
    copy!(J, vec(∂gλ))
end

function Dgλ!(J, θ, λ, π, p)
    ∂gλ = Dgλ(θ, λ, π, p)
    copy!(J, vec(∂gλ))
end

function Hgλ!(H, θ, λ, π, p)
    ## Note: Only the lower triangular part needs to be updated.
    fill!(H, 0.0)
end

## Derived

## This function is the gradient of the mean moment matrix,
## which is (m, k), where m is the number of moments and k
## is the number of parameters:
## \frac{\partial}{\partial\theta}\left[\sum_{i=1}^{n}\pi_{i}g(x_{i},\theta)/n\right]
## It should be written in a way that it can be used with ForwardDiff or Zygote.
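## For the linear IV moment g(xᵢ, θ) = zᵢ(yᵢ - xᵢ'θ) this derivative has the
## closed form -z'(π .* x); the 1/n normalization is applied by the callers
## (λ∇g and MOI.eval_constraint_jacobian below).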
function ∇g(θ, π, p)
    n, k, m = size(p)
    (y, x, z) = p.data
    res = -z' * (π .* x)
    return res
end

function λ∇g(θ, π, λ, p)
    n, k, m = size(p)
    vec(λ' * ∇g(θ, π, p) ./ n)
end

## This must return an (n, k) matrix.
function ∇gᵢλ!(dest::AbstractMatrix, θ, λ, p)
    n, k, m = size(p)
    (y, x, z) = p.data
    for j in axes(dest, 2)
        xv = view(x, :, j)
        dest[:, j] = (-(z * λ) .* xv) / n
    end
    return dest
end

function ∇gᵢλ(θ, λ, p)
    n, k, m = size(p)
    res = Matrix{promote_type(eltype(θ), eltype(λ))}(undef, n, k)
    ∇gᵢλ!(res, θ, λ, p)
end

## --------------------------------------------------------------------- ##
## Optimization Problem
## --------------------------------------------------------------------- ##

const MOI = MathOptInterface

struct MDProblem{D} <: MOI.AbstractNLPEvaluator
    div::Divergences.AbstractDivergence
    data::D
    size::Tuple{Int, Int, Int}
    backend::DifferentiationInterface.AbstractADType
end

Base.size(md::MDProblem) = md.size
divergence(md::MDProblem) = md.div

function MOI.initialize(md::MDProblem, rf::Vector{Symbol})
    for feat in rf
        if !(feat in [:Grad, :Jac, :Hess])
            error("Unsupported feature $feat")
        end
    end
end

MOI.features_available(md::MDProblem) = [:Grad, :Jac, :Hess]

## --------------------------------------------------------------------- ##
## Objective function
## --------------------------------------------------------------------- ##
function MOI.eval_objective(md::MDProblem, u::Vector{Float64})
    n, k, m = size(md)
    divergence(md)(view(u, 1:n))
end

function MOI.eval_objective_gradient(md::MDProblem, res, u)
    n, k, m = size(md)
    T = eltype(res)
    Divergences.gradient!(view(res, 1:n), divergence(md), view(u, 1:n))
    fill!(view(res, (n + 1):(n + k)), zero(T))
end

## --------------------------------------------------------------------- ##
## Constraints
## --------------------------------------------------------------------- ##
function MOI.eval_constraint(md::MDProblem, res, u)
    n, k, m = size(md)
    θ = view(u, (n + 1):(n + k))
    π = view(u, 1:n)
    G = g(θ, md)
    weighted_mean!(res, π, G)
end

## --------------------------------------------------------------------- ##
## Constraints Jacobian
## --------------------------------------------------------------------- ##
function MOI.jacobian_structure(md::MDProblem)
    n, k, m = size(md)
    rowcol_of_dense(n + k, m + 1)
end

function MOI.eval_constraint_jacobian(md::MDProblem, J, u)
    n, k, m = size(md)
    θ = view(u, (n + 1):(n + k))
    π = view(u, 1:n)
    G = g(θ, md)
    G .= G ./ n
    ∇gₙ = ∇g(θ, π, md)
    ∇gₙ .= ∇gₙ ./ n
    assign_matrix!(J, G, ∇gₙ)
end

## --------------------------------------------------------------------- ##
## Hessian of the Lagrangian
## --------------------------------------------------------------------- ##

## The Lagrangian is given by:
##
##   L(π, θ, λ) = D(π, p) + λ'g(θ)

function MOI.hessian_lagrangian_structure(md::MDProblem)
    n, k, m = size(md)
    hele = Int(n + n * k + k * (k + 1) ÷ 2)
    rows = Array{Int64}(undef, hele)
    cols = Array{Int64}(undef, hele)
    ## Diagonal elements
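    ## (Ipopt expects only the lower triangle of the Hessian, so every
    ## (row, col) pair pushed below satisfies row ≥ col; this first block is
    ## σ·∇²D(π), the contribution of the divergence term.)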
    for j in 1:n
        rows[j] = j
        cols[j] = j
    end
    idx = n + 1

    # for j in 1:k
    #     for s in 1:n
    #         rows[idx] = s
    #         cols[idx] = n + j
    #         idx += 1
    #     end
    # end

    ## Off-diagonal elements
    for j in 1:k
        for s in 1:n
            rows[idx] = n + j
            cols[idx] = s
            idx += 1
        end
    end

    ## Last block
    for j in 1:k
        for s in 1:j
            rows[idx] = n + j
            cols[idx] = n + s
            idx += 1
        end
    end

    [(r, c) for (r, c) in zip(rows, cols)]
end

function MOI.eval_hessian_lagrangian(md::MDProblem, hess, u, σ, λ)
    n, k, m = size(md)
    π = view(u, 1:n)
    θ = view(u, (n + 1):(n + k))
    if σ == 0
        @inbounds for j in 1:n
            hess[j] = 0.0
        end
    else
        hv = view(hess, 1:n)
        Divergences.hessian!(hv, divergence(md), π)
        hv .= hv .* σ
    end

    λv = view(λ, 1:m)
    # v = ∇gᵢ(md.data, θ)*λv./n
    ## As this matrix is symmetric, Ipopt expects only the lower-triangular
    ## entries to be specified!
    ## hess[n+1:n+n*k] .= vec(v')
    ∇gᵢλ!(reshape(view(hess, (n + 1):(n + n * k)), n, k), θ, λv, md)
    ## If k > 1, we should only keep the lower-triangular entries of
    ## the gradient of λ∇g.
    ## hess[n+n*k+1:n+n*k+k^2] .= gradient(λ∇g, md.backend, θ, Constant(π), Constant(λv), Constant(md.data))
    ## vv = gradient(λ∇g, md.backend, θ, Constant(π), Constant(λv), Constant(md.data))
    copy_lower_triangular!(view(hess, (n + n * k + 1):(n + n * k + k * (k + 1) ÷ 2)),
        jacobian(λ∇g, md.backend, θ, Constant(π), Constant(λv), Constant(md)))
end

## --------------------------------------------------------------------- ##
## Problem
## --------------------------------------------------------------------- ##

## Small problem to test the implementation
n = 100
k = 2
m = 4

𝒟 = ChiSquared()
ℳ𝒟 = FullyModifiedDivergence(𝒟, 0.7, 1.2)

mdprob = MDProblem(𝒟,
    (y = randn(n), x = randn(n, k), z = randn(n, m), u = randn(n), η = randn(n)),
    (n, k, m), AutoForwardDiff())

function simulate!(mdprob; θ = 0.5, γ = 0.1)
    n, k, m = size(mdprob)
    (y, x, z, u, η) = mdprob.data
    z .= randn(n, m)
    x[:, 1] .= z * repeat([γ], m) + randn!(η)
    x[:, 2] .= z * repeat([γ], m) + randn!(u)
    y .= x * repeat([θ], k) .+ randn!(u)
    return nothing
end

using Random
simulate!(mdprob)
n, k, m = size(mdprob)

model = Ipopt.Optimizer()
π = MOI.add_variables(model, n)
MOI.add_constraint.(model, π, MOI.GreaterThan(0.0))
θ = MOI.add_variables(model, k)
MOI.add_constraint.(model, θ, MOI.GreaterThan(-10.0))
MOI.add_constraint.(model, θ, MOI.LessThan(+10.0))

MOI.get(model, MOI.NumberOfVariables())

for i in 1:n
    MOI.set(model, MOI.VariablePrimalStart(), π[i], 1.0)
end

for i in 1:k
    MOI.set(model, MOI.VariablePrimalStart(), θ[i], 0.0)
end

lb = [zeros(m); n]
ub = [zeros(m); n]

MOI.set(model, MOI.ObjectiveSense(), MOI.MIN_SENSE)
block_data = MOI.NLPBlockData(MOI.NLPBoundsPair.(lb, ub), mdprob, true)
MOI.set(model, MOI.NLPBlock(), block_data)

model.options["derivative_test"] = "none"
model.options["derivative_test_print_all"] = "no"

model.options["print_level"] = 4

MOI.optimize!(model)

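## A quick post-solve check (a sketch, not in the original script): recover the
## implied probabilities π̂ and verify the normalization constraint Σπᵢ = n.
π̂ = MOI.get(model, MOI.VariablePrimal(), π)
@assert all(π̂ .>= -1e-8)
@assert isapprox(sum(π̂), n; rtol = 1e-6)
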
MOI.get(model, MOI.TerminationStatus())
MOI.get(model, MOI.DualStatus())
MOI.get(model, MOI.PrimalStatus())

MOI.get(model, MOI.SolveTimeSec())
MOI.get(model, MOI.BarrierIterations())

xstar = MOI.get(model, MOI.VariablePrimal(), θ)

function lagrangian(md::MDProblem, u, σ, λ)
    n, k, m = size(md)
    π = u[1:n]
    θ = u[(n + 1):(n + k)]
    σ * divergence(md)(π) + mean(π .* g(θ, md) * λ)
end

## (These checks were run on a smaller instance; with the n = 100 problem above,
## `p` must have n + k entries and `H` must match `hessian_lagrangian_structure`.)
p = [0.45793379249066035, 4.999416892014921, 9.182989399836064,
    3.6958463315972025, 6.220383439227501, 0.019436036309187443,
    2.063484686999562, 10.894774879314305, 8.25546846552471,
    4.029010019680072, -2.975818044182361, 1.4669020891138018]

lagrangian(mdprob, p, 1.0, [1.5, 0.0])

## Check the analytic Hessian of the Lagrangian against ForwardDiff.
H0 = ForwardDiff.hessian(x -> lagrangian(mdprob, x, 1.5, [1.5, 0.0]), p);
H = zeros(34)
MOI.eval_hessian_lagrangian(mdprob, H, p, 1.5, [1.5, 0.0])

H0 = ForwardDiff.hessian(x -> lagrangian(mdprob, x, 0.0, [1.5, 0]), p);
MOI.eval_hessian_lagrangian(mdprob, H, p, 0.0, [1.5, 0])

## --------------------------------------------------------------------- ##
## Simple MC
## --------------------------------------------------------------------- ##

β = Matrix{Float64}(undef, 1000, 2)
for j in 1:1000
    simulate!(mdprob)
    MOI.optimize!(model)
    β[j, :] .= MOI.get(model, MOI.VariablePrimal(), θ)
end

using StatsPlots

StatsPlots.density(β)
StatsPlots.histogram(β, nbins = 80)

## --------------------------------------------------------------------- ##
## Utilities
## --------------------------------------------------------------------- ##

"""
    assign_matrix!(J, g, ∇g)

Assigns the elements of the block matrix `X = [[g'; ones(1, n)] [∇g; zeros(1, k)]]`
into the preallocated array `J`.

# Arguments
- `J::Vector{Float64}`: A preallocated array of length `(m + 1) * (n + k)`, where `m`, `n`, and `k` are the dimensions of `g` and `∇g`.
- `g::AbstractMatrix{T}`: An `n × m` matrix.
- `∇g::AbstractMatrix{T}`: An `m × k` matrix.

# Behavior
- The function directly assigns:
  - The elements of the transpose of `g` (`g'`) in column-major order.
  - The elements of `∇g` in column-major order.
- The `ones(1, n)` and `zeros(1, k)` entries are written as constants.
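- Equivalent to `J .= vec([[g'; ones(1, n)] [∇g; zeros(1, k)]])` (see
  `assign_matrix` below), but without allocating the block matrix.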

# Example
```julia
A = [1 2; 3 4; 5 6] # 3 × 2 matrix (n = 3, m = 2)
B = [7 8; 9 10]     # 2 × 2 matrix (m = 2, k = 2)

J = Vector{Float64}(undef, (2 + 1) * (3 + 2)) # Preallocate array of length 15
assign_matrix!(J, A, B)

# J will be:
# [1.0, 2.0, 1.0, 3.0, 4.0, 1.0, 5.0, 6.0, 1.0, 7.0, 9.0, 0.0, 8.0, 10.0, 0.0]
```
"""
function assign_matrix!(J, gg, Dg)
    n, m = size(gg)  # gg is n × m
    k = size(Dg, 2)  # Dg is m × k

    # Each column of the stacked Jacobian has (m + 1) elements.
    # The first n columns come from gg' and the ones row;
    # the next k columns come from Dg and the zeros row.

    # First n columns (from gg' and the ones row)
    for j in 1:n
        # Elements from gg'
        for i in 1:m
            J[(j - 1) * (m + 1) + i] = gg[j, i]
        end
        # Element from the ones row
        J[j * (m + 1)] = 1.0
    end

    # Next k columns (from Dg and the zeros row)
    offset = n * (m + 1)
    for j in 1:k
        # Elements from Dg
        for i in 1:m
            J[offset + (j - 1) * (m + 1) + i] = Dg[i, j]
        end
        # Element from the zeros row
        J[offset + j * (m + 1)] = 0.0
    end
    return J
end

function assign_matrix(J, gg, Dg)
    n, m = size(gg)
    k = size(Dg, 2)
    R = [[gg'; ones(1, n)] [Dg; zeros(1, k)]]
    J .= vec(R)
end

using SparseArrays

function rowcol_of_sparse(g::SparseMatrixCSC; offset_row = 0, offset_col = 0)
    rows = rowvals(g)
    vals = nonzeros(g)
    m, n = size(g)
    tup = Tuple{Int64, Int64}[]
    for j in 1:n
        for i in nzrange(g, j)
            push!(tup, (rows[i] + offset_row, j + offset_col))
        end
    end
    return tup
end

function weighted_mean!(μ::AbstractVector{T}, w::AbstractVector, x::AbstractMatrix) where {T}
    n = size(x, 1)
    fill!(μ, zero(T))
    @inbounds for j in axes(x, 2)
        for i in axes(x, 1)
            μ[j] += w[i] * x[i, j] / n
        end
    end
    μ[end] = sum(w)
    return μ
end

function weighted_sum(G, w)
    n, m = size(G)
    res = zeros(eltype(G), m)
    @inbounds for j in axes(G, 2)
        for i in axes(G, 1)
            res[j] += w[i] * G[i, j]
        end
    end
    return res
end

function weighted_sum2(G, w)
    @inbounds vec(sum(w .* G, dims = 1))
end

"""
    rowcol_of_dense(n, m; offset_row = 0, offset_col = 0)

Returns the row and column indices of all elements of a dense matrix with `m`
rows and `n` columns, traversed in column-major order, with optional offsets
for rows and columns.

# Arguments
- `n::Int`: The number of columns.
- `m::Int`: The number of rows.
- `offset_row::Int` (default: 0): An offset added to each row index.
- `offset_col::Int` (default: 0): An offset added to each column index.

# Returns
A vector of tuples `(row, col)` representing the indices of all elements of the matrix.
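Used by `MOI.jacobian_structure` above to declare a dense sparsity pattern for
the `(m + 1) × (n + k)` constraint Jacobian.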

# Example
```julia
rowcol_of_dense(2, 2) # [(1, 1), (2, 1), (1, 2), (2, 2)]
```
"""
function rowcol_of_dense(n, m; offset_row = 0, offset_col = 0)
    tup = Tuple{Int64, Int64}[] # Initialize an empty vector of tuples
    @inbounds for j in 1:n
        for i in 1:m
            push!(tup, (i + offset_row, j + offset_col))
        end
    end
    return tup
end

function copy_lower_triangular!(x::AbstractVector{T}, A::Matrix{T}) where {T}
    @assert size(A, 1) == size(A, 2)
    n = size(A, 1)
    len = (n * (n + 1)) ÷ 2 # Length of output vector
    @assert length(x) == len
    idx = 1
    @inbounds for j in 1:n
        for i in j:n
            x[idx] = A[i, j]
            idx += 1
        end
    end
    return x
end

function copy_lower_triangular!(x::AbstractVector{T}, A::Vector{T}) where {T}
    n = length(A)
    @assert n == 1 "`copy_lower_triangular!` for vectors makes sense only for singleton vectors"
    @assert 1 == (n * (n + 1)) ÷ 2 "The dimension of the dest vector is wrong as it should be $(n*(n+1))÷2"
    x .= A
    return x
end

abstract type SmootherType end

struct Truncated <: SmootherType end
struct Bartlett <: SmootherType end

@inline weight(::Truncated, s, St) = 1.0
@inline weight(::Bartlett, s, St) = 1.0 - s / St

# Base version
function smoother_base(tt::T, G::Matrix, ξ::Integer) where {T <: SmootherType}
    N, M = size(G)
    nG = zeros(N, M)
    St = (2.0 * ξ + 1.0) / 2.0
    for m in 1:M
        for t in 1:N
            low = max(t - N, -ξ)
            high = min(t - 1, ξ)
            for s in low:high
                κ = weight(tt, s, St)
                @inbounds nG[t, m] += κ * G[t - s, m]
            end
        end
    end
    return nG ./ (2 * ξ + 1)
end

function smoother(tt::Truncated, G::Matrix{T}, ξ::Integer) where {T}
    N, M = size(G)
    nG = Matrix{T}(undef, N, M)
    smoother!(tt, nG, G, ξ)
end

function smoother!(tt::Truncated, dest, G::Matrix{T}, ξ::Integer) where {T}
    N, M = size(G)
    denom = 2ξ + 1 # normalization
    Threads.@threads for m in 1:M
        for t in 1:N
            low = max(t - N, -ξ)
            high = min(t - 1, ξ)
            acc = zero(T)
            @inbounds for s in low:high
                # For the truncated kernel the weight is identically 1.
                acc += G[t - s, m]
            end
            dest[t, m] = acc / denom
        end
    end
    return dest
end

# optprob = OptimizationFunction(divergence, Optimization.AutoForwardDiff(), cons = cons)
# prob = OptimizationProblem(optprob, x0, _p,
#     lcons = repeat([0.], 2),
#     ucons = repeat([0.], 2),
#     lb = [repeat([0], 100); -Inf],
#     ub = [repeat([+Inf], 100); +Inf])

# solver = OptimizationMOI.MOI.OptimizerWithAttributes(Ipopt.Optimizer, "print_level" => 0)

# solve(prob, solver)
--------------------------------------------------------------------------------
/doc/divergences_files/libs/quarto-html/popper.min.js:
--------------------------------------------------------------------------------
/**
 * @popperjs/core v2.11.7 - MIT License
 * (vendored, minified Quarto HTML support library; contents omitted)
 */
q,N="y"===j?D:P,I="y"===j?A:L,_="y"===j?"height":"width",F=k[j],X=F+b[N],Y=F-b[I],G=m?-H[_]/2:0,K=w===W?B[_]:H[_],Q=w===W?-H[_]:-B[_],Z=t.elements.arrow,$=m&&Z?g(Z):{width:0,height:0},ee=t.modifiersData["arrow#persistent"]?t.modifiersData["arrow#persistent"].padding:{top:0,right:0,bottom:0,left:0},te=ee[N],ne=ee[I],re=de(0,B[_],$[_]),oe=O?B[_]/2-G-re-te-R.mainAxis:K-re-te-R.mainAxis,ie=O?-B[_]/2+G+re+ne+R.mainAxis:Q+re+ne+R.mainAxis,ae=t.elements.arrow&&E(t.elements.arrow),se=ae?"y"===j?ae.clientTop||0:ae.clientLeft||0:0,fe=null!=(q=null==S?void 0:S[j])?q:0,ce=F+ie-fe,pe=de(m?a(X,F+oe-fe-se):X,F,m?i(Y,ce):Y);k[j]=pe,V[j]=pe-F}if(c){var ue,le="x"===j?D:P,he="x"===j?A:L,me=k[M],ve="y"===M?"height":"width",ye=me+b[le],ge=me-b[he],be=-1!==[D,P].indexOf(x),xe=null!=(ue=null==S?void 0:S[M])?ue:0,we=be?ye:me-B[ve]-H[ve]-xe+R.altAxis,Oe=be?me+B[ve]+H[ve]-xe-R.altAxis:ge,je=m&&be?function(e,t,n){var r=de(e,t,n);return r>n?n:r}(we,me,Oe):de(m?we:ye,me,m?Oe:ge);k[M]=je,V[M]=je-me}t.modifiersData[r]=V}},requiresIfExists:["offset"]};var me={name:"arrow",enabled:!0,phase:"main",fn:function(e){var t,n=e.state,r=e.name,o=e.options,i=n.elements.arrow,a=n.modifiersData.popperOffsets,s=C(n.placement),f=z(s),c=[P,L].indexOf(s)>=0?"height":"width";if(i&&a){var p=function(e,t){return Y("number"!=typeof(e="function"==typeof e?e(Object.assign({},t.rects,{placement:t.placement})):e)?e:G(e,k))}(o.padding,n),u=g(i),l="y"===f?D:P,d="y"===f?A:L,h=n.rects.reference[c]+n.rects.reference[f]-a[f]-n.rects.popper[c],m=a[f]-n.rects.reference[f],v=E(i),y=v?"y"===f?v.clientHeight||0:v.clientWidth||0:0,b=h/2-m/2,x=p[l],w=y-u[c]-p[d],O=y/2-u[c]/2+b,j=de(x,O,w),M=f;n.modifiersData[r]=((t={})[M]=j,t.centerOffset=j-O,t)}},effect:function(e){var t=e.state,n=e.options.element,r=void 0===n?"[data-popper-arrow]":n;null!=r&&("string"!=typeof r||(r=t.elements.popper.querySelector(r)))&&N(t.elements.popper,r)&&(t.elements.arrow=r)},requires:["popperOffsets"],requiresIfExists:["preventOverflow"]};function ve(e,t,n){return void 0===n&&(n={x:0,y:0}),{top:e.top-t.height-n.y,right:e.right-t.width+n.x,bottom:e.bottom-t.height+n.y,left:e.left-t.width-n.x}}function ye(e){return[D,L,A,P].some((function(t){return e[t]>=0}))}var ge={name:"hide",enabled:!0,phase:"main",requiresIfExists:["preventOverflow"],fn:function(e){var t=e.state,n=e.name,r=t.rects.reference,o=t.rects.popper,i=t.modifiersData.preventOverflow,a=J(t,{elementContext:"reference"}),s=J(t,{altBoundary:!0}),f=ve(a,r),c=ve(s,o,i),p=ye(f),u=ye(c);t.modifiersData[n]={referenceClippingOffsets:f,popperEscapeOffsets:c,isReferenceHidden:p,hasPopperEscaped:u},t.attributes.popper=Object.assign({},t.attributes.popper,{"data-popper-reference-hidden":p,"data-popper-escaped":u})}},be=Z({defaultModifiers:[ee,te,oe,ie]}),xe=[ee,te,oe,ie,ae,le,he,me,ge],we=Z({defaultModifiers:xe});e.applyStyles=ie,e.arrow=me,e.computeStyles=oe,e.createPopper=we,e.createPopperLite=be,e.defaultModifiers=xe,e.detectOverflow=J,e.eventListeners=ee,e.flip=le,e.hide=ge,e.offset=ae,e.popperGenerator=Z,e.popperOffsets=te,e.preventOverflow=he,Object.defineProperty(e,"__esModule",{value:!0})})); 6 | 7 | -------------------------------------------------------------------------------- /doc/divergences.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # ----------------------------------------------------------------------------- 4 | # Utility functions (mimicking the Julia xlogx / xlogy / aloga / alogab etc.) 
5 | # ----------------------------------------------------------------------------- 6 | def xlogx(x): 7 | """ 8 | Returns x * log(x) with the convention that 0*log(0)=0. 9 | Works for scalars or NumPy arrays. 10 | """ 11 | x = np.asarray(x) 12 | return np.where(x == 0, 0.0, x * np.log(x)) 13 | 14 | def xlogy(x, y): 15 | """ 16 | Returns x * log(y) with the convention that if x==0 then the result is 0. 17 | """ 18 | x = np.asarray(x) 19 | return np.where(x == 0, 0.0, x * np.log(y)) 20 | 21 | def alogab(a, b): 22 | """ 23 | a*log(a/b) - a + b. 24 | """ 25 | a = np.asarray(a) 26 | b = np.asarray(b) 27 | return xlogy(a, a / b) - a + b 28 | 29 | def blogab(a, b): 30 | """ 31 | -b*log(a/b) + a - b. 32 | """ 33 | a = np.asarray(a) 34 | b = np.asarray(b) 35 | return -xlogy(b, a / b) + a - b 36 | 37 | def aloga(a): 38 | """ 39 | a*log(a) - a + 1. 40 | """ 41 | a = np.asarray(a) 42 | return xlogx(a) - a + 1.0 43 | 44 | def loga(a): 45 | """ 46 | -log(a) + a - 1. 47 | """ 48 | a = np.asarray(a) 49 | # For a<=0, we return Inf. 50 | return np.where(a > 0, -np.log(a) + a - 1.0, np.inf) 51 | 52 | # ----------------------------------------------------------------------------- 53 | # Base divergence class 54 | # ----------------------------------------------------------------------------- 55 | class AbstractDivergence: 56 | def __call__(self, a, b=None): 57 | """ 58 | Evaluate the divergence. If b is provided, the two‐argument version is used. 59 | Otherwise the one‐argument version is used. 60 | """ 61 | if b is None: 62 | return self.eval(a) 63 | else: 64 | return self.eval(a, b) 65 | 66 | def eval_scalar(self, a, b=None): 67 | raise NotImplementedError("eval_scalar must be implemented in subclasses.") 68 | 69 | def eval(self, a, b=None): 70 | return np.sum(self.eval_scalar(a, b)) 71 | 72 | def gradient(self, a, b=None): 73 | """ 74 | Returns the gradient with respect to the second argument. 75 | For one-argument evaluation, returns the derivative with respect to a. 76 | """ 77 | raise NotImplementedError("gradient must be implemented in subclasses.") 78 | 79 | def hessian(self, a, b=None): 80 | """ 81 | Returns the (scalar or elementwise) Hessian. 82 | """ 83 | raise NotImplementedError("hessian must be implemented in subclasses.") 84 | 85 | # ----------------------------------------------------------------------------- 86 | # Kullback-Leibler Divergence 87 | # ----------------------------------------------------------------------------- 88 | class KullbackLeibler(AbstractDivergence): 89 | def eval_scalar(self, a, b=None): 90 | a = np.asarray(a) 91 | if b is None: 92 | return aloga(a) 93 | else: 94 | return aloga(a/b) 95 | 96 | def gradient(self, a, b=None): 97 | a = np.asarray(a) 98 | if b is None: 99 | # derivative of aloga: log(a) 100 | return np.where(a > 0, np.log(a), -np.inf) 101 | else: 102 | a = np.asarray(a) 103 | b = np.asarray(b) 104 | cond = (a > 0) & (b > 0) 105 | return np.where(cond, np.log(a / b), -np.inf) 106 | 107 | def hessian(self, a, b=None): 108 | a = np.asarray(a) 109 | if b is None: 110 | # Hessian: 1/a for a>0, else Inf. 
111 | return np.where(a > 0, 1.0 / a, np.inf) 112 | else: 113 | a = np.asarray(a) 114 | b = np.asarray(b) 115 | cond = (a > 0) & (b > 0) 116 | return np.where(cond, 1.0 / a, np.inf) 117 | 118 | # ----------------------------------------------------------------------------- 119 | # Reverse Kullback-Leibler Divergence 120 | # ----------------------------------------------------------------------------- 121 | class ReverseKullbackLeibler(AbstractDivergence): 122 | def eval_scalar(self, a, b=None): 123 | a = np.asarray(a) 124 | if b is None: 125 | return loga(a) 126 | else: 127 | return loga(a/b)*b 128 | 129 | def gradient(self, a, b=None): 130 | a = np.asarray(a) 131 | if b is None: 132 | # derivative of loga: -1/a + 1 133 | return np.where(a > 0, -1.0 / a + 1.0, -np.inf) 134 | else: 135 | a = np.asarray(a) 136 | b = np.asarray(b) 137 | cond = (a > 0) & (b > 0) 138 | # gradient: -b/a + 1. 139 | return np.where(cond, -b / a + 1.0, -np.inf) 140 | 141 | def hessian(self, a, b=None): 142 | a = np.asarray(a) 143 | if b is None: 144 | # Hessian: 1/a^2 for a>0. 145 | return np.where(a > 0, 1.0 / (a ** 2), np.inf) 146 | else: 147 | a = np.asarray(a) 148 | b = np.asarray(b) 149 | cond = (a > 0) & (b > 0) 150 | return np.where(cond, b / (a ** 2), np.inf) 151 | 152 | # ----------------------------------------------------------------------------- 153 | # Hellinger Divergence 154 | # ----------------------------------------------------------------------------- 155 | class Hellinger(AbstractDivergence): 156 | def eval_scalar(self, a, b=None): 157 | a = np.asarray(a) 158 | if b is None: 159 | # γ(a) = 2*a - 4*sqrt(a) + 2. 160 | return 2 * a - 4 * np.sqrt(a) + 2 161 | else: 162 | a = np.asarray(a) 163 | b = np.asarray(b) 164 | # γ(a,b) = 2*a + (2 - 4*sqrt(a/b))*b. 165 | return 2 * a + (2 - 4 * np.sqrt(a / b)) * b 166 | 167 | def gradient(self, a, b=None): 168 | a = np.asarray(a) 169 | if b is None: 170 | # derivative: 2 - 2/sqrt(a) 171 | return np.where(a > 0, 2 - 2 / np.sqrt(a), -np.inf) 172 | else: 173 | a = np.asarray(a) 174 | b = np.asarray(b) 175 | cond = (a > 0) & (b > 0) 176 | # derivative: 2*(1 - 1/sqrt(a/b)) 177 | return np.where(cond, 2 * (1 - 1 / np.sqrt(a / b)), -np.inf) 178 | 179 | def hessian(self, a, b=None): 180 | a = np.asarray(a) 181 | if b is None: 182 | # Hessian: 1/sqrt(a^3) 183 | return np.where(a > 0, 1.0 / np.sqrt(a ** 3), np.inf) 184 | else: 185 | a = np.asarray(a) 186 | b = np.asarray(b) 187 | cond = (a > 0) & (b > 0) 188 | return np.where(cond, np.sqrt(b) / np.sqrt(a ** 3), np.inf) 189 | 190 | # ----------------------------------------------------------------------------- 191 | # Chi-Squared Divergence 192 | # ----------------------------------------------------------------------------- 193 | class ChiSquared(AbstractDivergence): 194 | def eval_scalar(self, a, b=None): 195 | a = np.asarray(a) 196 | if b is None: 197 | # γ(a) = 0.5*(a - 1)^2. 198 | return 0.5 * (a - 1) ** 2 199 | else: 200 | a = np.asarray(a) 201 | b = np.asarray(b) 202 | # γ(a,b) = 0.5*(a - b)^2 / b. 203 | return 0.5 * ((a - b) ** 2) / b 204 | 205 | def gradient(self, a, b=None): 206 | a = np.asarray(a) 207 | if b is None: 208 | # derivative: a - 1. 209 | return a - 1 210 | else: 211 | a = np.asarray(a) 212 | b = np.asarray(b) 213 | # derivative: a/b - 1. 
214 | return a / b - 1 215 | 216 | def hessian(self, a, b=None): 217 | a = np.asarray(a) 218 | if b is None: 219 | return np.ones_like(a) 220 | else: 221 | b = np.asarray(b) 222 | return np.where(b != 0, 1.0 / b, np.inf) 223 | 224 | # ----------------------------------------------------------------------------- 225 | # Cressie-Read Divergence (with parameter alpha) 226 | # ----------------------------------------------------------------------------- 227 | class CressieRead(AbstractDivergence): 228 | def __init__(self, alpha): 229 | self.alpha = alpha 230 | 231 | def eval_scalar(self, a, b=None): 232 | a = np.asarray(a) 233 | α = self.alpha 234 | if b is None: 235 | # For one argument: if a>=0 then 236 | # (a^(1+α) + α - a*(1+α))/(α*(1+α)) else (if α>0 then 0 else NaN) 237 | cond = (a >= 0) 238 | val = (a ** (1 + α) + α - a * (1 + α)) / (α * (1 + α)) 239 | # For negative a, return 0 if α>0 else NaN. 240 | return np.where(cond, val, 0.0 if α > 0 else np.nan) 241 | else: 242 | b = np.asarray(b) 243 | cond = (a > 0) & (b > 0) 244 | val = ((a / b) ** (1 + α) + α - (a / b) * (1 + α)) * b / (α * (1 + α)) 245 | return np.where(cond, val, 0.0 if α > 0 else np.nan) 246 | 247 | def gradient(self, a, b=None): 248 | a = np.asarray(a) 249 | α = self.alpha 250 | if b is None: 251 | cond = (a >= 0) 252 | val = (a ** α - 1) / α 253 | return np.where(cond, val, 0.0 if α > 0 else np.nan) 254 | else: 255 | b = np.asarray(b) 256 | cond = (a >= 0) & (b > 0) 257 | val = ((a / b) ** α - 1) / α 258 | return np.where(cond, val, 0.0 if α > 0 else np.nan) 259 | 260 | def hessian(self, a, b=None): 261 | a = np.asarray(a) 262 | α = self.alpha 263 | if b is None: 264 | cond = (a > 0) 265 | val = a ** (α - 1) 266 | return np.where(cond, val, np.inf) 267 | else: 268 | b = np.asarray(b) 269 | cond = (a > 0) & (b > 0) 270 | val = a ** (α - 1) * b ** (-α) 271 | return np.where(cond, val, np.inf) 272 | 273 | # ----------------------------------------------------------------------------- 274 | # Modified Divergences 275 | # ----------------------------------------------------------------------------- 276 | class ModifiedDivergence(AbstractDivergence): 277 | """ 278 | A modified divergence which uses an underlying divergence (self.base) 279 | and applies an upper modification when a > ρ * b. 280 | 281 | The parameters are passed as a dictionary with keys: 282 | - 'rho': the threshold parameter, 283 | - 'gamma0', 'gamma1', 'gamma2': parameters for the upper modification. 
284 | """ 285 | def __init__(self, base_divergence, params): 286 | self.base = base_divergence 287 | self.params = params 288 | 289 | def eval_scalar(self, a, b=None): 290 | if b is None: 291 | a = np.asarray(a) 292 | rho = self.params.get('rho', 1) 293 | cond = a > rho 294 | # Upper modification for one argument: 295 | gamma0 = self.params.get('gamma0', 0) 296 | gamma1 = self.params.get('gamma1', 0) 297 | gamma2 = self.params.get('gamma2', 0) 298 | val_upper = gamma0 + gamma1 * (a - rho) + 0.5 * gamma2 * (a - rho) ** 2 299 | val_base = self.base.eval_scalar(a) 300 | return np.where(cond, val_upper, val_base) 301 | else: 302 | a = np.asarray(a) 303 | b = np.asarray(b) 304 | rho = self.params.get('rho', 1) 305 | cond = a > rho * b 306 | gamma0 = self.params.get('gamma0', 0) 307 | gamma1 = self.params.get('gamma1', 0) 308 | gamma2 = self.params.get('gamma2', 0) 309 | # Upper modification for two arguments: 310 | val_upper = (gamma0 + gamma1 * ((a / b) - rho) + 0.5 * gamma2 * ((a / b) - rho) ** 2) * b 311 | val_base = self.base.eval_scalar(a, b) 312 | return np.where(cond, val_upper, val_base) 313 | 314 | def gradient(self, a, b=None): 315 | rho = self.params.get('rho', 1) 316 | if b is None: 317 | a = np.asarray(a) 318 | cond = a > rho 319 | gamma1 = self.params.get('gamma1', 0) 320 | gamma2 = self.params.get('gamma2', 0) 321 | grad_upper = gamma1 + gamma2 * (a - rho) 322 | grad_base = self.base.gradient(a) 323 | return np.where(cond, grad_upper, grad_base) 324 | else: 325 | a = np.asarray(a) 326 | b = np.asarray(b) 327 | cond = a > rho * b 328 | gamma1 = self.params.get('gamma1', 0) 329 | gamma2 = self.params.get('gamma2', 0) 330 | grad_upper = gamma1 + (a / b) * gamma2 - gamma2 * rho 331 | grad_base = self.base.gradient(a, b) 332 | return np.where(cond, grad_upper, grad_base) 333 | 334 | def hessian(self, a, b=None): 335 | rho = self.params.get('rho', 1) 336 | if b is None: 337 | a = np.asarray(a) 338 | cond = a > rho 339 | gamma2 = self.params.get('gamma2', 0) 340 | hess_upper = gamma2 341 | hess_base = self.base.hessian(a) 342 | return np.where(cond, hess_upper, hess_base) 343 | else: 344 | a = np.asarray(a) 345 | b = np.asarray(b) 346 | cond = a > rho * b 347 | gamma2 = self.params.get('gamma2', 0) 348 | hess_upper = gamma2 / b 349 | hess_base = self.base.hessian(a, b) 350 | return np.where(cond, hess_upper, hess_base) 351 | 352 | # ----------------------------------------------------------------------------- 353 | # Fully Modified Divergence 354 | # ----------------------------------------------------------------------------- 355 | class FullyModifiedDivergence(AbstractDivergence): 356 | """ 357 | A fully modified divergence that uses both an upper and lower modification. 358 | 359 | Parameters are passed as a dictionary with keys: 360 | - 'rho' and 'phi': thresholds, 361 | - For the upper part: 'gamma0', 'gamma1', 'gamma2', 362 | - For the lower part: 'g0', 'g1', 'g2'. 
363 | """ 364 | def __init__(self, base_divergence, params): 365 | self.base = base_divergence 366 | self.params = params 367 | 368 | def eval_scalar(self, a, b=None): 369 | rho = self.params.get('rho', 1) 370 | phi = self.params.get('phi', 1) 371 | if b is None: 372 | a = np.asarray(a) 373 | cond_upper = a > rho 374 | cond_lower = a < phi 375 | gamma0 = self.params.get('gamma0', 0) 376 | gamma1 = self.params.get('gamma1', 0) 377 | gamma2 = self.params.get('gamma2', 0) 378 | val_upper = gamma0 + gamma1 * (a - rho) + 0.5 * gamma2 * (a - rho) ** 2 379 | g0 = self.params.get('g0', 0) 380 | g1 = self.params.get('g1', 0) 381 | g2 = self.params.get('g2', 0) 382 | val_lower = g0 + g1 * (a - phi) + 0.5 * g2 * (a - phi) ** 2 383 | val_base = self.base.eval_scalar(a) 384 | return np.where(cond_upper, val_upper, np.where(cond_lower, val_lower, val_base)) 385 | else: 386 | a = np.asarray(a) 387 | b = np.asarray(b) 388 | cond_upper = a > rho * b 389 | cond_lower = a < phi * b 390 | gamma0 = self.params.get('gamma0', 0) 391 | gamma1 = self.params.get('gamma1', 0) 392 | gamma2 = self.params.get('gamma2', 0) 393 | val_upper = (gamma0 + gamma1 * ((a / b) - rho) + 0.5 * gamma2 * ((a / b) - rho) ** 2) * b 394 | g0 = self.params.get('g0', 0) 395 | g1 = self.params.get('g1', 0) 396 | g2 = self.params.get('g2', 0) 397 | val_lower = (g0 + g1 * ((a / b) - phi) + 0.5 * g2 * ((a / b) - phi) ** 2) * b 398 | val_base = self.base.eval_scalar(a, b) 399 | return np.where(cond_upper, val_upper, np.where(cond_lower, val_lower, val_base)) 400 | 401 | def gradient(self, a, b=None): 402 | rho = self.params.get('rho', 1) 403 | phi = self.params.get('phi', 1) 404 | if b is None: 405 | a = np.asarray(a) 406 | cond_upper = a > rho 407 | cond_lower = a < phi 408 | gamma1 = self.params.get('gamma1', 0) 409 | gamma2 = self.params.get('gamma2', 0) 410 | grad_upper = gamma1 + gamma2 * (a - rho) 411 | g1 = self.params.get('g1', 0) 412 | g2 = self.params.get('g2', 0) 413 | grad_lower = g1 + g2 * (a - phi) 414 | grad_base = self.base.gradient(a) 415 | return np.where(cond_upper, grad_upper, np.where(cond_lower, grad_lower, grad_base)) 416 | else: 417 | a = np.asarray(a) 418 | b = np.asarray(b) 419 | cond_upper = a > rho * b 420 | cond_lower = a < phi * b 421 | gamma1 = self.params.get('gamma1', 0) 422 | gamma2 = self.params.get('gamma2', 0) 423 | grad_upper = gamma1 + (a / b) * gamma2 - gamma2 * rho 424 | g1 = self.params.get('g1', 0) 425 | g2 = self.params.get('g2', 0) 426 | grad_lower = g1 + (a / b) * g2 - g2 * phi 427 | grad_base = self.base.gradient(a, b) 428 | return np.where(cond_upper, grad_upper, np.where(cond_lower, grad_lower, grad_base)) 429 | 430 | def hessian(self, a, b=None): 431 | rho = self.params.get('rho', 1) 432 | phi = self.params.get('phi', 1) 433 | if b is None: 434 | a = np.asarray(a) 435 | cond_upper = a > rho 436 | cond_lower = a < phi 437 | gamma2 = self.params.get('gamma2', 0) 438 | hess_upper = gamma2 439 | g2 = self.params.get('g2', 0) 440 | hess_lower = g2 441 | hess_base = self.base.hessian(a) 442 | return np.where(cond_upper, hess_upper, np.where(cond_lower, hess_lower, hess_base)) 443 | else: 444 | a = np.asarray(a) 445 | b = np.asarray(b) 446 | cond_upper = a > rho * b 447 | cond_lower = a < phi * b 448 | gamma2 = self.params.get('gamma2', 0) 449 | hess_upper = gamma2 / b 450 | g2 = self.params.get('g2', 0) 451 | hess_lower = g2 / b 452 | hess_base = self.base.hessian(a, b) 453 | return np.where(cond_upper, hess_upper, np.where(cond_lower, hess_lower, hess_base)) 454 | 455 | # 
# -----------------------------------------------------------------------------
# Vectorized versions for arrays (optional)
# -----------------------------------------------------------------------------
def eval_divergence(d, a, b=None):
    """
    Evaluates the divergence d on each element of a (and b if provided)
    and returns the sum.
    """
    if b is None:
        a = np.asarray(a)
        return np.sum([d.eval_scalar(val) for val in np.nditer(a)])
    else:
        a = np.asarray(a)
        b = np.asarray(b)
        return np.sum([d.eval_scalar(a_val, b_val)
                       for a_val, b_val in zip(np.nditer(a), np.nditer(b))])

def gradient_divergence(d, a, b=None):
    """
    Returns an array with the elementwise gradient.
    """
    if b is None:
        a = np.asarray(a)
        return np.array([d.gradient(val) for val in np.nditer(a)])
    else:
        a = np.asarray(a)
        b = np.asarray(b)
        return np.array([d.gradient(a_val, b_val)
                         for a_val, b_val in zip(np.nditer(a), np.nditer(b))])

def hessian_divergence(d, a, b=None):
    """
    Returns an array with the elementwise Hessian.
    """
    if b is None:
        a = np.asarray(a)
        return np.array([d.hessian(val) for val in np.nditer(a)])
    else:
        a = np.asarray(a)
        b = np.asarray(b)
        return np.array([d.hessian(a_val, b_val)
                         for a_val, b_val in zip(np.nditer(a), np.nditer(b))])
--------------------------------------------------------------------------------
/doc/example_iv_effcient_analytical.jl:
--------------------------------------------------------------------------------
using MathOptInterface, Optimization, OptimizationMOI, OptimizationOptimJL, Ipopt
using ForwardDiff, DifferentiationInterface
using Divergences
using Statistics, LinearAlgebra
using Infiltrator
using StableRNGs

## -----
## High-performance helpers
## -----
function randiv(; n = 100, m = 5, k = 1, θ = 0.0, ρ = 0.9, CP = 20)
    ## Simulate
    ##   y = xθ + w'γ + u
    ##   x = zτ + w'ξ + η
    ## where z ∼ N(0, Iₘ), w ∼ N(0, Iₖ), η ∼ N(0, I), and
    ## u = ρη + √(1 - ρ²)ε with ε ∼ N(0, I), so u is correlated with η.
    τ = fill(sqrt(CP / (m * n)), m)
    z = randn(n, m)      ## Instruments
    w = randn(n, k - 1)  ## Exogenous
    η = randn(n, 1)
    u = ρ * η + √(1 - ρ^2) * randn(n, 1)
    x = z * τ + η
    y = x * θ + u
    return y, [x w], [z w]
end

y, x, z = randiv(; k = 1, CP = 5)

p = (y = y,
     x = x,
     z = z,
     Y = similar(y),
     X = similar(x),
     Z = similar(z),
     ∂G = Matrix{Float64}(undef, size(z, 2), size(x, 2)));

function g(θ, p)
    ## Gₙ(θ)
    (y, x, z, Y, X, Z, ∂G) = p.data
    mul!(Y, x, θ)
    broadcast!(-, Y, y, Y)
    broadcast!(*, Z, z, Y)
    return Z
end

function Dg(θ, π, p)
    (y, x, z, Y, X, Z, ∂G) = p.data
    broadcast!(*, X, π, x)
    mul!(∂G, z', -X)
    return ∂G
end

function Dgλ(θ, λ, p)
    (y, x, z, Y, X, Z, ∂G) = p.data
    mul!(Y, z, λ)
    broadcast!(*, X, -Y, x)
    broadcast!(/, X, X, size(X, 1))
    return X
end

function Dgλ(θ, λ, π, p)
    ## Hπθ
    (y, x, z, Y, X, Z, ∂G) = p.data
    ∂gλ = Dgλ(θ, λ, p)
    broadcast!(*, ∂gλ, ∂gλ, π)
    return ∂gλ
end

function Dgλ!(J, θ, λ, p)
    ∂gλ = Dgλ(θ, λ, p)
    return copy!(J, vec(∂gλ))
end

function Dgλ!(J, θ, λ, π, p)
    ∂gλ = Dgλ(θ, λ, π, p)
    return copy!(J, vec(∂gλ))
end

function Hgλ(θ, λ, π, p)
    n, k, m = size(p)
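    ## The IV moment g(θ) = z .* (y - x * θ) is affine in θ, so the θθ-block
    ## of ∂²(π'G(θ)λ)/∂θ² is identically zero and an explicit zero matrix is
    ## returned. For a nonlinear moment function this block would have to be
    ## computed (analytically or via AD).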
    return zeros(k, k)
end

## --------------------------------------------------------------------- ##
## Optimization Problem
## --------------------------------------------------------------------- ##

const MOI = MathOptInterface

struct MDProblem <: MOI.AbstractNLPEvaluator
    div::Divergences.AbstractDivergence
    data::Any
    backend::Any
end

Base.size(md::MDProblem) = (size(md.data.x)..., size(md.data.z, 2))
divergence(md::MDProblem) = md.div

function MOI.initialize(md::MDProblem, rf::Vector{Symbol})
    for feat in rf
        if !(feat in [:Grad, :Jac, :Hess])
            error("Unsupported feature $feat")
        end
    end
end

MOI.features_available(md::MDProblem) = [:Grad, :Jac, :Hess]

## --------------------------------------------------------------------- ##
## Objective function
## --------------------------------------------------------------------- ##
function MOI.eval_objective(md::MDProblem, u::Vector{Float64})
    ## Objective function
    ##   ∑ᵢ γ(πᵢ)
    n, k, m = size(md)
    return divergence(md)(view(u, 1:n))
end

function MOI.eval_objective_gradient(md::MDProblem, res, u)
    ## Gradient of the objective function
    ##   ∇_π ∑ᵢ γ(πᵢ) = (γ'(πᵢ))ᵢ; the θ-block is zero
    n, k, m = size(md)
    T = eltype(res)
    Divergences.gradient!(view(res, 1:n), divergence(md), view(u, 1:n))
    return fill!(view(res, (n + 1):(n + k)), zero(T))
end

## --------------------------------------------------------------------- ##
## Constraints
## --------------------------------------------------------------------- ##
function MOI.eval_constraint(md::MDProblem, res, u)
    ## Constraints
    ##   ∑ᵢ πᵢ gᵢ(θ) = 0
    ##   ∑ᵢ πᵢ = n
    n, k, m = size(md)
    θ = view(u, (n + 1):(n + k))
    π = view(u, 1:n)
    G = g(θ, md)
    return constraint!(res, π, G)
end

function constraint!(μ::AbstractVector{T}, w::AbstractVector, x::AbstractMatrix) where {T}
    fill!(μ, zero(T))
    @inbounds for j in axes(x, 2)
        for i in axes(x, 1)
            μ[j] += w[i] * x[i, j]
        end
    end
    μ[end] = sum(w)
    return μ
end

## --------------------------------------------------------------------- ##
## Constraints Jacobian
## --------------------------------------------------------------------- ##
function MOI.jacobian_structure(md::MDProblem)
    n, k, m = size(md)
    return rowcol_of_dense(n + k, m + 1)
end

function MOI.eval_constraint_jacobian(md::MDProblem, J, u)
    n, k, m = size(md)
    θ = view(u, (n + 1):(n + k))
    π = view(u, 1:n)
    G = g(θ, md)
    #@. G = G/n
    ∂g = Dg(θ, π, md)
    return assign_constraint_jacobian!(J, G, ∂g)
end

"""
    assign_constraint_jacobian!(J, g, ∇g)

Assigns the elements of the block matrix `X = [g' ∇g; ones(1, n) zeros(1, k)]`
to `J` in column-major order.

# Arguments
- `J::Vector{Float64}`: A preallocated vector of length `(m + 1) * (n + k)`, where `n`, `m`, and `k` are the dimensions of `g` and `∇g`.
- `g::AbstractMatrix{T}`: An `n × m` matrix.
- `∇g::AbstractMatrix{T}`: An `m × k` matrix.
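
# Example
With `n = 2`, `m = 2`, `k = 1`, `g = [g11 g12; g21 g22]`, and `∇g = [d11; d21]`,
the packed vector satisfies

```julia
J == [g11, g12, 1.0, g21, g22, 1.0, d11, d21, 0.0]
```

that is, `J == vec([g' ∇g; ones(1, n) zeros(1, k)])`.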
"""
function assign_constraint_jacobian!(J, gg, Dg)
    n, m = size(gg)
    k = size(Dg, 2)
    for j in 1:n
        # Elements from gg'
        for i in 1:m
            J[(j - 1) * (m + 1) + i] = gg[j, i]
        end
        # Element from the ones row
        J[j * (m + 1)] = 1.0
    end
    # Next k columns (from Dg and the zeros row)
    offset = n * (m + 1)
    for j in 1:k
        # Elements from Dg
        for i in 1:m
            J[offset + (j - 1) * (m + 1) + i] = Dg[i, j]
        end
        # Element from 0
        J[offset + j * (m + 1)] = 0.0
    end
    return J
end

## --------------------------------------------------------------------- ##
## Hessian of the Lagrangian L(π, θ, λ) = D(π, p) + λ'g(θ)
## --------------------------------------------------------------------- ##
function MOI.hessian_lagrangian_structure(md::MDProblem)
    n, k, m = size(md)
    hele = Int(n + n * k + k * (k + 1) ÷ 2)
    rows = Array{Int64}(undef, hele)
    cols = Array{Int64}(undef, hele)
    ## Diagonal elements
    for j in 1:n
        rows[j] = j
        cols[j] = j
    end
    idx = n + 1
    ## Off-diagonal elements
    for j in 1:k
        for s in 1:n
            rows[idx] = n + j
            cols[idx] = s
            idx += 1
        end
    end
    ## For a linear problem this block is not needed
    for j in 1:k
        for s in 1:j
            rows[idx] = n + j
            cols[idx] = n + s
            idx += 1
        end
    end
    return [(r, c) for (r, c) in zip(rows, cols)]
end

function MOI.eval_hessian_lagrangian(md::MDProblem, hess, u, σ, λ)
    n, k, m = size(md)
    π = view(u, 1:n)
    θ = view(u, (n + 1):(n + k))
    if σ == 0
        @inbounds for j in 1:n
            hess[j] = 0.0
        end
    else
        hv = view(hess, 1:n)
        Divergences.hessian!(hv, divergence(md), π)
        hv .= hv .* σ
    end
    λv = view(λ, 1:m)
    Dgλ!(view(hess, (n + 1):(n + n * k)), θ, λv, md)
    ## For a linear problem this block is not needed
    return copy_lower_triangular!(
        view(hess,
             (n + n * k + 1):(n + n * k + (k * (k + 1) ÷ 2))),
        Hgλ(θ, λ, π, md))
end

## --------------------------------------------------------------------- ##
## Problem with fixed theta
## --------------------------------------------------------------------- ##

struct SMDProblem <: MOI.AbstractNLPEvaluator
    div::Divergences.AbstractDivergence
    G::Matrix{Float64}
    data::Any
    backend::Any
end

divergence(md::SMDProblem) = md.div
momfun(md::SMDProblem) = md.G
## With θ fixed there are no θ-variables, so k = 0
Base.size(md::SMDProblem) = (size(md.G, 1), 0, size(md.G, 2))

function MOI.initialize(md::SMDProblem, rf::Vector{Symbol})
    for feat in rf
        if !(feat in [:Grad, :Jac, :Hess])
            error("Unsupported feature $feat")
        end
    end
end

MOI.features_available(md::SMDProblem) = [:Grad, :Jac, :Hess]

function MOI.eval_objective(md::SMDProblem, u::Vector{Float64})
    return divergence(md)(u)
end

function MOI.eval_objective_gradient(md::SMDProblem, res, u)
    return Divergences.gradient!(res, divergence(md), u)
end

## --------------------------------------------------------------------- ##
## Constraints
## --------------------------------------------------------------------- ##
function MOI.eval_constraint(md::SMDProblem, res, u)
    π = u
    G = md.G
    ## `constraint!` defined above for MDProblem is reused here
    return constraint!(res, π, G)
end
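
## Sanity check (sketch): with π = ones(n), `MOI.eval_constraint` returns
## [vec(sum(G, dims = 1)); n], so the equality bounds [zeros(m); n] used below
## force the π-weighted moments to zero while keeping ∑ᵢ πᵢ = n.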

## --------------------------------------------------------------------- ##
## Constraints Jacobian
## --------------------------------------------------------------------- ##
function MOI.jacobian_structure(md::SMDProblem)
    n, k, m = size(md)
    return rowcol_of_dense(n, m + 1)
end

function MOI.eval_constraint_jacobian(md::SMDProblem, J, u)
    π = u
    G = md.G
    #@. G = G/n
    return assign_constraint_jacobian!(J, G)
end

"""
    assign_constraint_jacobian!(J, g)

Assigns the elements of the block matrix `X = [g'; ones(1, n)]` to `J` in
column-major order.

# Arguments
- `J::Vector{Float64}`: A preallocated vector of length `(m + 1) * n`, where `n` and `m` are the dimensions of `g`.
- `g::AbstractMatrix{T}`: An `n × m` matrix.
"""
function assign_constraint_jacobian!(J, gg)
    n, m = size(gg)
    for j in 1:n
        # Elements from gg'
        for i in 1:m
            J[(j - 1) * (m + 1) + i] = gg[j, i]
        end
        # Element from the ones row
        J[j * (m + 1)] = 1.0
    end
    return J
end

## --------------------------------------------------------------------- ##
## Hessian of the Lagrangian L(π, θ, λ) = D(π, p) + λ'g(θ)
## --------------------------------------------------------------------- ##
function MOI.hessian_lagrangian_structure(md::SMDProblem)
    n, k, m = size(md)
    rows = Array{Int64}(undef, n)
    cols = Array{Int64}(undef, n)
    ## Diagonal elements
    for j in 1:n
        rows[j] = j
        cols[j] = j
    end
    return [(r, c) for (r, c) in zip(rows, cols)]
end

function MOI.eval_hessian_lagrangian(md::SMDProblem, hess, u, σ, λ)
    n, k, m = size(md)
    π = u
    if σ == 0
        @inbounds for j in 1:n
            hess[j] = 0.0
        end
    else
        hv = view(hess, 1:n)
        Divergences.hessian!(hv, divergence(md), π)
        hv .= hv .* σ
    end
end

## --------------------------------------------------------------------- ##
## Problem
## --------------------------------------------------------------------- ##

ℳ𝒟 = FullyModifiedDivergence(ReverseKullbackLeibler(), 0.1, 1.2)
mdprob = MDProblem(ℳ𝒟, p, nothing)
n, k, m = size(mdprob)

model = Ipopt.Optimizer()
π = MOI.add_variables(model, n)
MOI.add_constraint.(model, π, MOI.GreaterThan(0.0))
θ = MOI.add_variables(model, k)
MOI.add_constraint.(model, θ, MOI.GreaterThan(-10.0))
MOI.add_constraint.(model, θ, MOI.LessThan(+10.0))
for i in 1:n
    MOI.set(model, MOI.VariablePrimalStart(), π[i], 1.0)
end
for i in 1:k
    MOI.set(model, MOI.VariablePrimalStart(), θ[i], 0.0)
end
lb = [zeros(m); n]
ub = [zeros(m); n]
MOI.set(model, MOI.ObjectiveSense(), MOI.MIN_SENSE)

model_el = deepcopy(model)
model_md = deepcopy(model)

block_data = MOI.NLPBlockData(MOI.NLPBoundsPair.(lb, ub), mdprob, true)
MOI.set(model_md, MOI.NLPBlock(), block_data)
for i in 1:k
    MOI.set(model_md, MOI.VariablePrimalStart(), θ[i], -0.01)
end

mdprob = MDProblem(ReverseKullbackLeibler(), p, nothing)
block_data = MOI.NLPBlockData(MOI.NLPBoundsPair.(lb, ub), mdprob, true)
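## model_md carries the fully modified divergence, model_el the plain
## ReverseKullbackLeibler, so the Monte Carlo below can compare the two.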
MOI.set(model_el, MOI.NLPBlock(), block_data)

model.options["derivative_test"] = "none"
model.options["derivative_test_print_all"] = "no"

model.options["print_level"] = 4

MOI.optimize!(model)
MOI.get(model, MOI.TerminationStatus())
MOI.get(model, MOI.DualStatus())
MOI.get(model, MOI.PrimalStatus())

MOI.get(model, MOI.SolveTimeSec())
MOI.get(model, MOI.BarrierIterations())

xstar = MOI.get(model, MOI.VariablePrimal(), θ)

function lagrangian(md::MDProblem, u, σ, λ)
    n, k, m = size(md)
    π = u[1:n]
    θ = u[(n + 1):(n + k)]
    return σ .* divergence(md)(π) + mean(π .* g(θ, md) * λ)
end

## Test point for checking the analytical Hessian against ForwardDiff
u0 = [0.45793379249066035,
      4.999416892014921,
      9.182989399836064,
      3.6958463315972025,
      6.220383439227501,
      0.019436036309187443,
      2.063484686999562,
      10.894774879314305,
      8.25546846552471,
      4.029010019680072,
      -2.975818044182361,
      1.4669020891138018]

lagrangian(mdprob, u0, 1.0, [1.5, 0.0])

H0 = ForwardDiff.hessian(x -> lagrangian(mdprob, x, 1.5, [1.5, 0.0]), u0);
H = zeros(34)
MOI.eval_hessian_lagrangian(mdprob, H, u0, 1.5, [1.5, 0.0])

H0 = ForwardDiff.hessian(x -> lagrangian(mdprob, x, 0.0, [1.5, 0]), u0);
MOI.eval_hessian_lagrangian(mdprob, H, u0, 0.0, [1.5, 0])

## --------------------------------------------------------------------- ##
## Simple MC
## --------------------------------------------------------------------- ##

β_el = Matrix{Float64}(undef, 1000, 3)
f_el = zeros(1000)
β_md = Matrix{Float64}(undef, 1000, 3)
f_md = zeros(1000)
for j in 1:1000
    y, x, z = randiv(; k = 1, CP = 5)
    p.y .= y
    p.x .= x
    p.z .= z
    MOI.optimize!(model_el)
    MOI.optimize!(model_md)
    β_el[j, :] .= MOI.get(model_el, MOI.VariablePrimal(), θ)
    β_md[j, :] .= MOI.get(model_md, MOI.VariablePrimal(), θ)
    f_el[j] = model_el.inner.status
    f_md[j] = model_md.inner.status
end

using StatsPlots

StatsPlots.density(β_md)
StatsPlots.histogram(β_md; nbins = 80)

## --------------------------------------------------------------------- ##
## Utilities
## --------------------------------------------------------------------- ##

# function assign_matrix(J, gg, Dg)
#     n, m = size(gg)
#     k = size(Dg, 2)
#     R = [[gg'; ones(1, n)] [Dg; zeros(1, k)]]
#     J .= vec(R)
# end

using SparseArrays

function rowcol_of_sparse(g::SparseMatrixCSC; offset_row = 0, offset_col = 0)
    rows = rowvals(g)
    vals = nonzeros(g)
    m, n = size(g)
    tup = Tuple{Int64, Int64}[]
    for j in 1:n
        for i in nzrange(g, j)
            push!(tup, (rows[i] + offset_row, j + offset_col))
        end
    end
    return tup
end

function weighted_sum(G, w)
    n, m = size(G)
    res = zeros(eltype(G), m)
    @inbounds for j in axes(G, 2)
        for i in axes(G, 1)
            res[j] += w[i] * G[i, j]
        end
    end
    return res
end

function weighted_sum2(G, w)
    @inbounds vec(sum(w .* G, dims = 1))
end

"""
    rowcol_of_dense(n, m; offset_row = 0, offset_col = 0)

Returns the `(row, col)` index pairs of all entries of a dense `m × n` matrix
traversed column by column, with optional offsets for rows and columns. Note
that the first argument is the number of columns.

# Arguments
- `n::Int`: The number of columns of the (implicit) dense matrix.
- `m::Int`: The number of rows of the (implicit) dense matrix.
- `offset_row::Int` (default: 0): An offset added to each row index.
- `offset_col::Int` (default: 0): An offset added to each column index.

# Returns
A vector of tuples `(row, col)` listing all entries of the `m × n` matrix in
column-major order.

# Example
```julia
rowcol_of_dense(2, 2)  # [(1, 1), (2, 1), (1, 2), (2, 2)]
```
"""
function rowcol_of_dense(n, m; offset_row = 0, offset_col = 0)
    tup = Tuple{Int64, Int64}[]  # Initialize an empty vector of tuples
    @inbounds for j in 1:n
        for i in 1:m
            push!(tup, (i + offset_row, j + offset_col))
        end
    end
    return tup
end

function copy_lower_triangular!(x::AbstractVector{T}, A::Matrix{T}) where {T}
    @assert size(A, 1) == size(A, 2)
    n = size(A, 1)
    len = (n * (n + 1)) ÷ 2  # Length of the output vector
    @assert length(x) == len
    idx = 1
    @inbounds for j in 1:n
        for i in j:n
            x[idx] = A[i, j]
            idx += 1
        end
    end
    return x
end

function copy_lower_triangular!(x::AbstractVector{T}, A::Vector{T}) where {T}
    n = length(A)
    @assert n == 1 "`copy_lower_triangular!` for a vector makes sense only for a singleton vector"
    @assert length(x) == 1 "The destination vector should have length $((n * (n + 1)) ÷ 2)"
    x .= A
    return x
end

abstract type SmootherType end

struct Truncated <: SmootherType end
struct Bartlett <: SmootherType end

@inline weight(::Truncated, s, St) = 1.0
@inline weight(::Bartlett, s, St) = 1.0 - s / St

# Base version
function smoother_base(tt::T, G::Matrix, ξ::Integer) where {T <: SmootherType}
    N, M = size(G)
    nG = zeros(N, M)
    St = (2.0 * ξ + 1.0) / 2.0
    for m in 1:M
        for t in 1:N
            low = max((t - N), -ξ)
            high = min(t - 1, ξ)
            for s in low:high
                κ = weight(tt, s, St)
                @inbounds nG[t, m] += κ * G[t - s, m]
            end
        end
    end
    return (nG ./ (2 * ξ + 1))
end

function smoother(tt::Truncated, G::Matrix{T}, ξ::Integer) where {T}
    N, M = size(G)
    nG = Matrix{T}(undef, N, M)
    return smoother!(tt, nG, G, ξ)
end

function smoother!(tt::Truncated, dest, G::Matrix{T}, ξ::Integer) where {T}
    N, M = size(G)
    denom = 2ξ + 1  # normalization
    Threads.@threads for m in 1:M
        for t in 1:N
            low = max(t - N, -ξ)
            high = min(t - 1, ξ)
            acc = zero(T)
            @inbounds for s in low:high
                κ = weight(tt, s, ξ)
                acc += κ * G[t - s, m]
            end
            dest[t, m] = acc / denom
        end
    end
    return dest
end

# optprob = OptimizationFunction(divergence, Optimization.AutoForwardDiff(), cons = cons)
# prob = OptimizationProblem(optprob, x0, _p,
#                            lcons = repeat([0.], 2),
#                            ucons = repeat([0.], 2),
#                            lb = [repeat([0], 100); -Inf],
#                            ub = [repeat([+Inf], 100); +Inf])

# solver = OptimizationMOI.MOI.OptimizerWithAttributes(Ipopt.Optimizer, "print_level" => 0)

# solve(prob, solver)
--------------------------------------------------------------------------------