├── .JuliaFormatter.toml
├── doc
│   ├── pro.jmd
│   ├── pro.md
│   ├── divergences.pdf
│   ├── divergences_files
│   │   └── libs
│   │       ├── bootstrap
│   │       │   └── bootstrap-icons.woff
│   │       ├── quarto-html
│   │       │   ├── tippy.css
│   │       │   ├── quarto-syntax-highlighting-e26003cea8cd680ca0c55a263523d882.css
│   │       │   ├── anchor.min.js
│   │       │   └── popper.min.js
│   │       └── clipboard
│   │           └── clipboard.min.js
│   ├── DRIVER.py
│   ├── y.csv
│   ├── divergences.aux
│   ├── out.txt
│   ├── x.csv
│   ├── divergences.qmd
│   ├── divergences.md
│   ├── z.csv
│   ├── mdexample.jl
│   ├── mdprob.py
│   ├── mdexample_iv.jl
│   ├── divergences.py
│   └── example_iv_effcient_analytical.jl
├── .gitignore
├── test
│   ├── Aqua.jl
│   └── Project.toml
├── Project.toml
├── .github
│   └── workflows
│       ├── TagBot.yml
│       ├── format_fix.yaml
│       └── ci.yml
├── src
│   ├── plots.jl
│   ├── benchmark
│   │   ├── print_table.jl
│   │   └── benchmarks.jl
│   ├── Divergences.jl
│   └── divs.jl
├── LICENSE.md
├── CHANGELOG.md
└── README.md
-------------------------------------------------------------------------------- /.JuliaFormatter.toml: -------------------------------------------------------------------------------- 1 | style = "sciml"
-------------------------------------------------------------------------------- /doc/pro.jmd: -------------------------------------------------------------------------------- 1 | ## First 2 | 3 | An equation: 4 | 5 | $$ 6 | y = b 7 | $$
-------------------------------------------------------------------------------- /doc/pro.md: -------------------------------------------------------------------------------- 1 | ## First 2 | 3 | An equation: 4 | 5 | $$ 6 | y = b 7 | $$
-------------------------------------------------------------------------------- /doc/divergences.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gragusa/Divergences.jl/HEAD/doc/divergences.pdf
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.jl.cov 2 | *.jl.*.cov 3 | *.jl.mem 4 | docs/build/ 5 | docs/site/ 6 | Manifest.toml 7 | .DS_Store 8 | .vscode
-------------------------------------------------------------------------------- /test/Aqua.jl: -------------------------------------------------------------------------------- 1 | using Test 2 | using Aqua 3 | using Divergences 4 | 5 | @testset "Aqua.jl" begin 6 | Aqua.test_all(Divergences) 7 | end
-------------------------------------------------------------------------------- /test/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" 3 | Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" 4 | 5 | [compat] 6 | Aqua = "0.8"
-------------------------------------------------------------------------------- /doc/divergences_files/libs/bootstrap/bootstrap-icons.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gragusa/Divergences.jl/HEAD/doc/divergences_files/libs/bootstrap/bootstrap-icons.woff
-------------------------------------------------------------------------------- /Project.toml: -------------------------------------------------------------------------------- 1 | name = "Divergences" 2 | uuid = "7f07e4c8-6df2-5971-ad44-bf349b9adcfd" 3 | authors = ["Giuseppe Ragusa"] 4 | version = "0.4.1" 5 | 6 | [deps] 7 | Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" 8 | LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" 9 | NaNMath = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" 10 | RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" 11 | 12 | [compat] 13 | Distances = "0.10" 14 | LaTeXStrings = "1.4" 15 | NaNMath = "^1" 16 | Test = "1" 17 | julia = "1.9" 18 | RecipesBase = "1.3" 19 | 20 | [extras] 21 | Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
"3cdcf5f2-1ef4-517c-9805-6587b60abb01" 11 | 12 | [compat] 13 | Distances = "0.10" 14 | LaTeXStrings = "1.4" 15 | NaNMath = "^1" 16 | Test = "1" 17 | julia = "1.9" 18 | RecipesBase = "1.3" 19 | [extras] 20 | Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" 21 | -------------------------------------------------------------------------------- /.github/workflows/TagBot.yml: -------------------------------------------------------------------------------- 1 | name: TagBot 2 | on: 3 | issue_comment: 4 | types: 5 | - created 6 | workflow_dispatch: 7 | inputs: 8 | lookback: 9 | default: 3 10 | permissions: 11 | actions: read 12 | checks: read 13 | contents: write 14 | deployments: read 15 | issues: read 16 | discussions: read 17 | packages: read 18 | pages: read 19 | pull-requests: read 20 | repository-projects: read 21 | security-events: read 22 | statuses: read 23 | jobs: 24 | TagBot: 25 | if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot' 26 | runs-on: ubuntu-latest 27 | steps: 28 | - uses: JuliaRegistries/TagBot@v1 29 | with: 30 | token: ${{ secrets.GITHUB_TOKEN }} 31 | # Edit the following line to reflect the actual name of the GitHub Secret containing your private key 32 | ssh: ${{ secrets.DOCUMENTER_KEY }} 33 | # ssh: ${{ secrets.NAME_OF_MY_SSH_PRIVATE_KEY_SECRET }} 34 | -------------------------------------------------------------------------------- /src/plots.jl: -------------------------------------------------------------------------------- 1 | using RecipesBase 2 | using LaTeXStrings 3 | @recipe function f(r::Divergences.AbstractDivergence; min_u = 0, max_u = 3, lenout = 1000) 4 | # set a default value for an attribute with `-->` 5 | xlabel --> L"$u$" 6 | yguide --> L"$\gamma(u)$" 7 | #markershape --> :diamond 8 | # add a series for an error band 9 | step = (max_u - min_u) / lenout 10 | u = collect(min_u:step:max_u) 11 | y = r.(u) 12 | @series begin 13 | # force an argument with `:=` 14 | seriestype := :path 15 | # ignore series in legend and color cycling 16 | primary := false 17 | linecolor := nothing 18 | #fillcolor := :lightgray 19 | #fillalpha := 0.5 20 | #fillrange := r.y .- r.ε 21 | # ensure no markers are shown for the error band 22 | markershape := :none 23 | # return series data 24 | u, y 25 | end 26 | # get the seriescolor passed by the user 27 | c = get(plotattributes, :seriescolor, :auto) 28 | # highlight big errors, otherwise use the user-defined color 29 | #markercolor := ifelse.(r.ε .> ε_max, :red, c) 30 | # return data 31 | return u, y 32 | end 33 | -------------------------------------------------------------------------------- /.github/workflows/format_fix.yaml: -------------------------------------------------------------------------------- 1 | name: Auto-format (pushes in repo) 2 | 3 | on: 4 | push: 5 | branches: ["**"] # or ['main', 'develop', 'feature/**'] 6 | 7 | permissions: 8 | contents: write 9 | 10 | jobs: 11 | format-fix: 12 | if: "!contains(github.event.head_commit.message, 'auto-format with JuliaFormatter')" 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v5 16 | with: 17 | fetch-depth: 0 18 | 19 | - name: Setup Julia 20 | uses: julia-actions/setup-julia@v2 21 | with: 22 | version: '1' 23 | 24 | - name: Install JuliaFormatter 25 | run: julia -e 'using Pkg; Pkg.add("JuliaFormatter")' 26 | 27 | - name: Run formatter (sciml) 28 | run: julia -e 'using JuliaFormatter; format(".", verbose=true)' 29 | 30 | - name: Commit & push if changed 31 | run: | 32 | if ! 
git diff --quiet; then 33 | git config user.name "github-actions[bot]" 34 | git config user.email "41898282+github-actions[bot]@users.noreply.github.com" 35 | git add -A 36 | git commit -m "style: auto-format with JuliaFormatter (sciml)" 37 | git push 38 | fi
-------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The Divergences.jl package is licensed under the MIT "Expat" License: 2 | 3 | > Copyright (c) 2014: Giuseppe Ragusa. 4 | > 5 | > Permission is hereby granted, free of charge, to any person obtaining 6 | > a copy of this software and associated documentation files (the 7 | > "Software"), to deal in the Software without restriction, including 8 | > without limitation the rights to use, copy, modify, merge, publish, 9 | > distribute, sublicense, and/or sell copies of the Software, and to 10 | > permit persons to whom the Software is furnished to do so, subject to 11 | > the following conditions: 12 | > 13 | > The above copyright notice and this permission notice shall be 14 | > included in all copies or substantial portions of the Software. 15 | > 16 | > THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | > EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | > MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | > IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | > CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 | > TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | > SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [0.4.1] 9 | 10 | - Fix several critical issues introduced in the previous release.
11 | 12 | ## [0.4.0] 13 | 14 | ### Added 15 | - Divergence types are now callable, enabling `div(x, y)` and `div(x)` syntax 16 | - Broadcasting support for divergences using `div.(x, y)` 17 | - Backward compatibility for the old `evaluate(div, x, y)` API with deprecation warnings 18 | 19 | ### Changed 20 | - Divergences now inherit from `PreMetric` (via `Distances.jl`) instead of being standalone types 21 | - Improved performance through streamlined divergence evaluation 22 | - Updated constructor signatures for modified divergences 23 | 24 | ### Deprecated 25 | - `evaluate(div, x, y)` and `evaluate(div, x)` functions are deprecated in favor of callable syntax `div(x, y)` and `div(x)` 26 | 27 | ### Fixed 28 | - Type annotation issues in gradient and hessian functions 29 | - Improved numerical stability in divergence calculations 30 | 31 | ## [0.3.0] - Previous Release 32 | - Initial stable release with basic divergence functionality 33 | -------------------------------------------------------------------------------- /doc/divergences_files/libs/quarto-html/tippy.css: -------------------------------------------------------------------------------- 1 | .tippy-box[data-animation=fade][data-state=hidden]{opacity:0}[data-tippy-root]{max-width:calc(100vw - 10px)}.tippy-box{position:relative;background-color:#333;color:#fff;border-radius:4px;font-size:14px;line-height:1.4;white-space:normal;outline:0;transition-property:transform,visibility,opacity}.tippy-box[data-placement^=top]>.tippy-arrow{bottom:0}.tippy-box[data-placement^=top]>.tippy-arrow:before{bottom:-7px;left:0;border-width:8px 8px 0;border-top-color:initial;transform-origin:center top}.tippy-box[data-placement^=bottom]>.tippy-arrow{top:0}.tippy-box[data-placement^=bottom]>.tippy-arrow:before{top:-7px;left:0;border-width:0 8px 8px;border-bottom-color:initial;transform-origin:center bottom}.tippy-box[data-placement^=left]>.tippy-arrow{right:0}.tippy-box[data-placement^=left]>.tippy-arrow:before{border-width:8px 0 8px 8px;border-left-color:initial;right:-7px;transform-origin:center left}.tippy-box[data-placement^=right]>.tippy-arrow{left:0}.tippy-box[data-placement^=right]>.tippy-arrow:before{left:-7px;border-width:8px 8px 8px 0;border-right-color:initial;transform-origin:center right}.tippy-box[data-inertia][data-state=visible]{transition-timing-function:cubic-bezier(.54,1.5,.38,1.11)}.tippy-arrow{width:16px;height:16px;color:#333}.tippy-arrow:before{content:"";position:absolute;border-color:transparent;border-style:solid}.tippy-content{position:relative;padding:5px 9px;z-index:1} -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | 5 | pull_request: 6 | push: 7 | branches: [master] 8 | tags: ['*'] 9 | 10 | # allow cache cleanup (safe to add once per workflow) 11 | permissions: 12 | actions: write 13 | contents: read 14 | 15 | jobs: 16 | test: 17 | env: 18 | GKS_ENCODING: "utf8" 19 | GKSwstype: "nul" 20 | name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} 21 | runs-on: ${{ matrix.os }} 22 | continue-on-error: ${{ matrix.version == 'nightly' }} 23 | strategy: 24 | fail-fast: false 25 | matrix: 26 | version: 27 | - '1.9' 28 | - 'nightly' 29 | os: [ubuntu-latest] 30 | arch: [x64, x86] 31 | include: # spare windows/macos CI credits, run on julia-latest only 32 | - os: windows-latest 33 | version: '1' 34 | arch: x64 35 | - os: 
windows-latest 36 | version: '1' 37 | arch: x86 38 | - os: macOS-latest 39 | version: '1' 40 | arch: x64 41 | steps: 42 | - uses: actions/checkout@v5 43 | - uses: julia-actions/setup-julia@v2 44 | with: 45 | version: ${{ matrix.version }} 46 | arch: ${{ matrix.arch }} 47 | - uses: julia-actions/cache@v2 48 | - uses: julia-actions/julia-buildpkg@v1 49 | - uses: julia-actions/julia-runtest@v1 50 | continue-on-error: ${{ matrix.version == 'nightly' }} 51 | env: 52 | JULIA_NUM_THREADS: 4,1 53 | - uses: julia-actions/julia-processcoverage@v1 54 | - uses: codecov/codecov-action@v5 55 | with: 56 | files: lcov.info 57 | token: ${{ secrets.CODECOV_TOKEN }}
-------------------------------------------------------------------------------- /src/benchmark/print_table.jl: -------------------------------------------------------------------------------- 1 | using BenchmarkTools 2 | using Distances 3 | using Printf 4 | 5 | include("benchmarks.jl") 6 | 7 | # BenchmarkTools stores things in a Dict so it loses ordering but we want to print the table 8 | # in a special order. Therefore define an order here: 9 | 10 | order = [:KullbackLeibler, 11 | :ReverseKullbackLeibler, 12 | :Hellinger, 13 | :CressieRead, 14 | :ChiSquared, 15 | :ModifiedDivergence] 16 | 17 | BenchmarkTools.DEFAULT_PARAMETERS.seconds = 2.0 # Long enough 18 | 19 | # Tuning 20 | if !isfile(joinpath(@__DIR__, "params.json")) 21 | tuning = tune!(SUITE; verbose = true); 22 | BenchmarkTools.save(joinpath(@__DIR__, "params.json"), "SUITE", params(SUITE)) 23 | end 24 | loadparams!(SUITE, BenchmarkTools.load(joinpath(@__DIR__, "params.json"))[1], :evals, :samples); 25 | 26 | # Run and judge 27 | results = run(SUITE; verbose = true) 28 | judgement = minimum(results) 29 | 30 | # Output the comparison table 31 | getname(T::DataType) = T.name.name 32 | 33 | function print_table(judgement) 34 | for typ in ("colwise", "pairwise") 35 | io = IOBuffer() 36 | println(io, "| distance | loop | $typ | gain |") 37 | println(io, "|----------- | -------| ----------| -------|") 38 | sorted_distances = collect(judgement[typ]) 39 | #sorted_distances = sort(collect(judgement[typ]), by = y -> findfirst(x -> x == getname(y[1]), order)) 40 | 41 | for (dist, result) in sorted_distances 42 | t_loop = BenchmarkTools.time(result["loop"]) 43 | t_spec = BenchmarkTools.time(result["specialized"]) 44 | print(io, "| ", string(dist), " |") 45 | print(io, 46 | @sprintf("%9.6fs | %9.6fs | %7.4f |\n", 47 | t_loop / 1e9, 48 | t_spec / 1e9, 49 | (t_loop / t_spec))) 50 | end 51 | print(stdout, String(take!(io))) 52 | println() 53 | end 54 | end 55 | 56 | print_table(judgement)
-------------------------------------------------------------------------------- /doc/DRIVER.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | exec(open('divergences.py').read()) 3 | 4 | 5 | def randiv(n=100, m=5, k=1, theta=0.0, rho=0.9, CP=20): 6 | """ 7 | Simulates instrumental variables regression data 8 | 9 | Returns: 10 | y: outcome variable (n x 1) 11 | covariates: matrix [x w] (n x (1 + k)) 12 | instruments: matrix [z w] (n x (m + k)) 13 | theory_val: theoretical strength measure (array length m) 14 | """ 15 | # Generate instrument strength vector 16 | tau = np.full(m, np.sqrt(CP / (m * n))) 17 | 18 | # Generate base data matrices 19 | z = np.random.randn(n, m) # Instruments 20 | w = np.random.randn(n, k) # Exogenous controls (n x k) 21 | 22 | # Generate correlated errors 23 | eta = np.random.randn(n, 1) 24 | u = rho * eta + np.sqrt(1 - rho**2) * np.random.randn(n, 1) 25 | 26
| # Create endogenous variable x (n x 1) 27 | x = z @ tau.reshape(-1, 1) + eta 28 | 29 | # Create outcome variable y (n x 1) 30 | y = x * theta + u 31 | 32 | # Create combined matrices 33 | covariates = np.hstack((x, w)) 34 | instruments = np.hstack((z, w)) 35 | 36 | # Calculate theoretical value (array length m) 37 | theory_val = (k * tau**2) / (1 + k * tau**2) 38 | 39 | return y, covariates, instruments, theory_val 40 | 41 | 42 | y, x, z, tv = randiv() 43 | 44 | divergence = KullbackLeibler() 45 | momfun = DefaultMomentFunction(y,x,z) 46 | problem = MDProblem(momfun, divergence) 47 | 48 | k = 2 49 | n = 100 50 | m = 6 51 | 52 | pi = np.random.uniform(0,1,n) 53 | theta = np.random.uniform(0, 1, k) 54 | 55 | 56 | u0 = np.concatenate((pi, theta)) 57 | lb = np.concatenate((np.zeros_like(pi), -10.0 * np.ones_like(theta))) 58 | ub = np.concatenate((np.inf*np.ones_like(pi), 10.0 * np.ones_like(theta))) 59 | 60 | # Define constraint bounds. Our constraint vector has length m+1. 61 | # For equality constraints, we set cl = cu. 62 | # For instance, suppose we require c(u) == 0. 63 | cl = np.zeros(m + 1) 64 | cu = np.zeros(m + 1) 65 | 66 | #prob = MDOptProblem(problem, u0, lb, ub, cl, cu) 67 | 68 | theta = np.array((1,2)) 69 | momfun.g(theta) -------------------------------------------------------------------------------- /doc/y.csv: -------------------------------------------------------------------------------- 1 | 4.534400727903336037e-01 2 | -3.726962711333793310e-01 3 | 4.756290312354569672e-01 4 | 6.061400272507512232e-01 5 | -3.064275130944626446e-01 6 | 1.953269844971823765e+00 7 | -1.254536494380667389e+00 8 | -3.552921823819787939e-01 9 | -2.508340213668912355e+00 10 | 2.085908067490494400e+00 11 | 6.809094722044641834e-01 12 | -9.218247193524797822e-02 13 | 1.444704050717032784e-02 14 | -8.389607438507218351e-01 15 | 2.184772195191130084e+00 16 | 5.972172045705298471e-01 17 | 1.482459928520135639e-01 18 | 7.187049804336708148e-01 19 | 2.744134197952955989e-01 20 | 1.766735999435156967e-01 21 | -5.772593293764261002e-01 22 | -3.211235985803653770e-01 23 | 1.106161645616177358e+00 24 | 1.534859348960212699e+00 25 | 1.508346481749807566e+00 26 | -5.402905858337447853e-01 27 | -8.826091730963186999e-01 28 | 3.829912473982199916e-02 29 | -1.851233689668075610e-01 30 | 6.455172020929682564e-01 31 | -1.437851199189063678e+00 32 | 9.501324808410407385e-01 33 | 3.574614183704746817e-02 34 | -1.126331950332877252e+00 35 | -4.622953957063831720e-01 36 | -1.283370537998172800e+00 37 | 8.524542994072983326e-01 38 | 4.943312396442680057e-01 39 | -4.350013301207570349e-01 40 | -7.234172343805733707e-01 41 | -1.104624703575391109e+00 42 | 9.443647039121307873e-01 43 | -5.059843200494723581e-01 44 | -1.341455832686308636e+00 45 | 4.473578066366112882e-03 46 | -5.617956068164855488e-01 47 | -2.345788233410650125e+00 48 | -3.781291664463863333e-01 49 | -4.743955077296018175e-01 50 | 1.343637143492848463e-02 51 | 1.261632443339366194e+00 52 | 4.247228666508588990e-01 53 | -6.673731106425970783e-01 54 | -5.366015064570825999e-01 55 | 1.902190441572049862e+00 56 | 1.200725330754551123e+00 57 | 2.275686504195926219e-01 58 | 5.885562003117093821e-02 59 | -1.958731974301114176e-01 60 | 1.753336286191466420e-01 61 | -7.672105174572009245e-01 62 | -4.389902019004270350e-01 63 | 1.086437035729067357e+00 64 | -5.309538335630283878e-01 65 | -1.406045756308372807e-01 66 | -2.107202927376866097e-01 67 | -2.447306251050986592e-01 68 | 2.125467791713697707e+00 69 | -2.659161432136301073e+00 70 | 1.772384177197678490e+00 
71 | 1.429978589622439955e+00 72 | -2.111224792689559759e+00 73 | -3.275869210670834253e-02 74 | 8.962987690516099892e-02 75 | -9.957198465068788362e-01 76 | -1.384480106096983576e+00 77 | -1.316470007670462739e+00 78 | 1.469152527180052070e+00 79 | 8.096657742858849316e-01 80 | 1.414944280524625286e+00 81 | 7.269629281623196659e-01 82 | -1.598209705539952008e+00 83 | -3.063438385431636046e-01 84 | 7.065852611109912917e-01 85 | -8.559080287936740028e-01 86 | 1.112799983437065432e+00 87 | 1.472053815035164193e-01 88 | -1.371867705702622675e-01 89 | 6.092794468281547138e-01 90 | -3.241591054990826115e-01 91 | -1.376033090868865605e-01 92 | 1.133925787043992273e+00 93 | -8.545785241860158754e-01 94 | -2.179755581183107083e-03 95 | 6.256951098221441576e-02 96 | 1.804668905953725755e-01 97 | 2.762780281821699546e-01 98 | -8.289429517667962211e-01 99 | 8.439680112659962985e-01 100 | -1.534415222975048787e-01 101 | -------------------------------------------------------------------------------- /doc/divergences.aux: -------------------------------------------------------------------------------- 1 | \relax 2 | \providecommand*\new@tpo@label[2]{} 3 | \providecommand\hyper@newdestlabel[2]{} 4 | \providecommand\HyperFirstAtBeginDocument{\AtBeginDocument} 5 | \HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined 6 | \global\let\oldnewlabel\newlabel 7 | \gdef\newlabel#1#2{\newlabelxx{#1}#2} 8 | \gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}} 9 | \AtEndDocument{\ifx\hyper@anchor\@undefined 10 | \let\newlabel\oldnewlabel 11 | \fi} 12 | \fi} 13 | \global\let\hyper@last\relax 14 | \gdef\HyperFirstAtBeginDocument#1{#1} 15 | \providecommand*\HyPL@Entry[1]{} 16 | \HyPL@Entry{0<>} 17 | \@writefile{toc}{\contentsline {section}{\nonumberline \texttt {Divergences.el}}{1}{section*.2}\protected@file@percent } 18 | \newlabel{divergences.el}{{}{1}{\texorpdfstring {\texttt {Divergences.el}}{Divergences.el}}{section*.2}{}} 19 | \@writefile{toc}{\contentsline {subsection}{\nonumberline Definition}{1}{subsection*.4}\protected@file@percent } 20 | \newlabel{definition}{{}{1}{Definition}{subsection*.4}{}} 21 | \gdef \LT@i {\LT@entry 22 | {1}{73.27129pt}\LT@entry 23 | {1}{160.93872pt}\LT@entry 24 | {1}{57.6588pt}\LT@entry 25 | {1}{103.27501pt}\LT@entry 26 | {1}{51.6588pt}} 27 | \@writefile{toc}{\contentsline {subsection}{\nonumberline Divergences}{2}{subsection*.6}\protected@file@percent } 28 | \newlabel{divergences}{{}{2}{Divergences}{subsection*.6}{}} 29 | \@writefile{toc}{\contentsline {subsection}{\nonumberline Modified divergences}{2}{subsection*.8}\protected@file@percent } 30 | \newlabel{modified-divergences}{{}{2}{Modified divergences}{subsection*.8}{}} 31 | \gdef \LT@ii {\LT@entry 32 | {1}{74.06096pt}\LT@entry 33 | {1}{117.60448pt}\LT@entry 34 | {1}{117.60448pt}\LT@entry 35 | {1}{137.44621pt}} 36 | \@writefile{toc}{\contentsline {subsection}{\nonumberline Fully modified divergences}{4}{subsection*.10}\protected@file@percent } 37 | \newlabel{fully-modified-divergences}{{}{4}{Fully modified divergences}{subsection*.10}{}} 38 | \@writefile{toc}{\contentsline {subsection}{\nonumberline Example of divergences}{4}{subsection*.12}\protected@file@percent } 39 | \newlabel{example-of-divergences}{{}{4}{Example of divergences}{subsection*.12}{}} 40 | \@writefile{toc}{\contentsline {paragraph}{\nonumberline Kullback-Leibler divergence}{4}{paragraph*.14}\protected@file@percent } 41 | \newlabel{kullback-leibler-divergence}{{}{4}{Kullback-Leibler divergence}{paragraph*.14}{}} 42 | \@writefile{toc}{\contentsline 
{paragraph}{\nonumberline Reverse Kullback-Leibler divergence}{4}{paragraph*.16}\protected@file@percent } 43 | \newlabel{reverse-kullback-leibler-divergence}{{}{4}{Reverse Kullback-Leibler divergence}{paragraph*.16}{}} 44 | \@writefile{toc}{\contentsline {paragraph}{\nonumberline Chi-squared divergence}{5}{paragraph*.18}\protected@file@percent } 45 | \newlabel{chi-squared-divergence}{{}{5}{Chi-squared divergence}{paragraph*.18}{}} 46 | \@writefile{toc}{\contentsline {subsection}{\nonumberline Cressie-Read divergences}{5}{subsection*.20}\protected@file@percent } 47 | \newlabel{cressie-read-divergences}{{}{5}{Cressie-Read divergences}{subsection*.20}{}} 48 | -------------------------------------------------------------------------------- /src/benchmark/benchmarks.jl: -------------------------------------------------------------------------------- 1 | using BenchmarkTools 2 | using Distances 3 | using Divergences 4 | 5 | const SUITE = BenchmarkGroup() 6 | 7 | function create_distances() 8 | divs = [KullbackLeibler(), 9 | ReverseKullbackLeibler(), 10 | Hellinger(), 11 | CressieRead(2.0), 12 | ChiSquared(), 13 | ModifiedDivergence(KullbackLeibler(), 2.0), 14 | FullyModifiedDivergence(KullbackLeibler(), 0.5, 2.0)] 15 | 16 | return divs 17 | end 18 | 19 | ########### 20 | # Eval # 21 | ########### 22 | 23 | SUITE["evaluation"] = BenchmarkGroup() 24 | 25 | function evaluate(dist, x, y) 26 | n = size(x, 1) 27 | T = typeof(dist(x[1, 1], y[1, 1])) 28 | return dist(x, y) 29 | end 30 | 31 | SUITE["gradient"] = BenchmarkGroup() 32 | 33 | ########### 34 | # Colwise # 35 | ########### 36 | 37 | SUITE["colwise"] = BenchmarkGroup() 38 | 39 | function evaluate_colwise(dist, x, y) 40 | n = size(x, 2) 41 | T = typeof(evaluate(dist, x[:, 1], y[:, 1])) 42 | r = Vector{T}(undef, n) 43 | for j in 1:n 44 | r[j] = @views evaluate(dist, x[:, j], y[:, j]) 45 | end 46 | return r 47 | end 48 | 49 | function add_colwise_benchmarks!(SUITE) 50 | m = 200 51 | n = 10000 52 | 53 | x = rand(m, n) 54 | y = rand(m, n) 55 | 56 | p = x 57 | q = y 58 | for i in 1:n 59 | p[:, i] /= sum(x[:, i]) 60 | q[:, i] /= sum(y[:, i]) 61 | end 62 | 63 | divs = create_distances() 64 | 65 | for (dists, (a, b)) in [(divs, (p, q))] 66 | for dist in (dists) 67 | Tdist = typeof(dist) 68 | SUITE["colwise"][Tdist] = BenchmarkGroup() 69 | SUITE["colwise"][Tdist]["loop"] = @benchmarkable evaluate_colwise($dist, $a, $b) 70 | SUITE["colwise"][Tdist]["specialized"] = @benchmarkable colwise($dist, $a, $b) 71 | end 72 | end 73 | end 74 | 75 | add_colwise_benchmarks!(SUITE) 76 | 77 | ############ 78 | # Pairwise # 79 | ############ 80 | 81 | SUITE["pairwise"] = BenchmarkGroup() 82 | 83 | function evaluate_pairwise(dist, x, y) 84 | nx = size(x, 2) 85 | ny = size(y, 2) 86 | T = typeof(evaluate(dist, x[:, 1], y[:, 1])) 87 | r = Matrix{T}(undef, nx, ny) 88 | for j in 1:ny 89 | @inbounds for i in 1:nx 90 | r[i, j] = @views evaluate(dist, x[:, i], y[:, j]) 91 | end 92 | end 93 | return r 94 | end 95 | 96 | function add_pairwise_benchmarks!(SUITE) 97 | m = 100 98 | nx = 200 99 | ny = 250 100 | 101 | x = rand(m, nx) 102 | y = rand(m, ny) 103 | 104 | p = x 105 | for i in 1:nx 106 | p[:, i] /= sum(x[:, i]) 107 | end 108 | 109 | q = y 110 | for i in 1:ny 111 | q[:, i] /= sum(y[:, i]) 112 | end 113 | 114 | divs = create_distances() 115 | 116 | for (dists, (a, b)) in [(divs, (p, q))] 117 | for dist in (dists) 118 | Tdist = typeof(dist) 119 | SUITE["pairwise"][Tdist] = BenchmarkGroup() 120 | SUITE["pairwise"][Tdist]["loop"] = @benchmarkable evaluate_pairwise($dist, $a, 121 
| $b) 122 | SUITE["pairwise"][Tdist]["specialized"] = @benchmarkable pairwise( 123 | $dist, $a, $b; 124 | dims = 2) 125 | end 126 | end 127 | end 128 | 129 | add_pairwise_benchmarks!(SUITE) 130 |
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Divergences.jl 2 | 3 | [![codecov](https://codecov.io/gh/gragusa/Divergences.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/gragusa/Divergences.jl) 4 | 5 | `Divergences.jl` is a Julia package that makes evaluating divergence measures between two vectors easy. The package allows for calculating the *gradient* and the diagonal of the *Hessian* of several divergences. 6 | 7 | 8 | ## Supported divergences 9 | 10 | The package defines an abstract `Divergence` type with the following subtypes: 11 | 12 | * Kullback-Leibler divergence `KullbackLeibler` 13 | * Chi-squared distance `ChiSquared` 14 | * Reverse Kullback-Leibler divergence `ReverseKullbackLeibler` 15 | * Cressie-Read divergences `CressieRead` 16 | 17 | These divergences differ from the equivalent ones defined in the `Distances` package because they are **normalized**. 18 | 19 | Also, the package provides methods for calculating their gradient and the (diagonal elements of the) Hessian matrix. 20 | 21 | The constructors for the types above are straightforward: 22 | ```julia 23 | KullbackLeibler() 24 | ChiSquared() 25 | ReverseKullbackLeibler() 26 | ``` 27 | The `CressieRead` type defines a family of divergences indexed by a parameter `alpha`. The constructor for `CressieRead` is 28 | ```julia 29 | CressieRead(::Real) 30 | ``` 31 | The Hellinger divergence has a dedicated `Hellinger` type and is also obtained as `CressieRead(-1/2)`. For certain values of `alpha`, `CressieRead` coincides with a divergence that has its own specific type; for instance, `CressieRead(1)` is equivalent to `ChiSquared`, although the underlying code for evaluating the divergence and calculating its gradient and Hessian is different. 32 | 33 | Three versions of each divergence in the above list are currently implemented: a vanilla version, a modified version, and a fully modified version. These modifications extend the domain of the divergence. 34 | 35 | The **modified** version wraps a divergence and takes an additional argument `rho > 1` that specifies the point beyond which the divergence is replaced by its convex (quadratic) extension. 36 | ```julia 37 | ModifiedDivergence(KullbackLeibler(), rho) 38 | ModifiedDivergence(ReverseKullbackLeibler(), rho) 39 | ModifiedDivergence(CressieRead(alpha), rho) 40 | ``` 41 | 42 | Similarly, the **fully modified** version takes two additional arguments, `phi` and `rho` with `0 < phi < 1 < rho`, that specify the points at which the convex extensions take over. 43 | ```julia 44 | FullyModifiedDivergence(KullbackLeibler(), phi, rho) 45 | FullyModifiedDivergence(ReverseKullbackLeibler(), phi, rho) 46 | FullyModifiedDivergence(CressieRead(alpha), phi, rho) 47 | ``` 48 | 49 | 50 | ## Basic usage 51 | 52 | ### Divergence between two vectors 53 | 54 | Each divergence corresponds to a *divergence type*. You can always compute a given divergence between two vectors using the following syntax: 55 | 56 | ```julia 57 | x = rand(100) 58 | y = rand(100) 59 | 𝒦ℒ = KullbackLeibler() 60 | 𝒦ℒ(x, y) 61 | ``` 62 | 63 | Here, `𝒦ℒ` is an instance of a divergence type; calling it evaluates the divergence between its arguments. 64 | 65 | We can also calculate the divergence between the vector ``x`` and the unit vector: 66 | ```julia 67 | r = 𝒦ℒ(x) 68 | ``` 69 | 70 | The `Divergence` type is a subtype of `PreMetric` defined in the `Distances` package. As such, the divergences can be evaluated column-wise for `X::Matrix` and `Y::Matrix`. 71 | 72 | ```julia 73 | colwise(𝒦ℒ, X, Y) 74 | ``` 75 | 76 | The divergence function can also be broadcast: 77 | ```julia 78 | 𝒦ℒ.(x, y) 79 | ``` 80 | 81 | 82 | ### Gradient of the divergence 83 | 84 | To calculate the gradient of `div::Divergence` with respect to ``x::AbstractArray{Float64, 1}``, the 85 | `gradient` method can be used 86 | ```julia 87 | g = gradient(div, x, y) 88 | ``` 89 | or through its in-place version 90 | ```julia 91 | u = Vector{Float64}(undef, size(x)) 92 | gradient!(u, div, x, y) 93 | ``` 94 | 95 | ### Hessian of the divergence 96 | The `hessian` method calculates the Hessian of the divergence with respect to ``x`` 97 | ```julia 98 | h = hessian(div, x, y) 99 | ``` 100 | Its in-place variant is also defined 101 | ```julia 102 | u = Vector{Float64}(undef, size(x)) 103 | hessian!(u, div, x, y) 104 | ``` 105 | 106 | Notice that the divergence's Hessian is sparse: the diagonal entries are the only ones different from zero. For this reason, `hessian(div, x, y)` returns an `Array{T,1}` with the diagonal entries of the Hessian. 107 | 108 | 109 | 110 | 111 | 112 |
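A short end-to-end sketch tying the README together. The vectors below are arbitrary, and `Divergences.gradient` / `Divergences.hessian!` are called with the module qualifier because the module's export list only covers the divergence types — drop the qualifier if your version exports them.

```julia
using Divergences

x = [0.2, 0.3, 0.5]
y = [0.25, 0.25, 0.5]

kl = KullbackLeibler()

kl(x, y)    # divergence between x and y
kl(x)       # divergence between x and the unit vector
kl.(x, y)   # broadcast: one term per coordinate pair

# Gradient of the divergence with respect to x
g = Divergences.gradient(kl, x, y)

# Diagonal of the Hessian, computed in place
h = similar(x)
Divergences.hessian!(h, kl, x, y)

# A modified divergence: quadratic extension beyond rho = 2
mkl = ModifiedDivergence(KullbackLeibler(), 2.0)
mkl(x, y)
```

The in-place `hessian!` is used here because, as noted above, only the diagonal of the Hessian is ever stored.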
-------------------------------------------------------------------------------- /doc/divergences_files/libs/quarto-html/quarto-syntax-highlighting-e26003cea8cd680ca0c55a263523d882.css: -------------------------------------------------------------------------------- 1 | /* quarto syntax highlight colors */ 2 | :root { 3 | --quarto-hl-ot-color: #003B4F; 4 | --quarto-hl-at-color: #657422; 5 | --quarto-hl-ss-color: #20794D; 6 | --quarto-hl-an-color: #5E5E5E; 7 | --quarto-hl-fu-color: #4758AB; 8 | --quarto-hl-st-color: #20794D; 9 | --quarto-hl-cf-color: #003B4F; 10 | --quarto-hl-op-color: #5E5E5E; 11 | --quarto-hl-er-color: #AD0000; 12 | --quarto-hl-bn-color: #AD0000; 13 | --quarto-hl-al-color: #AD0000; 14 | --quarto-hl-va-color: #111111; 15 | --quarto-hl-bu-color: inherit; 16 | --quarto-hl-ex-color: inherit; 17 | --quarto-hl-pp-color: #AD0000; 18 | --quarto-hl-in-color: #5E5E5E; 19 | --quarto-hl-vs-color: #20794D; 20 | --quarto-hl-wa-color: #5E5E5E; 21 | --quarto-hl-do-color: #5E5E5E; 22 | --quarto-hl-im-color: #00769E; 23 | --quarto-hl-ch-color: #20794D; 24 | --quarto-hl-dt-color: #AD0000; 25 | --quarto-hl-fl-color: #AD0000; 26 | --quarto-hl-co-color: #5E5E5E; 27 | --quarto-hl-cv-color: #5E5E5E; 28 | --quarto-hl-cn-color: #8f5902; 29 | --quarto-hl-sc-color: #5E5E5E; 30 | --quarto-hl-dv-color: #AD0000; 31 | --quarto-hl-kw-color: #003B4F; 32 | } 33 | 34 | /* other quarto variables */ 35 | :root { 36 | --quarto-font-monospace: SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace; 37 | } 38 | 39 | pre > code.sourceCode > span { 40 | color: #003B4F; 41 | } 42 | 43 | code span { 44 | color: #003B4F; 45 | } 46 | 47 | code.sourceCode > span { 48 | color: #003B4F; 49 | } 50 | 51 | div.sourceCode, 52 | div.sourceCode pre.sourceCode { 53 | color: #003B4F; 54 | } 55 | 56 | code span.ot { 57 | color: #003B4F; 58 | font-style: inherit; 59 | } 60 | 61 | code span.at { 62 | color: #657422; 63 | font-style: inherit; 64 | } 65 | 66 | code span.ss { 67 | color: #20794D; 68 | font-style: inherit; 69 | } 70 | 71 | code span.an { 72 | color: #5E5E5E; 73 | font-style: inherit; 74 | } 75 | 76 | code span.fu { 77 | color: #4758AB; 78 | font-style: inherit; 79 | } 80 | 81 | code span.st { 82 | color: #20794D; 83 | font-style: inherit; 84 | } 85 | 86 | code span.cf { 87 | color: #003B4F; 88 | font-weight: bold; 89 | font-style: inherit; 90 | } 91 | 92 | code span.op { 93 | color: #5E5E5E; 94 | font-style: inherit; 95 | } 96 | 97 | code span.er { 98 | color: #AD0000; 99 | font-style: inherit; 100 | } 101 | 102 | code span.bn { 103 | color: #AD0000; 104 | font-style: inherit; 105 | } 106 | 107 | code span.al { 108 | color: #AD0000; 109 | font-style: inherit; 110 | } 111 | 112 | code span.va { 113 | color: #111111; 114 | font-style: inherit; 115 | } 116 | 117 | code span.bu { 118 | font-style: inherit; 119 | } 120 | 121 | code span.ex { 122 | font-style: inherit; 123 | } 124 | 125 | code span.pp { 126 | color: #AD0000; 127 | font-style: inherit; 128 | } 129 | 130 | code span.in { 131 | color: #5E5E5E; 132 | font-style: inherit; 133 | } 134 | 135 | code span.vs { 136 | color: #20794D; 137 | font-style: inherit; 138 | } 139 | 140 | code span.wa { 141 | color: #5E5E5E; 142 | font-style: italic; 143 | } 144 | 145 | code span.do { 146 | color: #5E5E5E; 147 | font-style: italic; 148 | } 149 | 150 | code span.im { 151 | color: #00769E; 152 | font-style: inherit; 153 | } 154 | 155 | code span.ch { 156 | color: #20794D; 157 | font-style: inherit; 158 | } 159 | 160 | code span.dt { 161 | color: #AD0000; 162 | font-style: inherit; 163 | } 164 | 165 | code span.fl { 166 | color: #AD0000; 167 | font-style: inherit; 168 | } 169 | 170 | code span.co { 171 | color: #5E5E5E; 172 | font-style: inherit; 173 | } 174 | 175 | code span.cv { 176 | color: #5E5E5E; 177 | font-style: italic; 178 | } 179 | 180 | code span.cn { 181 | color: #8f5902; 182 | font-style: inherit; 183 | } 184 | 185 | code span.sc { 186 | color: #5E5E5E; 187 | font-style: inherit; 188 | } 189 | 190 | code span.dv { 191 | color: #AD0000; 192 | font-style: inherit; 193 | } 194 | 195 | code span.kw { 196 | color: #003B4F; 197 | font-weight: bold; 198 | font-style: inherit; 199 | } 200 | 201 | .prevent-inlining { 202 | content: "</"; 203 | }
-------------------------------------------------------------------------------- /src/Divergences.jl: --------------------------------------------------------------------------------
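*(The dump resumes `src/Divergences.jl` at line 33; the module preamble in lines 1–32 is not included. The sketch below is a plausible reconstruction inferred from how these names are used elsewhere in the dump — `PreMetric` from the README and CHANGELOG, `NaNMath` and the `half` helper from `src/divs.jl`, and the `d`/`m` fields from the constructors that follow. The actual source may differ.)*

```julia
# Hypothetical reconstruction of the missing preamble (lines 1-32) -- not the verbatim source.
module Divergences

using Distances        # provides the PreMetric supertype plus colwise/pairwise
import NaNMath         # log/sqrt variants returning NaN outside the domain (used in divs.jl)

abstract type AbstractDivergence <: PreMetric end
abstract type Divergence <: AbstractDivergence end
abstract type AbstractModifiedDivergence <: AbstractDivergence end

struct KullbackLeibler <: Divergence end
struct ReverseKullbackLeibler <: Divergence end
struct Hellinger <: Divergence end
struct ChiSquared <: Divergence end

struct CressieRead{T <: Real} <: Divergence
    α::T
end

struct ModifiedDivergence{D <: Divergence, M <: NamedTuple} <: AbstractModifiedDivergence
    d::D
    m::M
end

struct FullyModifiedDivergence{D <: Divergence, M <: NamedTuple} <: AbstractModifiedDivergence
    d::D
    m::M
end

# Scalar one-half in the element type; used by γ, γᵤ, and γₗ in src/divs.jl
half(::Type{T}) where {T <: Real} = convert(T, 0.5)
```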
33 | function ModifiedDivergence(D::Divergence, ρ::Union{Real, Int}) 34 | @assert ρ > 1 "A ModifiedDivergence requires ρ > 1" 35 | z = float(ρ) 36 | γ₀ = D(z) 37 | γ₁ = gradient(D, z) 38 | γ₂ = hessian(D, z) 39 | return ModifiedDivergence(D, (γ₀ = γ₀, γ₁ = γ₁, γ₂ = γ₂, ρ = z)) 40 | end 41 | 42 | function FullyModifiedDivergence(D::Divergence, φ::Union{Real, Int}, ρ::Union{Real, Int}) 43 | @assert ρ > 1 "A FullyModifiedDivergence requires ρ > 1" 44 | @assert φ < 1 && φ > 0 "A FullyModifiedDivergence requires φ ∈ (0,1)" 45 | z = float(ρ) 46 | γ₀ = D(z) 47 | γ₁ = gradient(D, z) 48 | γ₂ = hessian(D, z) 49 | w = float(φ) 50 | g₀ = D(w) 51 | g₁ = gradient(D, w) 52 | g₂ = hessian(D, w) 53 | return FullyModifiedDivergence(D, 54 | (γ₀ = γ₀, γ₁ = γ₁, γ₂ = γ₂, ρ = z, g₀ = g₀, g₁ = g₁, g₂ = g₂, φ = w)) 55 | end 56 | 57 | for div in (KullbackLeibler, 58 | ReverseKullbackLeibler, 59 | Hellinger, 60 | CressieRead, 61 | ChiSquared, 62 | ModifiedDivergence, 63 | FullyModifiedDivergence) 64 | @eval begin 65 | function (f::$div)(p, q) 66 | return γ(f, p/q)*q 67 | end 68 | end 69 | end 70 | 71 | for div in (KullbackLeibler, 72 | ReverseKullbackLeibler, 73 | Hellinger, 74 | CressieRead, 75 | ChiSquared, 76 | ModifiedDivergence, 77 | FullyModifiedDivergence) 78 | @eval begin 79 | function (f::$div)(p) 80 | return γ(f, p) 81 | end 82 | end 83 | end 84 | 85 | for div in (KullbackLeibler, 86 | ReverseKullbackLeibler, 87 | Hellinger, 88 | CressieRead, 89 | ChiSquared, 90 | ModifiedDivergence, 91 | FullyModifiedDivergence) 92 | @eval begin 93 | function (f::$div)(a::AbstractArray, b::AbstractArray) 94 | return sum(γ(f, a ./ b) .* b) 95 | end 96 | end 97 | end 98 | 99 | for div in (KullbackLeibler, 100 | ReverseKullbackLeibler, 101 | Hellinger, 102 | CressieRead, 103 | ChiSquared, 104 | ModifiedDivergence, 105 | FullyModifiedDivergence) 106 | @eval begin 107 | function (f::$div)(a::AbstractArray) 108 | return sum(γ(f, a)) 109 | end 110 | end 111 | end 112 | 113 | # Deprecated evaluate functions for backward compatibility 114 | function evaluate(f::AbstractDivergence, a::AbstractArray) 115 | Base.depwarn("evaluate(div, x) is deprecated, use div(x) instead", :evaluate) 116 | return sum(f.(a)) 117 | end 118 | 119 | function evaluate(f::AbstractDivergence, a::AbstractArray, b::AbstractArray) 120 | Base.depwarn("evaluate(div, x, y) is deprecated, use div(x, y) instead", :evaluate) 121 | return sum(f.(a ./ b) .* b) 122 | end 123 | 124 | function evaluate(f::AbstractDivergence, a::Real) 125 | Base.depwarn("evaluate(div, x) is deprecated, use div(x) instead", :evaluate) 126 | return f(a) 127 | end 128 | 129 | function evaluate(f::AbstractDivergence, a::Real, b::Real) 130 | Base.depwarn("evaluate(div, x, y) is deprecated, use div(x, y) instead", :evaluate) 131 | return f(a, b) 132 | end 133 | 134 | # Also keep the Distances.evaluate functions for compatibility 135 | function Distances.evaluate(f::AbstractDivergence, a::AbstractArray) 136 | Base.depwarn("evaluate(div, x) is deprecated, use div(x) instead", :evaluate) 137 | return sum(f.(a)) 138 | end 139 | 140 | function Distances.evaluate(f::AbstractDivergence, a::AbstractArray, b::AbstractArray) 141 | Base.depwarn("evaluate(div, x, y) is deprecated, use div(x, y) instead", :evaluate) 142 | return sum(f.(a ./ b) .* b) 143 | end 144 | 145 | include("divs.jl") 146 | include("plots.jl") 147 | 148 | export 149 | # KL 150 | KullbackLeibler, 151 | # RKL 152 | ReverseKullbackLeibler, 153 | # HD 154 | Hellinger, 155 | # CR 156 | CressieRead, 157 | # 158 | ChiSquared, 159 | # Modified 160 | ModifiedDivergence, 161 | # FullyModified 162 | FullyModifiedDivergence, 163 | # Abbr. 164 | # 𝒦ℒ, 165 | # ℬ𝓊𝓇ℊ, 166 | # 𝒞ℛ, 167 | # ℋ𝒟, 168 | # χ², 169 | # Deprecated 170 | evaluate 171 | end
-------------------------------------------------------------------------------- /doc/out.txt: -------------------------------------------------------------------------------- 1 | 2 | ****************************************************************************** 3 | This program contains Ipopt, a library for large-scale nonlinear optimization. 4 | Ipopt is released as open source code under the Eclipse Public License (EPL). 5 | For more information visit https://github.com/coin-or/Ipopt 6 | ****************************************************************************** 7 | 8 | This is Ipopt version 3.14.17, running with linear solver MUMPS 5.6.2. 9 | 10 | Starting derivative checker for first derivatives. 11 | 12 | * jac_g [ 2, 6] = 5.3584845903814550e-04 v ~ 5.3591483253877421e-04 [ 1.239e-04] 13 | * jac_g [ 3, 19] = -3.0683933380389333e-05 v ~ -3.0653867593534056e-05 [ 3.007e-04] 14 | * jac_g [ 0, 33] = 3.8313848492413171e-04 v ~ 3.8307135241666401e-04 [ 1.752e-04] 15 | * jac_g [ 3, 33] = -2.2776641275915939e-04 v ~ -2.2790658249505213e-04 [ 6.150e-04] 16 | * jac_g [ 2, 44] = -1.4786828033625633e-04 v ~ -1.4783009470098205e-04 [ 2.583e-04] 17 | * jac_g [ 0, 66] = 3.4471674299317916e-05 v ~ 3.4433318135752694e-05 [ 3.836e-04] 18 | * jac_g [ 2, 66] = -2.1337131611093418e-04 v ~ -2.1340002550200774e-04 [ 1.345e-04] 19 | * jac_g [ 2, 80] = 8.1824549098388457e-05 v ~ 8.1920338652649097e-05 [ 9.579e-04] 20 | Starting derivative checker for second derivatives with obj_factor or lambda[i] set to 1.5.
21 | 22 | * 0-th constr_hess[ 33, 100] = 2.3473536762726555e-06 v ~ 2.3647750424515834e-06 [ 1.742e-04] 23 | * 2-th constr_hess[ 6, 100] = -1.1609115508295361e-04 v ~ -1.1607729210828001e-04 [ 1.194e-04] 24 | * 2-th constr_hess[ 19, 100] = 1.2838401380870684e-05 v ~ 1.2867055533088369e-05 [ 2.865e-04] 25 | * 2-th constr_hess[ 80, 100] = -1.5994067451743186e-05 v ~ -1.5977260893399528e-05 [ 1.681e-04] 26 | * 3-th constr_hess[ 15, 100] = 1.7611462485223907e-04 v ~ 1.7609519664980492e-04 [ 1.103e-04] 27 | * 5-th constr_hess[ 11, 101] = -3.7858014615351954e-05 v ~ -3.7844536682201439e-05 [ 1.348e-04] 28 | * 5-th constr_hess[ 12, 101] = -8.5644253651907917e-04 v ~ -8.5635644552028154e-04 [ 1.005e-04] 29 | * 5-th constr_hess[ 35, 101] = -3.3699510638421735e-05 v ~ -3.3688887135362431e-05 [ 1.062e-04] 30 | * 5-th constr_hess[ 51, 101] = -3.6353102428981381e-04 v ~ -3.6345531958810478e-04 [ 2.083e-04] 31 | * 5-th constr_hess[ 90, 101] = -7.6825894646135329e-05 v ~ -7.6794039623259626e-05 [ 3.186e-04] 32 | 33 | Derivative checker detected 18 error(s). 34 | 35 | Number of nonzeros in equality constraint Jacobian...: 714 36 | Number of nonzeros in inequality constraint Jacobian.: 0 37 | Number of nonzeros in Lagrangian Hessian.............: 300 38 | 39 | Total number of variables............................: 102 40 | variables with only lower bounds: 100 41 | variables with lower and upper bounds: 2 42 | variables with only upper bounds: 0 43 | Total number of equality constraints.................: 7 44 | Total number of inequality constraints...............: 0 45 | inequality constraints with only lower bounds: 0 46 | inequality constraints with lower and upper bounds: 0 47 | inequality constraints with only upper bounds: 0 48 | 49 | iter objective inf_pr inf_du lg(mu) ||d|| lg(rg) alpha_du alpha_pr ls 50 | 0 2.4945537e+01 4.87e+01 3.09e+00 -1.0 0.00e+00 - 0.00e+00 0.00e+00 0 51 | 1 1.5331236e+01 7.52e+00 1.29e+01 -1.0 1.27e+00 - 3.33e-01 8.46e-01f 1 52 | 2 8.6677632e+00 3.03e-02 1.48e+01 -1.0 1.50e+00 - 5.58e-01 1.00e+00f 1 53 | 3 3.6851388e+00 6.32e-03 3.52e+00 -1.0 1.00e+00 - 7.88e-01 1.00e+00f 1 54 | 4 2.9843319e+00 7.18e-03 6.88e-01 -1.0 3.39e-01 - 1.00e+00 1.00e+00f 1 55 | 5 2.9604395e+00 2.00e-04 1.52e-02 -1.7 5.43e-02 - 1.00e+00 1.00e+00h 1 56 | 6 2.9584392e+00 5.25e-06 5.82e-04 -3.8 1.09e-02 - 1.00e+00 1.00e+00h 1 57 | 7 2.9583701e+00 3.53e-09 4.38e-07 -5.7 3.16e-04 - 1.00e+00 1.00e+00h 1 58 | 8 2.9583700e+00 2.22e-16 1.51e-12 -8.6 4.39e-07 - 1.00e+00 1.00e+00h 1 59 | 60 | Number of Iterations....: 8 61 | 62 | (scaled) (unscaled) 63 | Objective...............: 2.9583699983817793e+00 2.9583699983817793e+00 64 | Dual infeasibility......: 1.5121568742624474e-12 1.5121568742624474e-12 65 | Constraint violation....: 2.2190582704695317e-16 2.2190582704695317e-16 66 | Variable bound violation: 0.0000000000000000e+00 0.0000000000000000e+00 67 | Complementarity.........: 2.5078555374258810e-09 2.5078555374258810e-09 68 | Overall NLP error.......: 2.5078555374258810e-09 2.5078555374258810e-09 69 | 70 | 71 | Number of objective function evaluations = 9 72 | Number of objective gradient evaluations = 9 73 | Number of equality constraint evaluations = 9 74 | Number of inequality constraint evaluations = 0 75 | Number of equality constraint Jacobian evaluations = 9 76 | Number of inequality constraint Jacobian evaluations = 0 77 | Number of Lagrangian Hessian evaluations = 8 78 | Total seconds in IPOPT = 0.048 79 | 80 | EXIT: Optimal Solution Found. 
81 | -------------------------------------------------------------------------------- /doc/x.csv: -------------------------------------------------------------------------------- 1 | 1.215991590970437741e+00,9.261775475316413875e-01 2 | -4.850460751051989816e-01,1.909416640470130488e+00 3 | 4.529269708157035446e-03,-1.398567573819141208e+00 4 | 6.393980400376259032e-01,5.629692366905708623e-01 5 | 2.810888557005614130e-01,-6.506425691218269414e-01 6 | 1.565454730742296574e+00,-4.871253837646960516e-01 7 | -5.734075329657939424e-01,-5.923939242388691628e-01 8 | -2.065249701686487249e+00,-8.639907696798160286e-01 9 | -1.975808070336193767e+00,4.852162794482699215e-02 10 | 1.187487787703212394e+00,-8.309501164110377758e-01 11 | 8.353167563559255049e-01,2.704568257798388164e-01 12 | 1.623470848031986491e-01,-5.023810944913695287e-02 13 | -1.510011717205014259e-01,-2.389480468664097468e-01 14 | -7.246332322333719489e-01,-9.075636620415978850e-01 15 | 2.607452916534565013e+00,-5.767713305683327407e-01 16 | -1.279520012083450731e-01,7.553912258257560186e-01 17 | 1.671089760546760061e-01,5.009171876243807553e-01 18 | 8.709473065233998001e-01,-9.775552447985510485e-01 19 | 4.097890609914435212e-01,9.933230542922587281e-02 20 | -3.278590844936452031e-03,7.513871233717890341e-01 21 | -1.418933167891346159e+00,-1.669405281121371765e+00 22 | 1.001052166298696644e+00,5.433601923799350475e-01 23 | 1.959189749854419960e+00,-6.626237589458466859e-01 24 | 1.544002722597104027e+00,5.705986685931593305e-01 25 | 1.687075593477876279e+00,-7.632591565425168589e-01 26 | -4.652562200215930588e-01,-1.804882100664519040e+00 27 | -1.587652014975547310e+00,-1.627542437883162663e+00 28 | -1.203324206173798649e-01,4.808494666138199425e-02 29 | 1.300521337099061481e-01,2.597225017214818688e-01 30 | 9.028598758039556582e-01,-9.043166251044085779e-01 31 | -1.604016127970010031e+00,6.385924587773739169e-01 32 | 1.747651544552885072e+00,-1.661520062268959874e+00 33 | 6.473056610403982170e-02,-6.607979864731657049e-02 34 | -3.791121025049637439e-04,-1.211016199762456669e+00 35 | -1.245021105231848058e+00,-6.518361078021591704e-01 36 | -7.050103615811872393e-01,4.739867131641401637e-02 37 | 6.144959686029036749e-01,-8.604133652839524027e-01 38 | -1.761283801606972343e-01,-3.845555442298253523e-01 39 | -1.379676953035884113e+00,1.006292809214440531e+00 40 | -1.580994078045894424e+00,-5.768918695231487481e-01 41 | -2.335364962069430561e-04,8.356921120651418233e-01 42 | 2.560776596322695831e+00,-1.129706854657618109e+00 43 | 3.397246328428794593e-01,5.298041779152827813e-01 44 | -1.633975754963524984e+00,1.441568620657900368e+00 45 | -4.359418985905065824e-01,-2.471644500127289312e+00 46 | -2.647675941860244131e-01,-7.968952554704767932e-01 47 | -2.492085135043939470e+00,5.770721271805400177e-01 48 | -2.571193424262786520e-01,-2.030453860429926871e-01 49 | -3.782967529696374398e-01,3.711458733713088320e-01 50 | 8.377383991467055591e-01,-6.039851867158205767e-01 51 | 2.107401991811244546e+00,8.658978747289991507e-02 52 | 1.385257248003188613e+00,-1.556772353920794771e-01 53 | -1.369824482642111541e+00,1.167782061659807358e+00 54 | -1.377489341822618174e+00,2.544208433012131176e-01 55 | 2.612819663252514690e+00,3.376026620752021756e-01 56 | 8.305390175729218671e-03,-4.118769661224673806e-01 57 | 6.653058342623591193e-01,-4.876062240724935437e-01 58 | -3.955777551418172555e-01,-4.325581878196209096e-01 59 | 1.813106889730054239e-01,3.944521423782968439e-01 60 | 2.469357404384131816e-01,-4.209844808202629629e-01 61 | 
-5.841447617855288765e-01,2.897748568964129134e-01 62 | -3.171938174408879818e-01,2.075400798645438805e+00 63 | 7.323468179645951093e-01,8.711247034316923488e-01 64 | -4.260821002427601090e-01,-3.260235321678411347e-01 65 | -5.207853424007808929e-01,1.201213922163944847e+00 66 | 5.040402838902382499e-01,-4.080753730215513908e-01 67 | -3.621249302675313420e-01,-2.038124535177853858e+00 68 | 1.215046284887489669e+00,-1.008086310917404083e+00 69 | -2.913279427184061632e+00,-1.870791921025855675e+00 70 | 3.804289416263564316e-01,-3.515134840413086659e-01 71 | 1.739078926074321130e+00,1.841837918955169934e-02 72 | -2.436232936961063800e+00,1.676437312275282698e+00 73 | 2.643486044390405132e-01,3.269273737641626432e-01 74 | -5.394147209158433043e-01,-2.191005288088642422e-01 75 | -3.902906233385816659e-01,8.294055811834891712e-01 76 | -4.438527421041750798e-01,-2.211135309007885130e+00 77 | -2.080155424499641637e+00,2.356145581085659357e-01 78 | 2.300638600222202790e+00,7.708651938869668374e-01 79 | 1.192512429090090365e+00,-1.478586245779841546e+00 80 | 1.330522826932293734e+00,1.143754043206929083e+00 81 | 2.033432920943434308e-01,3.384964074944141199e-01 82 | -1.119534874474377384e+00,-4.152879139008012754e-01 83 | -5.919503764010738411e-01,6.327818661062848404e-01 84 | 9.224057089744708282e-01,2.270692857804395892e+00 85 | -1.853135050148211338e-01,1.818662550584951576e-01 86 | 7.908873416478194862e-01,2.482205863003360824e-01 87 | -4.947250415538235746e-01,-4.593608995402441164e-01 88 | -6.620850616447171433e-01,-8.498443694647918045e-01 89 | 8.902778036103065062e-02,8.303358165442455974e-01 90 | 1.489254253608170431e-01,-8.560838259088672242e-01 91 | -3.848609510114946453e-01,7.156623721939246729e-02 92 | 4.219952608497866686e-01,-4.776574467651166778e-01 93 | -8.006803200121124231e-01,4.789798257463918629e-01 94 | 6.248126914047237301e-01,3.336621052869482851e-01 95 | 3.770368693403891580e-01,1.037539944257899194e+00 96 | 6.578989202725575858e-01,-5.100163988547470328e-01 97 | -3.224252701090510320e-01,-2.698749352933712542e-01 98 | -1.367432217471818978e+00,-9.787637157823073641e-01 99 | 1.366262955439385030e+00,-4.442932600761115847e-01 100 | -7.743206910578889213e-01,3.773004930448521921e-01 101 | -------------------------------------------------------------------------------- /doc/divergences_files/libs/quarto-html/anchor.min.js: -------------------------------------------------------------------------------- 1 | // @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&dn=expat.txt Expat 2 | // 3 | // AnchorJS - v5.0.0 - 2023-01-18 4 | // https://www.bryanbraun.com/anchorjs/ 5 | // Copyright (c) 2023 Bryan Braun; Licensed MIT 6 | // 7 | // @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&dn=expat.txt Expat 8 | !function(A,e){"use strict";"function"==typeof define&&define.amd?define([],e):"object"==typeof module&&module.exports?module.exports=e():(A.AnchorJS=e(),A.anchors=new A.AnchorJS)}(globalThis,function(){"use strict";return function(A){function 
u(A){A.icon=Object.prototype.hasOwnProperty.call(A,"icon")?A.icon:"",A.visible=Object.prototype.hasOwnProperty.call(A,"visible")?A.visible:"hover",A.placement=Object.prototype.hasOwnProperty.call(A,"placement")?A.placement:"right",A.ariaLabel=Object.prototype.hasOwnProperty.call(A,"ariaLabel")?A.ariaLabel:"Anchor",A.class=Object.prototype.hasOwnProperty.call(A,"class")?A.class:"",A.base=Object.prototype.hasOwnProperty.call(A,"base")?A.base:"",A.truncate=Object.prototype.hasOwnProperty.call(A,"truncate")?Math.floor(A.truncate):64,A.titleText=Object.prototype.hasOwnProperty.call(A,"titleText")?A.titleText:""}function d(A){var e;if("string"==typeof A||A instanceof String)e=[].slice.call(document.querySelectorAll(A));else{if(!(Array.isArray(A)||A instanceof NodeList))throw new TypeError("The selector provided to AnchorJS was invalid.");e=[].slice.call(A)}return e}this.options=A||{},this.elements=[],u(this.options),this.add=function(A){var e,t,o,i,n,s,a,r,l,c,h,p=[];if(u(this.options),0!==(e=d(A=A||"h2, h3, h4, h5, h6")).length){for(null===document.head.querySelector("style.anchorjs")&&((A=document.createElement("style")).className="anchorjs",A.appendChild(document.createTextNode("")),void 0===(h=document.head.querySelector('[rel="stylesheet"],style'))?document.head.appendChild(A):document.head.insertBefore(A,h),A.sheet.insertRule(".anchorjs-link{opacity:0;text-decoration:none;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale}",A.sheet.cssRules.length),A.sheet.insertRule(":hover>.anchorjs-link,.anchorjs-link:focus{opacity:1}",A.sheet.cssRules.length),A.sheet.insertRule("[data-anchorjs-icon]::after{content:attr(data-anchorjs-icon)}",A.sheet.cssRules.length),A.sheet.insertRule('@font-face{font-family:anchorjs-icons;src:url(data:n/a;base64,AAEAAAALAIAAAwAwT1MvMg8yG2cAAAE4AAAAYGNtYXDp3gC3AAABpAAAAExnYXNwAAAAEAAAA9wAAAAIZ2x5ZlQCcfwAAAH4AAABCGhlYWQHFvHyAAAAvAAAADZoaGVhBnACFwAAAPQAAAAkaG10eASAADEAAAGYAAAADGxvY2EACACEAAAB8AAAAAhtYXhwAAYAVwAAARgAAAAgbmFtZQGOH9cAAAMAAAAAunBvc3QAAwAAAAADvAAAACAAAQAAAAEAAHzE2p9fDzz1AAkEAAAAAADRecUWAAAAANQA6R8AAAAAAoACwAAAAAgAAgAAAAAAAAABAAADwP/AAAACgAAA/9MCrQABAAAAAAAAAAAAAAAAAAAAAwABAAAAAwBVAAIAAAAAAAIAAAAAAAAAAAAAAAAAAAAAAAMCQAGQAAUAAAKZAswAAACPApkCzAAAAesAMwEJAAAAAAAAAAAAAAAAAAAAARAAAAAAAAAAAAAAAAAAAAAAQAAg//0DwP/AAEADwABAAAAAAQAAAAAAAAAAAAAAIAAAAAAAAAIAAAACgAAxAAAAAwAAAAMAAAAcAAEAAwAAABwAAwABAAAAHAAEADAAAAAIAAgAAgAAACDpy//9//8AAAAg6cv//f///+EWNwADAAEAAAAAAAAAAAAAAAAACACEAAEAAAAAAAAAAAAAAAAxAAACAAQARAKAAsAAKwBUAAABIiYnJjQ3NzY2MzIWFxYUBwcGIicmNDc3NjQnJiYjIgYHBwYUFxYUBwYGIwciJicmNDc3NjIXFhQHBwYUFxYWMzI2Nzc2NCcmNDc2MhcWFAcHBgYjARQGDAUtLXoWOR8fORYtLTgKGwoKCjgaGg0gEhIgDXoaGgkJBQwHdR85Fi0tOAobCgoKOBoaDSASEiANehoaCQkKGwotLXoWOR8BMwUFLYEuehYXFxYugC44CQkKGwo4GkoaDQ0NDXoaShoKGwoFBe8XFi6ALjgJCQobCjgaShoNDQ0NehpKGgobCgoKLYEuehYXAAAADACWAAEAAAAAAAEACAAAAAEAAAAAAAIAAwAIAAEAAAAAAAMACAAAAAEAAAAAAAQACAAAAAEAAAAAAAUAAQALAAEAAAAAAAYACAAAAAMAAQQJAAEAEAAMAAMAAQQJAAIABgAcAAMAAQQJAAMAEAAMAAMAAQQJAAQAEAAMAAMAAQQJAAUAAgAiAAMAAQQJAAYAEAAMYW5jaG9yanM0MDBAAGEAbgBjAGgAbwByAGoAcwA0ADAAMABAAAAAAwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAH//wAP) format("truetype")}',A.sheet.cssRules.length)),h=document.querySelectorAll("[id]"),t=[].map.call(h,function(A){return A.id}),i=0;i\]./()*\\\n\t\b\v\u00A0]/g,"-").replace(/-{2,}/g,"-").substring(0,this.options.truncate).replace(/^-+|-+$/gm,"").toLowerCase()},this.hasAnchorJSLink=function(A){var e=A.firstChild&&-1<(" "+A.firstChild.className+" ").indexOf(" anchorjs-link "),A=A.lastChild&&-1<(" "+A.lastChild.className+" ").indexOf(" 
anchorjs-link ");return e||A||!1}}}); 9 | // @license-end -------------------------------------------------------------------------------- /src/divs.jl: -------------------------------------------------------------------------------- 1 | function xlogx(x::Number) 2 | result = x * NaNMath.log(x) 3 | return iszero(x) ? zero(result) : result 4 | end 5 | 6 | function xlogy(x::Number, y::Number) 7 | result = x * NaNMath.log(y) 8 | return iszero(x) && !isnan(y) ? zero(result) : result 9 | end 10 | 11 | alogab(a, b) = xlogy(a, a/b) - a + b 12 | blogab(a, b) = -xlogy(b, a ./ b) + a - b 13 | aloga(a) = xlogx(a) - a + one(eltype(a)) 14 | loga(a) = -log(a) + a - one(eltype(a)) 15 | 16 | ## ------------------------------------------------------- 17 | ## Divergence functions 18 | ## ------------------------------------------------------- 19 | γ(::KullbackLeibler, a::T) where {T <: Real} = aloga(a) 20 | γ(::ReverseKullbackLeibler, a::T) where {T <: Real} = loga(a) 21 | γ(::Hellinger, a::T) where {T <: Real} = 2*a - 4*NaNMath.sqrt(a) + 2 22 | γ(::ChiSquared, a::T) where {T <: Real} = abs2(a - one(eltype(T)))*half(T) 23 | 24 | function γ(d::CressieRead{D}, a::T) where {T <: Real, D} 25 | α = d.α 26 | if a >= 0 27 | (a^(1 + α) + α - a*(1 + α))/(α*(1 + α)) 28 | else 29 | if α > 0 30 | zero(eltype(a)) 31 | else 32 | convert(eltype(a), NaN) 33 | end 34 | end 35 | end 36 | 37 | function γᵤ(d::D, a::T) where {T <: Real, D <: AbstractModifiedDivergence} 38 | (; γ₀, γ₁, γ₂, ρ) = d.m 39 | return (γ₀ + γ₁*(a-ρ) + half(T)*γ₂*(a-ρ)^2) 40 | end 41 | 42 | function γₗ(d::D, a::T) where {T <: Real, D <: AbstractModifiedDivergence} 43 | (; g₀, g₁, g₂, φ) = d.m 44 | return (g₀ + g₁*(a-φ) + half(T)*g₂*(a-φ)^2) 45 | end 46 | 47 | function γ(d::ModifiedDivergence, a::T) where {T <: Real} 48 | (; ρ) = d.m 49 | div = d.d 50 | return a > ρ ? γᵤ(d, a) : γ(div, a) 51 | end 52 | 53 | function γ(d::FullyModifiedDivergence, a::T) where {T <: Real} 54 | (; ρ, φ) = d.m 55 | div = d.d 56 | return a > ρ ? γᵤ(d, a) : a < φ ? γₗ(d, a) : γ(div, a) 57 | end 58 | 59 | function γ(d::AbstractDivergence, a::AbstractArray{T}) where {T <: Real} 60 | out = similar(a) 61 | for j in eachindex(a) 62 | out[j] = γ(d, a[j]) 63 | end 64 | return out 65 | end 66 | 67 | ## ------------------------------------------------------- 68 | ## Gradient 69 | ## ------------------------------------------------------- 70 | ∇ᵧ(::KullbackLeibler, a::T) where {T} = NaNMath.log(a) 71 | ∇ᵧ(::ReverseKullbackLeibler, a::T) where {T} = a > 0 ? -1/a + one(T) : convert(T, -Inf) 72 | function ∇ᵧ(d::CressieRead, a::T) where {T} 73 | return a >= 0 ? (a^d.α - one(T))/d.α : convert(T, sign(d.α)*Inf) 74 | end 75 | ∇ᵧ(d::Hellinger, a::T) where {T} = a > 0 ? 2(one(T)-one(T)/sqrt(a)) : convert(T, -Inf) 76 | ∇ᵧ(d::ChiSquared, a::T) where {T} = a - one(T) 77 | 78 | function ∇ᵤ(d::D, a::T) where {T, D <: AbstractModifiedDivergence} 79 | (; γ₀, γ₁, γ₂, ρ) = d.m 80 | return (γ₁ + γ₂*(a-ρ)) 81 | end 82 | 83 | function ∇ₗ(d::D, a::T) where {T, D <: AbstractModifiedDivergence} 84 | (; g₀, g₁, g₂, φ) = d.m 85 | return (g₁ + g₂*(a-φ)) 86 | end 87 | 88 | function ∇ᵧ(d::ModifiedDivergence, a::T) where {T <: Real} 89 | (; ρ) = d.m 90 | div = d.d 91 | return a > ρ ? ∇ᵤ(d, a) : ∇ᵧ(div, a) 92 | end 93 | 94 | function ∇ᵧ(d::FullyModifiedDivergence, a::T) where {T <: Real} 95 | (; ρ, φ) = d.m 96 | div = d.d 97 | return a > ρ ? ∇ᵤ(d, a) : a < φ ? 
∇ₗ(d, a) : ∇ᵧ(div, a) 98 | end 99 | 100 | ## ------------------------------------------------------- 101 | ## Hessian 102 | ## ------------------------------------------------------- 103 | Hᵧ(::KullbackLeibler, a::T) where {T} = a > 0 ? one(T)/a : convert(T, Inf) 104 | Hᵧ(::ReverseKullbackLeibler, a::T) where {T} = a > 0 ? one(T)/a^2 : convert(T, Inf) 105 | Hᵧ(d::CressieRead, a::T) where {T} = a > 0 ? a^(d.α-1) : convert(T, Inf) 106 | Hᵧ(d::Hellinger, a::T) where {T} = a > 0 ? one(T)/sqrt(a^(3)) : convert(T, Inf) 107 | Hᵧ(d::ChiSquared, a::T) where {T} = one(T) 108 | 109 | function Hᵤ(d::D, a::T) where {T, D <: AbstractModifiedDivergence} 110 | (; γ₀, γ₁, γ₂, ρ) = d.m 111 | return γ₂ 112 | end 113 | 114 | function Hₗ(d::D, a::T) where {T, D <: AbstractModifiedDivergence} 115 | (; g₀, g₁, g₂, φ) = d.m 116 | return g₂ 117 | end 118 | 119 | function Hᵧ(d::ModifiedDivergence, a::T) where {T <: Real} 120 | (; ρ) = d.m 121 | div = d.d 122 | return a > ρ ? Hᵤ(d, a) : Hᵧ(div, a) 123 | end 124 | 125 | function Hᵧ(d::FullyModifiedDivergence, a::T) where {T <: Real} 126 | (; ρ, φ) = d.m 127 | div = d.d 128 | return a > ρ ? Hᵤ(d, a) : a < φ ? Hₗ(d, a) : Hᵧ(div, a) 129 | end 130 | 131 | ## ------------------------------------------------------- 132 | ## Syntax sugar 133 | ## ------------------------------------------------------- 134 | 135 | gradient(d::AbstractDivergence, a::T) where {T <: Real} = ∇ᵧ(d, a) 136 | gradient(d::AbstractDivergence, a::T, b::R) where {T <: Real, R <: Real} = ∇ᵧ(d, a/b) 137 | 138 | function gradient!(u::AbstractVector{T}, 139 | d::AbstractDivergence, 140 | a::AbstractArray{R}) where {T <: Real, R <: Real} 141 | @inbounds for i in eachindex(a, u) 142 | u[i] = ∇ᵧ(d, a[i]) 143 | end 144 | return u 145 | end 146 | 147 | function gradient!(u::AbstractVector{T}, 148 | d::AbstractDivergence, 149 | a::AbstractArray{R}, 150 | b::AbstractArray{S}) where {T <: Real, R <: Real, S <: Real} 151 | @inbounds for i in eachindex(a, b, u) 152 | u[i] = ∇ᵧ(d, a[i]/b[i]) 153 | end 154 | return u 155 | end 156 | 157 | function gradient(d::AbstractDivergence, a::AbstractArray{R}) where {R <: Real} 158 | u = similar(a) 159 | return gradient!(u, d, a) 160 | end 161 | 162 | function gradient(d::AbstractDivergence, 163 | a::AbstractArray{T}, 164 | b::AbstractArray{R}) where {T <: Real, R <: Real} 165 | u = similar(a, promote_type(T, R)) 166 | return gradient!(u, d, a, b) 167 | end 168 | 169 | function gradient_sum(d::AbstractDivergence, a::AbstractArray{R}) where {R <: Real} 170 | r = zero(R) 171 | @inbounds for i in eachindex(a) 172 | r += ∇ᵧ(d, a[i]) 173 | end 174 | return r 175 | end 176 | 177 | hessian(d::AbstractDivergence, a::T) where {T <: Real} = Hᵧ(d, a) 178 | hessian(d::AbstractDivergence, a::T, b::R) where {T <: Real, R <: Real} = Hᵧ(d, a/b) 179 | 180 | function hessian!(u::AbstractVector{R}, 181 | d::AbstractDivergence, 182 | a::AbstractArray{R}) where {R <: Real} 183 | @inbounds for i in eachindex(a, u) 184 | u[i] = Hᵧ(d, a[i]) 185 | end 186 | return u 187 | end 188 | 189 | function hessian!(u::AbstractVector{T}, 190 | d::AbstractDivergence, 191 | a::AbstractArray{R}, 192 | b::AbstractArray{S}) where {T <: Real, R <: Real, S <: Real} 193 | @inbounds for i in eachindex(a, b, u) 194 | u[i] = Hᵧ(d, a[i]/b[i]) 195 | end 196 | return u 197 | end 198 | 199 | function hessian(d::AbstractDivergence, a::AbstractArray{R}) where {R <: Real} 200 | u = similar(a) 201 | return hessian!(u, d, a) 202 | end 203 | 204 | function hessian_sum(d::AbstractDivergence, a::AbstractArray{R}) where {R <: Real} 205 | 
r = zero(R)
206 |     @inbounds for i in eachindex(a)
207 |         r += Hᵧ(d, a[i])
208 |     end
209 |     return r
210 | end
211 | 
212 | half(::Type{T}) where {T <: Real} = convert(T, 0.5)
213 | half(::Type{T}) where {T} = convert(eltype(T), 0.5)
214 | 
--------------------------------------------------------------------------------
/doc/divergences_files/libs/clipboard/clipboard.min.js:
--------------------------------------------------------------------------------
1 | /*!
2 |  * clipboard.js v2.0.11
3 |  * https://clipboardjs.com/
4 |  *
5 |  * Licensed MIT © Zeno Rocha
6 |  */
7 | !function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e():"function"==typeof define&&define.amd?define([],e):"object"==typeof exports?exports.ClipboardJS=e():t.ClipboardJS=e()}(this,function(){return n={686:function(t,e,n){"use strict";n.d(e,{default:function(){return b}});var e=n(279),i=n.n(e),e=n(370),u=n.n(e),e=n(817),r=n.n(e);function c(t){try{return document.execCommand(t)}catch(t){return}}var a=function(t){t=r()(t);return c("cut"),t};function o(t,e){var n,o,t=(n=t,o="rtl"===document.documentElement.getAttribute("dir"),(t=document.createElement("textarea")).style.fontSize="12pt",t.style.border="0",t.style.padding="0",t.style.margin="0",t.style.position="absolute",t.style[o?"right":"left"]="-9999px",o=window.pageYOffset||document.documentElement.scrollTop,t.style.top="".concat(o,"px"),t.setAttribute("readonly",""),t.value=n,t);return e.container.appendChild(t),e=r()(t),c("copy"),t.remove(),e}var f=function(t){var e=1
--------------------------------------------------------------------------------
/doc/divergences.qmd:
--------------------------------------------------------------------------------
50 | 
51 | |               | **$\gamma(u)$**                                                | Domain              | **$\nabla_\gamma(u)$**                 | **$H_\gamma(u)$**   |
52 | | ------------- | -------------------------------------------------------------- | ------------------- | -------------------------------------- | ------------------- |
53 | | $KL$          | $u \log(u) - u + 1$                                            | $(0,+\infty)$       | $\log(u)$                              | $\frac{1}{u}$       |
54 | | $RKL$         | $-\log(u) + u - 1$                                             | $(0,+\infty)$       | $1-\frac{1}{u}$                        | $\frac{1}{u^2}$     |
55 | | $\mathscr{H}$ | $2u - 4\sqrt{u} + 2$                                           | $(0,+\infty)$       | $2\left(1 - \frac{1}{\sqrt{u}}\right)$ | $\frac{1}{u^{3/2}}$ |
56 | | $\chi^2$      | $\frac{1}{2}(u - 1)^2$                                         | $(-\infty,+\infty)$ | $u - 1$                                | $1$                 |
57 | | $CR$          | $\frac{u^{1+\alpha} + \alpha - u(1+\alpha)}{\alpha(1+\alpha)}$ | $(0,+\infty)$       | $\frac{u^\alpha - 1}{\alpha}$          | $u^{\alpha-1}$      |
58 | 
59 | The Cressie Read family of divergences is indexed by $\alpha\in\mathbb{R}$. This family contains the chi-squared divergence ($\alpha = 1$), the Kullback Leibler divergence ($\alpha \to 0$), the reverse Kullback Leibler divergence ($\alpha \to -1$), and the Hellinger distance ($\alpha = -1/2$).
60 | 
61 | When $\alpha<0$, $\gamma$ in the Cressie Read family is not convex on $(-\infty, 0)$, so we set $\gamma(u)=+\infty$ there.
62 | 
63 | ### Convex Conjugate
64 | 
65 | The convex conjugate of $\gamma$ is defined as
66 | $$
67 | \gamma^*(\upsilon) = \sup_{u\in\mathbb{R}} \left\{u\upsilon - \gamma(u)\right\}.
68 | $$
69 | For a twice continuously differentiable function, the convex conjugate is
70 | $$
71 | \gamma^*(z) = z\,(\gamma')^{-1}(z) - \gamma\left((\gamma')^{-1}(z)\right),
72 | $$
73 | where $(\gamma')^{-1}(z) := \{u: \gamma'(u) = z\}$. The domain of $\gamma^*$ is $(-\infty, d)$, where
74 | $$
75 | d = \lim_{u\to +\infty} \gamma(u)/u.
76 | $$
77 | 
78 | The first derivative of the convex conjugate $\gamma^*(z)$ can be found as:
79 | $$
80 | \frac{d}{dz} \gamma^*(z) = (\gamma')^{-1}(z).
$$
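
As a quick numerical sanity check of the conjugate (illustrative only; it assumes divergence instances are scalar-callable, as in `src/plots.jl`), the supremum defining $\gamma^*$ can be approximated on a grid. For $KL$ it should approach $e^z - 1$ (see the table below):

```{julia}
using Divergences
kl = KullbackLeibler()
z = 0.3
u = 0.001:0.001:10
maximum(@. u * z - kl(u)), exp(z) - 1
```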
81 | 
82 | The second derivative can be derived using the inverse function theorem:
83 | $$
84 | \frac{d^2}{dz^2} \gamma^*(z) = \frac{1}{\gamma''((\gamma')^{-1}(z))}.
85 | $$
86 | 
87 | ## Modified divergences
88 | 
89 | |               | $\gamma^*(z)$                                            | $\lim_{u \to \infty} \frac{\gamma(u)}{u}$                              | $\lim_{u \to \infty} \frac{u \gamma'(u)}{\gamma(u)}$                     |
90 | | ------------- | -------------------------------------------------------- | ---------------------------------------------------------------------- | ------------------------------------------------------------------------ |
91 | | $KL$          | $(e^z - 1)$                                              | $+\infty$                                                               | $1$                                                                      |
92 | | $RKL$         | $-\log(1 - z)$                                           | $1$                                                                     | $1$                                                                      |
93 | | $\mathscr{H}$ | $\frac{2z}{2-z}$                                         | $2$                                                                     | $1$                                                                      |
94 | | $\chi^2$      | $\left(z + \frac{z^2}{2}\right)$                         | $+\infty$                                                               | $2$                                                                      |
95 | | $CR$          | $\frac{(1+\alpha z)^{(1+\alpha)/\alpha} - 1}{1+\alpha}$  | $\begin{cases} +\infty & \alpha>0 \\ -1/\alpha & \alpha<0 \end{cases}$  | $\begin{cases} 1+\alpha & \alpha>0 \\ 1 & \mathrm{otherwise}\end{cases}$ |
96 | 
97 | For many of the divergences defined above the effective domain of their conjugate, $\gamma^*$, does not span $\mathbb{R}$ since $\gamma(u)/u \to l < +\infty$ as $u \to +\infty$.
98 | 
99 | For some $\vartheta>0$, let $u_{\vartheta}\equiv 1+\vartheta$. The modified divergence $\gamma_{\vartheta}$ is defined as
100 | $$
101 | \gamma_{\vartheta}(u) = \begin{cases}
102 | \gamma(u_{\vartheta}) + \gamma'(u_{\vartheta})(u-u_{\vartheta}) + \frac{1}{2}\gamma''(u_{\vartheta})(u-u_{\vartheta})^2, & u\geqslant u_{\vartheta}\\
103 | \newline\gamma(u), & u\in (0,u_{\vartheta})\\
104 | \newline \lim_{u\to 0^{+}} \gamma(u), & u=0 \\
105 | \newline+\infty, & u<0
106 | \end{cases}.
107 | $$
108 | 
109 | It is immediate to verify that this divergence still satisfies all the requirements and normalization of $\gamma$. Furthermore, it holds that
110 | $$
111 | \lim_{u\to\infty}\frac{\gamma_{\vartheta}(u)}{u} = +\infty,
112 | \qquad \text{and}\qquad
113 | \lim_{u\to\infty}\frac{u\gamma'_{\vartheta}(u)}{\gamma_{\vartheta}(u)} = 2.
114 | $$
115 | 
116 | The first limit implies that the image of $\gamma'_{\vartheta}$ is the real line and thus $\overline{\mathrm{dom}\,\gamma^*_{\vartheta}}=(-\infty,+\infty)$. The expression for the conjugate follows from the Legendre-Fenchel transform:
117 | $$
118 | \gamma_{\vartheta}^*(\upsilon) =
119 | \begin{cases}
120 | a_{\vartheta}\upsilon^2 + b_{\vartheta}\upsilon + c_{\vartheta}, & \upsilon>\gamma'(u_{\vartheta}),\\
121 | \newline \gamma^*(\upsilon), & \upsilon\leqslant \gamma'(u_{\vartheta})
122 | \end{cases},
123 | $$
124 | 
125 | where $a_{\vartheta} = 1/(2\gamma''(u_{\vartheta}))$,
126 | $b_{\vartheta}=u_{\vartheta} - 2a_{\vartheta}\gamma'(u_{\vartheta})$,
127 | and $c_{\vartheta}= a_{\vartheta}\gamma'(u_{\vartheta})^2 - \gamma(u_{\vartheta})$.
128 | The conjugate $\gamma_{\vartheta}^*(\upsilon)$ has a closed-form
129 | expression whenever the original divergence does.
130 | 
131 | 
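To see what the modification buys, compare gradients on the upper tail: the base $RKL$ gradient is bounded above by $1$ (so $\mathrm{dom}\,\gamma^*$ stops at $1$), while the modified version has an affine tail whose image is all of $\mathbb{R}$. A minimal sketch; the `FullyModifiedDivergence(d, lower, upper)` call mirrors the one in `doc/mdexample.jl`, and its argument convention is an assumption:

```{julia}
using Divergences
rkl  = ReverseKullbackLeibler()
mrkl = FullyModifiedDivergence(rkl, 0.7, 1.2)
u = [10.0, 100.0, 1000.0]
Divergences.gradient.(Ref(rkl), u), Divergences.gradient.(Ref(mrkl), u)
```
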
132 | ## Fully modified divergences
133 | 
134 | For some $\vartheta>0$ and $0 < \varphi < 1-a_{\gamma}$, let $u_{\vartheta}\equiv
135 | 1+\vartheta$ and $u_{\varphi} = a_{\gamma} + \varphi$. The **fully**
136 | modified divergence $\gamma_{\varphi, \vartheta}$ is defined as
137 | $$
138 | \gamma_{\varphi, \vartheta}(u) = \begin{cases}
139 | \gamma(u_{\vartheta}) + \gamma'(u_{\vartheta})(u-u_{\vartheta}) + \frac{1}{2}\gamma''(u_{\vartheta})(u-u_{\vartheta})^2, & u\geqslant u_{\vartheta}\\
140 | \newline\gamma(u), & u\in (u_{\varphi},u_{\vartheta})\\
141 | \newline \gamma(u_{\varphi}) + \gamma'(u_{\varphi})(u-u_{\varphi}) + \frac{1}{2}\gamma''(u_{\varphi})(u-u_{\varphi})^2, & u\leqslant u_{\varphi}\\
142 | \end{cases}.
143 | $$
144 | It is immediate to verify that this divergence still satisfies all the
145 | requirements and normalization of $\gamma$, while being defined on all of
146 | $\mathbb{R}$.
147 | 
148 | 
149 | 
150 | ## Using the `Divergences` package
151 | 
152 | ```{julia}
153 | using Divergences
154 | ```
155 | 
156 | Suppose $a = [0.2, 0.4, 0.4]$ and $b = [0.1, 0.3, 0.6]$.
157 | 
158 | ```{julia}
159 | a = [0.2, 0.4, 0.4]
160 | b = [0.1, 0.3, 0.6]
161 | ```
162 | 
163 | We instantiate a divergence and evaluate it:
164 | 
165 | ```{julia}
166 | KL = KullbackLeibler()
167 | D = KL(a, b)
168 | ```
169 | 
170 | To evaluate the gradient and the Hessian:
171 | 
172 | ```{julia}
173 | Divergences.gradient(KL, a, b)
174 | ```
175 | 
176 | ```{julia}
177 | Divergences.hessian(KL, a, b)
178 | ```
179 | 
180 | ## Application to MDE estimation
181 | 
182 | Consider the following optimization problem:
183 | 
184 | $$
185 | \begin{aligned} & \min_{\theta,\{\pi_{i}\}}\sum_{i=1}^{n}\gamma(\pi_{i})\\
186 | \text{s.t. } & \sum\pi_{i}g(x_{i},\theta)=0\\
187 | & \sum\pi_{i}=1
188 | \end{aligned}
189 | $$
190 | where
191 | $$
192 | g(x_{i},\theta)=\begin{pmatrix}x_i-\theta\\
193 | (x_i-\theta)^{2}-1
194 | \end{pmatrix},
195 | $$
196 | and $x_i\sim N(0,0.64)$. The gradient of
197 | $$
198 | \bar{g}(\theta, \pi) = \sum_{i=1}^n \pi_i g(x_i,\theta)
199 | $$
200 | is
201 | $$
202 | \underset{(m\times k)}{\frac{\partial\bar{g}_{n}(\theta,\pi)}{\partial\theta}}=\sum_{i=1}^{n}\pi_{i}\begin{pmatrix}-1\\
203 | -2(x_{i}-\theta)
204 | \end{pmatrix},\quad\underset{(m\times n)}{\frac{\partial\bar{g}_{n}(\theta,\pi)}{\partial\pi}}=\begin{pmatrix}x_{i}-\theta\\
205 | (x_{i}-\theta)^{2}-1
206 | \end{pmatrix}'.
207 | $$
208 | 
209 | 
--------------------------------------------------------------------------------
/doc/divergences.md:
--------------------------------------------------------------------------------
1 | # `Divergences.jl`
2 | 
3 | `Divergences` is a Julia package that makes it easy to evaluate the
4 | value of divergences and their derivatives. These divergences are used
5 | to good effect in the package
6 | [MomentBasedEstimators](http://github.com/gragusa/MomentBasedEstimators.jl/git).
7 | 
8 | ## Definition
9 | 
10 | A divergence between $a\in \mathbb{R}^n$ and $b\in\mathbb{R}^n$ is
11 | defined as
12 | 
13 | $$
14 | D(a,b) = \sum_{i=1}^n \gamma(a_i/b_i) b_i,
15 | $$
16 | 
17 | where, for $a_{\gamma}\in\mathbb{R}$ with $a_{\gamma}\leqslant 0$,
18 | $\gamma:(a_{\gamma},+\infty)\to\mathbb{R}_{+}$ is strictly convex and
19 | twice continuously differentiable on the interior of its domain
20 | $(a_{\gamma}, +\infty)$. The divergence function is normalized so as to
21 | satisfy $\gamma(1) = 0$, $\gamma'(1)=0$, and $\gamma''(1)=1$. The
22 | normalizations $\gamma(1) = \gamma'(1) = 0$ and $\gamma''(1) = 1$ do
23 | not restrict generality, since for any differentiable convex function
24 | $\gamma$ there exists another, say $\overline{\gamma}$, satisfying the
25 | normalization.
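
These normalizations are easy to check numerically; a minimal sketch (the scalar `gradient`/`hessian` helpers are defined in `src/divs.jl`, while the scalar call `d(1.0)` assumes the callable interface used by `src/plots.jl`):

````julia
using Divergences
d = KullbackLeibler()
d(1.0), Divergences.gradient(d, 1.0), Divergences.hessian(d, 1.0)  # (0.0, 0.0, 1.0)
````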
26 | 
27 | It is convenient to view $\gamma$ as an extended-real valued function,
28 | defined on $\mathbb{R}$ and taking values in $[0, +\infty]$
29 | (see, e.g., p. 23 in Rockafellar, 1970). This means that the convex
30 | function $\gamma$, being defined a priori on $(a_{\gamma}, +\infty)$, can
31 | be extended outside its domain by setting $\gamma(u) = +\infty$ for all
32 | $u \in (-\infty, a_{\gamma})$. As for the boundary value at
33 | $a_{\gamma}$, we let $\gamma(a_{\gamma}) = \lim_{u\to a_{\gamma}^+}
34 | \gamma(u)$, knowing that this limit is possibly $+\infty$. This ensures
35 | that the extension of $\gamma$ is lower-semicontinuous on $\mathbb{R}$.
36 | 
37 | 
38 | The gradient and the Hessian of the divergence with respect to $a$ are
39 | given by
40 | $$
41 | \nabla_{a}D(a,b)\equiv\left.\frac{\partial D(u,v)}{\partial u}\right|_{u=a,v=b}=\left(\gamma'(a_{1}/b_{1}),\ldots,\gamma'(a_{n}/b_{n})\right),
42 | $$
43 | and
44 | $$
45 | \nabla_{a}^{2}D(a,b)\equiv\left.\frac{\partial^{2}D(u,v)}{\partial
46 | u\,\partial
47 | u'}\right|_{u=a,v=b}=\mathrm{diag}\left(\frac{\gamma''(a_{1}/b_{1})}{b_{1}},\ldots,\frac{\gamma''(a_{n}/b_{n})}{b_{n}}\right),
48 | $$
49 | respectively. Given the normalizations $\gamma'(1)=0$ and $\gamma''(1)=1$, we have that
50 | $$
51 | \nabla_{a}D(a,a) = 0, \quad \nabla^2_{a}D(a,a) = \mathrm{diag}\left(\frac{1}{a_{1}},\ldots,\frac{1}{a_{n}}\right).
52 | $$
53 | 
54 | The conjugate of $\gamma$ is defined as
55 | $$
56 | \gamma^*(\upsilon) = \sup_{u\in\mathbb{R}} \left\{u\upsilon - \gamma(u)\right\}.
57 | $$
58 | 
59 | The conjugate of the convex extended-real valued function $\gamma$ on
60 | $\mathbb{R}$, $\gamma^*$, is itself a convex lower semi-continuous
61 | function. Moreover, it follows from the above definition that $\gamma^*$ is
62 | nondecreasing on $\mathbb{R}$. Define
63 | 
64 | $$
65 | d = \lim_{u\to +\infty} \gamma(u)/u.
66 | $$
67 | 
68 | Then $\overline{\mathrm{dom}\,\gamma^*} = (-\infty, d]$, where
69 | $\mathrm{dom}\,\gamma^* = \{\upsilon \in \mathbb{R}: \gamma^*(\upsilon) <
70 | +\infty \}$ is the effective domain of $\gamma^*$.
71 | 
72 | #### Modified divergences
73 | 
74 | For many of the divergences defined above the effective domain of
75 | their conjugate, $\gamma^*$, does not span $\mathbb{R}$ since
76 | $\gamma(u)/u \to l < +\infty$ as $u \to +\infty$.
77 | 
78 | For some $\vartheta>0$, let $u_{\vartheta}\equiv 1+\vartheta$. The
79 | modified divergence $\gamma_{\vartheta}$ is defined as
80 | $$
81 | \gamma_{\vartheta}(u) = \begin{cases}
82 | \gamma(u_{\vartheta}) + \gamma'(u_{\vartheta})(u-u_{\vartheta}) + \frac{1}{2}\gamma''(u_{\vartheta})(u-u_{\vartheta})^2, & u\geqslant u_{\vartheta}\\
83 | \newline\gamma(u), & u\in (a_{\gamma},u_{\vartheta})\\
84 | \newline \lim_{u\to a_{\gamma}^{+}} \gamma(u), & u=a_{\gamma} \\
85 | \newline+\infty, & u<a_{\gamma}
86 | \end{cases}.
87 | $$
88 | 
89 | It is immediate to verify that this divergence still satisfies all the
90 | requirements and normalization of $\gamma$. Furthermore, it holds that
91 | $$
92 | \lim_{u\to\infty}\frac{\gamma_{\vartheta}(u)}{u} = +\infty,
93 | \qquad \text{and}\qquad
94 | \lim_{u\to\infty}\frac{u\gamma'_{\vartheta}(u)}{\gamma_{\vartheta}(u)} = 2.
95 | $$
96 | 
97 | The first limit implies that the image of $\gamma'_{\vartheta}$ is the
98 | real line and thus
99 | $\overline{\mathrm{dom}\,\gamma^*_{\vartheta}}=(-\infty,+\infty)$.
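
Both displayed limits are easy to eyeball numerically; a sketch (the `FullyModifiedDivergence(d, lower, upper)` constructor call follows `doc/mdexample.jl`, and the scalar calls assume the callable interface used by `src/plots.jl`):

````julia
using Divergences
m = FullyModifiedDivergence(ReverseKullbackLeibler(), 0.7, 1.2)
u = 10.0 .^ (2:2:6)
m.(u) ./ u, u .* Divergences.gradient.(Ref(m), u) ./ m.(u)  # first diverges, second tends to 2
````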
100 | The expression for the conjugate follows from the Legendre-Fenchel transform:
101 | $$
102 | \gamma_{\vartheta}^*(\upsilon) =
103 | \begin{cases}
104 | a_{\vartheta}\upsilon^2 + b_{\vartheta}\upsilon + c_{\vartheta}, & \upsilon>\gamma'(u_{\vartheta}),\\
105 | \newline \gamma^*(\upsilon), & \upsilon\leqslant \gamma'(u_{\vartheta})
106 | \end{cases},
107 | $$
108 | 
109 | where $a_{\vartheta} = 1/(2\gamma''(u_{\vartheta}))$,
110 | $b_{\vartheta}=u_{\vartheta} - 2a_{\vartheta}\gamma'(u_{\vartheta})$,
111 | and $c_{\vartheta}= a_{\vartheta}\gamma'(u_{\vartheta})^{2} -
112 | \gamma(u_{\vartheta})$.
113 | The conjugate $\gamma_{\vartheta}^*(\upsilon)$ has a closed-form
114 | expression whenever the original divergence does.
115 | 
116 | #### Fully modified divergences
117 | 
118 | For some $\vartheta>0$ and $0 < \varphi < 1-a_{\gamma}$, let $u_{\vartheta}\equiv
119 | 1+\vartheta$ and $u_{\varphi} = a_{\gamma} + \varphi$. The **fully**
120 | modified divergence $\gamma_{\varphi, \vartheta}$ is defined as
121 | $$
122 | \gamma_{\varphi, \vartheta}(u) = \begin{cases}
123 | \gamma(u_{\vartheta}) + \gamma'(u_{\vartheta})(u-u_{\vartheta}) + \frac{1}{2}\gamma''(u_{\vartheta})(u-u_{\vartheta})^2, & u\geqslant u_{\vartheta}\\
124 | \newline\gamma(u), & u\in (u_{\varphi},u_{\vartheta})\\
125 | \newline \gamma(u_{\varphi}) + \gamma'(u_{\varphi})(u-u_{\varphi}) + \frac{1}{2}\gamma''(u_{\varphi})(u-u_{\varphi})^2, & u\leqslant u_{\varphi}\\
126 | \end{cases}.
127 | $$
128 | It is immediate to verify that this divergence still satisfies all the
129 | requirements and normalization of $\gamma$, while being defined on all of
130 | $\mathbb{R}$.
131 | 
132 | ## Examples of divergences
133 | 
134 | The following divergence types are defined by `Divergences`.
135 | 
136 | #### Kullback-Leibler divergence
137 | 
138 | $$
139 | D^{KL}(a,b) = \sum_{i=1}^n \gamma^{KL}(a_i/b_i) b_i
140 | $$
141 | 
142 | $$
143 | \gamma^{KL}(u) = u\log(u) - u + 1
144 | $$
145 | 
146 | The gradient and the Hessian are given by
147 | 
148 | $$
149 | \nabla_{a}D^{KL}(a,b) = \left(\log(a_1/b_1),\ldots,\log(a_n/b_n)
150 | \right), \quad \nabla_{a}^{2}D^{KL}(a,b) = \mathrm{diag}(1/a_1, \ldots, 1/a_n)
151 | $$
152 | 
153 | #### Reverse Kullback-Leibler divergence
154 | 
155 | $$
156 | D^{rKL}(a,b) = \sum_{i=1}^n \gamma^{rKL}(a_i/b_i) b_i
157 | $$
158 | 
159 | $$
160 | \gamma^{rKL}(u) = -\log(u) + u - 1
161 | $$
162 | 
163 | The gradient and the Hessian are given by
164 | 
165 | $$
166 | \nabla_{a}D^{rKL}(a,b) = \left(1-b_1/a_1,\ldots, 1 - b_n/a_n
167 | \right), \quad \nabla_{a}^{2}D^{rKL}(a,b) = \mathrm{diag}(b_1/a^2_1, \ldots, b_n/a^2_n)
168 | $$
169 | 
170 | For the reverse Kullback Leibler divergence,
171 | $\gamma(u)=-\log(u)+u-1$, we have that $\gamma(u)/u \to 1$ as
172 | $u\to\infty$. The modified reverse Kullback Leibler divergence is given by
173 | $$
174 | \gamma_{\vartheta}(u) =
175 | \begin{cases}
176 | -\log(u_{\vartheta}) + (1-\frac{1}{u_{\vartheta}})u+ \frac{1}{2u_{\vartheta}^2}(u-u_{\vartheta})^2, & u>u_{\vartheta}\\
177 | \newline -\log(u) + u - 1, &0 < u\leqslant u_{\vartheta}\\
178 | \newline +\infty, & u\leqslant0.
179 | \end{cases}.
180 | $$
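
This piecewise definition can be transcribed directly; a minimal sketch in plain Julia (independent of the package, with $u_{\vartheta} = 1.5$ chosen for illustration) that checks value and slope agreement at the junction:

````julia
uϑ = 1.5
γ(u) = -log(u) + u - 1
γϑ(u) = u <= 0 ? Inf : u <= uϑ ? γ(u) :
        -log(uϑ) + (1 - 1/uϑ) * u + (u - uϑ)^2 / (2 * uϑ^2)
h = 1e-6
γϑ(uϑ) ≈ γ(uϑ)                               # values agree at u_ϑ
(γϑ(uϑ + h) - γϑ(uϑ - h)) / (2h) ≈ 1 - 1/uϑ  # so do the slopes
````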
181 | 
182 | The conjugate of $\gamma_{\vartheta}$ is given by
183 | $$
184 | \gamma_{\vartheta}^*(\upsilon) =
185 | \begin{cases}
186 | a_{\vartheta}\upsilon^2 + b_{\vartheta}\upsilon + c_{\vartheta}, & \upsilon > 1-\frac{1}{u_{\vartheta}} \\
187 | \newline -\log(1- \upsilon), & \upsilon \leqslant 1-\frac{1}{u_{\vartheta}},
188 | \end{cases}
189 | $$
190 | where $a_{\vartheta}=u^2_{\vartheta}/2$, $b_{\vartheta}=u_{\vartheta}(2-u_{\vartheta})$, and
191 | $c_{\vartheta}=\log(u_{\vartheta})-u_{\vartheta}+1+(u_{\vartheta}-1)^2/2$.
192 | 
193 | 
194 | 
195 | #### Chi-squared divergence
196 | 
197 | $$
198 | D^{\chi}(a,b) = \sum_{i=1}^n \gamma^{\chi}(a_i/b_i) b_i
199 | $$
200 | 
201 | $$
202 | \gamma^{\chi}(u) = u^2/2 - u + 0.5
203 | $$
204 | 
205 | The gradient and the Hessian are given by
206 | 
207 | $$
208 | \nabla_{a}D^{\chi}(a,b) = \left((a_1 - b_1)/b_1, \ldots, (a_n - b_n)/b_n
209 | \right), \quad \nabla_{a}^{2}D^{\chi}(a,b) =
210 | \mathrm{diag}\left(\frac{1}{b_1},\ldots, \frac{1}{b_n}\right)
211 | $$
212 | 
213 | #### Cressie-Read divergences
214 | 
215 | The type `CressieRead` is a family of divergences. Members of this
216 | family are indexed by a parameter $\alpha$, with
217 | 
218 | $$
219 | \gamma_{\alpha}^{CR}(u)=\frac{u^{1+\alpha}-1}{\alpha(\alpha+1)}-\frac{u-1}{\alpha}, \qquad u = a/b.
220 | $$
221 | 
222 | The gradient and the Hessian are given by
223 | 
224 | $$
225 | \nabla_{a}D^{CR}_{\alpha}(a,b) = \left(
226 | \frac{\left(\frac{a_1}{b_1}\right)^{\alpha }-1}{\alpha}, \ldots,\frac{\left(\frac{a_n}{b_n}\right)^{\alpha }-1}{\alpha}
227 | \right), \quad
228 | \nabla_{a}^{2}D^{CR}_{\alpha}(a,b) = \mathrm{diag}\left(\frac{\left(\frac{a_1}{b_1}\right)^{\alpha }}{a_1},\ldots,
229 | \frac{\left(\frac{a_n}{b_n}\right)^{\alpha }}{a_n}
230 | \right)
231 | $$
232 | 
233 | The Cressie-Read family contains the chi-squared divergence ($\alpha =
234 | 1$), the Kullback Leibler divergence ($\alpha \to 0$), the reverse
235 | Kullback Leibler divergence ($\alpha \to -1$), and the Hellinger distance ($\alpha = -1/2$).
236 | 
237 | For instance, for the Cressie Read family of divergences defined above,
238 | $$
239 | \lim_{u\to +\infty}\gamma^{CR}_{\alpha}(u)/u = -1/\alpha
240 | $$
241 | for all $\alpha < 0$. Also, for all $\alpha\leqslant 0$, the
242 | divergence is not convex on $(-\infty, 0)$ and thus a fully modified
243 | version can be considered.
244 | 
245 | 
246 | 
247 | ## Using the `Divergences` package
248 | 
249 | ````julia
250 | using Divergences
251 | ````
252 | 
253 | 
254 | 
255 | 
256 | 
257 | Suppose $a = [0.2, 0.4, 0.4]$ and $b = [0.1, 0.3, 0.6]$.
258 | 259 | ````julia 260 | a = [0.2, 0.4, 0.4] 261 | b = [0.1, 0.3, 0.6] 262 | ```` 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | ````julia 271 | evaluate(KullbackLeibler(), a, b) 272 | 273 | ```` 274 | 275 | 276 | ```` 277 | 0.0915162218494357 278 | ```` 279 | 280 | 281 | 282 | ````julia 283 | gradient(KullbackLeibler(), a, b) 284 | 285 | ```` 286 | 287 | 288 | ```` 289 | 3-element Array{Float64,1}: 290 | 0.693147 291 | 0.287682 292 | -0.405465 293 | ```` 294 | 295 | 296 | 297 | ````julia 298 | hessian(KullbackLeibler(), a, b) 299 | ```` 300 | 301 | 302 | ```` 303 | 3-element Array{Float64,1}: 304 | 50.0 305 | 8.33333 306 | 4.16667 307 | ```` 308 | 309 | 310 | 311 | 312 | 313 | ````julia 314 | evaluate(ReverseKullbackLeibler(), a, b) 315 | 316 | ```` 317 | 318 | 319 | ```` 320 | 0.0876597250733698 321 | ```` 322 | 323 | 324 | 325 | ````julia 326 | gradient(ReverseKullbackLeibler(), a, b) 327 | 328 | ```` 329 | 330 | 331 | ```` 332 | 3-element Array{Float64,1}: 333 | 0.5 334 | 0.25 335 | -0.5 336 | ```` 337 | 338 | 339 | 340 | ````julia 341 | hessian(ReverseKullbackLeibler(), a, b) 342 | ```` 343 | 344 | 345 | ```` 346 | 3-element Array{Float64,1}: 347 | 2.5 348 | 1.875 349 | 3.75 350 | ```` 351 | 352 | 353 | -------------------------------------------------------------------------------- /doc/z.csv: -------------------------------------------------------------------------------- 1 | 4.967141530112326731e-01,-1.382643011711846559e-01,6.476885381006924902e-01,1.523029856408025351e+00,-2.341533747233359719e-01,9.261775475316413875e-01 2 | -2.341369569491805469e-01,1.579212815507391454e+00,7.674347291529087789e-01,-4.694743859349521098e-01,5.425600435859646575e-01,1.909416640470130488e+00 3 | -4.634176928124622563e-01,-4.657297535702568658e-01,2.419622715660341150e-01,-1.913280244657797891e+00,-1.724917832513032767e+00,-1.398567573819141208e+00 4 | -5.622875292409726944e-01,-1.012831120334423796e+00,3.142473325952738761e-01,-9.080240755212108938e-01,-1.412303701335291484e+00,5.629692366905708623e-01 5 | 1.465648768921554046e+00,-2.257763004865356582e-01,6.752820468792383735e-02,-1.424748186213456780e+00,-5.443827245251826596e-01,-6.506425691218269414e-01 6 | 1.109225897098660846e-01,-1.150993577422302794e+00,3.756980183456719580e-01,-6.006386899188049799e-01,-2.916937497932767798e-01,-4.871253837646960516e-01 7 | -6.017066122293969199e-01,1.852278184508937775e+00,-1.349722473793392094e-02,-1.057710928955900354e+00,8.225449121031890298e-01,-5.923939242388691628e-01 8 | -1.220843649971022238e+00,2.088635950047554035e-01,-1.959670123879775572e+00,-1.328186048898430505e+00,1.968612358691235187e-01,-8.639907696798160286e-01 9 | 7.384665799954104326e-01,1.713682811899704950e-01,-1.156482823882405281e-01,-3.011036955892887890e-01,-1.478521990367427374e+00,4.852162794482699215e-02 10 | -7.198442083947086401e-01,-4.606387709597875024e-01,1.057122226218915717e+00,3.436182895684614103e-01,-1.763040155362733952e+00,-8.309501164110377758e-01 11 | 3.240839693947950018e-01,-3.850822804163165358e-01,-6.769220003059587265e-01,6.116762888408678878e-01,1.030999522495950949e+00,2.704568257798388164e-01 12 | 9.312801191161985681e-01,-8.392175232226385395e-01,-3.092123758512145826e-01,3.312634314035639571e-01,9.755451271223591903e-01,-5.023810944913695287e-02 13 | -4.791742378452899520e-01,-1.856589766638171157e-01,-1.106334974006028204e+00,-1.196206624080670800e+00,8.125258223941980162e-01,-2.389480468664097468e-01 14 | 
1.356240028570822931e+00,-7.201012158033384680e-02,1.003532897892024156e+00,3.616360250476341465e-01,-6.451197546051242737e-01,-9.075636620415978850e-01 15 | 3.613956055084139307e-01,1.538036566465969202e+00,-3.582603910995153795e-02,1.564643655814006218e+00,-2.619745104089744370e+00,-5.767713305683327407e-01 16 | 8.219025043752238302e-01,8.704706823817121020e-02,-2.990073504658674053e-01,9.176077653550229751e-02,-1.987568914600892800e+00,7.553912258257560186e-01 17 | -2.196718878375119310e-01,3.571125715117464128e-01,1.477894044741516089e+00,-5.182702182736473873e-01,-8.084936028931876129e-01,5.009171876243807553e-01 18 | -5.017570435845365440e-01,9.154021177020741362e-01,3.287511096596844595e-01,-5.297602037670388064e-01,5.132674331133560974e-01,-9.775552447985510485e-01 19 | 9.707754934804038727e-02,9.686449905328892163e-01,-7.020530938773523744e-01,-3.276621465977682113e-01,-3.921081531321576330e-01,9.933230542922587281e-02 20 | -1.463514948132118576e+00,2.961202770645760540e-01,2.610552721798893305e-01,5.113456642460889774e-03,-2.345871333751469168e-01,7.513871233717890341e-01 21 | -1.415370742050414243e+00,-4.206453227653590421e-01,-3.427145165267694860e-01,-8.022772692216189050e-01,-1.612857116660091350e-01,-1.669405281121371765e+00 22 | 4.040508568145383950e-01,1.886185901210530202e+00,1.745778128318389577e-01,2.575503907227643663e-01,-7.444591576616721440e-02,5.433601923799350475e-01 23 | -1.918771215299041488e+00,-2.651387544921687822e-02,6.023020994102643716e-02,2.463242112485286128e+00,-1.923609647811225232e-01,-6.626237589458466859e-01 24 | 3.015473423336124670e-01,-3.471176970524331162e-02,-1.168678037619532040e+00,1.142822814515020546e+00,7.519330326867741388e-01,5.705986685931593305e-01 25 | 7.910319470430469124e-01,-9.093874547947389253e-01,1.402794310936099187e+00,-1.401851062792280889e+00,5.868570938002702908e-01,-7.632591565425168589e-01 26 | 2.190455625809978546e+00,-9.905363251306883443e-01,-5.662977296027719154e-01,9.965136508764121936e-02,-5.034756541161992116e-01,-1.804882100664519040e+00 27 | -1.550663431066132691e+00,6.856297480602732697e-02,-1.062303713726104881e+00,4.735924306351815827e-01,-9.194242342338031504e-01,-1.627542437883162663e+00 28 | 1.549934405017539429e+00,-7.832532923362370836e-01,-3.220615162056755798e-01,8.135172173696697540e-01,-1.230864316433955219e+00,4.808494666138199425e-02 29 | 2.274599346041294157e-01,1.307142754282428099e+00,-1.607483234561227547e+00,1.846338585323042125e-01,2.598827942484235320e-01,2.597225017214818688e-01 30 | 7.818228717773103575e-01,-1.236950710878081949e+00,-1.320456613084276309e+00,5.219415656168976403e-01,2.969846732331860584e-01,-9.043166251044085779e-01 31 | 2.504928503458765388e-01,3.464482094969756898e-01,-6.800247215784908095e-01,2.322536971610035528e-01,2.930724732986812464e-01,6.385924587773739169e-01 32 | -7.143514180263678126e-01,1.865774511144756564e+00,4.738329209117875251e-01,-1.191303497202648609e+00,6.565536086338297217e-01,-1.661520062268959874e+00 33 | -9.746816702273214394e-01,7.870846037424520381e-01,1.158595579007404064e+00,-8.206823183517104603e-01,9.633761292443218105e-01,-6.607979864731657049e-02 34 | 4.127809269364983158e-01,8.220601599944900029e-01,1.896792982653947357e+00,-2.453881160028704989e-01,-7.537361643574895798e-01,-1.211016199762456669e+00 35 | -8.895144296255232952e-01,-8.158102849654382815e-01,-7.710170941410419976e-02,3.411519748166438881e-01,2.766907993300190549e-01,-6.518361078021591704e-01 36 | 
8.271832490360238044e-01,1.300189187790701945e-02,1.453534077157316862e+00,-2.646568332379560795e-01,2.720169166589618825e+00,4.739867131641401637e-02 37 | 6.256673477650062098e-01,-8.571575564162825511e-01,-1.070892498061112308e+00,4.824724152431852930e-01,-2.234627853258508989e-01,-8.604133652839524027e-01 38 | 7.140004940920919863e-01,4.732376245735448461e-01,-7.282891265687277405e-02,-8.467937180684049769e-01,-1.514847224685864635e+00,-3.845555442298253523e-01 39 | -4.465149520670210759e-01,8.563987943234723232e-01,2.140937441302039612e-01,-1.245738778711988015e+00,1.731809258511819993e-01,1.006292809214440531e+00 40 | 3.853173797288367841e-01,-8.838574362011329955e-01,1.537251059455279067e-01,5.820871844599989631e-02,-1.142970297830623094e+00,-5.768918695231487481e-01 41 | 3.577873603482832898e-01,5.607845263682343928e-01,1.083051243175277012e+00,1.053802052034902959e+00,-1.377669367957091051e+00,8.356921120651418233e-01 42 | -9.378250399151227823e-01,5.150352672086597750e-01,5.137859509122087998e-01,5.150476863060479138e-01,3.852731490654721203e+00,-1.129706854657618109e+00 43 | 5.708905106931669859e-01,1.135565640180598912e+00,9.540017634932023149e-01,6.513912513057980025e-01,-3.152692446403456139e-01,5.298041779152827813e-01 44 | 7.589692204932674269e-01,-7.728252145375718030e-01,-2.368186067400088746e-01,-4.853635478291034588e-01,8.187413938632255583e-02,1.441568620657900368e+00 45 | 2.314658566673508666e+00,-1.867265192591748058e+00,6.862601903745134679e-01,-1.612715871189651651e+00,-4.719318657894334690e-01,-2.471644500127289312e+00 46 | 1.088950596967366069e+00,6.428001909546277037e-02,-1.077744777929306119e+00,-7.153037092599682234e-01,6.795977489346758382e-01,-7.968952554704767932e-01 47 | -7.303666317171366718e-01,2.164585895819748640e-01,4.557183990381378363e-02,-6.516003476058170873e-01,2.143944089325325653e+00,5.770721271805400177e-01 48 | 6.339190223180112271e-01,-2.025142586657607158e+00,1.864543147694276426e-01,-6.617864647683879831e-01,8.524333347962239626e-01,-2.030453860429926871e-01 49 | -7.925207384327006555e-01,-1.147364414668990140e-01,5.049872789804571438e-01,8.657551941701214782e-01,-1.200296407055776227e+00,3.711458733713088320e-01 50 | -3.345012358409483744e-01,-4.749453111609561740e-01,-6.533292325737118933e-01,1.765454240281096876e+00,4.049817109609555321e-01,-6.039851867158205767e-01 51 | -1.260883954335045187e+00,9.178619470547760839e-01,2.122156197012633250e+00,1.032465260551146846e+00,-1.519369965954013413e+00,8.658978747289991507e-02 52 | -4.842340728662513638e-01,1.266911149186622731e+00,-7.076694656187807464e-01,4.438194281462284341e-01,7.746340534293367774e-01,-1.556772353920794771e-01 53 | -9.269304715780829484e-01,-5.952535606180008043e-02,-3.241267340069072578e+00,-1.024387641334289833e+00,-2.525681513931603006e-01,1.167782061659807358e+00 54 | -1.247783181964849497e+00,1.632411303931635249e+00,-1.430141377960632676e+00,-4.400444866969837610e-01,1.307405772860913418e-01,2.544208433012131176e-01 55 | 1.441273289066115515e+00,-1.435862151179439383e+00,1.163163752154959596e+00,1.023306101958704889e-02,-9.815086510479509307e-01,3.376026620752021756e-01 56 | 4.621034742632707526e-01,1.990596955734700302e-01,-6.002168771587946816e-01,6.980208499001891442e-02,-3.853135968617601992e-01,-4.118769661224673806e-01 57 | 1.135173452512480419e-01,6.621306745210466804e-01,1.586016816145352060e+00,-1.237815498826849048e+00,2.133033374656266634e+00,-4.876062240724935437e-01 58 | 
-1.952087799522501887e+00,-1.517850950355833228e-01,5.883172064845765270e-01,2.809918677350326521e-01,-6.226995198205937943e-01,-4.325581878196209096e-01 59 | -2.081222503572752180e-01,-4.930009346588328234e-01,-5.893647569442115319e-01,8.496020970210246270e-01,3.570154859650473411e-01,3.944521423782968439e-01 60 | -6.929095952606542097e-01,8.995998754332507064e-01,3.072995208766093334e-01,8.128621188389600905e-01,6.296288419236122369e-01,-4.209844808202629629e-01 61 | -8.289950109220722840e-01,-5.601810401969695707e-01,7.472936051232618171e-01,6.103702654334648425e-01,-2.090159396414813242e-02,2.897748568964129134e-01 62 | 1.173273833087819934e-01,1.277664895788424904e+00,-5.915713888358299366e-01,5.470973811700379219e-01,-2.021926524338940601e-01,2.075400798645438805e+00 63 | -2.176812032272202879e-01,1.098776851987190106e+00,8.254163489880298465e-01,8.135096360006385252e-01,1.305478807154329068e+00,8.711247034316923488e-01 64 | 2.100384163275904881e-02,6.819529712949639055e-01,-3.102667565934560390e-01,3.241663524884421110e-01,-1.301430543676845852e-01,-3.260235321678411347e-01 65 | 9.699596499271818939e-02,5.951570254369136226e-01,-8.182206832334725233e-01,2.092387275685460235e+00,-1.006017381499701990e+00,1.201213922163944847e+00 66 | -1.214188612787732158e+00,1.158110873500067806e+00,7.916626939629358706e-01,6.241198170521551347e-01,6.283455092642799000e-01,-4.080753730215513908e-01 67 | -1.224677284691462302e-02,-8.972543714858315367e-01,7.580455819372633464e-02,-6.771617115121116859e-01,9.751197334177511555e-01,-2.038124535177853858e+00 68 | -1.470573815021386510e-01,-8.254971967925115450e-01,-3.213858416529934425e-01,4.129314542756243323e-01,-5.637245528039747100e-01,-1.008086310917404083e+00 69 | -8.222203955664314501e-01,2.436872114919123034e-01,2.449665711087227749e-01,-5.069431753711297617e-01,-4.710383056183227724e-01,-1.870791921025855675e+00 70 | 2.320499373576362934e-01,-1.448084341497324123e+00,-1.407463774376555232e+00,-7.184442212524360105e-01,-2.134471517118472494e-01,-3.515134840413086659e-01 71 | 3.109075655980045871e-01,1.475356216949551991e+00,8.576596232020193833e-01,-1.599385299634271118e-01,-1.901620790268883018e-02,1.841837918955169934e-02 72 | -1.002529364637808840e+00,-1.851313599238993066e-02,-2.886586389201383218e-01,3.227185603380894885e-01,-8.272309435523229615e-01,1.676437312275282698e+00 73 | 5.193465142411722857e-01,1.532738913002577696e+00,-1.087601484568575944e-01,4.017117220989414594e-01,6.901439917111125144e-01,3.269273737641626432e-01 74 | -4.012204718858362607e-01,2.240924818104167715e-01,1.259240078179485987e-02,9.767609854883171905e-02,-7.730097838554664813e-01,-2.191005288088642422e-01 75 | 2.451017425894271365e-02,4.979982912454497535e-01,1.451143607795041701e+00,9.592708260852068625e-01,2.153182457511556347e+00,8.294055811834891712e-01 76 | -7.673475628880495059e-01,8.723206367206781664e-01,1.833420057383517432e-01,2.189802933217672276e+00,-8.082982853551514690e-01,-2.211135309007885130e+00 77 | -8.397218421807760569e-01,-5.993926454440221541e-01,-2.123895724309806887e+00,-5.257550216807610477e-01,-7.591326615536979627e-01,2.356145581085659357e-01 78 | 1.503937864762076304e-01,3.417559757771594375e-01,1.876170839215886232e+00,9.504238381860502516e-01,-5.769036556624030920e-01,7.708651938869668374e-01 79 | -8.984146713483579516e-01,4.919191715065057147e-01,-1.320233207020642174e+00,1.831458765854353743e+00,1.179440120721287011e+00,-1.478586245779841546e+00 80 | 
-4.691756521047047990e-01,-1.713134529090877489e+00,1.353872374165412840e+00,-1.145398452526178862e-01,1.237816311973461758e+00,1.143754043206929083e+00 81 | -1.594427658794367098e+00,-5.993750229537728735e-01,5.243699718183165819e-03,4.698059376474205545e-02,-4.500654714792436395e-01,3.384964074944141199e-01 82 | 6.228499323474987470e-01,-1.067620429382594383e+00,-1.423794850212934948e-01,1.202956317118988594e-01,5.144388340587490172e-01,-4.152879139008012754e-01 83 | 7.116148780888897907e-01,-1.124642091837869229e+00,-1.534114170735622285e+00,1.277676821898509063e+00,3.323140119795916503e-01,6.327818661062848404e-01 84 | -7.484865365565536166e-01,1.551151975522522930e+00,1.156746342928586663e-01,1.179297184063826442e+00,6.751848141010895199e-02,2.270692857804395892e+00 85 | 2.060747924881987103e+00,1.755340842443204430e+00,-2.489641484790734993e-01,9.715709509543554168e-01,6.453759495851475458e-01,1.818662550584951576e-01 86 | 1.368631557532348664e+00,-9.649234605801044751e-01,6.860514599984393058e-01,1.058424486849587787e+00,-1.758739486423114284e+00,2.482205863003360824e-01 87 | -1.183258512665775086e+00,-2.039232177760100573e+00,-2.694068344445577634e-01,7.175422557959623138e-01,1.502357052096028101e+00,-4.593608995402441164e-01 88 | 7.409478041977518581e-02,1.628615545571291845e+00,-1.380101458214891386e+00,-1.703382439355154654e+00,-5.554769889661877874e-02,-8.498443694647918045e-01 89 | 3.840654489393072746e-01,-3.269474809409311095e-02,-2.067442100039876607e+00,-8.912003951278840708e-02,-1.304469500504853219e+00,8.303358165442455974e-01 90 | 6.696725488300384610e-01,3.665982460968482681e-01,-9.398797863273552489e-01,-5.138669173366935405e-01,-1.059213521888951570e+00,-8.560838259088672242e-01 91 | -6.267909727317187707e-02,9.551423205012382622e-01,-9.857260463355437263e-01,5.040465155178444068e-01,-5.302576183724407866e-01,7.156623721939246729e-02 92 | -7.928728322623441738e-01,-1.070303599545578271e-01,-1.035242322419374084e+00,-5.536493053471820414e-01,-1.197877892588848470e+00,-4.776574467651166778e-01 93 | 1.964725132916389283e+00,3.526355197172861139e-02,-6.997255079925855936e-01,2.139799107342220119e-01,-1.123280496908298232e-01,4.789798257463918629e-01 94 | -2.209695995332229823e-01,6.141667000434252177e-01,7.575077100473051050e-01,-5.305011476105274681e-01,-5.758182406446800128e-01,3.336621052869482851e-01 95 | -2.750516971516440146e-01,-2.301921164735584835e+00,-1.515191062198552263e+00,1.366874267444524671e+00,1.644967713501283679e+00,1.037539944257899194e+00 96 | -2.490360395563783191e-01,5.765569630557664249e-01,3.112501545435361061e-01,3.078880808455237705e+00,1.119574911434576769e+00,-5.100163988547470328e-01 97 | -1.279175914807665349e-01,-9.555404406004257556e-01,-1.606446320257572502e+00,2.034636358672231027e-01,-7.563507452843033496e-01,-2.698749352933712542e-01 98 | -1.422253709597674165e+00,-6.465728842425265688e-01,-1.081548003614394959e+00,1.687141635072564760e+00,8.816397569494505149e-01,-9.787637157823073641e-01 99 | -7.972641316617372007e-03,1.479944138890025851e+00,7.736830764761830348e-02,-8.612842013282636655e-01,1.523124077269657262e+00,-4.442932600761115847e-01 100 | 5.389100436846586684e-01,-1.037246154326456393e+00,-1.903386780836081871e-01,-8.756182533847571836e-01,-1.382799730964336060e+00,3.773004930448521921e-01 101 | -------------------------------------------------------------------------------- /doc/mdexample.jl: -------------------------------------------------------------------------------- 1 | using MathOptInterface, Optimization, 
OptimizationMOI, OptimizationOptimJL, Ipopt
2 | using ForwardDiff, DifferentiationInterface
3 | using Divergences
4 | using Statistics, LinearAlgebra
5 | using Infiltrator
6 | 
7 | ## --------------------------------------------------------------------- ##
8 | ## Moment Conditions & Jacobian
9 | ## --------------------------------------------------------------------- ##
10 | 
11 | ## This function is the moment matrix of the estimation problem.
12 | ## This function should always be defined by the user.
13 | function g(x, θ)
14 |     d = x .- θ
15 |     return [d d .^ 2 .- 1]
16 | end
17 | 
18 | ## This function is the gradient of the mean moment matrix,
19 | ## which is an (m, k) matrix, where m is the number of moments and k
20 | ## is the number of parameters:
21 | ## \frac{\partial}{\partial\theta}\left[\sum_{i=1}^{n}\pi_{i}g(x_{i},\theta)/n\right]
22 | ## It should be written in a way that it can be used with ForwardDiff or Zygote.
23 | function ∇g(x, θ, π)
24 |     d = x .- θ
25 |     res = Matrix{promote_type(eltype(θ), eltype(π))}(undef, 2, 1)
26 |     res[1] = -sum(π) ## mean here
27 |     res[2] = -2.0*sum(π .* d) ## mean here
28 |     return res
29 | end
30 | 
31 | function λ∇g(θ, π, λ, x)
32 |     return first(λ'∇g(x, θ, π) ./ length(x)) ## length(x) rather than the global `n`
33 | end
34 | 
35 | ## This must return a (k,n)
36 | function ∇gᵢλ!(dest::AbstractMatrix, x, θ, λ)
37 |     d = x .- θ
38 |     n = length(d)
39 |     for j in axes(dest, 1)
40 |         for i in axes(dest, 2)
41 |             dest[j, i] = (-λ[1] .- 2.0 .* d[i] .* λ[2])/n
42 |         end
43 |     end
44 |     return dest
45 | end
46 | 
47 | Base.@propagate_inbounds function ∇gᵢλ(x, θ, λ)
48 |     n = length(x)
49 |     k = length(θ)
50 |     res = Matrix{promote_type(eltype(θ), eltype(λ), eltype(x))}(undef, k, n)
51 |     return ∇gᵢλ!(res, x, θ, λ)
52 | end
53 | 
54 | ## --------------------------------------------------------------------- ##
55 | ## Optimization Problem
56 | ## --------------------------------------------------------------------- ##
57 | 
58 | const MOI = MathOptInterface
59 | 
60 | struct MDProblem{D} <: MOI.AbstractNLPEvaluator
61 |     div::Divergences.AbstractDivergence
62 |     data::D
63 |     size::Tuple{Int, Int, Int}
64 |     backend::DifferentiationInterface.AbstractADType
65 | end
66 | 
67 | Base.size(md::MDProblem) = md.size
68 | divergence(md::MDProblem) = md.div
69 | 
70 | function MOI.initialize(md::MDProblem, rf::Vector{Symbol})
71 |     for feat in rf
72 |         if !(feat in [:Grad, :Jac, :Hess])
73 |             error("Unsupported feature $feat")
74 |         end
75 |     end
76 | end
77 | 
78 | MOI.features_available(md::MDProblem) = [:Grad, :Jac, :Hess]
79 | 
80 | ## --------------------------------------------------------------------- ##
81 | ## Objective function
82 | ## --------------------------------------------------------------------- ##
83 | function MOI.eval_objective(md::MDProblem, u::Vector{Float64})
84 |     n, k, m = size(md)
85 |     return divergence(md)(view(u, 1:n))
86 | end
87 | 
88 | function MOI.eval_objective_gradient(md::MDProblem, res, u)
89 |     n, k, m = size(md)
90 |     T = eltype(res)
91 |     Divergences.gradient!(view(res, 1:n), divergence(md), view(u, 1:n))
92 |     return fill!(view(res, (n + 1):(n + k)), zero(T))
93 | end
94 | 
95 | ## --------------------------------------------------------------------- ##
96 | ## Constraints
97 | ## --------------------------------------------------------------------- ##
98 | function MOI.eval_constraint(md::MDProblem, res, u)
99 |     n, k, m = size(md)
100 |     θ = view(u, (n + 1):(n + k))
101 |     π = view(u, 1:n)
102 |     G = g(md.data, θ)
103 |     return weighted_mean!(res, π, G)
104 | end
105 | 
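## Quick consistency check (a sketch, not part of the original script): the
## analytic ∇g above should match ForwardDiff applied to θ ↦ Σᵢ wᵢ g(xᵢ, θ).
let x = randn(10), θ = [0.1], w = fill(0.1, 10)
    fd = ForwardDiff.jacobian(t -> vec(sum(w .* g(x, t), dims = 1)), θ)
    @assert fd ≈ ∇g(x, θ, w)
end
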
MOI.jacobian_structure(md::MDProblem)
107 |     n, k, m = size(md)
108 |     return rowcol_of_dense(n + k, m + 1) ## dense (m+1) × (n+k) Jacobian, column-major
109 | end
110 | 
111 | ## --------------------------------------------------------------------- ##
112 | ## Constraints Jacobian
113 | ## --------------------------------------------------------------------- ##
114 | function MOI.eval_constraint_jacobian(md::MDProblem, J, u)
115 |     n, k, m = size(md)
116 |     θ = view(u, (n + 1):(n + k))
117 |     π = u[1:n]
118 |     G = g(md.data, θ)
119 |     G .= G ./ n
120 |     ∇gₙ = ∇g(md.data, θ, π)
121 |     ∇gₙ .= ∇gₙ ./ n
122 |     return assign_matrix!(J, G, ∇gₙ)
123 | end
124 | 
125 | ## --------------------------------------------------------------------- ##
126 | ## Hessian of the Lagrangian
127 | ## --------------------------------------------------------------------- ##
128 | 
129 | ## The Lagrangian is given by:
130 | ##
131 | ## L(π, θ, λ) = D(π, p) + λ'g(θ)
132 | 
133 | function MOI.hessian_lagrangian_structure(md::MDProblem)
134 |     n, k, m = size(md)
135 |     hele = Int(n + n*k + k*k)
136 |     rows = Array{Int64}(undef, hele)
137 |     cols = Array{Int64}(undef, hele)
138 |     ## Diagonal Elements
139 |     for j in 1:n
140 |         rows[j] = j
141 |         cols[j] = j
142 |     end
143 |     idx = n+1
144 | 
145 |     # for j = 1:k
146 |     #     for s = 1:n
147 |     #         rows[idx] = s
148 |     #         cols[idx] = n + j
149 |     #         idx += 1
150 |     #     end
151 |     # end
152 | 
153 |     ## Off-diagonal elements
154 |     for j in 1:k
155 |         for s in 1:n
156 |             rows[idx] = n + j
157 |             cols[idx] = s
158 |             idx += 1
159 |         end
160 |     end
161 | 
162 |     ## Last Block
163 |     for j in 1:k
164 |         for s in 1:k
165 |             rows[idx] = n + j
166 |             cols[idx] = n + s
167 |             idx += 1
168 |         end
169 |     end
170 | 
171 |     return [(r, c) for (r, c) in zip(rows, cols)]
172 | end
173 | 
174 | function MOI.eval_hessian_lagrangian(md::MDProblem, hess, u, σ, λ)
175 |     n, k, m = size(md)
176 |     π = view(u, 1:n)
177 |     θ = view(u, (n + 1):(n + k))
178 |     if σ==0
179 |         @inbounds for j in 1:n
180 |             hess[j] = 0.0
181 |         end
182 |     else
183 |         hv = view(hess, 1:n)
184 |         Divergences.hessian!(hv, divergence(md), π)
185 |         hv .= hv .* σ
186 |     end
187 | 
188 |     λv = view(λ, 1:m)
189 |     #v = ∇gᵢ(md.data, θ)*λv./n
190 |     ## As this matrix is symmetric, Ipopt expects that only the lower diagonal entries are specified.
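    ## Note on Ipopt's Hessian convention (explanatory comment, not original
    ## code): the Hessian of the Lagrangian is symmetric, so Ipopt expects one
    ## value per (row, col) pair returned by `hessian_lagrangian_structure`,
    ## lower triangle only. With k = 1 the terminal k×k block holds a single
    ## entry, so the k(k+1)/2 values packed below line up with the k² index
    ## pairs allocated in the structure; for k > 1 the two counts differ and
    ## the structure would need to be restricted to the lower triangle too.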
191 | ## hess[n+1:n+n*k] .= vec(v') 192 | ∇gᵢλ!(reshape(view(hess, (n + 1):(n + n * k)), k, n), md.data, θ, λv) 193 | # @infiltrate 194 | ## If k>1, the we should only get the lower diagonal entries of 195 | ## the gradient of λ∇g 196 | ## hess[n+n*k+1:n+n*k+k^2] .= gradient(λ∇g, md.backend, θ, Constant(π), Constant(λv), Constant(md.data)) 197 | ##vv = gradient(λ∇g, md.backend, θ, Constant(π), Constant(λv), Constant(md.data)) 198 | ##@infiltrate 199 | return copy_lower_triangular!( 200 | view(hess, 201 | (n + n * k + 1):(n + n * k + (k * (k + 1) ÷ 2))), 202 | gradient(λ∇g, md.backend, θ, Constant(π), Constant(λv), 203 | Constant(md.data))) 204 | end 205 | 206 | ## --------------------------------------------------------------------- ## 207 | ## Problem 208 | ## --------------------------------------------------------------------- ## 209 | 210 | ## Small problem to test the implementation 211 | n = 1000 212 | k = 1 213 | m = 2 214 | 215 | 𝒟 = ChiSquared() 216 | ℳ𝒟 = FullyModifiedDivergence(𝒟, 0.7, 1.2) 217 | 218 | mdprob = MDProblem(𝒟, √0.64 .* randn(n), (n, k, m), AutoForwardDiff()) 219 | 220 | n, k, m = size(mdprob) 221 | 222 | model = Ipopt.Optimizer() 223 | π = MOI.add_variables(model, n) 224 | MOI.add_constraint.(model, π, MOI.GreaterThan(0.0)) 225 | θ = MOI.add_variables(model, k) 226 | MOI.add_constraint.(model, θ, MOI.GreaterThan(-10.0)) 227 | MOI.add_constraint.(model, θ, MOI.LessThan(+10.0)) 228 | 229 | MOI.get(model, MOI.NumberOfVariables()) 230 | 231 | for i in 1:n 232 | MOI.set(model, MOI.VariablePrimalStart(), π[i], 1.0) 233 | end 234 | 235 | for i in 1:k 236 | MOI.set(model, MOI.VariablePrimalStart(), θ[i], 0.0) 237 | end 238 | 239 | lb = [zeros(m); n] 240 | ub = [zeros(m); n] 241 | 242 | MOI.set(model, MOI.ObjectiveSense(), MOI.MIN_SENSE) 243 | block_data = MOI.NLPBlockData(MOI.NLPBoundsPair.(lb, ub), mdprob, true) 244 | MOI.set(model, MOI.NLPBlock(), block_data) 245 | 246 | model.options["derivative_test"] = "none" 247 | model.options["derivative_test_print_all"] = "no" 248 | 249 | model.options["print_level"] = 3 250 | 251 | MOI.optimize!(model) 252 | MOI.get(model, MOI.TerminationStatus()) 253 | MOI.get(model, MOI.DualStatus()) 254 | MOI.get(model, MOI.PrimalStatus()) 255 | 256 | MOI.get(model, MOI.SolveTimeSec()) 257 | MOI.get(model, MOI.BarrierIterations()) 258 | 259 | xstar = MOI.get(model, MOI.VariablePrimal(), θ) 260 | 261 | function lagrangian(md::MDProblem, u, σ, λ) 262 | n, k, m = size(md) 263 | π = u[1:n] 264 | θ = u[(n + 1):(n + k)] 265 | return σ .* divergence(md)(π) + mean(π .* g(md.data, θ)*λ) 266 | end 267 | 268 | using Statistics 269 | 270 | p = [0.45793379249066035, 271 | 4.999416892014921, 272 | 9.182989399836064, 273 | 3.6958463315972025, 274 | 6.220383439227501, 275 | -9.964661752165114] 276 | lagrangian(mdprob, p, 1.5, [0.0, 0.0]) 277 | 278 | H0 = ForwardDiff.hessian(x -> lagrangian(mdprob, x, 1.5, [0.0, 0]), p); 279 | H = zeros(16) 280 | MOI.eval_hessian_lagrangian(mdprob, H, p, 1.5, [0.0, 0]) 281 | 282 | H0 = ForwardDiff.hessian(x -> lagrangian(mdprob, x, 0.0, [1.5, 0]), p); 283 | MOI.eval_hessian_lagrangian(mdprob, H, p, 0.0, [1.5, 0]) 284 | 285 | ## --------------------------------------------------------------------- ## 286 | ## Simple MC 287 | ## --------------------------------------------------------------------- ## 288 | 289 | β = Vector{Float64}(undef, 5000) 290 | for j in 1:5000 291 | x = √0.64 .* randn(1000) 292 | mdprob.data .= x 293 | MOI.optimize!(model) 294 | β[j] = MOI.get(model, MOI.VariablePrimal(), θ)[1] 295 | end 296 | 297 | using 
StatsPlots
298 | 
299 | StatsPlots.density(β)
300 | StatsPlots.histogram(β; nbins = 80)
301 | 
302 | ## --------------------------------------------------------------------- ##
303 | ## Utilities
304 | ## --------------------------------------------------------------------- ##
305 | 
306 | """
307 |     assign_matrix!(J, g, ∇g)
308 | 
309 | Assigns the elements of the block matrix `X = [[g'; ones(1, n)] [∇g; zeros(1, k)]]`
310 | into the preallocated array `J`, including the `ones(1, n)` and `zeros(1, k)` blocks.
311 | 
312 | # Arguments
313 | - `J::Vector{Float64}`: A preallocated array of size `n * (m + 1) + k * (m + 1)`, where `n`, `m`, and `k` are the dimensions of `g` and `∇g`.
314 | - `g::AbstractMatrix{T}`: An `n × m` matrix.
315 | - `∇g::AbstractMatrix{T}`: An `m × k` matrix.
316 | 
317 | # Behavior
318 | - The function assigns, in order:
319 |   - The columns of the transpose of `g` (`g'`), each followed by a `1.0` for the adding-up constraint row.
320 |   - The elements of `∇g` in column-major order, followed by `k` trailing zeros.
321 | - For `k = 1` this coincides with `vec(X)`; `assign_matrix` below is the dense reference implementation.
322 | 
323 | # Example
324 | ```julia
325 | A = [1 2; 3 4; 5 6]       # n × m = 3 × 2
326 | B = reshape([7, 8], 2, 1) # m × k = 2 × 1
327 | 
328 | J = Vector{Float64}(undef, 3 * 3 + 1 * 3) # Preallocate array
329 | assign_matrix!(J, A, B)
330 | 
331 | # J will be:
332 | # [1.0, 2.0, 1.0, 3.0, 4.0, 1.0, 5.0, 6.0, 1.0, 7.0, 8.0, 0.0]
333 | ```
334 | """
335 | Base.@propagate_inbounds function assign_matrix!(J, gg, Dg)
336 |     n, m = size(gg)
337 |     k = size(Dg, 2)
338 | 
339 |     # First block: gg' (m×n matrix)
340 |     for i in 1:n
341 |         for j in 1:m
342 |             J[(i - 1) * (m + 1) + j] = gg[i, j] # Transpose while assigning
343 |         end
344 |     end
345 | 
346 |     # Row of ones (1×n matrix)
347 |     for i in 1:n
348 |         J[i * (m + 1)] = 1.0
349 |     end
350 | 
351 |     # Second block: Dg (m×k matrix)
352 |     for i in 1:k
353 |         for j in 1:m
354 |             J[n * (m + 1) + (i - 1) * m + j] = Dg[j, i]
355 |         end
356 |     end
357 | 
358 |     # Final block of zeros (1×k matrix)
359 |     baseIdx = n*(m+1) + k*m
360 |     for i in 1:k
361 |         J[baseIdx + i] = 0.0
362 |     end
363 | 
364 |     return J
365 | end
366 | 
367 | function assign_matrix(J, gg, Dg)
368 |     n, m = size(gg)
369 |     k = size(Dg, 2)
370 |     R = [[gg'; ones(1, n)] [Dg; zeros(1, k)]]
371 |     return J .= vec(R)
372 | end
373 | 
374 | using SparseArrays
375 | 
376 | function rowcol_of_sparse(g::SparseMatrixCSC; offset_row = 0, offset_col = 0)
377 |     rows = rowvals(g)
378 |     vals = nonzeros(g)
379 |     m, n = size(g)
380 |     tup = Tuple{Int64, Int64}[]
381 |     for j in 1:n
382 |         for i in nzrange(g, j)
383 |             push!(tup, (rows[i]+offset_row, j+offset_col))
384 |         end
385 |     end
386 |     return tup
387 | end
388 | 
389 | function weighted_mean!(μ::AbstractVector{T},
390 |         w::AbstractVector,
391 |         x::AbstractMatrix) where {T}
392 |     fill!(μ, zero(T))
393 |     nobs = size(x, 1) ## was the global `n`; keep the helper self-contained
394 |     @inbounds for j in axes(x, 2)
395 |         for i in axes(x, 1)
396 |             μ[j] += w[i]*x[i, j]/nobs
397 |         end
398 |     end
399 |     μ[end] = sum(w)
400 |     #μ[1:end-1] ./= n
401 |     return μ
402 | end
403 | 
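## Consistency sketch (not part of the original script): the in-place writer
## `assign_matrix!` should agree with the dense reference `assign_matrix`
## for the k = 1 case used in this example.
let gg = [1.0 2.0; 3.0 4.0; 5.0 6.0], Dg = reshape([7.0, 8.0], 2, 1)
    J1 = Vector{Float64}(undef, 3 * 3 + 1 * 3)
    J2 = similar(J1)
    @assert assign_matrix!(J1, gg, Dg) == assign_matrix(J2, gg, Dg)
end
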
403 | """
404 |     rowcol_of_dense(n, m; offset_row = 0, offset_col = 0)
405 | 
406 | Returns the row and column indices of all elements of a dense `m × n` matrix in column-major order, with optional offsets for rows and columns.
407 | 
408 | # Arguments
409 | - `n::Int`: The number of columns.
410 | - `m::Int`: The number of rows.
411 | - `offset_row::Int` (default: 0): An offset to be added to each row index.
412 | - `offset_col::Int` (default: 0): An offset to be added to each column index.
413 | 
414 | # Returns
415 | A vector of tuples `(row, col)` representing the indices of all elements in the dense matrix.
416 | 
417 | # Example
418 | ```julia
419 | rowcol_of_dense(2, 2) # [(1, 1), (2, 1), (1, 2), (2, 2)]
420 | ```
421 | """
422 | function rowcol_of_dense(n, m; offset_row = 0, offset_col = 0)
423 |     tup = Tuple{Int64, Int64}[] # Initialize an empty vector of tuples
424 |     @inbounds for j in 1:n
425 |         for i in 1:m
426 |             push!(tup, (i + offset_row, j + offset_col))
427 |         end
428 |     end
429 |     return tup
430 | end
431 | 
432 | function copy_lower_triangular!(x::AbstractVector{T}, A::Matrix{T}) where {T}
433 |     @assert size(A, 1) == size(A, 2) "`A` must be square"
434 |     n = size(A, 1)
435 |     len = (n * (n + 1)) ÷ 2 # Length of the packed lower triangle
436 |     @assert length(x) == len "The destination vector must have length $(len)"
437 |     idx = 1
438 |     @inbounds for j in 1:n
439 |         for i in j:n
440 |             x[idx] = A[i, j]
441 |             idx += 1
442 |         end
443 |     end
444 |     return x
445 | end
446 | 
447 | function copy_lower_triangular!(x::AbstractVector{T}, A::Vector{T}) where {T}
448 |     n = length(A)
449 |     @assert n == 1 "`copy_lower_triangular!` for vectors makes sense only for singleton vectors"
450 |     @assert length(x) == 1 "The destination vector must have length 1"
451 |     x .= A
452 |     return x
453 | end
454 | 
455 | # optprob = OptimizationFunction(divergence, Optimization.AutoForwardDiff(), cons = cons)
456 | # prob = OptimizationProblem(optprob, x0, _p,
457 | #     lcons = repeat([0.], 2),
458 | #     ucons = repeat([0.], 2),
459 | #     lb = [repeat([0], 100); -Inf],
460 | #     ub = [repeat([+Inf], 100); +Inf])
461 | 
462 | # solver = OptimizationMOI.MOI.OptimizerWithAttributes(Ipopt.Optimizer, "print_level" => 0)
463 | 
464 | # solve(prob, solver)
465 | 
--------------------------------------------------------------------------------
/doc/mdprob.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cyipopt
3 | 
4 | from scipy.sparse import coo_array, csc_array
5 | from abc import ABC, abstractmethod
6 | 
7 | # =============================================================================
8 | # Abstract Base Class for Moment Functions
9 | # =============================================================================
10 | class AbstractMomentFunction(ABC):
11 | 
12 |     @abstractmethod
13 |     def g(self, theta):
14 |         """
15 |         Compute the moment matrix G(θ) for a given parameter vector θ.
16 |         Parameters
17 |         ----------
18 |         theta : ndarray, shape (k,)
19 |             Parameter vector.
20 | 
21 |         Returns
22 |         -------
23 |         Z : ndarray, shape (n, m)
24 |             Moment matrix.
25 |         """
26 |         pass
27 | 
28 |     @abstractmethod
29 |     def Dg(self, theta, pi):
30 |         """
31 |         Compute the derivative of sum(pi*G(θ)) with respect to θ.
32 | 
33 |         Parameters
34 |         ----------
35 |         theta : ndarray, shape (k,)
36 |             Parameter vector.
37 |         pi : ndarray, shape (n,)
38 |             Weight vector.
39 | 
40 |         Returns
41 |         -------
42 |         dG : ndarray, shape (m, k)
43 |             Derivative matrix.
44 |         """
45 |         pass
46 | 
47 |     @abstractmethod
48 |     def Dg_lambda(self, theta, lam):
49 |         """
50 |         Compute the derivative of lambda'G(θ) with respect to θ.
51 | 
52 |         Parameters
53 |         ----------
54 |         theta : ndarray, shape (k,)
55 |             Parameter vector.
56 |         lam : ndarray, shape (m,)
57 |             Lagrange multiplier vector.
58 | 
59 |         Returns
60 |         -------
61 |         dgl : ndarray, shape (n, k)
62 |             The derivative matrix.
63 |         """
64 |         pass
65 | 
66 |     @abstractmethod
67 |     def Dg_lambda_pi(self, theta, lam, pi):
68 |         """
69 |         Compute the derivative of pi*g with respect to θ,
70 |         weighted by pi.
71 | 
72 |         Parameters
73 |         ----------
74 |         theta : ndarray, shape (k,)
75 |             Parameter vector.
76 | lam : ndarray, shape (m,) 77 | Lagrange multiplier vector. 78 | pi : ndarray, shape (n,) 79 | Weight vector. 80 | 81 | Returns 82 | ------- 83 | dgl : ndarray, shape (n, k) 84 | The weighted derivative matrix. 85 | """ 86 | pass 87 | 88 | @abstractmethod 89 | def Hg_lambda(self, theta, lam, pi): 90 | """ 91 | Compute the Hessian of lambda' g with respect to theta. 92 | (In this example, it returns a zero matrix as a placeholder.) 93 | 94 | Parameters 95 | ---------- 96 | theta : ndarray, shape (k,) 97 | Parameter vector. 98 | lam : ndarray, shape (m,) 99 | Lagrange multiplier vector. 100 | pi : ndarray, shape (n,) 101 | Weight vector. 102 | 103 | Returns 104 | ------- 105 | H : ndarray, shape (k, k) 106 | Hessian matrix. 107 | """ 108 | pass 109 | 110 | # ============================================================================= 111 | # A Concrete Implementation of the Moment Function 112 | # Instrumental Variables Moment Function 113 | # ============================================================================= 114 | class DefaultMomentFunction(AbstractMomentFunction): 115 | def __init__(self, y, x, z): 116 | """ 117 | Initialize the moment function caches. 118 | 119 | Parameters 120 | ---------- 121 | y : ndarray, shape (n,) 122 | Response variable. 123 | x : ndarray, shape (n, k) 124 | Regressors (first column is x, additional columns may be included). 125 | z : ndarray, shape (n, m) 126 | Instrumental variables. 127 | """ 128 | 129 | self.y = np.ravel(y) 130 | self.n = self.y.shape[0] 131 | self.x = np.asarray(x) 132 | if self.x.ndim == 1: 133 | self.x = self.x.reshape(-1, 1) 134 | self.k = self.x.shape[1] 135 | 136 | # Ensure z is 2D 137 | self.z = np.asarray(z) 138 | if self.z.ndim == 1: 139 | self.z = self.z.reshape(-1, 1) 140 | self.m = self.z.shape[1] 141 | 142 | # Validate dimensions 143 | if self.x.shape[0] != self.n or self.z.shape[0] != self.n: 144 | raise ValueError("All inputs (y, x, z) must have the same number of observations") 145 | 146 | # Allocate caches (same shapes as the originals) 147 | self.Y = np.empty_like(self.y) # for temporary n-vector operations 148 | self.X = np.empty_like(self.x) # for temporary (n,k) operations 149 | self.Z = np.empty_like(self.z) # for temporary (n,m) operations 150 | # Cache for the gradient matrix: shape (m,k) 151 | self.dG = np.empty((self.m, self.k), dtype=float) 152 | 153 | def g(self, theta): 154 | # Compute Y = x dot theta, storing the result in the cache self.Y. 155 | np.matmul(self.x, theta, out=self.Y) 156 | # Overwrite Y with (y - x dot theta) in place. 157 | np.subtract(self.y, self.Y, out=self.Y) 158 | # Compute Z: for each observation i, multiply row z[i] by Y[i]. 159 | 160 | self.Z[:] = self.z * self.Y[:, np.newaxis] 161 | return self.Z 162 | 163 | def Dg(self, theta, pi): 164 | # Compute X = pi * x (elementwise multiplication along rows). 165 | self.X[:] = self.x * pi[:, np.newaxis] 166 | # Compute dG = - z^T dot X, storing in self.dG. 167 | np.dot(self.z.T, self.X, out=self.dG) 168 | self.dG *= -1.0 169 | return self.dG 170 | 171 | def Dg_lambda(self, theta, lam): 172 | # Compute Y = z lam (vector of length n). 173 | np.matmul(self.z, lam, out=self.Y) 174 | # Compute X = - Y * x (each row scaled by -Y[i]) and divide by n. 175 | self.X[:] = -self.Y[:, np.newaxis] * self.x 176 | self.X /= self.n 177 | return self.X 178 | 179 | def Dg_lambda_pi(self, theta, lam, pi): 180 | # Compute the unweighted derivative first. 181 | dgl = self.Dg_lambda(theta, lam) 182 | # Multiply each row by the corresponding pi element. 
        dgl[:] = dgl * pi[:, np.newaxis]
        return dgl

    def Dg_lambda_inplace(self, J, theta, lam, pi=None):
        if pi is None:
            dgl = self.Dg_lambda(theta, lam)
        else:
            dgl = self.Dg_lambda_pi(theta, lam, pi)
        # Flatten the dgl array (C-order) and copy into J.
        np.copyto(J, dgl.ravel())

    def Hg_lambda(self, theta, lam, pi):
        # Placeholder: returns a zero matrix of shape (k, k).
        return np.zeros((self.k, self.k), dtype=float)

# =============================================================================
# The MDProblem class
# =============================================================================
class MDProblem(cyipopt.Problem):
    r"""
    A Python translation of the Ipopt problem defined in Julia.

    The decision variable vector `u` is assumed to be partitioned as
        u = [π; θ]
    with π an n-vector and θ a k-vector.

    The constraints are defined in terms of a moment function
        g(θ) = [ z_i * (y_i - x_i' θ) ]_{i=1}^n
    and a "weighted-sum" constraint computed as:
        c(j)   = (1/n) Σ_{i=1}^{n} π[i] g(θ)[i, j]   for j = 1, …, m
        c(m+1) = Σ_{i=1}^{n} π[i] - n
    so that the overall constraint vector is of length (m+1).

    The objective function is taken to be a divergence function of π.
    (Typically you'll supply a divergence object with methods `__call__`,
    `gradient`, and `hessian`.)
    """
    def __init__(self, moment, divergence):
        """
        Parameters
        ----------
        moment : AbstractMomentFunction instance
            Holds the data and cached arrays for computing g and its derivatives.
        divergence : object
            A divergence object supporting __call__(pi), gradient(pi) and hessian(pi).
        """
        self.moment = moment
        self.divergence = divergence
        # Dimensions: n = number of observations, k = dimension of θ, m = number of instruments
        self.n = moment.n
        self.k = moment.k
        self.m = moment.m

    # ----------------------------
    # Objective and its gradient
    # ----------------------------
    def objective(self, u):
        r"""
        Evaluate the objective function:
            f(u) = divergence(π)
        where u = [π; θ].
        """
        pi = u[:self.n]
        return self.divergence(pi)

    def gradient(self, u):
        r"""
        Evaluate the gradient of the objective with respect to u.

        The derivative with respect to π is given by divergence.gradient(pi)
        and with respect to θ is zero.
        """
        pi = u[:self.n]
        grad = np.empty_like(u)
        grad[:self.n] = self.divergence.gradient(pi)
        grad[self.n:] = 0.0
        return grad

    # ----------------------------
    # Constraints and their Jacobian
    # ----------------------------
    def constraints(self, u):
        r"""
        Evaluate the constraints.

        Let G = g(θ) be the (n×m) moment matrix computed by the cached moment function.
        Then define
            c(j)   = (1/n) Σ_{i=1}^{n} π[i] G[i, j]   for j = 1, ..., m
            c(m+1) = Σ_{i=1}^{n} π[i] - n
        so that the constraint vector has length (m+1).
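        (Implementation note: writing the normalization constraint as
        Σ π[i] - n, rather than Σ π[i], lets all m+1 constraints share the
        same equality bounds cl = cu = 0 when the problem is assembled below.)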
275 | """ 276 | pi = u[:self.n] 277 | theta = u[self.n:] 278 | # Evaluate G = g(θ) (note: this call reuses cached arrays in self.moment) 279 | G = self.moment.g(theta) 280 | constr = np.empty(self.m + 1, dtype=np.float64) 281 | # For j = 0,..., m-1: 282 | constr[:self.m] = np.sum(pi[:, None] * G, axis=0) / self.n 283 | # Last constraint: sum of π 284 | constr[self.m] = np.sum(pi) - self.n 285 | return constr 286 | 287 | def jacobian(self, u): 288 | r""" 289 | Evaluate the constraint Jacobian. 290 | 291 | The Jacobian is the block matrix: 292 | J = [ (G/n)^T (Dg/n)^T ] 293 | [ ones(1,n) zeros(1,k) ] 294 | where G = g(θ) is (n×m) and Dg = Dg(θ, π) is (m×k). We return J as a 295 | 2D array of shape ((m+1) x (n+k)). (Ipopt may require a flattened version.) 296 | """ 297 | pi = u[:self.n] 298 | theta = u[self.n:] 299 | # Compute G and its derivative Dg; note that our moment function routines 300 | # use cached arrays to avoid allocation. 301 | G = self.moment.g(theta) 302 | G_scaled = G / self.n 303 | Dg = self.moment.Dg(theta, pi) 304 | Dg_scaled = Dg / self.n 305 | # Build the top m rows: for constraints 1..m. 306 | # With respect to π: derivative is (G_scaled)^T, with respect to θ: derivative is Dg_scaled. 307 | top_block = np.hstack((G_scaled.T, Dg_scaled)) 308 | # Last row: derivative of constraint c(m+1) with respect to π is 1 and with respect to θ is 0. 309 | last_row = np.hstack((np.ones((1, self.n)), np.zeros((1, self.k)))) 310 | J_full = np.vstack((top_block, last_row)) 311 | # If a flat vector is needed (for example by Ipopt) then you might return J_full.ravel() 312 | return J_full 313 | 314 | # ---------------------------- 315 | # Hessian of the Lagrangian 316 | # ---------------------------- 317 | def hessianstructure(self): 318 | """Return the (row, col) indices of the lower-triangular non-zero elements of H.""" 319 | # Diagonal elements of D: (0,0), (1,1), ..., (n-1,n-1) 320 | n = self.n 321 | k = self.k 322 | diag_rows = np.arange(n) 323 | diag_cols = np.arange(n) 324 | 325 | # Off-diagonal block Dg' (k x n block starting at row n, column 0) 326 | block_rows = np.repeat(np.arange(n, n + k), n) 327 | block_cols = np.tile(np.arange(n), k) 328 | 329 | # Combine indices 330 | rows = np.concatenate([diag_rows, block_rows]) 331 | cols = np.concatenate([diag_cols, block_cols]) 332 | return rows, cols 333 | 334 | def hessian(self, u, lam, sigma): 335 | r""" 336 | Evaluate the Hessian of the Lagrangian 337 | L(π, θ, λ) = divergence(π) + λ' g(θ) 338 | at the point u, with scalar multiplier sigma and Lagrange multiplier lam. 339 | 340 | The Hessian is returned as a flat vector containing: 341 | - The first n entries: if sigma==0, zeros; otherwise, sigma times the divergence Hessian at π. 342 | - The next n*k entries: the flattened version (row-major) of Dgλ (the derivative of λ'g w.r.t. θ). 343 | - The final k*(k+1)//2 entries: the lower triangular part of Hgλ. 344 | (In the Julia code Hgλ is a zero matrix; here we follow that.) 
345 | """ 346 | pi = u[:self.n] 347 | theta = u[self.n:] 348 | if sigma != 0: 349 | D_diag = self.divergence.hessian(pi)*sigma 350 | else: 351 | D_diag = np.zeros(self.n) 352 | 353 | # Get Dg (n x k matrix) and transpose it to k x n (Dg') 354 | Dg = self.moment.Dg_lambda(theta, lam[:self.m]) 355 | Dg_T_flat = Dg.T.flatten() # Flatten in row-major order 356 | 357 | # Combine D diagonal and Dg' block values 358 | H = np.concatenate([D_diag, Dg_T_flat]) 359 | return H 360 | 361 | # def intermediate(self, alg_mod, iter_count, obj_value, inf_pr, inf_du, mu, 362 | # d_norm, regularization_size, alpha_du, alpha_pr, 363 | # ls_trials): 364 | # """Prints information at every Ipopt iteration.""" 365 | # iterate = self.get_current_iterate() 366 | # infeas = self.get_current_violations() 367 | # primal = iterate["x"] 368 | # jac = self.jacobian(primal) 369 | 370 | # print("Iteration:", iter_count) 371 | # print("Primal iterate:", primal) 372 | # print("Flattened Jacobian:", jac) 373 | 374 | 375 | 376 | exec(open('divergences.py').read()) 377 | 378 | 379 | def randiv(n=100, m=5, k=1, theta=0.0, rho=0.9, CP=20): 380 | """ 381 | Simulates instrumental variables regression data 382 | 383 | Returns: 384 | y: outcome variable (n x 1) 385 | covariates: matrix [x w] (n x (1 + k)) 386 | instruments: matrix [z w] (n x (m + k)) 387 | theory_val: theoretical strength measure (array length m) 388 | """ 389 | # Generate instrument strength vector 390 | tau = np.full(m, np.sqrt(CP / (m * n))) 391 | 392 | # Generate base data matrices 393 | z = np.random.randn(n, m) # Instruments 394 | w = np.random.randn(n, k) # Exogenous controls (corrected to k columns) 395 | 396 | # Generate correlated errors 397 | eta = np.random.randn(n, 1) 398 | u = rho * eta + np.sqrt(1 - rho**2) * np.random.randn(n, 1) 399 | 400 | # Create endogenous variable x 401 | x = z @ tau.reshape(-1, 1) + eta 402 | 403 | # Create outcome variable y (n,) 404 | y = x * theta + u 405 | 406 | # Create combined matrices 407 | covariates = np.hstack((x, w)) 408 | instruments = np.hstack((z, w)) 409 | 410 | 411 | 412 | return y, covariates, instruments 413 | 414 | 415 | n = 100 416 | n_instruments = 5 417 | n_exo = 1 418 | np.random.seed(42) 419 | y, x, z = randiv(n=n,k=n_exo, m=n_instruments) 420 | n, m = z.shape 421 | n, k = x.shape 422 | 423 | np.savetxt('y.csv', y, delimiter=',') # Shape (n,) 424 | np.savetxt('x.csv', x, delimiter=',') # Shape (n, n_exo) 425 | np.savetxt('z.csv', z, delimiter=',') # Shape (n, n_instruments) 426 | 427 | divergence = KullbackLeibler() 428 | momfun = DefaultMomentFunction(y,x,z) 429 | problem = MDProblem(momfun, divergence) 430 | 431 | pi = np.random.uniform(0,1,n) 432 | theta = np.random.uniform(0, 1, k) 433 | 434 | 435 | u0 = np.concatenate((pi, theta)) 436 | lb = np.concatenate((np.zeros_like(pi), -10.0 * np.ones_like(theta))) 437 | ub = np.concatenate((np.inf*np.ones_like(pi), 10.0 * np.ones_like(theta))) 438 | 439 | # Define constraint bounds. Our constraint vector has length m+1. 440 | # For equality constraints, we set cl = cu. 441 | # For instance, suppose we require c(u) == 0. 
cl = np.zeros(m + 1)
cu = np.zeros(m + 1)

# prob = MDOptProblem(problem, u0, lb, ub, cl, cu)
pi = u0[:n]
theta = u0[n:]
lam = np.ones(m)

# Smoke-test the moment function and its derivatives once before solving.
momfun.g(theta)
momfun.Dg(theta, pi)
momfun.Dg_lambda(theta, lam)
momfun.Dg_lambda_pi(theta, lam, pi)

p = cyipopt.Problem(
    n=len(u0),
    m=len(cl),
    problem_obj=problem,
    lb=lb,
    ub=ub,
    cl=cl,
    cu=cu,
)

p.add_option('derivative_test', 'second-order')
p.add_option('print_level', 5)
p.add_option('derivative_test_print_all', 'no')
p.solve(u0)
--------------------------------------------------------------------------------
/doc/mdexample_iv.jl:
--------------------------------------------------------------------------------
using MathOptInterface, Optimization, OptimizationMOI, OptimizationOptimJL, Ipopt
using ForwardDiff, DifferentiationInterface
using Divergences
using Statistics, LinearAlgebra
using Infiltrator

## --------------------------------------------------------------------- ##
## Moment Conditions & Jacobian
## --------------------------------------------------------------------- ##

## This function is the moment matrix of the estimation problem.
## It should always be defined by the user.
function g(θ, p)
    (y, x, z) = p.data
    z .* (y .- x * θ)
end

function Jgλ(θ, λ, p)
    (y, x, z) = p.data
    -(z * λ) .* x
end

function Hgλ!(H, θ, π, λ, p)
    fill!(H, 0.0)
end

## A sketch of the π-weighted gradient built from Jgλ (completed here so the
## file parses; the full versions used below are defined later in the file).
function ∇g(θ, λ, π, p)
    vec(sum(π .* Jgλ(θ, λ, p); dims = 1))
end

## -----
## High-performance version
## -----

y = randn(100)
x = randn(100, 2)
z = randn(100, 4)

p = (y = y,
    x = x,
    z = z,
    Y = similar(y),
    X = similar(x),
    Z = similar(z),
    ∂G = Matrix{Float64}(undef, size(z, 2), size(x, 2)));

function g(θ, p)
    (y, x, z, Y, X, Z, ∂G) = p
    mul!(Y, x, θ)
    broadcast!(-, Y, y, Y)
    broadcast!(*, Z, z, Y)
    return Z
end

function Dgn(θ, π, p)
    (y, x, z, Y, X, Z, ∂G) = p
    broadcast!(*, X, π, x)
    mul!(∂G, z', -X)
    return ∂G
end

function Dgλ(θ, λ, p)
    (y, x, z, Y, X, Z, ∂G) = p
    mul!(Y, z, λ)
    broadcast!(*, X, -Y, x)
    broadcast!(/, X, X, length(Y))
    return X
end

function Dgλ(θ, λ, π, p)
    (y, x, z, Y, X, Z, ∂G) = p
    ∂gλ = Dgλ(θ, λ, p)
    broadcast!(*, ∂gλ, ∂gλ, π)
    return ∂gλ
end

function Dgλ!(J, θ, λ, p)
    ∂gλ = Dgλ(θ, λ, p)
    copy!(J, vec(∂gλ))
end

function Dgλ!(J, θ, λ, π, p)
    ∂gλ = Dgλ(θ, λ, π, p)
    copy!(J, vec(∂gλ))
end

function Hgλ!(H, θ, λ, π, p)
    ## Note: Only the lower triangular part needs to be updated.
    fill!(H, 0.0)
end

## Derived

## This function is the gradient of the mean moment matrix,
## which is (m, k), where m is the number of moments and k
## is the number of parameters:
## \frac{\partial}{\partial\theta}\left[\sum_{i=1}^{n}\pi_{i}g(x_{i},\theta)/n\right]
## It should be written in a way that it can be used with ForwardDiff or Zygote.
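## For the linear IV moment g(xᵢ, θ) = zᵢ(yᵢ - xᵢ'θ) this derivative has the
## closed form -z'(π .* x); the 1/n normalization is applied by the callers
## (λ∇g and MOI.eval_constraint_jacobian below).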
function ∇g(θ, π, p)
    n, k, m = size(p)
    (y, x, z) = p.data
    res = -z' * (π .* x)
    return res
end

function λ∇g(θ, π, λ, p)
    n, k, m = size(p)
    vec(λ' * ∇g(θ, π, p) ./ n)
end

## This must return an (n, k) matrix.
function ∇gᵢλ!(dest::AbstractMatrix, θ, λ, p)
    n, k, m = size(p)
    (y, x, z) = p.data
    for j in axes(dest, 2)
        xv = view(x, :, j)
        dest[:, j] = (-(z * λ) .* xv) / n
    end
    return dest
end

function ∇gᵢλ(θ, λ, p)
    n, k, m = size(p)
    res = Matrix{promote_type(eltype(θ), eltype(λ))}(undef, n, k)
    ∇gᵢλ!(res, θ, λ, p)
end

## --------------------------------------------------------------------- ##
## Optimization Problem
## --------------------------------------------------------------------- ##

const MOI = MathOptInterface

struct MDProblem{D} <: MOI.AbstractNLPEvaluator
    div::Divergences.AbstractDivergence
    data::D
    size::Tuple{Int, Int, Int}
    backend::DifferentiationInterface.AbstractADType
end

Base.size(md::MDProblem) = md.size
divergence(md::MDProblem) = md.div

function MOI.initialize(md::MDProblem, rf::Vector{Symbol})
    for feat in rf
        if !(feat in [:Grad, :Jac, :Hess])
            error("Unsupported feature $feat")
        end
    end
end

MOI.features_available(md::MDProblem) = [:Grad, :Jac, :Hess]

## --------------------------------------------------------------------- ##
## Objective function
## --------------------------------------------------------------------- ##
function MOI.eval_objective(md::MDProblem, u::Vector{Float64})
    n, k, m = size(md)
    divergence(md)(view(u, 1:n))
end

function MOI.eval_objective_gradient(md::MDProblem, res, u)
    n, k, m = size(md)
    T = eltype(res)
    Divergences.gradient!(view(res, 1:n), divergence(md), view(u, 1:n))
    fill!(view(res, (n + 1):(n + k)), zero(T))
end

## --------------------------------------------------------------------- ##
## Constraints
## --------------------------------------------------------------------- ##
function MOI.eval_constraint(md::MDProblem, res, u)
    n, k, m = size(md)
    θ = view(u, (n + 1):(n + k))
    π = view(u, 1:n)
    G = g(θ, md)
    weighted_mean!(res, π, G)
end

## --------------------------------------------------------------------- ##
## Constraints Jacobian
## --------------------------------------------------------------------- ##
function MOI.jacobian_structure(md::MDProblem)
    n, k, m = size(md)
    rowcol_of_dense(n + k, m + 1)
end

function MOI.eval_constraint_jacobian(md::MDProblem, J, u)
    n, k, m = size(md)
    θ = view(u, (n + 1):(n + k))
    π = view(u, 1:n)
    G = g(θ, md)
    G .= G ./ n
    ∇gₙ = ∇g(θ, π, md)
    ∇gₙ .= ∇gₙ ./ n
    assign_matrix!(J, G, ∇gₙ)
end

## --------------------------------------------------------------------- ##
## Hessian of the Lagrangian
## --------------------------------------------------------------------- ##

## The Lagrangian is given by:
##
##   L(π, θ, λ) = D(π, p) + λ'g(θ)

function MOI.hessian_lagrangian_structure(md::MDProblem)
    n, k, m = size(md)
    hele = Int(n + n * k + k * (k + 1) ÷ 2)
    rows = Array{Int64}(undef, hele)
    cols = Array{Int64}(undef, hele)
    ## Diagonal elements
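    ## (Ipopt expects only the lower triangle of the Hessian, so every
    ## (row, col) pair pushed below satisfies row ≥ col; this first block is
    ## σ·∇²D(π), the contribution of the divergence term.)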
    for j in 1:n
        rows[j] = j
        cols[j] = j
    end
    idx = n + 1

    # for j in 1:k
    #     for s in 1:n
    #         rows[idx] = s
    #         cols[idx] = n + j
    #         idx += 1
    #     end
    # end

    ## Off-diagonal elements
    for j in 1:k
        for s in 1:n
            rows[idx] = n + j
            cols[idx] = s
            idx += 1
        end
    end

    ## Last block
    for j in 1:k
        for s in 1:j
            rows[idx] = n + j
            cols[idx] = n + s
            idx += 1
        end
    end

    [(r, c) for (r, c) in zip(rows, cols)]
end

function MOI.eval_hessian_lagrangian(md::MDProblem, hess, u, σ, λ)
    n, k, m = size(md)
    π = view(u, 1:n)
    θ = view(u, (n + 1):(n + k))
    if σ == 0
        @inbounds for j in 1:n
            hess[j] = 0.0
        end
    else
        hv = view(hess, 1:n)
        Divergences.hessian!(hv, divergence(md), π)
        hv .= hv .* σ
    end

    λv = view(λ, 1:m)
    # v = ∇gᵢ(md.data, θ)*λv./n
    ## As this matrix is symmetric, Ipopt expects only the lower-triangular
    ## entries to be specified!
    ## hess[n+1:n+n*k] .= vec(v')
    ∇gᵢλ!(reshape(view(hess, (n + 1):(n + n * k)), n, k), θ, λv, md)
    ## If k > 1, we should only keep the lower-triangular entries of
    ## the gradient of λ∇g.
    ## hess[n+n*k+1:n+n*k+k^2] .= gradient(λ∇g, md.backend, θ, Constant(π), Constant(λv), Constant(md.data))
    ## vv = gradient(λ∇g, md.backend, θ, Constant(π), Constant(λv), Constant(md.data))
    copy_lower_triangular!(view(hess, (n + n * k + 1):(n + n * k + k * (k + 1) ÷ 2)),
        jacobian(λ∇g, md.backend, θ, Constant(π), Constant(λv), Constant(md)))
end

## --------------------------------------------------------------------- ##
## Problem
## --------------------------------------------------------------------- ##

## Small problem to test the implementation
n = 100
k = 2
m = 4

𝒟 = ChiSquared()
ℳ𝒟 = FullyModifiedDivergence(𝒟, 0.7, 1.2)

mdprob = MDProblem(𝒟,
    (y = randn(n), x = randn(n, k), z = randn(n, m), u = randn(n), η = randn(n)),
    (n, k, m), AutoForwardDiff())

function simulate!(mdprob; θ = 0.5, γ = 0.1)
    n, k, m = size(mdprob)
    (y, x, z, u, η) = mdprob.data
    z .= randn(n, m)
    x[:, 1] .= z * repeat([γ], m) + randn!(η)
    x[:, 2] .= z * repeat([γ], m) + randn!(u)
    y .= x * repeat([θ], k) .+ randn!(u)
    return nothing
end

using Random
simulate!(mdprob)
n, k, m = size(mdprob)

model = Ipopt.Optimizer()
π = MOI.add_variables(model, n)
MOI.add_constraint.(model, π, MOI.GreaterThan(0.0))
θ = MOI.add_variables(model, k)
MOI.add_constraint.(model, θ, MOI.GreaterThan(-10.0))
MOI.add_constraint.(model, θ, MOI.LessThan(+10.0))

MOI.get(model, MOI.NumberOfVariables())

for i in 1:n
    MOI.set(model, MOI.VariablePrimalStart(), π[i], 1.0)
end

for i in 1:k
    MOI.set(model, MOI.VariablePrimalStart(), θ[i], 0.0)
end

lb = [zeros(m); n]
ub = [zeros(m); n]

MOI.set(model, MOI.ObjectiveSense(), MOI.MIN_SENSE)
block_data = MOI.NLPBlockData(MOI.NLPBoundsPair.(lb, ub), mdprob, true)
MOI.set(model, MOI.NLPBlock(), block_data)

model.options["derivative_test"] = "none"
model.options["derivative_test_print_all"] = "no"

model.options["print_level"] = 4

MOI.optimize!(model)

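## A quick post-solve check (a sketch, not in the original script): recover the
## implied probabilities π̂ and verify the normalization constraint Σπᵢ = n.
π̂ = MOI.get(model, MOI.VariablePrimal(), π)
@assert all(π̂ .>= -1e-8)
@assert isapprox(sum(π̂), n; rtol = 1e-6)
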
MOI.get(model, MOI.TerminationStatus())
MOI.get(model, MOI.DualStatus())
MOI.get(model, MOI.PrimalStatus())

MOI.get(model, MOI.SolveTimeSec())
MOI.get(model, MOI.BarrierIterations())

xstar = MOI.get(model, MOI.VariablePrimal(), θ)

function lagrangian(md::MDProblem, u, σ, λ)
    n, k, m = size(md)
    π = u[1:n]
    θ = u[(n + 1):(n + k)]
    σ * divergence(md)(π) + mean(π .* g(θ, md) * λ)
end

## (These checks were run on a smaller instance; with the n = 100 problem above,
## `p` must have n + k entries and `H` must match `hessian_lagrangian_structure`.)
p = [0.45793379249066035, 4.999416892014921, 9.182989399836064,
    3.6958463315972025, 6.220383439227501, 0.019436036309187443,
    2.063484686999562, 10.894774879314305, 8.25546846552471,
    4.029010019680072, -2.975818044182361, 1.4669020891138018]

lagrangian(mdprob, p, 1.0, [1.5, 0.0])

## Check the analytic Hessian of the Lagrangian against ForwardDiff.
H0 = ForwardDiff.hessian(x -> lagrangian(mdprob, x, 1.5, [1.5, 0.0]), p);
H = zeros(34)
MOI.eval_hessian_lagrangian(mdprob, H, p, 1.5, [1.5, 0.0])

H0 = ForwardDiff.hessian(x -> lagrangian(mdprob, x, 0.0, [1.5, 0]), p);
MOI.eval_hessian_lagrangian(mdprob, H, p, 0.0, [1.5, 0])

## --------------------------------------------------------------------- ##
## Simple MC
## --------------------------------------------------------------------- ##

β = Matrix{Float64}(undef, 1000, 2)
for j in 1:1000
    simulate!(mdprob)
    MOI.optimize!(model)
    β[j, :] .= MOI.get(model, MOI.VariablePrimal(), θ)
end

using StatsPlots

StatsPlots.density(β)
StatsPlots.histogram(β, nbins = 80)

## --------------------------------------------------------------------- ##
## Utilities
## --------------------------------------------------------------------- ##

"""
    assign_matrix!(J, g, ∇g)

Assigns the elements of the block matrix `X = [[g'; ones(1, n)] [∇g; zeros(1, k)]]`
into the preallocated array `J`.

# Arguments
- `J::Vector{Float64}`: A preallocated array of length `(m + 1) * (n + k)`, where `m`, `n`, and `k` are the dimensions of `g` and `∇g`.
- `g::AbstractMatrix{T}`: An `n × m` matrix.
- `∇g::AbstractMatrix{T}`: An `m × k` matrix.

# Behavior
- The function directly assigns:
  - The elements of the transpose of `g` (`g'`) in column-major order.
  - The elements of `∇g` in column-major order.
- The `ones(1, n)` and `zeros(1, k)` entries are written as constants.
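- Equivalent to `J .= vec([[g'; ones(1, n)] [∇g; zeros(1, k)]])` (see
  `assign_matrix` below), but without allocating the block matrix.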

# Example
```julia
A = [1 2; 3 4; 5 6] # 3 × 2 matrix (n = 3, m = 2)
B = [7 8; 9 10]     # 2 × 2 matrix (m = 2, k = 2)

J = Vector{Float64}(undef, (2 + 1) * (3 + 2)) # Preallocate array of length 15
assign_matrix!(J, A, B)

# J will be:
# [1.0, 2.0, 1.0, 3.0, 4.0, 1.0, 5.0, 6.0, 1.0, 7.0, 9.0, 0.0, 8.0, 10.0, 0.0]
```
"""
function assign_matrix!(J, gg, Dg)
    n, m = size(gg)  # gg is n × m
    k = size(Dg, 2)  # Dg is m × k

    # Each column of the stacked Jacobian has (m + 1) elements.
    # The first n columns come from gg' and the ones row;
    # the next k columns come from Dg and the zeros row.

    # First n columns (from gg' and the ones row)
    for j in 1:n
        # Elements from gg'
        for i in 1:m
            J[(j - 1) * (m + 1) + i] = gg[j, i]
        end
        # Element from the ones row
        J[j * (m + 1)] = 1.0
    end

    # Next k columns (from Dg and the zeros row)
    offset = n * (m + 1)
    for j in 1:k
        # Elements from Dg
        for i in 1:m
            J[offset + (j - 1) * (m + 1) + i] = Dg[i, j]
        end
        # Element from the zeros row
        J[offset + j * (m + 1)] = 0.0
    end
    return J
end

function assign_matrix(J, gg, Dg)
    n, m = size(gg)
    k = size(Dg, 2)
    R = [[gg'; ones(1, n)] [Dg; zeros(1, k)]]
    J .= vec(R)
end

using SparseArrays

function rowcol_of_sparse(g::SparseMatrixCSC; offset_row = 0, offset_col = 0)
    rows = rowvals(g)
    vals = nonzeros(g)
    m, n = size(g)
    tup = Tuple{Int64, Int64}[]
    for j in 1:n
        for i in nzrange(g, j)
            push!(tup, (rows[i] + offset_row, j + offset_col))
        end
    end
    return tup
end

function weighted_mean!(μ::AbstractVector{T}, w::AbstractVector, x::AbstractMatrix) where {T}
    n = size(x, 1)
    fill!(μ, zero(T))
    @inbounds for j in axes(x, 2)
        for i in axes(x, 1)
            μ[j] += w[i] * x[i, j] / n
        end
    end
    μ[end] = sum(w)
    return μ
end

function weighted_sum(G, w)
    n, m = size(G)
    res = zeros(eltype(G), m)
    @inbounds for j in axes(G, 2)
        for i in axes(G, 1)
            res[j] += w[i] * G[i, j]
        end
    end
    return res
end

function weighted_sum2(G, w)
    @inbounds vec(sum(w .* G, dims = 1))
end

"""
    rowcol_of_dense(n, m; offset_row = 0, offset_col = 0)

Returns the row and column indices of all elements of a dense matrix with `m`
rows and `n` columns, traversed in column-major order, with optional offsets
for rows and columns.

# Arguments
- `n::Int`: The number of columns.
- `m::Int`: The number of rows.
- `offset_row::Int` (default: 0): An offset added to each row index.
- `offset_col::Int` (default: 0): An offset added to each column index.

# Returns
A vector of tuples `(row, col)` representing the indices of all elements of the matrix.
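Used by `MOI.jacobian_structure` above to declare a dense sparsity pattern for
the `(m + 1) × (n + k)` constraint Jacobian.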

# Example
```julia
rowcol_of_dense(2, 2) # [(1, 1), (2, 1), (1, 2), (2, 2)]
```
"""
function rowcol_of_dense(n, m; offset_row = 0, offset_col = 0)
    tup = Tuple{Int64, Int64}[] # Initialize an empty vector of tuples
    @inbounds for j in 1:n
        for i in 1:m
            push!(tup, (i + offset_row, j + offset_col))
        end
    end
    return tup
end

function copy_lower_triangular!(x::AbstractVector{T}, A::Matrix{T}) where {T}
    @assert size(A, 1) == size(A, 2)
    n = size(A, 1)
    len = (n * (n + 1)) ÷ 2 # Length of output vector
    @assert length(x) == len
    idx = 1
    @inbounds for j in 1:n
        for i in j:n
            x[idx] = A[i, j]
            idx += 1
        end
    end
    return x
end

function copy_lower_triangular!(x::AbstractVector{T}, A::Vector{T}) where {T}
    n = length(A)
    @assert n == 1 "`copy_lower_triangular!` for vectors makes sense only for singleton vectors"
    @assert 1 == (n * (n + 1)) ÷ 2 "The dimension of the dest vector is wrong as it should be $(n*(n+1))÷2"
    x .= A
    return x
end

abstract type SmootherType end

struct Truncated <: SmootherType end
struct Bartlett <: SmootherType end

@inline weight(::Truncated, s, St) = 1.0
@inline weight(::Bartlett, s, St) = 1.0 - s / St

# Base version
function smoother_base(tt::T, G::Matrix, ξ::Integer) where {T <: SmootherType}
    N, M = size(G)
    nG = zeros(N, M)
    St = (2.0 * ξ + 1.0) / 2.0
    for m in 1:M
        for t in 1:N
            low = max(t - N, -ξ)
            high = min(t - 1, ξ)
            for s in low:high
                κ = weight(tt, s, St)
                @inbounds nG[t, m] += κ * G[t - s, m]
            end
        end
    end
    return nG ./ (2 * ξ + 1)
end

function smoother(tt::Truncated, G::Matrix{T}, ξ::Integer) where {T}
    N, M = size(G)
    nG = Matrix{T}(undef, N, M)
    smoother!(tt, nG, G, ξ)
end

function smoother!(tt::Truncated, dest, G::Matrix{T}, ξ::Integer) where {T}
    N, M = size(G)
    denom = 2ξ + 1 # normalization
    Threads.@threads for m in 1:M
        for t in 1:N
            low = max(t - N, -ξ)
            high = min(t - 1, ξ)
            acc = zero(T)
            @inbounds for s in low:high
                # For the truncated kernel the weight is identically 1.
                acc += G[t - s, m]
            end
            dest[t, m] = acc / denom
        end
    end
    return dest
end

# optprob = OptimizationFunction(divergence, Optimization.AutoForwardDiff(), cons = cons)
# prob = OptimizationProblem(optprob, x0, _p,
#     lcons = repeat([0.], 2),
#     ucons = repeat([0.], 2),
#     lb = [repeat([0], 100); -Inf],
#     ub = [repeat([+Inf], 100); +Inf])

# solver = OptimizationMOI.MOI.OptimizerWithAttributes(Ipopt.Optimizer, "print_level" => 0)

# solve(prob, solver)
--------------------------------------------------------------------------------
/doc/divergences_files/libs/quarto-html/popper.min.js:
--------------------------------------------------------------------------------
/**
 * @popperjs/core v2.11.7 - MIT License
 * (vendored, minified Quarto HTML support library; contents omitted)
 */
q,N="y"===j?D:P,I="y"===j?A:L,_="y"===j?"height":"width",F=k[j],X=F+b[N],Y=F-b[I],G=m?-H[_]/2:0,K=w===W?B[_]:H[_],Q=w===W?-H[_]:-B[_],Z=t.elements.arrow,$=m&&Z?g(Z):{width:0,height:0},ee=t.modifiersData["arrow#persistent"]?t.modifiersData["arrow#persistent"].padding:{top:0,right:0,bottom:0,left:0},te=ee[N],ne=ee[I],re=de(0,B[_],$[_]),oe=O?B[_]/2-G-re-te-R.mainAxis:K-re-te-R.mainAxis,ie=O?-B[_]/2+G+re+ne+R.mainAxis:Q+re+ne+R.mainAxis,ae=t.elements.arrow&&E(t.elements.arrow),se=ae?"y"===j?ae.clientTop||0:ae.clientLeft||0:0,fe=null!=(q=null==S?void 0:S[j])?q:0,ce=F+ie-fe,pe=de(m?a(X,F+oe-fe-se):X,F,m?i(Y,ce):Y);k[j]=pe,V[j]=pe-F}if(c){var ue,le="x"===j?D:P,he="x"===j?A:L,me=k[M],ve="y"===M?"height":"width",ye=me+b[le],ge=me-b[he],be=-1!==[D,P].indexOf(x),xe=null!=(ue=null==S?void 0:S[M])?ue:0,we=be?ye:me-B[ve]-H[ve]-xe+R.altAxis,Oe=be?me+B[ve]+H[ve]-xe-R.altAxis:ge,je=m&&be?function(e,t,n){var r=de(e,t,n);return r>n?n:r}(we,me,Oe):de(m?we:ye,me,m?Oe:ge);k[M]=je,V[M]=je-me}t.modifiersData[r]=V}},requiresIfExists:["offset"]};var me={name:"arrow",enabled:!0,phase:"main",fn:function(e){var t,n=e.state,r=e.name,o=e.options,i=n.elements.arrow,a=n.modifiersData.popperOffsets,s=C(n.placement),f=z(s),c=[P,L].indexOf(s)>=0?"height":"width";if(i&&a){var p=function(e,t){return Y("number"!=typeof(e="function"==typeof e?e(Object.assign({},t.rects,{placement:t.placement})):e)?e:G(e,k))}(o.padding,n),u=g(i),l="y"===f?D:P,d="y"===f?A:L,h=n.rects.reference[c]+n.rects.reference[f]-a[f]-n.rects.popper[c],m=a[f]-n.rects.reference[f],v=E(i),y=v?"y"===f?v.clientHeight||0:v.clientWidth||0:0,b=h/2-m/2,x=p[l],w=y-u[c]-p[d],O=y/2-u[c]/2+b,j=de(x,O,w),M=f;n.modifiersData[r]=((t={})[M]=j,t.centerOffset=j-O,t)}},effect:function(e){var t=e.state,n=e.options.element,r=void 0===n?"[data-popper-arrow]":n;null!=r&&("string"!=typeof r||(r=t.elements.popper.querySelector(r)))&&N(t.elements.popper,r)&&(t.elements.arrow=r)},requires:["popperOffsets"],requiresIfExists:["preventOverflow"]};function ve(e,t,n){return void 0===n&&(n={x:0,y:0}),{top:e.top-t.height-n.y,right:e.right-t.width+n.x,bottom:e.bottom-t.height+n.y,left:e.left-t.width-n.x}}function ye(e){return[D,L,A,P].some((function(t){return e[t]>=0}))}var ge={name:"hide",enabled:!0,phase:"main",requiresIfExists:["preventOverflow"],fn:function(e){var t=e.state,n=e.name,r=t.rects.reference,o=t.rects.popper,i=t.modifiersData.preventOverflow,a=J(t,{elementContext:"reference"}),s=J(t,{altBoundary:!0}),f=ve(a,r),c=ve(s,o,i),p=ye(f),u=ye(c);t.modifiersData[n]={referenceClippingOffsets:f,popperEscapeOffsets:c,isReferenceHidden:p,hasPopperEscaped:u},t.attributes.popper=Object.assign({},t.attributes.popper,{"data-popper-reference-hidden":p,"data-popper-escaped":u})}},be=Z({defaultModifiers:[ee,te,oe,ie]}),xe=[ee,te,oe,ie,ae,le,he,me,ge],we=Z({defaultModifiers:xe});e.applyStyles=ie,e.arrow=me,e.computeStyles=oe,e.createPopper=we,e.createPopperLite=be,e.defaultModifiers=xe,e.detectOverflow=J,e.eventListeners=ee,e.flip=le,e.hide=ge,e.offset=ae,e.popperGenerator=Z,e.popperOffsets=te,e.preventOverflow=he,Object.defineProperty(e,"__esModule",{value:!0})})); 6 | 7 | -------------------------------------------------------------------------------- /doc/divergences.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # ----------------------------------------------------------------------------- 4 | # Utility functions (mimicking the Julia xlogx / xlogy / aloga / alogab etc.) 
5 | # ----------------------------------------------------------------------------- 6 | def xlogx(x): 7 | """ 8 | Returns x * log(x) with the convention that 0*log(0)=0. 9 | Works for scalars or NumPy arrays. 10 | """ 11 | x = np.asarray(x) 12 | return np.where(x == 0, 0.0, x * np.log(x)) 13 | 14 | def xlogy(x, y): 15 | """ 16 | Returns x * log(y) with the convention that if x==0 then the result is 0. 17 | """ 18 | x = np.asarray(x) 19 | return np.where(x == 0, 0.0, x * np.log(y)) 20 | 21 | def alogab(a, b): 22 | """ 23 | a*log(a/b) - a + b. 24 | """ 25 | a = np.asarray(a) 26 | b = np.asarray(b) 27 | return xlogy(a, a / b) - a + b 28 | 29 | def blogab(a, b): 30 | """ 31 | -b*log(a/b) + a - b. 32 | """ 33 | a = np.asarray(a) 34 | b = np.asarray(b) 35 | return -xlogy(b, a / b) + a - b 36 | 37 | def aloga(a): 38 | """ 39 | a*log(a) - a + 1. 40 | """ 41 | a = np.asarray(a) 42 | return xlogx(a) - a + 1.0 43 | 44 | def loga(a): 45 | """ 46 | -log(a) + a - 1. 47 | """ 48 | a = np.asarray(a) 49 | # For a<=0, we return Inf. 50 | return np.where(a > 0, -np.log(a) + a - 1.0, np.inf) 51 | 52 | # ----------------------------------------------------------------------------- 53 | # Base divergence class 54 | # ----------------------------------------------------------------------------- 55 | class AbstractDivergence: 56 | def __call__(self, a, b=None): 57 | """ 58 | Evaluate the divergence. If b is provided, the two‐argument version is used. 59 | Otherwise the one‐argument version is used. 60 | """ 61 | if b is None: 62 | return self.eval(a) 63 | else: 64 | return self.eval(a, b) 65 | 66 | def eval_scalar(self, a, b=None): 67 | raise NotImplementedError("eval_scalar must be implemented in subclasses.") 68 | 69 | def eval(self, a, b=None): 70 | return np.sum(self.eval_scalar(a, b)) 71 | 72 | def gradient(self, a, b=None): 73 | """ 74 | Returns the gradient with respect to the second argument. 75 | For one-argument evaluation, returns the derivative with respect to a. 76 | """ 77 | raise NotImplementedError("gradient must be implemented in subclasses.") 78 | 79 | def hessian(self, a, b=None): 80 | """ 81 | Returns the (scalar or elementwise) Hessian. 82 | """ 83 | raise NotImplementedError("hessian must be implemented in subclasses.") 84 | 85 | # ----------------------------------------------------------------------------- 86 | # Kullback-Leibler Divergence 87 | # ----------------------------------------------------------------------------- 88 | class KullbackLeibler(AbstractDivergence): 89 | def eval_scalar(self, a, b=None): 90 | a = np.asarray(a) 91 | if b is None: 92 | return aloga(a) 93 | else: 94 | return aloga(a/b) 95 | 96 | def gradient(self, a, b=None): 97 | a = np.asarray(a) 98 | if b is None: 99 | # derivative of aloga: log(a) 100 | return np.where(a > 0, np.log(a), -np.inf) 101 | else: 102 | a = np.asarray(a) 103 | b = np.asarray(b) 104 | cond = (a > 0) & (b > 0) 105 | return np.where(cond, np.log(a / b), -np.inf) 106 | 107 | def hessian(self, a, b=None): 108 | a = np.asarray(a) 109 | if b is None: 110 | # Hessian: 1/a for a>0, else Inf. 
111 | return np.where(a > 0, 1.0 / a, np.inf) 112 | else: 113 | a = np.asarray(a) 114 | b = np.asarray(b) 115 | cond = (a > 0) & (b > 0) 116 | return np.where(cond, 1.0 / a, np.inf) 117 | 118 | # ----------------------------------------------------------------------------- 119 | # Reverse Kullback-Leibler Divergence 120 | # ----------------------------------------------------------------------------- 121 | class ReverseKullbackLeibler(AbstractDivergence): 122 | def eval_scalar(self, a, b=None): 123 | a = np.asarray(a) 124 | if b is None: 125 | return loga(a) 126 | else: 127 | return loga(a/b)*b 128 | 129 | def gradient(self, a, b=None): 130 | a = np.asarray(a) 131 | if b is None: 132 | # derivative of loga: -1/a + 1 133 | return np.where(a > 0, -1.0 / a + 1.0, -np.inf) 134 | else: 135 | a = np.asarray(a) 136 | b = np.asarray(b) 137 | cond = (a > 0) & (b > 0) 138 | # gradient: -b/a + 1. 139 | return np.where(cond, -b / a + 1.0, -np.inf) 140 | 141 | def hessian(self, a, b=None): 142 | a = np.asarray(a) 143 | if b is None: 144 | # Hessian: 1/a^2 for a>0. 145 | return np.where(a > 0, 1.0 / (a ** 2), np.inf) 146 | else: 147 | a = np.asarray(a) 148 | b = np.asarray(b) 149 | cond = (a > 0) & (b > 0) 150 | return np.where(cond, b / (a ** 2), np.inf) 151 | 152 | # ----------------------------------------------------------------------------- 153 | # Hellinger Divergence 154 | # ----------------------------------------------------------------------------- 155 | class Hellinger(AbstractDivergence): 156 | def eval_scalar(self, a, b=None): 157 | a = np.asarray(a) 158 | if b is None: 159 | # γ(a) = 2*a - 4*sqrt(a) + 2. 160 | return 2 * a - 4 * np.sqrt(a) + 2 161 | else: 162 | a = np.asarray(a) 163 | b = np.asarray(b) 164 | # γ(a,b) = 2*a + (2 - 4*sqrt(a/b))*b. 165 | return 2 * a + (2 - 4 * np.sqrt(a / b)) * b 166 | 167 | def gradient(self, a, b=None): 168 | a = np.asarray(a) 169 | if b is None: 170 | # derivative: 2 - 2/sqrt(a) 171 | return np.where(a > 0, 2 - 2 / np.sqrt(a), -np.inf) 172 | else: 173 | a = np.asarray(a) 174 | b = np.asarray(b) 175 | cond = (a > 0) & (b > 0) 176 | # derivative: 2*(1 - 1/sqrt(a/b)) 177 | return np.where(cond, 2 * (1 - 1 / np.sqrt(a / b)), -np.inf) 178 | 179 | def hessian(self, a, b=None): 180 | a = np.asarray(a) 181 | if b is None: 182 | # Hessian: 1/sqrt(a^3) 183 | return np.where(a > 0, 1.0 / np.sqrt(a ** 3), np.inf) 184 | else: 185 | a = np.asarray(a) 186 | b = np.asarray(b) 187 | cond = (a > 0) & (b > 0) 188 | return np.where(cond, np.sqrt(b) / np.sqrt(a ** 3), np.inf) 189 | 190 | # ----------------------------------------------------------------------------- 191 | # Chi-Squared Divergence 192 | # ----------------------------------------------------------------------------- 193 | class ChiSquared(AbstractDivergence): 194 | def eval_scalar(self, a, b=None): 195 | a = np.asarray(a) 196 | if b is None: 197 | # γ(a) = 0.5*(a - 1)^2. 198 | return 0.5 * (a - 1) ** 2 199 | else: 200 | a = np.asarray(a) 201 | b = np.asarray(b) 202 | # γ(a,b) = 0.5*(a - b)^2 / b. 203 | return 0.5 * ((a - b) ** 2) / b 204 | 205 | def gradient(self, a, b=None): 206 | a = np.asarray(a) 207 | if b is None: 208 | # derivative: a - 1. 209 | return a - 1 210 | else: 211 | a = np.asarray(a) 212 | b = np.asarray(b) 213 | # derivative: a/b - 1. 
214 | return a / b - 1 215 | 216 | def hessian(self, a, b=None): 217 | a = np.asarray(a) 218 | if b is None: 219 | return np.ones_like(a) 220 | else: 221 | b = np.asarray(b) 222 | return np.where(b != 0, 1.0 / b, np.inf) 223 | 224 | # ----------------------------------------------------------------------------- 225 | # Cressie-Read Divergence (with parameter alpha) 226 | # ----------------------------------------------------------------------------- 227 | class CressieRead(AbstractDivergence): 228 | def __init__(self, alpha): 229 | self.alpha = alpha 230 | 231 | def eval_scalar(self, a, b=None): 232 | a = np.asarray(a) 233 | α = self.alpha 234 | if b is None: 235 | # For one argument: if a>=0 then 236 | # (a^(1+α) + α - a*(1+α))/(α*(1+α)) else (if α>0 then 0 else NaN) 237 | cond = (a >= 0) 238 | val = (a ** (1 + α) + α - a * (1 + α)) / (α * (1 + α)) 239 | # For negative a, return 0 if α>0 else NaN. 240 | return np.where(cond, val, 0.0 if α > 0 else np.nan) 241 | else: 242 | b = np.asarray(b) 243 | cond = (a > 0) & (b > 0) 244 | val = ((a / b) ** (1 + α) + α - (a / b) * (1 + α)) * b / (α * (1 + α)) 245 | return np.where(cond, val, 0.0 if α > 0 else np.nan) 246 | 247 | def gradient(self, a, b=None): 248 | a = np.asarray(a) 249 | α = self.alpha 250 | if b is None: 251 | cond = (a >= 0) 252 | val = (a ** α - 1) / α 253 | return np.where(cond, val, 0.0 if α > 0 else np.nan) 254 | else: 255 | b = np.asarray(b) 256 | cond = (a >= 0) & (b > 0) 257 | val = ((a / b) ** α - 1) / α 258 | return np.where(cond, val, 0.0 if α > 0 else np.nan) 259 | 260 | def hessian(self, a, b=None): 261 | a = np.asarray(a) 262 | α = self.alpha 263 | if b is None: 264 | cond = (a > 0) 265 | val = a ** (α - 1) 266 | return np.where(cond, val, np.inf) 267 | else: 268 | b = np.asarray(b) 269 | cond = (a > 0) & (b > 0) 270 | val = a ** (α - 1) * b ** (-α) 271 | return np.where(cond, val, np.inf) 272 | 273 | # ----------------------------------------------------------------------------- 274 | # Modified Divergences 275 | # ----------------------------------------------------------------------------- 276 | class ModifiedDivergence(AbstractDivergence): 277 | """ 278 | A modified divergence which uses an underlying divergence (self.base) 279 | and applies an upper modification when a > ρ * b. 280 | 281 | The parameters are passed as a dictionary with keys: 282 | - 'rho': the threshold parameter, 283 | - 'gamma0', 'gamma1', 'gamma2': parameters for the upper modification. 
284 | """ 285 | def __init__(self, base_divergence, params): 286 | self.base = base_divergence 287 | self.params = params 288 | 289 | def eval_scalar(self, a, b=None): 290 | if b is None: 291 | a = np.asarray(a) 292 | rho = self.params.get('rho', 1) 293 | cond = a > rho 294 | # Upper modification for one argument: 295 | gamma0 = self.params.get('gamma0', 0) 296 | gamma1 = self.params.get('gamma1', 0) 297 | gamma2 = self.params.get('gamma2', 0) 298 | val_upper = gamma0 + gamma1 * (a - rho) + 0.5 * gamma2 * (a - rho) ** 2 299 | val_base = self.base.eval_scalar(a) 300 | return np.where(cond, val_upper, val_base) 301 | else: 302 | a = np.asarray(a) 303 | b = np.asarray(b) 304 | rho = self.params.get('rho', 1) 305 | cond = a > rho * b 306 | gamma0 = self.params.get('gamma0', 0) 307 | gamma1 = self.params.get('gamma1', 0) 308 | gamma2 = self.params.get('gamma2', 0) 309 | # Upper modification for two arguments: 310 | val_upper = (gamma0 + gamma1 * ((a / b) - rho) + 0.5 * gamma2 * ((a / b) - rho) ** 2) * b 311 | val_base = self.base.eval_scalar(a, b) 312 | return np.where(cond, val_upper, val_base) 313 | 314 | def gradient(self, a, b=None): 315 | rho = self.params.get('rho', 1) 316 | if b is None: 317 | a = np.asarray(a) 318 | cond = a > rho 319 | gamma1 = self.params.get('gamma1', 0) 320 | gamma2 = self.params.get('gamma2', 0) 321 | grad_upper = gamma1 + gamma2 * (a - rho) 322 | grad_base = self.base.gradient(a) 323 | return np.where(cond, grad_upper, grad_base) 324 | else: 325 | a = np.asarray(a) 326 | b = np.asarray(b) 327 | cond = a > rho * b 328 | gamma1 = self.params.get('gamma1', 0) 329 | gamma2 = self.params.get('gamma2', 0) 330 | grad_upper = gamma1 + (a / b) * gamma2 - gamma2 * rho 331 | grad_base = self.base.gradient(a, b) 332 | return np.where(cond, grad_upper, grad_base) 333 | 334 | def hessian(self, a, b=None): 335 | rho = self.params.get('rho', 1) 336 | if b is None: 337 | a = np.asarray(a) 338 | cond = a > rho 339 | gamma2 = self.params.get('gamma2', 0) 340 | hess_upper = gamma2 341 | hess_base = self.base.hessian(a) 342 | return np.where(cond, hess_upper, hess_base) 343 | else: 344 | a = np.asarray(a) 345 | b = np.asarray(b) 346 | cond = a > rho * b 347 | gamma2 = self.params.get('gamma2', 0) 348 | hess_upper = gamma2 / b 349 | hess_base = self.base.hessian(a, b) 350 | return np.where(cond, hess_upper, hess_base) 351 | 352 | # ----------------------------------------------------------------------------- 353 | # Fully Modified Divergence 354 | # ----------------------------------------------------------------------------- 355 | class FullyModifiedDivergence(AbstractDivergence): 356 | """ 357 | A fully modified divergence that uses both an upper and lower modification. 358 | 359 | Parameters are passed as a dictionary with keys: 360 | - 'rho' and 'phi': thresholds, 361 | - For the upper part: 'gamma0', 'gamma1', 'gamma2', 362 | - For the lower part: 'g0', 'g1', 'g2'. 
363 | """ 364 | def __init__(self, base_divergence, params): 365 | self.base = base_divergence 366 | self.params = params 367 | 368 | def eval_scalar(self, a, b=None): 369 | rho = self.params.get('rho', 1) 370 | phi = self.params.get('phi', 1) 371 | if b is None: 372 | a = np.asarray(a) 373 | cond_upper = a > rho 374 | cond_lower = a < phi 375 | gamma0 = self.params.get('gamma0', 0) 376 | gamma1 = self.params.get('gamma1', 0) 377 | gamma2 = self.params.get('gamma2', 0) 378 | val_upper = gamma0 + gamma1 * (a - rho) + 0.5 * gamma2 * (a - rho) ** 2 379 | g0 = self.params.get('g0', 0) 380 | g1 = self.params.get('g1', 0) 381 | g2 = self.params.get('g2', 0) 382 | val_lower = g0 + g1 * (a - phi) + 0.5 * g2 * (a - phi) ** 2 383 | val_base = self.base.eval_scalar(a) 384 | return np.where(cond_upper, val_upper, np.where(cond_lower, val_lower, val_base)) 385 | else: 386 | a = np.asarray(a) 387 | b = np.asarray(b) 388 | cond_upper = a > rho * b 389 | cond_lower = a < phi * b 390 | gamma0 = self.params.get('gamma0', 0) 391 | gamma1 = self.params.get('gamma1', 0) 392 | gamma2 = self.params.get('gamma2', 0) 393 | val_upper = (gamma0 + gamma1 * ((a / b) - rho) + 0.5 * gamma2 * ((a / b) - rho) ** 2) * b 394 | g0 = self.params.get('g0', 0) 395 | g1 = self.params.get('g1', 0) 396 | g2 = self.params.get('g2', 0) 397 | val_lower = (g0 + g1 * ((a / b) - phi) + 0.5 * g2 * ((a / b) - phi) ** 2) * b 398 | val_base = self.base.eval_scalar(a, b) 399 | return np.where(cond_upper, val_upper, np.where(cond_lower, val_lower, val_base)) 400 | 401 | def gradient(self, a, b=None): 402 | rho = self.params.get('rho', 1) 403 | phi = self.params.get('phi', 1) 404 | if b is None: 405 | a = np.asarray(a) 406 | cond_upper = a > rho 407 | cond_lower = a < phi 408 | gamma1 = self.params.get('gamma1', 0) 409 | gamma2 = self.params.get('gamma2', 0) 410 | grad_upper = gamma1 + gamma2 * (a - rho) 411 | g1 = self.params.get('g1', 0) 412 | g2 = self.params.get('g2', 0) 413 | grad_lower = g1 + g2 * (a - phi) 414 | grad_base = self.base.gradient(a) 415 | return np.where(cond_upper, grad_upper, np.where(cond_lower, grad_lower, grad_base)) 416 | else: 417 | a = np.asarray(a) 418 | b = np.asarray(b) 419 | cond_upper = a > rho * b 420 | cond_lower = a < phi * b 421 | gamma1 = self.params.get('gamma1', 0) 422 | gamma2 = self.params.get('gamma2', 0) 423 | grad_upper = gamma1 + (a / b) * gamma2 - gamma2 * rho 424 | g1 = self.params.get('g1', 0) 425 | g2 = self.params.get('g2', 0) 426 | grad_lower = g1 + (a / b) * g2 - g2 * phi 427 | grad_base = self.base.gradient(a, b) 428 | return np.where(cond_upper, grad_upper, np.where(cond_lower, grad_lower, grad_base)) 429 | 430 | def hessian(self, a, b=None): 431 | rho = self.params.get('rho', 1) 432 | phi = self.params.get('phi', 1) 433 | if b is None: 434 | a = np.asarray(a) 435 | cond_upper = a > rho 436 | cond_lower = a < phi 437 | gamma2 = self.params.get('gamma2', 0) 438 | hess_upper = gamma2 439 | g2 = self.params.get('g2', 0) 440 | hess_lower = g2 441 | hess_base = self.base.hessian(a) 442 | return np.where(cond_upper, hess_upper, np.where(cond_lower, hess_lower, hess_base)) 443 | else: 444 | a = np.asarray(a) 445 | b = np.asarray(b) 446 | cond_upper = a > rho * b 447 | cond_lower = a < phi * b 448 | gamma2 = self.params.get('gamma2', 0) 449 | hess_upper = gamma2 / b 450 | g2 = self.params.get('g2', 0) 451 | hess_lower = g2 / b 452 | hess_base = self.base.hessian(a, b) 453 | return np.where(cond_upper, hess_upper, np.where(cond_lower, hess_lower, hess_base)) 454 | 455 | # 
# -----------------------------------------------------------------------------
# Vectorized versions for arrays (optional)
# -----------------------------------------------------------------------------
def eval_divergence(d, a, b=None):
    """
    Evaluates the divergence d on each element of a (and b if provided)
    and returns the sum.
    """
    if b is None:
        a = np.asarray(a)
        return np.sum([d.eval_scalar(val) for val in np.nditer(a)])
    else:
        a = np.asarray(a)
        b = np.asarray(b)
        return np.sum([d.eval_scalar(a_val, b_val)
                       for a_val, b_val in zip(np.nditer(a), np.nditer(b))])

def gradient_divergence(d, a, b=None):
    """
    Returns an array with the elementwise gradient.
    """
    if b is None:
        a = np.asarray(a)
        return np.array([d.gradient(val) for val in np.nditer(a)])
    else:
        a = np.asarray(a)
        b = np.asarray(b)
        return np.array([d.gradient(a_val, b_val)
                         for a_val, b_val in zip(np.nditer(a), np.nditer(b))])

def hessian_divergence(d, a, b=None):
    """
    Returns an array with the elementwise Hessian.
    """
    if b is None:
        a = np.asarray(a)
        return np.array([d.hessian(val) for val in np.nditer(a)])
    else:
        a = np.asarray(a)
        b = np.asarray(b)
        return np.array([d.hessian(a_val, b_val)
                         for a_val, b_val in zip(np.nditer(a), np.nditer(b))])
--------------------------------------------------------------------------------
/doc/example_iv_effcient_analytical.jl:
--------------------------------------------------------------------------------
using MathOptInterface, Optimization, OptimizationMOI, OptimizationOptimJL, Ipopt
using ForwardDiff, DifferentiationInterface
using Divergences
using Statistics, LinearAlgebra
using Infiltrator
using StableRNGs

## -----
## High-performance helpers
## -----
function randiv(; n = 100, m = 5, k = 1, θ = 0.0, ρ = 0.9, CP = 20)
    ## Simulate
    ##   y = xθ + w'γ + u
    ##   x = zτ + w'ξ + η
    ## where z ∼ N(0, Iₘ), w ∼ N(0, Iₖ), η ∼ N(0, I), and
    ## u = ρη + √(1 - ρ²)ε with ε ∼ N(0, I), so u is correlated with η.
    τ = fill(sqrt(CP / (m * n)), m)
    z = randn(n, m)      ## Instruments
    w = randn(n, k - 1)  ## Exogenous
    η = randn(n, 1)
    u = ρ * η + √(1 - ρ^2) * randn(n, 1)
    x = z * τ + η
    y = x * θ + u
    return y, [x w], [z w]
end

y, x, z = randiv(; k = 1, CP = 5)

p = (y = y,
     x = x,
     z = z,
     Y = similar(y),
     X = similar(x),
     Z = similar(z),
     ∂G = Matrix{Float64}(undef, size(z, 2), size(x, 2)));

function g(θ, p)
    ## Gₙ(θ)
    (y, x, z, Y, X, Z, ∂G) = p.data
    mul!(Y, x, θ)
    broadcast!(-, Y, y, Y)
    broadcast!(*, Z, z, Y)
    return Z
end

function Dg(θ, π, p)
    (y, x, z, Y, X, Z, ∂G) = p.data
    broadcast!(*, X, π, x)
    mul!(∂G, z', -X)
    return ∂G
end

function Dgλ(θ, λ, p)
    (y, x, z, Y, X, Z, ∂G) = p.data
    mul!(Y, z, λ)
    broadcast!(*, X, -Y, x)
    broadcast!(/, X, X, size(X, 1))
    return X
end

function Dgλ(θ, λ, π, p)
    ## Hπθ
    (y, x, z, Y, X, Z, ∂G) = p.data
    ∂gλ = Dgλ(θ, λ, p)
    broadcast!(*, ∂gλ, ∂gλ, π)
    return ∂gλ
end

function Dgλ!(J, θ, λ, p)
    ∂gλ = Dgλ(θ, λ, p)
    return copy!(J, vec(∂gλ))
end

function Dgλ!(J, θ, λ, π, p)
    ∂gλ = Dgλ(θ, λ, π, p)
    return copy!(J, vec(∂gλ))
end

function Hgλ(θ, λ, π, p)
    n, k, m = size(p)
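    ## The IV moment g(θ) = z .* (y - x * θ) is affine in θ, so the θθ-block
    ## of ∂²(π'G(θ)λ)/∂θ² is identically zero and an explicit zero matrix is
    ## returned. For a nonlinear moment function this block would have to be
    ## computed (analytically or via AD).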
    return zeros(k, k)
end

## --------------------------------------------------------------------- ##
## Optimization Problem
## --------------------------------------------------------------------- ##

const MOI = MathOptInterface

struct MDProblem <: MOI.AbstractNLPEvaluator
    div::Divergences.AbstractDivergence
    data::Any
    backend::Any
end

Base.size(md::MDProblem) = (size(md.data.x)..., size(md.data.z, 2))
divergence(md::MDProblem) = md.div

function MOI.initialize(md::MDProblem, rf::Vector{Symbol})
    for feat in rf
        if !(feat in [:Grad, :Jac, :Hess])
            error("Unsupported feature $feat")
        end
    end
end

MOI.features_available(md::MDProblem) = [:Grad, :Jac, :Hess]

## --------------------------------------------------------------------- ##
## Objective function
## --------------------------------------------------------------------- ##
function MOI.eval_objective(md::MDProblem, u::Vector{Float64})
    ## Objective function
    ##   ∑ᵢ γ(πᵢ)
    n, k, m = size(md)
    return divergence(md)(view(u, 1:n))
end

function MOI.eval_objective_gradient(md::MDProblem, res, u)
    ## Gradient of the objective function
    ##   ∇_π ∑ᵢ γ(πᵢ) = (γ'(πᵢ))ᵢ; the θ-block is zero
    n, k, m = size(md)
    T = eltype(res)
    Divergences.gradient!(view(res, 1:n), divergence(md), view(u, 1:n))
    return fill!(view(res, (n + 1):(n + k)), zero(T))
end

## --------------------------------------------------------------------- ##
## Constraints
## --------------------------------------------------------------------- ##
function MOI.eval_constraint(md::MDProblem, res, u)
    ## Constraints
    ##   ∑ᵢ πᵢ gᵢ(θ) = 0
    ##   ∑ᵢ πᵢ = n
    n, k, m = size(md)
    θ = view(u, (n + 1):(n + k))
    π = view(u, 1:n)
    G = g(θ, md)
    return constraint!(res, π, G)
end

function constraint!(μ::AbstractVector{T}, w::AbstractVector, x::AbstractMatrix) where {T}
    fill!(μ, zero(T))
    @inbounds for j in axes(x, 2)
        for i in axes(x, 1)
            μ[j] += w[i] * x[i, j]
        end
    end
    μ[end] = sum(w)
    return μ
end

## --------------------------------------------------------------------- ##
## Constraints Jacobian
## --------------------------------------------------------------------- ##
function MOI.jacobian_structure(md::MDProblem)
    n, k, m = size(md)
    return rowcol_of_dense(n + k, m + 1)
end

function MOI.eval_constraint_jacobian(md::MDProblem, J, u)
    n, k, m = size(md)
    θ = view(u, (n + 1):(n + k))
    π = view(u, 1:n)
    G = g(θ, md)
    #@. G = G/n
    ∂g = Dg(θ, π, md)
    return assign_constraint_jacobian!(J, G, ∂g)
end

"""
    assign_constraint_jacobian!(J, g, ∇g)

Assigns the elements of the block matrix `X = [g' ∇g; ones(1, n) zeros(1, k)]`
to `J` in column-major order.

# Arguments
- `J::Vector{Float64}`: A preallocated vector of length `(m + 1) * (n + k)`, where `n`, `m`, and `k` are the dimensions of `g` and `∇g`.
- `g::AbstractMatrix{T}`: An `n × m` matrix.
- `∇g::AbstractMatrix{T}`: An `m × k` matrix.
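
# Example
With `n = 2`, `m = 2`, `k = 1`, `g = [g11 g12; g21 g22]`, and `∇g = [d11; d21]`,
the packed vector satisfies

```julia
J == [g11, g12, 1.0, g21, g22, 1.0, d11, d21, 0.0]
```

that is, `J == vec([g' ∇g; ones(1, n) zeros(1, k)])`.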
"""
function assign_constraint_jacobian!(J, gg, Dg)
    n, m = size(gg)
    k = size(Dg, 2)
    for j in 1:n
        # Elements from gg'
        for i in 1:m
            J[(j - 1) * (m + 1) + i] = gg[j, i]
        end
        # Element from the ones row
        J[j * (m + 1)] = 1.0
    end
    # Next k columns (from Dg and the zeros row)
    offset = n * (m + 1)
    for j in 1:k
        # Elements from Dg
        for i in 1:m
            J[offset + (j - 1) * (m + 1) + i] = Dg[i, j]
        end
        # Element from 0
        J[offset + j * (m + 1)] = 0.0
    end
    return J
end

## --------------------------------------------------------------------- ##
## Hessian of the Lagrangian L(π, θ, λ) = D(π, p) + λ'g(θ)
## --------------------------------------------------------------------- ##
function MOI.hessian_lagrangian_structure(md::MDProblem)
    n, k, m = size(md)
    hele = Int(n + n * k + k * (k + 1) ÷ 2)
    rows = Array{Int64}(undef, hele)
    cols = Array{Int64}(undef, hele)
    ## Diagonal elements
    for j in 1:n
        rows[j] = j
        cols[j] = j
    end
    idx = n + 1
    ## Off-diagonal elements
    for j in 1:k
        for s in 1:n
            rows[idx] = n + j
            cols[idx] = s
            idx += 1
        end
    end
    ## For a linear problem this block is not needed
    for j in 1:k
        for s in 1:j
            rows[idx] = n + j
            cols[idx] = n + s
            idx += 1
        end
    end
    return [(r, c) for (r, c) in zip(rows, cols)]
end

function MOI.eval_hessian_lagrangian(md::MDProblem, hess, u, σ, λ)
    n, k, m = size(md)
    π = view(u, 1:n)
    θ = view(u, (n + 1):(n + k))
    if σ == 0
        @inbounds for j in 1:n
            hess[j] = 0.0
        end
    else
        hv = view(hess, 1:n)
        Divergences.hessian!(hv, divergence(md), π)
        hv .= hv .* σ
    end
    λv = view(λ, 1:m)
    Dgλ!(view(hess, (n + 1):(n + n * k)), θ, λv, md)
    ## For a linear problem this block is not needed
    return copy_lower_triangular!(
        view(hess,
             (n + n * k + 1):(n + n * k + (k * (k + 1) ÷ 2))),
        Hgλ(θ, λ, π, md))
end

## --------------------------------------------------------------------- ##
## Problem with fixed theta
## --------------------------------------------------------------------- ##

struct SMDProblem <: MOI.AbstractNLPEvaluator
    div::Divergences.AbstractDivergence
    G::Matrix{Float64}
    data::Any
    backend::Any
end

divergence(md::SMDProblem) = md.div
momfun(md::SMDProblem) = md.G
## With θ fixed there are no θ-variables, so k = 0
Base.size(md::SMDProblem) = (size(md.G, 1), 0, size(md.G, 2))

function MOI.initialize(md::SMDProblem, rf::Vector{Symbol})
    for feat in rf
        if !(feat in [:Grad, :Jac, :Hess])
            error("Unsupported feature $feat")
        end
    end
end

MOI.features_available(md::SMDProblem) = [:Grad, :Jac, :Hess]

function MOI.eval_objective(md::SMDProblem, u::Vector{Float64})
    return divergence(md)(u)
end

function MOI.eval_objective_gradient(md::SMDProblem, res, u)
    return Divergences.gradient!(res, divergence(md), u)
end

## --------------------------------------------------------------------- ##
## Constraints
## --------------------------------------------------------------------- ##
function MOI.eval_constraint(md::SMDProblem, res, u)
    π = u
    G = md.G
    ## `constraint!` defined above for MDProblem is reused here
    return constraint!(res, π, G)
end
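
## Sanity check (sketch): with π = ones(n), `MOI.eval_constraint` returns
## [vec(sum(G, dims = 1)); n], so the equality bounds [zeros(m); n] used below
## force the π-weighted moments to zero while keeping ∑ᵢ πᵢ = n.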

## --------------------------------------------------------------------- ##
## Constraints Jacobian
## --------------------------------------------------------------------- ##
function MOI.jacobian_structure(md::SMDProblem)
    n, k, m = size(md)
    return rowcol_of_dense(n, m + 1)
end

function MOI.eval_constraint_jacobian(md::SMDProblem, J, u)
    π = u
    G = md.G
    #@. G = G/n
    return assign_constraint_jacobian!(J, G)
end

"""
    assign_constraint_jacobian!(J, g)

Assigns the elements of the block matrix `X = [g'; ones(1, n)]` to `J` in
column-major order.

# Arguments
- `J::Vector{Float64}`: A preallocated vector of length `(m + 1) * n`, where `n` and `m` are the dimensions of `g`.
- `g::AbstractMatrix{T}`: An `n × m` matrix.
"""
function assign_constraint_jacobian!(J, gg)
    n, m = size(gg)
    for j in 1:n
        # Elements from gg'
        for i in 1:m
            J[(j - 1) * (m + 1) + i] = gg[j, i]
        end
        # Element from the ones row
        J[j * (m + 1)] = 1.0
    end
    return J
end

## --------------------------------------------------------------------- ##
## Hessian of the Lagrangian L(π, θ, λ) = D(π, p) + λ'g(θ)
## --------------------------------------------------------------------- ##
function MOI.hessian_lagrangian_structure(md::SMDProblem)
    n, k, m = size(md)
    rows = Array{Int64}(undef, n)
    cols = Array{Int64}(undef, n)
    ## Diagonal elements
    for j in 1:n
        rows[j] = j
        cols[j] = j
    end
    return [(r, c) for (r, c) in zip(rows, cols)]
end

function MOI.eval_hessian_lagrangian(md::SMDProblem, hess, u, σ, λ)
    n, k, m = size(md)
    π = u
    if σ == 0
        @inbounds for j in 1:n
            hess[j] = 0.0
        end
    else
        hv = view(hess, 1:n)
        Divergences.hessian!(hv, divergence(md), π)
        hv .= hv .* σ
    end
end

## --------------------------------------------------------------------- ##
## Problem
## --------------------------------------------------------------------- ##

ℳ𝒟 = FullyModifiedDivergence(ReverseKullbackLeibler(), 0.1, 1.2)
mdprob = MDProblem(ℳ𝒟, p, nothing)
n, k, m = size(mdprob)

model = Ipopt.Optimizer()
π = MOI.add_variables(model, n)
MOI.add_constraint.(model, π, MOI.GreaterThan(0.0))
θ = MOI.add_variables(model, k)
MOI.add_constraint.(model, θ, MOI.GreaterThan(-10.0))
MOI.add_constraint.(model, θ, MOI.LessThan(+10.0))
for i in 1:n
    MOI.set(model, MOI.VariablePrimalStart(), π[i], 1.0)
end
for i in 1:k
    MOI.set(model, MOI.VariablePrimalStart(), θ[i], 0.0)
end
lb = [zeros(m); n]
ub = [zeros(m); n]
MOI.set(model, MOI.ObjectiveSense(), MOI.MIN_SENSE)

model_el = deepcopy(model)
model_md = deepcopy(model)

block_data = MOI.NLPBlockData(MOI.NLPBoundsPair.(lb, ub), mdprob, true)
MOI.set(model_md, MOI.NLPBlock(), block_data)
for i in 1:k
    MOI.set(model_md, MOI.VariablePrimalStart(), θ[i], -0.01)
end

mdprob = MDProblem(ReverseKullbackLeibler(), p, nothing)
block_data = MOI.NLPBlockData(MOI.NLPBoundsPair.(lb, ub), mdprob, true)
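## model_md carries the fully modified divergence, model_el the plain
## ReverseKullbackLeibler, so the Monte Carlo below can compare the two.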
MOI.set(model_el, MOI.NLPBlock(), block_data)

model.options["derivative_test"] = "none"
model.options["derivative_test_print_all"] = "no"

model.options["print_level"] = 4

MOI.optimize!(model)
MOI.get(model, MOI.TerminationStatus())
MOI.get(model, MOI.DualStatus())
MOI.get(model, MOI.PrimalStatus())

MOI.get(model, MOI.SolveTimeSec())
MOI.get(model, MOI.BarrierIterations())

xstar = MOI.get(model, MOI.VariablePrimal(), θ)

function lagrangian(md::MDProblem, u, σ, λ)
    n, k, m = size(md)
    π = u[1:n]
    θ = u[(n + 1):(n + k)]
    return σ .* divergence(md)(π) + mean(π .* g(θ, md) * λ)
end

## Test point for checking the analytical Hessian against ForwardDiff
u0 = [0.45793379249066035,
      4.999416892014921,
      9.182989399836064,
      3.6958463315972025,
      6.220383439227501,
      0.019436036309187443,
      2.063484686999562,
      10.894774879314305,
      8.25546846552471,
      4.029010019680072,
      -2.975818044182361,
      1.4669020891138018]

lagrangian(mdprob, u0, 1.0, [1.5, 0.0])

H0 = ForwardDiff.hessian(x -> lagrangian(mdprob, x, 1.5, [1.5, 0.0]), u0);
H = zeros(34)
MOI.eval_hessian_lagrangian(mdprob, H, u0, 1.5, [1.5, 0.0])

H0 = ForwardDiff.hessian(x -> lagrangian(mdprob, x, 0.0, [1.5, 0]), u0);
MOI.eval_hessian_lagrangian(mdprob, H, u0, 0.0, [1.5, 0])

## --------------------------------------------------------------------- ##
## Simple MC
## --------------------------------------------------------------------- ##

β_el = Matrix{Float64}(undef, 1000, 3)
f_el = zeros(1000)
β_md = Matrix{Float64}(undef, 1000, 3)
f_md = zeros(1000)
for j in 1:1000
    y, x, z = randiv(; k = 1, CP = 5)
    p.y .= y
    p.x .= x
    p.z .= z
    MOI.optimize!(model_el)
    MOI.optimize!(model_md)
    β_el[j, :] .= MOI.get(model_el, MOI.VariablePrimal(), θ)
    β_md[j, :] .= MOI.get(model_md, MOI.VariablePrimal(), θ)
    f_el[j] = model_el.inner.status
    f_md[j] = model_md.inner.status
end

using StatsPlots

StatsPlots.density(β_md)
StatsPlots.histogram(β_md; nbins = 80)

## --------------------------------------------------------------------- ##
## Utilities
## --------------------------------------------------------------------- ##

# function assign_matrix(J, gg, Dg)
#     n, m = size(gg)
#     k = size(Dg, 2)
#     R = [[gg'; ones(1, n)] [Dg; zeros(1, k)]]
#     J .= vec(R)
# end

using SparseArrays

function rowcol_of_sparse(g::SparseMatrixCSC; offset_row = 0, offset_col = 0)
    rows = rowvals(g)
    vals = nonzeros(g)
    m, n = size(g)
    tup = Tuple{Int64, Int64}[]
    for j in 1:n
        for i in nzrange(g, j)
            push!(tup, (rows[i] + offset_row, j + offset_col))
        end
    end
    return tup
end

function weighted_sum(G, w)
    n, m = size(G)
    res = zeros(eltype(G), m)
    @inbounds for j in axes(G, 2)
        for i in axes(G, 1)
            res[j] += w[i] * G[i, j]
        end
    end
    return res
end

function weighted_sum2(G, w)
    @inbounds vec(sum(w .* G, dims = 1))
end

"""
    rowcol_of_dense(n, m; offset_row = 0, offset_col = 0)

Returns the `(row, col)` index pairs of all entries of a dense `m × n` matrix
traversed column by column, with optional offsets for rows and columns. Note
that the first argument is the number of columns.

# Arguments
- `n::Int`: The number of columns of the (implicit) dense matrix.
- `m::Int`: The number of rows of the (implicit) dense matrix.
- `offset_row::Int` (default: 0): An offset added to each row index.
- `offset_col::Int` (default: 0): An offset added to each column index.

# Returns
A vector of tuples `(row, col)` listing all entries of the `m × n` matrix in
column-major order.

# Example
```julia
rowcol_of_dense(2, 2)  # [(1, 1), (2, 1), (1, 2), (2, 2)]
```
"""
function rowcol_of_dense(n, m; offset_row = 0, offset_col = 0)
    tup = Tuple{Int64, Int64}[]  # Initialize an empty vector of tuples
    @inbounds for j in 1:n
        for i in 1:m
            push!(tup, (i + offset_row, j + offset_col))
        end
    end
    return tup
end

function copy_lower_triangular!(x::AbstractVector{T}, A::Matrix{T}) where {T}
    @assert size(A, 1) == size(A, 2)
    n = size(A, 1)
    len = (n * (n + 1)) ÷ 2  # Length of the output vector
    @assert length(x) == len
    idx = 1
    @inbounds for j in 1:n
        for i in j:n
            x[idx] = A[i, j]
            idx += 1
        end
    end
    return x
end

function copy_lower_triangular!(x::AbstractVector{T}, A::Vector{T}) where {T}
    n = length(A)
    @assert n == 1 "`copy_lower_triangular!` for a vector makes sense only for a singleton vector"
    @assert length(x) == 1 "The destination vector should have length $((n * (n + 1)) ÷ 2)"
    x .= A
    return x
end

abstract type SmootherType end

struct Truncated <: SmootherType end
struct Bartlett <: SmootherType end

@inline weight(::Truncated, s, St) = 1.0
@inline weight(::Bartlett, s, St) = 1.0 - s / St

# Base version
function smoother_base(tt::T, G::Matrix, ξ::Integer) where {T <: SmootherType}
    N, M = size(G)
    nG = zeros(N, M)
    St = (2.0 * ξ + 1.0) / 2.0
    for m in 1:M
        for t in 1:N
            low = max((t - N), -ξ)
            high = min(t - 1, ξ)
            for s in low:high
                κ = weight(tt, s, St)
                @inbounds nG[t, m] += κ * G[t - s, m]
            end
        end
    end
    return (nG ./ (2 * ξ + 1))
end

function smoother(tt::Truncated, G::Matrix{T}, ξ::Integer) where {T}
    N, M = size(G)
    nG = Matrix{T}(undef, N, M)
    return smoother!(tt, nG, G, ξ)
end

function smoother!(tt::Truncated, dest, G::Matrix{T}, ξ::Integer) where {T}
    N, M = size(G)
    denom = 2ξ + 1  # normalization
    Threads.@threads for m in 1:M
        for t in 1:N
            low = max(t - N, -ξ)
            high = min(t - 1, ξ)
            acc = zero(T)
            @inbounds for s in low:high
                κ = weight(tt, s, ξ)
                acc += κ * G[t - s, m]
            end
            dest[t, m] = acc / denom
        end
    end
    return dest
end

# optprob = OptimizationFunction(divergence, Optimization.AutoForwardDiff(), cons = cons)
# prob = OptimizationProblem(optprob, x0, _p,
#                            lcons = repeat([0.], 2),
#                            ucons = repeat([0.], 2),
#                            lb = [repeat([0], 100); -Inf],
#                            ub = [repeat([+Inf], 100); +Inf])

# solver = OptimizationMOI.MOI.OptimizerWithAttributes(Ipopt.Optimizer, "print_level" => 0)

# solve(prob, solver)
--------------------------------------------------------------------------------